Consolidate stegasoo and verisoo into soosef monorepo

Merge stegasoo (v4.3.0, steganography) and verisoo (v0.1.0, attestation)
as subpackages under soosef.stegasoo and soosef.verisoo. This eliminates
cross-repo coordination and enables atomic changes across the full stack.

- Copy stegasoo (34 modules) and verisoo (15 modules) into src/soosef/
- Convert all verisoo absolute imports to relative imports
- Rewire ~50 import sites across soosef code (cli, web, keystore, tests)
- Replace stegasoo/verisoo pip deps with inlined code + pip extras
  (stego-dct, stego-audio, attest, web, api, cli, fieldkit, all, dev)
- Add _availability.py for runtime feature detection
- Add unified FastAPI mount point at soosef.api
- Copy and adapt tests from both repos (155 pass, 1 skip)
- Drop standalone CLI/web frontends; keep FastAPI as optional modules
- Both source repos tagged pre-monorepo-consolidation on GitHub

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Aaron D. Lee 2026-04-01 19:06:14 -04:00
parent c8dc9be011
commit e3bc1cce1f
91 changed files with 30573 additions and 62 deletions

View File

@ -123,8 +123,8 @@ def create_app(config: SoosefConfig | None = None) -> Flask:
# Stegasoo capabilities
try:
from stegasoo import HAS_AUDIO_SUPPORT, get_channel_status, has_dct_support
from stegasoo.constants import (
from soosef.stegasoo import HAS_AUDIO_SUPPORT, get_channel_status, has_dct_support
from soosef.stegasoo.constants import (
DEFAULT_PASSPHRASE_WORDS,
MAX_FILE_PAYLOAD_SIZE,
MAX_MESSAGE_CHARS,
@ -164,7 +164,7 @@ def create_app(config: SoosefConfig | None = None) -> Flask:
# Verisoo availability
try:
import verisoo # noqa: F401
import soosef.verisoo # noqa: F401
has_verisoo = True
except ImportError:
@ -241,13 +241,13 @@ def _register_stegasoo_routes(app: Flask) -> None:
"""
import temp_storage
from auth import admin_required, login_required
from stegasoo import (
from soosef.stegasoo import (
export_rsa_key_pem,
generate_credentials,
get_channel_status,
load_rsa_key,
)
from stegasoo.constants import (
from soosef.stegasoo.constants import (
DEFAULT_PASSPHRASE_WORDS,
MAX_PIN_LENGTH,
MIN_PASSPHRASE_WORDS,
@ -255,7 +255,7 @@ def _register_stegasoo_routes(app: Flask) -> None:
TEMP_FILE_EXPIRY,
VALID_RSA_SIZES,
)
from stegasoo.qr_utils import (
from soosef.stegasoo.qr_utils import (
can_fit_in_qr,
generate_qr_code,
)

View File

@ -273,7 +273,7 @@ def verify_and_reset_admin_password(recovery_key: str, new_password: str) -> tup
Returns:
(success, message) tuple
"""
from stegasoo.recovery import verify_recovery_key
from soosef.stegasoo.recovery import verify_recovery_key
stored_hash = get_recovery_key_hash()
if not stored_hash:

View File

@ -21,7 +21,7 @@ bp = Blueprint("attest", __name__)
def _get_storage():
"""Get verisoo LocalStorage pointed at soosef's attestation directory."""
from verisoo.storage import LocalStorage
from soosef.verisoo.storage import LocalStorage
from soosef.paths import ATTESTATIONS_DIR
@ -30,7 +30,7 @@ def _get_storage():
def _get_private_key():
"""Load the Ed25519 private key from soosef identity directory."""
from verisoo.crypto import load_private_key
from soosef.verisoo.crypto import load_private_key
from soosef.paths import IDENTITY_PRIVATE_KEY
@ -139,7 +139,7 @@ def attest():
auto_exif = request.form.get("auto_exif", "on") == "on"
# Create the attestation
from verisoo.attestation import create_attestation
from soosef.verisoo.attestation import create_attestation
attestation = create_attestation(
image_data=image_data,
@ -167,7 +167,7 @@ def attest():
# Save our own identity so we can look it up during verification
from cryptography.hazmat.primitives.serialization import Encoding, PublicFormat
from verisoo.models import Identity
from soosef.verisoo.models import Identity
pub_key = private_key.public_key()
pub_bytes = pub_key.public_bytes(Encoding.Raw, PublicFormat.Raw)
@ -217,7 +217,7 @@ def _verify_image(image_data: bytes) -> dict:
matches list of match dicts (record, match_type, distances, attestor_name)
record_count total records searched
"""
from verisoo.hashing import compute_all_distances, hash_image, is_same_image
from soosef.verisoo.hashing import compute_all_distances, hash_image, is_same_image
query_hashes = hash_image(image_data)
storage = _get_storage()

View File

@ -41,7 +41,7 @@ def register_stego_routes(app, **deps):
temp_storage = deps["temp_storage"]
_has_qrcode_read = deps.get("has_qrcode_read", False)
from stegasoo import (
from soosef.stegasoo import (
HAS_AUDIO_SUPPORT,
CapacityError,
DecryptionError,
@ -60,13 +60,13 @@ def register_stego_routes(app, **deps):
validate_rsa_key,
validate_security_factors,
)
from stegasoo.channel import resolve_channel_key
from stegasoo.constants import (
from soosef.stegasoo.channel import resolve_channel_key
from soosef.stegasoo.constants import (
TEMP_FILE_EXPIRY,
THUMBNAIL_QUALITY,
THUMBNAIL_SIZE,
)
from stegasoo.qr_utils import (
from soosef.stegasoo.qr_utils import (
decompress_data,
extract_key_from_qr,
is_compressed,
@ -686,7 +686,7 @@ def register_stego_routes(app, **deps):
return _error_response(result.error_message)
# Pre-check payload capacity BEFORE encode (fail fast)
from stegasoo.steganography import will_fit_by_mode
from soosef.stegasoo.steganography import will_fit_by_mode
payload_size = (
len(payload.data) if hasattr(payload, "data") else len(payload.encode("utf-8"))
@ -1613,8 +1613,8 @@ def register_stego_routes(app, **deps):
@app.route("/about")
def about():
from auth import get_current_user
from stegasoo import has_argon2
from stegasoo.channel import get_channel_status
from soosef.stegasoo import has_argon2
from soosef.stegasoo.channel import get_channel_status
channel_status = get_channel_status()
current_user = get_current_user()
@ -1644,7 +1644,7 @@ def register_stego_routes(app, **deps):
@login_required
def api_tools_capacity():
"""Calculate image capacity for steganography."""
from stegasoo.dct_steganography import estimate_capacity_comparison
from soosef.stegasoo.dct_steganography import estimate_capacity_comparison
carrier = request.files.get("image")
if not carrier:
@ -1666,7 +1666,7 @@ def register_stego_routes(app, **deps):
"""Strip EXIF/metadata from image."""
import io
from stegasoo.utils import strip_image_metadata
from soosef.stegasoo.utils import strip_image_metadata
image_file = request.files.get("image")
if not image_file:
@ -1689,7 +1689,7 @@ def register_stego_routes(app, **deps):
@login_required
def api_tools_exif():
"""Read EXIF metadata from image."""
from stegasoo.utils import read_image_exif
from soosef.stegasoo.utils import read_image_exif
image_file = request.files.get("image")
if not image_file:
@ -1718,7 +1718,7 @@ def register_stego_routes(app, **deps):
@login_required
def api_tools_exif_update():
"""Update EXIF fields in image."""
from stegasoo.utils import write_image_exif
from soosef.stegasoo.utils import write_image_exif
image_file = request.files.get("image")
if not image_file:
@ -1757,7 +1757,7 @@ def register_stego_routes(app, **deps):
@login_required
def api_tools_exif_clear():
"""Remove all EXIF metadata from image."""
from stegasoo.utils import strip_image_metadata
from soosef.stegasoo.utils import strip_image_metadata
image_file = request.files.get("image")
if not image_file:

View File

@ -73,7 +73,7 @@ def _get_channel_info(resolved_key):
Returns:
(mode, fingerprint) tuple
"""
from stegasoo import get_channel_status, has_channel_key
from soosef.stegasoo import get_channel_status, has_channel_key
if resolved_key == "":
return "public", None
@ -94,7 +94,7 @@ def _get_channel_info(resolved_key):
def encode_operation(params: dict) -> dict:
"""Handle encode operation."""
logger.debug("encode_operation: mode=%s", params.get("embed_mode", "lsb"))
from stegasoo import FilePayload, encode
from soosef.stegasoo import FilePayload, encode
# Decode base64 inputs
carrier_data = base64.b64decode(params["carrier_b64"])
@ -173,7 +173,7 @@ def _write_decode_progress(progress_file: str | None, percent: int, phase: str)
def decode_operation(params: dict) -> dict:
"""Handle decode operation."""
logger.debug("decode_operation: mode=%s", params.get("embed_mode", "auto"))
from stegasoo import decode
from soosef.stegasoo import decode
progress_file = params.get("progress_file")
@ -227,7 +227,7 @@ def decode_operation(params: dict) -> dict:
def compare_operation(params: dict) -> dict:
"""Handle compare_modes operation."""
from stegasoo import compare_modes
from soosef.stegasoo import compare_modes
carrier_data = base64.b64decode(params["carrier_b64"])
result = compare_modes(carrier_data)
@ -240,7 +240,7 @@ def compare_operation(params: dict) -> dict:
def capacity_check_operation(params: dict) -> dict:
"""Handle will_fit_by_mode operation."""
from stegasoo import will_fit_by_mode
from soosef.stegasoo import will_fit_by_mode
carrier_data = base64.b64decode(params["carrier_b64"])
@ -259,7 +259,7 @@ def capacity_check_operation(params: dict) -> dict:
def encode_audio_operation(params: dict) -> dict:
"""Handle audio encode operation (v4.3.0)."""
logger.debug("encode_audio_operation: mode=%s", params.get("embed_mode", "audio_lsb"))
from stegasoo import FilePayload, encode_audio
from soosef.stegasoo import FilePayload, encode_audio
carrier_data = base64.b64decode(params["carrier_b64"])
reference_data = base64.b64decode(params["reference_b64"])
@ -324,7 +324,7 @@ def encode_audio_operation(params: dict) -> dict:
def decode_audio_operation(params: dict) -> dict:
"""Handle audio decode operation (v4.3.0)."""
logger.debug("decode_audio_operation: mode=%s", params.get("embed_mode", "audio_auto"))
from stegasoo import decode_audio
from soosef.stegasoo import decode_audio
progress_file = params.get("progress_file")
_write_decode_progress(progress_file, 5, "reading")
@ -370,9 +370,9 @@ def decode_audio_operation(params: dict) -> dict:
def audio_info_operation(params: dict) -> dict:
"""Handle audio info operation (v4.3.0)."""
from stegasoo import get_audio_info
from stegasoo.audio_steganography import calculate_audio_lsb_capacity
from stegasoo.spread_steganography import calculate_audio_spread_capacity
from soosef.stegasoo import get_audio_info
from soosef.stegasoo.audio_steganography import calculate_audio_lsb_capacity
from soosef.stegasoo.spread_steganography import calculate_audio_spread_capacity
audio_data = base64.b64decode(params["audio_b64"])
@ -397,7 +397,7 @@ def audio_info_operation(params: dict) -> dict:
def channel_status_operation(params: dict) -> dict:
"""Handle channel status check (v4.0.0)."""
from stegasoo import get_channel_status
from soosef.stegasoo import get_channel_status
status = get_channel_status()
reveal = params.get("reveal", False)

View File

@ -40,44 +40,69 @@ classifiers = [
]
dependencies = [
"stegasoo>=4.3.0",
"verisoo>=0.1.0",
"pillow>=10.0.0",
"cryptography>=41.0.0",
"argon2-cffi>=23.0.0",
"zstandard>=0.22.0",
"cbor2>=5.6.0",
"uuid-utils>=0.9.0",
]
[project.optional-dependencies]
web = [
"flask>=3.0.0",
"gunicorn>=21.0.0",
# Stegasoo web extras
"stegasoo[web]",
# Verisoo storage
"lmdb>=1.4.0",
stego-dct = [
"numpy>=2.0.0",
"scipy>=1.10.0",
"jpeglib>=1.0.0",
"reedsolo>=1.7.0",
]
stego-audio = [
"pydub>=0.25.0",
"numpy>=2.0.0",
"scipy>=1.10.0",
"soundfile>=0.12.0",
"reedsolo>=1.7.0",
]
stego-compression = [
"lz4>=4.0.0",
]
attest = [
"imagehash>=4.3.0",
"lmdb>=1.4.0",
"exifread>=3.0.0",
]
cli = [
"click>=8.0.0",
"rich>=13.0.0",
"stegasoo[cli]",
"verisoo[cli]",
"qrcode>=7.30",
"piexif>=1.1.0",
]
web = [
"flask>=3.0.0",
"gunicorn>=21.0.0",
"qrcode>=7.3.0",
"pyzbar>=0.1.9",
"piexif>=1.1.0",
"soosef[attest,stego-dct]",
]
api = [
"fastapi>=0.109.0",
"uvicorn[standard]>=0.27.0",
"python-multipart>=0.0.6",
"soosef[stego-dct]",
]
fieldkit = [
"watchdog>=4.0.0",
"pyudev>=0.24.0",
]
federation = [
"aiohttp>=3.9.0",
]
rpi = [
"soosef[web,cli,fieldkit]",
"gpiozero>=2.0",
]
all = [
"soosef[web,cli,fieldkit]",
"stegasoo[all]",
"verisoo[all]",
"soosef[stego-dct,stego-audio,stego-compression,attest,cli,web,api,fieldkit,federation]",
]
dev = [
"soosef[all]",
@ -107,6 +132,9 @@ packages = ["src/soosef", "frontends"]
[tool.hatch.build.targets.wheel.sources]
"src" = ""
[tool.hatch.build.targets.wheel.force-include]
"src/soosef/stegasoo/data/bip39-words.txt" = "soosef/stegasoo/data/bip39-words.txt"
[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = ["test_*.py"]
@ -123,6 +151,14 @@ line-length = 100
select = ["E", "F", "I", "N", "W", "UP"]
ignore = ["E501"]
[tool.ruff.lint.per-file-ignores]
# YCbCr colorspace variables (R, G, B, Y, Cb, Cr) are standard names
"src/soosef/stegasoo/dct_steganography.py" = ["N803", "N806"]
# MDCT transform variables (N, X) are standard mathematical names
"src/soosef/stegasoo/spread_steganography.py" = ["N803", "N806"]
# Package __init__.py has imports after try/except and aliases - intentional structure
"src/soosef/stegasoo/__init__.py" = ["E402"]
[tool.mypy]
python_version = "3.11"
warn_return_any = true

View File

@ -0,0 +1,21 @@
"""Runtime availability checks for optional soosef subpackages."""
def has_stegasoo() -> bool:
    """Return True when soosef.stegasoo can be imported (core deps are always present)."""
    try:
        import soosef.stegasoo  # noqa: F401
    except ImportError:
        return False
    return True
def has_verisoo() -> bool:
    """Return True when soosef.verisoo can be imported (requires the [attest] extra)."""
    try:
        import soosef.verisoo  # noqa: F401
    except ImportError:
        return False
    return True

36
src/soosef/api.py Normal file
View File

@ -0,0 +1,36 @@
"""Optional unified FastAPI app combining stegasoo and verisoo APIs.
Usage::
uvicorn soosef.api:app --host 0.0.0.0 --port 8000
Requires the [api] extra: pip install soosef[api]
"""
from fastapi import FastAPI
app = FastAPI(
title="SooSeF API",
version="0.1.0",
description="Unified steganography and attestation API",
)
try:
from soosef.stegasoo.api import app as stego_api
app.mount("/stego", stego_api)
except ImportError:
pass
try:
from soosef.verisoo.api import app as verisoo_api
app.mount("/attest", verisoo_api)
except ImportError:
pass
@app.get("/health")
async def health():
"""Health check endpoint."""
return {"status": "ok"}

View File

@ -226,7 +226,7 @@ def stego():
try:
from stegasoo.cli import cli as stegasoo_cli
from soosef.stegasoo.cli import cli as stegasoo_cli
# Re-register stegasoo commands under the 'stego' group
for name, cmd in stegasoo_cli.commands.items():
@ -249,7 +249,7 @@ def attest():
try:
from verisoo.cli import main as verisoo_cli
from soosef.verisoo.cli import main as verisoo_cli
for name, cmd in verisoo_cli.commands.items():
attest.add_command(cmd, name)
@ -288,8 +288,8 @@ def _attest_file(
PublicFormat,
load_pem_private_key,
)
from verisoo.attestation import create_attestation
from verisoo.models import Identity
from soosef.verisoo.attestation import create_attestation
from soosef.verisoo.models import Identity
from soosef.config import SoosefConfig
from soosef.federation.chain import ChainStore
@ -380,8 +380,8 @@ def batch(directory: Path, caption: str | None, extensions: str, no_exif: bool)
soosef attest batch ./docs --extensions pdf,png --no-exif
"""
from verisoo.crypto import load_private_key
from verisoo.storage import LocalStorage
from soosef.verisoo.crypto import load_private_key
from soosef.verisoo.storage import LocalStorage
from soosef.paths import ATTESTATIONS_DIR, IDENTITY_PRIVATE_KEY
@ -1013,7 +1013,7 @@ def backfill():
private_key = load_pem_private_key(priv_pem, password=None)
try:
from verisoo.storage import LocalStorage
from soosef.verisoo.storage import LocalStorage
storage = LocalStorage(base_path=ATTESTATIONS_DIR)
stats = storage.get_stats()

View File

@ -206,7 +206,7 @@ class KeystoreManager:
def set_channel_key(self, key: str) -> None:
"""Store a channel key."""
from stegasoo import validate_channel_key
from soosef.stegasoo import validate_channel_key
validate_channel_key(key)
self._channel_key_file.parent.mkdir(parents=True, exist_ok=True)
@ -215,7 +215,7 @@ class KeystoreManager:
def generate_channel_key(self) -> str:
"""Generate and store a new channel key."""
from stegasoo import generate_channel_key
from soosef.stegasoo import generate_channel_key
key: str = generate_channel_key()
self.set_channel_key(key)
@ -245,7 +245,7 @@ class KeystoreManager:
"the key in the keystore first."
)
from stegasoo.crypto import get_channel_fingerprint
from soosef.stegasoo.crypto import get_channel_fingerprint
old_key = self._channel_key_file.read_text().strip()
old_fp = get_channel_fingerprint(old_key)
@ -281,7 +281,7 @@ class KeystoreManager:
if self.has_channel_key():
key = self.get_channel_key()
if key:
from stegasoo.crypto import get_channel_fingerprint
from soosef.stegasoo.crypto import get_channel_fingerprint
channel_fp = get_channel_fingerprint(key)

View File

@ -0,0 +1,401 @@
"""
Stegasoo - Secure Steganography with Multi-Factor Authentication (v4.3.0)
Changes in v4.0.0:
- Added channel key support for deployment/group isolation
- New functions: get_channel_key, get_channel_fingerprint, generate_channel_key, etc.
- encode() and decode() now accept channel_key parameter
"""
__version__ = "4.3.0"
# Core functionality
# Channel key management (v4.0.0)
from .channel import (
clear_channel_key,
format_channel_key,
generate_channel_key,
get_channel_key,
get_channel_status,
has_channel_key,
set_channel_key,
validate_channel_key,
)
# Audio support — gated by STEGASOO_AUDIO env var and dependency availability
from .constants import AUDIO_ENABLED, VIDEO_ENABLED
# Crypto functions
from .crypto import get_active_channel_key, get_channel_fingerprint, has_argon2
from .decode import decode, decode_file, decode_text
from .encode import encode
# Credential generation
from .generate import (
export_rsa_key_pem,
generate_credentials,
generate_passphrase,
generate_pin,
generate_rsa_key,
load_rsa_key,
)
# Image utilities
from .image_utils import (
compare_capacity,
get_image_info,
)
# Backend registry
from .backends import EmbeddingBackend, registry as backend_registry
# Platform presets
from .platform_presets import PLATFORMS, get_preset
# Steganalysis
from .steganalysis import check_image
from .backends.registry import BackendNotFoundError
# Steganography functions
from .steganography import (
calculate_capacity_by_mode,
compare_modes,
has_dct_support,
will_fit_by_mode,
)
# Utilities
from .utils import generate_filename
HAS_AUDIO_SUPPORT = AUDIO_ENABLED
HAS_VIDEO_SUPPORT = VIDEO_ENABLED
if AUDIO_ENABLED:
from .audio_utils import (
detect_audio_format,
get_audio_info,
has_ffmpeg_support,
validate_audio,
)
from .decode import decode_audio
from .encode import encode_audio
else:
detect_audio_format = None
get_audio_info = None
has_ffmpeg_support = None
validate_audio = None
encode_audio = None
decode_audio = None
# Video support — gated by STEGASOO_VIDEO env var and ffmpeg + audio deps
if VIDEO_ENABLED:
from .decode import decode_video
from .encode import encode_video
from .video_utils import (
calculate_video_capacity,
detect_video_format,
get_video_info,
validate_video,
)
else:
detect_video_format = None
get_video_info = None
validate_video = None
calculate_video_capacity = None
encode_video = None
decode_video = None
# QR Code utilities - optional, may not be available
try:
from .qr_utils import (
detect_and_crop_qr,
extract_key_from_qr,
generate_qr_code,
)
HAS_QR_UTILS = True
except ImportError:
HAS_QR_UTILS = False
generate_qr_code = None
extract_key_from_qr = None
detect_and_crop_qr = None
# Validation
from .validation import (
validate_file_payload,
validate_image,
validate_message,
validate_passphrase,
validate_pin,
validate_rsa_key,
validate_security_factors,
)
# Validation aliases for public API
validate_reference_photo = validate_image
validate_carrier = validate_image
# Additional validators
# Constants
from .constants import (
DEFAULT_PASSPHRASE_WORDS,
EMBED_MODE_AUDIO_AUTO,
EMBED_MODE_AUDIO_LSB,
EMBED_MODE_AUDIO_SPREAD,
EMBED_MODE_AUTO,
EMBED_MODE_DCT,
EMBED_MODE_LSB,
EMBED_MODE_VIDEO_AUTO,
EMBED_MODE_VIDEO_LSB,
FORMAT_VERSION,
LOSSLESS_FORMATS,
MAX_FILE_PAYLOAD_SIZE,
MAX_IMAGE_PIXELS,
MAX_MESSAGE_SIZE,
MAX_PASSPHRASE_WORDS,
MAX_PIN_LENGTH,
MIN_IMAGE_PIXELS,
MIN_PASSPHRASE_WORDS,
MIN_PIN_LENGTH,
RECOMMENDED_PASSPHRASE_WORDS,
)
# Exceptions
from .exceptions import (
AudioCapacityError,
AudioError,
AudioExtractionError,
AudioTranscodeError,
AudioValidationError,
CapacityError,
CryptoError,
DecryptionError,
EmbeddingError,
EncryptionError,
ExtractionError,
ImageValidationError,
InvalidHeaderError,
InvalidMagicBytesError,
KeyDerivationError,
KeyGenerationError,
KeyPasswordError,
KeyValidationError,
MessageValidationError,
ModeMismatchError,
NoDataFoundError,
PinValidationError,
ReedSolomonError,
SecurityFactorError,
SteganographyError,
StegasooError,
UnsupportedAudioFormatError,
UnsupportedVideoFormatError,
ValidationError,
VideoCapacityError,
VideoError,
VideoExtractionError,
VideoTranscodeError,
VideoValidationError,
)
# Models
from .models import (
AudioCapacityInfo,
AudioEmbedStats,
AudioInfo,
CapacityComparison,
Credentials,
DecodeResult,
EncodeResult,
FilePayload,
GenerateResult,
ImageInfo,
ValidationResult,
VideoCapacityInfo,
VideoEmbedStats,
VideoInfo,
)
from .validation import (
validate_audio_embed_mode,
validate_audio_file,
validate_dct_color_mode,
validate_dct_output_format,
validate_embed_mode,
)
# Aliases for backward compatibility
MIN_MESSAGE_LENGTH = 1
MAX_MESSAGE_LENGTH = MAX_MESSAGE_SIZE
MAX_PAYLOAD_SIZE = MAX_MESSAGE_SIZE
# MAX_FILE_PAYLOAD_SIZE imported from constants above
SUPPORTED_IMAGE_FORMATS = LOSSLESS_FORMATS
LSB_BYTES_PER_PIXEL = 3 / 8
DCT_BYTES_PER_PIXEL = 0.125
__all__ = [
# Version
"__version__",
# Core
"encode",
"decode",
"decode_file",
"decode_text",
# Audio (v4.3.0)
"encode_audio",
"decode_audio",
"detect_audio_format",
"get_audio_info",
"has_ffmpeg_support",
"validate_audio",
"HAS_AUDIO_SUPPORT",
"HAS_VIDEO_SUPPORT",
"validate_audio_embed_mode",
"validate_audio_file",
# Video (v4.4.0)
"encode_video",
"decode_video",
"detect_video_format",
"get_video_info",
"validate_video",
"calculate_video_capacity",
# Generation
"generate_pin",
"generate_passphrase",
"generate_rsa_key",
"generate_credentials",
"export_rsa_key_pem",
"load_rsa_key",
# Channel key management (v4.0.0)
"generate_channel_key",
"get_channel_key",
"set_channel_key",
"clear_channel_key",
"has_channel_key",
"get_channel_status",
"validate_channel_key",
"format_channel_key",
"get_active_channel_key",
"get_channel_fingerprint",
# Image utilities
"get_image_info",
"compare_capacity",
# Utilities
"generate_filename",
# Crypto
"has_argon2",
# Backends
"EmbeddingBackend",
"backend_registry",
"BackendNotFoundError",
# Platform presets
"get_preset",
"PLATFORMS",
# Steganalysis
"check_image",
# Steganography
"has_dct_support",
"calculate_capacity_by_mode",
"compare_modes",
"will_fit_by_mode",
# QR utilities
"generate_qr_code",
"extract_key_from_qr",
"detect_and_crop_qr",
"HAS_QR_UTILS",
# Validation
"validate_reference_photo",
"validate_carrier",
"validate_message",
"validate_file_payload",
"validate_passphrase",
"validate_pin",
"validate_rsa_key",
"validate_security_factors",
"validate_embed_mode",
"validate_dct_output_format",
"validate_dct_color_mode",
"validate_channel_key",
# Models
"ImageInfo",
"CapacityComparison",
"GenerateResult",
"EncodeResult",
"DecodeResult",
"FilePayload",
"Credentials",
"ValidationResult",
# Audio models
"AudioEmbedStats",
"AudioInfo",
"AudioCapacityInfo",
# Video models
"VideoEmbedStats",
"VideoInfo",
"VideoCapacityInfo",
# Exceptions
"StegasooError",
"ValidationError",
"PinValidationError",
"MessageValidationError",
"ImageValidationError",
"KeyValidationError",
"SecurityFactorError",
"CryptoError",
"EncryptionError",
"DecryptionError",
"KeyDerivationError",
"KeyGenerationError",
"KeyPasswordError",
"SteganographyError",
"CapacityError",
"ExtractionError",
"EmbeddingError",
"InvalidHeaderError",
"InvalidMagicBytesError",
"ReedSolomonError",
"NoDataFoundError",
"ModeMismatchError",
# Audio exceptions
"AudioError",
"AudioValidationError",
"AudioCapacityError",
"AudioExtractionError",
"AudioTranscodeError",
"UnsupportedAudioFormatError",
# Video exceptions
"VideoError",
"VideoValidationError",
"VideoCapacityError",
"VideoExtractionError",
"VideoTranscodeError",
"UnsupportedVideoFormatError",
# Constants
"FORMAT_VERSION",
"MIN_PASSPHRASE_WORDS",
"RECOMMENDED_PASSPHRASE_WORDS",
"DEFAULT_PASSPHRASE_WORDS",
"MAX_PASSPHRASE_WORDS",
"MIN_PIN_LENGTH",
"MAX_PIN_LENGTH",
"MIN_MESSAGE_LENGTH",
"MAX_MESSAGE_LENGTH",
"MAX_MESSAGE_SIZE",
"MAX_PAYLOAD_SIZE",
"MAX_FILE_PAYLOAD_SIZE",
"MIN_IMAGE_PIXELS",
"MAX_IMAGE_PIXELS",
"SUPPORTED_IMAGE_FORMATS",
"LOSSLESS_FORMATS",
"LSB_BYTES_PER_PIXEL",
"DCT_BYTES_PER_PIXEL",
"EMBED_MODE_LSB",
"EMBED_MODE_DCT",
"EMBED_MODE_AUTO",
# Audio constants
"EMBED_MODE_AUDIO_LSB",
"EMBED_MODE_AUDIO_SPREAD",
"EMBED_MODE_AUDIO_AUTO",
# Video constants
"EMBED_MODE_VIDEO_LSB",
"EMBED_MODE_VIDEO_AUTO",
]

2256
src/soosef/stegasoo/api.py Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,257 @@
"""
API Key Authentication for Stegasoo REST API.
Provides simple API key authentication with hashed key storage.
Keys can be stored in user config (~/.stegasoo/) or project config (./config/).
Usage:
from .auth import require_api_key, get_api_key_status
@app.get("/protected")
async def protected_endpoint(api_key: str = Depends(require_api_key)):
return {"status": "authenticated"}
"""
import hashlib
import json
import os
import secrets
from pathlib import Path
from fastapi import HTTPException, Security
from fastapi.security import APIKeyHeader
# API key header name
API_KEY_HEADER = APIKeyHeader(name="X-API-Key", auto_error=False)
# Config locations
USER_CONFIG_DIR = Path.home() / ".stegasoo"
PROJECT_CONFIG_DIR = Path("./config")
# Key file name
API_KEYS_FILE = "api_keys.json"
# Environment variable for API key (alternative to file)
API_KEY_ENV_VAR = "STEGASOO_API_KEY"
def _hash_key(key: str) -> str:
"""Hash an API key for storage."""
return hashlib.sha256(key.encode()).hexdigest()
def _get_keys_file(location: str = "user") -> Path:
    """Resolve the api_keys.json path for a config location ("user" or "project")."""
    base = PROJECT_CONFIG_DIR if location == "project" else USER_CONFIG_DIR
    return base / API_KEYS_FILE
def _load_keys(location: str = "user") -> dict:
    """Read the API key store for *location*; fall back to an empty enabled store."""
    empty_store = {"keys": [], "enabled": True}
    keys_file = _get_keys_file(location)
    if not keys_file.exists():
        return empty_store
    try:
        with open(keys_file) as f:
            return json.load(f)
    except (OSError, json.JSONDecodeError):
        # Unreadable or corrupt file is treated the same as a missing one.
        return empty_store
def _save_keys(data: dict, location: str = "user") -> None:
    """Persist the API key store to disk with owner-only permissions."""
    keys_file = _get_keys_file(location)
    keys_file.parent.mkdir(parents=True, exist_ok=True)
    keys_file.write_text(json.dumps(data, indent=2))
    # Key hashes are security-sensitive: restrict to owner read/write.
    os.chmod(keys_file, 0o600)
def generate_api_key() -> str:
    """Generate a new API key of the form ``stegasoo_XXXX_XXXXXXXX...``."""
    # 16 random bytes -> 32 hex chars (128 bits of entropy),
    # split 4/28 after the prefix for readability.
    token = secrets.token_hex(16)
    prefix, suffix = token[:4], token[4:]
    return f"stegasoo_{prefix}_{suffix}"
def add_api_key(name: str, location: str = "user") -> str:
    """
    Generate and store a new API key.

    Args:
        name: Descriptive name for the key (e.g., "laptop", "automation")
        location: "user" or "project"

    Returns:
        The generated API key (only shown once!)

    Raises:
        ValueError: If a key with the given name already exists.
    """
    from datetime import datetime

    key = generate_api_key()
    key_hash = _hash_key(key)
    data = _load_keys(location)
    # Check for duplicate name
    for existing in data["keys"]:
        if existing["name"] == name:
            raise ValueError(f"Key with name '{name}' already exists")
    data["keys"].append(
        {
            "name": name,
            # Only the hash is persisted; the plaintext key is returned once.
            "hash": key_hash,
            "created": datetime.now().isoformat(),
        }
    )
    _save_keys(data, location)
    return key
def remove_api_key(name: str, location: str = "user") -> bool:
    """
    Remove an API key by name.

    Returns:
        True if key was found and removed, False otherwise
    """
    data = _load_keys(location)
    kept = [entry for entry in data["keys"] if entry["name"] != name]
    if len(kept) == len(data["keys"]):
        # Nothing matched; avoid an unnecessary write.
        return False
    data["keys"] = kept
    _save_keys(data, location)
    return True
def list_api_keys(location: str = "user") -> list[dict]:
    """
    List all API keys (names and creation dates, never the actual keys).
    """
    entries = _load_keys(location)["keys"]
    return [
        {"name": entry["name"], "created": entry.get("created", "unknown")}
        for entry in entries
    ]
def set_auth_enabled(enabled: bool, location: str = "user") -> None:
    """Persist the API-key authentication on/off flag in the given location."""
    store = _load_keys(location)
    store["enabled"] = enabled
    _save_keys(store, location)
def is_auth_enabled() -> bool:
    """Check if API key authentication is enabled.

    An explicit ``enabled`` flag in an existing config file wins (project
    config takes precedence over user config). When no config file exists,
    auth defaults to enabled exactly when any API key is configured.
    """
    # Check project config first, then user config.
    # Only consult files that actually exist: _load_keys() fabricates a
    # default dict (which always contains "enabled") for missing files, which
    # would otherwise make the keys-exist fallback below unreachable.
    for location in ("project", "user"):
        if _get_keys_file(location).exists():
            data = _load_keys(location)
            if "enabled" in data:
                return data["enabled"]
    # Default: enabled if any keys exist
    return bool(get_all_key_hashes())
def get_all_key_hashes() -> set[str]:
    """Collect every valid API key hash from the env var and both config files."""
    hashes: set[str] = set()
    # A key supplied via environment variable participates alongside stored keys.
    env_key = os.environ.get(API_KEY_ENV_VAR)
    if env_key:
        hashes.add(_hash_key(env_key))
    # Merge hashes from both project and user config files.
    for location in ("project", "user"):
        entries = _load_keys(location).get("keys", [])
        hashes.update(entry["hash"] for entry in entries if "hash" in entry)
    return hashes
def validate_api_key(key: str) -> bool:
    """Return True when *key* hashes to one of the stored key hashes."""
    if not key:
        return False
    return _hash_key(key) in get_all_key_hashes()
def get_api_key_status() -> dict:
    """Summarize auth state: enabled flag, key counts, and per-location key lists."""
    user_keys = list_api_keys("user")
    project_keys = list_api_keys("project")
    env_configured = bool(os.environ.get(API_KEY_ENV_VAR))
    # The environment variable counts as one additional key when set.
    total = len(user_keys) + len(project_keys)
    if env_configured:
        total += 1
    return {
        "enabled": is_auth_enabled(),
        "total_keys": total,
        "user_keys": len(user_keys),
        "project_keys": len(project_keys),
        "env_configured": env_configured,
        "keys": {
            "user": user_keys,
            "project": project_keys,
        },
    }
# FastAPI dependency for API key authentication
async def require_api_key(api_key: str | None = Security(API_KEY_HEADER)) -> str:
    """
    FastAPI dependency that requires a valid API key.

    Usage:
        @app.get("/protected")
        async def endpoint(key: str = Depends(require_api_key)):
            ...
    """
    # Auth switched off entirely -> allow through with a sentinel value.
    if not is_auth_enabled():
        return "auth_disabled"
    # No keys configured anywhere = auth effectively disabled.
    if not get_all_key_hashes():
        return "no_keys_configured"
    # Missing header -> 401 with an auth challenge; wrong key -> 403.
    if not api_key:
        raise HTTPException(
            status_code=401,
            detail="API key required. Provide X-API-Key header.",
            headers={"WWW-Authenticate": "ApiKey"},
        )
    if not validate_api_key(api_key):
        raise HTTPException(
            status_code=403,
            detail="Invalid API key.",
        )
    return api_key
async def optional_api_key(api_key: str | None = Security(API_KEY_HEADER)) -> str | None:
    """
    FastAPI dependency that optionally validates an API key.

    Returns the key if valid, None if not provided or invalid.
    Never raises - useful for endpoints that work with or without auth.
    """
    if not api_key:
        return None
    return api_key if validate_api_key(api_key) else None

View File

@ -0,0 +1,510 @@
"""
Stegasoo Audio Steganography LSB Embedding/Extraction (v4.3.0)
LSB (Least Significant Bit) embedding for PCM audio samples.
Hides data in the least significant bit(s) of audio samples, analogous to
how steganography.py hides data in pixel LSBs. The carrier audio must be
lossless (WAV or FLAC); lossy codecs (MP3, OGG, AAC) destroy LSBs.
Uses ChaCha20 as a CSPRNG for pseudo-random sample index selection,
ensuring that without the key an attacker cannot determine which samples
were modified.
Supports:
- 16-bit PCM (int16 samples)
- 24-bit PCM (int32 samples from soundfile)
- Float audio (converted to int16 before embedding)
- 1 or 2 bits per sample embedding depth
- Mono and multi-channel audio (flattened for embedding)
"""
import io
import struct
import numpy as np
import soundfile as sf
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms
from .constants import (
AUDIO_MAGIC_LSB,
EMBED_MODE_AUDIO_LSB,
)
from .debug import debug
from .exceptions import AudioCapacityError, AudioError
from .models import AudioEmbedStats
from .steganography import ENCRYPTION_OVERHEAD
# Progress reporting interval — write every N samples
PROGRESS_INTERVAL = 5000
# =============================================================================
# PROGRESS REPORTING
# =============================================================================
def _write_progress(progress_file: str | None, current: int, total: int, phase: str = "embedding"):
"""Write progress to file for frontend polling."""
if progress_file is None:
return
try:
import json
with open(progress_file, "w") as f:
json.dump(
{
"current": current,
"total": total,
"percent": round((current / total) * 100, 1) if total > 0 else 0,
"phase": phase,
},
f,
)
except Exception:
pass # Don't let progress writing break encoding
# =============================================================================
# CAPACITY
# =============================================================================
def calculate_audio_lsb_capacity(
    audio_data: bytes,
    bits_per_sample: int = 1,
) -> int:
    """
    Compute the maximum LSB payload (in bytes) for a WAV/FLAC carrier.

    Probes the carrier with soundfile, counts individual sample values
    (frames * channels), converts the available bit budget to bytes, and
    subtracts the fixed encryption overhead (clamped at zero).

    Args:
        audio_data: Raw bytes of a WAV or FLAC file.
        bits_per_sample: Number of LSBs to use per sample (1 or 2).

    Returns:
        Maximum embeddable payload size in bytes.

    Raises:
        AudioError: If the audio cannot be read or is unsupported.
    """
    debug.validate(
        bits_per_sample in (1, 2), f"bits_per_sample must be 1 or 2, got {bits_per_sample}"
    )
    try:
        info = sf.info(io.BytesIO(audio_data))
    except Exception as e:
        raise AudioError(f"Failed to read audio file: {e}") from e
    sample_count = info.frames * info.channels
    usable_bytes = (sample_count * bits_per_sample) // 8
    capacity = max(0, usable_bytes - ENCRYPTION_OVERHEAD)
    debug.print(
        f"Audio LSB capacity: {capacity} bytes "
        f"({sample_count} samples, {bits_per_sample} bit(s)/sample, "
        f"{info.samplerate} Hz, {info.channels} ch)"
    )
    return capacity
# =============================================================================
# SAMPLE INDEX GENERATION (ChaCha20 CSPRNG)
# =============================================================================
#
# Identical strategy to generate_pixel_indices in steganography.py:
# - >= 50% capacity utilisation: full Fisher-Yates shuffle, take first N
# - < 50%: direct random sampling with collision handling
#
# The key MUST be 32 bytes (same derivation path as the pixel key).
@debug.time
def generate_sample_indices(key: bytes, num_samples: int, num_needed: int) -> list[int]:
    """
    Generate pseudo-random sample indices using ChaCha20 as a CSPRNG.
    Produces a deterministic sequence of unique sample indices so that
    the same key always yields the same embedding locations.
    Args:
        key: 32-byte key for the ChaCha20 cipher.
        num_samples: Total number of samples in the carrier audio.
        num_needed: How many unique sample indices are required.
    Returns:
        List of ``num_needed`` unique indices in [0, num_samples).
    Raises:
        AssertionError (via debug.validate): On invalid arguments.
    """
    debug.validate(len(key) == 32, f"Sample key must be 32 bytes, got {len(key)}")
    debug.validate(num_samples > 0, f"Number of samples must be positive, got {num_samples}")
    debug.validate(num_needed > 0, f"Number needed must be positive, got {num_needed}")
    debug.validate(
        num_needed <= num_samples,
        f"Cannot select {num_needed} samples from {num_samples} available",
    )
    debug.print(f"Generating {num_needed} sample indices from {num_samples} total samples")
    # Strategy 1: Full Fisher-Yates shuffle when we need many indices
    if num_needed >= num_samples // 2:
        debug.print(f"Using full shuffle (needed {num_needed}/{num_samples} samples)")
        # Fixed all-zero nonce: determinism is the point here — the same key
        # must always produce the same keystream, hence the same indices.
        nonce = b"\x00" * 16
        cipher = Cipher(algorithms.ChaCha20(key, nonce), mode=None, backend=default_backend())
        encryptor = cipher.encryptor()
        indices = list(range(num_samples))
        # Encrypting zeros yields the raw keystream: 4 bytes per shuffle step.
        random_bytes = encryptor.update(b"\x00" * (num_samples * 4))
        for i in range(num_samples - 1, 0, -1):
            j_bytes = random_bytes[(num_samples - 1 - i) * 4 : (num_samples - i) * 4]
            # NOTE(review): "% (i + 1)" introduces a tiny modulo bias;
            # presumably acceptable for index selection — confirm.
            j = int.from_bytes(j_bytes, "big") % (i + 1)
            indices[i], indices[j] = indices[j], indices[i]
        selected = indices[:num_needed]
        debug.print(f"Generated {len(selected)} indices via shuffle")
        return selected
    # Strategy 2: Direct sampling for lower utilisation
    debug.print(f"Using optimized selection (needed {num_needed}/{num_samples} samples)")
    selected: list[int] = []
    used: set[int] = set()
    nonce = b"\x00" * 16
    cipher = Cipher(algorithms.ChaCha20(key, nonce), mode=None, backend=default_backend())
    encryptor = cipher.encryptor()
    # Pre-generate 2x bytes to handle expected collisions
    bytes_needed = (num_needed * 2) * 4
    random_bytes = encryptor.update(b"\x00" * bytes_needed)
    byte_offset = 0
    collisions = 0
    # Consume the keystream 4 bytes at a time; duplicates are skipped, which
    # keeps the mapping key -> index sequence fully deterministic.
    while len(selected) < num_needed and byte_offset < len(random_bytes) - 4:
        idx = int.from_bytes(random_bytes[byte_offset : byte_offset + 4], "big") % num_samples
        byte_offset += 4
        if idx not in used:
            used.add(idx)
            selected.append(idx)
        else:
            collisions += 1
    # Edge case: ran out of pre-generated bytes (very high collision rate)
    if len(selected) < num_needed:
        debug.print(f"Need {num_needed - len(selected)} more indices, generating...")
        extra_needed = num_needed - len(selected)
        # Continue the SAME cipher stream so the sequence stays reproducible.
        for _ in range(extra_needed * 2):
            extra_bytes = encryptor.update(b"\x00" * 4)
            idx = int.from_bytes(extra_bytes, "big") % num_samples
            if idx not in used:
                used.add(idx)
                selected.append(idx)
            if len(selected) == num_needed:
                break
    debug.print(f"Generated {len(selected)} indices with {collisions} collisions")
    debug.validate(
        len(selected) == num_needed,
        f"Failed to generate enough indices: {len(selected)}/{num_needed}",
    )
    return selected
# =============================================================================
# EMBEDDING
# =============================================================================
@debug.time
def embed_in_audio_lsb(
    data: bytes,
    carrier_audio: bytes,
    sample_key: bytes,
    bits_per_sample: int = 1,
    progress_file: str | None = None,
) -> tuple[bytes, AudioEmbedStats]:
    """
    Embed data into PCM audio samples using LSB steganography.
    The payload is prepended with a 4-byte magic header (``AUDIO_MAGIC_LSB``)
    and a 4-byte big-endian length prefix, then converted to a binary string.
    Pseudo-random sample indices are generated from ``sample_key`` and the
    corresponding sample LSBs are overwritten.
    The modified audio is written back as a 16-bit PCM WAV file.
    Args:
        data: Encrypted payload bytes to embed.
        carrier_audio: Raw bytes of the carrier WAV/FLAC file.
        sample_key: 32-byte key for sample index generation.
        bits_per_sample: LSBs to use per sample (1 or 2).
        progress_file: Optional path for progress JSON (frontend polling).
    Returns:
        Tuple of (stego WAV bytes, AudioEmbedStats).
    Raises:
        AudioCapacityError: If the payload is too large for the carrier.
        AudioError: On any other embedding failure.
    """
    debug.print(f"Audio LSB embedding {len(data)} bytes")
    debug.data(sample_key, "Sample key for embedding")
    debug.validate(
        bits_per_sample in (1, 2), f"bits_per_sample must be 1 or 2, got {bits_per_sample}"
    )
    debug.validate(len(sample_key) == 32, f"Sample key must be 32 bytes, got {len(sample_key)}")
    try:
        # 1. Read carrier audio (forced to int16 so LSB math is uniform)
        samples, samplerate = sf.read(io.BytesIO(carrier_audio), dtype="int16", always_2d=True)
        # samples shape: (num_frames, channels)
        original_shape = samples.shape
        channels = original_shape[1]
        duration = original_shape[0] / samplerate
        debug.print(
            f"Carrier audio: {samplerate} Hz, {channels} ch, "
            f"{original_shape[0]} frames, {duration:.2f}s"
        )
        # Flatten to 1D for embedding; .copy() so the carrier array is not mutated
        flat_samples = samples.flatten().copy()
        num_samples = len(flat_samples)
        # 2. Prepend magic + length prefix
        header = AUDIO_MAGIC_LSB + struct.pack(">I", len(data))
        payload = header + data
        debug.print(
            f"Payload with header: {len(payload)} bytes (magic 4 + len 4 + data {len(data)})"
        )
        # 3. Check capacity
        max_bytes = (num_samples * bits_per_sample) // 8
        if len(payload) > max_bytes:
            debug.print(f"Capacity error: need {len(payload)}, have {max_bytes}")
            raise AudioCapacityError(len(payload), max_bytes)
        debug.print(
            f"Capacity usage: {len(payload)}/{max_bytes} bytes "
            f"({len(payload) / max_bytes * 100:.1f}%)"
        )
        # 4. Convert payload to binary string (MSB-first per byte)
        binary_data = "".join(format(b, "08b") for b in payload)
        samples_needed = (len(binary_data) + bits_per_sample - 1) // bits_per_sample
        debug.print(f"Need {samples_needed} samples to embed {len(binary_data)} bits")
        # 5. Generate pseudo-random sample indices
        selected_indices = generate_sample_indices(sample_key, num_samples, samples_needed)
        # 6. Modify LSBs of selected samples
        lsb_mask = (1 << bits_per_sample) - 1
        bit_idx = 0
        modified_count = 0
        total_to_process = len(selected_indices)
        # Initial progress
        if progress_file:
            _write_progress(progress_file, 5, 100, "embedding")
        for progress_idx, sample_idx in enumerate(selected_indices):
            if bit_idx >= len(binary_data):
                break
            # Pad the final chunk with zero bits so it is always bits_per_sample wide
            bits = binary_data[bit_idx : bit_idx + bits_per_sample].ljust(bits_per_sample, "0")
            bit_val = int(bits, 2)
            sample_val = flat_samples[sample_idx]
            # Work in unsigned 16-bit space to avoid overflow
            unsigned_val = int(sample_val) & 0xFFFF
            new_unsigned = (unsigned_val & ~lsb_mask) | bit_val
            # Convert back to signed int16
            new_val = np.int16(new_unsigned if new_unsigned < 32768 else new_unsigned - 65536)
            if sample_val != new_val:
                flat_samples[sample_idx] = new_val
                modified_count += 1
            bit_idx += bits_per_sample
            # Report progress periodically
            if progress_file and progress_idx % PROGRESS_INTERVAL == 0:
                _write_progress(progress_file, progress_idx, total_to_process, "embedding")
        # Final progress before save
        if progress_file:
            _write_progress(progress_file, total_to_process, total_to_process, "saving")
        debug.print(f"Modified {modified_count} samples (out of {samples_needed} selected)")
        # 7. Reshape and write back as WAV (lossless, so the LSBs survive)
        stego_samples = flat_samples.reshape(original_shape)
        output_buf = io.BytesIO()
        sf.write(output_buf, stego_samples, samplerate, format="WAV", subtype="PCM_16")
        output_buf.seek(0)
        stego_bytes = output_buf.getvalue()
        stats = AudioEmbedStats(
            samples_modified=modified_count,
            total_samples=num_samples,
            capacity_used=len(payload) / max_bytes,
            bytes_embedded=len(payload),
            sample_rate=samplerate,
            channels=channels,
            duration_seconds=duration,
            embed_mode=EMBED_MODE_AUDIO_LSB,
        )
        debug.print(f"Audio LSB embedding complete: {len(stego_bytes)} byte WAV")
        return stego_bytes, stats
    except AudioCapacityError:
        # Re-raise unchanged so callers can distinguish capacity failures
        raise
    except Exception as e:
        debug.exception(e, "embed_in_audio_lsb")
        raise AudioError(f"Failed to embed data in audio: {e}") from e
# =============================================================================
# EXTRACTION
# =============================================================================
@debug.time
def extract_from_audio_lsb(
    audio_data: bytes,
    sample_key: bytes,
    bits_per_sample: int = 1,
    progress_file: str | None = None,
) -> bytes | None:
    """
    Extract hidden data from audio using LSB steganography.
    Reads the stego audio, generates the same pseudo-random sample indices
    from ``sample_key``, extracts the LSBs, and reconstructs the payload.
    Verifies the ``AUDIO_MAGIC_LSB`` header before returning.
    Args:
        audio_data: Raw bytes of the stego WAV file.
        sample_key: 32-byte key (must match the one used for embedding).
        bits_per_sample: LSBs per sample (must match embedding).
        progress_file: Optional path for progress JSON.
    Returns:
        Extracted payload bytes (without magic/length prefix), or ``None``
        if extraction fails (wrong key, no data, corrupted).
    """
    debug.print(f"Audio LSB extracting from {len(audio_data)} byte audio")
    debug.data(sample_key, "Sample key for extraction")
    debug.validate(
        bits_per_sample in (1, 2), f"bits_per_sample must be 1 or 2, got {bits_per_sample}"
    )
    try:
        # 1. Read audio
        samples, samplerate = sf.read(io.BytesIO(audio_data), dtype="int16", always_2d=True)
        flat_samples = samples.flatten()
        num_samples = len(flat_samples)
        debug.print(f"Audio: {samplerate} Hz, {samples.shape[1]} ch, {num_samples} total samples")
        # 2. Extract initial samples to find magic bytes + length (8 bytes = 64 bits)
        header_bits_needed = 64  # 4 bytes magic + 4 bytes length
        header_samples_needed = (header_bits_needed + bits_per_sample - 1) // bits_per_sample + 10
        if header_samples_needed > num_samples:
            debug.print("Audio too small to contain header")
            return None
        initial_indices = generate_sample_indices(sample_key, num_samples, header_samples_needed)
        # Collect bits in a list and join once: repeated "str +=" in a loop
        # is quadratic and dominates extraction time on large payloads.
        header_bits: list[str] = []
        for sample_idx in initial_indices:
            val = int(flat_samples[sample_idx]) & 0xFFFF
            for bit_pos in range(bits_per_sample - 1, -1, -1):
                header_bits.append(str((val >> bit_pos) & 1))
        binary_data = "".join(header_bits)
        # 3. Verify magic bytes
        if len(binary_data) < 64:
            debug.print(f"Not enough bits for header: {len(binary_data)}/64")
            return None
        magic_bits = binary_data[:32]
        magic_bytes = int(magic_bits, 2).to_bytes(4, "big")
        if magic_bytes != AUDIO_MAGIC_LSB:
            debug.print(f"Magic mismatch: got {magic_bytes!r}, expected {AUDIO_MAGIC_LSB!r}")
            return None
        debug.print("Magic bytes verified: AUDL")
        # 4. Parse length
        length_bits = binary_data[32:64]
        data_length = struct.unpack(">I", int(length_bits, 2).to_bytes(4, "big"))[0]
        debug.print(f"Extracted length: {data_length} bytes")
        # Sanity check length
        max_possible = (num_samples * bits_per_sample) // 8 - 8  # minus header
        if data_length > max_possible or data_length < 1:
            debug.print(f"Invalid data length: {data_length} (max possible: {max_possible})")
            return None
        # 5. Extract full payload
        total_bits = (8 + data_length) * 8  # header (8 bytes) + payload
        total_samples_needed = (total_bits + bits_per_sample - 1) // bits_per_sample
        if total_samples_needed > num_samples:
            debug.print(f"Need {total_samples_needed} samples but only {num_samples} available")
            return None
        debug.print(f"Need {total_samples_needed} samples to extract {data_length} bytes")
        selected_indices = generate_sample_indices(sample_key, num_samples, total_samples_needed)
        # Initial progress
        if progress_file:
            _write_progress(progress_file, 5, 100, "extracting")
        # Same O(n) list-accumulation strategy for the full payload pass.
        payload_bits: list[str] = []
        for progress_idx, sample_idx in enumerate(selected_indices):
            val = int(flat_samples[sample_idx]) & 0xFFFF
            for bit_pos in range(bits_per_sample - 1, -1, -1):
                payload_bits.append(str((val >> bit_pos) & 1))
            if progress_file and progress_idx % PROGRESS_INTERVAL == 0:
                _write_progress(progress_file, progress_idx, total_samples_needed, "extracting")
        binary_data = "".join(payload_bits)
        if progress_file:
            _write_progress(progress_file, total_samples_needed, total_samples_needed, "extracting")
        # Skip the 8-byte header (magic + length) = 64 bits
        data_bits = binary_data[64 : 64 + (data_length * 8)]
        if len(data_bits) < data_length * 8:
            debug.print(f"Insufficient bits: {len(data_bits)} < {data_length * 8}")
            return None
        # Convert bits back to bytes
        data_bytes = bytearray()
        for i in range(0, len(data_bits), 8):
            byte_bits = data_bits[i : i + 8]
            if len(byte_bits) == 8:
                data_bytes.append(int(byte_bits, 2))
        debug.print(f"Audio LSB successfully extracted {len(data_bytes)} bytes")
        return bytes(data_bytes)
    except Exception as e:
        debug.exception(e, "extract_from_audio_lsb")
        return None

View File

@ -0,0 +1,540 @@
"""
Stegasoo Audio Utilities (v4.3.0)
Audio format detection, transcoding, and metadata extraction for audio steganography.
Dependencies:
- soundfile (sf): Fast WAV/FLAC reading without ffmpeg
- pydub: MP3/OGG/AAC transcoding (wraps ffmpeg)
Both are optional — functions degrade gracefully when unavailable.
"""
from __future__ import annotations
import io
import shutil
from .constants import (
EMBED_MODE_AUDIO_AUTO,
MAX_AUDIO_DURATION,
MAX_AUDIO_FILE_SIZE,
MAX_AUDIO_SAMPLE_RATE,
MIN_AUDIO_SAMPLE_RATE,
VALID_AUDIO_EMBED_MODES,
)
from .debug import get_logger
from .exceptions import AudioTranscodeError, AudioValidationError, UnsupportedAudioFormatError
from .models import AudioInfo, ValidationResult
logger = get_logger(__name__)
# =============================================================================
# FFMPEG AVAILABILITY
# =============================================================================
def has_ffmpeg_support() -> bool:
    """Report whether the ffmpeg binary is available on PATH.

    Returns:
        True if ffmpeg can be located, False otherwise.
    """
    ffmpeg_path = shutil.which("ffmpeg")
    return ffmpeg_path is not None
# =============================================================================
# FORMAT DETECTION
# =============================================================================
def detect_audio_format(audio_data: bytes) -> str:
    """Detect the audio container format from magic bytes.

    Signatures checked (in order):
        - WAV: b"RIFF" at offset 0 and b"WAVE" at offset 8
        - FLAC: b"fLaC" at offset 0
        - OGG (Vorbis/Opus): b"OggS" at offset 0
        - MP3: b"ID3" tag, or sync bytes b"\\xff\\xfb" / b"\\xff\\xf3" / b"\\xff\\xf2"
        - M4A/MP4: b"ftyp" at offset 4
        - AAC: ADTS header b"\\xff\\xf1" or b"\\xff\\xf9"

    Args:
        audio_data: Raw audio file bytes.

    Returns:
        One of "wav", "flac", "mp3", "ogg", "aac", "m4a", or "unknown".
    """
    if len(audio_data) < 12:
        logger.debug("detect_audio_format: data too short (%d bytes)", len(audio_data))
        return "unknown"
    head = audio_data[:4]
    sync = audio_data[:2]
    # WAV: RIFF container tagged WAVE
    if head == b"RIFF" and audio_data[8:12] == b"WAVE":
        logger.debug("Detected WAV format (%d bytes)", len(audio_data))
        return "wav"
    if head == b"fLaC":
        return "flac"
    if head == b"OggS":
        return "ogg"
    # MP3: either an ID3 tag or a raw MPEG audio frame header
    if audio_data.startswith(b"ID3"):
        return "mp3"
    if sync in (b"\xff\xfb", b"\xff\xf3", b"\xff\xf2"):
        return "mp3"
    # M4A/MP4 container: "ftyp" brand at offset 4
    if audio_data[4:8] == b"ftyp":
        return "m4a"
    # AAC in an ADTS stream
    if sync in (b"\xff\xf1", b"\xff\xf9"):
        return "aac"
    return "unknown"
# =============================================================================
# TRANSCODING
# =============================================================================
def transcode_to_wav(audio_data: bytes) -> bytes:
    """Transcode any supported audio format to WAV PCM format.

    Uses soundfile directly for WAV/FLAC (no ffmpeg needed).
    Uses pydub (wraps ffmpeg) for lossy formats (MP3, OGG, AAC, M4A).

    Args:
        audio_data: Raw audio file bytes in any supported format.

    Returns:
        WAV PCM file bytes (16-bit, original sample rate).

    Raises:
        AudioTranscodeError: If transcoding fails.
        UnsupportedAudioFormatError: If the format cannot be detected.
    """
    fmt = detect_audio_format(audio_data)
    logger.info("transcode_to_wav: input format=%s, size=%d bytes", fmt, len(audio_data))
    if fmt == "unknown":
        raise UnsupportedAudioFormatError(
            "Cannot detect audio format. Supported: WAV, FLAC, MP3, OGG, AAC, M4A."
        )
    # WAV files: validate with soundfile; re-encode to a consistent PCM_16 output.
    # A WAV with an unrecognised subtype falls through to the pydub path below.
    if fmt == "wav":
        try:
            import soundfile as sf

            buf = io.BytesIO(audio_data)
            info = sf.info(buf)
            if info.subtype in ("PCM_16", "PCM_24", "PCM_32", "FLOAT", "DOUBLE"):
                buf.seek(0)
                data, samplerate = sf.read(buf, dtype="int16")
                out = io.BytesIO()
                sf.write(out, data, samplerate, format="WAV", subtype="PCM_16")
                return out.getvalue()
        except ImportError as e:
            # Chain the cause so the original failure is preserved in tracebacks
            raise AudioTranscodeError("soundfile package is required for WAV processing") from e
        except Exception as e:
            raise AudioTranscodeError(f"Failed to process WAV: {e}") from e
    # FLAC: use soundfile (fast, no ffmpeg)
    if fmt == "flac":
        try:
            import soundfile as sf

            buf = io.BytesIO(audio_data)
            data, samplerate = sf.read(buf, dtype="int16")
            out = io.BytesIO()
            sf.write(out, data, samplerate, format="WAV", subtype="PCM_16")
            return out.getvalue()
        except ImportError as e:
            raise AudioTranscodeError("soundfile package is required for FLAC processing") from e
        except Exception as e:
            raise AudioTranscodeError(f"Failed to transcode FLAC to WAV: {e}") from e
    # Lossy formats (MP3, OGG, AAC, M4A): use pydub + ffmpeg
    return _transcode_with_pydub(audio_data, fmt, "wav")
def transcode_to_mp3(audio_data: bytes, bitrate: str = "256k") -> bytes:
    """Transcode audio to MP3 format.

    Uses pydub (wraps ffmpeg) for transcoding.

    Args:
        audio_data: Raw audio file bytes in any supported format.
        bitrate: Target MP3 bitrate (e.g., "128k", "192k", "256k", "320k").

    Returns:
        MP3 file bytes.

    Raises:
        AudioTranscodeError: If transcoding fails or pydub/ffmpeg unavailable.
        UnsupportedAudioFormatError: If the format cannot be detected.
    """
    fmt = detect_audio_format(audio_data)
    if fmt == "unknown":
        raise UnsupportedAudioFormatError(
            "Cannot detect audio format. Supported: WAV, FLAC, MP3, OGG, AAC, M4A."
        )
    try:
        from pydub import AudioSegment
    except ImportError as e:
        # Chain the cause so the original ImportError is preserved
        raise AudioTranscodeError(
            "pydub package is required for MP3 transcoding. Install with: pip install pydub"
        ) from e
    if not has_ffmpeg_support():
        raise AudioTranscodeError(
            "ffmpeg is required for MP3 transcoding. Install ffmpeg on your system."
        )
    try:
        # Map our format names to pydub format names
        pydub_fmt = _pydub_format(fmt)
        buf = io.BytesIO(audio_data)
        audio = AudioSegment.from_file(buf, format=pydub_fmt)
        out = io.BytesIO()
        audio.export(out, format="mp3", bitrate=bitrate)
        return out.getvalue()
    except Exception as e:
        raise AudioTranscodeError(f"Failed to transcode to MP3: {e}") from e
def _transcode_with_pydub(audio_data: bytes, src_fmt: str, dst_fmt: str) -> bytes:
    """Transcode audio using pydub (requires ffmpeg).

    Args:
        audio_data: Raw audio bytes.
        src_fmt: Source format string (our naming).
        dst_fmt: Destination format string ("wav" or "mp3").

    Returns:
        Transcoded audio bytes.

    Raises:
        AudioTranscodeError: If transcoding fails.
    """
    try:
        from pydub import AudioSegment
    except ImportError as e:
        # Chain the cause so the original ImportError is preserved
        raise AudioTranscodeError(
            "pydub package is required for audio transcoding. Install with: pip install pydub"
        ) from e
    if not has_ffmpeg_support():
        raise AudioTranscodeError(
            "ffmpeg is required for audio transcoding. Install ffmpeg on your system."
        )
    try:
        pydub_fmt = _pydub_format(src_fmt)
        buf = io.BytesIO(audio_data)
        audio = AudioSegment.from_file(buf, format=pydub_fmt)
        out = io.BytesIO()
        # Both branches of the original code exported with format=dst_fmt;
        # a single call expresses that directly.
        audio.export(out, format=dst_fmt)
        return out.getvalue()
    except Exception as e:
        raise AudioTranscodeError(f"Failed to transcode {src_fmt} to {dst_fmt}: {e}") from e
def _pydub_format(fmt: str) -> str:
"""Map our format names to pydub/ffmpeg format names.
Args:
fmt: Our internal format name.
Returns:
pydub-compatible format string.
"""
mapping = {
"wav": "wav",
"flac": "flac",
"mp3": "mp3",
"ogg": "ogg",
"aac": "aac",
"m4a": "m4a",
}
return mapping.get(fmt, fmt)
# =============================================================================
# METADATA EXTRACTION
# =============================================================================
def get_audio_info(audio_data: bytes) -> AudioInfo:
    """Extract audio metadata from raw audio bytes.

    WAV/FLAC are probed with soundfile (fast, no ffmpeg); all other formats
    fall back to pydub, which requires ffmpeg.

    Args:
        audio_data: Raw audio file bytes.

    Returns:
        AudioInfo dataclass with sample rate, channels, duration, etc.

    Raises:
        UnsupportedAudioFormatError: If the format cannot be detected.
        AudioTranscodeError: If metadata extraction fails.
    """
    fmt = detect_audio_format(audio_data)
    if fmt == "unknown":
        raise UnsupportedAudioFormatError(
            "Cannot detect audio format. Supported: WAV, FLAC, MP3, OGG, AAC, M4A."
        )
    reader = _get_info_soundfile if fmt in ("wav", "flac") else _get_info_pydub
    return reader(audio_data, fmt)
def _get_info_soundfile(audio_data: bytes, fmt: str) -> AudioInfo:
    """Extract audio info using soundfile (WAV/FLAC).

    Args:
        audio_data: Raw audio bytes.
        fmt: Format string ("wav" or "flac").

    Returns:
        AudioInfo with metadata.

    Raises:
        AudioTranscodeError: If soundfile is missing or the file is unreadable.
    """
    try:
        import soundfile as sf
    except ImportError as e:
        # Chain the cause so the original ImportError is preserved
        raise AudioTranscodeError(
            "soundfile package is required. Install with: pip install soundfile"
        ) from e
    try:
        buf = io.BytesIO(audio_data)
        info = sf.info(buf)
        # Determine bit depth from subtype
        bit_depth = _bit_depth_from_subtype(info.subtype)
        return AudioInfo(
            sample_rate=info.samplerate,
            channels=info.channels,
            duration_seconds=info.duration,
            num_samples=info.frames,
            format=fmt,
            bitrate=None,  # lossless: no meaningful encoded bitrate
            bit_depth=bit_depth,
        )
    except Exception as e:
        raise AudioTranscodeError(f"Failed to read {fmt.upper()} metadata: {e}") from e
def _bit_depth_from_subtype(subtype: str) -> int | None:
"""Determine bit depth from soundfile subtype string.
Args:
subtype: Soundfile subtype (e.g., "PCM_16", "PCM_24", "FLOAT").
Returns:
Bit depth as integer, or None if unknown.
"""
subtype_map = {
"PCM_S8": 8,
"PCM_U8": 8,
"PCM_16": 16,
"PCM_24": 24,
"PCM_32": 32,
"FLOAT": 32,
"DOUBLE": 64,
}
return subtype_map.get(subtype)
def _get_info_pydub(audio_data: bytes, fmt: str) -> AudioInfo:
    """Extract audio info using pydub (lossy formats).

    Args:
        audio_data: Raw audio bytes.
        fmt: Format string ("mp3", "ogg", "aac", "m4a").

    Returns:
        AudioInfo with metadata.

    Raises:
        AudioTranscodeError: If pydub/ffmpeg are missing or decoding fails.
    """
    try:
        from pydub import AudioSegment
    except ImportError as e:
        # Chain the cause so the original ImportError is preserved
        raise AudioTranscodeError(
            "pydub package is required for audio metadata. Install with: pip install pydub"
        ) from e
    if not has_ffmpeg_support():
        raise AudioTranscodeError(
            "ffmpeg is required for audio metadata extraction. Install ffmpeg on your system."
        )
    try:
        pydub_fmt = _pydub_format(fmt)
        buf = io.BytesIO(audio_data)
        audio = AudioSegment.from_file(buf, format=pydub_fmt)
        num_samples = int(audio.frame_count())
        duration = audio.duration_seconds
        sample_rate = audio.frame_rate
        channels = audio.channels
        # Estimate bitrate from file size and duration
        bitrate = None
        if duration > 0:
            bitrate = int((len(audio_data) * 8) / duration)
        return AudioInfo(
            sample_rate=sample_rate,
            channels=channels,
            duration_seconds=duration,
            num_samples=num_samples,
            format=fmt,
            bitrate=bitrate,
            bit_depth=audio.sample_width * 8 if audio.sample_width else None,
        )
    except Exception as e:
        raise AudioTranscodeError(f"Failed to read {fmt.upper()} metadata: {e}") from e
# =============================================================================
# VALIDATION
# =============================================================================
def validate_audio(
    audio_data: bytes,
    name: str = "Audio",
    check_duration: bool = True,
) -> ValidationResult:
    """Validate audio data for steganography.

    Checks:
        - Not empty
        - Not too large (MAX_AUDIO_FILE_SIZE)
        - Valid audio format (detectable via magic bytes)
        - Duration within limits (MAX_AUDIO_DURATION) if check_duration=True
        - Sample rate within limits (MIN_AUDIO_SAMPLE_RATE to MAX_AUDIO_SAMPLE_RATE)

    Args:
        audio_data: Raw audio file bytes.
        name: Descriptive name for error messages (default: "Audio").
        check_duration: Whether to enforce duration limit (default: True).

    Returns:
        ValidationResult with audio info in details (sample_rate, channels,
        duration, num_samples, format) on success.
    """
    if not audio_data:
        return ValidationResult.error(f"{name} is required")
    if len(audio_data) > MAX_AUDIO_FILE_SIZE:
        size_mb = len(audio_data) / (1024 * 1024)
        max_mb = MAX_AUDIO_FILE_SIZE / (1024 * 1024)
        return ValidationResult.error(
            f"{name} too large ({size_mb:.1f} MB). Maximum: {max_mb:.0f} MB"
        )
    # Detect format
    fmt = detect_audio_format(audio_data)
    if fmt == "unknown":
        return ValidationResult.error(
            f"Could not detect {name} format. Supported formats: WAV, FLAC, MP3, OGG, AAC, M4A."
        )
    # Extract metadata for further validation. The specific audio exceptions
    # and any unexpected error were previously handled by two identical
    # except clauses; a single handler produces the same message for all.
    try:
        info = get_audio_info(audio_data)
    except Exception as e:
        return ValidationResult.error(f"Could not read {name}: {e}")
    # Check duration
    if check_duration and info.duration_seconds > MAX_AUDIO_DURATION:
        return ValidationResult.error(
            f"{name} too long ({info.duration_seconds:.1f}s). "
            f"Maximum: {MAX_AUDIO_DURATION}s ({MAX_AUDIO_DURATION // 60} minutes)"
        )
    # Check sample rate
    if info.sample_rate < MIN_AUDIO_SAMPLE_RATE:
        return ValidationResult.error(
            f"{name} sample rate too low ({info.sample_rate} Hz). "
            f"Minimum: {MIN_AUDIO_SAMPLE_RATE} Hz"
        )
    if info.sample_rate > MAX_AUDIO_SAMPLE_RATE:
        return ValidationResult.error(
            f"{name} sample rate too high ({info.sample_rate} Hz). "
            f"Maximum: {MAX_AUDIO_SAMPLE_RATE} Hz"
        )
    return ValidationResult.ok(
        sample_rate=info.sample_rate,
        channels=info.channels,
        duration=info.duration_seconds,
        num_samples=info.num_samples,
        format=info.format,
        bitrate=info.bitrate,
        bit_depth=info.bit_depth,
    )
def require_valid_audio(audio_data: bytes, name: str = "Audio") -> None:
    """Validate audio and raise on failure instead of returning a result.

    Args:
        audio_data: Raw audio file bytes.
        name: Descriptive name used in error messages.

    Raises:
        AudioValidationError: If validation fails.
    """
    outcome = validate_audio(audio_data, name)
    if outcome.is_valid:
        return
    raise AudioValidationError(outcome.error_message)
def validate_audio_embed_mode(mode: str) -> ValidationResult:
    """Check that *mode* names a recognised audio embedding mode.

    Args:
        mode: Embedding mode to validate (e.g., "audio_lsb", "audio_mdct",
            "audio_auto").

    Returns:
        ValidationResult with the mode in details on success.
    """
    allowed = VALID_AUDIO_EMBED_MODES | {EMBED_MODE_AUDIO_AUTO}
    if mode in allowed:
        return ValidationResult.ok(mode=mode)
    return ValidationResult.error(
        f"Invalid audio embed_mode: '{mode}'. "
        f"Valid options: {', '.join(sorted(allowed))}"
    )

View File

@ -0,0 +1,31 @@
"""
Stegasoo embedding backends.
Provides a typed plugin interface for all embedding algorithms.
Backends register with the module-level ``registry`` on import.
Usage::
    from soosef.stegasoo.backends import registry
backend = registry.get("lsb")
stego, stats = backend.embed(data, carrier, key)
"""
from .dct import DCTBackend
from .lsb import LSBBackend
from .protocol import EmbeddingBackend
from .registry import BackendNotFoundError, BackendRegistry, registry
# Auto-register built-in backends
registry.register(LSBBackend())
registry.register(DCTBackend())
__all__ = [
"EmbeddingBackend",
"BackendRegistry",
"BackendNotFoundError",
"registry",
"LSBBackend",
"DCTBackend",
]

View File

@ -0,0 +1,69 @@
"""
DCT (Discrete Cosine Transform) image embedding backend.
Wraps the existing frequency-domain DCT functions in dct_steganography.py.
"""
from __future__ import annotations
from typing import Any
class DCTBackend:
"""Frequency-domain DCT embedding for JPEG-resilient steganography."""
@property
def mode(self) -> str:
return "dct"
@property
def carrier_type(self) -> str:
return "image"
def is_available(self) -> bool:
from ..dct_steganography import HAS_SCIPY
return HAS_SCIPY
def embed(
self,
data: bytes,
carrier: bytes,
key: bytes,
*,
progress_file: str | None = None,
**options: Any,
) -> tuple[bytes, Any]:
from ..dct_steganography import embed_in_dct
output_format = options.get("dct_output_format", "png")
color_mode = options.get("dct_color_mode", "color")
quant_step = options.get("quant_step")
jpeg_quality = options.get("jpeg_quality")
max_dimension = options.get("max_dimension")
return embed_in_dct(
data, carrier, key, output_format, color_mode, progress_file,
quant_step=quant_step, jpeg_quality=jpeg_quality, max_dimension=max_dimension,
)
def extract(
self,
carrier: bytes,
key: bytes,
*,
progress_file: str | None = None,
**options: Any,
) -> bytes | None:
from ..dct_steganography import extract_from_dct
quant_step = options.get("quant_step")
try:
return extract_from_dct(carrier, key, progress_file, quant_step=quant_step)
except Exception:
return None
def calculate_capacity(self, carrier: bytes, **options: Any) -> int:
from ..dct_steganography import calculate_dct_capacity
info = calculate_dct_capacity(carrier)
return info.usable_capacity_bytes

View File

@ -0,0 +1,63 @@
"""
LSB (Least Significant Bit) image embedding backend.
Wraps the existing spatial-domain LSB functions in steganography.py.
"""
from __future__ import annotations
from typing import Any
class LSBBackend:
    """Spatial-domain LSB embedding backend for lossless image formats."""

    @property
    def mode(self) -> str:
        return "lsb"

    @property
    def carrier_type(self) -> str:
        return "image"

    def is_available(self) -> bool:
        # Pillow is a hard dependency of the package, so LSB is always usable.
        return True

    def embed(
        self,
        data: bytes,
        carrier: bytes,
        key: bytes,
        *,
        progress_file: str | None = None,
        **options: Any,
    ) -> tuple[bytes, Any]:
        """Embed *data* into *carrier* via LSB; return (stego bytes, stats)."""
        from ..steganography import _embed_lsb

        stego, stats, extension = _embed_lsb(
            data,
            carrier,
            key,
            options.get("bits_per_channel", 1),
            options.get("output_format", None),
            progress_file,
        )
        # Callers that need the chosen container format read it off the stats object.
        stats.output_extension = extension  # type: ignore[attr-defined]
        return stego, stats

    def extract(
        self,
        carrier: bytes,
        key: bytes,
        *,
        progress_file: str | None = None,
        **options: Any,
    ) -> bytes | None:
        """Recover an embedded payload from *carrier*."""
        from ..steganography import _extract_lsb

        return _extract_lsb(carrier, key, options.get("bits_per_channel", 1))

    def calculate_capacity(self, carrier: bytes, **options: Any) -> int:
        """Maximum LSB payload size of *carrier*, in bytes."""
        from ..steganography import calculate_capacity

        return calculate_capacity(carrier, options.get("bits_per_channel", 1))

View File

@ -0,0 +1,91 @@
"""
Embedding backend protocol definition.
All embedding backends (LSB, DCT, audio, video, etc.) implement this protocol,
enabling registry-based dispatch instead of if/elif chains.
"""
from __future__ import annotations
from typing import Any, Protocol, runtime_checkable
@runtime_checkable
class EmbeddingBackend(Protocol):
    """Protocol that all embedding backends must satisfy.

    Each backend handles a specific embedding mode (e.g. 'lsb', 'dct',
    'audio_lsb', 'audio_spread') for a specific carrier type ('image',
    'audio', 'video').

    Because the class is ``runtime_checkable``, ``isinstance(obj,
    EmbeddingBackend)`` verifies only that the attributes exist — it does
    not validate signatures or return types.
    """

    @property
    def mode(self) -> str:
        """The embedding mode identifier (e.g. 'lsb', 'dct')."""
        ...

    @property
    def carrier_type(self) -> str:
        """The carrier media type: 'image', 'audio', or 'video'."""
        ...

    def is_available(self) -> bool:
        """Whether this backend's optional dependencies are installed."""
        ...

    def embed(
        self,
        data: bytes,
        carrier: bytes,
        key: bytes,
        *,
        progress_file: str | None = None,
        **options: Any,
    ) -> tuple[bytes, Any]:
        """Embed data into a carrier.

        Args:
            data: Encrypted payload bytes.
            carrier: Raw carrier file bytes (image, audio, etc.).
            key: Derived key for pixel/sample selection.
            progress_file: Optional progress file path.
            **options: Backend-specific options (bits_per_channel,
                output_format, color_mode, chip_tier, etc.).

        Returns:
            Tuple of (stego carrier bytes, embed stats).
        """
        ...

    def extract(
        self,
        carrier: bytes,
        key: bytes,
        *,
        progress_file: str | None = None,
        **options: Any,
    ) -> bytes | None:
        """Extract data from a carrier.

        Args:
            carrier: Stego carrier file bytes.
            key: Derived key for pixel/sample selection.
            progress_file: Optional progress file path.
            **options: Backend-specific options.

        Returns:
            Extracted payload bytes, or None if no payload found.
        """
        ...

    def calculate_capacity(self, carrier: bytes, **options: Any) -> int:
        """Calculate maximum embeddable payload size in bytes.

        Args:
            carrier: Raw carrier file bytes.
            **options: Backend-specific options (e.g. bits_per_channel).

        Returns:
            Maximum payload capacity in bytes.
        """
        ...

View File

@ -0,0 +1,63 @@
"""
Backend registry for embedding mode dispatch.
Backends register themselves by mode string. The registry replaces
if/elif dispatch in steganography.py with a lookup table.
"""
from __future__ import annotations
from ..exceptions import StegasooError
from .protocol import EmbeddingBackend
class BackendNotFoundError(StegasooError):
    """Raised when a requested backend mode is not registered.

    Thrown by ``BackendRegistry.get``; subclasses the package-wide
    ``StegasooError`` so callers catching the base class still work.
    """
class BackendRegistry:
    """Mode-string → backend lookup table used for embed/extract dispatch."""

    def __init__(self) -> None:
        self._backends: dict[str, EmbeddingBackend] = {}

    def register(self, backend: EmbeddingBackend) -> None:
        """Add *backend* under its own ``mode`` identifier (last write wins)."""
        self._backends[backend.mode] = backend

    def get(self, mode: str) -> EmbeddingBackend:
        """Return the backend for *mode*; raise BackendNotFoundError otherwise."""
        backend = self._backends.get(mode)
        if backend is None:
            available = ", ".join(sorted(self._backends)) or "(none)"
            raise BackendNotFoundError(
                f"No backend registered for mode '{mode}'. Available: {available}"
            )
        return backend

    def has(self, mode: str) -> bool:
        """True when a backend is registered under *mode*."""
        return mode in self._backends

    def available_modes(self, carrier_type: str | None = None) -> list[str]:
        """Sorted mode strings whose backend is installed and usable.

        When *carrier_type* is given, only backends for that media type
        are included.
        """
        usable = []
        for mode, backend in self._backends.items():
            if not backend.is_available():
                continue
            if carrier_type is not None and backend.carrier_type != carrier_type:
                continue
            usable.append(mode)
        return sorted(usable)

    def all_modes(self, carrier_type: str | None = None) -> list[str]:
        """Sorted mode strings, including backends whose deps are missing."""
        everything = []
        for mode, backend in self._backends.items():
            if carrier_type is None or backend.carrier_type == carrier_type:
                everything.append(mode)
        return sorted(everything)
# Module-level singleton: backends self-register here at import time, and all
# frontends (CLI, API, WebUI) dispatch through this shared instance.
registry = BackendRegistry()

View File

@ -0,0 +1,684 @@
"""
Stegasoo Batch Processing Module (v3.2.0)
Enables encoding/decoding multiple files in a single operation.
Supports parallel processing, progress tracking, and detailed reporting.
Changes in v3.2.0:
- BatchCredentials: renamed day_phrase passphrase, removed date_str
- Updated all credential handling to use v3.2.0 API
"""
import json
import threading
import time
from collections.abc import Callable, Iterator
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
from .constants import ALLOWED_IMAGE_EXTENSIONS, LOSSLESS_FORMATS
class BatchStatus(Enum):
    """Lifecycle status of an individual batch item.

    Values are lowercase strings so they serialize cleanly via
    ``BatchItem.to_dict()``.
    """

    PENDING = "pending"  # queued, not yet picked up by a worker
    PROCESSING = "processing"  # currently being encoded/decoded
    SUCCESS = "success"
    FAILED = "failed"  # processing raised; see BatchItem.error
    SKIPPED = "skipped"
@dataclass
class BatchItem:
    """One unit of work in a batch operation, with timing and result fields."""

    input_path: Path
    output_path: Path | None = None
    status: BatchStatus = BatchStatus.PENDING
    error: str | None = None
    start_time: float | None = None
    end_time: float | None = None
    input_size: int = 0
    output_size: int = 0
    message: str = ""

    @property
    def duration(self) -> float | None:
        """Processing time in seconds, or None until both timestamps are set."""
        if not (self.start_time and self.end_time):
            return None
        return self.end_time - self.start_time

    def to_dict(self) -> dict:
        """JSON-serializable snapshot of this item."""
        out = self.output_path
        return {
            "input_path": str(self.input_path),
            "output_path": str(out) if out else None,
            "status": self.status.value,
            "error": self.error,
            "duration_seconds": self.duration,
            "input_size": self.input_size,
            "output_size": self.output_size,
            "message": self.message,
        }
@dataclass
class BatchCredentials:
    """
    Credentials for batch encode/decode operations (v3.2.0).

    Structured replacement for the plain credential dicts older call sites
    passed around.

    Changes in v3.2.0:
    - Renamed day_phrase to passphrase
    - Removed date_str (no longer used in cryptographic operations)

    Example:
        creds = BatchCredentials(
            reference_photo=ref_bytes,
            passphrase="apple forest thunder mountain",
            pin="123456"
        )
        result = processor.batch_encode(images, creds, message="secret")
    """

    reference_photo: bytes
    passphrase: str  # v3.2.0: renamed from day_phrase
    pin: str = ""
    rsa_key_data: bytes | None = None
    rsa_password: str | None = None

    def to_dict(self) -> dict:
        """Flatten to the plain-dict form expected by legacy call sites."""
        return {
            "reference_photo": self.reference_photo,
            "passphrase": self.passphrase,
            "pin": self.pin,
            "rsa_key_data": self.rsa_key_data,
            "rsa_password": self.rsa_password,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "BatchCredentials":
        """
        Build credentials from a dict, accepting legacy key names.

        Falls back from 'passphrase' to the legacy 'day_phrase' and 'phrase'
        keys, in that order.
        """
        passphrase = data.get("passphrase")
        if not passphrase:
            passphrase = data.get("day_phrase")
        if not passphrase:
            passphrase = data.get("phrase", "")
        return cls(
            reference_photo=data["reference_photo"],
            passphrase=passphrase,
            pin=data.get("pin", ""),
            rsa_key_data=data.get("rsa_key_data"),
            rsa_password=data.get("rsa_password"),
        )
@dataclass
class BatchResult:
    """Aggregate outcome of one batch run: counters, timing, and per-item detail."""

    operation: str
    total: int = 0
    succeeded: int = 0
    failed: int = 0
    skipped: int = 0
    start_time: float = field(default_factory=time.time)
    end_time: float | None = None
    items: list[BatchItem] = field(default_factory=list)

    @property
    def duration(self) -> float | None:
        """Wall-clock duration in seconds; None while the batch is still running."""
        if not self.end_time:
            return None
        return self.end_time - self.start_time

    def to_dict(self) -> dict:
        """JSON-serializable report: a summary header plus per-item entries."""
        summary = {
            "total": self.total,
            "succeeded": self.succeeded,
            "failed": self.failed,
            "skipped": self.skipped,
            "duration_seconds": self.duration,
        }
        return {
            "operation": self.operation,
            "summary": summary,
            "items": [entry.to_dict() for entry in self.items],
        }

    def to_json(self, indent: int = 2) -> str:
        """Render :meth:`to_dict` as a JSON string."""
        return json.dumps(self.to_dict(), indent=indent)
# Type alias for progress callback.
# Called once per finished item as callback(completed_count, total_count, item).
ProgressCallback = Callable[[int, int, BatchItem], None]
class BatchProcessor:
    """
    Handles batch encoding/decoding operations (v3.2.0).

    Work items are processed on a ThreadPoolExecutor; shared counters are
    updated under a lock as futures complete.

    Usage:
        processor = BatchProcessor(max_workers=4)

        # Batch encode with BatchCredentials
        creds = BatchCredentials(
            reference_photo=ref_bytes,
            passphrase="apple forest thunder mountain",
            pin="123456"
        )
        result = processor.batch_encode(
            images=['img1.png', 'img2.png'],
            message="Secret message",
            output_dir="./encoded/",
            credentials=creds,
        )

        # Batch encode with dict credentials
        result = processor.batch_encode(
            images=['img1.png', 'img2.png'],
            message="Secret message",
            credentials={
                "reference_photo": ref_bytes,
                "passphrase": "apple forest thunder mountain",
                "pin": "123456"
            },
        )

        # Batch decode
        result = processor.batch_decode(
            images=['encoded1.png', 'encoded2.png'],
            credentials=creds,
        )
    """

    def __init__(self, max_workers: int = 4) -> None:
        """
        Initialize batch processor.

        Args:
            max_workers: Maximum parallel workers (default 4)
        """
        self.max_workers = max_workers
        # Guards the shared succeeded/failed/skipped counters updated as
        # worker futures complete.
        self._lock = threading.Lock()

    def find_images(
        self,
        paths: list[str | Path],
        recursive: bool = False,
    ) -> Iterator[Path]:
        """
        Find all valid image files from paths.

        Args:
            paths: List of files or directories
            recursive: Search directories recursively

        Yields:
            Path objects for each valid image
        """
        for path in paths:
            path = Path(path)
            if path.is_file():
                if self._is_valid_image(path):
                    yield path
            elif path.is_dir():
                pattern = "**/*" if recursive else "*"
                for file_path in path.glob(pattern):
                    if file_path.is_file() and self._is_valid_image(file_path):
                        yield file_path

    def _is_valid_image(self, path: Path) -> bool:
        """Check if path has an allowed image extension (by suffix only)."""
        return path.suffix.lower().lstrip(".") in ALLOWED_IMAGE_EXTENSIONS

    def _normalize_credentials(
        self, credentials: dict | BatchCredentials | None
    ) -> BatchCredentials:
        """
        Normalize credentials to BatchCredentials object.

        Handles both dict and BatchCredentials input, and legacy 'day_phrase' key.

        Raises:
            ValueError: If credentials are missing or of an unsupported type.
        """
        if credentials is None:
            raise ValueError("Credentials are required")
        if isinstance(credentials, BatchCredentials):
            return credentials
        if isinstance(credentials, dict):
            return BatchCredentials.from_dict(credentials)
        raise ValueError(f"Invalid credentials type: {type(credentials)}")

    def batch_encode(
        self,
        images: list[str | Path],
        message: str | None = None,
        file_payload: Path | None = None,
        output_dir: Path | None = None,
        output_suffix: str = "_encoded",
        credentials: dict | BatchCredentials | None = None,
        compress: bool = True,
        recursive: bool = False,
        progress_callback: ProgressCallback | None = None,
        encode_func: Callable | None = None,
    ) -> BatchResult:
        """
        Encode message into multiple images.

        Args:
            images: List of image paths or directories
            message: Text message to encode (mutually exclusive with file_payload)
            file_payload: File to embed (mutually exclusive with message)
            output_dir: Output directory (default: same as input)
            output_suffix: Suffix for output files
            credentials: BatchCredentials or dict with 'passphrase', 'pin', etc.
            compress: Enable compression
            recursive: Search directories recursively
            progress_callback: Called for each item: callback(current, total, item)
            encode_func: Custom encode function (for integration)

        Returns:
            BatchResult with operation summary
        """
        if message is None and file_payload is None:
            raise ValueError("Either message or file_payload must be provided")

        # Normalize credentials to BatchCredentials
        creds = self._normalize_credentials(credentials)

        result = BatchResult(operation="encode")
        image_paths = list(self.find_images(images, recursive))
        result.total = len(image_paths)

        if output_dir:
            output_dir = Path(output_dir)
            output_dir.mkdir(parents=True, exist_ok=True)

        # Prepare batch items (output is always .png — LSB needs lossless)
        for img_path in image_paths:
            if output_dir:
                out_path = output_dir / f"{img_path.stem}{output_suffix}.png"
            else:
                out_path = img_path.parent / f"{img_path.stem}{output_suffix}.png"
            item = BatchItem(
                input_path=img_path,
                output_path=out_path,
                input_size=img_path.stat().st_size if img_path.exists() else 0,
            )
            result.items.append(item)

        # Process items
        def process_encode(item: BatchItem) -> BatchItem:
            item.status = BatchStatus.PROCESSING
            item.start_time = time.time()
            try:
                if encode_func:
                    # Use provided encode function
                    encode_func(
                        image_path=item.input_path,
                        output_path=item.output_path,
                        message=message,
                        file_payload=file_payload,
                        credentials=creds.to_dict(),
                        compress=compress,
                    )
                else:
                    # Use stegasoo encode
                    self._do_encode(item, message, file_payload, creds, compress)
                item.status = BatchStatus.SUCCESS
                item.output_size = (
                    item.output_path.stat().st_size
                    if item.output_path and item.output_path.exists()
                    else 0
                )
                item.message = f"Encoded to {item.output_path.name}"
            except Exception as e:
                item.status = BatchStatus.FAILED
                item.error = str(e)
            item.end_time = time.time()
            return item

        # Execute with thread pool
        self._execute_batch(result, process_encode, progress_callback)
        return result

    def batch_decode(
        self,
        images: list[str | Path],
        output_dir: Path | None = None,
        credentials: dict | BatchCredentials | None = None,
        recursive: bool = False,
        progress_callback: ProgressCallback | None = None,
        decode_func: Callable | None = None,
    ) -> BatchResult:
        """
        Decode messages from multiple images.

        Args:
            images: List of image paths or directories
            output_dir: Output directory for file payloads (default: same as input)
            credentials: BatchCredentials or dict with 'passphrase', 'pin', etc.
            recursive: Search directories recursively
            progress_callback: Called for each item: callback(current, total, item)
            decode_func: Custom decode function (for integration)

        Returns:
            BatchResult with decoded messages in item.message fields
        """
        # Normalize credentials to BatchCredentials
        creds = self._normalize_credentials(credentials)

        result = BatchResult(operation="decode")
        image_paths = list(self.find_images(images, recursive))
        result.total = len(image_paths)

        if output_dir:
            output_dir = Path(output_dir)
            output_dir.mkdir(parents=True, exist_ok=True)

        # Prepare batch items (output_path holds the payload directory here)
        for img_path in image_paths:
            item = BatchItem(
                input_path=img_path,
                output_path=output_dir,
                input_size=img_path.stat().st_size if img_path.exists() else 0,
            )
            result.items.append(item)

        # Process items
        def process_decode(item: BatchItem) -> BatchItem:
            item.status = BatchStatus.PROCESSING
            item.start_time = time.time()
            try:
                if decode_func:
                    # Use provided decode function
                    decoded = decode_func(
                        image_path=item.input_path,
                        output_dir=item.output_path,
                        credentials=creds.to_dict(),
                    )
                    item.message = (
                        decoded.get("message", "") if isinstance(decoded, dict) else str(decoded)
                    )
                else:
                    # Use stegasoo decode
                    item.message = self._do_decode(item, creds)
                item.status = BatchStatus.SUCCESS
            except Exception as e:
                item.status = BatchStatus.FAILED
                item.error = str(e)
            item.end_time = time.time()
            return item

        # Execute with thread pool
        self._execute_batch(result, process_decode, progress_callback)
        return result

    def _execute_batch(
        self,
        result: BatchResult,
        process_func: Callable[[BatchItem], BatchItem],
        progress_callback: ProgressCallback | None = None,
    ) -> None:
        """Run process_func over result.items on a thread pool, tallying outcomes."""
        completed = 0
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            futures = {executor.submit(process_func, item): item for item in result.items}
            for future in as_completed(futures):
                item = future.result()
                completed += 1
                # Counter updates are serialized; the progress callback is
                # invoked outside the lock.
                with self._lock:
                    if item.status == BatchStatus.SUCCESS:
                        result.succeeded += 1
                    elif item.status == BatchStatus.FAILED:
                        result.failed += 1
                    elif item.status == BatchStatus.SKIPPED:
                        result.skipped += 1
                if progress_callback:
                    progress_callback(completed, result.total, item)
        result.end_time = time.time()

    def _do_encode(
        self,
        item: BatchItem,
        message: str | None,
        file_payload: Path | None,
        creds: BatchCredentials,
        compress: bool,
    ) -> None:
        """
        Perform actual encoding using stegasoo.encode.

        Override this method to customize encoding behavior.
        """
        try:
            from .encode import encode
            from .models import FilePayload

            # Read carrier image
            carrier_image = item.input_path.read_bytes()
            if file_payload:
                # Encode file
                payload = FilePayload.from_file(str(file_payload))
                result = encode(
                    message=payload,
                    reference_photo=creds.reference_photo,
                    carrier_image=carrier_image,
                    passphrase=creds.passphrase,
                    pin=creds.pin,
                    rsa_key_data=creds.rsa_key_data,
                    rsa_password=creds.rsa_password,
                )
            else:
                # Encode text message
                result = encode(
                    message=message,
                    reference_photo=creds.reference_photo,
                    carrier_image=carrier_image,
                    passphrase=creds.passphrase,
                    pin=creds.pin,
                    rsa_key_data=creds.rsa_key_data,
                    rsa_password=creds.rsa_password,
                )
            # Write output
            if item.output_path:
                item.output_path.write_bytes(result.stego_image)
        except ImportError:
            # Fallback to mock if stegasoo.encode not available
            self._mock_encode(item, message, creds, compress)

    def _do_decode(
        self,
        item: BatchItem,
        creds: BatchCredentials,
    ) -> str:
        """
        Perform actual decoding using stegasoo.decode.

        Override this method to customize decoding behavior.
        """
        try:
            from .decode import decode

            # Read stego image
            stego_image = item.input_path.read_bytes()
            result = decode(
                stego_image=stego_image,
                reference_photo=creds.reference_photo,
                passphrase=creds.passphrase,
                pin=creds.pin,
                rsa_key_data=creds.rsa_key_data,
                rsa_password=creds.rsa_password,
            )
            if result.is_text:
                return result.message or ""
            else:
                # File payload - save it
                if item.output_path and result.file_data:
                    output_file = item.output_path / (result.filename or "extracted_file")
                    output_file.write_bytes(result.file_data)
                    return f"File extracted: {result.filename or 'extracted_file'}"
                return f"[File: {result.filename or 'binary data'}]"
        except ImportError:
            # Fallback to mock if stegasoo.decode not available
            return self._mock_decode(item, creds)

    def _mock_encode(
        self, item: BatchItem, message: str | None, creds: BatchCredentials, compress: bool
    ) -> None:
        """Mock encode for testing - replace with actual stego.encode()"""
        # This is a placeholder - in real usage, you'd call your actual encode function
        # For now, just copy the file to simulate encoding
        import shutil

        if item.output_path:
            shutil.copy(item.input_path, item.output_path)

    def _mock_decode(self, item: BatchItem, creds: BatchCredentials) -> str:
        """Mock decode for testing - replace with actual stego.decode()"""
        # This is a placeholder - in real usage, you'd call your actual decode function
        return "[Decoded message would appear here]"
def batch_capacity_check(
    images: list[str | Path],
    recursive: bool = False,
) -> list[dict]:
    """
    Estimate embeddable capacity of multiple images without encoding anything.

    Args:
        images: List of image paths or directories
        recursive: Search directories recursively

    Returns:
        One dict per image with path, dimensions, format info, estimated
        capacity, validity flag, and warnings; unreadable files yield a
        dict with an "error" key instead.
    """
    from PIL import Image

    from .constants import MAX_IMAGE_PIXELS

    finder = BatchProcessor()
    reports: list[dict] = []
    for img_path in finder.find_images(images, recursive):
        try:
            with Image.open(img_path) as img:
                width, height = img.size
                pixel_count = width * height
                # Estimate: 3 LSB bits per RGB pixel, minus a fixed header allowance.
                usable_bytes = (pixel_count * 3 // 8) - 100
                reports.append(
                    {
                        "path": str(img_path),
                        "dimensions": f"{width}x{height}",
                        "pixels": pixel_count,
                        "format": img.format,
                        "mode": img.mode,
                        "capacity_bytes": max(0, usable_bytes),
                        "capacity_kb": max(0, usable_bytes // 1024),
                        "valid": (
                            pixel_count <= MAX_IMAGE_PIXELS
                            and img.format in LOSSLESS_FORMATS
                        ),
                        "warnings": _get_image_warnings(img, img_path),
                    }
                )
        except Exception as e:
            reports.append(
                {
                    "path": str(img_path),
                    "error": str(e),
                    "valid": False,
                }
            )
    return reports
def _get_image_warnings(img, path: Path) -> list[str]:
    """Collect human-readable quality/compatibility warnings for an open image."""
    from .constants import LOSSLESS_FORMATS, MAX_IMAGE_PIXELS

    notes: list[str] = []
    if img.format not in LOSSLESS_FORMATS:
        notes.append(f"Lossy format ({img.format}) - quality will degrade on re-save")
    width, height = img.size
    if width * height > MAX_IMAGE_PIXELS:
        notes.append(f"Image exceeds {MAX_IMAGE_PIXELS:,} pixel limit")
    if img.mode not in ("RGB", "RGBA"):
        notes.append(f"Non-RGB mode ({img.mode}) - will be converted")
    return notes
# CLI-friendly functions
def print_batch_result(result: BatchResult, verbose: bool = False) -> None:
    """Print a human-readable batch summary to stdout.

    Args:
        result: Completed BatchResult to report.
        verbose: When True, list every item; failed items are always listed.
    """
    print(f"\n{'='*60}")
    print(f"Batch {result.operation.upper()} Complete")
    print(f"{'='*60}")
    print(f"Total: {result.total}")
    print(f"Succeeded: {result.succeeded}")
    print(f"Failed: {result.failed}")
    print(f"Skipped: {result.skipped}")
    if result.duration:
        print(f"Duration: {result.duration:.2f}s")
    if verbose or result.failed > 0:
        # NOTE(review): the separator below multiplies an EMPTY string and the
        # status-icon values below are empty strings, so nothing is rendered.
        # These look like unicode glyphs (e.g. a rule character and check/cross
        # marks) stripped in transit — confirm against the pre-monorepo
        # stegasoo source before relying on this output.
        print(f"\n{''*60}")
        for item in result.items:
            status_icon = {
                BatchStatus.SUCCESS: "",
                BatchStatus.FAILED: "",
                BatchStatus.SKIPPED: "",
                BatchStatus.PENDING: "",
                BatchStatus.PROCESSING: "",
            }.get(item.status, "?")
            print(f"{status_icon} {item.input_path.name}")
            if item.error:
                print(f" Error: {item.error}")
            elif item.message and verbose:
                print(f" {item.message}")

View File

@ -0,0 +1,649 @@
"""
Channel Key Management for Stegasoo (v4.0.0)
A channel key ties encode/decode operations to a specific deployment or group.
Messages encoded with one channel key can only be decoded by systems with the
same channel key configured.
Use cases:
- Organization deployment: IT sets a company-wide channel key
- Friend groups: Share a channel key for private communication
- Air-gapped systems: Generate unique key per installation
- Public instances: No channel key = compatible with any instance without a channel key
Storage priority:
1. Environment variable: STEGASOO_CHANNEL_KEY
2. Config file: ~/.stegasoo/channel.key or ./config/channel.key
3. None (public mode - compatible with any instance without a channel key)
INTEGRATION STATUS (v4.0.0):
- get_channel_key_hash() integrated into derive_hybrid_key() in crypto.py
- get_channel_key_hash() integrated into derive_pixel_key() in crypto.py
- channel_key parameter added to encode() and decode() functions
- Header flags indicate whether message was encoded with channel key
- Helpful error messages for channel key mismatches
"""
import hashlib
import os
import re
import secrets
from pathlib import Path
from .debug import debug
# Channel key format: 8 groups of 4 alphanumeric chars (32 chars total)
# Example: ABCD-1234-EFGH-5678-IJKL-9012-MNOP-3456
CHANNEL_KEY_PATTERN = re.compile(r"^[A-Z0-9]{4}(-[A-Z0-9]{4}){7}$")
CHANNEL_KEY_LENGTH = 32  # Characters (excluding dashes)
CHANNEL_KEY_FORMATTED_LENGTH = 39  # With dashes
# Environment variable name (checked before any config file)
CHANNEL_KEY_ENV_VAR = "STEGASOO_CHANNEL_KEY"
# Config locations (in priority order; first readable valid key wins)
CONFIG_LOCATIONS = [
    Path("./config/channel.key"),  # Project config
    Path.home() / ".stegasoo" / "channel.key",  # User config
]
# Encrypted config marker: values with this prefix were written by
# _encrypt_for_storage; anything else is treated as legacy plaintext.
ENCRYPTED_PREFIX = "ENC:"
def _get_machine_key() -> bytes:
"""
Get a machine-specific key for encrypting stored channel keys.
Uses /etc/machine-id on Linux, falls back to hostname hash.
This ties the encrypted key to this specific machine.
"""
machine_id = None
# Try Linux machine-id
try:
machine_id = Path("/etc/machine-id").read_text().strip()
except (OSError, FileNotFoundError):
pass
# Fallback to hostname
if not machine_id:
import socket
machine_id = socket.gethostname()
# Hash to get consistent 32 bytes
return hashlib.sha256(machine_id.encode()).digest()
def _encrypt_for_storage(plaintext: str) -> str:
    """
    Obfuscate a channel key for on-disk storage, bound to this machine.

    NOTE: this is an XOR stream against a machine-derived key — it deters
    casual copying of the file to another host, not a determined attacker.
    Returns an ENC:-prefixed base64 string.
    """
    import base64

    machine_key = _get_machine_key()
    raw = plaintext.encode()
    # XOR each byte against the machine key, cycling the key as needed.
    mixed = bytes(b ^ machine_key[i % len(machine_key)] for i, b in enumerate(raw))
    return ENCRYPTED_PREFIX + base64.b64encode(mixed).decode()
def _decrypt_from_storage(stored: str) -> str | None:
    """
    Reverse :func:`_encrypt_for_storage`.

    Values without the ENC: prefix are legacy plaintext and pass through
    untouched. Returns None when the payload cannot be decoded on this
    machine (wrong host, corrupt base64, bad UTF-8).
    """
    import base64

    if not stored.startswith(ENCRYPTED_PREFIX):
        # Not encrypted, return as-is (legacy plaintext)
        return stored
    try:
        payload = base64.b64decode(stored[len(ENCRYPTED_PREFIX) :])
        machine_key = _get_machine_key()
        clear = bytes(b ^ machine_key[i % len(machine_key)] for i, b in enumerate(payload))
        return clear.decode()
    except Exception:
        return None
def generate_channel_key() -> str:
    """
    Generate a new random channel key.

    Returns:
        Formatted key, e.g. "ABCD-1234-EFGH-5678-IJKL-9012-MNOP-3456"
        (39 characters including dashes).
    """
    alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    # secrets (not random) — channel keys are security-sensitive.
    chars = [secrets.choice(alphabet) for _ in range(CHANNEL_KEY_LENGTH)]
    formatted = format_channel_key("".join(chars))
    debug.print(f"Generated channel key: {get_channel_fingerprint(formatted)}")
    return formatted
def format_channel_key(raw_key: str) -> str:
    """
    Normalize a raw key string into the canonical dashed format.

    Args:
        raw_key: Key string, with or without dashes/spaces, any case.

    Returns:
        Uppercase key grouped as XXXX-XXXX-XXXX-XXXX-XXXX-XXXX-XXXX-XXXX.

    Raises:
        ValueError: On wrong length or characters outside A-Z / 0-9.

    Example:
        >>> format_channel_key("ABCD1234EFGH5678IJKL9012MNOP3456")
        "ABCD-1234-EFGH-5678-IJKL-9012-MNOP-3456"
    """
    cleaned = raw_key.replace("-", "").replace(" ", "").upper()
    if len(cleaned) != CHANNEL_KEY_LENGTH:
        raise ValueError(f"Channel key must be {CHANNEL_KEY_LENGTH} characters (got {len(cleaned)})")
    allowed = set("ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
    if any(ch not in allowed for ch in cleaned):
        raise ValueError("Channel key must contain only letters A-Z and digits 0-9")
    groups = [cleaned[i : i + 4] for i in range(0, CHANNEL_KEY_LENGTH, 4)]
    return "-".join(groups)
def validate_channel_key(key: str) -> bool:
    """
    Return True when *key* normalizes to the canonical channel-key format.

    Example:
        >>> validate_channel_key("ABCD-1234-EFGH-5678-IJKL-9012-MNOP-3456")
        True
        >>> validate_channel_key("invalid")
        False
    """
    if not key:
        return False
    try:
        canonical = format_channel_key(key)
    except ValueError:
        return False
    return CHANNEL_KEY_PATTERN.match(canonical) is not None
def get_channel_key() -> str | None:
    """
    Resolve the active channel key, or None for public mode.

    Resolution order:
    1. STEGASOO_CHANNEL_KEY environment variable
    2. ./config/channel.key
    3. ~/.stegasoo/channel.key

    Returns:
        The formatted channel key, or None when nothing valid is configured.
    """
    # 1. Environment variable wins when it parses as a valid key.
    env_value = os.environ.get(CHANNEL_KEY_ENV_VAR, "").strip()
    if env_value:
        if validate_channel_key(env_value):
            debug.print(f"Channel key from environment: {get_channel_fingerprint(env_value)}")
            return format_channel_key(env_value)
        debug.print(f"Warning: Invalid {CHANNEL_KEY_ENV_VAR} format, ignoring")

    # 2. Config files, in priority order (entries may be machine-encrypted).
    for config_path in CONFIG_LOCATIONS:
        if not config_path.exists():
            continue
        try:
            stored = config_path.read_text().strip()
        except (OSError, PermissionError) as e:
            debug.print(f"Could not read {config_path}: {e}")
            continue
        # Decrypt if encrypted, otherwise use as-is (legacy plaintext).
        candidate = _decrypt_from_storage(stored)
        if candidate and validate_channel_key(candidate):
            debug.print(f"Channel key from {config_path}: {get_channel_fingerprint(candidate)}")
            return format_channel_key(candidate)

    # 3. Nothing configured → public mode.
    debug.print("No channel key configured (public mode)")
    return None
def set_channel_key(key: str, location: str = "project") -> Path:
    """
    Persist a channel key to a config file, encrypted for this machine.

    Args:
        key: Channel key to save (normalized first).
        location: 'user' → ~/.stegasoo/channel.key; anything else →
            ./config/channel.key.

    Returns:
        Path the key was written to.

    Raises:
        ValueError: If the key format is invalid.
    """
    formatted = format_channel_key(key)
    if location == "user":
        target = Path.home() / ".stegasoo" / "channel.key"
    else:
        target = Path("./config/channel.key")
    target.parent.mkdir(parents=True, exist_ok=True)
    # Stored value is machine-bound; copying the file elsewhere won't decrypt.
    target.write_text(_encrypt_for_storage(formatted) + "\n")
    try:
        # Owner read/write only; best effort (Windows chmod semantics differ).
        target.chmod(0o600)
    except (OSError, AttributeError):
        pass
    debug.print(f"Channel key saved to {target}")
    return target
def clear_channel_key(location: str = "all") -> list[Path]:
    """
    Remove stored channel-key files.

    Args:
        location: 'project', 'user', or 'all'.

    Returns:
        List of paths that were actually deleted.
    """
    candidates: list[Path] = []
    if location in ("project", "all"):
        candidates.append(Path("./config/channel.key"))
    if location in ("user", "all"):
        candidates.append(Path.home() / ".stegasoo" / "channel.key")

    removed: list[Path] = []
    for path in candidates:
        if not path.exists():
            continue
        try:
            path.unlink()
        except (OSError, PermissionError) as e:
            debug.print(f"Could not remove {path}: {e}")
            continue
        removed.append(path)
        debug.print(f"Removed channel key: {path}")
    return removed
def get_channel_key_hash(key: str | None = None) -> bytes | None:
    """
    Return a 32-byte digest of the channel key for key derivation.

    The SHA-256 of the canonical formatted key is mixed into Argon2 key
    derivation to bind encryption to a specific channel.

    Args:
        key: Channel key; when None the configured key is loaded.

    Returns:
        32-byte digest, or None when no channel key is set (public mode).
    """
    active = key if key is not None else get_channel_key()
    if not active:
        return None
    canonical = format_channel_key(active)
    return hashlib.sha256(canonical.encode("utf-8")).digest()
def get_channel_fingerprint(key: str | None = None) -> str | None:
    """
    Return a masked display form of the channel key.

    The first and last four-character groups stay visible; the six middle
    groups are replaced with bullet masks, e.g.
    "ABCD-••••-••••-••••-••••-••••-••••-3456".

    Args:
        key: Channel key; when None the configured key is loaded.

    Returns:
        Masked fingerprint string, or None when no key is available.
    """
    active = key if key is not None else get_channel_key()
    if not active:
        return None
    groups = format_channel_key(active).split("-")
    return "-".join([groups[0], *(["••••"] * 6), groups[-1]])
def get_channel_status() -> dict:
    """
    Summarize the current channel key configuration.

    Returns:
        Dictionary with:
        - mode: 'private' or 'public'
        - configured: bool
        - fingerprint: masked key or None
        - source: where key came from or None
        - key: full key (for export) or None

    Example:
        >>> status = get_channel_status()
        >>> print(f"Mode: {status['mode']}")
        Mode: private
    """
    key = get_channel_key()
    if not key:
        return {
            "mode": "public",
            "configured": False,
            "fingerprint": None,
            "source": None,
            "key": None,
        }
    # Work out which configuration source supplied the active key:
    # environment wins, then the first matching config file.
    source = "unknown"
    env_key = os.environ.get(CHANNEL_KEY_ENV_VAR, "").strip()
    if env_key and validate_channel_key(env_key):
        source = "environment"
    else:
        for config_path in CONFIG_LOCATIONS:
            if not config_path.exists():
                continue
            try:
                file_key = _decrypt_from_storage(config_path.read_text().strip())
                matches = bool(
                    file_key
                    and validate_channel_key(file_key)
                    and format_channel_key(file_key) == key
                )
            except (OSError, PermissionError, ValueError):
                # Unreadable/corrupt config entries are skipped silently.
                continue
            if matches:
                source = str(config_path)
                break
    return {
        "mode": "private",
        "configured": True,
        "fingerprint": get_channel_fingerprint(key),
        "source": source,
        "key": key,
    }
def has_channel_key() -> bool:
    """
    Quick check whether a channel key is configured.

    Returns:
        True if a channel key is set, False for public mode

    Example:
        >>> if has_channel_key():
        ...     print("Private channel active")
    """
    key = get_channel_key()
    return key is not None
def resolve_channel_key(
    value: str | None = None,
    *,
    file_path: str | Path | None = None,
    no_channel: bool = False,
) -> str | None:
    """
    Resolve a channel key from user input (unified for all frontends).

    This consolidates the channel key resolution logic shared by the
    CLI, API, and WebUI.

    Args:
        value: Input value:
            - 'auto' or None: Use server-configured key
            - 'none' or '': Public mode (no channel key)
            - explicit key: Validate and use
        file_path: Path to a file containing the channel key
        no_channel: If True, return "" for public mode (overrides value)

    Returns:
        None: Use server-configured key (auto mode)
        "": Public mode (no channel key)
        str: Explicit valid channel key

    Raises:
        ValueError: If the key format is invalid
        FileNotFoundError: If file_path doesn't exist

    Example:
        >>> resolve_channel_key("auto")  # -> None
        >>> resolve_channel_key("none")  # -> ""
        >>> resolve_channel_key(no_channel=True)  # -> ""
        >>> resolve_channel_key("ABCD-1234-...")  # -> "ABCD-1234-..."
        >>> resolve_channel_key(file_path="key.txt")  # reads from file
    """
    debug.print(
        f"resolve_channel_key: value={value}, file_path={file_path}, no_channel={no_channel}"
    )
    # The explicit public-mode flag beats every other input.
    if no_channel:
        debug.print("resolve_channel_key: public mode (no_channel=True)")
        return ""
    # A key file takes precedence over the value string.
    if file_path:
        key_file = Path(file_path)
        if not key_file.exists():
            raise FileNotFoundError(f"Channel key file not found: {file_path}")
        file_key = key_file.read_text().strip()
        if not validate_channel_key(file_key):
            raise ValueError(f"Invalid channel key format in file: {file_path}")
        debug.print(f"resolve_channel_key: from file -> {get_channel_fingerprint(file_key)}")
        return format_channel_key(file_key)
    # Sentinel strings for auto / public modes.
    if value is None or value.lower() == "auto":
        debug.print("resolve_channel_key: auto mode (server config)")
        return None
    if value == "" or value.lower() == "none":
        debug.print("resolve_channel_key: public mode (explicit none)")
        return ""
    # Anything else must be a well-formed explicit key.
    if not validate_channel_key(value):
        raise ValueError(
            "Invalid channel key format. Expected: XXXX-XXXX-XXXX-XXXX-XXXX-XXXX-XXXX-XXXX\n"
            "Generate a new key with: stegasoo channel generate"
        )
    formatted = format_channel_key(value)
    debug.print(f"resolve_channel_key: explicit key -> {get_channel_fingerprint(formatted)}")
    return formatted
def get_channel_response_info(channel_key: str | None) -> dict:
"""
Get channel info for API/WebUI responses.
Args:
channel_key: Resolved channel key (None=auto, ""=public, str=explicit)
Returns:
Dict with mode, fingerprint, and display info
Example:
>>> info = get_channel_response_info("ABCD-1234-...")
>>> info['mode']
'explicit'
"""
if channel_key is None:
# Auto mode - check server config
server_key = get_channel_key()
if server_key:
return {
"mode": "private",
"fingerprint": get_channel_fingerprint(server_key),
"source": "server",
}
return {
"mode": "public",
"fingerprint": None,
"source": "server",
}
if channel_key == "":
return {
"mode": "public",
"fingerprint": None,
"source": "explicit",
}
return {
"mode": "private",
"fingerprint": get_channel_fingerprint(channel_key),
"source": "explicit",
}
# =============================================================================
# CLI SUPPORT
# =============================================================================
# Minimal manual CLI for `python -m stegasoo.channel`; the full frontend
# lives in the dedicated CLI module. Relies on module-level helpers
# (get_channel_status, generate_channel_key, set_channel_key, ...).
if __name__ == "__main__":
    import sys
    def print_status():
        """Print current channel status."""
        status = get_channel_status()
        print(f"Mode: {status['mode'].upper()}")
        if status["configured"]:
            print(f"Fingerprint: {status['fingerprint']}")
            print(f"Source: {status['source']}")
        else:
            print("No channel key configured (public mode)")
    # No subcommand: show current status plus usage help, exit 0.
    if len(sys.argv) < 2:
        print("Channel Key Manager")
        print("=" * 40)
        print_status()
        print()
        print("Commands:")
        print(" python -m stegasoo.channel generate - Generate new key")
        print(" python -m stegasoo.channel set <KEY> - Set channel key")
        print(" python -m stegasoo.channel show - Show full key")
        print(" python -m stegasoo.channel clear - Remove channel key")
        print(" python -m stegasoo.channel status - Show status")
        sys.exit(0)
    cmd = sys.argv[1].lower()
    if cmd == "generate":
        # Generate a fresh key and optionally persist it (interactive).
        key = generate_channel_key()
        print("Generated channel key:")
        print(f" {key}")
        print()
        save = input("Save to config? [y/N]: ").strip().lower()
        if save == "y":
            path = set_channel_key(key)
            print(f"Saved to: {path}")
    elif cmd == "set":
        # Validate/format the provided key, then persist it; exit 1 on error.
        if len(sys.argv) < 3:
            print("Usage: python -m stegasoo.channel set <KEY>")
            sys.exit(1)
        try:
            key = sys.argv[2]
            formatted = format_channel_key(key)
            path = set_channel_key(formatted)
            print(f"Channel key set: {get_channel_fingerprint(formatted)}")
            print(f"Saved to: {path}")
        except ValueError as e:
            print(f"Error: {e}")
            sys.exit(1)
    elif cmd == "show":
        # Print the full (unmasked) key and where it came from.
        status = get_channel_status()
        if status["configured"]:
            print(f"Channel key: {status['key']}")
            print(f"Source: {status['source']}")
        else:
            print("No channel key configured")
    elif cmd == "clear":
        # Remove key files from every known location.
        deleted = clear_channel_key("all")
        if deleted:
            print(f"Removed channel key from: {', '.join(str(p) for p in deleted)}")
        else:
            print("No channel key files found")
    elif cmd == "status":
        print_status()
    else:
        print(f"Unknown command: {cmd}")
        sys.exit(1)

2982
src/soosef/stegasoo/cli.py Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,252 @@
"""
Stegasoo Compression Module
Provides transparent compression/decompression for payloads before encryption.
Supports multiple algorithms with automatic detection on decompression.
"""
import struct
import zlib
from enum import IntEnum
from .debug import get_logger
logger = get_logger(__name__)
# Optional LZ4 support (faster, slightly worse ratio)
try:
import lz4.frame
HAS_LZ4 = True
except ImportError:
HAS_LZ4 = False
# Optional ZSTD support (best ratio, fast)
try:
import zstandard as zstd
HAS_ZSTD = True
except ImportError:
HAS_ZSTD = False
class CompressionAlgorithm(IntEnum):
    """Supported compression algorithms."""
    # Stored as a single byte in the payload header (see compress());
    # existing values must never be renumbered.
    NONE = 0
    ZLIB = 1
    LZ4 = 2
    ZSTD = 3  # v4.2.0: Best ratio, fast compression
# Magic bytes for compressed payloads (prefix of the framed output)
COMPRESSION_MAGIC = b"\x00CMP"
# Minimum size to bother compressing (small data often expands)
MIN_COMPRESS_SIZE = 64
# Compression level for zlib (1-9, higher = better ratio but slower)
ZLIB_LEVEL = 6
class CompressionError(Exception):
    """Raised when compression or decompression fails."""
def compress(data: bytes, algorithm: CompressionAlgorithm = CompressionAlgorithm.ZLIB) -> bytes:
    """
    Compress data with the requested algorithm.

    Output layout: MAGIC (4) + ALGORITHM (1) + ORIGINAL_SIZE (4) + COMPRESSED_DATA

    Falls back to zlib when an optional codec (LZ4/ZSTD) is not
    installed, and stores the payload uncompressed whenever compression
    would not shrink it.

    Args:
        data: Raw bytes to compress
        algorithm: Compression algorithm to use

    Returns:
        Framed compressed data, or framed original data if compression
        didn't help
    """
    # Tiny payloads and explicit NONE skip compression entirely.
    if len(data) < MIN_COMPRESS_SIZE or algorithm == CompressionAlgorithm.NONE:
        return _wrap_uncompressed(data)
    # Degrade gracefully to zlib when the optional codec is unavailable.
    if algorithm == CompressionAlgorithm.LZ4 and not HAS_LZ4:
        algorithm = CompressionAlgorithm.ZLIB
    if algorithm == CompressionAlgorithm.ZSTD and not HAS_ZSTD:
        algorithm = CompressionAlgorithm.ZLIB
    if algorithm == CompressionAlgorithm.ZLIB:
        packed = zlib.compress(data, level=ZLIB_LEVEL)
    elif algorithm == CompressionAlgorithm.LZ4:
        packed = lz4.frame.compress(data)
    elif algorithm == CompressionAlgorithm.ZSTD:
        packed = zstd.ZstdCompressor(level=19).compress(data)  # High compression level
    else:
        raise CompressionError(f"Unknown compression algorithm: {algorithm}")
    # If compression expanded the payload, keep the original bytes.
    if len(packed) >= len(data):
        return _wrap_uncompressed(data)
    # Frame: MAGIC + algorithm byte + original size + compressed body.
    return COMPRESSION_MAGIC + struct.pack("<BI", algorithm, len(data)) + packed
def decompress(data: bytes) -> bytes:
    """
    Decompress data, auto-detecting the algorithm from the header.

    Args:
        data: Potentially compressed data (framed by compress())

    Returns:
        Decompressed data (or the input unchanged if it carries no
        compression header)

    Raises:
        CompressionError: On truncated headers, missing optional codecs,
            codec failures, unknown algorithms, or size mismatches
    """
    # Data not produced by compress() passes through untouched.
    if not data.startswith(COMPRESSION_MAGIC):
        return data
    if len(data) < 9:  # MAGIC(4) + ALGO(1) + SIZE(4)
        raise CompressionError("Truncated compression header")
    # Parse header
    algorithm = CompressionAlgorithm(data[4])
    original_size = struct.unpack("<I", data[5:9])[0]
    compressed_data = data[9:]
    if algorithm == CompressionAlgorithm.NONE:
        result = compressed_data
    elif algorithm == CompressionAlgorithm.ZLIB:
        try:
            result = zlib.decompress(compressed_data)
        except zlib.error as e:
            # Chain the cause (PEP 3134) so the codec error stays visible.
            raise CompressionError(f"Zlib decompression failed: {e}") from e
    elif algorithm == CompressionAlgorithm.LZ4:
        if not HAS_LZ4:
            raise CompressionError("LZ4 compression used but lz4 package not installed")
        try:
            result = lz4.frame.decompress(compressed_data)
        except Exception as e:
            raise CompressionError(f"LZ4 decompression failed: {e}") from e
    elif algorithm == CompressionAlgorithm.ZSTD:
        if not HAS_ZSTD:
            raise CompressionError("ZSTD compression used but zstandard package not installed")
        try:
            dctx = zstd.ZstdDecompressor()
            result = dctx.decompress(compressed_data)
        except Exception as e:
            raise CompressionError(f"ZSTD decompression failed: {e}") from e
    else:
        raise CompressionError(f"Unknown compression algorithm: {algorithm}")
    # Integrity check: the header records the pre-compression size.
    if len(result) != original_size:
        raise CompressionError(f"Size mismatch: expected {original_size}, got {len(result)}")
    return result
def _wrap_uncompressed(data: bytes) -> bytes:
    """Frame raw data with a NONE-algorithm header for uniform parsing."""
    return COMPRESSION_MAGIC + struct.pack("<BI", CompressionAlgorithm.NONE, len(data)) + data
def get_compression_ratio(original: bytes, compressed: bytes) -> float:
    """
    Compute the compressed/original size ratio.

    Returns:
        Ratio where < 1.0 means compression helped, > 1.0 means it
        expanded (1.0 for empty input)
    """
    original_size = len(original)
    if not original_size:
        return 1.0
    return len(compressed) / original_size
def estimate_compressed_size(
    data: bytes, algorithm: CompressionAlgorithm = CompressionAlgorithm.ZLIB
) -> int:
    """
    Estimate compressed size without compressing the full payload.

    Small inputs are compressed outright for an exact answer; large
    inputs are estimated by compressing an 8 KiB sample and
    extrapolating its ratio.

    Args:
        data: Data to estimate
        algorithm: Algorithm to estimate for

    Returns:
        Estimated compressed size in bytes (including the 9-byte header)
    """
    size = len(data)
    if size < MIN_COMPRESS_SIZE:
        return size + 9  # Header overhead only; payload stays raw
    if size < 10000:
        # Cheap enough to just run the real compressor.
        return len(compress(data, algorithm))
    # Sample-and-extrapolate for large inputs.
    sample = data[:8192]
    if algorithm == CompressionAlgorithm.LZ4 and HAS_LZ4:
        compressed_sample = lz4.frame.compress(sample)
    elif algorithm == CompressionAlgorithm.ZSTD and HAS_ZSTD:
        compressed_sample = zstd.ZstdCompressor(level=19).compress(sample)
    else:
        # ZLIB requested, or the optional codec is unavailable.
        compressed_sample = zlib.compress(sample, level=ZLIB_LEVEL)
    ratio = len(compressed_sample) / len(sample)
    return int(size * ratio) + 9  # Add header
def get_available_algorithms() -> list[CompressionAlgorithm]:
    """Get the list of compression algorithms usable in this environment."""
    # NONE and ZLIB are always available; optional codecs depend on extras.
    available = [CompressionAlgorithm.NONE, CompressionAlgorithm.ZLIB]
    for installed, algo in (
        (HAS_LZ4, CompressionAlgorithm.LZ4),
        (HAS_ZSTD, CompressionAlgorithm.ZSTD),
    ):
        if installed:
            available.append(algo)
    return available
def get_best_algorithm() -> CompressionAlgorithm:
    """Get the best available compression algorithm (prefer ZSTD > ZLIB > LZ4)."""
    # LZ4 trades ratio for speed, so zlib is always the better fallback here.
    return CompressionAlgorithm.ZSTD if HAS_ZSTD else CompressionAlgorithm.ZLIB
def algorithm_name(algo: CompressionAlgorithm) -> str:
    """Get a human-readable algorithm name."""
    return {
        CompressionAlgorithm.NONE: "None",
        CompressionAlgorithm.ZLIB: "Zlib (deflate)",
        CompressionAlgorithm.LZ4: "LZ4 (fast)",
        CompressionAlgorithm.ZSTD: "Zstd (best)",
    }.get(algo, "Unknown")

View File

@ -0,0 +1,455 @@
"""
Stegasoo Constants and Configuration (v4.2.0 - Performance & Compression)
Central location for all magic numbers, limits, and crypto parameters.
All version numbers, limits, and configuration values should be defined here.
CHANGES in v4.2.0:
- Added zstd compression for QR codes (better ratio than zlib)
- RSA key size capped at 3072 bits (4096 too large for QR codes)
- Progress bar improvements for encode/decode operations
- File auto-expire increased to 10 minutes
CHANGES in v4.0.2:
- Added Web UI authentication with SQLite3 user storage
- Added optional HTTPS with auto-generated self-signed certificates
- UI improvements for QR preview panels and PIN/channel columns
BREAKING CHANGES in v4.0.0:
- Added channel key support for deployment/group isolation
- FORMAT_VERSION bumped to 5 (adds flags byte to header)
- Header size increased by 1 byte for flags
BREAKING CHANGES in v3.2.0:
- Removed date dependency from cryptographic operations
- Renamed day_phrase passphrase throughout codebase
"""
import importlib.resources
from collections.abc import Callable
from pathlib import Path
# ============================================================================
# VERSION
# ============================================================================
__version__ = "4.2.1"
# ============================================================================
# FILE FORMAT
# ============================================================================
MAGIC_HEADER = b"\x89ST3"
# FORMAT VERSION HISTORY:
# Version 1-3: Date-dependent encryption (v3.0.x - v3.1.x)
# Version 4: Date-independent encryption (v3.2.0)
# Version 5: Channel key support (v4.0.0) - adds flags byte to header
# Version 6: HKDF per-message key derivation (v4.4.0) - adds message nonce to header
FORMAT_VERSION = 6
FORMAT_VERSION_LEGACY = 5  # For backward-compatible decryption
# Payload type markers
PAYLOAD_TEXT = 0x01
PAYLOAD_FILE = 0x02
# ============================================================================
# CRYPTO PARAMETERS
# ============================================================================
SALT_SIZE = 32
IV_SIZE = 12
TAG_SIZE = 16
# Argon2 parameters (memory-hard KDF)
ARGON2_TIME_COST = 4
ARGON2_MEMORY_COST = 256 * 1024  # 256 MB
ARGON2_PARALLELISM = 4
# PBKDF2 fallback parameters
PBKDF2_ITERATIONS = 600000
# HKDF per-message key derivation (v4.4.0 / FORMAT_VERSION 6)
MESSAGE_NONCE_SIZE = 16  # 128-bit random nonce per message
HKDF_INFO_ENCRYPT = b"stegasoo-v6-encrypt"  # HKDF info for encryption key
HKDF_INFO_PIXEL = b"stegasoo-v6-pixel"  # HKDF info for pixel selection key (reserved)
# ============================================================================
# INPUT LIMITS
# ============================================================================
MAX_IMAGE_PIXELS = 24_000_000  # ~24 megapixels
MIN_IMAGE_PIXELS = 256 * 256  # Minimum viable image size
MAX_MESSAGE_SIZE = 250_000  # 250 KB (text messages)
MAX_MESSAGE_CHARS = 250_000  # Alias for clarity in templates
MIN_MESSAGE_LENGTH = 1  # Minimum message length
MAX_MESSAGE_LENGTH = MAX_MESSAGE_SIZE  # Alias for consistency
MAX_PAYLOAD_SIZE = MAX_MESSAGE_SIZE  # Maximum payload size (alias)
MAX_FILENAME_LENGTH = 255  # Max filename length to store
# File size limits
MAX_FILE_SIZE = 30 * 1024 * 1024  # 30MB total file size
MAX_FILE_PAYLOAD_SIZE = 2 * 1024 * 1024  # 2MB payload
MAX_UPLOAD_SIZE = 30 * 1024 * 1024  # 30MB max upload (Flask)
# PIN configuration
MIN_PIN_LENGTH = 6
MAX_PIN_LENGTH = 9
DEFAULT_PIN_LENGTH = 6
# Passphrase configuration (v3.2.0: renamed from PHRASE to PASSPHRASE)
# Increased defaults to compensate for removed date entropy (~33 bits)
MIN_PASSPHRASE_WORDS = 3
MAX_PASSPHRASE_WORDS = 12
DEFAULT_PASSPHRASE_WORDS = 4  # Increased from 3 (was DEFAULT_PHRASE_WORDS)
RECOMMENDED_PASSPHRASE_WORDS = 4  # Best practice guideline
# Legacy aliases for backward compatibility during transition
MIN_PHRASE_WORDS = MIN_PASSPHRASE_WORDS
MAX_PHRASE_WORDS = MAX_PASSPHRASE_WORDS
DEFAULT_PHRASE_WORDS = DEFAULT_PASSPHRASE_WORDS
# RSA configuration
MIN_RSA_BITS = 2048
VALID_RSA_SIZES = (2048, 3072)  # 4096 removed - too large for QR codes
DEFAULT_RSA_BITS = 2048
MIN_KEY_PASSWORD_LENGTH = 8
# ============================================================================
# WEB/API CONFIGURATION
# ============================================================================
# Temporary file storage
TEMP_FILE_EXPIRY = 600  # 10 minutes in seconds
TEMP_FILE_EXPIRY_MINUTES = 10
# Thumbnail settings
THUMBNAIL_SIZE = (250, 250)  # Maximum dimensions for thumbnails
THUMBNAIL_QUALITY = 85
# QR Code limits
QR_MAX_BINARY = 2900  # Safe limit for binary data in QR
QR_CROP_PADDING_PERCENT = 0.1  # Default padding when cropping QR codes
QR_CROP_MIN_PADDING_PX = 10  # Minimum padding in pixels
# ============================================================================
# FILE TYPES
# ============================================================================
ALLOWED_IMAGE_EXTENSIONS = {"png", "jpg", "jpeg", "bmp", "gif"}
ALLOWED_KEY_EXTENSIONS = {"pem", "key"}
# Lossless image formats (safe for steganography)
LOSSLESS_FORMATS = {"PNG", "BMP", "TIFF"}
# Supported image formats for steganography
SUPPORTED_IMAGE_FORMATS = LOSSLESS_FORMATS
# ============================================================================
# DAYS (kept for organizational/UI purposes, not crypto)
# ============================================================================
DAY_NAMES = ("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday")
# ============================================================================
# COMPRESSION
# ============================================================================
# Minimum payload size to attempt compression (smaller often expands)
MIN_COMPRESS_SIZE = 64
# Zlib compression level (1-9, higher = better ratio, slower)
ZLIB_COMPRESSION_LEVEL = 6
# Compression header magic bytes
COMPRESSION_MAGIC = b"\x00CMP"
# ============================================================================
# BATCH PROCESSING
# ============================================================================
# Default parallel workers for batch operations
BATCH_DEFAULT_WORKERS = 4
# Maximum parallel workers
BATCH_MAX_WORKERS = 16
# Output filename suffix for batch encode
BATCH_OUTPUT_SUFFIX = "_encoded"
# ============================================================================
# DATA FILES
# ============================================================================
def get_data_dir() -> Path:
    """Get the data directory path.

    Checks locations in order:
    1. Package data (installed via pip/wheel) using importlib.resources
    2. Development layouts: monorepo (src/soosef/stegasoo) and legacy
       (src/stegasoo), both resolving to project root/data
    3. Docker container (/app/data)
    4. Current working directory fallbacks
    """
    # Try package data first (works when installed via pip)
    try:
        pkg_data = importlib.resources.files("soosef.stegasoo.data")
        # Check if the package data directory exists and has our files
        if (pkg_data / "bip39-words.txt").is_file():
            # Return as Path - importlib.resources.files returns a Traversable
            return Path(str(pkg_data))
    except (ModuleNotFoundError, TypeError):
        pass
    # Fallback to file-based locations.
    # This file now lives at src/soosef/stegasoo/constants.py, so:
    #   .parent = src/soosef/stegasoo/
    #   .parent.parent = src/soosef/
    #   .parent.parent.parent = src/
    #   .parent.parent.parent.parent = project root (where data/ lives)
    # The old src/stegasoo depth (three parents) is kept for backward
    # compatibility with checkouts of the pre-monorepo layout.
    candidates = [
        Path(__file__).parent / "data",  # Installed package (stegasoo/data/)
        Path(__file__).parent.parent.parent.parent / "data",  # Monorepo: src/soosef/stegasoo
        Path(__file__).parent.parent.parent / "data",  # Legacy: src/stegasoo
        Path("/app/data"),  # Docker
        Path.cwd() / "data",  # Current directory
        Path.cwd().parent / "data",  # One level up from cwd
        Path.cwd().parent.parent / "data",  # Two levels up from cwd
    ]
    for path in candidates:
        if path.exists():
            return path
    # Default to package data path for clearer error messages
    return Path(__file__).parent / "data"
def get_bip39_words() -> list[str]:
    """Load the BIP-39 wordlist.

    Returns:
        Non-empty lines of bip39-words.txt, stripped of whitespace.

    Raises:
        FileNotFoundError: If the wordlist file is missing from the
            data directory.
    """
    wordlist_path = get_data_dir() / "bip39-words.txt"
    if not wordlist_path.exists():
        raise FileNotFoundError(
            f"BIP-39 wordlist not found at {wordlist_path}. "
            "Please ensure bip39-words.txt is in the data directory."
        )
    # Explicit encoding (PEP 597): don't depend on the platform locale.
    with open(wordlist_path, encoding="utf-8") as f:
        return [line.strip() for line in f if line.strip()]
# Lazy-loaded wordlist cache (populated on first get_wordlist() call)
_bip39_words: list[str] | None = None
def get_wordlist() -> list[str]:
    """Get the BIP-39 wordlist, loading it once and caching thereafter."""
    global _bip39_words
    if _bip39_words is not None:
        return _bip39_words
    _bip39_words = get_bip39_words()
    return _bip39_words
# =============================================================================
# STEGANALYSIS (v4.4.0)
# =============================================================================
# Chi-square p-value threshold: HIGH p-value = equalized PoV pairs = suspicious
STEGANALYSIS_CHI_SUSPICIOUS_THRESHOLD = 0.95  # p > 0.95 → pairs suspiciously equalized
# RS embedding rate thresholds (primary metric): higher = more likely embedded
STEGANALYSIS_RS_HIGH_THRESHOLD = 0.3  # > 30% estimated embedding → high risk
STEGANALYSIS_RS_MEDIUM_THRESHOLD = 0.1  # > 10% estimated embedding → medium risk
# =============================================================================
# DCT STEGANOGRAPHY (v3.0+)
# =============================================================================
# Embedding modes
EMBED_MODE_LSB = "lsb"  # Spatial LSB embedding (default, original mode)
EMBED_MODE_DCT = "dct"  # DCT domain embedding (new in v3.0)
EMBED_MODE_AUTO = "auto"  # Auto-detect on decode
# DCT-specific constants
DCT_MAGIC_HEADER = b"\x89DCT"  # Magic header for DCT mode
DCT_FORMAT_VERSION = 1
DCT_STEP_SIZE = 8  # QIM quantization step
# Recovery key obfuscation - FIXED value for admin recovery QR codes
# SHA256("\x89ST3\x89DCT") - hardcoded so it never changes even if headers are added
# Used to XOR recovery keys in QR codes so they scan as gibberish
RECOVERY_OBFUSCATION_KEY = bytes.fromhex(
    "d6c70bce27780db942562550e9fe1459" "9dfdb8421f5acc79696b05db4e7afbd2"
)  # 32 bytes
# Valid embedding modes
VALID_EMBED_MODES = {EMBED_MODE_LSB, EMBED_MODE_DCT}
# Capacity estimation constants
LSB_BYTES_PER_PIXEL = 3 / 8  # 3 bits per pixel (RGB, 1 bit per channel) / 8 bits per byte
DCT_BYTES_PER_PIXEL = 0.125  # Approximate for DCT mode (varies by implementation)
def detect_stego_mode(encrypted_data: bytes) -> str:
    """
    Detect embedding mode from encrypted payload header.

    Args:
        encrypted_data: First few bytes of extracted payload

    Returns:
        'lsb' or 'dct' or 'unknown'
    """
    if len(encrypted_data) < 4:
        return "unknown"
    header = encrypted_data[:4]
    # Compare against the canonical magic constants defined above rather
    # than repeating the byte literals, so header changes stay in one place.
    if header == MAGIC_HEADER:
        return EMBED_MODE_LSB
    if header == DCT_MAGIC_HEADER:
        return EMBED_MODE_DCT
    return "unknown"
# =============================================================================
# FEATURE TOGGLES (v4.3.1)
# =============================================================================
# Environment variables to enable/disable optional feature families.
# Values: "auto" (default — detect dependencies), "1"/"true" (force on),
# "0"/"false" (force off even if deps are installed).
# Pi builds or minimal installs can set STEGASOO_AUDIO=0 to stay image-only.
import os as _os
def _parse_feature_toggle(env_var: str, default: str = "auto") -> str | bool:
    """Parse a feature toggle env var. Returns 'auto', True, or False."""
    raw = _os.environ.get(env_var, default).strip().lower()
    if raw in {"1", "true", "yes", "on"}:
        return True
    if raw in {"0", "false", "no", "off"}:
        return False
    # Anything unrecognized means "detect dependencies at runtime".
    return "auto"
def _check_audio_deps() -> bool:
    """Check if audio dependencies (soundfile, numpy) are importable."""
    try:
        import numpy  # noqa: F401
        import soundfile  # noqa: F401
    except ImportError:
        return False
    return True
def _check_video_deps() -> bool:
    """Check if video dependencies (ffmpeg binary + audio deps) are available."""
    import shutil
    # Video support layers the ffmpeg binary on top of audio support.
    return _check_audio_deps() and shutil.which("ffmpeg") is not None
def _resolve_feature(toggle: str | bool, dep_check: Callable[[], bool]) -> bool:
    """Resolve a feature toggle to a final bool.

    Args:
        toggle: True (force on), False (force off), or "auto" (detect).
        dep_check: Zero-argument callable reporting dependency availability.

    Returns:
        Whether the feature should be enabled.

    Raises:
        ImportError: If the feature is force-enabled but its
            dependencies are missing.
    """
    if toggle is True:
        if not dep_check():
            # Plain strings: the old f-prefixes had no placeholders (ruff F541).
            raise ImportError(
                "Feature force-enabled but required dependencies are missing. "
                "Install the relevant extras (e.g. pip install stegasoo[audio])."
            )
        return True
    if toggle is False:
        return False
    # auto
    return dep_check()
# Resolve the toggles once at import time; ImportError here means a
# feature was force-enabled without its dependencies installed.
_audio_toggle = _parse_feature_toggle("STEGASOO_AUDIO")
_video_toggle = _parse_feature_toggle("STEGASOO_VIDEO")
AUDIO_ENABLED: bool = _resolve_feature(_audio_toggle, _check_audio_deps)
VIDEO_ENABLED: bool = _resolve_feature(_video_toggle, _check_video_deps)
# =============================================================================
# AUDIO STEGANOGRAPHY (v4.3.0)
# =============================================================================
# Audio embedding modes
EMBED_MODE_AUDIO_LSB = "audio_lsb"
EMBED_MODE_AUDIO_SPREAD = "audio_spread"
EMBED_MODE_AUDIO_AUTO = "audio_auto"
VALID_AUDIO_EMBED_MODES = {EMBED_MODE_AUDIO_LSB, EMBED_MODE_AUDIO_SPREAD}
# Audio magic bytes (for format detection in stego audio)
AUDIO_MAGIC_LSB = b"AUDL"
AUDIO_MAGIC_SPREAD = b"AUDS"
# Audio input limits
MAX_AUDIO_DURATION = 600  # 10 minutes
MAX_AUDIO_FILE_SIZE = 100 * 1024 * 1024  # 100 MB
MIN_AUDIO_SAMPLE_RATE = 8000  # G.729 level
MAX_AUDIO_SAMPLE_RATE = 192000  # Studio quality
ALLOWED_AUDIO_EXTENSIONS = {"wav", "flac", "mp3", "ogg", "opus", "aac", "m4a", "wma"}
# Spread spectrum parameters
AUDIO_SS_CHIP_LENGTH = 1024  # Samples per chip (spreading factor) — legacy/default
AUDIO_SS_AMPLITUDE = 0.05  # Per-sample embedding strength (~-26dB, masked by audio)
AUDIO_SS_RS_NSYM = 32  # Reed-Solomon parity symbols
# Spread spectrum v2: per-channel hybrid embedding (v4.4.0)
AUDIO_SS_HEADER_VERSION = 2  # v2 header format identifier
# Chip tier system — trade capacity for robustness
AUDIO_SS_CHIP_TIER_LOSSLESS = 0  # 256 chips — lossless carriers (FLAC/WAV/ALAC)
AUDIO_SS_CHIP_TIER_HIGH_LOSSY = 1  # 512 chips — high-rate lossy (AAC 256k+)
AUDIO_SS_CHIP_TIER_LOW_LOSSY = 2  # 1024 chips — low-rate lossy (AAC 128k, Opus)
AUDIO_SS_DEFAULT_CHIP_TIER = 2  # Most robust, backward compatible
AUDIO_SS_CHIP_LENGTHS = {0: 256, 1: 512, 2: 1024}
# Chip tier name mapping (for CLI/UI)
AUDIO_SS_CHIP_TIER_NAMES = {
    "lossless": AUDIO_SS_CHIP_TIER_LOSSLESS,
    "high": AUDIO_SS_CHIP_TIER_HIGH_LOSSY,
    "low": AUDIO_SS_CHIP_TIER_LOW_LOSSY,
}
# LFE channel skipping — LFE is bandlimited to ~120Hz, terrible carrier
AUDIO_LFE_CHANNEL_INDEX = 3  # Standard WAV/WAVEFORMATEXTENSIBLE ordering
AUDIO_LFE_MIN_CHANNELS = 6  # Only skip LFE for 5.1+ layouts
# Echo hiding parameters
AUDIO_ECHO_DELAY_0 = 50  # Echo delay for bit 0 (samples at 44.1kHz ~ 1.1ms)
AUDIO_ECHO_DELAY_1 = 100  # Echo delay for bit 1 (samples at 44.1kHz ~ 2.3ms)
AUDIO_ECHO_AMPLITUDE = 0.3  # Echo strength (relative to original)
AUDIO_ECHO_WINDOW_SIZE = 8192  # Window size for echo embedding
# =============================================================================
# VIDEO STEGANOGRAPHY (v4.4.0)
# =============================================================================
# Video embedding modes
EMBED_MODE_VIDEO_LSB = "video_lsb"
EMBED_MODE_VIDEO_AUTO = "video_auto"
VALID_VIDEO_EMBED_MODES = {EMBED_MODE_VIDEO_LSB}
# Video magic bytes (for format detection in stego video)
VIDEO_MAGIC_LSB = b"VIDL"
# Video input limits
MAX_VIDEO_FILE_SIZE = 4 * 1024 * 1024 * 1024  # 4 GB
MAX_VIDEO_DURATION = 3600  # 1 hour in seconds
MIN_VIDEO_RESOLUTION = (64, 64)
MAX_VIDEO_RESOLUTION = (7680, 4320)  # 8K UHD
ALLOWED_VIDEO_EXTENSIONS = {"mp4", "mkv", "webm", "avi", "mov"}
# Video output settings
VIDEO_OUTPUT_CODEC = "ffv1"  # FFV1 lossless codec
VIDEO_OUTPUT_CONTAINER = "mkv"  # MKV container for FFV1

View File

@ -0,0 +1,836 @@
"""
Stegasoo Cryptographic Functions (v4.0.0 - Channel Key Support)
This is the crypto layer - where we turn plaintext into indecipherable noise.
The security model is multi-factor:
SOMETHING YOU HAVE SOMETHING YOU KNOW
Reference photo Passphrase (4+ BIP-39 words)
RSA private key (opt) PIN (6-9 digits)
DEPLOYMENT BINDING
Channel key (ties messages to a specific server/group)
All factors get mixed together through Argon2id (memory-hard KDF) to derive
the actual encryption key. Miss any factor = wrong key = garbage output.
Encryption: AES-256-GCM (authenticated encryption - tamper = detection)
KDF: Argon2id (256MB RAM, 4 iterations) or PBKDF2 fallback (600K iterations)
v4.0.0: Added channel key for server/group isolation
v3.2.0: Removed date dependency (was cute but annoying in practice)
"""
import hashlib
import io
import secrets
import struct
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import hashes as _hashes
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.primitives.kdf.hkdf import HKDFExpand
from PIL import Image
from .constants import (
ARGON2_MEMORY_COST,
ARGON2_PARALLELISM,
ARGON2_TIME_COST,
FORMAT_VERSION,
FORMAT_VERSION_LEGACY,
HKDF_INFO_ENCRYPT,
IV_SIZE,
MAGIC_HEADER,
MAX_FILENAME_LENGTH,
MESSAGE_NONCE_SIZE,
PAYLOAD_FILE,
PAYLOAD_TEXT,
PBKDF2_ITERATIONS,
SALT_SIZE,
TAG_SIZE,
)
from .debug import get_logger
from .exceptions import DecryptionError, EncryptionError, InvalidHeaderError, KeyDerivationError
from .models import DecodeResult, FilePayload
logger = get_logger(__name__)
# Check for Argon2 availability
try:
from argon2.low_level import Type, hash_secret_raw
HAS_ARGON2 = True
except ImportError:
HAS_ARGON2 = False
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
# =============================================================================
# CHANNEL KEY RESOLUTION
# =============================================================================
# Sentinel value for "use auto-detected channel key"
CHANNEL_KEY_AUTO = "auto"
def _resolve_channel_key(channel_key: str | bool | None) -> bytes | None:
    """
    Resolve a channel key parameter to the 32-byte hash used by the KDF.

    Args:
        channel_key: Channel key parameter with these behaviors:
            - None or "auto": Use server's configured key (from env/config)
            - str (valid key): Use this specific key
            - "" or False: Explicitly use NO channel key (public mode)

    Returns:
        32-byte channel key hash, or None for public mode

    Raises:
        ValueError: For malformed keys or unsupported argument types
    """
    # Explicit public mode
    if channel_key is False or channel_key == "":
        return None
    # Auto mode: defer to whatever the server has configured.
    if channel_key is None or channel_key == CHANNEL_KEY_AUTO:
        from .channel import get_channel_key_hash
        return get_channel_key_hash()
    if not isinstance(channel_key, str):
        raise ValueError(f"Invalid channel_key type: {type(channel_key)}")
    # Explicit key: validate, canonicalize, then hash it.
    from .channel import format_channel_key, validate_channel_key
    if not validate_channel_key(channel_key):
        raise ValueError(f"Invalid channel key format: {channel_key}")
    canonical = format_channel_key(channel_key)
    return hashlib.sha256(canonical.encode("utf-8")).digest()
# =============================================================================
# CORE CRYPTO FUNCTIONS
# =============================================================================
#
# The "reference photo as a key" concept is one of Stegasoo's unique features.
# Most steganography tools just use a password. We add the photo as a
# "something you have" factor - like a hardware token, but it's a cat picture.
def hash_photo(image_data: bytes) -> bytes:
    """
    Compute a deterministic hash of a photo's pixel content.

    The hash is taken over decoded RGB pixel values rather than the raw file
    bytes, so re-saving, format conversion, or EXIF stripping does not change
    the result as long as the pixels themselves are intact.

    Args:
        image_data: Raw image file bytes (any format PIL can read)

    Returns:
        32-byte SHA-256 hash derived from the pixel content
    """
    # Normalize every input (RGBA, grayscale, palette, ...) to plain RGB.
    image: Image.Image = Image.open(io.BytesIO(image_data)).convert("RGB")
    raw_pixels = image.tobytes()

    # Double hash: SHA256(SHA256(pixels) || first-1KB-of-pixels).
    # Mixing a pixel prefix back in adds image-specific data and guards
    # against length-extension style tricks. Belt-and-suspenders, by design.
    digest = hashlib.sha256(raw_pixels).digest()
    return hashlib.sha256(digest + raw_pixels[:1024]).digest()
def derive_hybrid_key(
    photo_data: bytes,
    passphrase: str,
    salt: bytes,
    pin: str = "",
    rsa_key_data: bytes | None = None,
    channel_key: str | bool | None = None,
) -> bytes:
    """
    Derive encryption key from multiple factors.

    This is the heart of Stegasoo's security model. We take all the things
    you need to prove you're authorized (photo, passphrase, PIN, etc.) and
    blend them together into one 32-byte key.

    The flow:
        Photo hash + passphrase + PIN + RSA + salt
                    Argon2id            <- Memory-hard KDF
                    256MB / 4 iter      <- Makes brute force expensive
                    32-byte AES key

    NOTE: The concatenation ORDER of the key material below is part of the
    on-disk format - changing it breaks decryption of existing messages.

    Why Argon2id?
        - Memory-hard: attackers can't just throw GPUs at it
        - 256MB RAM per attempt = expensive at scale
        - Winner of the Password Hashing Competition (2015)
        - "id" variant resists both side-channel and GPU attacks

    Fallback: PBKDF2-SHA512 with 600K iterations (for systems without argon2)

    Args:
        photo_data: Reference photo bytes
        passphrase: Shared passphrase (recommend 4+ words from BIP-39)
        salt: Random salt for this message (32 bytes)
        pin: Optional static PIN
        rsa_key_data: Optional RSA key bytes
        channel_key: Channel key parameter:
            - None or "auto": Use configured key
            - str: Use this specific key
            - "" or False: No channel key (public mode)

    Returns:
        32-byte derived key (ready for AES-256)

    Raises:
        KeyDerivationError: If key derivation fails (wraps any lower-level error)
    """
    try:
        photo_hash = hash_photo(photo_data)
        logger.debug(
            "derive_hybrid_key: photo_hash=%s, pin=%s, rsa=%s, channel=%s, salt=%d bytes",
            photo_hash[:4].hex(),
            "set" if pin else "none",
            "set" if rsa_key_data else "none",
            (
                "explicit"
                if isinstance(channel_key, str) and channel_key
                else "auto" if channel_key is None else "none"
            ),
            len(salt),
        )
        # Resolve channel key (server-specific binding)
        channel_hash = _resolve_channel_key(channel_key)
        # Build key material by concatenating all factors.
        # Passphrase is lowercased to be forgiving of case differences.
        key_material = photo_hash + passphrase.lower().encode() + pin.encode() + salt
        # Add RSA key hash if provided (another "something you have")
        if rsa_key_data:
            key_material += hashlib.sha256(rsa_key_data).digest()
        # Add channel key hash if configured (v4.0.0 - deployment binding)
        if channel_hash:
            key_material += channel_hash
        logger.debug("Key material: %d bytes", len(key_material))
        # Run it all through the KDF
        if HAS_ARGON2:
            logger.debug(
                "KDF: Argon2id (memory=%dKB, time=%d, parallel=%d)",
                ARGON2_MEMORY_COST,
                ARGON2_TIME_COST,
                ARGON2_PARALLELISM,
            )
            # Argon2id: the good stuff
            key = hash_secret_raw(
                secret=key_material,
                salt=salt[:32],  # Argon2 salt is capped at 32 bytes
                time_cost=ARGON2_TIME_COST,  # 4 iterations
                memory_cost=ARGON2_MEMORY_COST,  # 256 MB RAM
                parallelism=ARGON2_PARALLELISM,  # 4 threads
                hash_len=32,
                type=Type.ID,  # Hybrid mode: resists side-channel AND GPU attacks
            )
        else:
            logger.warning(
                "KDF: PBKDF2 fallback (%d iterations) - argon2 not available", PBKDF2_ITERATIONS
            )
            # PBKDF2 fallback for systems without argon2-cffi.
            # 600K iterations is slow but not memory-hard.
            kdf = PBKDF2HMAC(
                algorithm=hashes.SHA512(),
                length=32,
                salt=salt,
                iterations=PBKDF2_ITERATIONS,
                backend=default_backend(),
            )
            key = kdf.derive(key_material)
        logger.debug("KDF complete, derived %d-byte key", len(key))
        return key
    except Exception as e:
        raise KeyDerivationError(f"Failed to derive key: {e}") from e
def derive_pixel_key(
    photo_data: bytes,
    passphrase: str,
    pin: str = "",
    rsa_key_data: bytes | None = None,
    channel_key: str | bool | None = None,
) -> bytes:
    """
    Derive the key used for pseudo-random pixel selection.

    This key decides *where* in the carrier the payload lives, so the
    embedding locations are unpredictable without the correct inputs.

    Args:
        photo_data: Reference photo bytes
        passphrase: Shared passphrase
        pin: Optional static PIN
        rsa_key_data: Optional RSA key bytes
        channel_key: Channel key parameter (see derive_hybrid_key)

    Returns:
        32-byte key for pixel selection
    """
    # Combine the same factors as derive_hybrid_key, in the same order.
    parts = [hash_photo(photo_data), passphrase.lower().encode(), pin.encode()]
    if rsa_key_data:
        parts.append(hashlib.sha256(rsa_key_data).digest())
    # Channel key hash binds selection to a deployment/group (v4.0.0).
    resolved = _resolve_channel_key(channel_key)
    if resolved:
        parts.append(resolved)
    # Fixed suffix domain-separates this key from other derived keys.
    return hashlib.sha256(b"".join(parts) + b"pixel_selection").digest()
def derive_message_key(root_key: bytes, nonce: bytes) -> bytes:
    """
    Derive a per-message encryption key via HKDF-Expand.

    Because the nonce is random per message, every message is encrypted
    under a distinct key even when the credentials are identical -
    compromising one message's ciphertext doesn't help with another.

    Args:
        root_key: 32-byte root key from Argon2id/PBKDF2
        nonce: 16-byte random nonce (unique per message)

    Returns:
        32-byte per-message encryption key
    """
    # The nonce is folded into the HKDF info field, not the salt, so the
    # expand step alone diversifies the root key per message.
    expander = HKDFExpand(
        algorithm=_hashes.SHA256(),
        length=32,
        info=HKDF_INFO_ENCRYPT + nonce,
        backend=default_backend(),
    )
    return expander.derive(root_key)
def _pack_payload(
    content: str | bytes | FilePayload,
) -> tuple[bytes, int]:
    """
    Pack a payload with a type marker and (for files) metadata.

    Wire format:
        text: [type:1][utf-8 data]
        file: [type:1][filename_len:2][filename][mime_len:2][mime][data]

    Length prefixes are big-endian unsigned 16-bit values.

    Args:
        content: Text string, raw bytes, or FilePayload

    Returns:
        Tuple of (packed bytes, payload type)
    """
    if isinstance(content, str):
        # Plain text message.
        return bytes([PAYLOAD_TEXT]) + content.encode("utf-8"), PAYLOAD_TEXT

    if isinstance(content, FilePayload):
        # File with filename/MIME metadata; both fields are truncated to
        # sane maximum lengths before encoding.
        name_bytes = content.filename[:MAX_FILENAME_LENGTH].encode("utf-8")
        mime_bytes = (content.mime_type or "")[:100].encode("utf-8")
        blob = b"".join(
            [
                bytes([PAYLOAD_FILE]),
                struct.pack(">H", len(name_bytes)),
                name_bytes,
                struct.pack(">H", len(mime_bytes)),
                mime_bytes,
                content.data,
            ]
        )
        return blob, PAYLOAD_FILE

    # Raw bytes: treat as an anonymous file (zero-length filename and MIME).
    anonymous = bytes([PAYLOAD_FILE]) + struct.pack(">H", 0) + struct.pack(">H", 0) + content
    return anonymous, PAYLOAD_FILE
def _unpack_payload(data: bytes) -> DecodeResult:
    """
    Unpack a payload produced by _pack_payload and extract its content.

    Args:
        data: Packed payload bytes ([type:1][...])

    Returns:
        DecodeResult with text or file content

    Raises:
        DecryptionError: If the payload is empty or a file payload is
            truncated (previously this surfaced as struct.error/IndexError)
    """
    if len(data) < 1:
        raise DecryptionError("Empty payload")
    payload_type = data[0]
    if payload_type == PAYLOAD_TEXT:
        # Text message: everything after the marker is UTF-8 text.
        text = data[1:].decode("utf-8")
        return DecodeResult(payload_type="text", message=text)
    elif payload_type == PAYLOAD_FILE:
        # File: [filename_len:2][filename][mime_len:2][mime][data]
        def _read_prefixed(off: int) -> tuple[str | None, int]:
            # Read one big-endian u16 length-prefixed UTF-8 field, with
            # explicit bounds checks so corrupt/truncated data fails with a
            # clean DecryptionError instead of struct.error.
            if off + 2 > len(data):
                raise DecryptionError("Truncated payload")
            (field_len,) = struct.unpack(">H", data[off : off + 2])
            off += 2
            if off + field_len > len(data):
                raise DecryptionError("Truncated payload")
            value = data[off : off + field_len].decode("utf-8") if field_len else None
            return value, off + field_len

        offset = 1
        filename, offset = _read_prefixed(offset)
        mime_type, offset = _read_prefixed(offset)
        # Everything after the metadata is the file content itself.
        file_data = data[offset:]
        return DecodeResult(
            payload_type="file", file_data=file_data, filename=filename, mime_type=mime_type
        )
    else:
        # Unknown type - pre-marker payloads were plain text, so try text
        # first (backward compatibility), then fall back to an unnamed file.
        try:
            text = data.decode("utf-8")
            return DecodeResult(payload_type="text", message=text)
        except UnicodeDecodeError:
            return DecodeResult(payload_type="file", file_data=data)
# =============================================================================
# HEADER FLAGS (v4.0.0)
# =============================================================================
#
# The flags byte tells us about the message without decrypting it.
# Currently just one flag, but the byte gives us room for 8.
FLAG_CHANNEL_KEY = 0x01  # Bit 0: Message was encoded with a channel key
# Future flags could include: compression, file attachment, etc.
def encrypt_message(
    message: str | bytes | FilePayload,
    photo_data: bytes,
    passphrase: str,
    pin: str = "",
    rsa_key_data: bytes | None = None,
    channel_key: str | bool | None = None,
) -> bytes:
    """
    Encrypt message or file using AES-256-GCM.

    This is where plaintext becomes ciphertext. We use AES-256-GCM which is:
        - AES: The standard, used by everyone from banks to governments
        - 256-bit key: Enough entropy to survive until the heat death of the universe
        - GCM mode: Authenticated encryption - if anyone tampers, decryption fails

    The v6 output format:
        [magic:4][version:1][flags:1][nonce:16][salt:32][iv:12][tag:16][ciphertext]

    Why the random padding at the end?
        - Message length can reveal information (traffic analysis)
        - We add 64-319 random bytes and round to 256-byte boundary
        - All messages look roughly the same size

    Args:
        message: Message string, raw bytes, or FilePayload to encrypt
        photo_data: Reference photo bytes (your "key photo")
        passphrase: Shared passphrase (recommend 4+ words from BIP-39)
        pin: Optional static PIN for additional security
        rsa_key_data: Optional RSA key bytes (another "something you have")
        channel_key: Channel key parameter:
            - None or "auto": Use server's configured key
            - str: Use this specific key
            - "" or False: No channel key (public mode)

    Returns:
        Encrypted message bytes ready for embedding

    Raises:
        EncryptionError: If encryption fails (shouldn't happen with valid inputs)
    """
    try:
        salt = secrets.token_bytes(SALT_SIZE)
        root_key = derive_hybrid_key(photo_data, passphrase, salt, pin, rsa_key_data, channel_key)
        # v6: Per-message key via HKDF — each message gets a unique encryption key
        message_nonce = secrets.token_bytes(MESSAGE_NONCE_SIZE)
        key = derive_message_key(root_key, message_nonce)
        iv = secrets.token_bytes(IV_SIZE)
        # Determine flags (recorded in the plaintext header, authenticated via AAD)
        flags = 0
        channel_hash = _resolve_channel_key(channel_key)
        if channel_hash:
            flags |= FLAG_CHANNEL_KEY
        # Pack payload with type marker
        packed_payload, _ = _pack_payload(message)
        logger.debug(
            "encrypt_message: packed_payload=%d bytes, flags=0x%02x, format_version=%d",
            len(packed_payload),
            flags,
            FORMAT_VERSION,
        )
        # Random padding to hide message length; the final 4 bytes of the
        # padded plaintext store the true payload length (big-endian u32)
        # so decryption can strip the padding again.
        padding_len = secrets.randbelow(256) + 64
        padded_len = ((len(packed_payload) + padding_len + 255) // 256) * 256
        padding_needed = padded_len - len(packed_payload)
        padding = secrets.token_bytes(padding_needed - 4) + struct.pack(">I", len(packed_payload))
        padded_message = packed_payload + padding
        logger.debug(
            "Padded message: %d bytes (payload + %d padding)", len(padded_message), padding_needed
        )
        # Build header for AAD (v6: includes nonce in authenticated data)
        header = MAGIC_HEADER + bytes([FORMAT_VERSION, flags])
        # Encrypt with AES-256-GCM
        cipher = Cipher(algorithms.AES(key), modes.GCM(iv), backend=default_backend())
        encryptor = cipher.encryptor()
        encryptor.authenticate_additional_data(header + message_nonce)
        ciphertext = encryptor.update(padded_message) + encryptor.finalize()
        total_size = (
            len(header)
            + MESSAGE_NONCE_SIZE
            + len(salt)
            + len(iv)
            + len(encryptor.tag)
            + len(ciphertext)
        )
        logger.debug(
            "Encrypted output: %d bytes (header=%d, nonce=%d, salt=%d, iv=%d, tag=%d, ct=%d)",
            total_size,
            len(header),
            MESSAGE_NONCE_SIZE,
            len(salt),
            len(iv),
            len(encryptor.tag),
            len(ciphertext),
        )
        # v6: [magic|version|flags|nonce|salt|iv|tag|ciphertext]
        return header + message_nonce + salt + iv + encryptor.tag + ciphertext
    except Exception as e:
        logger.error("Encryption failed: %s", e)
        raise EncryptionError(f"Encryption failed: {e}") from e
def parse_header(encrypted_data: bytes) -> dict | None:
    """
    Parse the header from encrypted data.

    Supports both v5 (legacy) and v6 (HKDF) header formats:
        v5: [magic:4][ver:1][flags:1][salt:32][iv:12][tag:16][ciphertext]            (66+ bytes)
        v6: [magic:4][ver:1][flags:1][nonce:16][salt:32][iv:12][tag:16][ciphertext]  (82+ bytes)

    The two formats differ only in the optional 16-byte message nonce, so the
    salt/iv/tag/ciphertext parsing is shared below instead of duplicated per
    version branch.

    Args:
        encrypted_data: Raw encrypted bytes

    Returns:
        Dict with version, flags, has_channel_key, message_nonce (None for v5),
        salt, iv, tag, and ciphertext. Returns None if the data is not a valid
        Stegasoo header.
    """
    # Min v5 size: 4+1+1+32+12+16 = 66 bytes
    if len(encrypted_data) < 66 or encrypted_data[:4] != MAGIC_HEADER:
        return None
    try:
        version = encrypted_data[4]
        flags = encrypted_data[5]
        offset = 6
        if version == FORMAT_VERSION:
            # v6 prepends a per-message nonce; min size is 66 + 16 = 82 bytes.
            if len(encrypted_data) < 82:
                return None
            message_nonce = encrypted_data[offset : offset + MESSAGE_NONCE_SIZE]
            offset += MESSAGE_NONCE_SIZE
        elif version == FORMAT_VERSION_LEGACY:
            # v5 has no per-message nonce.
            message_nonce = None
        else:
            # Unknown format version.
            return None
        # Shared layout for both versions: salt, iv, tag, then ciphertext.
        salt = encrypted_data[offset : offset + SALT_SIZE]
        offset += SALT_SIZE
        iv = encrypted_data[offset : offset + IV_SIZE]
        offset += IV_SIZE
        tag = encrypted_data[offset : offset + TAG_SIZE]
        offset += TAG_SIZE
        return {
            "version": version,
            "flags": flags,
            "has_channel_key": bool(flags & FLAG_CHANNEL_KEY),
            "message_nonce": message_nonce,
            "salt": salt,
            "iv": iv,
            "tag": tag,
            "ciphertext": encrypted_data[offset:],
        }
    except Exception:
        return None
def decrypt_message(
    encrypted_data: bytes,
    photo_data: bytes,
    passphrase: str,
    pin: str = "",
    rsa_key_data: bytes | None = None,
    channel_key: str | bool | None = None,
) -> DecodeResult:
    """
    Decrypt message (v4.0.0 - with channel key support).

    Handles both v6 (per-message HKDF key) and legacy v5 (root key used
    directly) containers; the header's version byte selects the path.

    Args:
        encrypted_data: Encrypted message bytes
        photo_data: Reference photo bytes
        passphrase: Shared passphrase
        pin: Optional static PIN
        rsa_key_data: Optional RSA key bytes
        channel_key: Channel key parameter (see encrypt_message)

    Returns:
        DecodeResult with decrypted content

    Raises:
        InvalidHeaderError: If data doesn't have valid Stegasoo header
        DecryptionError: If decryption fails (wrong credentials)
    """
    logger.debug("decrypt_message: %d bytes of encrypted data", len(encrypted_data))
    header = parse_header(encrypted_data)
    if not header:
        logger.error("Invalid or missing Stegasoo header in %d bytes", len(encrypted_data))
        raise InvalidHeaderError("Invalid or missing Stegasoo header")
    logger.debug(
        "Header: version=%d, flags=0x%02x, has_channel_key=%s, ciphertext=%d bytes",
        header["version"],
        header["flags"],
        header["has_channel_key"],
        len(header["ciphertext"]),
    )
    # Check for channel key mismatch up front so the except block below can
    # produce a targeted, helpful error message.
    channel_hash = _resolve_channel_key(channel_key)
    has_configured_key = channel_hash is not None
    message_has_key = header["has_channel_key"]
    try:
        root_key = derive_hybrid_key(
            photo_data, passphrase, header["salt"], pin, rsa_key_data, channel_key
        )
        version = header["version"]
        message_nonce = header["message_nonce"]
        if version == FORMAT_VERSION and message_nonce is not None:
            # v6: Derive per-message key via HKDF; nonce is part of the AAD
            key = derive_message_key(root_key, message_nonce)
            aad_header = MAGIC_HEADER + bytes([FORMAT_VERSION, header["flags"]]) + message_nonce
        else:
            # v5 (legacy): Root key used directly
            key = root_key
            aad_header = MAGIC_HEADER + bytes([FORMAT_VERSION_LEGACY, header["flags"]])
        cipher = Cipher(
            algorithms.AES(key), modes.GCM(header["iv"], header["tag"]), backend=default_backend()
        )
        decryptor = cipher.decryptor()
        decryptor.authenticate_additional_data(aad_header)
        padded_plaintext = decryptor.update(header["ciphertext"]) + decryptor.finalize()
        # The last 4 bytes of the padded plaintext store the true payload length
        original_length = struct.unpack(">I", padded_plaintext[-4:])[0]
        logger.debug(
            "Decrypted %d bytes, original payload length: %d",
            len(padded_plaintext),
            original_length,
        )
        payload_data = padded_plaintext[:original_length]
        result = _unpack_payload(payload_data)
        logger.debug("Decryption successful: %s (v%d)", result.payload_type, version)
        return result
    except Exception as e:
        # Provide more helpful error message for channel key issues
        if message_has_key and not has_configured_key:
            raise DecryptionError(
                "Decryption failed. This message was encoded with a channel key, "
                "but no channel key is configured. Provide the correct channel key."
            ) from e
        elif not message_has_key and has_configured_key:
            raise DecryptionError(
                "Decryption failed. This message was encoded without a channel key, "
                "but you have one configured. Try with channel_key='' for public mode."
            ) from e
        else:
            raise DecryptionError(
                "Decryption failed. Check your passphrase, PIN, RSA key, "
                "reference photo, and channel key."
            ) from e
def decrypt_message_text(
    encrypted_data: bytes,
    photo_data: bytes,
    passphrase: str,
    pin: str = "",
    rsa_key_data: bytes | None = None,
    channel_key: str | bool | None = None,
) -> str:
    """
    Decrypt message and return as text string.

    For backward compatibility - returns text content. File payloads are
    returned as text when they decode as UTF-8; binary files raise.

    Args:
        encrypted_data: Encrypted message bytes
        photo_data: Reference photo bytes
        passphrase: Shared passphrase
        pin: Optional static PIN
        rsa_key_data: Optional RSA key bytes
        channel_key: Channel key parameter

    Returns:
        Decrypted message string

    Raises:
        DecryptionError: If decryption fails or content is a binary file
    """
    result = decrypt_message(encrypted_data, photo_data, passphrase, pin, rsa_key_data, channel_key)
    if result.is_file:
        if result.file_data:
            # Best effort: a "file" payload that is valid UTF-8 is returned as text.
            try:
                return result.file_data.decode("utf-8")
            except UnicodeDecodeError as exc:
                # Chain explicitly (B904) so the decode failure isn't lost.
                raise DecryptionError(
                    f"Content is a binary file ({result.filename or 'unnamed'}), not text"
                ) from exc
        return ""
    return result.message or ""
def has_argon2() -> bool:
    """Check if Argon2 is available (False means the PBKDF2 fallback is in use)."""
    return HAS_ARGON2
# =============================================================================
# CHANNEL KEY UTILITIES (exposed for convenience)
# =============================================================================
def get_active_channel_key() -> str | None:
    """
    Return the currently configured channel key, if one is set.

    Returns:
        Formatted channel key string, or None if not configured
    """
    # Imported lazily to avoid a module-level import cycle with .channel.
    from .channel import get_channel_key as _get_channel_key

    return _get_channel_key()
def get_channel_fingerprint(key: str | None = None) -> str | None:
    """
    Return a display-safe fingerprint of a channel key.

    Args:
        key: Channel key (if None, uses the configured key)

    Returns:
        Masked key like "ABCD-••••-••••-••••-••••-••••-••••-3456", or None
    """
    # Imported lazily to avoid a module-level import cycle with .channel.
    from .channel import get_channel_fingerprint as _masked_fingerprint

    return _masked_fingerprint(key)

View File

@ -0,0 +1 @@
# Package data directory for stegasoo

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,262 @@
"""
Stegasoo Debugging Utilities
Debugging, logging, and performance monitoring tools.
Configuration:
STEGASOO_LOG_LEVEL env var controls log level:
- Not set or empty: logging disabled (production default)
- DEBUG: verbose debug output (encode/decode flow, crypto params, etc.)
- INFO: operational messages (format detection, mode selection)
- WARNING: potential issues (fallback KDF, format transcoding)
- ERROR: operation failures
STEGASOO_DEBUG=1 is a shorthand for STEGASOO_LOG_LEVEL=DEBUG
CLI: stegasoo --debug encode ... (sets DEBUG level for that invocation)
All output goes to Python's logging module under the 'stegasoo' logger hierarchy.
The legacy debug.print() API is preserved for backward compatibility.
"""
import logging
import os
import sys
import time
import traceback
from collections.abc import Callable
from functools import wraps
from typing import Any
# Map string level names (as used in env vars / debug_print) to logging levels
_LEVEL_MAP = {
    "DEBUG": logging.DEBUG,
    "INFO": logging.INFO,
    "WARNING": logging.WARNING,
    "ERROR": logging.ERROR,
    "CRITICAL": logging.CRITICAL,
}
# Root logger for the stegasoo package; modules attach children via get_logger()
logger = logging.getLogger("stegasoo")
# Global debug configuration (toggled at runtime via the enable_* helpers)
LOG_PERFORMANCE = True  # Log function timing
VALIDATION_ASSERTIONS = True  # Enable runtime validation assertions
def _configure_from_env() -> bool:
    """Configure logging from environment variables.

    Honors STEGASOO_DEBUG (shorthand for DEBUG level) and STEGASOO_LOG_LEVEL.

    Returns:
        True if DEBUG-level logging was enabled.
    """
    # STEGASOO_DEBUG=1 is shorthand for STEGASOO_LOG_LEVEL=DEBUG.
    # Lowercase the value so "TRUE"/"Yes" work as well as "true"/"yes".
    if os.environ.get("STEGASOO_DEBUG", "").strip().lower() in ("1", "true", "yes"):
        _setup_logging(logging.DEBUG)
        return True
    level_str = os.environ.get("STEGASOO_LOG_LEVEL", "").strip().upper()
    if level_str in _LEVEL_MAP:
        _setup_logging(_LEVEL_MAP[level_str])
        return level_str == "DEBUG"
    # Not set / unrecognized: logging stays disabled (production default).
    return False
def _setup_logging(level: int) -> None:
    """Configure the stegasoo logger with a stderr handler at *level*."""
    logger.setLevel(level)
    if logger.handlers:
        # Re-initialization: retarget the existing handlers instead of
        # attaching duplicates.
        for existing in logger.handlers:
            existing.setLevel(level)
        return
    # First initialization: attach a single stderr handler.
    stream_handler = logging.StreamHandler(sys.stderr)
    stream_handler.setLevel(level)
    stream_handler.setFormatter(
        logging.Formatter(
            "[%(asctime)s.%(msecs)03d] [%(levelname)s] [%(name)s] %(message)s",
            datefmt="%H:%M:%S",
        )
    )
    logger.addHandler(stream_handler)
# Auto-configure on import so importing the package alone honors the env vars
DEBUG_ENABLED = _configure_from_env()
def enable_debug(enable: bool = True) -> None:
    """Enable or disable debug mode globally."""
    global DEBUG_ENABLED
    DEBUG_ENABLED = enable
    if not enable:
        # Quiet everything below WARNING; handlers are left in place.
        logger.setLevel(logging.WARNING)
        return
    _setup_logging(logging.DEBUG)
def enable_performance_logging(enable: bool = True) -> None:
    """Enable or disable performance timing (consumed by time_function)."""
    global LOG_PERFORMANCE
    LOG_PERFORMANCE = enable
def enable_assertions(enable: bool = True) -> None:
    """Enable or disable validation assertions (consumed by validate_assertion)."""
    global VALIDATION_ASSERTIONS
    VALIDATION_ASSERTIONS = enable
def debug_print(message: str, level: str = "INFO") -> None:
    """Log *message* at the named *level* via the stegasoo logger.

    Unknown level names fall back to DEBUG.
    """
    logger.log(_LEVEL_MAP.get(level.upper(), logging.DEBUG), message)
def debug_data(data: bytes, label: str = "Data", max_bytes: int = 32) -> str:
    """Format *data* as a short hex summary for debug logging.

    Returns "" when DEBUG logging is off, so callers pay almost nothing in
    production. Long data is abbreviated as "<head>...<tail>".
    """
    if not logger.isEnabledFor(logging.DEBUG):
        return ""
    if not data:
        return f"{label}: Empty"
    if len(data) <= max_bytes:
        return f"{label} ({len(data)} bytes): {data.hex()}"
    head = data[: max_bytes // 2].hex()
    # Note: -max_bytes // 2 floors toward -inf, i.e. the tail gets
    # ceil(max_bytes/2) bytes for odd max_bytes — kept for compatibility.
    tail = data[-max_bytes // 2 :].hex()
    return f"{label} ({len(data)} bytes): {head}...{tail}"
def debug_exception(e: Exception, context: str = "") -> None:
    """Log exception with context; the full traceback is added at DEBUG level only."""
    logger.error("Exception in %s: %s: %s", context, type(e).__name__, e)
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug(traceback.format_exc())
def time_function(func: Callable) -> Callable:
    """Decorator that logs *func*'s wall-clock duration at DEBUG level.

    Timing only happens when both DEBUG logging and LOG_PERFORMANCE are on;
    otherwise the wrapped function is called with no extra overhead.
    """

    @wraps(func)
    def timed(*args, **kwargs) -> Any:
        if not (logger.isEnabledFor(logging.DEBUG) and LOG_PERFORMANCE):
            return func(*args, **kwargs)
        started = time.perf_counter()
        try:
            return func(*args, **kwargs)
        finally:
            # finally: the duration is logged even if func raises.
            logger.debug("%s took %.6fs", func.__name__, time.perf_counter() - started)

    return timed
def validate_assertion(condition: bool, message: str) -> None:
    """Runtime validation that can be disabled in production.

    Unlike the `assert` statement this is not stripped under `python -O`;
    it is gated on the VALIDATION_ASSERTIONS module flag instead.

    Raises:
        AssertionError: If assertions are enabled and *condition* is falsy.
    """
    if VALIDATION_ASSERTIONS and not condition:
        raise AssertionError(f"Validation failed: {message}")
def memory_usage() -> dict[str, float | str]:
    """Report the current process's memory usage via psutil, if installed.

    Returns:
        Dict with rss_mb/vms_mb/percent, or an "error" entry when psutil
        is not available.
    """
    try:
        import psutil
    except ImportError:
        return {"error": "psutil not installed"}
    process = psutil.Process(os.getpid())
    info = process.memory_info()
    return {
        "rss_mb": info.rss / 1024 / 1024,
        "vms_mb": info.vms / 1024 / 1024,
        "percent": process.memory_percent(),
    }
def hexdump(data: bytes, offset: int = 0, length: int = 64) -> str:
    """Render up to *length* bytes of *data* as a classic hex/ASCII dump.

    Args:
        data: Bytes to display.
        offset: Base address shown in the left-hand column.
        length: Maximum number of bytes dumped before truncating.

    Returns:
        Multi-line dump string, or "Empty" for empty input. A trailer line
        notes how many bytes were omitted when data exceeds *length*.
    """
    if not data:
        return "Empty"
    window = data[:length]
    rows = []
    for start in range(0, len(window), 16):
        chunk = window[start : start + 16]
        hex_part = " ".join(f"{b:02x}" for b in chunk).ljust(47)
        ascii_part = "".join(chr(b) if 32 <= b < 127 else "." for b in chunk)
        rows.append(f"{offset + start:08x}: {hex_part} {ascii_part}")
    if len(data) > length:
        rows.append(f"... ({len(data) - length} more bytes)")
    return "\n".join(rows)
def get_logger(name: str) -> logging.Logger:
    """Get a child logger under the stegasoo namespace.

    Relies on the dotted-module-name convention: callers pass ``__name__``,
    whose "stegasoo." prefix makes the logger a child of the root logger
    configured in this module.

    Usage in modules:
        from .debug import get_logger
        logger = get_logger(__name__)
        logger.debug("message")
    """
    return logging.getLogger(name)
class Debug:
    """Debugging utility facade (backward-compatible API).

    A thin wrapper that forwards to the module-level helper functions so
    legacy call sites using ``debug.print(...)``, ``debug.hexdump(...)``,
    etc. keep working unchanged.
    """

    def __init__(self):
        # Snapshot the module flag at construction time.
        self.enabled = DEBUG_ENABLED

    def print(self, message: str, level: str = "INFO") -> None:
        """Log a debug message at the named level."""
        debug_print(message, level)

    def data(self, data: bytes, label: str = "Data", max_bytes: int = 32) -> str:
        """Format bytes as a short hex summary."""
        return debug_data(data, label, max_bytes)

    def exception(self, e: Exception, context: str = "") -> None:
        """Log an exception with context."""
        debug_exception(e, context)

    def time(self, func: Callable) -> Callable:
        """Decorator that times function execution."""
        return time_function(func)

    def validate(self, condition: bool, message: str) -> None:
        """Runtime validation assertion."""
        validate_assertion(condition, message)

    def memory(self) -> dict[str, float | str]:
        """Report current process memory usage."""
        return memory_usage()

    def hexdump(self, data: bytes, offset: int = 0, length: int = 64) -> str:
        """Render a hex/ASCII dump string."""
        return hexdump(data, offset, length)

    def enable(self, enable: bool = True) -> None:
        """Globally enable or disable debug mode."""
        enable_debug(enable)
        self.enabled = enable

    def enable_performance(self, enable: bool = True) -> None:
        """Enable or disable performance logging."""
        enable_performance_logging(enable)

    def enable_assertions(self, enable: bool = True) -> None:
        """Enable or disable validation assertions."""
        enable_assertions(enable)


# Module-level singleton used throughout the package
debug = Debug()

View File

@ -0,0 +1,492 @@
"""
Stegasoo Decode Module (v4.0.0)
High-level decoding functions for extracting messages and files from images.
Changes in v4.0.0:
- Added channel_key parameter for deployment/group isolation
- Improved error messages for channel key mismatches
"""
import json
from pathlib import Path
from .constants import EMBED_MODE_AUTO
from .crypto import decrypt_message
from .debug import debug
from .exceptions import DecryptionError, ExtractionError
from .models import DecodeResult
from .steganography import extract_from_image
from .validation import (
require_security_factors,
require_valid_image,
require_valid_pin,
require_valid_rsa_key,
)
def _write_progress(progress_file: str | None, current: int, total: int, phase: str) -> None:
"""Write progress to file for UI polling."""
if progress_file is None:
return
try:
with open(progress_file, "w") as f:
json.dump(
{
"current": current,
"total": total,
"percent": (current / total * 100) if total > 0 else 0,
"phase": phase,
},
f,
)
except OSError:
pass
def decode(
    stego_image: bytes,
    reference_photo: bytes,
    passphrase: str,
    pin: str = "",
    rsa_key_data: bytes | None = None,
    rsa_password: str | None = None,
    embed_mode: str = EMBED_MODE_AUTO,
    channel_key: str | bool | None = None,
    progress_file: str | None = None,
    platform: str | None = None,
) -> DecodeResult:
    """
    Decode a message or file from a stego image.

    Args:
        stego_image: Stego image bytes
        reference_photo: Shared reference photo bytes
        passphrase: Shared passphrase used during encoding
        pin: Optional static PIN (if used during encoding)
        rsa_key_data: Optional RSA key bytes (if used during encoding)
        rsa_password: Optional RSA key password
        embed_mode: 'auto' (default), 'lsb', or 'dct'
        channel_key: Channel key for deployment/group isolation:
            - None or "auto": Use server's configured key
            - str: Use this specific channel key
            - "" or False: No channel key (public mode)
        progress_file: Optional path to write progress JSON for UI polling
        platform: Optional platform preset name; its quantization step is
            applied during DCT extraction (see platform_presets.get_preset)

    Returns:
        DecodeResult with message or file data

    Raises:
        ExtractionError: If no embedded data could be extracted
        DecryptionError: If extracted data cannot be decrypted

    Example:
        >>> result = decode(
        ...     stego_image=stego_bytes,
        ...     reference_photo=ref_bytes,
        ...     passphrase="apple forest thunder mountain",
        ...     pin="123456"
        ... )
        >>> if result.is_text:
        ...     print(result.message)
        ... else:
        ...     with open(result.filename, 'wb') as f:
        ...         f.write(result.file_data)

    Example with explicit channel key:
        >>> result = decode(
        ...     stego_image=stego_bytes,
        ...     reference_photo=ref_bytes,
        ...     passphrase="apple forest thunder mountain",
        ...     pin="123456",
        ...     channel_key="ABCD-1234-EFGH-5678-IJKL-9012-MNOP-3456"
        ... )
    """
    debug.print(
        f"decode: passphrase length={len(passphrase.split())} words, "
        f"mode={embed_mode}, "
        f"channel_key={'explicit' if isinstance(channel_key, str) and channel_key else 'auto' if channel_key is None else 'none'}"
    )
    # Validate inputs
    require_valid_image(stego_image, "Stego image")
    require_valid_image(reference_photo, "Reference photo")
    require_security_factors(pin, rsa_key_data)
    if pin:
        require_valid_pin(pin)
    if rsa_key_data:
        require_valid_rsa_key(rsa_key_data, rsa_password)
    # Progress: starting key derivation (Argon2 - slow on Pi)
    _write_progress(progress_file, 20, 100, "initializing")
    # Derive pixel/coefficient selection key (with channel key)
    from .crypto import derive_pixel_key

    pixel_key = derive_pixel_key(reference_photo, passphrase, pin, rsa_key_data, channel_key)
    # Progress: key derivation done, starting extraction
    _write_progress(progress_file, 25, 100, "extracting")
    # Resolve platform preset for DCT extraction
    extract_kwargs = {}
    if platform:
        from .platform_presets import get_preset

        preset = get_preset(platform)
        extract_kwargs["quant_step"] = preset.quant_step
    # Extract encrypted data
    encrypted = extract_from_image(
        stego_image,
        pixel_key,
        embed_mode=embed_mode,
        progress_file=progress_file,
        **extract_kwargs,
    )
    if not encrypted:
        debug.print("No data extracted from image")
        raise ExtractionError("Could not extract data. Check your credentials and image.")
    debug.print(f"Extracted {len(encrypted)} bytes from image")
    # Decrypt (with channel key)
    result = decrypt_message(encrypted, reference_photo, passphrase, pin, rsa_key_data, channel_key)
    debug.print(f"Decryption successful: {result.payload_type}")
    return result
def decode_file(
    stego_image: bytes,
    reference_photo: bytes,
    passphrase: str,
    output_path: Path | None = None,
    pin: str = "",
    rsa_key_data: bytes | None = None,
    rsa_password: str | None = None,
    embed_mode: str = EMBED_MODE_AUTO,
    channel_key: str | bool | None = None,
    progress_file: str | None = None,
) -> Path:
    """
    Decode a file from a stego image and save it to disk.

    Args:
        stego_image: Stego image bytes
        reference_photo: Shared reference photo bytes
        passphrase: Shared passphrase
        output_path: Optional output path (defaults to the embedded filename)
        pin: Optional static PIN
        rsa_key_data: Optional RSA key bytes
        rsa_password: Optional RSA key password
        embed_mode: 'auto', 'lsb', or 'dct'
        channel_key: Channel key parameter (see decode())
        progress_file: Optional path to write progress JSON for UI polling

    Returns:
        Path where file was saved

    Raises:
        DecryptionError: If payload is text, not a file
    """
    result = decode(
        stego_image,
        reference_photo,
        passphrase,
        pin,
        rsa_key_data,
        rsa_password,
        embed_mode,
        channel_key,
        progress_file,
    )
    if not result.is_file:
        raise DecryptionError("Payload is a text message, not a file")

    # Security: the embedded filename is attacker-controlled payload data.
    # Strip any directory components so a name like "../../etc/cron.d/x"
    # cannot escape the intended output directory (path traversal).
    safe_name = Path(result.filename or "").name or "extracted_file"
    if output_path is None:
        output_path = Path(safe_name)
    else:
        output_path = Path(output_path)
        if output_path.is_dir():
            output_path = output_path / safe_name

    # Write file
    output_path.write_bytes(result.file_data or b"")
    debug.print(f"File saved to: {output_path}")
    return output_path
def decode_text(
    stego_image: bytes,
    reference_photo: bytes,
    passphrase: str,
    pin: str = "",
    rsa_key_data: bytes | None = None,
    rsa_password: str | None = None,
    embed_mode: str = EMBED_MODE_AUTO,
    channel_key: str | bool | None = None,
    progress_file: str | None = None,
) -> str:
    """Decode a stego image and return its payload as a text string.

    Convenience wrapper around decode(). If the payload turns out to be a
    file, a UTF-8 decode of the file bytes is attempted before giving up.

    Args:
        stego_image: Stego image bytes
        reference_photo: Shared reference photo bytes
        passphrase: Shared passphrase
        pin: Optional static PIN
        rsa_key_data: Optional RSA key bytes
        rsa_password: Optional RSA key password
        embed_mode: 'auto', 'lsb', or 'dct'
        channel_key: Channel key parameter (see decode())
        progress_file: Optional path to write progress JSON for UI polling

    Returns:
        The decoded message string (empty string for an empty payload).

    Raises:
        DecryptionError: If the payload is a binary file that is not
            valid UTF-8 text.
    """
    decoded = decode(
        stego_image,
        reference_photo,
        passphrase,
        pin,
        rsa_key_data,
        rsa_password,
        embed_mode,
        channel_key,
        progress_file,
    )
    if not decoded.is_file:
        return decoded.message or ""

    raw = decoded.file_data
    if not raw:
        return ""
    try:
        return raw.decode("utf-8")
    except UnicodeDecodeError:
        raise DecryptionError(
            f"Payload is a binary file ({decoded.filename or 'unnamed'}), not text"
        )
def decode_audio(
    stego_audio: bytes,
    reference_photo: bytes,
    passphrase: str,
    pin: str = "",
    rsa_key_data: bytes | None = None,
    rsa_password: str | None = None,
    embed_mode: str = "audio_auto",
    channel_key: str | bool | None = None,
    progress_file: str | None = None,
) -> DecodeResult:
    """
    Decode a message or file from stego audio.

    Args:
        stego_audio: Stego audio bytes
        reference_photo: Shared reference photo bytes
        passphrase: Shared passphrase
        pin: Optional static PIN
        rsa_key_data: Optional RSA key bytes
        rsa_password: Optional RSA key password
        embed_mode: 'audio_auto', 'audio_lsb', or 'audio_spread'
        channel_key: Channel key for deployment/group isolation
        progress_file: Optional path to write progress JSON

    Returns:
        DecodeResult with message or file data

    Raises:
        ExtractionError: If audio support is disabled, or no data could be
            extracted with the supplied credentials.
        ValueError: If embed_mode is not a recognized audio mode.
    """
    from .constants import (
        AUDIO_ENABLED,
        EMBED_MODE_AUDIO_AUTO,
        EMBED_MODE_AUDIO_LSB,
        EMBED_MODE_AUDIO_SPREAD,
    )

    if not AUDIO_ENABLED:
        raise ExtractionError(
            "Audio support is disabled. Install audio extras (pip install stegasoo[audio]) "
            "or set STEGASOO_AUDIO=1 to force enable."
        )

    from .audio_utils import detect_audio_format, transcode_to_wav

    debug.print(
        f"decode_audio: mode={embed_mode}, " f"passphrase length={len(passphrase.split())} words"
    )

    # Validate inputs
    require_valid_image(reference_photo, "Reference photo")
    require_security_factors(pin, rsa_key_data)
    if pin:
        require_valid_pin(pin)
    if rsa_key_data:
        require_valid_rsa_key(rsa_key_data, rsa_password)

    # Detect format and transcode to WAV for processing
    audio_format = detect_audio_format(stego_audio)
    debug.print(f"Detected audio format: {audio_format}")
    wav_audio = stego_audio
    if audio_format != "wav":
        debug.print(f"Transcoding {audio_format} to WAV for extraction")
        wav_audio = transcode_to_wav(stego_audio)

    _write_progress(progress_file, 20, 100, "initializing")

    # Derive sample selection key
    from .crypto import derive_pixel_key

    pixel_key = derive_pixel_key(reference_photo, passphrase, pin, rsa_key_data, channel_key)

    _write_progress(progress_file, 25, 100, "extracting")

    encrypted = None
    if embed_mode == EMBED_MODE_AUDIO_AUTO:
        # Try modes in order: spread spectrum -> LSB
        try:
            from .spread_steganography import extract_from_audio_spread

            encrypted = extract_from_audio_spread(wav_audio, pixel_key)
            if encrypted:
                debug.print("Auto-detect: spread spectrum extraction succeeded")
        except Exception as exc:
            # Auto mode is best-effort: fall through to LSB. Log the failure
            # instead of silently swallowing it -- the previous
            # `except (ImportError, Exception)` tuple was redundant
            # (ImportError is already an Exception) and hid real errors.
            debug.print(f"Auto-detect: spread spectrum extraction failed: {exc}")
        if not encrypted:
            from .audio_steganography import extract_from_audio_lsb

            encrypted = extract_from_audio_lsb(wav_audio, pixel_key)
            if encrypted:
                debug.print("Auto-detect: LSB extraction succeeded")
    elif embed_mode == EMBED_MODE_AUDIO_LSB:
        from .audio_steganography import extract_from_audio_lsb

        encrypted = extract_from_audio_lsb(wav_audio, pixel_key, progress_file=progress_file)
    elif embed_mode == EMBED_MODE_AUDIO_SPREAD:
        from .spread_steganography import extract_from_audio_spread

        encrypted = extract_from_audio_spread(wav_audio, pixel_key, progress_file=progress_file)
    else:
        raise ValueError(f"Invalid audio embed mode: {embed_mode}")

    if not encrypted:
        debug.print("No data extracted from audio")
        raise ExtractionError("Could not extract data from audio. Check your credentials.")
    debug.print(f"Extracted {len(encrypted)} bytes from audio")

    # Decrypt
    result = decrypt_message(encrypted, reference_photo, passphrase, pin, rsa_key_data, channel_key)
    debug.print(f"Decryption successful: {result.payload_type}")
    return result
def decode_video(
    stego_video: bytes,
    reference_photo: bytes,
    passphrase: str,
    pin: str = "",
    rsa_key_data: bytes | None = None,
    rsa_password: str | None = None,
    embed_mode: str = "video_auto",
    channel_key: str | bool | None = None,
    progress_file: str | None = None,
) -> DecodeResult:
    """Decode a message or file hidden in a stego video.

    Data is pulled out of the I-frames (keyframes) via LSB extraction,
    then decrypted with the shared credentials.

    Args:
        stego_video: Stego video bytes
        reference_photo: Shared reference photo bytes
        passphrase: Shared passphrase
        pin: Optional static PIN
        rsa_key_data: Optional RSA key bytes
        rsa_password: Optional RSA key password
        embed_mode: 'video_auto' or 'video_lsb'
        channel_key: Channel key for deployment/group isolation
        progress_file: Optional path to write progress JSON

    Returns:
        DecodeResult with message or file data

    Raises:
        ExtractionError: If video support is disabled, the format is
            unrecognized, or nothing could be extracted.
        ValueError: If embed_mode is not a recognized video mode.
    """
    from .constants import (
        EMBED_MODE_VIDEO_AUTO,
        EMBED_MODE_VIDEO_LSB,
        VIDEO_ENABLED,
    )

    if not VIDEO_ENABLED:
        raise ExtractionError(
            "Video support is disabled. Install video extras and ffmpeg, "
            "or set STEGASOO_VIDEO=1 to force enable."
        )

    from .video_utils import detect_video_format

    debug.print(
        f"decode_video: mode={embed_mode}, passphrase length={len(passphrase.split())} words"
    )

    # Same validation as the image/audio paths.
    require_valid_image(reference_photo, "Reference photo")
    require_security_factors(pin, rsa_key_data)
    if pin:
        require_valid_pin(pin)
    if rsa_key_data:
        require_valid_rsa_key(rsa_key_data, rsa_password)

    container = detect_video_format(stego_video)
    debug.print(f"Detected video format: {container}")
    if container == "unknown":
        raise ExtractionError("Could not detect video format.")

    _write_progress(progress_file, 20, 100, "initializing")

    from .crypto import derive_pixel_key

    selection_key = derive_pixel_key(reference_photo, passphrase, pin, rsa_key_data, channel_key)

    _write_progress(progress_file, 25, 100, "extracting")

    # Both 'auto' and 'lsb' currently map to the single LSB extractor.
    if embed_mode not in (EMBED_MODE_VIDEO_AUTO, EMBED_MODE_VIDEO_LSB):
        raise ValueError(f"Invalid video embed mode: {embed_mode}")

    from .video_steganography import extract_from_video_lsb

    payload = extract_from_video_lsb(stego_video, selection_key, progress_file=progress_file)
    if payload:
        debug.print("Video LSB extraction succeeded")
    else:
        debug.print("No data extracted from video")
        raise ExtractionError("Could not extract data from video. Check your credentials.")
    debug.print(f"Extracted {len(payload)} bytes from video")

    outcome = decrypt_message(payload, reference_photo, passphrase, pin, rsa_key_data, channel_key)
    debug.print(f"Decryption successful: {outcome.payload_type}")
    return outcome

View File

@ -0,0 +1,478 @@
"""
Stegasoo Encode Module (v4.0.0)
High-level encoding functions for hiding messages and files in images.
Changes in v4.0.0:
- Added channel_key parameter for deployment/group isolation
Changes in v4.3.0:
- Added encode_audio() for audio steganography
Changes in v4.4.0:
- Added encode_video() for video steganography
"""
from __future__ import annotations
from pathlib import Path
from typing import TYPE_CHECKING
from .constants import EMBED_MODE_LSB
from .crypto import derive_pixel_key, encrypt_message
from .debug import debug
from .exceptions import AudioError, VideoError
from .models import EncodeResult, FilePayload
from .steganography import embed_in_image
from .utils import generate_filename
from .validation import (
require_security_factors,
require_valid_image,
require_valid_payload,
require_valid_pin,
require_valid_rsa_key,
)
if TYPE_CHECKING:
from .models import AudioEmbedStats, VideoEmbedStats
def encode(
    message: str | bytes | FilePayload,
    reference_photo: bytes,
    carrier_image: bytes,
    passphrase: str,
    pin: str = "",
    rsa_key_data: bytes | None = None,
    rsa_password: str | None = None,
    output_format: str | None = None,
    embed_mode: str = EMBED_MODE_LSB,
    dct_output_format: str = "png",
    dct_color_mode: str = "color",
    channel_key: str | bool | None = None,
    progress_file: str | None = None,
    platform: str | None = None,
) -> EncodeResult:
    """
    Encode a message or file into an image.

    Pipeline: validate inputs -> encrypt payload -> derive a pixel/coefficient
    selection key -> embed ciphertext in the carrier -> package an EncodeResult.

    Args:
        message: Text message, raw bytes, or FilePayload to hide
        reference_photo: Shared reference photo bytes
        carrier_image: Carrier image bytes
        passphrase: Shared passphrase (recommend 4+ words)
        pin: Optional static PIN
        rsa_key_data: Optional RSA private key PEM bytes
        rsa_password: Optional password for encrypted RSA key
        output_format: Force output format ('PNG', 'BMP') - LSB mode only
        embed_mode: 'lsb' (default) or 'dct'
        dct_output_format: For DCT mode - 'png' or 'jpeg'
        dct_color_mode: For DCT mode - 'grayscale' or 'color'
        channel_key: Channel key for deployment/group isolation:
            - None or "auto": Use server's configured key
            - str: Use this specific channel key
            - "" or False: No channel key (public mode)
        progress_file: Optional path to write progress JSON for UI polling
        platform: Optional platform-preset name; when set, the preset's
            quant_step / max_dimension / jpeg_quality are forwarded to the
            embedder (used for DCT encoding tuned to a platform's recompression)

    Returns:
        EncodeResult with stego image and metadata

    Raises:
        ValidationError: If the payload, images, PIN, or RSA key fail the
            validation helpers (specific subclass depends on the failure).

    Example:
        >>> result = encode(
        ...     message="Secret message",
        ...     reference_photo=ref_bytes,
        ...     carrier_image=carrier_bytes,
        ...     passphrase="apple forest thunder mountain",
        ...     pin="123456"
        ... )
        >>> with open('stego.png', 'wb') as f:
        ...     f.write(result.stego_image)

    Example with explicit channel key:
        >>> result = encode(
        ...     message="Secret message",
        ...     reference_photo=ref_bytes,
        ...     carrier_image=carrier_bytes,
        ...     passphrase="apple forest thunder mountain",
        ...     pin="123456",
        ...     channel_key="ABCD-1234-EFGH-5678-IJKL-9012-MNOP-3456"
        ... )
    """
    debug.print(
        f"encode: passphrase length={len(passphrase.split())} words, "
        f"pin={'set' if pin else 'none'}, mode={embed_mode}, "
        f"channel_key={'explicit' if isinstance(channel_key, str) and channel_key else 'auto' if channel_key is None else 'none'}"
    )
    # Validate inputs
    require_valid_payload(message)
    require_valid_image(reference_photo, "Reference photo")
    require_valid_image(carrier_image, "Carrier image")
    require_security_factors(pin, rsa_key_data)
    if pin:
        require_valid_pin(pin)
    if rsa_key_data:
        require_valid_rsa_key(rsa_key_data, rsa_password)
    # Encrypt message (with channel key)
    encrypted = encrypt_message(
        message, reference_photo, passphrase, pin, rsa_key_data, channel_key
    )
    debug.print(f"Encrypted payload: {len(encrypted)} bytes")
    # Derive pixel/coefficient selection key (with channel key)
    pixel_key = derive_pixel_key(reference_photo, passphrase, pin, rsa_key_data, channel_key)
    # Resolve platform preset for DCT encoding
    platform_kwargs = {}
    if platform:
        from .platform_presets import get_preset

        preset = get_preset(platform)
        platform_kwargs = {
            "quant_step": preset.quant_step,
            "max_dimension": preset.max_dimension,
            "jpeg_quality": preset.jpeg_quality,
        }
    # Embed in image
    stego_data, stats, extension = embed_in_image(
        encrypted,
        carrier_image,
        pixel_key,
        output_format=output_format,
        embed_mode=embed_mode,
        dct_output_format=dct_output_format,
        dct_color_mode=dct_color_mode,
        progress_file=progress_file,
        **platform_kwargs,
    )
    # Generate filename
    filename = generate_filename(extension=extension)
    # Create result. LSB and DCT return different stats objects; duck-type
    # on an LSB-only attribute to tell them apart.
    if hasattr(stats, "pixels_modified"):
        # LSB mode stats
        return EncodeResult(
            stego_image=stego_data,
            filename=filename,
            pixels_modified=stats.pixels_modified,
            total_pixels=stats.total_pixels,
            capacity_used=stats.capacity_used,
            date_used=None,  # No longer used in v3.2.0+
        )
    else:
        # DCT mode stats: convert 8x8 block counts into pixel-equivalents
        return EncodeResult(
            stego_image=stego_data,
            filename=filename,
            pixels_modified=stats.blocks_used * 64,
            total_pixels=stats.blocks_available * 64,
            capacity_used=stats.usage_percent / 100.0,
            date_used=None,
        )
def encode_file(
    filepath: str | Path,
    reference_photo: bytes,
    carrier_image: bytes,
    passphrase: str,
    pin: str = "",
    rsa_key_data: bytes | None = None,
    rsa_password: str | None = None,
    output_format: str | None = None,
    filename_override: str | None = None,
    embed_mode: str = EMBED_MODE_LSB,
    dct_output_format: str = "png",
    dct_color_mode: str = "color",
    channel_key: str | bool | None = None,
    progress_file: str | None = None,
    platform: str | None = None,
) -> EncodeResult:
    """
    Encode a file into an image.

    Convenience wrapper that loads a file and encodes it via encode().

    Args:
        filepath: Path to file to embed
        reference_photo: Shared reference photo bytes
        carrier_image: Carrier image bytes
        passphrase: Shared passphrase
        pin: Optional static PIN
        rsa_key_data: Optional RSA key bytes
        rsa_password: Optional RSA key password
        output_format: Force output format - LSB only
        filename_override: Override stored filename
        embed_mode: 'lsb' or 'dct'
        dct_output_format: 'png' or 'jpeg'
        dct_color_mode: 'grayscale' or 'color'
        channel_key: Channel key parameter (see encode())
        progress_file: Optional path to write progress JSON for UI polling
            (new: previously this wrapper could not report progress even
            though encode() supports it; default None preserves old behavior)
        platform: Optional platform preset name forwarded to encode()
            (default None preserves old behavior)

    Returns:
        EncodeResult
    """
    payload = FilePayload.from_file(str(filepath), filename_override)
    return encode(
        message=payload,
        reference_photo=reference_photo,
        carrier_image=carrier_image,
        passphrase=passphrase,
        pin=pin,
        rsa_key_data=rsa_key_data,
        rsa_password=rsa_password,
        output_format=output_format,
        embed_mode=embed_mode,
        dct_output_format=dct_output_format,
        dct_color_mode=dct_color_mode,
        channel_key=channel_key,
        progress_file=progress_file,
        platform=platform,
    )
def encode_bytes(
    data: bytes,
    filename: str,
    reference_photo: bytes,
    carrier_image: bytes,
    passphrase: str,
    pin: str = "",
    rsa_key_data: bytes | None = None,
    rsa_password: str | None = None,
    output_format: str | None = None,
    mime_type: str | None = None,
    embed_mode: str = EMBED_MODE_LSB,
    dct_output_format: str = "png",
    dct_color_mode: str = "color",
    channel_key: str | bool | None = None,
) -> EncodeResult:
    """Embed raw bytes (plus filename/MIME metadata) into a carrier image.

    Thin wrapper around encode(): the bytes are packaged into a FilePayload
    so the filename and MIME type travel alongside the data.

    Args:
        data: Raw bytes to embed
        filename: Filename to associate with data
        reference_photo: Shared reference photo bytes
        carrier_image: Carrier image bytes
        passphrase: Shared passphrase
        pin: Optional static PIN
        rsa_key_data: Optional RSA key bytes
        rsa_password: Optional RSA key password
        output_format: Force output format - LSB only
        mime_type: MIME type of data
        embed_mode: 'lsb' or 'dct'
        dct_output_format: 'png' or 'jpeg'
        dct_color_mode: 'grayscale' or 'color'
        channel_key: Channel key parameter (see encode())

    Returns:
        EncodeResult
    """
    wrapped = FilePayload(data=data, filename=filename, mime_type=mime_type)
    return encode(
        message=wrapped,
        reference_photo=reference_photo,
        carrier_image=carrier_image,
        passphrase=passphrase,
        pin=pin,
        rsa_key_data=rsa_key_data,
        rsa_password=rsa_password,
        output_format=output_format,
        embed_mode=embed_mode,
        dct_output_format=dct_output_format,
        dct_color_mode=dct_color_mode,
        channel_key=channel_key,
    )
def encode_audio(
    message: str | bytes | FilePayload,
    reference_photo: bytes,
    carrier_audio: bytes,
    passphrase: str,
    pin: str = "",
    rsa_key_data: bytes | None = None,
    rsa_password: str | None = None,
    embed_mode: str = "audio_lsb",
    channel_key: str | bool | None = None,
    progress_file: str | None = None,
    chip_tier: int | None = None,
) -> tuple[bytes, AudioEmbedStats]:
    """
    Encode a message or file into an audio carrier.

    Pipeline mirrors encode(): validate -> (transcode carrier if lossy) ->
    encrypt -> derive sample-selection key -> embed via LSB or spread spectrum.

    Args:
        message: Text message, raw bytes, or FilePayload to hide
        reference_photo: Shared reference photo bytes
        carrier_audio: Carrier audio bytes (WAV, FLAC, MP3, etc.)
        passphrase: Shared passphrase
        pin: Optional static PIN
        rsa_key_data: Optional RSA private key PEM bytes
        rsa_password: Optional password for encrypted RSA key
        embed_mode: 'audio_lsb' or 'audio_spread'
        channel_key: Channel key for deployment/group isolation
        progress_file: Optional path to write progress JSON
        chip_tier: Spread spectrum chip tier (0=lossless, 1=high_lossy, 2=low_lossy).
            Only used for audio_spread mode. Default None uses constant default.

    Returns:
        Tuple of (stego audio bytes, AudioEmbedStats)

    Raises:
        AudioError: If audio support is disabled.
        ValueError: If embed_mode is not a recognized audio mode.
        ValidationError: If the payload, reference photo, PIN, or RSA key
            fail validation.
    """
    from .constants import AUDIO_ENABLED, EMBED_MODE_AUDIO_LSB, EMBED_MODE_AUDIO_SPREAD

    if not AUDIO_ENABLED:
        raise AudioError(
            "Audio support is disabled. Install audio extras (pip install stegasoo[audio]) "
            "or set STEGASOO_AUDIO=1 to force enable."
        )
    from .audio_utils import detect_audio_format, transcode_to_wav

    debug.print(
        f"encode_audio: mode={embed_mode}, "
        f"passphrase length={len(passphrase.split())} words, "
        f"pin={'set' if pin else 'none'}"
    )
    # Validate inputs
    require_valid_payload(message)
    require_valid_image(reference_photo, "Reference photo")
    require_security_factors(pin, rsa_key_data)
    if pin:
        require_valid_pin(pin)
    if rsa_key_data:
        require_valid_rsa_key(rsa_key_data, rsa_password)
    # Detect audio format and transcode to WAV if needed.
    # WAV and FLAC are lossless and can carry LSB data as-is; lossy formats
    # (MP3, etc.) must be re-rendered to WAV before embedding.
    audio_format = detect_audio_format(carrier_audio)
    debug.print(f"Detected audio format: {audio_format}")
    if audio_format not in ("wav", "flac"):
        debug.print(f"Transcoding {audio_format} to WAV for embedding")
        carrier_audio = transcode_to_wav(carrier_audio)
    # Encrypt message
    encrypted = encrypt_message(
        message, reference_photo, passphrase, pin, rsa_key_data, channel_key
    )
    debug.print(f"Encrypted payload: {len(encrypted)} bytes")
    # Derive sample selection key
    pixel_key = derive_pixel_key(reference_photo, passphrase, pin, rsa_key_data, channel_key)
    # Embed based on mode
    if embed_mode == EMBED_MODE_AUDIO_LSB:
        from .audio_steganography import embed_in_audio_lsb

        stego_audio, stats = embed_in_audio_lsb(
            encrypted, carrier_audio, pixel_key, progress_file=progress_file
        )
    elif embed_mode == EMBED_MODE_AUDIO_SPREAD:
        from .constants import AUDIO_SS_DEFAULT_CHIP_TIER
        from .spread_steganography import embed_in_audio_spread

        # Caller-supplied tier wins; otherwise fall back to the library default.
        tier = chip_tier if chip_tier is not None else AUDIO_SS_DEFAULT_CHIP_TIER
        stego_audio, stats = embed_in_audio_spread(
            encrypted, carrier_audio, pixel_key, chip_tier=tier, progress_file=progress_file
        )
    else:
        raise ValueError(f"Invalid audio embed mode: {embed_mode}")
    return stego_audio, stats
def encode_video(
    message: str | bytes | FilePayload,
    reference_photo: bytes,
    carrier_video: bytes,
    passphrase: str,
    pin: str = "",
    rsa_key_data: bytes | None = None,
    rsa_password: str | None = None,
    embed_mode: str = "video_lsb",
    channel_key: str | bool | None = None,
    progress_file: str | None = None,
) -> tuple[bytes, VideoEmbedStats]:
    """Hide a message or file inside a video carrier.

    Ciphertext bits are written into the LSBs of I-frames (keyframes); the
    result is re-muxed as an MKV container with the FFV1 lossless codec,
    regardless of input format, so the embedded bits survive intact.

    Args:
        message: Text message, raw bytes, or FilePayload to hide
        reference_photo: Shared reference photo bytes
        carrier_video: Carrier video bytes (MP4, MKV, WebM, AVI, MOV)
        passphrase: Shared passphrase
        pin: Optional static PIN
        rsa_key_data: Optional RSA private key PEM bytes
        rsa_password: Optional password for encrypted RSA key
        embed_mode: 'video_lsb' (currently the only option)
        channel_key: Channel key for deployment/group isolation
        progress_file: Optional path to write progress JSON

    Returns:
        Tuple of (stego video bytes, VideoEmbedStats)

    Raises:
        VideoError: If video support is disabled or the format is unknown.
        ValueError: If embed_mode is not a recognized video mode.
    """
    from .constants import EMBED_MODE_VIDEO_LSB, VIDEO_ENABLED

    if not VIDEO_ENABLED:
        raise VideoError(
            "Video support is disabled. Install video extras and ffmpeg, "
            "or set STEGASOO_VIDEO=1 to force enable."
        )

    from .video_utils import detect_video_format

    debug.print(
        f"encode_video: mode={embed_mode}, "
        f"passphrase length={len(passphrase.split())} words, "
        f"pin={'set' if pin else 'none'}"
    )

    # Same validation as the image/audio paths.
    require_valid_payload(message)
    require_valid_image(reference_photo, "Reference photo")
    require_security_factors(pin, rsa_key_data)
    if pin:
        require_valid_pin(pin)
    if rsa_key_data:
        require_valid_rsa_key(rsa_key_data, rsa_password)

    container = detect_video_format(carrier_video)
    debug.print(f"Detected video format: {container}")
    if container == "unknown":
        raise VideoError("Could not detect video format. Supported: MP4, MKV, WebM, AVI, MOV.")

    # Guard the mode up front; only LSB embedding exists today.
    if embed_mode != EMBED_MODE_VIDEO_LSB:
        raise ValueError(f"Invalid video embed mode: {embed_mode}")

    ciphertext = encrypt_message(
        message, reference_photo, passphrase, pin, rsa_key_data, channel_key
    )
    debug.print(f"Encrypted payload: {len(ciphertext)} bytes")

    selection_key = derive_pixel_key(reference_photo, passphrase, pin, rsa_key_data, channel_key)

    from .video_steganography import embed_in_video_lsb

    return embed_in_video_lsb(
        ciphertext, carrier_video, selection_key, progress_file=progress_file
    )

View File

@ -0,0 +1,293 @@
"""
Stegasoo Exceptions
Custom exception classes for clear error handling across all frontends.
"""
class StegasooError(Exception):
    """Base exception for all Stegasoo errors; catch this for any library failure."""

    pass


# ============================================================================
# VALIDATION ERRORS
# ============================================================================


class ValidationError(StegasooError):
    """Base class for validation errors."""

    pass


class PinValidationError(ValidationError):
    """PIN validation failed."""

    pass


class MessageValidationError(ValidationError):
    """Message validation failed."""

    pass


class ImageValidationError(ValidationError):
    """Image validation failed."""

    pass


class KeyValidationError(ValidationError):
    """RSA key validation failed."""

    pass


class SecurityFactorError(ValidationError):
    """Security factor requirements not met."""

    pass


# ============================================================================
# CRYPTO ERRORS
# ============================================================================


class CryptoError(StegasooError):
    """Base class for cryptographic errors."""

    pass


class EncryptionError(CryptoError):
    """Encryption failed."""

    pass


class DecryptionError(CryptoError):
    """Decryption failed (wrong key, corrupted data, etc.)."""

    pass


class KeyDerivationError(CryptoError):
    """Key derivation failed."""

    pass


class KeyGenerationError(CryptoError):
    """Key generation failed."""

    pass


class KeyPasswordError(CryptoError):
    """RSA key password is incorrect or missing."""

    pass


# ============================================================================
# STEGANOGRAPHY ERRORS
# ============================================================================


class SteganographyError(StegasooError):
    """Base class for steganography errors."""

    pass


class CapacityError(SteganographyError):
    """Carrier image too small for message."""

    def __init__(self, needed: int, available: int):
        # Keep the raw numbers so frontends can render their own message.
        self.needed = needed
        self.available = available
        super().__init__(
            f"Carrier image too small. Need {needed:,} bytes, have {available:,} bytes capacity."
        )


class ExtractionError(SteganographyError):
    """Failed to extract hidden data from image."""

    pass


class EmbeddingError(SteganographyError):
    """Failed to embed data in image."""

    pass


class InvalidHeaderError(SteganographyError):
    """Invalid or missing Stegasoo header in extracted data."""

    pass


class InvalidMagicBytesError(SteganographyError):
    """Magic bytes don't match - not a Stegasoo image or wrong mode."""

    pass


class ReedSolomonError(SteganographyError):
    """Reed-Solomon error correction failed - image too corrupted."""

    pass


class NoDataFoundError(SteganographyError):
    """No hidden data found in image."""

    pass


class ModeMismatchError(SteganographyError):
    """Wrong steganography mode (LSB vs DCT)."""

    pass


# ============================================================================
# FILE ERRORS
# ============================================================================


class FileError(StegasooError):
    """Base class for file-related errors."""

    pass


class FileNotFoundError(FileError):
    """Required file not found.

    NOTE: this deliberately reuses (and, within this module's namespace,
    shadows) the name of Python's builtin ``FileNotFoundError``. Callers
    must import it explicitly; ``except FileNotFoundError`` against the
    builtin will NOT catch this class.
    """

    pass


class FileTooLargeError(FileError):
    """File exceeds size limit."""

    def __init__(self, size: int, limit: int, filename: str = "File"):
        self.size = size
        self.limit = limit
        self.filename = filename
        # Bug fix: the message previously hard-coded the literal "(unknown)"
        # instead of interpolating the filename stored on the exception.
        super().__init__(
            f"{filename} too large ({size:,} bytes). Maximum allowed: {limit:,} bytes."
        )


class UnsupportedFileTypeError(FileError):
    """File type not supported."""

    def __init__(self, extension: str, allowed: set[str]):
        self.extension = extension
        self.allowed = allowed
        super().__init__(
            f"Unsupported file type: .{extension}. Allowed: {', '.join(sorted(allowed))}"
        )


# ============================================================================
# AUDIO ERRORS
# ============================================================================


class AudioError(SteganographyError):
    """Base class for audio steganography errors."""

    pass


class AudioValidationError(ValidationError):
    """Audio validation failed."""

    pass


class AudioCapacityError(CapacityError):
    """Audio carrier too small for message."""

    def __init__(self, needed: int, available: int):
        self.needed = needed
        self.available = available
        # Call SteganographyError.__init__ directly (skip CapacityError's image-specific message)
        SteganographyError.__init__(
            self,
            f"Audio carrier too small. Need {needed:,} bytes, have {available:,} bytes capacity.",
        )


class AudioExtractionError(ExtractionError):
    """Failed to extract hidden data from audio."""

    pass


class AudioTranscodeError(AudioError):
    """Audio transcoding failed."""

    pass


class UnsupportedAudioFormatError(AudioError):
    """Audio format not supported."""

    pass


# ============================================================================
# VIDEO ERRORS
# ============================================================================


class VideoError(SteganographyError):
    """Base class for video steganography errors."""

    pass


class VideoValidationError(ValidationError):
    """Video validation failed."""

    pass


class VideoCapacityError(CapacityError):
    """Video carrier too small for message."""

    def __init__(self, needed: int, available: int):
        self.needed = needed
        self.available = available
        # Call SteganographyError.__init__ directly (skip CapacityError's image-specific message)
        SteganographyError.__init__(
            self,
            f"Video carrier too small. Need {needed:,} bytes, have {available:,} bytes capacity.",
        )


class VideoExtractionError(ExtractionError):
    """Failed to extract hidden data from video."""

    pass


class VideoTranscodeError(VideoError):
    """Video transcoding failed."""

    pass


class UnsupportedVideoFormatError(VideoError):
    """Video format not supported."""

    pass

View File

@ -0,0 +1,167 @@
"""
Stegasoo Generate Module (v3.2.0)
Public API for generating credentials (PINs, passphrases, RSA keys).
"""
from .constants import (
DEFAULT_PASSPHRASE_WORDS,
DEFAULT_PIN_LENGTH,
DEFAULT_RSA_BITS,
)
from .debug import debug
from .keygen import (
export_rsa_key_pem,
generate_phrase,
load_rsa_key,
)
from .keygen import (
generate_pin as _generate_pin,
)
from .keygen import (
generate_rsa_key as _generate_rsa_key,
)
from .models import Credentials
# Re-export from keygen for convenience
__all__ = [
"generate_pin",
"generate_passphrase",
"generate_rsa_key",
"generate_credentials",
"export_rsa_key_pem",
"load_rsa_key",
]
def generate_pin(length: int = DEFAULT_PIN_LENGTH) -> str:
    """Return a randomly generated PIN.

    The first digit is never zero (a usability choice made in keygen).

    Args:
        length: Number of digits (6-9, default 6)

    Returns:
        The PIN as a string of digits.

    Example:
        >>> pin = generate_pin()
        >>> len(pin)
        6
        >>> pin[0] != '0'
        True
    """
    return _generate_pin(length)
def generate_passphrase(words: int = DEFAULT_PASSPHRASE_WORDS) -> str:
    """Return a random passphrase drawn from the BIP-39 wordlist.

    Since v3.2.0 this produces a single passphrase (daily phrases were
    removed); the default length is 4 words, raised from 3 in v3.1.0.

    Args:
        words: Word count (3-12, default 4)

    Returns:
        The passphrase, words joined by single spaces.

    Example:
        >>> len(generate_passphrase(4).split())
        4
    """
    return generate_phrase(words)
def generate_rsa_key(bits: int = DEFAULT_RSA_BITS, password: str | None = None) -> str:
    """Generate an RSA private key and return it PEM-encoded.

    Args:
        bits: Key size in bits (2048 or 3072, default 2048)
        password: If given, the exported PEM is encrypted with this password

    Returns:
        The key as a PEM-format string.

    Example:
        >>> '-----BEGIN PRIVATE KEY-----' in generate_rsa_key(2048)
        True
    """
    private_key = _generate_rsa_key(bits)
    return export_rsa_key_pem(private_key, password).decode("utf-8")
def generate_credentials(
    use_pin: bool = True,
    use_rsa: bool = False,
    pin_length: int = DEFAULT_PIN_LENGTH,
    rsa_bits: int = DEFAULT_RSA_BITS,
    passphrase_words: int = DEFAULT_PASSPHRASE_WORDS,
    rsa_password: str | None = None,
) -> Credentials:
    """Generate a full credential set: passphrase plus PIN and/or RSA key.

    Since v3.2.0 a single passphrase is generated (not daily phrases).
    At least one of *use_pin* / *use_rsa* must be enabled.

    Args:
        use_pin: Whether to generate a PIN
        use_rsa: Whether to generate an RSA key
        pin_length: PIN length (default 6)
        rsa_bits: RSA key size (default 2048)
        passphrase_words: Number of words in passphrase (default 4)
        rsa_password: Optional password for the RSA key

    Returns:
        Credentials object holding the generated secrets.

    Raises:
        ValueError: If neither PIN nor RSA is selected.

    Example:
        >>> creds = generate_credentials(use_pin=True, use_rsa=False)
        >>> len(creds.passphrase.split())
        4
        >>> len(creds.pin)
        6
    """
    if not (use_pin or use_rsa):
        raise ValueError("Must select at least one security factor (PIN or RSA key)")

    debug.print(
        f"Generating credentials: PIN={use_pin}, RSA={use_rsa}, "
        f"passphrase_words={passphrase_words}"
    )

    # Passphrase is always generated (single, not daily).
    phrase = generate_phrase(passphrase_words)
    pin_value = _generate_pin(pin_length) if use_pin else None

    pem_text = None
    if use_rsa:
        pem_text = export_rsa_key_pem(_generate_rsa_key(rsa_bits), rsa_password).decode("utf-8")

    # Assemble the v3.2.0-format Credentials object.
    creds = Credentials(
        passphrase=phrase,
        pin=pin_value,
        rsa_key_pem=pem_text,
        rsa_bits=rsa_bits if use_rsa else None,
        words_per_passphrase=passphrase_words,
    )
    debug.print(f"Credentials generated: {creds.total_entropy} bits total entropy")
    return creds

View File

@ -0,0 +1,170 @@
"""
Stegasoo Image Utilities (v3.2.0)
Functions for analyzing images and comparing capacity.
"""
import io
from PIL import Image
from .constants import EMBED_MODE_LSB
from .debug import debug
from .models import CapacityComparison, ImageInfo
from .steganography import calculate_capacity, has_dct_support
def get_image_info(image_data: bytes) -> ImageInfo:
    """Return dimensions, format, and embedding-capacity details for an image.

    Args:
        image_data: Image file bytes

    Returns:
        ImageInfo with dimensions, format, and LSB/DCT capacity estimates
        (DCT fields are None when DCT support is unavailable).

    Example:
        >>> info = get_image_info(carrier_bytes)
        >>> print(f"{info.width}x{info.height}, {info.lsb_capacity_kb} KB capacity")
    """
    opened = Image.open(io.BytesIO(image_data))
    width, height = opened.size

    lsb_bytes = calculate_capacity(image_data, bits_per_channel=1)

    # DCT capacity is optional: only computed when the DCT backend is present,
    # and treated as best-effort (failures are logged, not raised).
    dct_bytes = None
    if has_dct_support():
        try:
            from .dct_steganography import calculate_dct_capacity

            dct_bytes = calculate_dct_capacity(image_data).usable_capacity_bytes
        except Exception as e:
            debug.print(f"Could not calculate DCT capacity: {e}")

    info = ImageInfo(
        width=width,
        height=height,
        pixels=width * height,
        format=opened.format or "Unknown",
        mode=opened.mode,
        file_size=len(image_data),
        lsb_capacity_bytes=lsb_bytes,
        lsb_capacity_kb=lsb_bytes / 1024,
        dct_capacity_bytes=dct_bytes,
        dct_capacity_kb=dct_bytes / 1024 if dct_bytes else None,
    )
    debug.print(
        f"Image info: {width}x{height}, LSB={lsb_bytes} bytes, "
        f"DCT={dct_bytes or 'N/A'} bytes"
    )
    return info
def compare_capacity(
    carrier_image: bytes,
    reference_photo: bytes | None = None,
) -> CapacityComparison:
    """
    Compare how much data fits via LSB versus DCT embedding.

    Args:
        carrier_image: Carrier image bytes.
        reference_photo: Unused in v3.2.0; kept for API compatibility.

    Returns:
        CapacityComparison with capacity info for both modes.

    Example:
        >>> comparison = compare_capacity(carrier_bytes)
        >>> print(f"LSB: {comparison.lsb_kb:.1f} KB")
        >>> print(f"DCT: {comparison.dct_kb:.1f} KB")
    """
    carrier = Image.open(io.BytesIO(carrier_image))
    width, height = carrier.size

    # LSB is always supported.
    lsb_bytes = calculate_capacity(carrier_image, bits_per_channel=1)
    lsb_kb = lsb_bytes / 1024

    # DCT is optional: probe for support and degrade gracefully on failure.
    dct_available = has_dct_support()
    dct_bytes = None
    dct_kb = None
    if dct_available:
        try:
            from .dct_steganography import calculate_dct_capacity

            dct_bytes = calculate_dct_capacity(carrier_image).usable_capacity_bytes
            dct_kb = dct_bytes / 1024
        except Exception as exc:
            debug.print(f"DCT capacity calculation failed: {exc}")
            dct_available = False

    comparison = CapacityComparison(
        image_width=width,
        image_height=height,
        lsb_available=True,
        lsb_bytes=lsb_bytes,
        lsb_kb=lsb_kb,
        lsb_output_format="PNG/BMP (color)",
        dct_available=dct_available,
        dct_bytes=dct_bytes,
        dct_kb=dct_kb,
        dct_output_formats=["PNG (grayscale)", "JPEG (grayscale)"] if dct_available else None,
        dct_ratio_vs_lsb=(dct_bytes / lsb_bytes * 100) if dct_bytes else None,
    )
    debug.print(f"Capacity comparison: LSB={lsb_kb:.1f}KB, DCT={dct_kb or 'N/A'}KB")
    return comparison
def validate_carrier_capacity(
    carrier_image: bytes,
    payload_size: int,
    embed_mode: str = EMBED_MODE_LSB,
) -> dict:
    """
    Check whether a payload (plus encryption overhead) fits in a carrier.

    Args:
        carrier_image: Carrier image bytes.
        payload_size: Size of payload in bytes.
        embed_mode: 'lsb' or 'dct'.

    Returns:
        Dict with 'fits', 'capacity', 'usage_percent', 'headroom'.
    """
    from .steganography import calculate_capacity_by_mode

    capacity = calculate_capacity_by_mode(carrier_image, embed_mode)["capacity_bytes"]
    # Rough allowance for the encryption framing wrapped around the payload.
    estimated_size = payload_size + 200
    usage = (estimated_size / capacity * 100) if capacity > 0 else 100.0
    return {
        "fits": estimated_size <= capacity,
        "capacity": capacity,
        "payload_size": payload_size,
        "estimated_size": estimated_size,
        "usage_percent": min(usage, 100.0),
        "headroom": capacity - estimated_size,
        "mode": embed_mode,
    }

View File

@ -0,0 +1,410 @@
"""
Stegasoo Key Generation (v3.2.0)
Generate PINs, passphrases, and RSA keys.
Changes in v3.2.0:
- generate_credentials() now returns Credentials with single passphrase
- Removed generate_day_phrases() from main API (kept for legacy compatibility)
- Updated to use PASSPHRASE constants
"""
import secrets
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric import rsa
from cryptography.hazmat.primitives.asymmetric.types import PrivateKeyTypes
from cryptography.hazmat.primitives.serialization import load_pem_private_key
from .constants import (
DAY_NAMES,
DEFAULT_PASSPHRASE_WORDS,
DEFAULT_PIN_LENGTH,
DEFAULT_RSA_BITS,
MAX_PASSPHRASE_WORDS,
MAX_PIN_LENGTH,
MIN_PASSPHRASE_WORDS,
MIN_PIN_LENGTH,
VALID_RSA_SIZES,
get_wordlist,
)
from .debug import debug
from .exceptions import KeyGenerationError, KeyPasswordError
from .models import Credentials, KeyInfo
def generate_pin(length: int = DEFAULT_PIN_LENGTH) -> str:
    """
    Generate a cryptographically random PIN.

    The leading digit is drawn from 1-9 so PINs never start with zero.
    Out-of-range lengths are clamped rather than rejected.

    Args:
        length: PIN length (6-9 digits).

    Returns:
        PIN string.

    Example:
        >>> generate_pin(6)
        "812345"
    """
    debug.validate(
        MIN_PIN_LENGTH <= length <= MAX_PIN_LENGTH,
        f"PIN length must be between {MIN_PIN_LENGTH} and {MAX_PIN_LENGTH}",
    )
    length = min(MAX_PIN_LENGTH, max(MIN_PIN_LENGTH, length))
    digits = [str(secrets.randbelow(9) + 1)]  # leading digit: 1-9
    digits.extend(str(secrets.randbelow(10)) for _ in range(length - 1))
    pin = "".join(digits)
    debug.print(f"Generated PIN: {pin}")
    return pin
def generate_phrase(words_per_phrase: int = DEFAULT_PASSPHRASE_WORDS) -> str:
    """
    Generate a random passphrase drawn from the BIP-39 wordlist.

    Out-of-range word counts are clamped rather than rejected.

    Args:
        words_per_phrase: Number of words (3-12).

    Returns:
        Space-separated phrase.

    Example:
        >>> generate_phrase(4)
        "apple forest thunder mountain"
    """
    debug.validate(
        MIN_PASSPHRASE_WORDS <= words_per_phrase <= MAX_PASSPHRASE_WORDS,
        f"Words per phrase must be between {MIN_PASSPHRASE_WORDS} and {MAX_PASSPHRASE_WORDS}",
    )
    words_per_phrase = min(MAX_PASSPHRASE_WORDS, max(MIN_PASSPHRASE_WORDS, words_per_phrase))
    vocabulary = get_wordlist()
    phrase = " ".join(secrets.choice(vocabulary) for _ in range(words_per_phrase))
    debug.print(f"Generated phrase: {phrase}")
    return phrase


# Alias for backward compatibility and public API consistency
generate_passphrase = generate_phrase
def generate_day_phrases(words_per_phrase: int = DEFAULT_PASSPHRASE_WORDS) -> dict[str, str]:
    """
    Generate one passphrase per day of the week.

    DEPRECATED in v3.2.0: use generate_phrase() for a single passphrase.
    Retained for legacy compatibility and organizational use cases.

    Args:
        words_per_phrase: Words in each phrase (3-12).

    Returns:
        Mapping of day name to phrase.

    Example:
        >>> generate_day_phrases(3)
        {'Monday': 'apple forest thunder', 'Tuesday': 'banana river lightning', ...}
    """
    import warnings

    warnings.warn(
        "generate_day_phrases() is deprecated in v3.2.0. "
        "Use generate_phrase() for single passphrase.",
        DeprecationWarning,
        stacklevel=2,
    )
    phrases = {}
    for day in DAY_NAMES:
        phrases[day] = generate_phrase(words_per_phrase)
    debug.print(f"Generated phrases for {len(phrases)} days")
    return phrases
def generate_rsa_key(bits: int = DEFAULT_RSA_BITS) -> rsa.RSAPrivateKey:
    """
    Generate an RSA private key.

    Unsupported sizes are coerced to DEFAULT_RSA_BITS rather than raising.

    Args:
        bits: Key size (2048 or 3072).

    Returns:
        RSA private key object.

    Raises:
        KeyGenerationError: If generation fails.

    Example:
        >>> key = generate_rsa_key(2048)
        >>> key.key_size
        2048
    """
    debug.validate(bits in VALID_RSA_SIZES, f"RSA key size must be one of {VALID_RSA_SIZES}")
    if bits not in VALID_RSA_SIZES:
        bits = DEFAULT_RSA_BITS
    debug.print(f"Generating {bits}-bit RSA key...")
    try:
        private_key = rsa.generate_private_key(
            public_exponent=65537, key_size=bits, backend=default_backend()
        )
        debug.print(f"RSA key generated: {bits} bits")
        return private_key
    except Exception as e:
        debug.exception(e, "RSA key generation")
        raise KeyGenerationError(f"Failed to generate RSA key: {e}") from e
def export_rsa_key_pem(private_key: rsa.RSAPrivateKey, password: str | None = None) -> bytes:
    """
    Serialize an RSA private key to PKCS#8 PEM.

    Args:
        private_key: RSA private key object.
        password: When provided, the PEM is encrypted with it.

    Returns:
        PEM-encoded key bytes.

    Example:
        >>> key = generate_rsa_key()
        >>> pem = export_rsa_key_pem(key)
        >>> pem[:50]
        b'-----BEGIN PRIVATE KEY-----\\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYw'
    """
    debug.validate(private_key is not None, "Private key cannot be None")
    algo: serialization.BestAvailableEncryption | serialization.NoEncryption
    if password:
        debug.print("Exporting RSA key with encryption")
        algo = serialization.BestAvailableEncryption(password.encode())
    else:
        debug.print("Exporting RSA key without encryption")
        algo = serialization.NoEncryption()
    return private_key.private_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PrivateFormat.PKCS8,
        encryption_algorithm=algo,
    )
def load_rsa_key(key_data: bytes, password: str | None = None) -> rsa.RSAPrivateKey:
    """
    Load RSA private key from PEM data.

    Args:
        key_data: PEM-encoded key bytes
        password: Password if key is encrypted

    Returns:
        RSA private key object

    Raises:
        KeyPasswordError: If password is wrong or missing
        KeyGenerationError: If key is invalid or not an RSA key

    Example:
        >>> key = load_rsa_key(pem_data, "my_password")
    """
    debug.validate(key_data is not None and len(key_data) > 0, "Key data cannot be empty")
    try:
        pwd_bytes = password.encode() if password else None
        debug.print(f"Loading RSA key (encrypted: {bool(password)})")
        key: PrivateKeyTypes = load_pem_private_key(
            key_data, password=pwd_bytes, backend=default_backend()
        )
        # Verify it's an RSA key
        if not isinstance(key, rsa.RSAPrivateKey):
            raise KeyGenerationError(f"Expected RSA key, got {type(key).__name__}")
        debug.print(f"RSA key loaded: {key.key_size} bits")
        return key
    except TypeError as e:
        # cryptography raises TypeError when an encrypted key gets no password.
        debug.print("RSA key is password-protected but no password provided")
        raise KeyPasswordError(
            "RSA key is password-protected. Please provide the password."
        ) from e
    except ValueError as e:
        error_msg = str(e).lower()
        if "password" in error_msg or "encrypted" in error_msg:
            debug.print("Incorrect password for RSA key")
            raise KeyPasswordError("Incorrect password for RSA key.") from e
        debug.exception(e, "RSA key loading")
        raise KeyGenerationError(f"Invalid RSA key: {e}") from e
    except KeyGenerationError:
        # Raised above for non-RSA keys — re-raise as-is instead of letting the
        # generic handler double-wrap it ("Could not load RSA key: Expected...").
        raise
    except Exception as e:
        debug.exception(e, "RSA key loading")
        raise KeyGenerationError(f"Could not load RSA key: {e}") from e
def get_key_info(key_data: bytes, password: str | None = None) -> KeyInfo:
    """
    Summarize an RSA key: modulus size and encryption status.

    Args:
        key_data: PEM-encoded key bytes.
        password: Password if key is encrypted.

    Returns:
        KeyInfo with key size and encryption status.

    Example:
        >>> info = get_key_info(pem_data)
        >>> info.key_size
        2048
        >>> info.is_encrypted
        False
    """
    debug.print("Getting RSA key info")
    # Heuristic: encrypted PEMs carry an ENCRYPTED marker in their header.
    encrypted = b"ENCRYPTED" in key_data
    loaded = load_rsa_key(key_data, password)
    info = KeyInfo(key_size=loaded.key_size, is_encrypted=encrypted, pem_data=key_data)
    debug.print(f"Key info: {info.key_size} bits, encrypted: {info.is_encrypted}")
    return info
def generate_credentials(
    use_pin: bool = True,
    use_rsa: bool = False,
    pin_length: int = DEFAULT_PIN_LENGTH,
    rsa_bits: int = DEFAULT_RSA_BITS,
    passphrase_words: int = DEFAULT_PASSPHRASE_WORDS,
    rsa_password: str | None = None,
) -> Credentials:
    """
    Generate a complete credential set.

    v3.2.0 issues a single passphrase; daily rotation no longer exists.
    At least one of use_pin / use_rsa must be True.

    Args:
        use_pin: Whether to generate a PIN.
        use_rsa: Whether to generate an RSA key.
        pin_length: PIN length if generating (default 6).
        rsa_bits: RSA key size if generating (default 2048).
        passphrase_words: Words in passphrase (default 4).
        rsa_password: Optional password for RSA key encryption.

    Returns:
        Credentials object with passphrase, PIN, and/or RSA key.

    Raises:
        ValueError: If neither PIN nor RSA is selected.

    Example:
        >>> creds = generate_credentials(use_pin=True, use_rsa=False)
        >>> creds.passphrase
        "apple forest thunder mountain"
        >>> creds.pin
        "812345"
    """
    debug.validate(use_pin or use_rsa, "Must select at least one security factor (PIN or RSA key)")
    if not (use_pin or use_rsa):
        raise ValueError("Must select at least one security factor (PIN or RSA key)")
    debug.print(
        f"Generating credentials: PIN={use_pin}, RSA={use_rsa}, "
        f"passphrase_words={passphrase_words}"
    )
    # Single passphrase (v3.2.0 — no daily rotation), then optional factors.
    passphrase = generate_phrase(passphrase_words)
    pin = generate_pin(pin_length) if use_pin else None
    rsa_key_pem = None
    if use_rsa:
        rsa_key_pem = export_rsa_key_pem(generate_rsa_key(rsa_bits), rsa_password).decode("utf-8")
    creds = Credentials(
        passphrase=passphrase,
        pin=pin,
        rsa_key_pem=rsa_key_pem,
        rsa_bits=rsa_bits if use_rsa else None,
        words_per_passphrase=passphrase_words,
    )
    debug.print(f"Credentials generated: {creds.total_entropy} bits total entropy")
    return creds
# =============================================================================
# LEGACY COMPATIBILITY
# =============================================================================
def generate_credentials_legacy(
    use_pin: bool = True,
    use_rsa: bool = False,
    pin_length: int = DEFAULT_PIN_LENGTH,
    rsa_bits: int = DEFAULT_RSA_BITS,
    words_per_phrase: int = DEFAULT_PASSPHRASE_WORDS,
) -> dict:
    """
    Generate credentials in the legacy v3.1.0 layout (daily phrases).

    DEPRECATED: Use generate_credentials() for the v3.2.0 format. This exists
    only so migration tools can produce old-format credentials.

    Args:
        use_pin: Whether to generate a PIN.
        use_rsa: Whether to generate an RSA key.
        pin_length: PIN length if generating.
        rsa_bits: RSA key size if generating.
        words_per_phrase: Words per daily phrase.

    Returns:
        Dict with 'phrases' (dict), 'pin', 'rsa_key_pem', etc.
    """
    import warnings

    warnings.warn(
        "generate_credentials_legacy() returns v3.1.0 format. "
        "Use generate_credentials() for v3.2.0 format.",
        DeprecationWarning,
        stacklevel=2,
    )
    if not (use_pin or use_rsa):
        raise ValueError("Must select at least one security factor (PIN or RSA key)")
    # Old format: one phrase per weekday.
    daily = {}
    for day in DAY_NAMES:
        daily[day] = generate_phrase(words_per_phrase)
    pin = generate_pin(pin_length) if use_pin else None
    rsa_key_pem = None
    if use_rsa:
        rsa_key_pem = export_rsa_key_pem(generate_rsa_key(rsa_bits)).decode("utf-8")
    return {
        "phrases": daily,
        "pin": pin,
        "rsa_key_pem": rsa_key_pem,
        "rsa_bits": rsa_bits if use_rsa else None,
        "words_per_phrase": words_per_phrase,
    }

View File

@ -0,0 +1,391 @@
"""
Stegasoo Data Models (v3.2.0)
Dataclasses for structured data exchange between modules and frontends.
Changes in v3.2.0:
- Renamed day_phrase -> passphrase
- Credentials now uses single passphrase instead of day mapping
- Removed date_str from EncodeInput (date no longer used in crypto)
- Made date_used optional in EncodeResult (cosmetic only)
- Added ImageInfo, CapacityComparison, GenerateResult
"""
from dataclasses import dataclass, field
@dataclass
class Credentials:
"""
Generated credentials for encoding/decoding.
v3.2.0: Simplified to use single passphrase instead of daily rotation.
"""
passphrase: str # Single passphrase (no daily rotation)
pin: str | None = None
rsa_key_pem: str | None = None
rsa_bits: int | None = None
words_per_passphrase: int = 4 # Increased from 3 in v3.1.0
# Optional: backup passphrases for multi-factor or rotation
backup_passphrases: list[str] | None = None
@property
def passphrase_entropy(self) -> int:
"""Entropy in bits from passphrase (~11 bits per BIP-39 word)."""
return self.words_per_passphrase * 11
@property
def pin_entropy(self) -> int:
"""Entropy in bits from PIN (~3.32 bits per digit)."""
if self.pin:
return int(len(self.pin) * 3.32)
return 0
@property
def rsa_entropy(self) -> int:
"""Effective entropy from RSA key."""
if self.rsa_key_pem and self.rsa_bits:
return min(self.rsa_bits // 16, 128)
return 0
@property
def total_entropy(self) -> int:
"""Total entropy in bits (excluding reference photo)."""
return self.passphrase_entropy + self.pin_entropy + self.rsa_entropy
# Legacy property for compatibility
@property
def phrase_entropy(self) -> int:
"""Alias for passphrase_entropy (backward compatibility)."""
return self.passphrase_entropy
@dataclass
class FilePayload:
"""Represents a file to be embedded."""
data: bytes
filename: str
mime_type: str | None = None
@property
def size(self) -> int:
return len(self.data)
@classmethod
def from_file(cls, filepath: str, filename: str | None = None) -> "FilePayload":
"""Create FilePayload from a file path."""
import mimetypes
from pathlib import Path
path = Path(filepath)
data = path.read_bytes()
name = filename or path.name
mime, _ = mimetypes.guess_type(name)
return cls(data=data, filename=name, mime_type=mime)
@dataclass
class EncodeInput:
    """
    Input parameters for encoding a message.

    v3.2.0: Removed date_str (date no longer used in crypto).
    """

    message: str | bytes | FilePayload  # Text, raw bytes, or file
    reference_photo: bytes  # Raw bytes of the reference photo
    carrier_image: bytes  # Raw bytes of the image that will carry the payload
    passphrase: str  # Renamed from day_phrase
    pin: str = ""  # Optional PIN factor; "" means unused
    rsa_key_data: bytes | None = None  # Optional PEM-encoded RSA private key
    rsa_password: str | None = None  # Password for an encrypted RSA key, if any
@dataclass
class EncodeResult:
"""
Result of encoding operation.
v3.2.0: date_used is now optional/cosmetic (not used in crypto).
"""
stego_image: bytes
filename: str
pixels_modified: int
total_pixels: int
capacity_used: float # 0.0 - 1.0
date_used: str | None = None # Cosmetic only (for filename organization)
@property
def capacity_percent(self) -> float:
"""Capacity used as percentage."""
return self.capacity_used * 100
@dataclass
class DecodeInput:
    """
    Input parameters for decoding a message.

    v3.2.0: Renamed day_phrase -> passphrase; no date needed.
    """

    stego_image: bytes  # Image containing the embedded payload
    reference_photo: bytes  # Raw bytes of the reference photo
    passphrase: str  # Renamed from day_phrase
    pin: str = ""  # Optional PIN factor; "" means unused
    rsa_key_data: bytes | None = None  # Optional PEM-encoded RSA private key
    rsa_password: str | None = None  # Password for an encrypted RSA key, if any
@dataclass
class DecodeResult:
"""
Result of decoding operation.
v3.2.0: date_encoded is always None (date removed from crypto).
"""
payload_type: str # 'text' or 'file'
message: str | None = None # For text payloads
file_data: bytes | None = None # For file payloads
filename: str | None = None # Original filename for file payloads
mime_type: str | None = None # MIME type hint
date_encoded: str | None = None # Always None in v3.2.0 (kept for compatibility)
@property
def is_file(self) -> bool:
return self.payload_type == "file"
@property
def is_text(self) -> bool:
return self.payload_type == "text"
def get_content(self) -> str | bytes:
"""Get the decoded content (text or bytes)."""
if self.is_text:
return self.message or ""
return self.file_data or b""
@dataclass
class EmbedStats:
    """Statistics from image embedding."""

    pixels_modified: int
    total_pixels: int
    capacity_used: float
    bytes_embedded: int

    @property
    def modification_percent(self) -> float:
        """Share of pixels touched, as a percentage (0 for empty images)."""
        if self.total_pixels <= 0:
            return 0
        return (self.pixels_modified / self.total_pixels) * 100
@dataclass
class KeyInfo:
    """Information about an RSA key."""

    key_size: int  # Modulus size in bits (e.g. 2048)
    is_encrypted: bool  # True when the PEM carries an ENCRYPTED marker
    pem_data: bytes  # The original PEM bytes the info was derived from
@dataclass
class ValidationResult:
"""Result of input validation."""
is_valid: bool
error_message: str = ""
details: dict = field(default_factory=dict)
warning: str | None = None # v3.2.0: Added for passphrase length warnings
@classmethod
def ok(cls, warning: str | None = None, **details) -> "ValidationResult":
"""Create a successful validation result."""
result = cls(is_valid=True, details=details)
if warning:
result.warning = warning
return result
@classmethod
def error(cls, message: str, **details) -> "ValidationResult":
"""Create a failed validation result."""
return cls(is_valid=False, error_message=message, details=details)
# =============================================================================
# NEW MODELS FOR V3.2.0 PUBLIC API
# =============================================================================
@dataclass
class ImageInfo:
    """Information about an image for steganography.

    Produced by get_image_info(); KB values are the byte values / 1024.
    """

    width: int  # Pixel width
    height: int  # Pixel height
    pixels: int  # width * height
    format: str  # PIL format name, or "Unknown"
    mode: str  # PIL color mode (e.g. "RGB")
    file_size: int  # Size of the source file in bytes
    lsb_capacity_bytes: int  # LSB embedding capacity
    lsb_capacity_kb: float  # LSB capacity in kilobytes
    dct_capacity_bytes: int | None = None  # None when DCT support is unavailable
    dct_capacity_kb: float | None = None  # DCT capacity in kilobytes, if known
@dataclass
class CapacityComparison:
    """Comparison of embedding capacity between modes.

    Produced by compare_capacity(); DCT fields stay None when the optional
    DCT stack is missing or capacity calculation failed.
    """

    image_width: int
    image_height: int
    lsb_available: bool  # Set True by compare_capacity() (LSB always works)
    lsb_bytes: int
    lsb_kb: float
    lsb_output_format: str  # e.g. "PNG/BMP (color)"
    dct_available: bool
    dct_bytes: int | None = None
    dct_kb: float | None = None
    dct_output_formats: list[str] | None = None
    dct_ratio_vs_lsb: float | None = None  # DCT capacity as % of LSB capacity
@dataclass
class GenerateResult:
"""Result of credential generation."""
passphrase: str
pin: str | None = None
rsa_key_pem: str | None = None
passphrase_words: int = 4
passphrase_entropy: int = 0
pin_entropy: int = 0
rsa_entropy: int = 0
total_entropy: int = 0
def __str__(self) -> str:
lines = [
"Generated Credentials:",
f" Passphrase: {self.passphrase}",
]
if self.pin:
lines.append(f" PIN: {self.pin}")
if self.rsa_key_pem:
lines.append(f" RSA Key: {len(self.rsa_key_pem)} bytes PEM")
lines.append(f" Total Entropy: {self.total_entropy} bits")
return "\n".join(lines)
# =============================================================================
# AUDIO STEGANOGRAPHY MODELS (v4.3.0)
# =============================================================================
@dataclass
class AudioEmbedStats:
"""Statistics from audio embedding."""
samples_modified: int
total_samples: int
capacity_used: float # 0.0 - 1.0
bytes_embedded: int
sample_rate: int
channels: int
duration_seconds: float
embed_mode: str # "audio_lsb" or "audio_spread"
chip_tier: int | None = None # v4.4.0: spread spectrum chip tier (0/1/2)
chip_length: int | None = None # v4.4.0: samples per chip
embeddable_channels: int | None = None # v4.4.0: channels used (excl. LFE)
@property
def modification_percent(self) -> float:
"""Percentage of samples modified."""
return (self.samples_modified / self.total_samples) * 100 if self.total_samples > 0 else 0
@dataclass
class AudioInfo:
    """Information about an audio file."""

    sample_rate: int  # Samples per second (Hz)
    channels: int  # Number of channels
    duration_seconds: float  # Total duration
    num_samples: int  # Total sample count
    format: str  # "wav", "flac", "mp3", etc.
    bitrate: int | None = None  # For lossy formats
    bit_depth: int | None = None  # For lossless formats
@dataclass
class AudioCapacityInfo:
    """Capacity information for audio steganography."""

    total_samples: int  # Samples in the carrier
    usable_capacity_bytes: int  # Payload bytes the carrier can hold
    embed_mode: str  # e.g. "audio_lsb" or "audio_spread"
    sample_rate: int  # Samples per second (Hz)
    duration_seconds: float  # Carrier duration
    chip_tier: int | None = None  # v4.4.0: spread spectrum chip tier
    chip_length: int | None = None  # v4.4.0: samples per chip
    embeddable_channels: int | None = None  # v4.4.0: channels used (excl. LFE)
    total_channels: int | None = None  # v4.4.0: total channels in carrier
# =============================================================================
# VIDEO STEGANOGRAPHY MODELS (v4.4.0)
# =============================================================================
@dataclass
class VideoEmbedStats:
    """Statistics from video embedding."""

    frames_modified: int
    total_frames: int
    capacity_used: float  # 0.0 - 1.0
    bytes_embedded: int
    width: int
    height: int
    fps: float
    duration_seconds: float
    embed_mode: str  # "video_lsb"
    codec: str  # Output codec (e.g., "ffv1")

    @property
    def modification_percent(self) -> float:
        """Share of frames touched, as a percentage (0 when empty)."""
        if self.total_frames <= 0:
            return 0
        return (self.frames_modified / self.total_frames) * 100
@dataclass
class VideoInfo:
    """Information about a video file."""

    width: int  # Frame width in pixels
    height: int  # Frame height in pixels
    fps: float  # Frames per second
    duration_seconds: float  # Total duration
    total_frames: int  # Total frame count
    i_frame_count: int  # Number of I-frames (keyframes)
    format: str  # "mp4", "mkv", "webm", etc.
    codec: str  # "h264", "vp9", "ffv1", etc.
    bitrate: int | None = None  # For lossy formats
@dataclass
class VideoCapacityInfo:
    """Capacity information for video steganography."""

    total_frames: int  # Frames in the carrier
    i_frames: int  # Number of I-frames (keyframes)
    usable_capacity_bytes: int  # Payload bytes the carrier can hold
    embed_mode: str  # e.g. "video_lsb"
    resolution: tuple[int, int]  # (width, height) in pixels
    duration_seconds: float  # Carrier duration

View File

@ -0,0 +1,169 @@
"""
Platform-Calibrated DCT Presets (v4.4.0)
Pre-tuned DCT embedding parameters for social media platforms. Each platform
recompresses uploaded images differently — these presets bake in the known
parameters so payloads survive the round-trip.
Usage::
from stegasoo.platform_presets import get_preset, PLATFORMS
preset = get_preset("telegram")
# Use preset.quant_step, preset.jpeg_quality, etc. in DCT encode
Preset parameters were derived from empirical testing. Platform compression
behavior can change without notice — use ``pre_verify_survival()`` to confirm
payloads survive before relying on a preset.
"""
from __future__ import annotations
from dataclasses import dataclass
@dataclass(frozen=True)
class PlatformPreset:
    """Tuned DCT parameters for a specific platform.

    Instances are immutable; the known platforms live in PRESETS.
    """

    name: str  # Human-readable platform name
    jpeg_quality: int  # Platform's recompression quality
    max_dimension: int  # Max width/height before platform resizes
    quant_step: int  # QIM quantization step (higher = more robust)
    embed_start: int  # Start index into EMBED_POSITIONS (skip low-freq)
    embed_end: int  # End index into EMBED_POSITIONS (skip high-freq)
    recompress_quality: int  # Quality to simulate platform recompression for pre-verify
    notes: str = ""  # Free-form caveats from empirical testing
# Platform presets — derived from empirical testing of each platform's
# image processing pipeline. These WILL change as platforms update.
# Last verified: 2026-03-25
# Keys are the lowercase platform identifiers accepted by get_preset().
PRESETS: dict[str, PlatformPreset] = {
    "telegram": PlatformPreset(
        name="Telegram",
        jpeg_quality=82,
        max_dimension=2560,
        quant_step=35,
        embed_start=4,
        embed_end=16,
        recompress_quality=80,
        notes="~81KB max embeddable. Moderate recompression.",
    ),
    "discord": PlatformPreset(
        name="Discord",
        jpeg_quality=85,
        max_dimension=4096,
        quant_step=30,
        embed_start=4,
        embed_end=18,
        recompress_quality=83,
        notes="Varies with Nitro. Non-Nitro users get more aggressive compression.",
    ),
    "signal": PlatformPreset(
        name="Signal",
        jpeg_quality=80,
        max_dimension=2048,
        quant_step=40,
        embed_start=5,
        embed_end=15,
        recompress_quality=78,
        notes="Aggressive recompression. Use smaller payloads for reliability.",
    ),
    "whatsapp": PlatformPreset(
        name="WhatsApp",
        jpeg_quality=70,
        max_dimension=1600,
        quant_step=50,
        embed_start=5,
        embed_end=14,
        recompress_quality=68,
        notes="Most lossy. Capacity is significantly reduced.",
    ),
}
# Sorted platform keys — gives UIs/CLIs a stable display order.
PLATFORMS = sorted(PRESETS.keys())
def get_preset(platform: str) -> PlatformPreset:
    """Look up the tuned preset for a platform.

    Args:
        platform: Platform name (telegram, discord, signal, whatsapp);
            matching is case-insensitive.

    Returns:
        PlatformPreset with tuned DCT parameters.

    Raises:
        ValueError: If platform is not recognized.
    """
    normalized = platform.lower()
    if normalized not in PRESETS:
        known = ", ".join(PLATFORMS)
        raise ValueError(f"Unknown platform '{platform}'. Available: {known}")
    return PRESETS[normalized]
def get_embed_positions(preset: PlatformPreset) -> list[tuple[int, int]]:
    """Return the (row, col) DCT coefficient positions a preset embeds into.

    Args:
        preset: Platform preset.

    Returns:
        The slice of EMBED_POSITIONS between the preset's start/end indices.
    """
    from .dct_steganography import EMBED_POSITIONS

    start, end = preset.embed_start, preset.embed_end
    return EMBED_POSITIONS[start:end]
def pre_verify_survival(
    stego_image: bytes,
    seed: bytes,
    preset: PlatformPreset,
) -> bool:
    """Verify that a payload survives simulated platform recompression.

    The stego image is resized (when oversized) and re-encoded as JPEG at the
    platform's quality, then extraction is attempted. If extraction succeeds,
    the payload should survive the real platform.

    Args:
        stego_image: The stego JPEG image bytes (already encoded).
        seed: The same seed used for encoding.
        preset: Platform preset to simulate.

    Returns:
        True if payload survived simulated recompression.
    """
    import io

    from PIL import Image

    from .dct_steganography import extract_from_dct

    simulated = Image.open(io.BytesIO(stego_image))

    # Downscale like the platform would when either dimension is too large.
    width, height = simulated.size
    longest = max(width, height)
    if longest > preset.max_dimension:
        factor = preset.max_dimension / longest
        simulated = simulated.resize((int(width * factor), int(height * factor)), Image.LANCZOS)

    # Re-encode as JPEG at the platform's approximate quality.
    if simulated.mode != "RGB":
        simulated = simulated.convert("RGB")
    sink = io.BytesIO()
    simulated.save(sink, format="JPEG", quality=preset.recompress_quality)
    simulated.close()

    # Any extraction failure counts as "did not survive".
    try:
        payload = extract_from_dct(sink.getvalue(), seed)
    except Exception:
        return False
    return payload is not None and len(payload) > 0

View File

View File

@ -0,0 +1,625 @@
"""
Stegasoo QR Code Utilities
Functions for generating and reading QR codes containing RSA keys.
Supports automatic compression for large keys.
IMPROVEMENTS IN THIS VERSION:
- Much more robust PEM normalization
- Better handling of QR code extraction edge cases
- Improved error messages
- v4.2.0: Added zstd compression (better ratio than zlib)
"""
import base64
import io
import zlib
from PIL import Image
# Optional ZSTD support (better compression ratio)
try:
import zstandard as zstd
HAS_ZSTD = True
except ImportError:
HAS_ZSTD = False
# QR code generation
try:
import qrcode
from qrcode.constants import ERROR_CORRECT_L, ERROR_CORRECT_M
HAS_QRCODE_WRITE = True
except ImportError:
HAS_QRCODE_WRITE = False
# QR code reading
try:
from pyzbar.pyzbar import ZBarSymbol
from pyzbar.pyzbar import decode as pyzbar_decode
HAS_QRCODE_READ = True
except ImportError:
HAS_QRCODE_READ = False
from .constants import (
QR_CROP_MIN_PADDING_PX,
QR_CROP_PADDING_PERCENT,
QR_MAX_BINARY,
)
# Constants
COMPRESSION_PREFIX_ZLIB = "STEGASOO-Z:" # Legacy zlib compression
COMPRESSION_PREFIX_ZSTD = "STEGASOO-ZS:" # v4.2.0: New zstd compression (better ratio)
COMPRESSION_PREFIX = COMPRESSION_PREFIX_ZSTD if HAS_ZSTD else COMPRESSION_PREFIX_ZLIB
def compress_data(data: str) -> str:
    """
    Compress a string for QR code storage.

    Prefers zstd (better ratio) and falls back to zlib when the zstandard
    package is absent.

    Args:
        data: String to compress.

    Returns:
        Base64 payload prefixed with STEGASOO-ZS: (zstd) or STEGASOO-Z: (zlib).
    """
    raw = data.encode("utf-8")
    if HAS_ZSTD:
        # zstd at level 19: best ratio this library exposes by default.
        packed = zstd.ZstdCompressor(level=19).compress(raw)
        prefix = COMPRESSION_PREFIX_ZSTD
    else:
        packed = zlib.compress(raw, level=9)
        prefix = COMPRESSION_PREFIX_ZLIB
    return prefix + base64.b64encode(packed).decode("ascii")
def decompress_data(data: str) -> str:
    """
    Inflate a QR payload produced by compress_data().

    Handles both zstd (STEGASOO-ZS:) and legacy zlib (STEGASOO-Z:) formats.

    Args:
        data: Compressed string with STEGASOO-ZS: or STEGASOO-Z: prefix.

    Returns:
        Original uncompressed string.

    Raises:
        ValueError: If the prefix is unknown, or zstd data is seen without
            the zstandard package installed.
    """
    if data.startswith(COMPRESSION_PREFIX_ZSTD):
        # v4.2.0 zstd format — requires the optional zstandard package.
        if not HAS_ZSTD:
            raise ValueError(
                "Data compressed with zstd but zstandard package not installed. "
                "Run: pip install zstandard"
            )
        packed = base64.b64decode(data[len(COMPRESSION_PREFIX_ZSTD) :])
        return zstd.ZstdDecompressor().decompress(packed).decode("utf-8")
    if data.startswith(COMPRESSION_PREFIX_ZLIB):
        # Legacy zlib format.
        packed = base64.b64decode(data[len(COMPRESSION_PREFIX_ZLIB) :])
        return zlib.decompress(packed).decode("utf-8")
    raise ValueError("Data is not in compressed format")
def normalize_pem(pem_data: str) -> str:
    """
    Normalize PEM data so the cryptography library accepts it.

    QR extraction produces PEMs with mixed line endings, stray whitespace,
    non-ASCII artifacts, mangled header/footer dashes, and broken base64
    padding. This rebuilds a canonical PEM: one header line, 64-column
    base64 body, one footer line, trailing newline.

    Args:
        pem_data: Raw PEM string from a QR code.

    Returns:
        Properly formatted PEM string, or the cleaned input when no
        BEGIN/END boundaries can be located.
    """
    import re

    # Unify line endings, trim outer whitespace, and drop non-ASCII artifacts.
    text = pem_data.replace("\r\n", "\n").replace("\r", "\n").strip()
    text = "".join(ch for ch in text if ord(ch) < 128)

    # Locate header / body / footer: strict pattern first, permissive second.
    found = re.search(
        r"(-----BEGIN[^-]*-----)(.*?)(-----END[^-]*-----)",
        text,
        re.DOTALL | re.IGNORECASE,
    )
    if not found:
        found = re.search(
            r"(-+BEGIN[^-]+-+)(.*?)(-+END[^-]+-+)",
            text,
            re.DOTALL | re.IGNORECASE,
        )
    if not found:
        # Unparseable — hand back what we have.
        return text

    def _tidy_boundary(boundary: str) -> str:
        # Collapse inner whitespace and force exactly five dashes per side.
        tidy = re.sub(r"\s+", " ", boundary.strip())
        tidy = re.sub(r"^-+", "-----", tidy)
        return re.sub(r"-+$", "-----", tidy)

    header = _tidy_boundary(found.group(1))
    footer = _tidy_boundary(found.group(3))

    # Keep only legal base64 characters in the body.
    body = re.sub(r"[^A-Za-z0-9+/=]", "", found.group(2))
    # Restore base64 padding (length must be a multiple of 4).
    if len(body) % 4:
        body += "=" * (4 - len(body) % 4)
    # Re-wrap at the PEM-standard 64 columns.
    wrapped = "\n".join(body[i : i + 64] for i in range(0, len(body), 64))
    return header + "\n" + wrapped + "\n" + footer + "\n"
def is_compressed(data: str) -> bool:
    """Check if data has compression prefix (zstd or zlib).

    Args:
        data: Possibly compressed string

    Returns:
        True if the string carries a known compression prefix
    """
    # str.startswith accepts a tuple of prefixes — one call covers both schemes.
    return data.startswith((COMPRESSION_PREFIX_ZSTD, COMPRESSION_PREFIX_ZLIB))
def auto_decompress(data: str) -> str:
    """
    Automatically decompress data if compressed, otherwise return as-is.

    Args:
        data: Possibly compressed string

    Returns:
        Decompressed string
    """
    # Pass plain data straight through; only prefixed data is decompressed.
    return decompress_data(data) if is_compressed(data) else data
def get_compressed_size(data: str) -> int:
    """Get size of data after compression (including prefix)."""
    compressed = compress_data(data)
    return len(compressed)
def can_fit_in_qr(data: str, compress: bool = False) -> bool:
    """
    Check if data can fit in a QR code.

    Args:
        data: String data
        compress: Whether compression will be used

    Returns:
        True if data fits
    """
    if compress:
        payload_size = get_compressed_size(data)
    else:
        payload_size = len(data.encode("utf-8"))
    return payload_size <= QR_MAX_BINARY
def needs_compression(data: str) -> bool:
    """Check if data needs compression to fit in QR code."""
    fits_uncompressed = can_fit_in_qr(data, compress=False)
    fits_compressed = can_fit_in_qr(data, compress=True)
    # Compression is "needed" only when it is the difference-maker.
    return (not fits_uncompressed) and fits_compressed
def generate_qr_code(
    data: str,
    compress: bool = False,
    error_correction=None,
    output_format: str = "png",
) -> bytes:
    """
    Generate a QR code image from string data.

    Args:
        data: String data to encode
        compress: Whether to compress data first
        error_correction: QR error correction level (default: auto)
        output_format: Image format - 'png' or 'jpg'/'jpeg'

    Returns:
        Image bytes in requested format

    Raises:
        RuntimeError: If qrcode library not available
        ValueError: If data too large for QR code
    """
    if not HAS_QRCODE_WRITE:
        raise RuntimeError("qrcode library not installed. Run: pip install qrcode[pil]")
    payload = compress_data(data) if compress else data
    if len(payload.encode("utf-8")) > QR_MAX_BINARY:
        raise ValueError(
            f"Data too large for QR code ({len(payload)} bytes). " f"Maximum: {QR_MAX_BINARY} bytes"
        )
    # Larger payloads get lower error correction so they still fit.
    if error_correction is None:
        error_correction = ERROR_CORRECT_L if len(payload) > 1000 else ERROR_CORRECT_M
    qr = qrcode.QRCode(
        version=None,
        error_correction=error_correction,
        box_size=10,
        border=4,
    )
    qr.add_data(payload)
    qr.make(fit=True)
    image = qr.make_image(fill_color="black", back_color="white")
    out = io.BytesIO()
    if output_format.lower() in ("jpg", "jpeg"):
        # JPEG has no alpha channel, so force RGB before saving.
        image.convert("RGB").save(out, format="JPEG", quality=95)
    else:
        image.save(out, format="PNG")
    return out.getvalue()
def generate_qr_ascii(
    data: str,
    compress: bool = False,
    invert: bool = False,
) -> str:
    """
    Generate an ASCII representation of a QR code.

    Uses Unicode block characters for compact display.

    Args:
        data: String data to encode
        compress: Whether to compress data first
        invert: Invert colors (white on black for dark terminals)

    Returns:
        ASCII string representation of QR code

    Raises:
        RuntimeError: If qrcode library not available
        ValueError: If data too large for QR code
    """
    if not HAS_QRCODE_WRITE:
        raise RuntimeError("qrcode library not installed. Run: pip install qrcode[pil]")
    qr_data = data
    # Compress if requested
    if compress:
        qr_data = compress_data(data)
    # Check size
    if len(qr_data.encode("utf-8")) > QR_MAX_BINARY:
        raise ValueError(
            f"Data too large for QR code ({len(qr_data)} bytes). " f"Maximum: {QR_MAX_BINARY} bytes"
        )
    qr = qrcode.QRCode(
        version=None,
        error_correction=ERROR_CORRECT_L,
        box_size=1,
        border=2,
    )
    qr.add_data(qr_data)
    qr.make(fit=True)
    # qrcode only offers print_ascii (which writes to stdout); capture it
    # with redirect_stdout instead of manually swapping sys.stdout — this
    # is exception-safe and avoids hand-rolled global-state mutation.
    from contextlib import redirect_stdout
    from io import StringIO

    capture = StringIO()
    with redirect_stdout(capture):
        qr.print_ascii(invert=invert)
    return capture.getvalue()
def read_qr_code(image_data: bytes) -> str | None:
    """
    Read QR code from image data.

    Args:
        image_data: Image bytes (PNG, JPG, etc.)

    Returns:
        Decoded string, or None if no QR code found

    Raises:
        RuntimeError: If pyzbar library not available
    """
    if not HAS_QRCODE_READ:
        raise RuntimeError(
            "pyzbar library not installed. Run: pip install pyzbar\n"
            "Also requires system library: sudo apt-get install libzbar0"
        )
    try:
        image: Image.Image = Image.open(io.BytesIO(image_data))
        # pyzbar is most reliable on RGB or grayscale input.
        if image.mode not in ("RGB", "L"):
            image = image.convert("RGB")
        symbols = pyzbar_decode(image, symbols=[ZBarSymbol.QRCODE])
        if not symbols:
            return None
        # Only the first detected QR code is returned.
        decoded_text: str = symbols[0].data.decode("utf-8")
        return decoded_text
    except Exception:
        # Any parse/decode failure is reported as "no QR found".
        return None
def read_qr_code_from_file(filepath: str) -> str | None:
    """
    Read QR code from image file.

    Args:
        filepath: Path to image file

    Returns:
        Decoded string, or None if no QR code found
    """
    with open(filepath, "rb") as handle:
        contents = handle.read()
    return read_qr_code(contents)
def extract_key_from_qr(image_data: bytes) -> str | None:
    """
    Extract RSA key from QR code image, auto-decompressing if needed.

    This function is more robust than the original, with better error handling
    and PEM normalization.

    Args:
        image_data: Image bytes containing QR code

    Returns:
        PEM-encoded RSA key string, or None if not found/invalid
    """
    payload = read_qr_code(image_data)
    if not payload:
        return None
    # Decompress when prefixed; fall back to the raw payload on failure.
    try:
        key_pem = decompress_data(payload) if is_compressed(payload) else payload
    except Exception:
        key_pem = payload
    # Must at least resemble a PEM block before normalizing.
    if "-----BEGIN" not in key_pem or "-----END" not in key_pem:
        return None
    # QR round-trips can introduce subtle formatting damage; normalization
    # rebuilds canonical PEM layout.
    try:
        key_pem = normalize_pem(key_pem)
    except Exception:
        # Better to return nothing than a broken PEM.
        return None
    # Normalization must not have destroyed the PEM envelope.
    if "-----BEGIN" in key_pem and "-----END" in key_pem:
        return key_pem
    return None
def extract_key_from_qr_file(filepath: str) -> str | None:
    """
    Extract RSA key from QR code image file.

    Args:
        filepath: Path to image file containing QR code

    Returns:
        PEM-encoded RSA key string, or None if not found/invalid
    """
    with open(filepath, "rb") as handle:
        contents = handle.read()
    return extract_key_from_qr(contents)
def detect_and_crop_qr(
    image_data: bytes,
    padding_percent: float = QR_CROP_PADDING_PERCENT,
    min_padding_px: int = QR_CROP_MIN_PADDING_PX,
) -> bytes | None:
    """
    Detect QR code in image and crop to it, handling rotation.

    Uses the QR code's corner coordinates to compute an axis-aligned
    bounding box, then adds padding to ensure rotated QR codes aren't clipped.

    Args:
        image_data: Input image bytes (PNG, JPG, etc.)
        padding_percent: Padding as fraction of QR size (default 10%)
        min_padding_px: Minimum padding in pixels (default 10)

    Returns:
        Cropped PNG image bytes, or None if no QR code found

    Raises:
        RuntimeError: If pyzbar library not available
    """
    if not HAS_QRCODE_READ:
        raise RuntimeError(
            "pyzbar library not installed. Run: pip install pyzbar\n"
            "Also requires system library: sudo apt-get install libzbar0"
        )
    try:
        img: Image.Image = Image.open(io.BytesIO(image_data))
        # pyzbar works best with RGB or grayscale input.
        detect_img = img.convert("RGB") if img.mode not in ("RGB", "L") else img
        decoded = pyzbar_decode(detect_img, symbols=[ZBarSymbol.QRCODE])
        if not decoded:
            return None
        # Corner polygon handles rotated codes; fall back to the
        # axis-aligned rect when pyzbar can't produce a full polygon.
        polygon = decoded[0].polygon
        if len(polygon) < 4:
            rect = decoded[0].rect
            min_x, min_y = rect.left, rect.top
            max_x, max_y = rect.left + rect.width, rect.top + rect.height
        else:
            xs = [p.x for p in polygon]
            ys = [p.y for p in polygon]
            min_x, max_x = min(xs), max(xs)
            min_y, max_y = min(ys), max(ys)
        # Padding scales with the larger QR dimension so a rotated code's
        # corners stay inside the crop.
        qr_size = max(max_x - min_x, max_y - min_y)
        padding = max(int(qr_size * padding_percent), min_padding_px)
        # Clamp the padded box to the image bounds.
        img_width, img_height = img.size
        crop_left = max(0, min_x - padding)
        crop_top = max(0, min_y - padding)
        crop_right = min(img_width, max_x + padding)
        crop_bottom = min(img_height, max_y + padding)
        # Crop the ORIGINAL image (preserves mode/quality). PNG output
        # keeps transparency for RGBA/LA/P sources on its own, so no
        # mode-specific save path is needed — the previous version had
        # an if/else here with two byte-identical branches.
        cropped = img.crop((crop_left, crop_top, crop_right, crop_bottom))
        buf = io.BytesIO()
        cropped.save(buf, format="PNG")
        return buf.getvalue()
    except Exception as e:
        # Log for debugging but return None for clean API
        import sys

        print(f"QR crop error: {e}", file=sys.stderr)
        return None
def detect_and_crop_qr_file(
    filepath: str,
    padding_percent: float = QR_CROP_PADDING_PERCENT,
    min_padding_px: int = QR_CROP_MIN_PADDING_PX,
) -> bytes | None:
    """
    Detect QR code in image file and crop to it.

    Args:
        filepath: Path to image file
        padding_percent: Padding as fraction of QR size (default 10%)
        min_padding_px: Minimum padding in pixels (default 10)

    Returns:
        Cropped PNG image bytes, or None if no QR code found
    """
    with open(filepath, "rb") as handle:
        contents = handle.read()
    return detect_and_crop_qr(contents, padding_percent, min_padding_px)
def has_qr_write() -> bool:
    """Return True when QR code generation (qrcode library) is available."""
    return HAS_QRCODE_WRITE
def has_qr_read() -> bool:
    """Return True when QR code scanning (pyzbar library) is available."""
    return HAS_QRCODE_READ
def has_qr_support() -> bool:
    """Return True only when both QR writing and reading are available."""
    return HAS_QRCODE_WRITE and HAS_QRCODE_READ

View File

@ -0,0 +1,445 @@
"""
Stegasoo Admin Recovery Module (v4.1.0)
Generates and manages recovery keys for admin password reset.
Recovery keys use the same format as channel keys (32 alphanumeric chars
with dashes) but serve a different purpose - they allow resetting the
admin password when locked out.
Security model:
- Recovery key is generated once during setup
- Only the hash is stored in the database
- The actual key is shown once and must be saved by the user
- Key can reset any admin account's password
- No recovery key = no password reset possible (most secure)
Usage:
# During setup - generate and show to user
key = generate_recovery_key()
key_hash = hash_recovery_key(key)
# Store key_hash in database, show key to user
# During recovery - verify user's key
if verify_recovery_key(user_input, stored_hash):
# Allow password reset
"""
import base64
import hashlib
import secrets
from io import BytesIO
from .constants import RECOVERY_OBFUSCATION_KEY
from .debug import debug
def _xor_bytes(data: bytes, key: bytes) -> bytes:
"""XOR data with repeating key."""
return bytes(b ^ key[i % len(key)] for i, b in enumerate(data))
def obfuscate_key(key: str) -> str:
    """
    Obfuscate a recovery key for QR encoding.

    XORs the key with the magic header hash and base64 encodes it, so a
    casual QR scan shows gibberish rather than the real key.

    Args:
        key: Plain recovery key (formatted or normalized)

    Returns:
        Obfuscated string prefixed with "STEGO:" marker
    """
    raw = normalize_recovery_key(key).encode("utf-8")
    masked = _xor_bytes(raw, RECOVERY_OBFUSCATION_KEY)
    return "STEGO:" + base64.b64encode(masked).decode("ascii")
def deobfuscate_key(obfuscated: str) -> str | None:
    """
    Deobfuscate a recovery key from QR data.

    Reverses the obfuscation process; also accepts plain (non-obfuscated)
    keys for compatibility.

    Args:
        obfuscated: Obfuscated string from QR scan

    Returns:
        Formatted recovery key, or None if invalid
    """
    if not obfuscated.startswith("STEGO:"):
        # No marker — treat the input as a plain recovery key.
        try:
            return format_recovery_key(obfuscated)
        except ValueError:
            return None
    try:
        masked = base64.b64decode(obfuscated[6:])  # strip "STEGO:" prefix
        raw = _xor_bytes(masked, RECOVERY_OBFUSCATION_KEY)
        return format_recovery_key(raw.decode("utf-8"))
    except Exception:
        return None
# =============================================================================
# STEGO BACKUP - Hide recovery key in an image using Stegasoo itself
# =============================================================================
# Fixed credentials used when hiding a recovery key in a stego image.
# They are intentionally hardcoded (internal, not user-facing): the
# security model relies on the obscurity of the stego image itself,
# not on these values being secret.
_RECOVERY_STEGO_PASSPHRASE = "stegasoo-recovery-v1"
_RECOVERY_STEGO_PIN = "314159"  # Pi digits - fixed, not secret
# Accepted size range for the carrier image passed to create_stego_backup().
STEGO_BACKUP_MIN_SIZE = 50 * 1024  # 50 KB
STEGO_BACKUP_MAX_SIZE = 2 * 1024 * 1024  # 2 MB
def create_stego_backup(
    recovery_key: str,
    carrier_image: bytes,
) -> bytes:
    """
    Hide recovery key in an image using Stegasoo steganography.

    The same image serves as both carrier and reference for simplicity,
    with a fixed internal passphrase and PIN — obscurity of the stego
    image is the security here, not the credentials.

    Args:
        recovery_key: The recovery key to hide
        carrier_image: JPEG image bytes (50KB-2MB, used as carrier AND reference)

    Returns:
        PNG image with hidden recovery key

    Raises:
        ValueError: If image size out of range or invalid format
    """
    from .encode import encode

    image_size = len(carrier_image)
    if image_size < STEGO_BACKUP_MIN_SIZE:
        raise ValueError(f"Image too small: {image_size // 1024}KB (min 50KB)")
    if image_size > STEGO_BACKUP_MAX_SIZE:
        raise ValueError(f"Image too large: {image_size // 1024}KB (max 2MB)")
    # Embed the canonical (dashed) form of the key.
    payload = format_recovery_key(recovery_key)
    stego = encode(
        message=payload,
        reference_photo=carrier_image,  # Same image for simplicity
        carrier_image=carrier_image,
        passphrase=_RECOVERY_STEGO_PASSPHRASE,
        pin=_RECOVERY_STEGO_PIN,
    )
    debug.print(f"Created stego backup: {len(stego.stego_image)} bytes")
    return stego.stego_image
def extract_stego_backup(
    stego_image: bytes,
    reference_photo: bytes,
) -> str | None:
    """
    Extract recovery key from a stego backup image.

    Args:
        stego_image: The stego image containing hidden key
        reference_photo: Original reference photo (same as was used for carrier)

    Returns:
        Extracted recovery key (formatted), or None if extraction fails
    """
    from .decode import decode
    from .exceptions import DecryptionError

    try:
        decoded = decode(
            stego_image=stego_image,
            reference_photo=reference_photo,
            passphrase=_RECOVERY_STEGO_PASSPHRASE,
            pin=_RECOVERY_STEGO_PIN,
        )
        # format_recovery_key doubles as validation of the embedded text.
        formatted = format_recovery_key(decoded.message or "")
        debug.print(f"Extracted recovery key from stego: {get_recovery_fingerprint(formatted)}")
        return formatted
    except (DecryptionError, ValueError) as e:
        debug.print(f"Stego backup extraction failed: {e}")
        return None
# Recovery key format: same as channel key (32 chars, 8 groups of 4).
# Length counts only the alphanumeric characters, not the dashes.
RECOVERY_KEY_LENGTH = 32
# Uppercase letters and digits only; keys are normalized to this alphabet.
RECOVERY_KEY_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
def generate_recovery_key() -> str:
    """
    Generate a new random recovery key.

    Format: XXXX-XXXX-XXXX-XXXX-XXXX-XXXX-XXXX-XXXX
    (32 alphanumeric characters with dashes)

    Returns:
        Formatted recovery key string

    Example:
        >>> key = generate_recovery_key()
        >>> len(key)
        39
        >>> key.count('-')
        7
    """
    # 32 cryptographically random characters from the key alphabet.
    chars = [secrets.choice(RECOVERY_KEY_ALPHABET) for _ in range(RECOVERY_KEY_LENGTH)]
    raw_key = "".join(chars)
    # Group into 8 blocks of 4, dash-separated.
    groups = [raw_key[i : i + 4] for i in range(0, RECOVERY_KEY_LENGTH, 4)]
    formatted = "-".join(groups)
    debug.print(f"Generated recovery key: {formatted[:4]}-••••-...-{formatted[-4:]}")
    return formatted
def normalize_recovery_key(key: str) -> str:
    """
    Normalize a recovery key for validation/hashing.

    Removes dashes, spaces, converts to uppercase.

    Args:
        key: Raw key input (may have dashes, spaces, mixed case)

    Returns:
        Normalized key (32 uppercase alphanumeric chars)

    Raises:
        ValueError: If key has invalid length or characters

    Example:
        >>> normalize_recovery_key("abcd-1234-efgh-5678-ijkl-9012-mnop-3456")
        "ABCD1234EFGH5678IJKL9012MNOP3456"
    """
    candidate = key.replace("-", "").replace(" ", "").upper()
    if len(candidate) != RECOVERY_KEY_LENGTH:
        raise ValueError(
            f"Recovery key must be {RECOVERY_KEY_LENGTH} characters " f"(got {len(candidate)})"
        )
    # Reject anything outside the uppercase alphanumeric alphabet.
    if set(candidate) - set(RECOVERY_KEY_ALPHABET):
        raise ValueError("Recovery key must contain only letters A-Z and digits 0-9")
    return candidate
def format_recovery_key(key: str) -> str:
    """
    Format a recovery key with dashes for display.

    Args:
        key: Raw or normalized key

    Returns:
        Formatted key (XXXX-XXXX-XXXX-XXXX-XXXX-XXXX-XXXX-XXXX)

    Example:
        >>> format_recovery_key("ABCD1234EFGH5678IJKL9012MNOP3456")
        "ABCD-1234-EFGH-5678-IJKL-9012-MNOP-3456"
    """
    normalized = normalize_recovery_key(key)
    groups = [normalized[i : i + 4] for i in range(0, RECOVERY_KEY_LENGTH, 4)]
    return "-".join(groups)
def hash_recovery_key(key: str) -> str:
    """
    Hash a recovery key for secure storage.

    Uses SHA-256 with a fixed salt prefix. The hash is stored in the
    database; the original key is never stored.

    Args:
        key: Recovery key (formatted or raw)

    Returns:
        Hex-encoded hash string (64 chars)

    Example:
        >>> key = "ABCD-1234-EFGH-5678-IJKL-9012-MNOP-3456"
        >>> len(hash_recovery_key(key))
        64
    """
    normalized = normalize_recovery_key(key)
    # Fixed salt prefix differentiates recovery-key hashes from other
    # hashes in the system.
    digest = hashlib.sha256(f"stegasoo-recovery-v1:{normalized}".encode("utf-8")).hexdigest()
    debug.print(f"Hashed recovery key: {digest[:8]}...")
    return digest
def verify_recovery_key(key: str, stored_hash: str) -> bool:
    """
    Verify a recovery key against a stored hash.

    Args:
        key: User-provided recovery key
        stored_hash: Hash from database

    Returns:
        True if key matches, False otherwise

    Example:
        >>> key = generate_recovery_key()
        >>> h = hash_recovery_key(key)
        >>> verify_recovery_key(key, h)
        True
        >>> verify_recovery_key("WRONG-KEY!", h)
        False
    """
    try:
        candidate_hash = hash_recovery_key(key)
        # Constant-time comparison prevents timing attacks.
        ok = secrets.compare_digest(candidate_hash, stored_hash)
        debug.print(f"Recovery key verification: {'success' if ok else 'failed'}")
        return ok
    except ValueError:
        # The provided key did not even parse as a recovery key.
        debug.print("Recovery key verification: invalid format")
        return False
def get_recovery_fingerprint(key: str) -> str:
    """
    Get a short fingerprint for display (first and last 4 chars).

    Args:
        key: Recovery key

    Returns:
        Fingerprint like "ABCD-••••-...-3456"

    Example:
        >>> get_recovery_fingerprint("ABCD-1234-EFGH-5678-IJKL-9012-MNOP-3456")
        "ABCD-••••-••••-••••-••••-••••-••••-3456"
    """
    groups = format_recovery_key(key).split("-")
    # Keep only the outermost groups; mask the six in the middle.
    return "-".join([groups[0], *(["••••"] * 6), groups[-1]])
def generate_recovery_qr(key: str) -> bytes:
    """
    Generate a QR code image for the recovery key.

    The key is obfuscated using XOR with Stegasoo's magic headers,
    so scanning the QR shows gibberish instead of the actual key.

    Args:
        key: Recovery key

    Returns:
        PNG image bytes

    Raises:
        ImportError: If qrcode library not available

    Example:
        >>> key = generate_recovery_key()
        >>> png_bytes = generate_recovery_qr(key)
        >>> len(png_bytes) > 0
        True
    """
    try:
        import qrcode
    except ImportError:
        raise ImportError("qrcode library required: pip install qrcode[pil]")
    # Obfuscate so a scan shows gibberish, not the actual key.
    payload = obfuscate_key(key)
    builder = qrcode.QRCode(
        version=1,
        error_correction=qrcode.constants.ERROR_CORRECT_M,
        box_size=10,
        border=4,
    )
    builder.add_data(payload)
    builder.make(fit=True)
    rendered = builder.make_image(fill_color="black", back_color="white")
    out = BytesIO()
    rendered.save(out, format="PNG")
    png_bytes = out.getvalue()
    debug.print(f"Generated recovery QR (obfuscated): {len(png_bytes)} bytes")
    return png_bytes
def extract_key_from_qr(image_data: bytes) -> str | None:
    """
    Extract recovery key from a QR code image.

    Handles both obfuscated (STEGO:...) and plain key formats.

    Args:
        image_data: PNG/JPEG image bytes containing QR code

    Returns:
        Extracted and validated recovery key, or None if not found/invalid

    Example:
        >>> key = generate_recovery_key()
        >>> qr = generate_recovery_qr(key)
        >>> extract_key_from_qr(qr) == format_recovery_key(key)
        True
    """
    try:
        from PIL import Image
        from pyzbar import pyzbar
    except ImportError:
        debug.print("pyzbar/PIL not available for QR reading")
        return None
    try:
        image = Image.open(BytesIO(image_data))
        for symbol in pyzbar.decode(image):
            payload = symbol.data.decode("utf-8").strip()
            # deobfuscate_key handles obfuscated and plain formats alike.
            recovered = deobfuscate_key(payload)
            if recovered:
                debug.print(f"Extracted recovery key from QR: {get_recovery_fingerprint(recovered)}")
                return recovered
        debug.print("No valid recovery key found in QR")
        return None
    except Exception as e:
        debug.print(f"QR extraction error: {e}")
        return None

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,281 @@
"""
Steganalysis Self-Check Module (v4.4.0)
Statistical analysis to estimate detectability risk of stego images.
Runs chi-square and RS (Regular-Singular) analysis on pixel data
to assess how visible the embedding is to an attacker.
Currently LSB-only. DCT steganalysis (calibration attack) deferred.
Usage::
from soosef.stegasoo.steganalysis import check_image
result = check_image(image_data)
print(result["risk"]) # "low", "medium", or "high"
print(result["chi_square"]) # per-channel chi-square p-values
print(result["rs"]) # per-channel RS embedding estimates
"""
from __future__ import annotations
import io
from dataclasses import dataclass, field
import numpy as np
from PIL import Image
from .constants import (
STEGANALYSIS_CHI_SUSPICIOUS_THRESHOLD,
STEGANALYSIS_RS_HIGH_THRESHOLD,
STEGANALYSIS_RS_MEDIUM_THRESHOLD,
)
@dataclass
class SteganalysisResult:
    """Result of steganalysis on an image; mirrored into the plain dict
    that check_image() returns."""

    risk: str  # overall detectability: "low", "medium", or "high"
    chi_square: dict = field(default_factory=dict)  # per-channel chi-square p-values
    rs: dict = field(default_factory=dict)  # per-channel RS embedding-rate estimates
    width: int = 0  # image width in pixels
    height: int = 0  # image height in pixels
    channels: int = 0  # number of channels analyzed (alpha excluded)
    mode: str = "lsb"  # analysis mode used
def chi_square_analysis(channel_data: np.ndarray) -> float:
    """Chi-square "pairs of values" (PoV) test on one channel's LSBs.

    Pixel values are grouped into pairs (2i, 2i+1). A clean image keeps
    each pair at its natural frequency ratio, while LSB embedding with
    random bits pushes both members of a pair toward equal counts. The
    test measures H0: "pairs are equalized" (consistent with embedding).

    Args:
        channel_data: Flattened 1-D array of pixel values (uint8).

    Returns:
        p-value from chi-square test.
        HIGH p-value (close to 1.0) -> pairs are equalized -> suspicious.
        LOW p-value (close to 0.0) -> pairs not equalized -> less suspicious.
    """
    from scipy.stats import chi2

    # Occurrence counts for each pixel value 0-255.
    counts = np.bincount(channel_data.ravel(), minlength=256)
    statistic = 0.0
    dof = 0
    # Walk the 128 PoV pairs: (0,1), (2,3), ..., (254,255).
    for even_count, odd_count in zip(counts[0::2], counts[1::2]):
        pair_total = even_count + odd_count
        if pair_total == 0:
            continue
        expected = pair_total / 2.0
        statistic += (even_count - expected) ** 2 / expected
        statistic += (odd_count - expected) ** 2 / expected
        dof += 1
    if dof == 0:
        return 1.0  # No data to analyze
    # Survival probability of the statistic under H0: a small statistic
    # (pairs equalized) yields a p-value near 1.0.
    return float(1.0 - chi2.cdf(statistic, dof))
def rs_analysis(channel_data: np.ndarray, block_size: int = 8) -> float:
    """Regular-Singular groups analysis on a single channel.

    Splits the channel into groups of ``block_size`` pixels and compares
    each group's smoothness before and after LSB flipping. F1 flips the
    LSB directly; F-1 flips the LSB of (value - 1). In a clean image
    R_m ~ R_{-m} and S_m ~ S_{-m}; LSB embedding drives R_m and S_m
    together while the F-1 statistics stay apart, which lets the
    embedding rate be estimated.

    Args:
        channel_data: Flattened 1-D array of pixel values (uint8).
        block_size: Number of pixels per group (default 8).

    Returns:
        Estimated embedding rate (0.0 = clean, 1.0 = fully embedded).
        Values > 0.5 strongly indicate LSB embedding.
    """
    # int16 so the F-1 flip can legally produce -1 from 0.
    samples = channel_data.ravel().astype(np.int16)
    group_count = len(samples) // block_size
    if group_count < 10:
        return 0.0  # Too few groups for a meaningful estimate
    groups = samples[: group_count * block_size].reshape(group_count, block_size)

    def smoothness(group: np.ndarray) -> float:
        # Sum of absolute differences between adjacent pixels.
        return float(np.sum(np.abs(np.diff(group))))

    def flip_f1(group: np.ndarray) -> np.ndarray:
        # F1: flip LSB (0<->1, 2<->3, 4<->5, ...).
        return group ^ 1

    def flip_fneg1(group: np.ndarray) -> np.ndarray:
        # F-1: flip LSB of (value - 1), i.e. -1<->0, 1<->2, 3<->4, ...
        flipped = group.copy()
        is_even = (group % 2) == 0
        flipped[is_even] -= 1
        flipped[~is_even] += 1
        return flipped

    regular_pos = singular_pos = regular_neg = singular_neg = 0
    for group in groups:
        base = smoothness(group)
        after_f1 = smoothness(flip_f1(group))
        if after_f1 > base:
            regular_pos += 1
        elif after_f1 < base:
            singular_pos += 1
        after_fneg1 = smoothness(flip_fneg1(group))
        if after_fneg1 > base:
            regular_neg += 1
        elif after_fneg1 < base:
            singular_neg += 1

    # d0 = R_m - S_m, d1 = R_{-m} - S_{-m}. Clean images keep d0 ~ d1
    # (both positive); embedding pulls d0 toward 0 while d1 stays up.
    d0 = regular_pos - singular_pos
    d1 = regular_neg - singular_neg
    if d1 == 0:
        # Degenerate: either heavily embedded or no usable signal.
        return 0.5 if d0 == 0 else 0.0
    ratio = d0 / d1
    if ratio >= 1.0:
        return 0.0  # d0 >= d1 means no evidence of embedding
    if ratio <= 0.0:
        return 1.0  # d0 collapsed or inverted
    # Linear interpolation: ratio=1 -> 0% embedded, ratio=0 -> 100%.
    return float(np.clip(1.0 - ratio, 0.0, 1.0))
def assess_risk(chi_p_values: dict[str, float], rs_estimates: dict[str, float]) -> str:
    """Map analysis results to a risk level.

    RS analysis is the primary metric (reliable for both sequential and
    random-order embedding). Chi-square is supplementary: high p-values
    indicate equalized PoV pairs, which is suspicious for random LSB
    embedding.

    Args:
        chi_p_values: Per-channel chi-square p-values (high = suspicious).
        rs_estimates: Per-channel RS embedding rate estimates (high = suspicious).

    Returns:
        "low", "medium", or "high" detectability risk.
    """
    if not (chi_p_values or rs_estimates):
        return "low"
    # Worst-case channel drives the verdict.
    worst_rs = max(rs_estimates.values(), default=0.0)
    worst_chi_p = max(chi_p_values.values(), default=0.0)
    if worst_rs > STEGANALYSIS_RS_HIGH_THRESHOLD:
        return "high"
    if worst_rs > STEGANALYSIS_RS_MEDIUM_THRESHOLD:
        return "medium"
    # Both indicators mildly flagging together still warrants "medium".
    if worst_chi_p > STEGANALYSIS_CHI_SUSPICIOUS_THRESHOLD and worst_rs > 0.05:
        return "medium"
    return "low"
def check_image(image_data: bytes, mode: str = "lsb") -> dict:
    """Run steganalysis on an image and return detectability assessment.

    Args:
        image_data: Raw image bytes (PNG, BMP, etc.).
        mode: Analysis mode - currently only "lsb" (or "auto", treated
            the same) is supported.

    Returns:
        Dict with keys: risk, chi_square, rs, width, height, channels, mode.

    Raises:
        ValueError: If ``mode`` is not "lsb" or "auto".
    """
    if mode not in ("lsb", "auto"):
        raise ValueError(f"Unsupported steganalysis mode: {mode}. Use 'lsb' or 'auto'.")
    # Context managers/finally guarantee image handles are released even
    # when analysis raises; the previous version leaked the source image
    # whenever a mode conversion produced a new Image object, and leaked
    # everything on any exception before img.close().
    with Image.open(io.BytesIO(image_data)) as source:
        if source.mode in ("RGB", "RGBA", "L"):
            width, height = source.size
            pixels = np.array(source)
        else:
            converted = source.convert("RGB")
            try:
                width, height = converted.size
                pixels = np.array(converted)
            finally:
                converted.close()
    channel_names = ["R", "G", "B"] if pixels.ndim == 3 else ["L"]
    if pixels.ndim == 2:
        # Promote grayscale to single-channel 3-D for uniform indexing.
        pixels = pixels[:, :, np.newaxis]
    num_channels = min(pixels.shape[2], 3)  # Skip alpha
    chi_p_values = {}
    rs_estimates = {}
    for i in range(num_channels):
        name = channel_names[i]
        channel = pixels[:, :, i].ravel()
        chi_p_values[name] = chi_square_analysis(channel)
        rs_estimates[name] = rs_analysis(channel)
    risk = assess_risk(chi_p_values, rs_estimates)
    result = SteganalysisResult(
        risk=risk,
        chi_square=chi_p_values,
        rs=rs_estimates,
        width=width,
        height=height,
        channels=num_channels,
        mode=mode,
    )
    return {
        "risk": result.risk,
        "chi_square": result.chi_square,
        "rs": result.rs,
        "width": result.width,
        "height": result.height,
        "channels": result.channels,
        "mode": result.mode,
    }

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,532 @@
"""
Stegasoo Utilities
Secure deletion, filename generation, and other helpers.
"""
import io
import os
import random
import secrets
import shutil
from datetime import date
from pathlib import Path
from PIL import Image
from .constants import DAY_NAMES
from .debug import debug
def read_image_exif(image_data: bytes) -> dict:
    """
    Read EXIF metadata from an image.

    Values are simplified for display: IFDRational becomes float, the GPS
    sub-IFD is expanded into a nested dict with named tags, and byte values
    are decoded to text when printable (otherwise summarized as
    "<N bytes binary>").

    Args:
        image_data: Raw image bytes

    Returns:
        Dict with EXIF fields (tag names as keys); empty when the image
        has no EXIF or cannot be parsed.

    Example:
        >>> exif = read_image_exif(photo_bytes)
        >>> print(exif.get('Make'))  # Camera manufacturer
    """
    from PIL.ExifTags import GPSTAGS, TAGS

    result = {}
    try:
        # "with" releases the image handle even when parsing raises (the
        # previous version only closed it on the success path).
        with Image.open(io.BytesIO(image_data)) as img:
            exif_data = img._getexif()  # private PIL API; None when absent
            if not exif_data:
                return result
            for tag_id, value in exif_data.items():
                tag = TAGS.get(tag_id, str(tag_id))
                if tag == "GPSInfo" and isinstance(value, dict):
                    # Expand the GPS sub-IFD into named tags with simple types.
                    gps = {}
                    for gps_tag_id, gps_value in value.items():
                        gps_tag = GPSTAGS.get(gps_tag_id, str(gps_tag_id))
                        # Convert tuples/IFDRational to simple types
                        if hasattr(gps_value, "numerator"):
                            gps[gps_tag] = float(gps_value)
                        elif isinstance(gps_value, tuple):
                            gps[gps_tag] = [
                                float(v) if hasattr(v, "numerator") else v for v in gps_value
                            ]
                        else:
                            gps[gps_tag] = gps_value
                    result[tag] = gps
                elif hasattr(value, "numerator"):
                    # Single IFDRational -> float
                    result[tag] = float(value)
                elif isinstance(value, bytes):
                    try:
                        decoded = value.decode("utf-8", errors="strict").strip("\x00")
                        # Keep only values that look like printable text.
                        # (A plain isprintable() pre-check was redundant —
                        # it is implied by this per-character test.)
                        if all(c.isspace() or c.isprintable() for c in decoded):
                            result[tag] = decoded
                        else:
                            result[tag] = f"<{len(value)} bytes binary>"
                    except UnicodeDecodeError:
                        # Only decode() can raise here, and only this error;
                        # the old "(UnicodeDecodeError, Exception)" tuple was
                        # an over-broad redundancy.
                        result[tag] = f"<{len(value)} bytes binary>"
                elif isinstance(value, tuple) and value and hasattr(value[0], "numerator"):
                    # Tuple of IFDRational -> list of floats
                    result[tag] = [float(v) for v in value]
                else:
                    result[tag] = value
    except Exception as e:
        debug.print(f"Error reading EXIF: {e}")
    return result
def write_image_exif(image_data: bytes, exif_updates: dict) -> bytes:
    """
    Write/update EXIF metadata in a JPEG image.
    Args:
        image_data: Raw JPEG image bytes
        exif_updates: Dict of EXIF fields to update (tag names as keys)
            Use None as value to delete a field
    Returns:
        Image bytes with updated EXIF
    Raises:
        ValueError: If image is not JPEG or piexif not available
    Example:
        >>> updated = write_image_exif(jpeg_bytes, {"Artist": "John Doe"})
    """
    try:
        import piexif
    except ImportError as e:
        # Chain explicitly so the original ImportError is kept as the cause.
        raise ValueError("piexif required for EXIF editing: pip install piexif") from e
    # Verify it's a JPEG by the SOI marker (idiomatic `!=` instead of `not ... ==`)
    if image_data[:2] != b"\xff\xd8":
        raise ValueError("EXIF editing only supported for JPEG images")
    debug.print(f"Writing EXIF updates: {list(exif_updates.keys())}")
    # Load existing EXIF
    try:
        exif_dict = piexif.load(image_data)
    except Exception:
        # No existing EXIF, start fresh
        exif_dict = {"0th": {}, "Exif": {}, "GPS": {}, "1st": {}, "thumbnail": None}
    # Map common tag names to piexif IFD and tag IDs
    tag_mapping = {
        # 0th IFD (main image)
        "Make": (piexif.ImageIFD.Make, "0th"),
        "Model": (piexif.ImageIFD.Model, "0th"),
        "Software": (piexif.ImageIFD.Software, "0th"),
        "Artist": (piexif.ImageIFD.Artist, "0th"),
        "Copyright": (piexif.ImageIFD.Copyright, "0th"),
        "ImageDescription": (piexif.ImageIFD.ImageDescription, "0th"),
        "DateTime": (piexif.ImageIFD.DateTime, "0th"),
        "Orientation": (piexif.ImageIFD.Orientation, "0th"),
        # Exif IFD
        "DateTimeOriginal": (piexif.ExifIFD.DateTimeOriginal, "Exif"),
        "DateTimeDigitized": (piexif.ExifIFD.DateTimeDigitized, "Exif"),
        "UserComment": (piexif.ExifIFD.UserComment, "Exif"),
        "ExposureTime": (piexif.ExifIFD.ExposureTime, "Exif"),
        "FNumber": (piexif.ExifIFD.FNumber, "Exif"),
        "ISOSpeedRatings": (piexif.ExifIFD.ISOSpeedRatings, "Exif"),
        "FocalLength": (piexif.ExifIFD.FocalLength, "Exif"),
        "LensMake": (piexif.ExifIFD.LensMake, "Exif"),
        "LensModel": (piexif.ExifIFD.LensModel, "Exif"),
    }
    for tag_name, value in exif_updates.items():
        if tag_name not in tag_mapping:
            debug.print(f"Unknown EXIF tag: {tag_name}, skipping")
            continue
        tag_id, ifd = tag_mapping[tag_name]
        if value is None:
            # Delete the tag
            if tag_id in exif_dict[ifd]:
                del exif_dict[ifd][tag_id]
                debug.print(f"Deleted EXIF tag: {tag_name}")
        else:
            # Set the tag (encode strings as bytes)
            if isinstance(value, str):
                value = value.encode("utf-8")
            exif_dict[ifd][tag_id] = value
            debug.print(f"Set EXIF tag: {tag_name}")
    # Serialize EXIF and insert into image (re-encode at high quality)
    exif_bytes = piexif.dump(exif_dict)
    output = io.BytesIO()
    img = Image.open(io.BytesIO(image_data))
    img.save(output, "JPEG", exif=exif_bytes, quality=95)
    output.seek(0)
    debug.print(f"EXIF updated: {len(image_data)} -> {len(output.getvalue())} bytes")
    return output.getvalue()
def strip_image_metadata(image_data: bytes, output_format: str = "PNG") -> bytes:
    """
    Remove all metadata (EXIF, ICC profiles, etc.) from an image.
    Creates a fresh image with only pixel data - no EXIF, GPS coordinates,
    camera info, timestamps, or other potentially sensitive metadata.
    Args:
        image_data: Raw image bytes
        output_format: Output format ('PNG', 'BMP', 'TIFF')
    Returns:
        Clean image bytes with no metadata
    Example:
        >>> clean = strip_image_metadata(photo_bytes)
        >>> # EXIF data is now removed
    """
    debug.print(f"Stripping metadata, output format: {output_format}")
    img = Image.open(io.BytesIO(image_data))
    # Convert to RGB if needed (handles RGBA, P, L, etc.)
    if img.mode not in ("RGB", "RGBA"):
        img = img.convert("RGB")
    # Create fresh image - this discards all metadata.
    # putdata accepts any pixel sequence; materializing a list first was a
    # needless full copy of the image in memory.
    clean = Image.new(img.mode, img.size)
    clean.putdata(img.getdata())
    output = io.BytesIO()
    clean.save(output, output_format.upper())
    output.seek(0)
    debug.print(f"Metadata stripped: {len(image_data)} -> {len(output.getvalue())} bytes")
    return output.getvalue()
def generate_filename(date_str: str | None = None, prefix: str = "", extension: str = "png") -> str:
    """
    Generate a filename for stego images.
    Format: {prefix}{random}_{YYYYMMDD}.{extension}
    Args:
        date_str: Date string (YYYY-MM-DD), defaults to today
        prefix: Optional prefix
        extension: File extension without dot (default: 'png')
    Returns:
        Filename string
    Example:
        >>> generate_filename("2023-12-25", "secret_", "png")
        "secret_a1b2c3d4_20231225.png"
    """
    debug.validate(
        bool(extension) and "." not in extension,
        f"Extension must not contain dot, got '{extension}'",
    )
    if date_str is None:
        date_str = date.today().isoformat()
    date_compact = date_str.replace("-", "")
    random_hex = secrets.token_hex(4)
    # Ensure extension doesn't have a leading dot
    extension = extension.lstrip(".")
    filename = f"{prefix}{random_hex}_{date_compact}.{extension}"
    # Bug fix: the log line was a placeholder-less f-string and never
    # actually included the generated name.
    debug.print(f"Generated filename: {filename}")
    return filename
def parse_date_from_filename(filename: str) -> str | None:
    """
    Extract date from a stego filename.
    Looks for patterns like _20251227 or _2025-12-27
    Args:
        filename: Filename to parse
    Returns:
        Date string (YYYY-MM-DD) or None
    Example:
        >>> parse_date_from_filename("secret_a1b2c3d4_20231225.png")
        "2023-12-25"
    """
    import re

    # Try YYYYMMDD format (date must directly precede the extension or end)
    match = re.search(r"_(\d{4})(\d{2})(\d{2})(?:\.|$)", filename)
    if match:
        year, month, day = match.groups()
        date_str = f"{year}-{month}-{day}"
        debug.print(f"Parsed date (compact): {date_str}")
        return date_str
    # Try YYYY-MM-DD format
    match = re.search(r"_(\d{4})-(\d{2})-(\d{2})(?:\.|$)", filename)
    if match:
        year, month, day = match.groups()
        date_str = f"{year}-{month}-{day}"
        debug.print(f"Parsed date (dashed): {date_str}")
        return date_str
    # Bug fix: the log line was a placeholder-less f-string and never
    # actually included the filename.
    debug.print(f"No date found in filename: {filename}")
    return None
def get_day_from_date(date_str: str) -> str:
    """
    Resolve a YYYY-MM-DD date string to its weekday name.

    Args:
        date_str: Date string (YYYY-MM-DD)

    Returns:
        Day name (e.g., "Monday"), or "" if the date cannot be parsed.

    Example:
        >>> get_day_from_date("2023-12-25")
        "Monday"
    """
    debug.validate(
        len(date_str) == 10 and date_str[4] == "-" and date_str[7] == "-",
        f"Invalid date format: {date_str}, expected YYYY-MM-DD",
    )
    try:
        parts = [int(piece) for piece in date_str.split("-")]
        day_name = DAY_NAMES[date(*parts).weekday()]
        debug.print(f"Date {date_str} is {day_name}")
        return day_name
    except Exception as e:
        # Unparseable dates are reported but not fatal.
        debug.exception(e, f"get_day_from_date for {date_str}")
        return ""
def get_today_date() -> str:
    """
    Return the current date formatted as YYYY-MM-DD (ISO 8601).

    Example:
        >>> get_today_date()
        "2023-12-25"
    """
    today = date.today().isoformat()
    debug.print(f"Today's date: {today}")
    return today
def get_today_day() -> str:
    """
    Return the weekday name for the current date.

    Example:
        >>> get_today_day()
        "Monday"
    """
    weekday_index = date.today().weekday()
    today_day = DAY_NAMES[weekday_index]
    debug.print(f"Today is {today_day}")
    return today_day
class SecureDeleter:
    """
    Securely delete files by overwriting with random data.
    Implements multi-pass overwriting before deletion.
    NOTE(review): effectiveness depends on the underlying storage;
    journaling/COW filesystems and SSD wear-leveling may retain old
    blocks regardless of overwriting.
    Example:
        >>> deleter = SecureDeleter("secret.txt", passes=3)
        >>> deleter.execute()
    """
    def __init__(self, path: str | Path, passes: int = 7):
        """
        Initialize secure deleter.
        Args:
            path: Path to file or directory
            passes: Number of overwrite passes
        """
        debug.validate(passes > 0, f"Passes must be positive, got {passes}")
        self.path = Path(path)
        self.passes = passes
        debug.print(f"SecureDeleter initialized for {self.path} with {passes} passes")
    def _overwrite_file(self, file_path: Path) -> None:
        """Overwrite file with random data multiple times, syncing each pass to disk."""
        if not file_path.exists() or not file_path.is_file():
            debug.print(f"File does not exist or is not a file: {file_path}")
            return
        length = file_path.stat().st_size
        debug.print(f"Overwriting file {file_path} ({length} bytes)")
        if length == 0:
            debug.print("File is empty, nothing to overwrite")
            return
        patterns = [b"\x00", b"\xff", bytes([random.randint(0, 255)])]
        for pass_num in range(self.passes):
            debug.print(f"Overwrite pass {pass_num + 1}/{self.passes}")
            with open(file_path, "r+b") as f:
                for pattern in patterns:  # index was unused; iterate values directly
                    f.seek(0)
                    # Write pattern in chunks for large files
                    chunk_size = 1024 * 1024  # 1MB chunks
                    for offset in range(0, length, chunk_size):
                        chunk = min(chunk_size, length - offset)
                        f.write(pattern * (chunk // len(pattern)))
                        f.write(pattern[: chunk % len(pattern)])
                # Final pass with random data
                f.seek(0)
                f.write(os.urandom(length))
                # Bug fix: force the overwrite onto disk. Without flush+fsync
                # the OS may simply drop the buffered writes when the file is
                # unlinked, defeating the secure-delete entirely.
                f.flush()
                os.fsync(f.fileno())
        debug.print(f"Completed {self.passes} overwrite passes")
    def delete_file(self) -> None:
        """Securely delete a single file."""
        if self.path.is_file():
            debug.print(f"Securely deleting file: {self.path}")
            self._overwrite_file(self.path)
            self.path.unlink()
            debug.print(f"File deleted: {self.path}")
        else:
            debug.print(f"Not a file: {self.path}")
    def delete_directory(self) -> None:
        """Securely delete a directory and all contents."""
        if not self.path.is_dir():
            debug.print(f"Not a directory: {self.path}")
            return
        debug.print(f"Securely deleting directory: {self.path}")
        # First, securely overwrite all files
        file_count = 0
        for file_path in self.path.rglob("*"):
            if file_path.is_file():
                self._overwrite_file(file_path)
                file_count += 1
        debug.print(f"Overwrote {file_count} files")
        # Then remove the directory tree
        shutil.rmtree(self.path)
        debug.print(f"Directory deleted: {self.path}")
    def execute(self) -> None:
        """Securely delete the path (file or directory)."""
        debug.print(f"Executing secure deletion: {self.path}")
        if self.path.is_file():
            self.delete_file()
        elif self.path.is_dir():
            self.delete_directory()
        else:
            debug.print(f"Path does not exist: {self.path}")
def secure_delete(path: str | Path, passes: int = 7) -> None:
    """
    Convenience wrapper around :class:`SecureDeleter`.

    Args:
        path: Path to file or directory
        passes: Number of overwrite passes

    Example:
        >>> secure_delete("secret.txt", passes=3)
    """
    debug.print(f"secure_delete called: {path}, passes={passes}")
    deleter = SecureDeleter(path, passes)
    deleter.execute()
def format_file_size(size_bytes: int) -> str:
    """
    Render a byte count as a human-readable size string.

    Args:
        size_bytes: Size in bytes

    Returns:
        Human-readable string (e.g., "1.5 MB")

    Example:
        >>> format_file_size(1500000)
        "1.5 MB"
    """
    debug.validate(size_bytes >= 0, f"File size cannot be negative: {size_bytes}")
    remaining: float = float(size_bytes)
    for unit in ("B", "KB", "MB", "GB"):
        if remaining < 1024:
            # Bytes are shown whole; larger units get one decimal place.
            return f"{int(remaining)} {unit}" if unit == "B" else f"{remaining:.1f} {unit}"
        remaining /= 1024
    return f"{remaining:.1f} TB"
def format_number(n: int) -> str:
    """
    Render an integer with thousands separators.

    Args:
        n: Integer to format

    Returns:
        Formatted string

    Example:
        >>> format_number(1234567)
        "1,234,567"
    """
    debug.validate(isinstance(n, int), f"Input must be integer, got {type(n)}")
    return format(n, ",")
def clamp(value: int, min_val: int, max_val: int) -> int:
    """
    Constrain a value to the inclusive range [min_val, max_val].

    Args:
        value: Value to clamp
        min_val: Minimum allowed value
        max_val: Maximum allowed value

    Returns:
        Clamped value

    Example:
        >>> clamp(15, 0, 10)
        10
    """
    debug.validate(min_val <= max_val, f"min_val ({min_val}) must be <= max_val ({max_val})")
    if value < min_val:
        return min_val
    if value > max_val:
        return max_val
    return value

View File

@ -0,0 +1,515 @@
"""
Stegasoo Input Validation (v3.2.0)
Validators for all user inputs with clear error messages.
Changes in v3.2.0:
- Renamed validate_phrase() -> validate_passphrase()
- Added word count validation with warnings for passphrases
- Added validators for embed modes and DCT parameters
"""
import io

from PIL import Image

from .constants import (
    ALLOWED_AUDIO_EXTENSIONS,
    ALLOWED_IMAGE_EXTENSIONS,
    ALLOWED_KEY_EXTENSIONS,
    EMBED_MODE_AUDIO_AUTO,
    EMBED_MODE_AUTO,
    EMBED_MODE_DCT,
    EMBED_MODE_LSB,
    MAX_FILE_PAYLOAD_SIZE,
    MAX_FILE_SIZE,
    MAX_IMAGE_PIXELS,
    MAX_MESSAGE_SIZE,
    MAX_PIN_LENGTH,
    MIN_KEY_PASSWORD_LENGTH,
    MIN_PASSPHRASE_WORDS,
    MIN_PIN_LENGTH,
    MIN_RSA_BITS,
    RECOMMENDED_PASSPHRASE_WORDS,
    VALID_AUDIO_EMBED_MODES,
)
from .debug import get_logger
from .exceptions import (
    AudioValidationError,
    ImageValidationError,
    KeyValidationError,
    MessageValidationError,
    PinValidationError,
    SecurityFactorError,
)
from .keygen import load_rsa_key
from .models import FilePayload, ValidationResult

# Module-level logger; assignment kept after the import block per PEP 8
# (the original interleaved it mid-imports).
logger = get_logger(__name__)
def validate_pin(pin: str, required: bool = False) -> ValidationResult:
    """
    Validate PIN format.

    Rules: 6-9 digits only, cannot start with zero, and an empty PIN is
    accepted when not required.

    Args:
        pin: PIN string to validate
        required: Whether PIN is required

    Returns:
        ValidationResult
    """
    if not pin:
        return ValidationResult.error("PIN is required") if required else ValidationResult.ok()
    if not pin.isdigit():
        return ValidationResult.error("PIN must contain only digits")
    if not (MIN_PIN_LENGTH <= len(pin) <= MAX_PIN_LENGTH):
        return ValidationResult.error(f"PIN must be {MIN_PIN_LENGTH}-{MAX_PIN_LENGTH} digits")
    if pin.startswith("0"):
        return ValidationResult.error("PIN cannot start with zero")
    return ValidationResult.ok(length=len(pin))
def validate_message(message: str) -> ValidationResult:
    """
    Validate text message content and size.

    Args:
        message: Message text

    Returns:
        ValidationResult
    """
    if not message:
        return ValidationResult.error("Message is required")
    size = len(message)
    if size > MAX_MESSAGE_SIZE:
        return ValidationResult.error(
            f"Message too long ({size:,} chars). Maximum: {MAX_MESSAGE_SIZE:,} characters"
        )
    return ValidationResult.ok(length=size)
def validate_payload(payload: str | bytes | FilePayload) -> ValidationResult:
    """
    Validate a payload (text message, bytes, or file).

    Args:
        payload: Text string, raw bytes, or FilePayload

    Returns:
        ValidationResult
    """
    # Text payloads reuse the plain message validator.
    if isinstance(payload, str):
        return validate_message(payload)
    # File payloads: validate the wrapped bytes, keep metadata in the result.
    if isinstance(payload, FilePayload):
        data = payload.data
        if not data:
            return ValidationResult.error("File is empty")
        if len(data) > MAX_FILE_PAYLOAD_SIZE:
            return ValidationResult.error(
                f"File too large ({len(data):,} bytes). "
                f"Maximum: {MAX_FILE_PAYLOAD_SIZE:,} bytes ({MAX_FILE_PAYLOAD_SIZE // 1024} KB)"
            )
        return ValidationResult.ok(
            size=len(data), filename=payload.filename, mime_type=payload.mime_type
        )
    # Raw bytes.
    if isinstance(payload, bytes):
        if not payload:
            return ValidationResult.error("Payload is empty")
        if len(payload) > MAX_FILE_PAYLOAD_SIZE:
            return ValidationResult.error(
                f"Payload too large ({len(payload):,} bytes). "
                f"Maximum: {MAX_FILE_PAYLOAD_SIZE:,} bytes ({MAX_FILE_PAYLOAD_SIZE // 1024} KB)"
            )
        return ValidationResult.ok(size=len(payload))
    return ValidationResult.error(f"Invalid payload type: {type(payload)}")
def validate_file_payload(
    file_data: bytes, filename: str = "", max_size: int = MAX_FILE_PAYLOAD_SIZE
) -> ValidationResult:
    """
    Validate a file for embedding.

    Args:
        file_data: Raw file bytes
        filename: Original filename (for display in errors)
        max_size: Maximum allowed size in bytes

    Returns:
        ValidationResult
    """
    if not file_data:
        return ValidationResult.error("File is empty")
    size = len(file_data)
    if size > max_size:
        # Sizes are reported in KB for readability.
        return ValidationResult.error(
            f"File '{filename or 'unnamed'}' too large ({size / 1024:.1f} KB). "
            f"Maximum: {max_size / 1024:.0f} KB"
        )
    return ValidationResult.ok(size=size, filename=filename)
def validate_image(
    image_data: bytes, name: str = "Image", check_size: bool = True
) -> ValidationResult:
    """
    Validate image data and dimensions.

    Args:
        image_data: Raw image bytes
        name: Name for error messages
        check_size: Whether to check pixel dimensions

    Returns:
        ValidationResult with width, height, pixels
    """
    if not image_data:
        return ValidationResult.error(f"{name} is required")
    if len(image_data) > MAX_FILE_SIZE:
        return ValidationResult.error(
            f"{name} too large ({len(image_data):,} bytes). Maximum: {MAX_FILE_SIZE:,} bytes"
        )
    try:
        img = Image.open(io.BytesIO(image_data))
        width, height = img.size
        pixel_count = width * height
        if check_size and pixel_count > MAX_IMAGE_PIXELS:
            # Report the roughly-square dimension limit for a friendlier message.
            max_dim = int(MAX_IMAGE_PIXELS**0.5)
            return ValidationResult.error(
                f"{name} too large ({width}×{height} = {pixel_count:,} pixels). "
                f"Maximum: ~{MAX_IMAGE_PIXELS:,} pixels ({max_dim}×{max_dim})"
            )
        return ValidationResult.ok(
            width=width, height=height, pixels=pixel_count, mode=img.mode, format=img.format
        )
    except Exception as e:
        return ValidationResult.error(f"Could not read {name}: {e}")
def validate_rsa_key(
    key_data: bytes, password: str | None = None, required: bool = False
) -> ValidationResult:
    """
    Validate RSA private key.

    Args:
        key_data: PEM-encoded key bytes
        password: Password if key is encrypted
        required: Whether key is required

    Returns:
        ValidationResult with key_size
    """
    if not key_data:
        # Missing key is only an error when a key is mandatory.
        return ValidationResult.error("RSA key is required") if required else ValidationResult.ok()
    try:
        bits = load_rsa_key(key_data, password).key_size
    except Exception as e:
        return ValidationResult.error(str(e))
    if bits < MIN_RSA_BITS:
        return ValidationResult.error(
            f"RSA key must be at least {MIN_RSA_BITS} bits (got {bits})"
        )
    return ValidationResult.ok(key_size=bits)
def validate_security_factors(pin: str, rsa_key_data: bytes | None) -> ValidationResult:
    """
    Validate that at least one security factor is provided.

    Args:
        pin: PIN string (may be empty)
        rsa_key_data: RSA key bytes (may be None/empty)

    Returns:
        ValidationResult
    """
    pin_present = bool(pin and pin.strip())
    key_present = bool(rsa_key_data and len(rsa_key_data) > 0)
    if pin_present or key_present:
        return ValidationResult.ok(has_pin=pin_present, has_key=key_present)
    return ValidationResult.error("You must provide at least a PIN or RSA Key")
def validate_file_extension(
    filename: str, allowed: set[str], file_type: str = "File"
) -> ValidationResult:
    """
    Validate file extension.

    Args:
        filename: Filename to check
        allowed: Set of allowed extensions (lowercase, no dot)
        file_type: Name for error messages

    Returns:
        ValidationResult with extension
    """
    if not filename or "." not in filename:
        return ValidationResult.error(f"{file_type} must have a file extension")
    extension = filename.rsplit(".", 1)[1].lower()
    if extension in allowed:
        return ValidationResult.ok(extension=extension)
    allowed_display = ", ".join(sorted("." + e for e in allowed))
    return ValidationResult.error(
        f"Unsupported {file_type.lower()} type: .{extension}. "
        f"Allowed: {allowed_display}"
    )
def validate_image_file(filename: str) -> ValidationResult:
    """Check that *filename* carries an allowed image extension."""
    return validate_file_extension(filename, ALLOWED_IMAGE_EXTENSIONS, "Image")


def validate_key_file(filename: str) -> ValidationResult:
    """Check that *filename* carries an allowed key-file extension."""
    return validate_file_extension(filename, ALLOWED_KEY_EXTENSIONS, "Key file")
def validate_key_password(password: str) -> ValidationResult:
    """
    Validate password for key encryption.

    Args:
        password: Password string

    Returns:
        ValidationResult
    """
    if not password:
        return ValidationResult.error("Password is required")
    if len(password) >= MIN_KEY_PASSWORD_LENGTH:
        return ValidationResult.ok(length=len(password))
    return ValidationResult.error(
        f"Password must be at least {MIN_KEY_PASSWORD_LENGTH} characters"
    )
def validate_passphrase(passphrase: str) -> ValidationResult:
    """
    Validate passphrase.

    v3.2.0: Recommend 4+ words for good entropy (since date is no longer used).

    Args:
        passphrase: Passphrase string

    Returns:
        ValidationResult with word_count and optional warning
    """
    if not passphrase or not passphrase.strip():
        return ValidationResult.error("Passphrase is required")
    # str.split() with no args already ignores surrounding whitespace.
    word_count = len(passphrase.split())
    if word_count < MIN_PASSPHRASE_WORDS:
        return ValidationResult.error(
            f"Passphrase should have at least {MIN_PASSPHRASE_WORDS} words"
        )
    # Valid but below the recommended length: attach a warning.
    if word_count < RECOMMENDED_PASSPHRASE_WORDS:
        return ValidationResult.ok(
            word_count=word_count,
            warning=f"Recommend {RECOMMENDED_PASSPHRASE_WORDS}+ words for better security",
        )
    return ValidationResult.ok(word_count=word_count)
# =============================================================================
# NEW VALIDATORS FOR V3.2.0
# =============================================================================
def validate_reference_photo(photo_data: bytes) -> ValidationResult:
    """Validate a reference photo via the generic image validator."""
    return validate_image(photo_data, "Reference photo")


def validate_carrier(carrier_data: bytes) -> ValidationResult:
    """Validate a carrier image via the generic image validator."""
    return validate_image(carrier_data, "Carrier image")
def validate_embed_mode(mode: str) -> ValidationResult:
    """
    Validate embedding mode.

    Args:
        mode: Embedding mode string

    Returns:
        ValidationResult
    """
    allowed = {EMBED_MODE_LSB, EMBED_MODE_DCT, EMBED_MODE_AUTO}
    if mode in allowed:
        return ValidationResult.ok(mode=mode)
    return ValidationResult.error(
        f"Invalid embed_mode: '{mode}'. Valid options: {', '.join(sorted(allowed))}"
    )
def validate_dct_output_format(format_str: str) -> ValidationResult:
    """
    Validate DCT output format.
    Args:
        format_str: Output format ('png' or 'jpeg')
    Returns:
        ValidationResult
    """
    fmt = format_str.lower()  # normalize once instead of lowering twice
    valid_formats = {"png", "jpeg"}
    if fmt not in valid_formats:
        return ValidationResult.error(
            f"Invalid DCT output format: '{format_str}'. Valid options: {', '.join(sorted(valid_formats))}"
        )
    return ValidationResult.ok(format=fmt)
def validate_dct_color_mode(mode: str) -> ValidationResult:
    """
    Validate DCT color mode.
    Args:
        mode: Color mode ('grayscale' or 'color')
    Returns:
        ValidationResult
    """
    normalized = mode.lower()  # normalize once instead of lowering twice
    valid_modes = {"grayscale", "color"}
    if normalized not in valid_modes:
        return ValidationResult.error(
            f"Invalid DCT color mode: '{mode}'. Valid options: {', '.join(sorted(valid_modes))}"
        )
    return ValidationResult.ok(mode=normalized)
# ============================================================================
# EXCEPTION-RAISING VALIDATORS (for CLI/API use)
# ============================================================================
def require_valid_pin(pin: str, required: bool = False) -> None:
    """Validate PIN; raise PinValidationError on failure."""
    outcome = validate_pin(pin, required)
    if not outcome.is_valid:
        raise PinValidationError(outcome.error_message)


def require_valid_message(message: str) -> None:
    """Validate message; raise MessageValidationError on failure."""
    outcome = validate_message(message)
    if not outcome.is_valid:
        raise MessageValidationError(outcome.error_message)


def require_valid_payload(payload: str | bytes | FilePayload) -> None:
    """Validate payload (text, bytes, or file); raise MessageValidationError on failure."""
    outcome = validate_payload(payload)
    if not outcome.is_valid:
        raise MessageValidationError(outcome.error_message)


def require_valid_image(image_data: bytes, name: str = "Image") -> None:
    """Validate image; raise ImageValidationError on failure."""
    outcome = validate_image(image_data, name)
    if not outcome.is_valid:
        raise ImageValidationError(outcome.error_message)


def require_valid_rsa_key(
    key_data: bytes, password: str | None = None, required: bool = False
) -> None:
    """Validate RSA key; raise KeyValidationError on failure."""
    outcome = validate_rsa_key(key_data, password, required)
    if not outcome.is_valid:
        raise KeyValidationError(outcome.error_message)


def require_security_factors(pin: str, rsa_key_data: bytes | None) -> None:
    """Validate security factors; raise SecurityFactorError on failure."""
    outcome = validate_security_factors(pin, rsa_key_data)
    if not outcome.is_valid:
        raise SecurityFactorError(outcome.error_message)
# =============================================================================
# AUDIO VALIDATORS (v4.3.0)
# =============================================================================
def validate_audio_file(filename: str) -> ValidationResult:
    """Check that *filename* carries an allowed audio extension."""
    return validate_file_extension(filename, ALLOWED_AUDIO_EXTENSIONS, "Audio file")


def validate_audio_embed_mode(mode: str) -> ValidationResult:
    """Validate audio embedding mode (explicit modes plus auto)."""
    allowed = VALID_AUDIO_EMBED_MODES | {EMBED_MODE_AUDIO_AUTO}
    if mode in allowed:
        return ValidationResult.ok(mode=mode)
    return ValidationResult.error(
        f"Invalid audio embed_mode: '{mode}'. "
        f"Valid options: {', '.join(sorted(allowed))}"
    )


def require_valid_audio(audio_data: bytes, name: str = "Audio") -> None:
    """Validate audio, raising AudioValidationError on failure."""
    # Imported lazily: audio support is an optional install extra.
    from .audio_utils import validate_audio

    outcome = validate_audio(audio_data, name)
    if not outcome.is_valid:
        raise AudioValidationError(outcome.error_message)

View File

@ -0,0 +1,496 @@
"""
Stegasoo Video Steganography LSB Embedding/Extraction (v4.4.0)
Frame-based LSB embedding for video files.
Hides data in the least significant bits of video frame pixels. Uses the
existing image steganography engine for per-frame embedding, providing
high capacity across multiple I-frames.
Strategy:
1. Extract I-frames (keyframes) from video using ffmpeg
2. Embed payload across I-frames using existing LSB engine
3. Re-encode video with modified frames using FFV1 lossless codec
4. Output: MKV container with embedded data
Uses ChaCha20 as a CSPRNG for pseudo-random frame selection and pixel
selection within frames, ensuring that without the key an attacker cannot
determine which frames/pixels were modified.
"""
import struct
import tempfile
from pathlib import Path
from .constants import (
EMBED_MODE_VIDEO_LSB,
VIDEO_MAGIC_LSB,
VIDEO_OUTPUT_CODEC,
)
from .debug import debug
from .exceptions import VideoCapacityError, VideoError
from .models import VideoEmbedStats
from .steganography import ENCRYPTION_OVERHEAD, _embed_lsb, _extract_lsb
from .video_utils import extract_frames, get_video_info, reassemble_video
# Progress reporting interval — write every N frames
PROGRESS_INTERVAL = 5
# =============================================================================
# PROGRESS REPORTING
# =============================================================================
def _write_progress(progress_file: str | None, current: int, total: int, phase: str = "embedding"):
"""Write progress to file for frontend polling."""
if progress_file is None:
return
try:
import json
with open(progress_file, "w") as f:
json.dump(
{
"current": current,
"total": total,
"percent": round((current / total) * 100, 1) if total > 0 else 0,
"phase": phase,
},
f,
)
except Exception:
pass # Don't let progress writing break encoding
# =============================================================================
# CAPACITY
# =============================================================================
def calculate_video_lsb_capacity(video_data: bytes) -> int:
    """
    Return the maximum number of payload bytes embeddable in a video via LSB.

    Capacity is derived from I-frames (keyframes) only; each keyframe
    contributes capacity proportional to its pixel count.

    Args:
        video_data: Raw bytes of a video file.

    Returns:
        Maximum embeddable payload size in bytes (after subtracting overhead).

    Raises:
        VideoError: If the video cannot be read or is in an unsupported format.
    """
    # Imported lazily to avoid a hard dependency at module import time.
    from .video_utils import calculate_video_capacity

    info = calculate_video_capacity(video_data, EMBED_MODE_VIDEO_LSB)
    debug.print(
        f"Video LSB capacity: {info.usable_capacity_bytes} bytes "
        f"({info.i_frames} I-frames, {info.resolution[0]}x{info.resolution[1]})"
    )
    return info.usable_capacity_bytes
# =============================================================================
# FRAME INDEX GENERATION (ChaCha20 CSPRNG)
# =============================================================================
def generate_frame_indices(key: bytes, num_frames: int, num_needed: int) -> list[int]:
    """
    Generate pseudo-random frame indices using ChaCha20 as a CSPRNG.
    Produces a deterministic sequence of unique frame indices so that
    the same key always yields the same embedding locations.
    NOTE(review): this derivation is part of the on-disk format — any change
    to the nonce, byte widths, or sampling order breaks extraction of
    previously created stego videos.
    Args:
        key: 32-byte key for the ChaCha20 cipher.
        num_frames: Total number of frames available.
        num_needed: How many unique frame indices are required.
    Returns:
        List of ``num_needed`` unique indices in [0, num_frames).
    """
    from cryptography.hazmat.backends import default_backend
    from cryptography.hazmat.primitives.ciphers import Cipher, algorithms
    debug.validate(len(key) == 32, f"Frame key must be 32 bytes, got {len(key)}")
    debug.validate(num_frames > 0, f"Number of frames must be positive, got {num_frames}")
    debug.validate(num_needed > 0, f"Number needed must be positive, got {num_needed}")
    debug.validate(
        num_needed <= num_frames,
        f"Cannot select {num_needed} frames from {num_frames} available",
    )
    debug.print(f"Generating {num_needed} frame indices from {num_frames} total frames")
    # Use a different nonce offset for frame selection (vs pixel selection)
    nonce = b"\x01" + b"\x00" * 15  # Different from pixel selection nonce
    if num_needed >= num_frames // 2:
        # Full Fisher-Yates shuffle: cheaper than rejection sampling when we
        # need at least half the frames (few rejections would succeed).
        cipher = Cipher(algorithms.ChaCha20(key, nonce), mode=None, backend=default_backend())
        encryptor = cipher.encryptor()
        indices = list(range(num_frames))
        # ChaCha20 of zeros yields the keystream: 4 random bytes per swap.
        random_bytes = encryptor.update(b"\x00" * (num_frames * 4))
        for i in range(num_frames - 1, 0, -1):
            j_bytes = random_bytes[(num_frames - 1 - i) * 4 : (num_frames - i) * 4]
            j = int.from_bytes(j_bytes, "big") % (i + 1)
            indices[i], indices[j] = indices[j], indices[i]
        return indices[:num_needed]
    # Direct sampling: draw 4-byte values and reject duplicates. We draw
    # 2x the needed count up front; debug.validate below fails if that
    # budget was not enough (only likely when num_needed ~ num_frames/2).
    selected: list[int] = []
    used: set[int] = set()
    cipher = Cipher(algorithms.ChaCha20(key, nonce), mode=None, backend=default_backend())
    encryptor = cipher.encryptor()
    bytes_needed = (num_needed * 2) * 4
    random_bytes = encryptor.update(b"\x00" * bytes_needed)
    byte_offset = 0
    while len(selected) < num_needed and byte_offset < len(random_bytes) - 4:
        idx = int.from_bytes(random_bytes[byte_offset : byte_offset + 4], "big") % num_frames
        byte_offset += 4
        if idx not in used:
            used.add(idx)
            selected.append(idx)
    debug.validate(
        len(selected) == num_needed,
        f"Failed to generate enough indices: {len(selected)}/{num_needed}",
    )
    return selected
# =============================================================================
# EMBEDDING
# =============================================================================
@debug.time
def embed_in_video_lsb(
    data: bytes,
    carrier_video: bytes,
    pixel_key: bytes,
    progress_file: str | None = None,
) -> tuple[bytes, VideoEmbedStats]:
    """
    Embed data into video frames using LSB steganography.
    The payload is prepended with a 4-byte magic header and a 4-byte
    big-endian length prefix. Data is distributed across I-frames using
    pseudo-random selection based on the pixel_key.
    The output video uses FFV1 lossless codec in MKV container to
    preserve the embedded data perfectly.
    Args:
        data: Encrypted payload bytes to embed.
        carrier_video: Raw bytes of the carrier video file.
        pixel_key: 32-byte key for frame and pixel selection.
        progress_file: Optional path for progress JSON (frontend polling).
    Returns:
        Tuple of (stego video bytes, VideoEmbedStats).
    Raises:
        VideoCapacityError: If the payload is too large for the carrier.
        VideoError: On any other embedding failure.
    """
    debug.print(f"Video LSB embedding {len(data)} bytes")
    debug.data(pixel_key, "Pixel key for embedding")
    debug.validate(len(pixel_key) == 32, f"Pixel key must be 32 bytes, got {len(pixel_key)}")
    try:
        # Get video info
        video_info = get_video_info(carrier_video)
        debug.print(
            f"Carrier video: {video_info.width}x{video_info.height}, "
            f"{video_info.fps:.2f} fps, {video_info.duration_seconds:.1f}s, "
            f"{video_info.i_frame_count} I-frames"
        )
        # Prepend magic + length prefix
        header = VIDEO_MAGIC_LSB + struct.pack(">I", len(data))
        payload = header + data
        debug.print(f"Payload with header: {len(payload)} bytes")
        # Calculate capacity and check fit
        capacity = calculate_video_lsb_capacity(carrier_video)
        # NOTE(review): accepts payloads up to capacity + ENCRYPTION_OVERHEAD;
        # presumably the usable capacity already subtracted that overhead —
        # confirm against calculate_video_capacity.
        if len(payload) > capacity + ENCRYPTION_OVERHEAD:
            raise VideoCapacityError(len(payload), capacity)
        # Extract I-frames to temp directory
        with tempfile.TemporaryDirectory(prefix="stegasoo_video_") as temp_dir_str:
            temp_dir = Path(temp_dir_str)
            _write_progress(progress_file, 5, 100, "extracting_frames")
            frames, _ = extract_frames(carrier_video, temp_dir, keyframes_only=True)
            num_frames = len(frames)
            debug.print(f"Extracted {num_frames} I-frames for embedding")
            if num_frames == 0:
                raise VideoError("No I-frames found in video")
            # Calculate bytes per frame (minus 4 byte length prefix used by _embed_lsb)
            pixels_per_frame = video_info.width * video_info.height
            bytes_per_frame = (pixels_per_frame * 3) // 8 - 4  # 3 bits per pixel, minus len prefix
            # For simplicity, embed entire payload in first frame if it fits
            # This makes extraction straightforward
            if len(payload) <= bytes_per_frame:
                debug.print(f"Payload fits in single frame ({len(payload)} <= {bytes_per_frame})")
                frame_path = frames[0]
                with open(frame_path, "rb") as f:
                    frame_data = f.read()
                try:
                    # Modified frame is written back in place as PNG (lossless)
                    # so reassembly picks it up unchanged.
                    stego_frame, stats, ext = _embed_lsb(
                        payload,
                        frame_data,
                        pixel_key,
                        bits_per_channel=1,
                        output_format="PNG",
                    )
                    with open(frame_path, "wb") as f:
                        f.write(stego_frame)
                    modified_frames = 1
                except Exception as e:
                    debug.print(f"Failed to embed in frame: {e}")
                    raise VideoError(f"Failed to embed in frame: {e}")
            else:
                # For larger payloads, we need to split across frames
                # Each frame stores: 4-byte chunk length + chunk data
                debug.print("Splitting payload across multiple frames")
                frames_needed = (len(payload) + bytes_per_frame - 1) // bytes_per_frame
                frames_needed = min(frames_needed, num_frames)
                debug.print(f"Using {frames_needed} frames to embed {len(payload)} bytes")
                # For now, use sequential frames for simplicity
                modified_frames = 0
                bytes_remaining = len(payload)
                payload_offset = 0
                for frame_idx in range(frames_needed):
                    if bytes_remaining <= 0:
                        break
                    frame_path = frames[frame_idx]
                    with open(frame_path, "rb") as f:
                        frame_data = f.read()
                    chunk_size = min(bytes_remaining, bytes_per_frame)
                    chunk = payload[payload_offset : payload_offset + chunk_size]
                    try:
                        stego_frame, stats, ext = _embed_lsb(
                            chunk,
                            frame_data,
                            pixel_key,
                            bits_per_channel=1,
                            output_format="PNG",
                        )
                        with open(frame_path, "wb") as f:
                            f.write(stego_frame)
                        modified_frames += 1
                        payload_offset += chunk_size
                        bytes_remaining -= chunk_size
                    except Exception as e:
                        debug.print(f"Failed to embed in frame {frame_idx}: {e}")
                        raise VideoError(f"Failed to embed in frame {frame_idx}: {e}")
                    # Throttled progress updates (every PROGRESS_INTERVAL frames)
                    if progress_file and frame_idx % PROGRESS_INTERVAL == 0:
                        pct = 10 + int((frame_idx / frames_needed) * 70)
                        _write_progress(progress_file, pct, 100, "embedding")
            _write_progress(progress_file, 80, 100, "reassembling")
            # Reassemble video with modified frames
            stego_video = reassemble_video(
                frames,
                carrier_video,
                fps=1.0,  # I-frame only videos use 1 fps
            )
        _write_progress(progress_file, 100, 100, "complete")
        # Stats describe what was actually written (for UI/reporting).
        video_stats = VideoEmbedStats(
            frames_modified=modified_frames,
            total_frames=video_info.total_frames,
            capacity_used=len(payload) / (capacity + ENCRYPTION_OVERHEAD),
            bytes_embedded=len(payload),
            width=video_info.width,
            height=video_info.height,
            fps=video_info.fps,
            duration_seconds=video_info.duration_seconds,
            embed_mode=EMBED_MODE_VIDEO_LSB,
            codec=VIDEO_OUTPUT_CODEC,
        )
        debug.print(
            f"Video LSB embedding complete: {len(stego_video)} bytes, "
            f"{modified_frames} frames modified"
        )
        return stego_video, video_stats
    except VideoCapacityError:
        raise
    except VideoError:
        raise
    except Exception as e:
        debug.exception(e, "embed_in_video_lsb")
        raise VideoError(f"Failed to embed data in video: {e}") from e
# =============================================================================
# EXTRACTION
# =============================================================================
@debug.time
def extract_from_video_lsb(
    video_data: bytes,
    pixel_key: bytes,
    progress_file: str | None = None,
) -> bytes | None:
    """
    Extract hidden data from video using LSB steganography.

    Extracts I-frames, reads LSBs from the same pseudo-random locations
    used during embedding, and reconstructs the payload.

    Wire format (as produced by the embedder): 4 magic bytes
    (VIDEO_MAGIC_LSB) + 4-byte big-endian payload length + payload,
    spread sequentially across I-frames.

    Args:
        video_data: Raw bytes of the stego video file.
        pixel_key: 32-byte key (must match the one used for embedding).
        progress_file: Optional path for progress JSON.

    Returns:
        Extracted payload bytes (without magic/length prefix), or ``None``
        if extraction fails (wrong key, no data, corrupted).
    """
    debug.print(f"Video LSB extracting from {len(video_data)} byte video")
    debug.data(pixel_key, "Pixel key for extraction")
    try:
        # Get video info
        video_info = get_video_info(video_data)
        debug.print(
            f"Video: {video_info.width}x{video_info.height}, "
            f"{video_info.i_frame_count} I-frames"
        )
        # Extract I-frames
        with tempfile.TemporaryDirectory(prefix="stegasoo_video_extract_") as temp_dir_str:
            temp_dir = Path(temp_dir_str)
            _write_progress(progress_file, 5, 100, "extracting_frames")
            frames, _ = extract_frames(video_data, temp_dir, keyframes_only=True)
            num_frames = len(frames)
            if num_frames == 0:
                debug.print("No I-frames found in video")
                return None
            debug.print(f"Extracted {num_frames} I-frames for extraction")
            _write_progress(progress_file, 20, 100, "extracting_data")
            # First, try to extract from frame 0 to get magic and total length
            frame_path = frames[0]
            with open(frame_path, "rb") as f:
                frame_data = f.read()
            first_chunk = _extract_lsb(frame_data, pixel_key, bits_per_channel=1)
            if first_chunk is None or len(first_chunk) < 8:
                debug.print("Failed to extract initial data from first frame")
                return None
            # Check magic bytes (cheap detection of wrong key / no data)
            magic = first_chunk[:4]
            if magic != VIDEO_MAGIC_LSB:
                debug.print(f"Magic mismatch: got {magic!r}, expected {VIDEO_MAGIC_LSB!r}")
                return None
            # Get total payload length (big-endian u32 after the magic)
            total_length = struct.unpack(">I", first_chunk[4:8])[0]
            debug.print(f"Total payload length: {total_length} bytes")
            # Sanity check: length must fit in the frames we actually have
            pixels_per_frame = video_info.width * video_info.height
            bytes_per_frame = (pixels_per_frame * 3) // 8 - 4  # minus length prefix
            max_possible = bytes_per_frame * num_frames
            if total_length > max_possible or total_length < 1:
                debug.print(f"Invalid payload length: {total_length}")
                return None
            # If the entire payload fits in the first frame, return it directly
            # This matches the simplified single-frame embedding approach
            if len(first_chunk) >= 8 + total_length:
                debug.print("Payload fits in single frame, extracting directly")
                payload = first_chunk[8 : 8 + total_length]
            else:
                # Multi-frame extraction
                debug.print("Multi-frame extraction needed")
                frames_needed = (total_length + 8 + bytes_per_frame - 1) // bytes_per_frame
                frames_needed = min(frames_needed, num_frames)
                # Extract sequentially (matching the embedding approach)
                extracted_chunks = [first_chunk]
                for frame_idx in range(1, frames_needed):
                    frame_path = frames[frame_idx]
                    with open(frame_path, "rb") as f:
                        frame_data = f.read()
                    chunk = _extract_lsb(frame_data, pixel_key, bits_per_channel=1)
                    if chunk:
                        extracted_chunks.append(chunk)
                    if progress_file and frame_idx % PROGRESS_INTERVAL == 0:
                        pct = 20 + int((frame_idx / frames_needed) * 70)
                        _write_progress(progress_file, pct, 100, "extracting_data")
                # Combine chunks and verify we recovered enough bytes
                combined = b"".join(extracted_chunks)
                if len(combined) < 8 + total_length:
                    debug.print(
                        f"Insufficient data: have {len(combined) - 8}, need {total_length}"
                    )
                    return None
                payload = combined[8 : 8 + total_length]
            _write_progress(progress_file, 100, 100, "complete")
            debug.print(f"Video LSB successfully extracted {len(payload)} bytes")
            return payload
    except Exception as e:
        # Extraction is best-effort: any failure is reported as "no data"
        debug.exception(e, "extract_from_video_lsb")
        return None

View File

@ -0,0 +1,732 @@
"""
Stegasoo Video Utilities (v4.4.0)
Video format detection, frame extraction, and transcoding for video steganography.
Dependencies:
- ffmpeg binary: Required for all video operations
- numpy: For frame data manipulation
- PIL/Pillow: For frame image handling
Uses ffmpeg for:
- Format detection and metadata extraction
- I-frame extraction
- Video reassembly with FFV1 lossless codec
"""
from __future__ import annotations
import json
import os
import shutil
import subprocess
import tempfile
from pathlib import Path
from .constants import (
EMBED_MODE_VIDEO_AUTO,
EMBED_MODE_VIDEO_LSB,
MAX_VIDEO_DURATION,
MAX_VIDEO_FILE_SIZE,
MAX_VIDEO_RESOLUTION,
MIN_VIDEO_RESOLUTION,
VALID_VIDEO_EMBED_MODES,
VIDEO_OUTPUT_CODEC,
VIDEO_OUTPUT_CONTAINER,
)
from .debug import get_logger
from .exceptions import (
UnsupportedVideoFormatError,
VideoTranscodeError,
VideoValidationError,
)
from .models import ValidationResult, VideoCapacityInfo, VideoInfo
logger = get_logger(__name__)
# =============================================================================
# FFMPEG AVAILABILITY
# =============================================================================
def has_ffmpeg_support() -> bool:
    """Report whether the ``ffmpeg`` executable is available on PATH.

    Returns:
        True if ffmpeg is found on PATH, False otherwise.
    """
    ffmpeg_path = shutil.which("ffmpeg")
    return ffmpeg_path is not None
def has_ffprobe_support() -> bool:
    """Report whether the ``ffprobe`` executable is available on PATH.

    Returns:
        True if ffprobe is found on PATH, False otherwise.
    """
    ffprobe_path = shutil.which("ffprobe")
    return ffprobe_path is not None
def _require_ffmpeg() -> None:
    """Raise VideoTranscodeError when ffmpeg is not installed."""
    if has_ffmpeg_support():
        return
    raise VideoTranscodeError(
        "ffmpeg is required for video operations. Install ffmpeg on your system."
    )
def _require_ffprobe() -> None:
    """Raise VideoTranscodeError when ffprobe is not installed."""
    if has_ffprobe_support():
        return
    raise VideoTranscodeError(
        "ffprobe is required for video metadata. Install ffmpeg on your system."
    )
# =============================================================================
# FORMAT DETECTION
# =============================================================================
def detect_video_format(video_data: bytes) -> str:
    """Detect video container format from magic bytes.

    Examines the first bytes of video data to identify the container format.

    Magic byte signatures:
    - MP4/M4V: b"ftyp" at offset 4
    - MKV/WebM: b"\\x1a\\x45\\xdf\\xa3" (EBML header)
    - AVI: b"RIFF" at offset 0 + b"AVI " at offset 8
    - MOV: b"ftyp" with "qt  " brand or b"moov"/b"mdat" early

    Args:
        video_data: Raw video file bytes.

    Returns:
        Format string: "mp4", "mkv", "webm", "avi", "mov", or "unknown".
    """
    if len(video_data) < 12:
        logger.debug("detect_video_format: data too short (%d bytes)", len(video_data))
        return "unknown"

    # MP4/M4V/MOV: "ftyp" atom at offset 4
    if video_data[4:8] == b"ftyp":
        # Check brand for specific type
        brand = video_data[8:12]
        # BUGFIX: the QuickTime major brand is b"qt  " (4 bytes, two trailing
        # spaces). The previous 3-byte b"qt " could never equal the 4-byte
        # slice, so MOV files carrying an ftyp atom were misreported as mp4.
        if brand in (b"qt  ", b"mqt "):
            return "mov"
        if brand in (b"isom", b"iso2", b"mp41", b"mp42", b"avc1", b"M4V "):
            return "mp4"
        # Default to mp4 for other ftyp containers
        return "mp4"

    # MKV/WebM: EBML header
    if video_data[:4] == b"\x1a\x45\xdf\xa3":
        # Check doctype to distinguish MKV from WebM
        # WebM uses "webm" doctype, MKV uses "matroska"
        # Simple heuristic: search for doctype string in first 64 bytes
        header = video_data[:64]
        if b"webm" in header.lower():
            return "webm"
        return "mkv"

    # AVI: RIFF....AVI
    if video_data[:4] == b"RIFF" and video_data[8:12] == b"AVI ":
        return "avi"

    # MOV without ftyp (older format): check for moov/mdat atoms
    if video_data[4:8] in (b"moov", b"mdat", b"wide", b"free"):
        return "mov"

    return "unknown"
# =============================================================================
# METADATA EXTRACTION
# =============================================================================
def get_video_info(video_data: bytes) -> VideoInfo:
    """Extract video metadata from raw video bytes.

    Uses ffprobe to extract detailed video information including
    resolution, frame rate, duration, codec, and I-frame count.

    Args:
        video_data: Raw video file bytes.

    Returns:
        VideoInfo dataclass with video metadata.

    Raises:
        UnsupportedVideoFormatError: If the format cannot be detected.
        VideoTranscodeError: If metadata extraction fails.
    """
    _require_ffprobe()
    fmt = detect_video_format(video_data)
    if fmt == "unknown":
        raise UnsupportedVideoFormatError(
            "Cannot detect video format. Supported: MP4, MKV, WebM, AVI, MOV."
        )
    # Write to temp file for ffprobe (delete=False so the subprocess can
    # open it by path; the file is removed in the finally block below)
    with tempfile.NamedTemporaryFile(suffix=f".{fmt}", delete=False) as f:
        f.write(video_data)
        temp_path = f.name
    try:
        # Get stream info (JSON output, first video stream only)
        result = subprocess.run(
            [
                "ffprobe",
                "-v",
                "quiet",
                "-print_format",
                "json",
                "-show_format",
                "-show_streams",
                "-select_streams",
                "v:0",
                temp_path,
            ],
            capture_output=True,
            text=True,
            timeout=60,
        )
        if result.returncode != 0:
            raise VideoTranscodeError(f"ffprobe failed: {result.stderr}")
        info = json.loads(result.stdout)
        # Extract video stream info
        if not info.get("streams"):
            raise VideoTranscodeError("No video stream found in file")
        stream = info["streams"][0]
        format_info = info.get("format", {})
        width = int(stream.get("width", 0))
        height = int(stream.get("height", 0))
        codec = stream.get("codec_name", "unknown")
        # Parse frame rate (can be "30/1" or "29.97")
        fps_str = stream.get("r_frame_rate", "0/1")
        if "/" in fps_str:
            num, den = fps_str.split("/")
            fps = float(num) / float(den) if float(den) > 0 else 0.0
        else:
            fps = float(fps_str)
        # Get duration (stream-level first, container-level as fallback)
        duration = float(stream.get("duration", format_info.get("duration", 0)))
        # Get total frames (exact when ffprobe reports nb_frames)
        nb_frames = stream.get("nb_frames")
        if nb_frames:
            total_frames = int(nb_frames)
        else:
            # Estimate from duration and fps
            total_frames = int(duration * fps) if fps > 0 else 0
        # Get bitrate
        bitrate = None
        if format_info.get("bit_rate"):
            bitrate = int(format_info["bit_rate"])
        # Count I-frames using ffprobe (second pass over the file)
        i_frame_count = _count_i_frames(temp_path, timeout=120)
        return VideoInfo(
            width=width,
            height=height,
            fps=fps,
            duration_seconds=duration,
            total_frames=total_frames,
            i_frame_count=i_frame_count,
            format=fmt,
            codec=codec,
            bitrate=bitrate,
        )
    except json.JSONDecodeError as e:
        raise VideoTranscodeError(f"Failed to parse ffprobe output: {e}")
    except subprocess.TimeoutExpired:
        raise VideoTranscodeError("ffprobe timed out")
    finally:
        # Always clean up the temp copy of the video
        os.unlink(temp_path)
def _count_i_frames(video_path: str, timeout: int = 120) -> int:
    """Count I-frames (keyframes) in a video file.

    Args:
        video_path: Path to video file.
        timeout: Maximum time in seconds.

    Returns:
        Number of I-frames in the video, or 0 when probing fails.
    """
    probe_cmd = [
        "ffprobe",
        "-v",
        "quiet",
        "-select_streams",
        "v:0",
        "-show_entries",
        "frame=pict_type",
        "-of",
        "csv=p=0",
        video_path,
    ]
    try:
        proc = subprocess.run(
            probe_cmd,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
        if proc.returncode != 0:
            logger.warning("Failed to count I-frames: %s", proc.stderr)
            return 0
        # One picture type per output line; tally the 'I' entries.
        entries = proc.stdout.strip().split("\n")
        return sum(1 for entry in entries if entry.strip() == "I")
    except subprocess.TimeoutExpired:
        logger.warning("I-frame counting timed out")
        return 0
    except Exception as e:
        logger.warning("I-frame counting failed: %s", e)
        return 0
# =============================================================================
# FRAME EXTRACTION
# =============================================================================
def extract_frames(
    video_data: bytes,
    output_dir: Path | None = None,
    keyframes_only: bool = True,
) -> tuple[list[Path], VideoInfo]:
    """Extract frames from video as PNG images.

    Uses ffmpeg to extract frames from the video. By default extracts only
    I-frames (keyframes) which are more robust to re-encoding.

    Args:
        video_data: Raw video file bytes.
        output_dir: Directory to save frames (temp dir if None).
            NOTE: when None, the created temp directory is NOT cleaned up
            here — callers own its lifetime.
        keyframes_only: If True, only extract I-frames (keyframes).

    Returns:
        Tuple of (list of frame paths sorted by frame number, VideoInfo).

    Raises:
        VideoTranscodeError: If frame extraction fails.
        UnsupportedVideoFormatError: If the format cannot be detected.
    """
    _require_ffmpeg()
    fmt = detect_video_format(video_data)
    if fmt == "unknown":
        raise UnsupportedVideoFormatError(
            "Cannot detect video format. Supported: MP4, MKV, WebM, AVI, MOV."
        )
    # Get video info first
    video_info = get_video_info(video_data)
    # Create output directory
    if output_dir is None:
        output_dir = Path(tempfile.mkdtemp(prefix="stegasoo_frames_"))
    else:
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)
    # Write video to temp file (deleted in the finally block)
    with tempfile.NamedTemporaryFile(suffix=f".{fmt}", delete=False) as f:
        f.write(video_data)
        video_path = f.name
    try:
        # Build ffmpeg command; -vsync 0 keeps the selected frames 1:1
        cmd = [
            "ffmpeg",
            "-i",
            video_path,
            "-vsync",
            "0",
        ]
        if keyframes_only:
            # Extract only I-frames
            cmd.extend(["-vf", "select='eq(pict_type,I)'"])
        # Output as PNG with frame number
        output_pattern = str(output_dir / "frame_%06d.png")
        cmd.extend(["-start_number", "0", output_pattern])
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=600,  # 10 minute timeout
        )
        if result.returncode != 0:
            raise VideoTranscodeError(f"Frame extraction failed: {result.stderr}")
        # Collect extracted frames (sorted() keeps frame-number order
        # because of the zero-padded %06d pattern)
        frames = sorted(output_dir.glob("frame_*.png"))
        if not frames:
            raise VideoTranscodeError("No frames were extracted from video")
        logger.info(
            "Extracted %d %s from video",
            len(frames),
            "I-frames" if keyframes_only else "frames",
        )
        return frames, video_info
    except subprocess.TimeoutExpired:
        raise VideoTranscodeError("Frame extraction timed out")
    finally:
        os.unlink(video_path)
# =============================================================================
# VIDEO REASSEMBLY
# =============================================================================
def reassemble_video(
    frames: list[Path],
    original_video_data: bytes,
    output_path: Path | None = None,
    fps: float | None = None,
    audio_data: bytes | None = None,
) -> bytes:
    """Reassemble frames back into a video file.

    Creates a new video from the modified frames using FFV1 lossless codec
    in an MKV container. This preserves the embedded data perfectly.

    Args:
        frames: List of frame image paths in order.
        original_video_data: Original video bytes (for audio track extraction).
        output_path: Optional output path (temp file if None).
        fps: Frame rate (auto-detected from original if None).
        audio_data: Optional audio track data to mux in.
            NOTE(review): currently unused by this implementation — audio is
            copied from the original video instead; confirm before relying on it.

    Returns:
        Video file bytes (MKV container with FFV1 codec).

    Raises:
        VideoTranscodeError: If reassembly fails.
    """
    _require_ffmpeg()
    if not frames:
        raise VideoTranscodeError("No frames provided for reassembly")
    # Get original video format
    fmt = detect_video_format(original_video_data)
    if fps is None:
        # Use a fixed low framerate for I-frame sequences
        # since I-frames are sparse (typically 1 per 30-60 frames)
        fps = 1.0  # 1 fps for I-frame only videos
    # Create temp directory for work
    with tempfile.TemporaryDirectory(prefix="stegasoo_reassemble_") as temp_dir_str:
        temp_dir = Path(temp_dir_str)
        # Write original video for audio extraction
        original_path = temp_dir / f"original.{fmt}"
        original_path.write_bytes(original_video_data)
        # Create frame list file for ffmpeg
        frame_list = temp_dir / "frames.txt"
        with open(frame_list, "w") as f:
            for frame in frames:
                # FFmpeg concat format: one file line + its display duration
                f.write(f"file '{frame.absolute()}'\n")
                f.write(f"duration {1.0 / fps}\n")
        # Output path
        if output_path is None:
            output_file = temp_dir / f"output.{VIDEO_OUTPUT_CONTAINER}"
        else:
            output_file = Path(output_path)
        # Build ffmpeg command
        cmd = [
            "ffmpeg",
            "-y",  # Overwrite output
            "-f",
            "concat",
            "-safe",
            "0",
            "-i",
            str(frame_list),
        ]
        # Add audio from original video if available
        # Check if original has audio
        has_audio = _video_has_audio(original_path)
        if has_audio:
            cmd.extend(["-i", str(original_path)])
        # Video encoding settings (FFV1 lossless)
        cmd.extend(
            [
                "-c:v",
                VIDEO_OUTPUT_CODEC,
                "-level",
                "3",  # FFV1 level 3 for better compression
                "-coder",
                "1",  # Range coder
                "-context",
                "1",  # Large context
                "-slicecrc",
                "1",  # Error detection
            ]
        )
        # Audio settings
        if has_audio:
            cmd.extend(
                [
                    "-map",
                    "0:v",  # Video from frames
                    "-map",
                    "1:a?",  # Audio from original (if exists)
                    "-c:a",
                    "copy",  # Copy audio without re-encoding
                ]
            )
        cmd.append(str(output_file))
        logger.debug("Running ffmpeg: %s", " ".join(cmd))
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=600,
        )
        if result.returncode != 0:
            raise VideoTranscodeError(f"Video reassembly failed: {result.stderr}")
        # Read output before the temp directory is torn down
        return output_file.read_bytes()
def _video_has_audio(video_path: Path) -> bool:
"""Check if a video file has an audio stream.
Args:
video_path: Path to video file.
Returns:
True if video has audio, False otherwise.
"""
try:
result = subprocess.run(
[
"ffprobe",
"-v",
"quiet",
"-select_streams",
"a:0",
"-show_entries",
"stream=index",
"-of",
"csv=p=0",
str(video_path),
],
capture_output=True,
text=True,
timeout=30,
)
return bool(result.stdout.strip())
except Exception:
return False
# =============================================================================
# VALIDATION
# =============================================================================
def validate_video(
    video_data: bytes,
    name: str = "Video",
    check_duration: bool = True,
) -> ValidationResult:
    """Validate video data for steganography.

    Checks:
    - Not empty
    - Not too large (MAX_VIDEO_FILE_SIZE)
    - Valid video format (detectable via magic bytes)
    - ffmpeg availability (required for all further processing)
    - Duration within limits (MAX_VIDEO_DURATION) if check_duration=True
    - Resolution within limits (MIN/MAX_VIDEO_RESOLUTION)
    - At least one I-frame to embed into

    Args:
        video_data: Raw video file bytes.
        name: Descriptive name for error messages (default: "Video").
        check_duration: Whether to enforce duration limit (default: True).

    Returns:
        ValidationResult with video info in details on success.
    """
    if not video_data:
        return ValidationResult.error(f"{name} is required")
    if len(video_data) > MAX_VIDEO_FILE_SIZE:
        size_gb = len(video_data) / (1024**3)
        max_gb = MAX_VIDEO_FILE_SIZE / (1024**3)
        return ValidationResult.error(
            f"{name} too large ({size_gb:.1f} GB). Maximum: {max_gb:.0f} GB"
        )
    # Detect format from magic bytes
    fmt = detect_video_format(video_data)
    if fmt == "unknown":
        return ValidationResult.error(
            f"Could not detect {name} format. Supported formats: MP4, MKV, WebM, AVI, MOV."
        )
    # Check ffmpeg availability before attempting metadata extraction
    if not has_ffmpeg_support():
        return ValidationResult.error(
            "ffmpeg is required for video processing. Please install ffmpeg."
        )
    # Extract metadata for further validation. The specific-exception and
    # generic-exception handlers previously produced the identical message,
    # so they are collapsed into a single handler.
    try:
        info = get_video_info(video_data)
    except Exception as e:
        return ValidationResult.error(f"Could not read {name}: {e}")
    # Check duration
    if check_duration and info.duration_seconds > MAX_VIDEO_DURATION:
        return ValidationResult.error(
            f"{name} too long ({info.duration_seconds:.1f}s). "
            f"Maximum: {MAX_VIDEO_DURATION}s ({MAX_VIDEO_DURATION // 60} minutes)"
        )
    # Check resolution bounds
    if info.width < MIN_VIDEO_RESOLUTION[0] or info.height < MIN_VIDEO_RESOLUTION[1]:
        return ValidationResult.error(
            f"{name} resolution too small ({info.width}x{info.height}). "
            f"Minimum: {MIN_VIDEO_RESOLUTION[0]}x{MIN_VIDEO_RESOLUTION[1]}"
        )
    if info.width > MAX_VIDEO_RESOLUTION[0] or info.height > MAX_VIDEO_RESOLUTION[1]:
        return ValidationResult.error(
            f"{name} resolution too large ({info.width}x{info.height}). "
            f"Maximum: {MAX_VIDEO_RESOLUTION[0]}x{MAX_VIDEO_RESOLUTION[1]}"
        )
    # Check I-frame count (embedding targets keyframes only)
    if info.i_frame_count < 1:
        return ValidationResult.error(f"{name} has no I-frames (keyframes) for embedding")
    return ValidationResult.ok(
        width=info.width,
        height=info.height,
        fps=info.fps,
        duration=info.duration_seconds,
        total_frames=info.total_frames,
        i_frame_count=info.i_frame_count,
        format=info.format,
        codec=info.codec,
        bitrate=info.bitrate,
    )
def require_valid_video(video_data: bytes, name: str = "Video") -> None:
    """Validate video, raising VideoValidationError on failure.

    Args:
        video_data: Raw video file bytes.
        name: Descriptive name for error messages.

    Raises:
        VideoValidationError: If validation fails.
    """
    outcome = validate_video(video_data, name)
    if outcome.is_valid:
        return
    raise VideoValidationError(outcome.error_message)
def validate_video_embed_mode(mode: str) -> ValidationResult:
    """Validate a video embedding mode string.

    Args:
        mode: Embedding mode to validate.

    Returns:
        ValidationResult with mode in details on success.
    """
    allowed = VALID_VIDEO_EMBED_MODES | {EMBED_MODE_VIDEO_AUTO}
    if mode in allowed:
        return ValidationResult.ok(mode=mode)
    return ValidationResult.error(
        f"Invalid video embed_mode: '{mode}'. "
        f"Valid options: {', '.join(sorted(allowed))}"
    )
# =============================================================================
# CAPACITY CALCULATION
# =============================================================================
def calculate_video_capacity(video_data: bytes, embed_mode: str = EMBED_MODE_VIDEO_LSB) -> VideoCapacityInfo:
    """Calculate steganographic capacity for a video file.

    Capacity is based on I-frames only (keyframes). Each I-frame provides
    capacity similar to an image of the same dimensions.

    Args:
        video_data: Raw video file bytes.
        embed_mode: Embedding mode (currently only video_lsb).

    Returns:
        VideoCapacityInfo with capacity details.
    """
    # Imported here to avoid a module-level import cycle with steganography
    from .steganography import ENCRYPTION_OVERHEAD

    meta = get_video_info(video_data)
    # RGB image at 1 bit per channel: 3 bits per pixel -> 3/8 byte per pixel
    per_frame_bytes = (meta.width * meta.height * 3) // 8
    # Subtract the encrypted payload header overhead from the total
    usable = (per_frame_bytes * meta.i_frame_count) - ENCRYPTION_OVERHEAD
    return VideoCapacityInfo(
        total_frames=meta.total_frames,
        i_frames=meta.i_frame_count,
        usable_capacity_bytes=max(0, usable),
        embed_mode=embed_mode,
        resolution=(meta.width, meta.height),
        duration_seconds=meta.duration_seconds,
    )

View File

@ -0,0 +1,28 @@
"""
Verisoo - Decentralized image provenance and attestation.
Part of the Soo Suite:
- Stegasoo: covert communication, hiding encrypted messages in images
- Verisoo: overt attestation, proving provenance and building decentralized reputation
"""
__version__ = "0.1.0"
try:
from .models import Attestation, AttestationRecord, Identity
from .exceptions import VerisooError, AttestationError, VerificationError
_AVAILABLE = True
except ImportError:
_AVAILABLE = False
__all__ = [
"__version__",
"_AVAILABLE",
"Attestation",
"AttestationRecord",
"Identity",
"VerisooError",
"AttestationError",
"VerificationError",
]

582
src/soosef/verisoo/api.py Normal file
View File

@ -0,0 +1,582 @@
"""
FastAPI verification service for Verisoo.
Lightweight REST API for:
- Verifying images against attestation records
- Looking up attestations by ID
- Getting proof links
Designed for media orgs and fact-checkers to integrate easily.
Run with: uvicorn soosef.verisoo.api:app --host 0.0.0.0 --port 8000
"""
from __future__ import annotations
import os
from datetime import datetime
from pathlib import Path
from typing import Annotated
try:
from fastapi import FastAPI, File, Form, HTTPException, Query, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel
except ImportError:
raise ImportError("API requires fastapi: pip install verisoo[api]")
from .hashing import compute_all_distances, hash_image, is_same_image
from .models import AttestationRecord, ImageHashes, ProofLink
from .storage import LocalStorage
from .crypto import verify_signature, load_public_key_from_bytes
# Configuration via environment
DATA_DIR = Path(os.environ.get("SOOSEF_DATA_DIR", Path.home() / ".soosef"))
BASE_URL = os.environ.get("VERISOO_BASE_URL", "https://verisoo.io")
app = FastAPI(
title="Verisoo",
description="Decentralized image provenance and attestation API",
version="0.1.0",
docs_url="/docs",
redoc_url="/redoc",
)
# CORS for browser-based verification widgets
app.add_middleware(
CORSMiddleware,
allow_origins=["*"], # Configure appropriately for production
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# --- Response models ---
class LocationResponse(BaseModel):
    """Geolocation attached to an attestation."""

    latitude: float
    longitude: float
    accuracy_meters: float | None = None  # GPS accuracy radius, when reported
    name: str | None = None  # human-readable place name, when available
class DeviceResponse(BaseModel):
    """Capture device information recorded with an attestation."""

    make: str | None = None
    model: str | None = None
    software: str | None = None
class AttestorResponse(BaseModel):
    """Identity summary of the party that signed an attestation."""

    fingerprint: str  # key fingerprint; always present
    name: str | None = None  # display name, when the identity is known locally
class AttestationResponse(BaseModel):
    """One matching attestation returned by the /verify endpoint."""

    record_id: str
    short_id: str
    attestor: AttestorResponse
    attested_at: datetime
    captured_at: datetime | None = None
    location: LocationResponse | None = None
    device: DeviceResponse | None = None
    caption: str | None = None
    proof_url: str  # shareable proof link for this record
    match_type: str  # "exact", "phash", "dhash", etc.
    hash_distances: dict[str, int]  # per-hash distances, for transparency
class VerifyResponse(BaseModel):
    """Result of verifying an uploaded image against the attestation log."""

    verified: bool  # True when at least one attestation matched
    message: str  # human-readable summary of the best match
    attestations: list[AttestationResponse]
    image_sha256: str  # SHA-256 of the uploaded image
    checked_at: datetime
class StatusResponse(BaseModel):
    """Service health and attestation-log statistics."""

    status: str
    record_count: int
    merkle_root: str | None
    log_size_bytes: int
class ProofResponse(BaseModel):
    """Full proof payload for a single attestation record."""

    record_id: str
    short_id: str
    attestor: AttestorResponse
    attested_at: datetime
    captured_at: datetime | None = None
    location: LocationResponse | None = None
    device: DeviceResponse | None = None
    caption: str | None = None
    image_sha256: str
    proof_url: str  # short shareable link
    full_proof_url: str  # long-form link with the full record id
# --- Helper functions ---
def get_storage() -> LocalStorage:
    """Get storage instance (could be cached/pooled in production)."""
    storage = LocalStorage(DATA_DIR)
    return storage
def record_to_attestation_response(
    record,
    identity,
    match_type: str,
    distances: dict[str, int],
) -> AttestationResponse:
    """Convert internal record to API response."""
    capture = record.capture_metadata
    proof_link = ProofLink(record.record_id, BASE_URL)

    # Optional geolocation payload
    location_payload = None
    if record.location:
        location_payload = LocationResponse(
            latitude=record.location.latitude,
            longitude=record.location.longitude,
            accuracy_meters=record.location.accuracy_meters,
            name=record.location.location_name,
        )

    # Optional capture-device payload
    device_payload = None
    if capture and capture.device:
        device_payload = DeviceResponse(
            make=capture.device.make,
            model=capture.device.model,
            software=capture.device.software,
        )

    attestor_name = identity.display_name if identity else None
    return AttestationResponse(
        record_id=record.record_id,
        short_id=record.short_id,
        attestor=AttestorResponse(
            fingerprint=record.attestor_fingerprint,
            name=attestor_name,
        ),
        attested_at=record.timestamp,
        captured_at=record.captured_at,
        location=location_payload,
        device=device_payload,
        caption=capture.caption if capture else None,
        proof_url=proof_link.url,
        match_type=match_type,
        hash_distances=distances,
    )
# --- Endpoints ---
@app.get("/", response_class=JSONResponse)
async def root():
"""API root - basic info."""
return {
"service": "Verisoo",
"description": "Decentralized image provenance and attestation",
"docs": "/docs",
"verify": "POST /verify with image file",
}
@app.get("/status", response_model=StatusResponse)
async def status():
"""Get service status and statistics."""
storage = get_storage()
stats = storage.get_stats()
return StatusResponse(
status="ok",
record_count=stats.record_count,
merkle_root=stats.merkle_root,
log_size_bytes=stats.log_file_size,
)
@app.post("/verify", response_model=VerifyResponse)
async def verify_image(
image: Annotated[UploadFile, File(description="Image to verify")],
threshold: Annotated[int, Query(ge=0, le=50)] = 10,
):
"""
Verify an image against known attestations.
Upload an image (even a mangled social media version) and get back
any matching attestations with provenance information.
The threshold parameter controls how strict matching is:
- 0: Exact perceptual match only
- 5: Very strict
- 10: Default - catches most social media compression
- 15-20: Looser - may catch more heavily modified images
"""
# Read and hash the uploaded image
image_data = await image.read()
try:
image_hashes = hash_image(image_data, robust=True)
except Exception as e:
raise HTTPException(status_code=400, detail=f"Invalid image: {e}")
storage = get_storage()
matches: list[AttestationResponse] = []
# Search for matching attestations
for record in storage.iterate_records():
is_match, match_type = is_same_image(
image_hashes,
record.image_hashes,
perceptual_threshold=threshold,
)
if is_match:
# Get attestor identity for display name
identity = storage.load_identity(record.attestor_fingerprint)
# Compute distances for transparency
distances = compute_all_distances(image_hashes, record.image_hashes)
matches.append(
record_to_attestation_response(record, identity, match_type, distances)
)
# Sort by attestation time (oldest first - establishes priority)
matches.sort(key=lambda m: m.attested_at)
if matches:
first = matches[0]
if first.location:
message = f"Image attested by {first.attestor.name or first.attestor.fingerprint[:8]} in {first.location.name or 'recorded location'} on {first.attested_at.strftime('%Y-%m-%d')}"
else:
message = f"Image attested by {first.attestor.name or first.attestor.fingerprint[:8]} on {first.attested_at.strftime('%Y-%m-%d')}"
else:
message = "No matching attestations found"
return VerifyResponse(
verified=len(matches) > 0,
message=message,
attestations=matches,
image_sha256=image_hashes.sha256,
checked_at=datetime.utcnow(),
)
@app.get("/v/{short_id}", response_model=ProofResponse)
async def get_proof_short(short_id: str):
"""
Get attestation proof by short ID.
This is the endpoint for shareable proof links:
verisoo.io/v/a8f3c2d1e9b7
"""
return await _get_proof(short_id)
@app.get("/verify/{record_id}", response_model=ProofResponse)
async def get_proof_full(record_id: str):
"""Get attestation proof by full record ID."""
return await _get_proof(record_id)
async def _get_proof(id_prefix: str) -> ProofResponse:
    """Internal: look up a proof by record-id prefix or exact short id.

    Scans the whole local log; the first record whose full id starts with
    *id_prefix* or whose short id equals it wins.

    Raises:
        HTTPException: 404 when no record matches.
    """
    storage = get_storage()
    # Search for matching record (linear scan)
    for record in storage.iterate_records():
        if record.record_id.startswith(id_prefix) or record.short_id == id_prefix:
            identity = storage.load_identity(record.attestor_fingerprint)
            cm = record.capture_metadata
            proof = ProofLink(record.record_id, BASE_URL)
            # Optional geolocation payload
            location = None
            if record.location:
                loc = record.location
                location = LocationResponse(
                    latitude=loc.latitude,
                    longitude=loc.longitude,
                    accuracy_meters=loc.accuracy_meters,
                    name=loc.location_name,
                )
            # Optional capture-device payload
            device = None
            if cm and cm.device:
                dev = cm.device
                device = DeviceResponse(
                    make=dev.make,
                    model=dev.model,
                    software=dev.software,
                )
            return ProofResponse(
                record_id=record.record_id,
                short_id=record.short_id,
                attestor=AttestorResponse(
                    fingerprint=record.attestor_fingerprint,
                    name=identity.display_name if identity else None,
                ),
                attested_at=record.timestamp,
                captured_at=record.captured_at,
                location=location,
                device=device,
                caption=cm.caption if cm else None,
                image_sha256=record.image_hashes.sha256,
                proof_url=proof.url,
                full_proof_url=proof.full_url,
            )
    raise HTTPException(status_code=404, detail=f"Attestation not found: {id_prefix}")
@app.get("/attestor/{fingerprint}")
async def get_attestor_records(
fingerprint: str,
limit: Annotated[int, Query(ge=1, le=100)] = 20,
):
"""Get recent attestations by an attestor."""
storage = get_storage()
# Check if this is a known identity
identity = storage.load_identity(fingerprint)
records = storage.get_records_by_attestor(fingerprint)
recent = records[-limit:] if len(records) > limit else records
return {
"attestor": {
"fingerprint": fingerprint,
"name": identity.display_name if identity else None,
"known": identity is not None,
},
"total_attestations": len(records),
"recent": [
{
"record_id": r.record_id,
"short_id": r.short_id,
"attested_at": r.timestamp.isoformat(),
"image_sha256": r.image_hashes.sha256[:16] + "...",
"proof_url": ProofLink(r.record_id, BASE_URL).url,
}
for r in recent
],
}
# --- Mobile sync endpoint ---
class MobileAttestationRequest(BaseModel):
    """Attestation submitted from mobile app."""

    sha256: str  # Image SHA-256 (mobile computed)
    attestor_fingerprint: str
    attestor_public_key: str  # Hex-encoded Ed25519 public key
    timestamp: datetime
    signature: str  # Hex-encoded Ed25519 signature
    metadata: dict | None = None  # Location, caption, device, etc.
class MobileAttestationResponse(BaseModel):
    """Response after processing mobile attestation."""

    success: bool
    record_id: str
    short_id: str
    proof_url: str  # shareable proof link for the new record
    embedded_image_available: bool  # whether a DCT-embedded copy can be downloaded
    message: str
@app.post("/attest", response_model=MobileAttestationResponse)
async def attest_from_mobile(
    image: Annotated[UploadFile, File(description="Original image from mobile")],
    attestation: Annotated[str, Form(description="JSON attestation data")],
):
    """
    Receive attestation from mobile app and finalize it.

    Mobile sends:
    - Original image bytes
    - Pre-signed attestation (SHA-256 + signature + metadata)

    Server:
    1. Verifies signature
    2. Computes perceptual hashes (pHash, dHash)
    3. Embeds proof link via DCT (JPEG only)
    4. Stores in log
    5. Returns confirmation + embedded image endpoint

    The mobile app can then download the embedded image.

    Raises:
        HTTPException 400: malformed attestation JSON, hash mismatch, or
            error during signature verification.
        HTTPException 401: well-formed but invalid signature.
    """
    import json

    # --- Parse the attestation JSON into a validated request model ---
    try:
        data = json.loads(attestation)
        req = MobileAttestationRequest(**data)
    except Exception as e:
        # Chain the cause so logs show what actually failed to parse/validate.
        raise HTTPException(status_code=400, detail=f"Invalid attestation data: {e}") from e

    # Read image
    image_data = await image.read()

    # --- Recompute hashes server-side; device-supplied SHA-256 must match ---
    from .hashing import hash_image

    computed_hashes = hash_image(image_data, robust=True)
    if computed_hashes.sha256 != req.sha256:
        raise HTTPException(
            status_code=400,
            detail=f"SHA-256 mismatch: expected {req.sha256[:16]}..., got {computed_hashes.sha256[:16]}..."
        )

    # --- Verify the Ed25519 signature over the canonical payload ---
    try:
        public_key_bytes = bytes.fromhex(req.attestor_public_key)
        signature_bytes = bytes.fromhex(req.signature)
        # Reconstruct signing payload (must match mobile's format)
        signing_payload = f"{req.sha256}|{req.attestor_fingerprint}|{req.timestamp.isoformat()}"
        if req.metadata:
            signing_payload += f"|{json.dumps(req.metadata, sort_keys=True, separators=(',', ':'))}"
        is_valid = verify_signature(
            signing_payload.encode(),
            signature_bytes,
            public_key_bytes,
        )
        if not is_valid:
            raise HTTPException(status_code=401, detail="Invalid signature")
    except HTTPException:
        raise  # pass the 401 above through untouched
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Signature verification failed: {e}") from e

    # --- Create full record (server adds perceptual hashes) and persist ---
    record = AttestationRecord(
        image_hashes=computed_hashes,  # Includes pHash, dHash from server
        attestor_fingerprint=req.attestor_fingerprint,
        timestamp=req.timestamp,
        signature=signature_bytes,
        metadata=req.metadata or {},
    )
    storage = get_storage()
    index = storage.append_record(record)

    # Update Merkle tree so the root covers the new record
    merkle_log = storage.load_merkle_log()
    storage.set_merkle_root(merkle_log.root_hash)

    # --- Best-effort: embed the proof link via DCT (JPEG only) ---
    embedded_available = False
    proof_link = f"{BASE_URL}/v/{record.short_id}"
    try:
        import tempfile
        from pathlib import Path

        from .embed import get_embed_method

        # get_embed_method inspects a file on disk, so spill to a temp file.
        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as f:
            f.write(image_data)
            temp_path = Path(f.name)
        try:
            if get_embed_method(temp_path) == "dct":
                from .embed import embed_proof_in_jpeg

                embedded_bytes, _stats = embed_proof_in_jpeg(image_data, proof_link)
                # Save embedded image for later retrieval via /embedded/{short_id}
                embedded_path = storage.base_path / "embedded" / f"{record.short_id}.jpg"
                embedded_path.parent.mkdir(parents=True, exist_ok=True)
                embedded_path.write_bytes(embedded_bytes)
                embedded_available = True
        finally:
            # Always remove the temp file; previously it leaked if embedding raised.
            temp_path.unlink(missing_ok=True)
    except Exception as e:
        # DCT embedding is optional; log and continue without it.
        import logging

        logging.warning(f"DCT embedding failed: {e}")

    # --- Register the attestor's identity if not already known ---
    from .models import Identity

    if not storage.load_identity(req.attestor_fingerprint):
        identity = Identity(
            public_key=public_key_bytes,
            fingerprint=req.attestor_fingerprint,
        )
        storage.save_identity(identity)

    return MobileAttestationResponse(
        success=True,
        record_id=record.record_id,
        short_id=record.short_id,
        proof_url=proof_link,
        embedded_image_available=embedded_available,
        message=f"Attestation stored at index {index}" + (
            ", embedded image available" if embedded_available else ""
        ),
    )
@app.get("/embedded/{short_id}")
async def get_embedded_image(short_id: str):
    """
    Serve the DCT-embedded variant of an attested image.

    A file exists on disk only for JPEG uploads whose embedding step
    succeeded; everything else gets a 404.
    """
    from fastapi.responses import FileResponse

    path = get_storage().base_path / "embedded" / f"{short_id}.jpg"
    if path.exists():
        return FileResponse(
            path,
            media_type="image/jpeg",
            filename=f"{short_id}_attested.jpg",
        )
    raise HTTPException(
        status_code=404,
        detail="Embedded image not available. Either not a JPEG or embedding failed."
    )
# --- Health check ---
@app.get("/health")
async def health():
    """Liveness probe for load balancers; always reports healthy."""
    return dict(status="healthy")
# --- Run directly ---
def serve(host: str = "0.0.0.0", port: int = 8000):
    """Run the API server.

    Blocks until uvicorn shuts down. Binds all interfaces by default;
    pass a narrower host for local-only use.
    """
    import uvicorn

    uvicorn.run(app, host=host, port=port)


if __name__ == "__main__":
    # Module executed directly: start the server with defaults.
    serve()

View File

@ -0,0 +1,665 @@
"""
Attestation Creation Module for Verisoo.
This module is the core of Verisoo's provenance system. An attestation is a
cryptographic proof that binds together:
1. AN IMAGE - identified by multiple hashes (SHA-256 + perceptual)
2. AN IDENTITY - the attestor's Ed25519 public key fingerprint
3. A TIMESTAMP - when the attestation was created
4. METADATA - optional context (location, device, caption, etc.)
The binding is secured by an Ed25519 digital signature over all components,
making it computationally infeasible to forge or modify after creation.
Architecture Overview:
----------------------
    Image Data (bytes)    Private Key    Metadata (optional)
          |                    |               |
     Compute Hashes            |          Extract EXIF
      (SHA + pHash)            |            (auto)
          |                    |               |
          +---------+----------+---------------+
                    |
     Build Canonical Signing Payload
       (deterministic byte string)
                    |
              Ed25519 Sign
                    |
            Attestation Record
Security Properties:
-------------------
- **Authenticity**: Only the private key holder can create valid signatures
- **Integrity**: Any modification invalidates the signature
- **Non-repudiation**: Attestor cannot deny creating the attestation
- **Timestamping**: Proves the image existed at attestation time
Usage Example:
-------------
from .attestation import create_attestation
from .crypto import load_private_key
# Load attestor's private key
private_key = load_private_key("~/.verisoo/private.pem")
# Create attestation with auto EXIF extraction
attestation = create_attestation(
image_data=open("photo.jpg", "rb").read(),
private_key=private_key,
metadata={"caption": "Street scene in Kyiv"},
)
# The attestation.record can now be stored in the append-only log
"""
from __future__ import annotations
from datetime import datetime, timezone
from typing import Any
from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey
from .crypto import create_identity, sign
from .hashing import hash_image
from .models import (
Attestation,
AttestationRecord,
CaptureDevice,
CaptureMetadata,
GeoLocation,
ImageHashes,
)
# =============================================================================
# EXIF METADATA EXTRACTION
# =============================================================================
#
# EXIF (Exchangeable Image File Format) is metadata embedded in images by
# cameras and phones. It contains valuable provenance information:
# - When the photo was taken (DateTimeOriginal)
# - Where it was taken (GPS coordinates)
# - What device captured it (Make, Model)
#
# We extract this automatically to enrich attestations, but user-provided
# metadata always takes precedence (in case EXIF is wrong or missing).
# =============================================================================
def extract_exif_metadata(image_data: bytes) -> dict[str, Any]:
    """
    Pull provenance-relevant EXIF fields out of raw image bytes.

    Best-effort extraction. The returned dict may contain:
        captured_at: datetime (UTC) — DateTimeOriginal preferred, then
            DateTimeDigitized, then DateTime
        location: GeoLocation — GPS DMS converted to signed decimal degrees
        device: CaptureDevice — Make / Model / Software
        width, height: int — EXIF-recorded pixel dimensions

    Returns an empty dict whenever the optional ``exifread`` dependency is
    missing, the image has no EXIF block, or parsing fails; attestation
    proceeds without EXIF in those cases. User-supplied metadata always
    takes precedence over these values at merge time.
    """
    # exifread is an optional dependency — degrade gracefully when absent.
    try:
        import io
        import exifread
    except ImportError:
        return {}

    # details=False skips thumbnail and MakerNote: faster, less memory.
    try:
        tags = exifread.process_file(io.BytesIO(image_data), details=False)
    except Exception:
        # Malformed EXIF or unsupported format — continue without EXIF.
        return {}
    if not tags:
        return {}

    extracted: dict[str, Any] = {}

    # Capture timestamp: prefer the field closest to the shutter press.
    # EXIF date strings look like "YYYY:MM:DD HH:MM:SS" (colons in the date).
    for field in ("EXIF DateTimeOriginal", "EXIF DateTimeDigitized", "Image DateTime"):
        if field not in tags:
            continue
        try:
            parsed = datetime.strptime(str(tags[field]), "%Y:%m:%d %H:%M:%S")
        except (ValueError, TypeError):
            continue  # malformed date string — try the next field
        # EXIF carries no timezone info; treat the value as UTC.
        extracted["captured_at"] = parsed.replace(tzinfo=timezone.utc)
        break

    # GPS: three DMS rationals per axis plus hemisphere refs (N/S, E/W).
    lat_tag = tags.get("GPS GPSLatitude")
    lat_ref = tags.get("GPS GPSLatitudeRef")  # "N" or "S"
    lon_tag = tags.get("GPS GPSLongitude")
    lon_ref = tags.get("GPS GPSLongitudeRef")  # "E" or "W"
    if lat_tag and lon_tag:
        try:
            latitude = _parse_gps_coord(lat_tag.values)
            longitude = _parse_gps_coord(lon_tag.values)
            # South and West hemispheres are negative.
            if lat_ref and str(lat_ref) == "S":
                latitude = -latitude
            if lon_ref and str(lon_ref) == "W":
                longitude = -longitude
            extracted["location"] = GeoLocation(latitude=latitude, longitude=longitude)
        except (ValueError, TypeError, AttributeError):
            pass  # malformed GPS block — drop location, keep the rest

    # Capturing device, e.g. "Apple" / "iPhone 15 Pro" / "iOS 17.4".
    make = tags.get("Image Make")
    model = tags.get("Image Model")
    software = tags.get("Image Software")
    if make or model or software:
        extracted["device"] = CaptureDevice(
            make=str(make).strip() if make else None,
            model=str(model).strip() if model else None,
            software=str(software).strip() if software else None,
        )

    # Pixel dimensions as recorded in EXIF (may lag behind later resizes).
    for key, primary, fallback in (
        ("width", "EXIF ExifImageWidth", "Image ImageWidth"),
        ("height", "EXIF ExifImageLength", "Image ImageLength"),
    ):
        tag = tags.get(primary) or tags.get(fallback)
        if tag:
            try:
                extracted[key] = int(str(tag))
            except (ValueError, TypeError):
                pass

    return extracted
def _parse_gps_coord(coord_values: list) -> float:
"""
Convert EXIF GPS coordinate from DMS to decimal degrees.
EXIF stores GPS coordinates as three rational numbers representing
degrees, minutes, and seconds. Each rational has a numerator and
denominator (to handle fractional seconds precisely).
Formula: decimal = degrees + (minutes / 60) + (seconds / 3600)
Args:
coord_values: List of three IfdTag Ratio objects [degrees, minutes, seconds]
Each has .num (numerator) and .den (denominator) attributes
Returns:
Decimal degrees as float (always positive; caller applies hemisphere sign)
Example:
GPS coords "50° 27' 0.36"" stored as:
[Ratio(50/1), Ratio(27/1), Ratio(36/100)]
Returns: 50.45010 (50 + 27/60 + 0.36/3600)
"""
# Extract degrees, minutes, seconds as floats from rational values
d = float(coord_values[0].num) / float(coord_values[0].den) # Degrees
m = float(coord_values[1].num) / float(coord_values[1].den) # Minutes
s = float(coord_values[2].num) / float(coord_values[2].den) # Seconds
# Convert to decimal: degrees + minutes/60 + seconds/3600
return d + (m / 60.0) + (s / 3600.0)
# =============================================================================
# ATTESTATION CREATION
# =============================================================================
#
# The main entry point for creating attestations. This function orchestrates:
# 1. Metadata preparation (EXIF extraction + user overrides)
# 2. Image hashing (cryptographic + perceptual)
# 3. Payload construction (canonical, deterministic format)
# 4. Digital signing (Ed25519)
# 5. Record assembly
#
# The result is an Attestation object containing both the image data and the
# signed record, ready for storage in the append-only log.
# =============================================================================
def create_attestation(
    image_data: bytes,
    private_key: Ed25519PrivateKey,
    metadata: dict[str, Any] | CaptureMetadata | None = None,
    timestamp: datetime | None = None,
    auto_exif: bool = True,
) -> Attestation:
    """
    Create a cryptographically signed attestation binding an image to an
    identity at a point in time.

    Pipeline: hash the image (SHA-256 + perceptual), optionally merge
    EXIF-derived metadata underneath any user-supplied metadata (user
    values always win), build the canonical signing payload, sign it with
    Ed25519, and wrap everything in an Attestation ready for the
    append-only log.

    Args:
        image_data: Raw image bytes (any PIL-readable format).
        private_key: Ed25519 key establishing WHO attests; its public-key
            fingerprint is embedded in the record.
        metadata: dict, CaptureMetadata, or None — extra provenance
            context (location, caption, device, captured_at, ...).
        timestamp: Attestation time; defaults to now (UTC). Note this is
            attestation time, not capture time — capture time belongs in
            metadata (EXIF-extracted or user-provided).
        auto_exif: When True (default), EXIF fields fill gaps in the
            user metadata. Disable for speed, privacy, or known-bad EXIF.

    Returns:
        Attestation with the original image bytes, computed hashes, and
        the signed AttestationRecord.

    Raises:
        ValueError: if the image cannot be decoded for hashing.
        TypeError: if private_key is not an Ed25519PrivateKey.

    Security: the signature covers all fields; changing any of them
    invalidates it. Verify via the verisoo verification module.
    """
    when = timestamp or datetime.now(timezone.utc)

    # Normalize user metadata to a plain dict (copy — never mutate caller's).
    if isinstance(metadata, CaptureMetadata):
        meta = metadata.to_dict()
    elif metadata is None:
        meta = {}
    else:
        meta = dict(metadata)

    # EXIF values act only as defaults; anything the user supplied wins.
    if auto_exif:
        exif = extract_exif_metadata(image_data)
        if exif:
            defaults: dict[str, Any] = {}
            # Convert EXIF objects to JSON-serializable form, but only for
            # keys the user did not already provide.
            if "captured_at" in exif and "captured_at" not in meta:
                defaults["captured_at"] = exif["captured_at"].isoformat()
            if "location" in exif and "location" not in meta:
                defaults["location"] = exif["location"].to_dict()
            if "device" in exif and "device" not in meta:
                defaults["device"] = exif["device"].to_dict()
            for dim in ("width", "height"):
                if dim in exif and dim not in meta:
                    defaults[dim] = exif[dim]
            # User metadata overwrites EXIF defaults (higher precedence).
            defaults.update(meta)
            meta = defaults

    # Hash the image: SHA-256 for exact match, pHash/dHash for perceptual
    # match that survives recompression, resizing, format conversion.
    hashes = hash_image(image_data)

    # Fingerprint is derived from the public key half of private_key.
    identity = create_identity(private_key)

    # Canonical, deterministic payload — must be reproducible at verify time.
    payload = _build_signing_payload(hashes, identity.fingerprint, when, meta)
    signature = sign(private_key, payload)

    record = AttestationRecord(
        image_hashes=hashes,
        signature=signature,
        attestor_fingerprint=identity.fingerprint,
        timestamp=when,
        metadata=meta,
    )
    # Full Attestation carries image bytes (for storage) plus the record.
    return Attestation(
        image_data=image_data,
        image_hashes=hashes,
        record=record,
    )
def create_attestation_from_hashes(
    image_hashes: ImageHashes,
    private_key: Ed25519PrivateKey,
    metadata: dict[str, Any] | None = None,
    timestamp: datetime | None = None,
) -> AttestationRecord:
    """
    Sign an AttestationRecord from pre-computed hashes (no image bytes).

    Lower-level sibling of create_attestation() for distributed signing,
    batch pipelines, re-attestation of known hashes, and tests. Performs
    no hashing and no EXIF extraction, and returns the bare record rather
    than an Attestation wrapper.

    Args:
        image_hashes: Pre-computed ImageHashes (sha256, phash, dhash).
        private_key: Ed25519 private key for signing.
        metadata: Optional metadata dict; used as-is (no auto-extraction).
        timestamp: Override attestation time (defaults to now, UTC).

    Returns:
        AttestationRecord ready for storage, with no image data attached.
    """
    when = timestamp or datetime.now(timezone.utc)
    meta = metadata or {}

    identity = create_identity(private_key)
    signature = sign(
        private_key,
        _build_signing_payload(image_hashes, identity.fingerprint, when, meta),
    )

    return AttestationRecord(
        image_hashes=image_hashes,
        signature=signature,
        attestor_fingerprint=identity.fingerprint,
        timestamp=when,
        metadata=meta,
    )
# =============================================================================
# SIGNING PAYLOAD CONSTRUCTION
# =============================================================================
#
# The signing payload is the exact byte sequence that gets signed. It MUST be:
# 1. Deterministic - same inputs always produce same payload
# 2. Unambiguous - different inputs never produce same payload
# 3. Complete - covers all attested data
#
# We achieve this with:
# - Newline-separated fields (unambiguous delimiter)
# - Sorted JSON keys (deterministic object serialization)
# - No whitespace in JSON (canonical form)
# - UTF-8 encoding (explicit byte representation)
# =============================================================================
def _build_signing_payload(
image_hashes: ImageHashes,
attestor_fingerprint: str,
timestamp: datetime,
metadata: dict[str, Any],
) -> bytes:
"""
Build the canonical byte string for Ed25519 signing.
This function constructs a deterministic representation of all attested
data. The same inputs MUST always produce the same output bytes, as this
is required for signature verification.
Payload Format:
--------------
Line 1: SHA-256 hash (64 hex characters)
Line 2: pHash (perceptual hash, typically 16 hex chars)
Line 3: dHash (difference hash, typically 16 hex chars)
Line 4: Attestor fingerprint (32 hex characters)
Line 5: Timestamp (ISO 8601 format with timezone)
Line 6: Metadata (JSON, sorted keys, no whitespace)
Example payload:
a1b2c3d4... (SHA-256, 64 chars)
f8e7d6c5... (pHash, 16 chars)
1a2b3c4d... (dHash, 16 chars)
9f8e7d6c... (fingerprint, 32 chars)
2024-01-15T10:30:00+00:00
{"caption":"Test","location":{"lat":50.45,"lon":30.52}}
Canonicalization Rules:
----------------------
- JSON keys are sorted alphabetically (Python's sort_keys=True)
- No whitespace in JSON (separators=(",", ":"))
- Timestamp includes timezone (isoformat())
- All lines joined with single newline (no trailing newline)
- Final encoding is UTF-8
Args:
image_hashes: ImageHashes object with sha256, phash, dhash
attestor_fingerprint: Hex string identifying the attestor
timestamp: Datetime object (should have timezone info)
metadata: Dict to serialize as JSON
Returns:
UTF-8 encoded bytes ready for signing
Security Notes:
- Changing any field changes the payload (and invalidates signature)
- The format is designed to be unambiguous (no field can "bleed" into another)
- Verification must use IDENTICAL canonicalization
"""
import json
# Canonical JSON: sorted keys, no whitespace (compact, deterministic)
metadata_json = json.dumps(metadata, sort_keys=True, separators=(",", ":"))
# Join all fields with newlines (unambiguous separator)
payload = "\n".join([
image_hashes.sha256, # Line 1: Cryptographic hash
image_hashes.phash, # Line 2: Perceptual hash (DCT-based)
image_hashes.dhash, # Line 3: Difference hash
attestor_fingerprint, # Line 4: Who is attesting
timestamp.isoformat(), # Line 5: When (ISO 8601)
metadata_json, # Line 6: Additional context
])
# Encode as UTF-8 bytes for signing
return payload.encode("utf-8")

View File

@ -0,0 +1,285 @@
"""
Binary append-only log for attestation records.
Format:
[8 bytes: magic "VERISOO\x00"]
[4 bytes: version (uint32 LE)]
[record]*
Each record:
[4 bytes: length (uint32 LE, not including this header)]
[4 bytes: CRC32 of record bytes]
[length bytes: record data]
This format is:
- Append-only (no rewriting)
- Self-describing (magic + version)
- Corruption-detectable (CRC32 per record)
- Streamable (length-prefixed, can read from offset)
- Federation-friendly (just ship bytes from offset N)
"""
from __future__ import annotations
import struct
import zlib
from dataclasses import dataclass
from pathlib import Path
from typing import BinaryIO, Iterator
from .exceptions import VerisooError
MAGIC = b"VERISOO\x00"
VERSION = 1
HEADER_SIZE = len(MAGIC) + 4 # magic + version
RECORD_HEADER_SIZE = 8 # length + crc32
class LogCorruptionError(VerisooError):
    """Raised when the on-disk log fails validation.

    Covers bad magic, unknown version, truncated headers/records, and
    CRC mismatches. ``offset`` records where in the file (or sync
    buffer) corruption was detected.
    """

    def __init__(self, message: str, offset: int) -> None:
        self.offset = offset
        super().__init__(message)
@dataclass
class LogEntry:
    """An entry in the binary log.

    Produced by BinaryLog iteration. Field order is part of the
    dataclass constructor interface — do not reorder.
    """

    offset: int  # Byte offset in file (start of record header)
    index: int  # Logical index (0, 1, 2, ...)
    data: bytes  # Record payload
    crc32: int  # Stored CRC32 as read from the header (not recomputed)
class BinaryLog:
    """
    Append-only binary log file.

    Layout: 8-byte magic + 4-byte LE version header, then length- and
    CRC32-prefixed records. Thread-safe for concurrent reads,
    single-writer for appends.
    """

    def __init__(self, path: Path) -> None:
        self.path = path
        self._size = 0  # Number of records
        self._file_size = 0  # Bytes written
        # Existing files are fully scanned and CRC-checked up front;
        # otherwise a fresh file with just the header is created.
        if path.exists():
            self._validate_and_count()
        else:
            self._initialize()

    def _initialize(self) -> None:
        """Create new log file with header (magic + version, no records)."""
        with open(self.path, "wb") as f:
            f.write(MAGIC)
            f.write(struct.pack("<I", VERSION))
        self._file_size = HEADER_SIZE
        self._size = 0

    def _validate_and_count(self) -> None:
        """Validate existing log and count records.

        Walks the whole file once, verifying magic, version, and every
        record's length and CRC32. Raises LogCorruptionError with the
        byte offset on the first problem found.
        """
        with open(self.path, "rb") as f:
            # Check magic
            magic = f.read(len(MAGIC))
            if magic != MAGIC:
                raise LogCorruptionError(f"Invalid magic: {magic!r}", 0)
            # Check version (newer-than-known versions are rejected)
            version_bytes = f.read(4)
            if len(version_bytes) < 4:
                raise LogCorruptionError("Truncated header", len(MAGIC))
            version = struct.unpack("<I", version_bytes)[0]
            if version > VERSION:
                raise LogCorruptionError(f"Unknown version: {version}", len(MAGIC))
            # Count records
            count = 0
            while True:
                offset = f.tell()
                header = f.read(RECORD_HEADER_SIZE)
                if not header:
                    break  # clean EOF
                if len(header) < RECORD_HEADER_SIZE:
                    raise LogCorruptionError("Truncated record header", offset)
                length, stored_crc = struct.unpack("<II", header)
                data = f.read(length)
                if len(data) < length:
                    raise LogCorruptionError("Truncated record data", offset)
                # Verify CRC (& 0xFFFFFFFF keeps the value unsigned)
                actual_crc = zlib.crc32(data) & 0xFFFFFFFF
                if actual_crc != stored_crc:
                    raise LogCorruptionError(
                        f"CRC mismatch at record {count}: stored={stored_crc}, actual={actual_crc}",
                        offset,
                    )
                count += 1
            self._size = count
            self._file_size = f.tell()

    @property
    def size(self) -> int:
        """Number of records in the log."""
        return self._size

    @property
    def file_size(self) -> int:
        """Total bytes in the log file."""
        return self._file_size

    def append(self, data: bytes) -> int:
        """
        Append a record to the log.

        Writes a length+CRC32 header followed by the payload, then
        updates the in-memory counters.

        Returns the index of the new record.
        """
        crc = zlib.crc32(data) & 0xFFFFFFFF
        header = struct.pack("<II", len(data), crc)
        with open(self.path, "ab") as f:
            f.write(header)
            f.write(data)
        index = self._size
        self._size += 1
        self._file_size += RECORD_HEADER_SIZE + len(data)
        return index

    def read(self, index: int) -> bytes:
        """Read record at index.

        Note: linear scan from the start of the file — O(n) per read.
        """
        if index < 0 or index >= self._size:
            raise IndexError(f"Index {index} out of range [0, {self._size})")
        for entry in self._iterate_from(0):
            if entry.index == index:
                return entry.data
        raise IndexError(f"Record {index} not found")  # Should never happen

    def read_range(self, start: int, end: int) -> list[bytes]:
        """Read records in range [start, end), clamped to valid bounds."""
        if start < 0:
            start = 0
        if end > self._size:
            end = self._size
        results = []
        for entry in self._iterate_from(0):
            if entry.index >= end:
                break  # past the requested window — stop scanning
            if entry.index >= start:
                results.append(entry.data)
        return results

    def iterate(self) -> Iterator[LogEntry]:
        """Iterate over all records."""
        yield from self._iterate_from(0)

    def _iterate_from(self, start_index: int) -> Iterator[LogEntry]:
        """Iterate records starting from index.

        Stops silently on a truncated trailing header (e.g. a concurrent
        append in progress). CRCs are NOT re-verified here — validation
        happens once at open time in _validate_and_count().
        """
        with open(self.path, "rb") as f:
            f.seek(HEADER_SIZE)
            index = 0
            while True:
                offset = f.tell()
                header = f.read(RECORD_HEADER_SIZE)
                if not header:
                    break
                if len(header) < RECORD_HEADER_SIZE:
                    break
                length, stored_crc = struct.unpack("<II", header)
                data = f.read(length)
                if index >= start_index:
                    yield LogEntry(
                        offset=offset,
                        index=index,
                        data=data,
                        crc32=stored_crc,
                    )
                index += 1

    def get_bytes_from_offset(self, offset: int) -> bytes:
        """
        Get raw bytes from offset to end of file.

        Used for federation: "send me everything after byte N". Offsets
        inside the file header are clamped to the first record.
        """
        if offset < HEADER_SIZE:
            offset = HEADER_SIZE
        with open(self.path, "rb") as f:
            f.seek(offset)
            return f.read()

    def append_raw_bytes(self, data: bytes) -> int:
        """
        Append raw bytes (pre-formatted records) from federation sync.

        The buffer must be a sequence of complete length+CRC32-prefixed
        records; it is fully validated before anything is written, so a
        corrupt buffer leaves the log untouched.

        Returns number of records appended.
        """
        # Validate the incoming data before appending
        records_added = 0
        pos = 0
        while pos < len(data):
            if pos + RECORD_HEADER_SIZE > len(data):
                raise LogCorruptionError("Truncated record in sync data", pos)
            length, stored_crc = struct.unpack("<II", data[pos : pos + RECORD_HEADER_SIZE])
            pos += RECORD_HEADER_SIZE
            if pos + length > len(data):
                raise LogCorruptionError("Truncated record data in sync data", pos)
            record_data = data[pos : pos + length]
            actual_crc = zlib.crc32(record_data) & 0xFFFFFFFF
            if actual_crc != stored_crc:
                raise LogCorruptionError(f"CRC mismatch in sync data at offset {pos}", pos)
            pos += length
            records_added += 1
        # All validated, append in one write
        with open(self.path, "ab") as f:
            f.write(data)
        self._size += records_added
        self._file_size += len(data)
        return records_added

    def truncate_to(self, size: int) -> None:
        """
        Truncate log to first `size` records.

        DANGEROUS: Only use for recovery from corruption.
        """
        if size >= self._size:
            return  # nothing to cut
        # Find byte offset after record `size-1` (i.e. the start of the
        # first record to discard).
        target_offset = HEADER_SIZE
        for entry in self._iterate_from(0):
            if entry.index >= size:
                target_offset = entry.offset
                break
            target_offset = entry.offset + RECORD_HEADER_SIZE + len(entry.data)
        with open(self.path, "r+b") as f:
            f.truncate(target_offset)
        self._size = size
        self._file_size = target_offset

721
src/soosef/verisoo/cli.py Normal file
View File

@ -0,0 +1,721 @@
"""
Command-Line Interface (CLI) for Verisoo.
This module provides the `verisoo` command-line tool for interacting with
the image provenance system. It wraps the core library functionality in
a user-friendly interface.
Command Structure:
-----------------
verisoo
identity # Manage attestor identity (Ed25519 keypair)
generate # Create new identity
show # Display current identity fingerprint
attest <image> # Create attestation for an image
--location, -l # GPS coordinates
--caption, -c # Photographer's notes
--tag, -t # Metadata tags (repeatable)
--no-exif # Disable EXIF extraction
verify <image> # Check image against known attestations
--exact # Require byte-exact match (no perceptual)
log # Query the attestation log
status # Show log statistics
list # List recent attestations
peer # Manage federation peers
add <url> <fp> # Add a peer node
list # List configured peers
serve # Run the API server
--host # Bind address (default: 0.0.0.0)
--port # Listen port (default: 8000)
Global Options:
--------------
--data-dir PATH Override default data directory (~/.verisoo)
--json Output in JSON format (for scripting)
Data Directory Structure:
------------------------
~/.verisoo/
private.pem # Ed25519 private key (PEM format)
public.pem # Ed25519 public key (PEM format)
identity.json # Identity metadata (name, created_at)
attestations.binlog # Append-only binary log of attestation records
index.lmdb/ # LMDB index for fast lookups
merkle.json # Current Merkle tree root hash
peers.json # Federation peer list
Usage Examples:
--------------
# First-time setup
$ verisoo identity generate --name "Photographer Name"
# Attest a photo with location
$ verisoo attest photo.jpg -l "50.45,30.52,10,Kyiv" -c "Morning scene"
# Verify an image (even after social media compression)
$ verisoo verify downloaded_photo.jpg
# Start API server for remote verification
$ verisoo serve --port 8000
# Check log status
$ verisoo log status
Exit Codes:
----------
0: Success
1: Verification failed (no matching attestation found)
2: Error (missing identity, invalid input, etc.)
"""
from __future__ import annotations
import json
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
# -----------------------------------------------------------------------------
# DEPENDENCY CHECK
# Click is required for CLI but optional for library-only usage
# -----------------------------------------------------------------------------
try:
import click
except ImportError:
print("CLI requires click: pip install verisoo[cli]", file=sys.stderr)
sys.exit(1)
@click.group()
@click.option("--data-dir", type=click.Path(path_type=Path), help="Data directory")
@click.option("--json", "json_output", is_flag=True, help="Output as JSON")
@click.pass_context
def main(ctx: click.Context, data_dir: Path | None, json_output: bool) -> None:
    """
    Verisoo - Decentralized image provenance and attestation.
    Part of the Soo Suite. Prove when images were created and by whom.
    """
    # Stash the global options on the click context so every subcommand
    # can read them via ctx.obj.
    ctx.ensure_object(dict)
    ctx.obj.update({"data_dir": data_dir, "json": json_output})
# --- Identity commands ---
@main.group()
def identity() -> None:
    """Manage attestor identity."""
    # Container group only; the work happens in the subcommands below.
    pass
@identity.command("generate")
@click.option("--name", help="Display name for this identity")
@click.option("--password", is_flag=True, help="Encrypt private key with password")
@click.pass_context
def identity_generate(ctx: click.Context, name: str | None, password: bool) -> None:
"""Generate a new attestor identity (Ed25519 keypair)."""
from .crypto import generate_keypair, create_identity, save_private_key, save_public_key
from .storage import LocalStorage
storage = LocalStorage(ctx.obj.get("data_dir"))
if storage.has_node_identity():
raise click.ClickException("Identity already exists. Use --force to overwrite.")
# Generate keypair
private_key, public_key = generate_keypair()
# Get password if requested
key_password = None
if password:
key_password = click.prompt("Password", hide_input=True, confirmation_prompt=True).encode()
# Save keys
save_private_key(private_key, storage.private_key_path, key_password)
save_public_key(public_key, storage.public_key_path)
# Create and save identity
metadata = {"name": name} if name else {}
ident = create_identity(private_key, metadata)
storage.save_identity(ident)
if ctx.obj.get("json"):
click.echo(json.dumps({"fingerprint": ident.fingerprint, "created": ident.created_at.isoformat()}))
else:
click.echo(f"Identity created: {ident.fingerprint}")
click.echo(f"Public key: {storage.public_key_path}")
@identity.command("show")
@click.pass_context
def identity_show(ctx: click.Context) -> None:
"""Show current identity."""
from .crypto import load_public_key, fingerprint_from_pubkey
from .storage import LocalStorage
storage = LocalStorage(ctx.obj.get("data_dir"))
if not storage.has_node_identity():
raise click.ClickException("No identity configured. Run: verisoo identity generate")
public_key = load_public_key(storage.public_key_path)
fingerprint = fingerprint_from_pubkey(public_key.public_key_bytes())
if ctx.obj.get("json"):
click.echo(json.dumps({"fingerprint": fingerprint}))
else:
click.echo(f"Fingerprint: {fingerprint}")
click.echo(f"Public key: {storage.public_key_path}")
# =============================================================================
# ATTESTATION COMMANDS
# =============================================================================
#
# The `attest` command is the primary user-facing operation. It:
# 1. Reads an image file
# 2. Optionally extracts EXIF metadata (enabled by default)
# 3. Merges user-provided metadata (location, caption, tags)
# 4. Creates a cryptographically signed attestation
# 5. Appends the record to the local log
# 6. Updates the Merkle tree root
#
# The result is a tamper-evident proof that the image existed at attestation
# time, attested by the configured identity.
# =============================================================================
def _parse_location(location_str: str) -> dict[str, Any]:
"""
Parse a location string from CLI into a GeoLocation-compatible dict.
This function provides a flexible, human-friendly way to specify GPS
coordinates on the command line. It supports multiple formats to balance
convenience with precision.
Supported Formats:
-----------------
1. Basic coordinates:
"50.4501,30.5234"
{"lat": 50.4501, "lon": 30.5234}
2. With accuracy (meters):
"50.4501,30.5234,10"
{"lat": 50.4501, "lon": 30.5234, "accuracy": 10.0}
3. With accuracy and name:
"50.4501,30.5234,10,Kyiv, Ukraine"
{"lat": 50.4501, "lon": 30.5234, "accuracy": 10.0, "name": "Kyiv, Ukraine"}
4. With name only (if 3rd part is not a number):
"50.4501,30.5234,Kyiv, Ukraine"
{"lat": 50.4501, "lon": 30.5234, "name": "Kyiv, Ukraine"}
Note: The name can contain commas (e.g., "City, Country") because we
use split(",", 3) to limit splitting to first 3 commas.
Args:
location_str: Comma-separated location string from CLI
Returns:
Dict with lat, lon, and optionally accuracy and name
Raises:
click.ClickException: If format is invalid or coordinates out of range
Validation:
- Latitude must be between -90 and 90 (poles)
- Longitude must be between -180 and 180 (date line)
- At minimum, lat and lon are required
"""
# Split into at most 4 parts: lat, lon, accuracy, name
# Using maxsplit=3 allows the name to contain commas
parts = location_str.split(",", 3)
# Validate minimum required parts
if len(parts) < 2:
raise click.ClickException(
"Invalid location format. Use: lat,lon or lat,lon,accuracy or lat,lon,accuracy,name"
)
# Parse latitude and longitude
try:
lat = float(parts[0].strip())
lon = float(parts[1].strip())
except ValueError:
raise click.ClickException("Invalid latitude/longitude values")
# Validate coordinate ranges
if not (-90 <= lat <= 90):
raise click.ClickException("Latitude must be between -90 and 90")
if not (-180 <= lon <= 180):
raise click.ClickException("Longitude must be between -180 and 180")
# Build location dict with required fields
location: dict[str, Any] = {"lat": lat, "lon": lon}
# Parse optional accuracy (3rd part)
if len(parts) >= 3:
try:
accuracy = float(parts[2].strip())
location["accuracy"] = accuracy
except ValueError:
# 3rd part is not a number - treat everything from here as name
# This handles: "50.45,30.52,Kyiv, Ukraine" (no accuracy)
location["name"] = ",".join(parts[2:]).strip()
return location
# Parse optional name (4th part, may contain commas)
if len(parts) >= 4:
location["name"] = parts[3].strip()
return location
@main.command()
@click.argument("image", type=click.Path(exists=True, path_type=Path))
@click.option("--password", is_flag=True, help="Private key is encrypted")
@click.option("--tag", "-t", multiple=True, help="Add metadata tags")
@click.option("--location", "-l", "location_str", help='GPS coords: "lat,lon" or "lat,lon,accuracy,name"')
@click.option("--caption", "-c", help="Photographer's notes")
@click.option("--no-exif", "no_exif", is_flag=True, help="Disable auto EXIF extraction")
@click.option("--embed", "-e", is_flag=True, help="Embed proof link in image (JPEG: DCT, other: XMP sidecar)")
@click.option("--base-url", default="https://verisoo.io", help="Base URL for proof links")
@click.pass_context
def attest(
    ctx: click.Context,
    image: Path,
    password: bool,
    tag: tuple[str, ...],
    location_str: str | None,
    caption: str | None,
    no_exif: bool,
    embed: bool,
    base_url: str,
) -> None:
    """
    Create a cryptographic attestation for an image.

    This command creates a signed record proving that YOU attested THIS IMAGE
    at THIS TIME with THIS METADATA. The attestation is stored in your local
    log and can be synced to federation peers.

    \b
    METADATA SOURCES (in order of precedence):
      1. Command-line options (--location, --caption, --tag)
      2. EXIF data from the image (unless --no-exif)

    \b
    PROOF EMBEDDING (--embed):
      For JPEG: Uses DCT steganography (invisible, survives compression)
      For PNG/RAW: Creates XMP sidecar file readable by Lightroom/Darktable

    \b
    EXAMPLES:
      # Basic attestation (auto-extracts EXIF)
      verisoo attest photo.jpg

      # With proof link embedded in image
      verisoo attest photo.jpg --embed

      # With manual location (overrides EXIF GPS)
      verisoo attest photo.jpg -l "50.45,30.52,10,Kyiv"

      # With caption and tags
      verisoo attest photo.jpg -c "Morning scene" -t news -t ukraine

      # Skip EXIF extraction
      verisoo attest photo.jpg --no-exif

    \b
    OUTPUT:
      Attested: photo.jpg
        SHA-256: a1b2c3d4e5f6...
        Index: 42
        Root: 9f8e7d6c5b4a...
        Embedded: photo_attested.jpg (DCT)
    """
    from .attestation import create_attestation
    from .crypto import load_private_key
    from .storage import LocalStorage

    # -------------------------------------------------------------------------
    # Initialize storage and verify identity exists
    # -------------------------------------------------------------------------
    storage = LocalStorage(ctx.obj.get("data_dir"))
    if not storage.has_node_identity():
        raise click.ClickException("No identity configured. Run: verisoo identity generate")

    # -------------------------------------------------------------------------
    # Load the attestor's private key
    # Optionally decrypt if password-protected
    # -------------------------------------------------------------------------
    key_password = None
    if password:
        key_password = click.prompt("Password", hide_input=True).encode()
    private_key = load_private_key(storage.private_key_path, key_password)

    # -------------------------------------------------------------------------
    # Read image file (fully in memory; hashing and signing need the bytes)
    # -------------------------------------------------------------------------
    image_data = image.read_bytes()

    # -------------------------------------------------------------------------
    # Build metadata from CLI options
    # These will override any EXIF data with the same keys
    # (precedence is applied inside create_attestation)
    # -------------------------------------------------------------------------
    metadata: dict[str, Any] = {}

    # Tags (can specify multiple: -t news -t ukraine)
    if tag:
        metadata["tags"] = list(tag)

    # Always record the original filename
    metadata["filename"] = image.name

    # Parse and add location if provided via CLI
    # This OVERRIDES any GPS data from EXIF
    if location_str:
        metadata["location"] = _parse_location(location_str)

    # Add caption (photographer's notes)
    if caption:
        metadata["caption"] = caption

    # -------------------------------------------------------------------------
    # Create the attestation
    # This: computes hashes, extracts EXIF (if enabled), signs the record
    # -------------------------------------------------------------------------
    attestation = create_attestation(
        image_data, private_key, metadata, auto_exif=not no_exif
    )

    # -------------------------------------------------------------------------
    # Store the attestation record
    # Appends to binary log and updates LMDB index
    # -------------------------------------------------------------------------
    index = storage.append_record(attestation.record)

    # -------------------------------------------------------------------------
    # Update Merkle tree root
    # This provides tamper-evidence for the entire log
    # -------------------------------------------------------------------------
    merkle_log = storage.load_merkle_log()
    storage.set_merkle_root(merkle_log.root_hash)

    # -------------------------------------------------------------------------
    # Embed proof link if requested
    # (JPEG -> DCT steganography, other formats -> XMP sidecar; see .embed)
    # -------------------------------------------------------------------------
    embed_result = None
    if embed:
        from .embed import embed_proof_link

        # Generate proof link from attestation record
        proof_link = f"{base_url}/v/{attestation.record.short_id}"

        embed_result = embed_proof_link(
            image_path=image,
            proof_link=proof_link,
            fingerprint=attestation.record.attestor_fingerprint,
            attested_at=attestation.record.timestamp,
            image_sha256=attestation.image_hashes.sha256,
            caption=caption,
        )

    # -------------------------------------------------------------------------
    # Output results (JSON for scripting, human-readable otherwise)
    # -------------------------------------------------------------------------
    if ctx.obj.get("json"):
        result = {
            "index": index,
            "sha256": attestation.image_hashes.sha256,
            "attestor": attestation.record.attestor_fingerprint,
            "timestamp": attestation.record.timestamp.isoformat(),
            "root": merkle_log.root_hash,
        }
        if embed_result:
            result["embed"] = {
                "success": embed_result.success,
                "method": embed_result.method,
                "output": str(embed_result.output_path) if embed_result.output_path else None,
                "proof_link": embed_result.proof_link,
                "message": embed_result.message,
            }
        click.echo(json.dumps(result))
    else:
        click.echo(f"Attested: {image.name}")
        click.echo(f" SHA-256: {attestation.image_hashes.sha256[:16]}...")
        click.echo(f" Index: {index}")
        click.echo(f" Root: {merkle_log.root_hash[:16]}...")
        if embed_result:
            if embed_result.success:
                click.echo(f" Embedded: {embed_result.output_path.name if embed_result.output_path else 'N/A'} ({embed_result.method.upper()})")
                click.echo(f" Proof: {embed_result.proof_link}")
            else:
                click.echo(f" Embed failed: {embed_result.message}")
@main.command()
@click.argument("image", type=click.Path(exists=True, path_type=Path))
@click.option("--exact", is_flag=True, help="Require exact byte match (not perceptual)")
@click.pass_context
def verify(ctx: click.Context, image: Path, exact: bool) -> None:
    """Verify an image against known attestations."""
    from .hashing import hash_image
    from .verification import find_attestations_for_image
    from .storage import LocalStorage

    storage = LocalStorage(ctx.obj.get("data_dir"))

    # Hash the candidate image once; used for both matching and display.
    payload = image.read_bytes()
    digests = hash_image(payload)

    # Threshold 0 forces byte-exact matching; 10 allows perceptual matches
    # (e.g. the image survived social-media recompression).
    known_records = list(storage.iterate_records())
    matches = find_attestations_for_image(
        payload, known_records, perceptual_threshold=0 if exact else 10
    )

    if ctx.obj.get("json"):
        report = {
            "image": str(image),
            "sha256": digests.sha256,
            "matches": len(matches),
            "attestations": [
                {
                    "attestor": m.attestor_fingerprint,
                    "timestamp": m.timestamp.isoformat(),
                    "exact": m.image_hashes.sha256 == digests.sha256,
                }
                for m in matches
            ],
        }
        click.echo(json.dumps(report))
        return

    if not matches:
        click.echo(f"No attestations found for {image.name}")
        click.echo(f" SHA-256: {digests.sha256[:16]}...")
        sys.exit(1)

    click.echo(f"Found {len(matches)} attestation(s) for {image.name}")
    for m in matches:
        kind = "exact" if m.image_hashes.sha256 == digests.sha256 else "perceptual"
        click.echo(f" [{kind}] {m.attestor_fingerprint[:16]}... @ {m.timestamp.isoformat()}")
# --- Log commands ---
@main.group()
def log() -> None:
    """Query the attestation log."""
    # Container group for the `status` and `list` subcommands.
    pass
@log.command("status")
@click.pass_context
def log_status(ctx: click.Context) -> None:
"""Show log status (size, root hash)."""
from .storage import LocalStorage
storage = LocalStorage(ctx.obj.get("data_dir"))
stats = storage.get_stats()
if ctx.obj.get("json"):
click.echo(json.dumps({
"records": stats.record_count,
"root": stats.merkle_root,
"log_size_bytes": stats.log_file_size,
"unique_images": stats.unique_images_sha256,
"attestors": stats.attestor_count,
}))
else:
click.echo(f"Records: {stats.record_count}")
click.echo(f"Root: {stats.merkle_root or '(empty)'}")
click.echo(f"Log size: {stats.log_file_size:,} bytes")
click.echo(f"Unique images: {stats.unique_images_sha256}")
click.echo(f"Attestors: {stats.attestor_count}")
@log.command("list")
@click.option("--limit", "-n", default=10, help="Number of records to show")
@click.pass_context
def log_list(ctx: click.Context, limit: int) -> None:
"""List recent attestations."""
from .storage import LocalStorage
storage = LocalStorage(ctx.obj.get("data_dir"))
records = list(storage.iterate_records())
recent = records[-limit:] if len(records) > limit else records
if ctx.obj.get("json"):
click.echo(json.dumps([
{
"sha256": r.image_hashes.sha256,
"attestor": r.attestor_fingerprint,
"timestamp": r.timestamp.isoformat(),
}
for r in recent
]))
else:
for r in recent:
click.echo(f"{r.timestamp.isoformat()} {r.attestor_fingerprint[:16]}... {r.image_hashes.sha256[:16]}...")
# --- Peer commands ---
@main.group()
def peer() -> None:
    """Manage federation peers."""
    # Container group for the `add` and `list` subcommands.
    pass
@peer.command("add")
@click.argument("url")
@click.argument("fingerprint")
@click.pass_context
def peer_add(ctx: click.Context, url: str, fingerprint: str) -> None:
"""Add a federation peer."""
from .storage import LocalStorage
storage = LocalStorage(ctx.obj.get("data_dir"))
peers = storage.load_peers()
# Check for duplicates
if any(p["url"] == url for p in peers):
raise click.ClickException(f"Peer already exists: {url}")
peers.append({
"url": url,
"fingerprint": fingerprint,
"added": datetime.now(timezone.utc).isoformat(),
})
storage.save_peers(peers)
click.echo(f"Added peer: {url}")
@peer.command("list")
@click.pass_context
def peer_list(ctx: click.Context) -> None:
"""List federation peers."""
from .storage import LocalStorage
storage = LocalStorage(ctx.obj.get("data_dir"))
peers = storage.load_peers()
if ctx.obj.get("json"):
click.echo(json.dumps(peers))
else:
if not peers:
click.echo("No peers configured")
else:
for p in peers:
click.echo(f"{p['url']} ({p['fingerprint'][:16]}...)")
# =============================================================================
# API SERVER COMMAND
# =============================================================================
#
# The `serve` command launches a FastAPI-based HTTP server for:
# - Remote image verification (POST /verify)
# - Proof lookup by ID (GET /v/{short_id})
# - Status and health checks
#
# This enables integration with:
# - Newsroom editorial systems
# - Fact-checking workflows
# - Mobile apps
# - Browser-based verification widgets
#
# The server reads from the local attestation log - it doesn't create
# attestations (that requires the private key, which should stay local).
# =============================================================================
@main.command()
@click.option("--host", default="0.0.0.0", help="Host to bind to")
@click.option("--port", default=8000, type=int, help="Port to listen on")
def serve(host: str, port: int) -> None:
    """
    Run the Verisoo verification API server.

    Starts a FastAPI server that exposes verification endpoints. This allows
    remote clients to verify images against your local attestation log.

    \b
    ENDPOINTS:
      POST /verify Upload image for verification
      GET /v/{short_id} Get proof by short ID
      GET /status Server status and statistics
      GET /health Health check for load balancers

    \b
    EXAMPLES:
      # Start on default port
      verisoo serve

      # Custom port
      verisoo serve --port 9000

      # Bind to localhost only (no external access)
      verisoo serve --host 127.0.0.1

    \b
    CLIENT USAGE:
      # Verify an image
      curl -X POST -F "image=@photo.jpg" http://localhost:8000/verify

      # Get proof details
      curl http://localhost:8000/v/a8f3c2d1e9b7

    \b
    ENVIRONMENT VARIABLES:
      VERISOO_DATA_DIR Override data directory
      VERISOO_BASE_URL Base URL for proof links (default: https://verisoo.io)

    \b
    SECURITY NOTES:
      - The server is READ-ONLY (cannot create attestations)
      - CORS is enabled by default (configure for production)
      - No authentication by default (add API keys for production)
    """
    # -------------------------------------------------------------------------
    # Import the API module (requires fastapi, uvicorn)
    # These are optional dependencies: pip install verisoo[api]
    # NOTE(review): the "verisoo[api]" extra name looks stale after the
    # soosef monorepo consolidation (likely "soosef[api]" now) — confirm
    # before changing the user-facing message.
    # -------------------------------------------------------------------------
    try:
        from .api import serve as run_server
    except ImportError:
        raise click.ClickException("API server requires fastapi: pip install verisoo[api]")

    # -------------------------------------------------------------------------
    # Start the server
    # Uses uvicorn as the ASGI server
    # -------------------------------------------------------------------------
    click.echo(f"Starting Verisoo API server on {host}:{port}")
    click.echo("Press Ctrl+C to stop")
    run_server(host=host, port=port)
# =============================================================================
# ENTRY POINT
# =============================================================================
if __name__ == "__main__":
main()

View File

@ -0,0 +1,178 @@
"""
Cryptographic primitives for Verisoo.
Ed25519 for signatures (fast, small keys, deterministic).
SHA-256 for content hashing.
"""
from __future__ import annotations
import hashlib
import secrets
from datetime import datetime, timezone
from pathlib import Path
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric.ed25519 import (
Ed25519PrivateKey,
Ed25519PublicKey,
)
from .exceptions import IdentityError, SignatureError
from .models import Identity
def generate_keypair() -> tuple[Ed25519PrivateKey, Ed25519PublicKey]:
    """Create a fresh Ed25519 keypair and return it as (private, public)."""
    priv = Ed25519PrivateKey.generate()
    return priv, priv.public_key()
def create_identity(
    private_key: Ed25519PrivateKey, metadata: dict | None = None
) -> Identity:
    """
    Build an Identity record for *private_key*.

    The fingerprint is the first 32 hex chars (16 bytes) of the SHA-256
    digest over the raw 32-byte public key.
    """
    raw_public = private_key.public_key().public_bytes(
        encoding=serialization.Encoding.Raw,
        format=serialization.PublicFormat.Raw,
    )
    return Identity(
        public_key=raw_public,
        fingerprint=hashlib.sha256(raw_public).hexdigest()[:32],
        created_at=datetime.now(timezone.utc),
        metadata=metadata or {},
    )
def sign(private_key: Ed25519PrivateKey, data: bytes) -> bytes:
    """Sign data with Ed25519. Returns the 64-byte raw signature."""
    return private_key.sign(data)
def verify(public_key: bytes | Ed25519PublicKey, signature: bytes, data: bytes) -> bool:
    """
    Verify an Ed25519 signature.

    Returns True if valid, raises SignatureError if invalid.
    (See verify_signature() for the non-raising variant.)
    """
    key = (
        Ed25519PublicKey.from_public_bytes(public_key)
        if isinstance(public_key, bytes)
        else public_key
    )
    try:
        key.verify(signature, data)
    except Exception as e:
        raise SignatureError(f"Signature verification failed: {e}")
    return True
def hash_bytes(data: bytes) -> str:
    """Return the hex-encoded SHA-256 digest of *data*."""
    digest = hashlib.sha256(data)
    return digest.hexdigest()
def fingerprint_from_pubkey(public_key: bytes) -> str:
    """Return the identity fingerprint: first 32 hex chars of SHA-256(key)."""
    full_digest = hashlib.sha256(public_key).hexdigest()
    return full_digest[:32]
def save_private_key(
    private_key: Ed25519PrivateKey,
    path: Path,
    password: bytes | None = None,
) -> None:
    """
    Write *private_key* to *path* as PKCS#8 PEM.

    With a password the PEM is encrypted using cryptography's
    BestAvailableEncryption for that password; without one it is stored
    unencrypted. File permissions are tightened to 0600 where supported.
    """
    algorithm = (
        serialization.BestAvailableEncryption(password)
        if password
        else serialization.NoEncryption()
    )
    path.write_bytes(
        private_key.private_bytes(
            encoding=serialization.Encoding.PEM,
            format=serialization.PrivateFormat.PKCS8,
            encryption_algorithm=algorithm,
        )
    )
    # Best effort: chmod is meaningless on some platforms/filesystems.
    try:
        path.chmod(0o600)
    except OSError:
        pass
def load_private_key(path: Path, password: bytes | None = None) -> Ed25519PrivateKey:
    """
    Load an Ed25519 private key from a PEM file.

    Raises:
        IdentityError: if the file cannot be parsed/decrypted, or if it
            contains a key of a different type.
    """
    pem = path.read_bytes()
    try:
        private_key = serialization.load_pem_private_key(pem, password=password)
    except Exception as e:
        raise IdentityError(f"Failed to load private key: {e}")
    # BUG FIX: this check used to live inside the try block, so its
    # IdentityError was swallowed and re-wrapped into a misleading
    # "Failed to load private key: Expected Ed25519 key, ..." message.
    if not isinstance(private_key, Ed25519PrivateKey):
        raise IdentityError(f"Expected Ed25519 key, got {type(private_key)}")
    return private_key
def save_public_key(public_key: Ed25519PublicKey | bytes, path: Path) -> None:
    """Write *public_key* (object or raw 32 bytes) to *path* as SPKI PEM."""
    key = (
        Ed25519PublicKey.from_public_bytes(public_key)
        if isinstance(public_key, bytes)
        else public_key
    )
    path.write_bytes(
        key.public_bytes(
            encoding=serialization.Encoding.PEM,
            format=serialization.PublicFormat.SubjectPublicKeyInfo,
        )
    )
def load_public_key(path: Path) -> Ed25519PublicKey:
    """
    Load an Ed25519 public key from a PEM file.

    Raises:
        IdentityError: if the file cannot be parsed, or if it contains a
            key of a different type.
    """
    pem = path.read_bytes()
    try:
        public_key = serialization.load_pem_public_key(pem)
    except Exception as e:
        raise IdentityError(f"Failed to load public key: {e}")
    # BUG FIX: keep the type check outside the try so its IdentityError is
    # not caught and re-wrapped into a doubled "Failed to load..." message.
    if not isinstance(public_key, Ed25519PublicKey):
        raise IdentityError(f"Expected Ed25519 key, got {type(public_key)}")
    return public_key
def load_public_key_from_bytes(public_key_bytes: bytes) -> Ed25519PublicKey:
    """Load public key from raw 32-byte Ed25519 public key.

    Raises IdentityError when the bytes are not a valid Ed25519 key.
    """
    try:
        return Ed25519PublicKey.from_public_bytes(public_key_bytes)
    except Exception as e:
        raise IdentityError(f"Failed to load public key from bytes: {e}")
def verify_signature(data: bytes, signature: bytes, public_key_bytes: bytes) -> bool:
    """
    Check an Ed25519 signature without raising.

    Args:
        data: The signed payload.
        signature: 64-byte Ed25519 signature.
        public_key_bytes: 32-byte raw Ed25519 public key.

    Returns:
        True when the signature is valid; False on any failure (bad key
        bytes, malformed signature, or mismatch).
    """
    try:
        Ed25519PublicKey.from_public_bytes(public_key_bytes).verify(signature, data)
    except Exception:
        return False
    return True

537
src/soosef/verisoo/embed.py Normal file
View File

@ -0,0 +1,537 @@
"""
Proof Link Embedding Module for Verisoo.
This module handles embedding proof links into images after attestation.
Two strategies are used depending on the image format:
1. JPEG: DCT steganography via stegasoo
- Embeds in frequency domain (survives recompression)
- Uses center region for robustness against cropping
- Invisible to human eye
2. PNG/RAW: XMP sidecar file
- Standard metadata format used by Lightroom/Darktable/etc.
- Travels with the image file
- Not steganographic (plaintext XML)
The proof link format: https://verisoo.io/v/{short_id}
"""
from __future__ import annotations
import io
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Any
from PIL import Image
# Stegasoo integration — imported as a pip dependency (no path hacks needed).
# Install stegasoo[dct] to enable DCT steganography for JPEG proof embedding.
try:
from soosef.stegasoo.dct_steganography import (
embed_in_dct,
extract_from_dct,
has_dct_support,
has_jpegio_support,
calculate_dct_capacity,
)
HAS_STEGASOO = True
except ImportError:
HAS_STEGASOO = False
has_dct_support = lambda: False
has_jpegio_support = lambda: False
# =============================================================================
# CONSTANTS
# =============================================================================
# Fixed public seed for Verisoo proof links.
# This is intentionally public - anyone should be able to extract the proof link.
VERISOO_SEED = b"verisoo"

# Base URL for proof links
DEFAULT_BASE_URL = "https://verisoo.io"

# XMP namespace URI used for Verisoo properties in sidecar files
XMP_NAMESPACE = "https://verisoo.io/ns/1.0/"

# Supported formats for DCT (frequency-domain) embedding: JPEG only
DCT_FORMATS = {".jpg", ".jpeg"}

# Formats that need an XMP sidecar instead of in-image embedding
XMP_FORMATS = {".png", ".tiff", ".tif", ".webp", ".heic", ".heif"}
RAW_FORMATS = {".cr2", ".cr3", ".nef", ".arw", ".raf", ".orf", ".rw2", ".dng", ".raw"}
# =============================================================================
# DATA CLASSES
# =============================================================================
@dataclass
class EmbedResult:
    """Result of a proof link embedding operation."""

    success: bool  # True when the embed (or sidecar write) completed
    method: str  # "dct", "xmp", "none"
    output_path: Path | None  # file produced by the embed step, if any
    proof_link: str  # the proof URL that was embedded
    message: str  # human-readable status / failure detail
@dataclass
class ExtractResult:
    """Result of extracting a proof link from an image."""

    success: bool  # True when a proof link was recovered
    method: str  # "dct", "xmp", "none"
    proof_link: str | None  # recovered proof URL, or None when not found
    message: str  # human-readable status / failure detail
# =============================================================================
# XMP SIDECAR GENERATION
# =============================================================================
def generate_xmp_sidecar(
    proof_link: str,
    fingerprint: str,
    attested_at: datetime,
    image_sha256: str,
    caption: str | None = None,
) -> str:
    """
    Generate XMP sidecar XML content for a Verisoo attestation.

    Produces a standard XMP packet readable by Lightroom, Darktable, and
    other photo management software.

    Args:
        proof_link: Full proof URL (e.g., "https://verisoo.io/v/abc123")
        fingerprint: Attestor's fingerprint
        attested_at: Attestation timestamp
        image_sha256: SHA-256 hash of the image
        caption: Optional caption/description

    Returns:
        Well-formed XMP XML content as a string.
    """
    def escape(s: str) -> str:
        # Escape XML attribute specials; '&' first so entities aren't doubled.
        return (s.replace("&", "&amp;")
                .replace("<", "&lt;")
                .replace(">", "&gt;")
                .replace('"', "&quot;"))

    caption_attr = f'\n      verisoo:Caption="{escape(caption)}"' if caption else ""

    # BUG FIX: the previous template self-closed <rdf:Description .../> but
    # then emitted a stray </rdf:Description> and never closed <rdf:RDF>,
    # producing malformed XML that strict XMP/RDF parsers reject.
    return f'''<?xml version="1.0" encoding="UTF-8"?>
<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="Verisoo">
  <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <rdf:Description rdf:about=""
      xmlns:verisoo="{XMP_NAMESPACE}"
      verisoo:ProofLink="{escape(proof_link)}"
      verisoo:Fingerprint="{escape(fingerprint)}"
      verisoo:AttestedAt="{attested_at.isoformat()}"
      verisoo:ImageSHA256="{escape(image_sha256)}"{caption_attr}/>
  </rdf:RDF>
</x:xmpmeta>
'''
def write_xmp_sidecar(
    image_path: Path,
    proof_link: str,
    fingerprint: str,
    attested_at: datetime,
    image_sha256: str,
    caption: str | None = None,
) -> Path:
    """
    Write an XMP sidecar file alongside an image.

    The sidecar lives in the image's directory and is named
    {image_name}.xmp (photo.jpg -> photo.jpg.xmp). The double-extension
    form is more explicit than the bare .xmp some software prefers.

    Args:
        image_path: Path to the original image
        proof_link: Full proof URL
        fingerprint: Attestor's fingerprint
        attested_at: Attestation timestamp
        image_sha256: SHA-256 hash of the image
        caption: Optional caption

    Returns:
        Path to the created XMP sidecar file
    """
    sidecar = image_path.with_suffix(image_path.suffix + ".xmp")
    sidecar.write_text(
        generate_xmp_sidecar(
            proof_link=proof_link,
            fingerprint=fingerprint,
            attested_at=attested_at,
            image_sha256=image_sha256,
            caption=caption,
        ),
        encoding="utf-8",
    )
    return sidecar
def read_xmp_sidecar(image_path: Path) -> dict[str, str] | None:
    """
    Read Verisoo metadata from an XMP sidecar file.

    Args:
        image_path: Path to the image (sidecar path is derived)

    Returns:
        Dict with proof_link, fingerprint, etc. or None if not found
    """
    import re

    # Preferred naming first (image.jpg.xmp), then the bare form (image.xmp).
    candidates = (
        image_path.with_suffix(image_path.suffix + ".xmp"),
        image_path.with_suffix(".xmp"),
    )
    sidecar = next((p for p in candidates if p.exists()), None)
    if sidecar is None:
        return None

    try:
        text = sidecar.read_text(encoding="utf-8")
        # Lightweight regex scan - full XML parsing is unnecessary here.
        attr_names = {
            "proof_link": "ProofLink",
            "fingerprint": "Fingerprint",
            "attested_at": "AttestedAt",
            "image_sha256": "ImageSHA256",
            "caption": "Caption",
        }
        found: dict[str, str] = {}
        for key, attr in attr_names.items():
            m = re.search(rf'verisoo:{attr}="([^"]*)"', text)
            if m:
                found[key] = m.group(1)
        return found or None
    except Exception:
        return None
# =============================================================================
# DCT EMBEDDING (JPEG)
# =============================================================================
def embed_proof_in_jpeg(
    image_data: bytes,
    proof_link: str,
) -> tuple[bytes, dict[str, Any]]:
    """
    Embed a proof link into a JPEG image using DCT steganography.

    Uses stegasoo's DCT embedding with:
    - Fixed public seed (b"verisoo") so anyone can extract
    - Center-biased embedding for crop resistance (TODO)
    - Minimal quality impact (only ~25 blocks needed)

    Args:
        image_data: Original JPEG bytes
        proof_link: Proof URL to embed (e.g., "https://verisoo.io/v/abc123")

    Returns:
        Tuple of (embedded_image_bytes, stats_dict)

    Raises:
        ImportError: If stegasoo is not available
        ValueError: If image is too small or embedding fails
    """
    if not HAS_STEGASOO:
        raise ImportError(
            "DCT embedding requires stegasoo. "
            "Ensure stegasoo is installed or available at ../stegasoo"
        )
    if not has_jpegio_support():
        raise ImportError(
            "Native JPEG embedding requires jpeglib. "
            "Install with: pip install jpeglib"
        )

    stego_bytes, stats = embed_in_dct(
        data=proof_link.encode("utf-8"),
        carrier_image=image_data,
        seed=VERISOO_SEED,
        output_format="jpeg",
        color_mode="color",
    )

    summary = {
        "blocks_used": stats.blocks_used,
        "blocks_available": stats.blocks_available,
        "usage_percent": stats.usage_percent,
        "output_format": stats.output_format,
    }
    return stego_bytes, summary
def extract_proof_from_jpeg(image_data: bytes) -> str | None:
    """
    Extract a proof link from a JPEG image.

    Args:
        image_data: JPEG image bytes

    Returns:
        Proof URL string or None if not found/invalid
    """
    if not HAS_STEGASOO:
        return None

    try:
        raw = extract_from_dct(
            stego_image=image_data,
            seed=VERISOO_SEED,
        )
        link = raw.decode("utf-8")
    except Exception:
        return None

    # Sanity check that the payload resembles a proof link.
    if link.startswith("http") or "verisoo" in link.lower():
        return link
    return None
# =============================================================================
# UNIFIED EMBEDDING API
# =============================================================================
def get_embed_method(image_path: Path) -> str:
    """
    Determine the appropriate embedding method for an image.

    Args:
        image_path: Path to the image file

    Returns:
        "dct" for JPEG, "xmp" for PNG/RAW, "none" if unsupported
    """
    ext = image_path.suffix.lower()
    if ext in DCT_FORMATS:
        # JPEG gets DCT when the native toolchain is present, XMP otherwise.
        return "dct" if (HAS_STEGASOO and has_jpegio_support()) else "xmp"
    if ext in XMP_FORMATS or ext in RAW_FORMATS:
        return "xmp"
    return "none"
def embed_proof_link(
    image_path: Path,
    proof_link: str,
    fingerprint: str,
    attested_at: datetime,
    image_sha256: str,
    caption: str | None = None,
    output_path: Path | None = None,
) -> EmbedResult:
    """
    Embed a proof link into an image using the appropriate method.

    For JPEG: Uses DCT steganography (invisible, survives compression)
    For PNG/RAW: Creates XMP sidecar file

    Args:
        image_path: Path to the original image
        proof_link: Full proof URL to embed
        fingerprint: Attestor's fingerprint
        attested_at: Attestation timestamp
        image_sha256: SHA-256 hash of the image
        caption: Optional caption
        output_path: Optional output path for embedded image (JPEG only)

    Returns:
        EmbedResult with success status, method used, and output path
    """
    method = get_embed_method(image_path)

    if method == "none":
        return EmbedResult(
            success=False,
            method="none",
            output_path=None,
            proof_link=proof_link,
            message=f"Unsupported format: {image_path.suffix}",
        )

    if method == "dct":
        try:
            stego_bytes, stats = embed_proof_in_jpeg(
                image_path.read_bytes(), proof_link
            )
            destination = output_path
            if destination is None:
                destination = image_path.parent / f"{image_path.stem}_attested.jpg"
            destination.write_bytes(stego_bytes)
            return EmbedResult(
                success=True,
                method="dct",
                output_path=destination,
                proof_link=proof_link,
                message=f"Embedded via DCT ({stats['usage_percent']:.2f}% capacity used)",
            )
        except Exception as e:
            # DCT failed - degrade gracefully to an XMP sidecar.
            return _embed_xmp_fallback(
                image_path, proof_link, fingerprint, attested_at,
                image_sha256, caption, str(e)
            )

    # method == "xmp"
    try:
        xmp_path = write_xmp_sidecar(
            image_path=image_path,
            proof_link=proof_link,
            fingerprint=fingerprint,
            attested_at=attested_at,
            image_sha256=image_sha256,
            caption=caption,
        )
    except Exception as e:
        return EmbedResult(
            success=False,
            method="xmp",
            output_path=None,
            proof_link=proof_link,
            message=f"XMP sidecar failed: {e}",
        )
    return EmbedResult(
        success=True,
        method="xmp",
        output_path=xmp_path,
        proof_link=proof_link,
        message=f"Created XMP sidecar: {xmp_path.name}",
    )
def _embed_xmp_fallback(
    image_path: Path,
    proof_link: str,
    fingerprint: str,
    attested_at: datetime,
    image_sha256: str,
    caption: str | None,
    dct_error: str,
) -> EmbedResult:
    """Write an XMP sidecar after a DCT embed attempt has failed."""
    try:
        sidecar = write_xmp_sidecar(
            image_path=image_path,
            proof_link=proof_link,
            fingerprint=fingerprint,
            attested_at=attested_at,
            image_sha256=image_sha256,
            caption=caption,
        )
    except Exception as e:
        # Nothing worked - report both failures in one message.
        return EmbedResult(
            success=False,
            method="none",
            output_path=None,
            proof_link=proof_link,
            message=f"Both DCT and XMP failed: {dct_error}; {e}",
        )
    return EmbedResult(
        success=True,
        method="xmp",
        output_path=sidecar,
        proof_link=proof_link,
        message=f"DCT failed ({dct_error}), created XMP sidecar instead",
    )
def extract_proof_link(image_path: Path) -> ExtractResult:
    """
    Extract a proof link from an image (DCT or XMP).

    Tries DCT extraction for JPEG first, then an XMP sidecar for all
    formats.

    Args:
        image_path: Path to the image

    Returns:
        ExtractResult with proof link if found
    """
    # The DCT channel is only meaningful for JPEG carriers.
    if image_path.suffix.lower() in DCT_FORMATS and HAS_STEGASOO:
        try:
            link = extract_proof_from_jpeg(image_path.read_bytes())
        except Exception:
            link = None
        if link:
            return ExtractResult(
                success=True,
                method="dct",
                proof_link=link,
                message="Extracted from DCT steganography",
            )

    sidecar = read_xmp_sidecar(image_path)
    if sidecar and "proof_link" in sidecar:
        return ExtractResult(
            success=True,
            method="xmp",
            proof_link=sidecar["proof_link"],
            message="Extracted from XMP sidecar",
        )

    return ExtractResult(
        success=False,
        method="none",
        proof_link=None,
        message="No embedded proof link found",
    )
# =============================================================================
# UTILITY FUNCTIONS
# =============================================================================
def can_embed_dct() -> bool:
    """Check if DCT embedding is available."""
    if not HAS_STEGASOO:
        return False
    return has_jpegio_support()
def get_embed_capabilities() -> dict[str, Any]:
    """Get information about available embedding capabilities."""
    dct_ok = HAS_STEGASOO and has_dct_support()
    jpeg_native = HAS_STEGASOO and has_jpegio_support()
    return {
        "dct_available": dct_ok,
        "jpeg_native": jpeg_native,
        # XMP sidecars need no optional dependencies.
        "xmp_available": True,
        "supported_dct_formats": list(DCT_FORMATS) if can_embed_dct() else [],
        "supported_xmp_formats": list(XMP_FORMATS | RAW_FORMATS),
    }

View File

@ -0,0 +1,64 @@
"""
Exception hierarchy for Verisoo.
Follows the pattern established in the Soo Suite for typed, informative errors.
"""
from __future__ import annotations
class VerisooError(Exception):
    """Base exception for all Verisoo errors.

    Catching this type catches every error the package raises.
    """

    pass
class AttestationError(VerisooError):
    """Errors during attestation creation or signing."""

    pass
class VerificationError(VerisooError):
    """Errors during attestation verification."""

    def __init__(self, message: str, *, reason: str | None = None) -> None:
        # Machine-readable failure category (e.g. "hash_mismatch"), kept
        # alongside the human-readable message.
        self.reason = reason
        super().__init__(message)
class HashMismatchError(VerificationError):
    """Image hash does not match attestation record."""

    def __init__(
        self, message: str, *, expected: str, actual: str, hash_type: str
    ) -> None:
        super().__init__(message, reason="hash_mismatch")
        # Keep both sides of the comparison for diagnostics.
        self.expected, self.actual = expected, actual
        self.hash_type = hash_type
class SignatureError(VerificationError):
    """Signature verification failed."""

    def __init__(self, message: str) -> None:
        # All signature failures share one machine-readable reason code.
        super().__init__(message, reason="invalid_signature")
class IdentityError(VerisooError):
    """Errors related to identity/key management."""

    pass
class MerkleError(VerisooError):
    """Errors in merkle tree operations."""

    pass
class FederationError(VerisooError):
    """Errors in peer communication and sync."""

    pass

View File

@ -0,0 +1,318 @@
"""
Federation and gossip protocol for Verisoo.
Nodes sync their merkle logs via gossip:
1. Periodically exchange merkle roots with peers
2. If roots differ, request consistency proof
3. Fetch missing records and append to local log
Eventually consistent - "eventually" depends on gossip frequency and topology.
No central coordinator, no leader election, no consensus protocol.
Just append-only logs that converge.
"""
from __future__ import annotations
import asyncio
import hashlib
import json
import logging
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Callable, Protocol
from urllib.parse import urlparse
from .exceptions import FederationError
from .merkle import ConsistencyProof, InclusionProof, MerkleLog
from .models import AttestationRecord
logger = logging.getLogger(__name__)
@dataclass
class PeerInfo:
    """Information about a federation peer."""

    # Base URL the transport uses to reach the peer.
    url: str
    fingerprint: str  # Ed25519 fingerprint for peer authentication
    # Last successful status exchange; None until first contact.
    last_seen: datetime | None = None
    # Peer's merkle root and log size as of the last status exchange.
    last_root: str | None = None
    last_size: int = 0
    # Health tracking: set unhealthy after 3 consecutive sync failures
    # (see GossipNode.sync_with_peer).
    healthy: bool = True
    consecutive_failures: int = 0
@dataclass
class SyncStatus:
    """Result of a sync attempt with a peer."""

    # Peer URL this status refers to.
    peer: str
    # True if the round completed without error (even if nothing was fetched).
    success: bool
    # Number of records pulled from the peer this round.
    records_received: int = 0
    # Local log sizes before/after the sync, and the peer's reported size.
    our_size_before: int = 0
    our_size_after: int = 0
    their_size: int = 0
    # Error description when success is False.
    error: str | None = None
class PeerTransport(Protocol):
    """Protocol for peer communication.

    Structural (duck-typed) interface; any object with these four async
    methods works (e.g. HttpTransport, or an in-memory fake for tests).
    """

    async def get_status(self, peer: PeerInfo) -> tuple[str, int]:
        """Get peer's current root hash and log size."""
        ...

    async def get_records(
        self, peer: PeerInfo, start_index: int, count: int
    ) -> list[AttestationRecord]:
        """Fetch records from peer."""
        ...

    async def get_consistency_proof(
        self, peer: PeerInfo, old_size: int
    ) -> ConsistencyProof:
        """Get proof that peer's log is consistent with ours."""
        ...

    async def push_records(
        self, peer: PeerInfo, records: list[AttestationRecord]
    ) -> int:
        """Push records to peer, returns number accepted."""
        ...
class GossipNode:
    """
    A node in the Verisoo federation network.

    Manages:
    - Local merkle log
    - Peer connections
    - Periodic sync via gossip
    """

    def __init__(
        self,
        log: MerkleLog,
        transport: PeerTransport,
        node_id: str | None = None,
    ) -> None:
        """
        Args:
            log: Local append-only merkle log.
            transport: Peer communication implementation (PeerTransport).
            node_id: Stable identifier for this node; random if omitted.
        """
        self.log = log
        self.transport = transport
        self.node_id = node_id or self._generate_node_id()
        self.peers: dict[str, PeerInfo] = {}
        self._running = False
        self._sync_task: asyncio.Task | None = None

    def add_peer(self, url: str, fingerprint: str) -> None:
        """Register a peer for gossip."""
        peer = PeerInfo(url=url, fingerprint=fingerprint)
        self.peers[url] = peer
        logger.info(f"Added peer: {url} ({fingerprint[:16]}...)")

    def remove_peer(self, url: str) -> None:
        """Remove a peer from gossip."""
        if url in self.peers:
            del self.peers[url]
            logger.info(f"Removed peer: {url}")

    async def start(self, interval_seconds: float = 60.0) -> None:
        """Start the gossip loop (idempotent)."""
        if self._running:
            return
        self._running = True
        self._sync_task = asyncio.create_task(self._gossip_loop(interval_seconds))
        logger.info(f"Gossip started, interval={interval_seconds}s")

    async def stop(self) -> None:
        """Stop the gossip loop and wait for the background task to exit."""
        self._running = False
        if self._sync_task:
            self._sync_task.cancel()
            try:
                await self._sync_task
            except asyncio.CancelledError:
                pass
        logger.info("Gossip stopped")

    async def sync_with_peer(self, peer_url: str) -> SyncStatus:
        """
        Sync with a specific peer.

        1. Get their status (root, size)
        2. If they have more records, fetch them
        3. Verify consistency before accepting
        4. Append new records to our log

        Always returns a SyncStatus. A peer that is merely behind us is a
        successful no-op sync; a same-size root mismatch is reported as a
        failed sync (possible fork).

        BUG FIX: previously this method fell through and returned None
        (violating its declared return type) whenever the peer's log was
        not strictly larger than ours and the roots did not match - i.e.
        the completely normal "peer is behind us" case.
        """
        peer = self.peers.get(peer_url)
        if not peer:
            return SyncStatus(
                peer=peer_url,
                success=False,
                error="Unknown peer",
            )

        our_size_before = self.log.size

        try:
            # Get peer status
            their_root, their_size = await self.transport.get_status(peer)

            # Update peer info
            peer.last_seen = datetime.now(timezone.utc)
            peer.last_root = their_root
            peer.last_size = their_size

            # Peer is in sync with us, or behind us - nothing to pull.
            if their_size <= our_size_before:
                if their_size == our_size_before and their_root != self.log.root_hash:
                    # Same size, different roots: the logs have diverged.
                    raise FederationError(
                        f"Peer {peer_url} root mismatch at equal size - possible fork"
                    )
                peer.healthy = True
                peer.consecutive_failures = 0
                return SyncStatus(
                    peer=peer_url,
                    success=True,
                    our_size_before=our_size_before,
                    our_size_after=our_size_before,
                    their_size=their_size,
                )

            # They have records we don't - verify consistency, then fetch.
            if our_size_before > 0:
                proof = await self.transport.get_consistency_proof(peer, our_size_before)
                if not self._verify_consistency(proof):
                    raise FederationError(
                        f"Peer {peer_url} failed consistency check - possible fork"
                    )

            new_records = await self.transport.get_records(
                peer, our_size_before, their_size - our_size_before
            )

            # Append to our log
            for record in new_records:
                self.log.append(record)

            peer.healthy = True
            peer.consecutive_failures = 0

            return SyncStatus(
                peer=peer_url,
                success=True,
                records_received=their_size - our_size_before,
                our_size_before=our_size_before,
                our_size_after=self.log.size,
                their_size=their_size,
            )

        except Exception as e:
            peer.consecutive_failures += 1
            if peer.consecutive_failures >= 3:
                peer.healthy = False
                logger.warning(f"Peer {peer_url} marked unhealthy after 3 failures")

            return SyncStatus(
                peer=peer_url,
                success=False,
                error=str(e),
                our_size_before=our_size_before,
                our_size_after=self.log.size,
            )

    async def broadcast_record(self, record: AttestationRecord) -> dict[str, bool]:
        """
        Push a new record to all peers.

        Returns dict of peer_url -> success. Unhealthy peers are skipped
        and reported as False.
        """
        results = {}
        for url, peer in self.peers.items():
            if not peer.healthy:
                results[url] = False
                continue
            try:
                accepted = await self.transport.push_records(peer, [record])
                results[url] = accepted > 0
            except Exception as e:
                logger.warning(f"Failed to push to {url}: {e}")
                results[url] = False
        return results

    async def _gossip_loop(self, interval: float) -> None:
        """Background task that periodically syncs with peers."""
        while self._running:
            try:
                await self._gossip_round()
            except Exception as e:
                logger.error(f"Gossip round failed: {e}")
            await asyncio.sleep(interval)

    async def _gossip_round(self) -> None:
        """One round of gossip with all healthy peers."""
        healthy_peers = [p for p in self.peers.values() if p.healthy]
        if not healthy_peers:
            return

        # Sync with all healthy peers concurrently
        tasks = [self.sync_with_peer(p.url) for p in healthy_peers]
        results = await asyncio.gather(*tasks, return_exceptions=True)

        # Log summary
        success_count = sum(
            1 for r in results if isinstance(r, SyncStatus) and r.success
        )
        logger.debug(f"Gossip round: {success_count}/{len(healthy_peers)} peers synced")

    def _verify_consistency(self, proof: ConsistencyProof) -> bool:
        """Verify a consistency proof from a peer."""
        # Simplified: trust the proof structure for now
        # Full implementation would verify the merkle path
        return proof.old_size <= self.log.size

    def _generate_node_id(self) -> str:
        """Generate a random node ID."""
        import secrets

        return hashlib.sha256(secrets.token_bytes(32)).hexdigest()[:16]
# Placeholder for HTTP transport implementation
class HttpTransport:
    """
    HTTP-based peer transport.

    Endpoints expected on peers:
    - GET /status -> {"root": "...", "size": N}
    - GET /records?start=N&count=M -> [records...]
    - GET /consistency-proof?old_size=N -> proof
    - POST /records -> accept records, return count

    NOTE: currently a stub - every method raises NotImplementedError until
    the optional federation extra (aiohttp) is wired in.
    """

    def __init__(self, timeout: float = 30.0) -> None:
        # Per-request timeout in seconds, for the future aiohttp client.
        self.timeout = timeout
        # Will use aiohttp when federation extra is installed

    async def get_status(self, peer: PeerInfo) -> tuple[str, int]:
        """Get peer's current root hash and log size."""
        raise NotImplementedError("Install verisoo[federation] for HTTP transport")

    async def get_records(
        self, peer: PeerInfo, start_index: int, count: int
    ) -> list[AttestationRecord]:
        """Fetch records from peer."""
        raise NotImplementedError("Install verisoo[federation] for HTTP transport")

    async def get_consistency_proof(
        self, peer: PeerInfo, old_size: int
    ) -> ConsistencyProof:
        """Get proof that peer's log is consistent with ours."""
        raise NotImplementedError("Install verisoo[federation] for HTTP transport")

    async def push_records(
        self, peer: PeerInfo, records: list[AttestationRecord]
    ) -> int:
        """Push records to peer, returns number accepted."""
        raise NotImplementedError("Install verisoo[federation] for HTTP transport")

View File

@ -0,0 +1,290 @@
"""
Multi-algorithm image hashing for Verisoo.
Designed to survive social media mangling:
- JPEG recompression (Instagram, Twitter, Facebook)
- Resizing (thumbnails, feed images)
- Format conversion (PNG <-> JPEG)
- Cropping (platform auto-crops, user crops)
- Color adjustments (filters, auto-enhance)
Strategy: compute multiple hash types, match if ANY is within threshold.
This is how professional systems (TinEye, Content ID) work.
"""
from __future__ import annotations
import hashlib
from io import BytesIO
from PIL import Image
from .models import ImageHashes
# Optional dependency - graceful degradation
try:
import imagehash
IMAGEHASH_AVAILABLE = True
except ImportError:
IMAGEHASH_AVAILABLE = False
def hash_image(image_data: bytes, *, robust: bool = True) -> ImageHashes:
    """
    Compute all hashes for an image.

    Args:
        image_data: Raw image bytes
        robust: If True, compute all hash types for best social media survival.
                If False, only compute required hashes (faster).

    Returns:
        ImageHashes with sha256, phash, dhash, and optionally ahash,
        colorhash, crop_resistant
    """
    # Cryptographic hash of the raw bytes - exact-match identity.
    digest = hashlib.sha256(image_data).hexdigest()

    # Decode once; all perceptual hashes share this decoded image.
    img = Image.open(BytesIO(image_data))

    ahash: str | None = None
    colorhash: str | None = None
    crop_resistant: str | None = None

    if not IMAGEHASH_AVAILABLE:
        # Degraded mode: deterministic but not truly perceptual.
        phash, dhash = _fallback_perceptual_hashes(img)
    else:
        phash = str(imagehash.phash(img))
        dhash = str(imagehash.dhash(img))
        if robust:
            ahash = str(imagehash.average_hash(img))
            colorhash = str(imagehash.colorhash(img))
            crop_resistant = _compute_crop_resistant_hash(img)

    return ImageHashes(
        sha256=digest,
        phash=phash,
        dhash=dhash,
        ahash=ahash,
        colorhash=colorhash,
        crop_resistant=crop_resistant,
    )
def hash_image_file(path: str, *, robust: bool = True) -> ImageHashes:
    """Hash an image from file path."""
    with open(path, "rb") as fh:
        data = fh.read()
    return hash_image(data, robust=robust)
def _compute_crop_resistant_hash(img: Image.Image) -> str:
    """
    Compute hash of center region - survives edge crops.

    Takes center 60% of image and hashes that.
    """
    if not IMAGEHASH_AVAILABLE:
        return ""

    w, h = img.size
    # Keep the middle 60% in each dimension (drop 20% on every edge).
    box = (int(w * 0.2), int(h * 0.2), int(w * 0.8), int(h * 0.8))
    return str(imagehash.phash(img.crop(box)))
def _fallback_perceptual_hashes(img: Image.Image) -> tuple[str, str]:
    """
    Fallback perceptual hashes when imagehash not installed.

    Not truly perceptual, but allows code to run.
    Uses normalized pixel data hashing.
    """
    # Normalize: grayscale, 32x32 - removes most encoding variation.
    thumb = img.convert("L").resize((32, 32), Image.Resampling.LANCZOS)
    raw = thumb.tobytes()
    # Distinct digests for the two hash roles: full buffer vs a prefix.
    return (
        hashlib.sha256(raw).hexdigest()[:16],
        hashlib.sha256(raw[:512]).hexdigest()[:16],
    )
def perceptual_distance(hash1: str, hash2: str) -> int:
    """
    Compute hamming distance between two perceptual hashes.

    Lower = more similar.
    0 = identical
    1-10 = likely same image, different encoding
    11-20 = possibly same image, significant changes
    >20 = likely different images
    """
    # Left-pad the shorter hex string so both decode to the same width
    # (zfill is a no-op when the lengths already agree).
    width = max(len(hash1), len(hash2))
    a = int(hash1.zfill(width), 16)
    b = int(hash2.zfill(width), 16)
    return bin(a ^ b).count("1")
def is_same_image(
    hashes1: ImageHashes,
    hashes2: ImageHashes,
    *,
    perceptual_threshold: int = 10,
) -> tuple[bool, str]:
    """
    Determine if two sets of hashes represent the same image.

    Uses multi-algorithm matching - succeeds if ANY algorithm matches.

    Returns:
        (is_same, match_type) where match_type is one of:
        - "exact" - SHA-256 identical
        - "phash" - DCT perceptual match
        - "dhash" - Difference hash match
        - "ahash" - Average hash match
        - "colorhash" - Color distribution match
        - "crop_resistant" - Center region match
        - "none" - Not the same image
    """
    # Cheap exact comparison first.
    if hashes1.sha256 == hashes2.sha256:
        return True, "exact"

    matched, how = hashes1.matches_perceptually(hashes2, perceptual_threshold)
    if not matched:
        return False, "none"
    return True, how or "perceptual"
def compute_all_distances(hashes1: ImageHashes, hashes2: ImageHashes) -> dict[str, int]:
    """
    Compute hamming distance for all available hash types.

    Useful for debugging and understanding match quality.
    """
    # phash/dhash are always present; the rest only when both sides have them.
    out: dict[str, int] = {
        "phash": perceptual_distance(hashes1.phash, hashes2.phash),
        "dhash": perceptual_distance(hashes1.dhash, hashes2.dhash),
    }
    for name in ("ahash", "colorhash", "crop_resistant"):
        a = getattr(hashes1, name)
        b = getattr(hashes2, name)
        if a and b:
            out[name] = perceptual_distance(a, b)
    return out
def extract_exif_metadata(image_data: bytes) -> dict:
    """
    Extract EXIF metadata from image for CaptureMetadata.

    Returns dict with keys matching CaptureMetadata fields where available.
    """
    from datetime import datetime

    from .models import CaptureDevice, GeoLocation

    img = Image.open(BytesIO(image_data))
    exif = img.getexif() if hasattr(img, "getexif") else {}

    result: dict = {
        "width": img.width,
        "height": img.height,
        "mime_type": (
            Image.MIME.get(img.format, f"image/{img.format.lower()}")
            if img.format
            else None
        ),
    }
    if not exif:
        return result

    # EXIF tag IDs
    TAG_MAKE = 271
    TAG_MODEL = 272
    TAG_SOFTWARE = 305
    TAG_DATETIME_ORIGINAL = 36867
    TAG_GPS_INFO = 34853

    # Device info
    make = exif.get(TAG_MAKE)
    model = exif.get(TAG_MODEL)
    if make or model:
        result["device"] = CaptureDevice(
            make=make,
            model=model,
            software=exif.get(TAG_SOFTWARE),
        ).to_dict()

    # Capture time - EXIF format: "2024:03:15 14:32:00"
    stamp = exif.get(TAG_DATETIME_ORIGINAL)
    if stamp:
        try:
            result["captured_at"] = datetime.strptime(
                stamp, "%Y:%m:%d %H:%M:%S"
            ).isoformat()
        except ValueError:
            pass

    # GPS coordinates (simplified - full parsing is complex)
    gps = exif.get(TAG_GPS_INFO)
    if gps and isinstance(gps, dict):
        try:
            # Tag 2/1: GPSLatitude/GPSLatitudeRef; 4/3: GPSLongitude/Ref.
            lat = _parse_gps_coord(gps.get(2), gps.get(1))
            lon = _parse_gps_coord(gps.get(4), gps.get(3))
            if lat is not None and lon is not None:
                result["location"] = GeoLocation(
                    latitude=lat,
                    longitude=lon,
                ).to_dict()
        except (TypeError, ValueError, KeyError):
            pass

    return result
def _parse_gps_coord(coord_tuple: tuple | None, ref: str | None) -> float | None:
"""Parse EXIF GPS coordinate tuple to decimal degrees."""
if not coord_tuple or not ref:
return None
try:
# coord_tuple is ((deg_num, deg_den), (min_num, min_den), (sec_num, sec_den))
# or sometimes just (deg, min, sec) as floats
if isinstance(coord_tuple[0], tuple):
degrees = coord_tuple[0][0] / coord_tuple[0][1]
minutes = coord_tuple[1][0] / coord_tuple[1][1]
seconds = coord_tuple[2][0] / coord_tuple[2][1]
else:
degrees, minutes, seconds = coord_tuple
decimal = degrees + minutes / 60 + seconds / 3600
if ref in ("S", "W"):
decimal = -decimal
return decimal
except (TypeError, ZeroDivisionError, IndexError):
return None

View File

@ -0,0 +1,319 @@
"""
LMDB-backed index and state storage for Verisoo.
Provides fast lookups on top of the append-only binary log:
- record_id -> log offset
- attestor_fingerprint -> [record indices]
- image_sha256 -> [record indices]
- image_phash -> [record indices] (for perceptual search)
- merkle tree state
LMDB is memory-mapped, so reads are zero-copy and very fast.
Single-writer model fits append-only perfectly.
"""
from __future__ import annotations
import json
import struct
from contextlib import contextmanager
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Iterator
import lmdb
from .exceptions import VerisooError
from .models import AttestationRecord, Identity, ImageHashes
class IndexError(VerisooError):
    """Error in index operations.

    NOTE(review): this name shadows the builtin ``IndexError`` within this
    module; renaming would break callers that catch it, so it is kept, but
    be careful when catching IndexError in this file.
    """

    pass
# LMDB database names (sub-databases within the environment)
DB_RECORDS = b"records" # index (uint64) -> record_id
DB_BY_ATTESTOR = b"by_attestor" # fingerprint -> [indices] (JSON array)
DB_BY_SHA256 = b"by_sha256" # sha256 -> [indices]
DB_BY_PHASH = b"by_phash" # phash -> [indices]
DB_IDENTITIES = b"identities" # fingerprint -> identity JSON
DB_META = b"meta" # key -> value (merkle root, log size, etc.)
@dataclass
class IndexStats:
    """Statistics about the index."""

    # Entry counts from the LMDB sub-databases.
    record_count: int
    attestor_count: int
    unique_images_sha256: int
    unique_images_phash: int
    # Stored merkle root, or None if never set.
    merkle_root: str | None
    # Size of the companion binary log; filled in by the storage layer.
    log_file_size: int
class LMDBIndex:
"""
LMDB-backed index for attestation records.
Works alongside BinaryLog - the log stores raw records,
this index provides fast lookups by various keys.
"""
# 1GB default max size, grows as needed up to this
DEFAULT_MAP_SIZE = 1 * 1024 * 1024 * 1024
def __init__(
self,
path: Path,
*,
map_size: int = DEFAULT_MAP_SIZE,
readonly: bool = False,
) -> None:
self.path = path
path.mkdir(parents=True, exist_ok=True)
self._env = lmdb.open(
str(path),
map_size=map_size,
max_dbs=10,
readonly=readonly,
# Allow multiple readers
max_readers=126,
# Don't sync on every write (we have CRC in binlog for durability)
sync=False,
# Memory-map is the whole point
writemap=True,
)
# Open sub-databases
with self._env.begin(write=True) as txn:
self._db_records = self._env.open_db(DB_RECORDS, txn=txn)
self._db_by_attestor = self._env.open_db(DB_BY_ATTESTOR, txn=txn)
self._db_by_sha256 = self._env.open_db(DB_BY_SHA256, txn=txn)
self._db_by_phash = self._env.open_db(DB_BY_PHASH, txn=txn)
self._db_identities = self._env.open_db(DB_IDENTITIES, txn=txn)
self._db_meta = self._env.open_db(DB_META, txn=txn)
def close(self) -> None:
"""Close the LMDB environment."""
self._env.close()
def __enter__(self) -> LMDBIndex:
return self
def __exit__(self, *args) -> None:
self.close()
# --- Record indexing ---
def index_record(
self,
index: int,
record: AttestationRecord,
record_id: str,
) -> None:
"""
Add a record to the index.
Called after appending to the binary log.
"""
with self._env.begin(write=True) as txn:
# Store index -> record_id mapping
txn.put(
struct.pack("<Q", index),
record_id.encode(),
db=self._db_records,
)
# Index by attestor
self._append_to_list(
txn,
self._db_by_attestor,
record.attestor_fingerprint.encode(),
index,
)
# Index by image SHA-256
self._append_to_list(
txn,
self._db_by_sha256,
record.image_hashes.sha256.encode(),
index,
)
# Index by perceptual hash
self._append_to_list(
txn,
self._db_by_phash,
record.image_hashes.phash.encode(),
index,
)
def get_record_id(self, index: int) -> str | None:
"""Get record ID for index."""
with self._env.begin() as txn:
data = txn.get(struct.pack("<Q", index), db=self._db_records)
return data.decode() if data else None
def get_indices_by_attestor(self, fingerprint: str) -> list[int]:
"""Get all record indices for an attestor."""
with self._env.begin() as txn:
return self._get_list(txn, self._db_by_attestor, fingerprint.encode())
def get_indices_by_sha256(self, sha256: str) -> list[int]:
"""Get all record indices for an image by SHA-256."""
with self._env.begin() as txn:
return self._get_list(txn, self._db_by_sha256, sha256.encode())
def get_indices_by_phash(self, phash: str) -> list[int]:
"""Get all record indices for an image by perceptual hash."""
with self._env.begin() as txn:
return self._get_list(txn, self._db_by_phash, phash.encode())
def find_similar_phash(self, phash: str, max_distance: int = 10) -> list[tuple[str, int]]:
"""
Find perceptual hashes within hamming distance.
Returns list of (phash, distance) tuples.
This is O(n) over unique phashes - for large scale, would need LSH.
"""
results = []
target = int(phash, 16)
with self._env.begin() as txn:
cursor = txn.cursor(db=self._db_by_phash)
for key, _ in cursor:
candidate = key.decode()
try:
candidate_int = int(candidate, 16)
distance = bin(target ^ candidate_int).count("1")
if distance <= max_distance:
results.append((candidate, distance))
except ValueError:
continue
return sorted(results, key=lambda x: x[1])
# --- Identity storage ---
def save_identity(self, identity: Identity) -> None:
"""Save an identity (public key + metadata)."""
data = {
"public_key": identity.public_key.hex(),
"fingerprint": identity.fingerprint,
"created_at": identity.created_at.isoformat(),
"metadata": identity.metadata,
}
with self._env.begin(write=True) as txn:
txn.put(
identity.fingerprint.encode(),
json.dumps(data).encode(),
db=self._db_identities,
)
def get_identity(self, fingerprint: str) -> Identity | None:
"""Get identity by fingerprint."""
with self._env.begin() as txn:
data = txn.get(fingerprint.encode(), db=self._db_identities)
if not data:
return None
obj = json.loads(data.decode())
return Identity(
public_key=bytes.fromhex(obj["public_key"]),
fingerprint=obj["fingerprint"],
created_at=datetime.fromisoformat(obj["created_at"]),
metadata=obj.get("metadata", {}),
)
def list_identities(self) -> list[str]:
"""List all identity fingerprints."""
fingerprints = []
with self._env.begin() as txn:
cursor = txn.cursor(db=self._db_identities)
for key, _ in cursor:
fingerprints.append(key.decode())
return fingerprints
# --- Metadata storage ---
def set_meta(self, key: str, value: str) -> None:
"""Set a metadata value."""
with self._env.begin(write=True) as txn:
txn.put(key.encode(), value.encode(), db=self._db_meta)
def get_meta(self, key: str) -> str | None:
"""Get a metadata value."""
with self._env.begin() as txn:
data = txn.get(key.encode(), db=self._db_meta)
return data.decode() if data else None
def set_merkle_root(self, root: str) -> None:
    """Persist the current merkle root hash in the metadata table."""
    self.set_meta("merkle_root", root)
def get_merkle_root(self) -> str | None:
    """Return the stored merkle root, or None if none has been saved yet."""
    return self.get_meta("merkle_root")
def set_record_count(self, count: int) -> None:
    """Store the record count (used for log/index consistency checks)."""
    self.set_meta("record_count", str(count))
def get_record_count(self) -> int:
    """Return the stored record count, defaulting to 0 when never set."""
    return int(self.get_meta("record_count") or 0)
# --- Stats ---
def get_stats(self) -> IndexStats:
    """Snapshot entry counts from each sub-database into an IndexStats.

    log_file_size is left at 0 here; the storage layer fills it in.
    """
    with self._env.begin() as txn:
        entries = {
            name: txn.stat(db)["entries"]
            for name, db in (
                ("records", self._db_records),
                ("attestors", self._db_by_attestor),
                ("sha256", self._db_by_sha256),
                ("phash", self._db_by_phash),
            )
        }
    return IndexStats(
        record_count=entries["records"],
        attestor_count=entries["attestors"],
        unique_images_sha256=entries["sha256"],
        unique_images_phash=entries["phash"],
        merkle_root=self.get_merkle_root(),
        log_file_size=0,  # filled in by the storage layer
    )
# --- Internal helpers ---
def _append_to_list(self, txn, db, key: bytes, value: int) -> None:
"""Append an integer to a JSON list stored at key."""
existing = txn.get(key, db=db)
if existing:
items = json.loads(existing.decode())
else:
items = []
items.append(value)
txn.put(key, json.dumps(items).encode(), db=db)
def _get_list(self, txn, db, key: bytes) -> list[int]:
"""Get JSON list of integers from key."""
data = txn.get(key, db=db)
if not data:
return []
return json.loads(data.decode())
def sync(self) -> None:
    """Force an LMDB flush of buffered writes to disk."""
    self._env.sync()
def compact(self) -> None:
    """Write a compacted copy of the database next to the original.

    The copy (``<name>.compact``) drops LMDB free pages. Swapping it in
    for the live database is intentionally left to the caller.
    """
    destination = self.path.parent / f"{self.path.name}.compact"
    self._env.copy(str(destination), compact=True)

View File

@ -0,0 +1,304 @@
"""
Merkle tree and append-only log for Verisoo.
Inspired by Certificate Transparency (RFC 6962). Each attestation record
becomes a leaf in a merkle tree. The tree structure allows:
- Efficient proof that a record exists (O(log n) proof size)
- Efficient comparison between nodes ("do we agree on history?")
- Tamper evidence (any modification changes the root hash)
The log is append-only: records can be added but never removed or modified.
This gives Byzantine fault tolerance - if one node goes rogue, others have
the canonical record.
"""
from __future__ import annotations
import hashlib
import json
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Iterator
from .exceptions import MerkleError
from .models import AttestationRecord
@dataclass
class MerkleNode:
    """One node of a merkle tree.

    Internal nodes carry child links; leaves additionally carry the raw
    payload they were hashed from.
    """

    hash: str
    left: MerkleNode | None = None
    right: MerkleNode | None = None
    leaf_data: bytes | None = None  # populated only for leaf nodes

    @property
    def is_leaf(self) -> bool:
        """True when this node wraps raw leaf data rather than children."""
        return self.leaf_data is not None
@dataclass
class InclusionProof:
    """Audit path proving a single leaf exists in a merkle tree.

    Carries the sibling hashes a verifier needs to recompute the root
    from the leaf: combining hash(record) with proof_hashes must yield
    the known root.
    """

    leaf_hash: str
    leaf_index: int
    tree_size: int
    proof_hashes: list[str]
    root_hash: str
@dataclass
class ConsistencyProof:
    """Proof that an older tree is a prefix of a newer one.

    Exchanged during sync: "I have root X with N entries, you have root Y
    with M entries — prove your tree includes all my entries."
    """

    old_size: int
    new_size: int
    proof_hashes: list[str]
class MerkleLog:
    """
    Append-only merkle log for attestation records.

    Each appended record becomes a leaf; the root hash commits to the
    entire history, so any modification is detectable.
    Thread-safe for reads, single-writer for appends.
    Persistence is handled separately (see storage module).
    """

    def __init__(self) -> None:
        self._leaves: list[bytes] = []  # serialized records, in append order
        self._leaf_hashes: list[str] = []  # leaf hashes, parallel to _leaves
        self._root: str | None = None  # cached root, refreshed on append

    @property
    def size(self) -> int:
        """Number of records in the log."""
        return len(self._leaves)

    @property
    def root_hash(self) -> str | None:
        """Current merkle root, or None if empty."""
        return self._root

    def append(self, record: AttestationRecord) -> int:
        """
        Append a record to the log and refresh the cached root.

        Returns the index of the new record.
        """
        record_bytes = self._serialize_record(record)
        leaf_hash = self._hash_leaf(record_bytes)
        self._leaves.append(record_bytes)
        self._leaf_hashes.append(leaf_hash)
        self._root = self._compute_root()
        return len(self._leaves) - 1

    def get(self, index: int) -> AttestationRecord:
        """Get record at index.

        Raises:
            MerkleError: if index is outside [0, size).
        """
        if index < 0 or index >= len(self._leaves):
            raise MerkleError(f"Index {index} out of range [0, {len(self._leaves)})")
        return self._deserialize_record(self._leaves[index])

    def inclusion_proof(self, index: int) -> InclusionProof:
        """
        Generate proof that the record at *index* exists in the tree.

        The proof is O(log n) sibling hashes that let anyone verify the
        record is part of the tree with the given root.

        Raises:
            MerkleError: if index is outside [0, size).
        """
        if index < 0 or index >= len(self._leaves):
            raise MerkleError(f"Index {index} out of range")
        proof_hashes = self._build_inclusion_proof(index, len(self._leaves))
        return InclusionProof(
            leaf_hash=self._leaf_hashes[index],
            leaf_index=index,
            tree_size=len(self._leaves),
            proof_hashes=proof_hashes,
            root_hash=self._root or "",
        )

    def consistency_proof(self, old_size: int) -> ConsistencyProof:
        """
        Generate proof that the tree at old_size is a prefix of the current tree.

        Used during federation: prove we haven't rewritten history.

        Raises:
            MerkleError: if old_size is outside [0, size].
        """
        if old_size < 0 or old_size > len(self._leaves):
            raise MerkleError(f"Old size {old_size} invalid for tree of size {len(self._leaves)}")
        proof_hashes = self._build_consistency_proof(old_size, len(self._leaves))
        return ConsistencyProof(
            old_size=old_size,
            new_size=len(self._leaves),
            proof_hashes=proof_hashes,
        )

    def verify_inclusion(self, proof: InclusionProof) -> bool:
        """Verify an inclusion proof by replaying the audit path.

        The replay tracks the node count of each level (starting from
        proof.tree_size). A level with an odd node count promotes its
        last node without a sibling, so no proof hash is consumed there.

        BUGFIX: the previous parity-only replay consumed one hash per
        level unconditionally, which rejected valid proofs for the
        trailing leaves of non-power-of-two trees (e.g. leaf 2 of a
        3-leaf tree).
        """
        if self._root is None:
            return False
        # Recompute root from leaf + proof
        computed = proof.leaf_hash
        index = proof.leaf_index
        level_size = proof.tree_size
        siblings = iter(proof.proof_hashes)
        try:
            while level_size > 1:
                if index % 2 == 0:
                    if index + 1 < level_size:
                        # Left child with a right sibling.
                        computed = self._hash_pair(computed, next(siblings))
                    # else: promoted last node - no sibling at this level.
                else:
                    # Right child: sibling is on the left.
                    computed = self._hash_pair(next(siblings), computed)
                index //= 2
                level_size = (level_size + 1) // 2
        except StopIteration:
            # Malformed proof: fewer hashes than the path requires.
            return False
        return computed == proof.root_hash

    def __iter__(self) -> Iterator[AttestationRecord]:
        """Iterate over all records in append order."""
        for record_bytes in self._leaves:
            yield self._deserialize_record(record_bytes)

    def __len__(self) -> int:
        return self.size

    # --- Internal methods ---

    def _hash_leaf(self, data: bytes) -> str:
        """Hash a leaf node (0x00 domain-separation prefix, as in RFC 6962)."""
        return hashlib.sha256(b"\x00" + data).hexdigest()

    def _hash_pair(self, left: str, right: str) -> str:
        """Hash two child node hashes (0x01 domain-separation prefix)."""
        combined = bytes.fromhex(left) + bytes.fromhex(right)
        return hashlib.sha256(b"\x01" + combined).hexdigest()

    def _compute_root(self) -> str:
        """Compute the merkle root from the current leaf hashes.

        Odd nodes at the end of a level are promoted unchanged to the
        next level. An empty tree hashes to sha256(b"").
        """
        if not self._leaf_hashes:
            return hashlib.sha256(b"").hexdigest()
        # Build tree bottom-up
        level = self._leaf_hashes.copy()
        while len(level) > 1:
            next_level = []
            for i in range(0, len(level), 2):
                if i + 1 < len(level):
                    next_level.append(self._hash_pair(level[i], level[i + 1]))
                else:
                    # Odd node: promote to next level
                    next_level.append(level[i])
            level = next_level
        return level[0]

    def _build_inclusion_proof(self, index: int, size: int) -> list[str]:
        """Collect the sibling hashes along the path from leaf to root.

        A promoted node (even index at the tail of an odd-sized level)
        has no sibling, so nothing is recorded for that level; the
        verifier skips it the same way via the tree size.
        """
        proof = []
        level = self._leaf_hashes.copy()
        while len(level) > 1:
            if index % 2 == 0:
                if index + 1 < len(level):
                    proof.append(level[index + 1])
                # else: promoted node - no sibling to record
            else:
                proof.append(level[index - 1])
            # Move to parent level
            next_level = []
            for i in range(0, len(level), 2):
                if i + 1 < len(level):
                    next_level.append(self._hash_pair(level[i], level[i + 1]))
                else:
                    next_level.append(level[i])
            level = next_level
            index //= 2
        return proof

    def _build_consistency_proof(self, old_size: int, new_size: int) -> list[str]:
        """Build consistency proof hashes.

        Simplified placeholder: a full implementation would follow the
        RFC 6962 algorithm. For now returns empty - federation will
        implement the full version.
        """
        return []

    def _serialize_record(self, record: AttestationRecord) -> bytes:
        """Serialize a record to canonical JSON bytes for storage/hashing."""
        # Use JSON for now - could use protobuf for efficiency
        data = {
            "sha256": record.image_hashes.sha256,
            "phash": record.image_hashes.phash,
            "dhash": record.image_hashes.dhash,
            "signature": record.signature.hex(),
            "attestor": record.attestor_fingerprint,
            "timestamp": record.timestamp.isoformat(),
            "metadata": record.metadata,
        }
        return json.dumps(data, sort_keys=True, separators=(",", ":")).encode("utf-8")

    def _deserialize_record(self, data: bytes) -> AttestationRecord:
        """Deserialize a record previously produced by _serialize_record()."""
        from .models import ImageHashes

        obj = json.loads(data.decode("utf-8"))
        return AttestationRecord(
            image_hashes=ImageHashes(
                sha256=obj["sha256"],
                phash=obj["phash"],
                dhash=obj["dhash"],
            ),
            signature=bytes.fromhex(obj["signature"]),
            attestor_fingerprint=obj["attestor"],
            timestamp=datetime.fromisoformat(obj["timestamp"]),
            metadata=obj.get("metadata", {}),
        )
def verify_inclusion_proof(
    record_bytes: bytes,
    proof: InclusionProof,
    expected_root: str,
) -> bool:
    """
    Standalone verification of an inclusion proof.

    Anyone can verify without having the full log: recompute the leaf
    hash from the raw record bytes, then replay the audit path up to
    the root.

    The replay tracks the node count of each level (from
    proof.tree_size): a level with an odd node count promotes its last
    node without a sibling, so no proof hash is consumed there.

    BUGFIX: the previous parity-only replay consumed one proof hash per
    level unconditionally, which rejected valid proofs for the trailing
    leaves of non-power-of-two trees.
    """
    leaf_hash = hashlib.sha256(b"\x00" + record_bytes).hexdigest()
    if leaf_hash != proof.leaf_hash:
        return False
    computed = leaf_hash
    index = proof.leaf_index
    level_size = proof.tree_size
    siblings = iter(proof.proof_hashes)
    try:
        while level_size > 1:
            if index % 2 == 0:
                if index + 1 < level_size:
                    # Left child with a right sibling.
                    combined = bytes.fromhex(computed) + bytes.fromhex(next(siblings))
                    computed = hashlib.sha256(b"\x01" + combined).hexdigest()
                # else: promoted last node - no sibling at this level.
            else:
                # Right child: sibling is on the left.
                combined = bytes.fromhex(next(siblings)) + bytes.fromhex(computed)
                computed = hashlib.sha256(b"\x01" + combined).hexdigest()
            index //= 2
            level_size = (level_size + 1) // 2
    except StopIteration:
        # Malformed proof: fewer hashes than the path requires.
        return False
    return computed == expected_root

View File

@ -0,0 +1,458 @@
"""
Core data models for Verisoo.
Designed for the photographer provenance use case:
"I took this photo in Ukraine, not San Francisco - here's my cryptographic proof."
"""
from __future__ import annotations
import hashlib
import json
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any
@dataclass(frozen=True)
class Identity:
    """
    An attestor identity backed by an Ed25519 keypair.

    The fingerprint is the first 16 bytes of SHA-256(public_key),
    hex-encoded: a short, recognizable identifier that does not expose
    the full key.
    """

    public_key: bytes
    fingerprint: str
    created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def display_name(self) -> str:
        """Human-readable label: the ``name`` metadata entry if present,
        otherwise an abbreviated fingerprint."""
        if "name" in self.metadata:
            return self.metadata["name"]
        return f"{self.fingerprint[:8]}..."

    def __str__(self) -> str:
        return f"Identity({self.display_name})"
@dataclass(frozen=True)
class GeoLocation:
    """
    Geographic capture location: GPS coordinates with optional accuracy,
    altitude, and a human-readable place name.
    """

    latitude: float  # -90 to 90
    longitude: float  # -180 to 180
    accuracy_meters: float | None = None  # GPS accuracy
    altitude_meters: float | None = None
    location_name: str | None = None  # human readable, e.g. "Kyiv, Ukraine"

    def to_dict(self) -> dict[str, Any]:
        """Serialize to compact wire keys; unset optional fields are omitted."""
        out: dict[str, Any] = {"lat": self.latitude, "lon": self.longitude}
        for key, value in (
            ("accuracy", self.accuracy_meters),
            ("altitude", self.altitude_meters),
        ):
            if value is not None:
                out[key] = value
        if self.location_name:
            out["name"] = self.location_name
        return out

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> GeoLocation:
        """Inverse of to_dict(); missing optional keys become None."""
        return cls(
            latitude=d["lat"],
            longitude=d["lon"],
            accuracy_meters=d.get("accuracy"),
            altitude_meters=d.get("altitude"),
            location_name=d.get("name"),
        )

    def __str__(self) -> str:
        coords = f"({self.latitude:.4f}, {self.longitude:.4f})"
        return f"{self.location_name} {coords}" if self.location_name else coords
@dataclass(frozen=True)
class CaptureDevice:
    """
    Details about the device that captured the image.

    Helps establish authenticity - "taken with iPhone 15 Pro" vs
    "photoshopped".
    """

    make: str | None = None  # "Apple"
    model: str | None = None  # "iPhone 15 Pro"
    software: str | None = None  # "iOS 17.4"
    serial_hash: str | None = None  # hash of device serial (privacy-preserving)

    def to_dict(self) -> dict[str, Any]:
        """Serialize, omitting unset fields."""
        pairs = (
            ("make", self.make),
            ("model", self.model),
            ("software", self.software),
            ("serial_hash", self.serial_hash),
        )
        return {key: value for key, value in pairs if value}

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> CaptureDevice:
        """Inverse of to_dict(); missing keys become None."""
        return cls(
            make=d.get("make"),
            model=d.get("model"),
            software=d.get("software"),
            serial_hash=d.get("serial_hash"),
        )

    def __str__(self) -> str:
        label = " ".join(p for p in (self.make, self.model) if p)
        return label or "Unknown device"
@dataclass(frozen=True)
class CaptureMetadata:
    """
    Rich capture-time metadata for provenance.

    This is what lets a photographer say "I took this in Kyiv, not
    San Francisco."
    """

    # Actual capture time (device clock / EXIF), not attestation time.
    captured_at: datetime | None = None
    # Where it was captured.
    location: GeoLocation | None = None
    # What device captured it.
    device: CaptureDevice | None = None
    # Photographer's notes at capture time.
    caption: str | None = None
    # Image technical details.
    width: int | None = None
    height: int | None = None
    mime_type: str | None = None
    # Original filename.
    filename: str | None = None
    # Free-form tags.
    tags: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Serialize, omitting unset/falsy fields."""
        out: dict[str, Any] = {}
        if self.captured_at:
            out["captured_at"] = self.captured_at.isoformat()
        if self.location:
            out["location"] = self.location.to_dict()
        if self.device:
            out["device"] = self.device.to_dict()
        for key in ("caption", "width", "height", "mime_type", "filename", "tags"):
            value = getattr(self, key)
            if value:
                out[key] = value
        return out

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> CaptureMetadata:
        """Inverse of to_dict(); nested structures are parsed recursively."""
        captured = d.get("captured_at")
        loc = d.get("location")
        dev = d.get("device")
        return cls(
            captured_at=datetime.fromisoformat(captured) if captured else None,
            location=GeoLocation.from_dict(loc) if loc else None,
            device=CaptureDevice.from_dict(dev) if dev else None,
            caption=d.get("caption"),
            width=d.get("width"),
            height=d.get("height"),
            mime_type=d.get("mime_type"),
            filename=d.get("filename"),
            tags=d.get("tags", []),
        )
@dataclass(frozen=True)
class ImageHashes:
    """
    Multi-algorithm image fingerprints for robust matching.

    Designed to survive social media mangling (JPEG recompression,
    resizing, format conversion, cropping, color adjustments).
    Images match if ANY hash is within threshold - defense in depth.
    """

    sha256: str  # exact match only - rarely survives sharing
    phash: str  # DCT-based perceptual hash - survives compression
    dhash: str  # difference hash - survives resizing
    ahash: str | None = None  # average hash - very tolerant
    colorhash: str | None = None  # color distribution - survives crops
    crop_resistant: str | None = None  # center-region hash

    def matches_exactly(self, other: ImageHashes) -> bool:
        """Byte-for-byte image equality via SHA-256."""
        return self.sha256 == other.sha256

    def matches_perceptually(
        self,
        other: ImageHashes,
        threshold: int = 10,
    ) -> tuple[bool, str | None]:
        """
        Check whether the images match perceptually.

        Returns (matches, best_matching_algorithm). Tries each
        algorithm in turn and reports the first one whose Hamming
        distance is within *threshold*.
        """
        candidates = (
            ("phash", self.phash, other.phash),
            ("dhash", self.dhash, other.dhash),
            ("ahash", self.ahash, other.ahash),
            ("colorhash", self.colorhash, other.colorhash),
            ("crop_resistant", self.crop_resistant, other.crop_resistant),
        )
        for name, ours, theirs in candidates:
            # Only comparable when both sides carry the hash.
            if ours and theirs and _hamming_distance(ours, theirs) <= threshold:
                return True, name
        return False, None

    def to_dict(self) -> dict[str, Any]:
        """Serialize; optional hashes are omitted when unset."""
        out: dict[str, Any] = {
            "sha256": self.sha256,
            "phash": self.phash,
            "dhash": self.dhash,
        }
        for key in ("ahash", "colorhash", "crop_resistant"):
            value = getattr(self, key)
            if value:
                out[key] = value
        return out

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> ImageHashes:
        """Inverse of to_dict()."""
        return cls(
            sha256=d["sha256"],
            phash=d["phash"],
            dhash=d["dhash"],
            ahash=d.get("ahash"),
            colorhash=d.get("colorhash"),
            crop_resistant=d.get("crop_resistant"),
        )
@dataclass(frozen=True)
class AttestationRecord:
    """
    The core attestation record stored in the append-only log.

    Cryptographic proof that a specific image existed at a specific
    time, attested by a specific identity, with specific metadata.
    Once in the log it cannot be modified or deleted.
    """

    image_hashes: ImageHashes
    signature: bytes
    attestor_fingerprint: str
    timestamp: datetime  # when the attestation was created
    metadata: dict[str, Any] = field(default_factory=dict)  # CaptureMetadata.to_dict()

    @property
    def record_id(self) -> str:
        """
        Unique identifier for this record.

        SHA-256 of "sha256|attestor|timestamp_iso", truncated to 32 hex
        chars: deterministic, collision-resistant, URL-safe.
        """
        seed = "|".join(
            (
                self.image_hashes.sha256,
                self.attestor_fingerprint,
                self.timestamp.isoformat(),
            )
        )
        return hashlib.sha256(seed.encode()).hexdigest()[:32]

    @property
    def short_id(self) -> str:
        """First 12 characters of record_id, for display/URLs."""
        return self.record_id[:12]

    @property
    def capture_metadata(self) -> CaptureMetadata | None:
        """metadata parsed as CaptureMetadata; None when absent or malformed."""
        if not self.metadata:
            return None
        try:
            return CaptureMetadata.from_dict(self.metadata)
        except (KeyError, TypeError):
            return None

    @property
    def location(self) -> GeoLocation | None:
        """Shortcut to the capture location."""
        parsed = self.capture_metadata
        return parsed.location if parsed else None

    @property
    def captured_at(self) -> datetime | None:
        """Shortcut to the capture time (may differ from attestation time)."""
        parsed = self.capture_metadata
        return parsed.captured_at if parsed else None

    def to_bytes(self) -> bytes:
        """Canonical JSON serialization used for signing/hashing."""
        payload = {
            "hashes": self.image_hashes.to_dict(),
            "attestor": self.attestor_fingerprint,
            "timestamp": self.timestamp.isoformat(),
            "metadata": self.metadata,
        }
        return json.dumps(payload, sort_keys=True, separators=(",", ":")).encode()

    @classmethod
    def from_bytes(cls, data: bytes, signature: bytes) -> AttestationRecord:
        """Rebuild a record from wire format plus its detached signature."""
        obj = json.loads(data.decode())
        return cls(
            image_hashes=ImageHashes.from_dict(obj["hashes"]),
            signature=signature,
            attestor_fingerprint=obj["attestor"],
            timestamp=datetime.fromisoformat(obj["timestamp"]),
            metadata=obj.get("metadata", {}),
        )
@dataclass
class Attestation:
    """
    Full attestation including the raw image data.

    This is the "working" object used during creation/verification;
    AttestationRecord is what gets stored and transmitted.
    """

    image_data: bytes
    image_hashes: ImageHashes
    record: AttestationRecord | None = None

    @property
    def is_attested(self) -> bool:
        """True once an AttestationRecord has been attached."""
        return self.record is not None
@dataclass(frozen=True)
class VerificationResult:
    """
    Result of verifying an image against attestation records.

    Answers: "Yes, this image was attested by @photographer in Kyiv on
    2024-03-15."
    """

    is_valid: bool
    match_type: str | None  # "exact", "phash", "dhash", etc.
    attestor: Identity | None
    record: AttestationRecord | None
    error: str | None = None
    # Match quality details (algorithm name -> Hamming distance).
    hash_distances: dict[str, int] = field(default_factory=dict)

    @property
    def location(self) -> GeoLocation | None:
        """Where the original image was taken."""
        return self.record.location if self.record else None

    @property
    def captured_at(self) -> datetime | None:
        """When the original image was captured."""
        return self.record.captured_at if self.record else None

    @property
    def attested_at(self) -> datetime | None:
        """When the attestation was created."""
        return self.record.timestamp if self.record else None

    def summary(self) -> str:
        """One-line human-readable verdict for display."""
        if not self.is_valid:
            reason = self.error or "No matching attestation found"
            return f"Not verified: {reason}"
        bits: list[str] = []
        if self.attestor:
            bits.append(f"Attested by {self.attestor.display_name}")
        if self.attested_at:
            bits.append(f"on {self.attested_at.strftime('%Y-%m-%d')}")
        if self.location:
            bits.append(f"in {self.location}")
        if self.match_type and self.match_type != "exact":
            bits.append(f"({self.match_type} match)")
        return " ".join(bits) if bits else "Verified"
@dataclass(frozen=True)
class ProofLink:
    """
    Shareable link to an attestation proof.

    Photographers share these to prove provenance:
    "Here's proof I took this photo: verisoo.io/v/a8f3c2d1e9b7"
    """

    record_id: str
    base_url: str = "https://verisoo.io"

    @property
    def short_id(self) -> str:
        """First 12 characters of the record id."""
        return self.record_id[:12]

    @property
    def url(self) -> str:
        """Short link using the 12-char id."""
        return self.base_url + "/v/" + self.short_id

    @property
    def full_url(self) -> str:
        """Canonical link carrying the full record id."""
        return self.base_url + "/verify/" + self.record_id

    def __str__(self) -> str:
        return self.url
def _hamming_distance(hash1: str, hash2: str) -> int:
"""Compute hamming distance between two hex-encoded hashes."""
# Handle different length hashes by padding shorter one
if len(hash1) != len(hash2):
max_len = max(len(hash1), len(hash2))
hash1 = hash1.zfill(max_len)
hash2 = hash2.zfill(max_len)
# Convert hex to int, XOR, count bits
val1 = int(hash1, 16)
val2 = int(hash2, 16)
return bin(val1 ^ val2).count("1")

View File

@ -0,0 +1,337 @@
"""
Local storage for Verisoo.
Two-layer storage architecture:
1. Binary append-only log (binlog.py) - stores raw records, federation-friendly
2. LMDB index (lmdb_store.py) - fast lookups by attestor, image hash, etc.
Directory structure:
~/.verisoo/
config.json # Node configuration
identity/
private.pem # Node's private key (encrypted)
public.pem # Node's public key
log.bin # Append-only binary record log
index/ # LMDB index database
data.mdb
lock.mdb
peers.json # Federation peer list
"""
from __future__ import annotations
import hashlib
import json
from datetime import datetime
from pathlib import Path
from typing import Iterator
from .binlog import BinaryLog
from .exceptions import VerisooError
from .lmdb_store import LMDBIndex, IndexStats
from .merkle import MerkleLog
from .models import AttestationRecord, Identity, ImageHashes
class StorageError(VerisooError):
    """Raised when a storage-layer operation fails."""
def _serialize_record(record: AttestationRecord) -> bytes:
"""Serialize a record to bytes for storage."""
data = {
"sha256": record.image_hashes.sha256,
"phash": record.image_hashes.phash,
"dhash": record.image_hashes.dhash,
"signature": record.signature.hex(),
"attestor": record.attestor_fingerprint,
"timestamp": record.timestamp.isoformat(),
"metadata": record.metadata,
}
return json.dumps(data, sort_keys=True, separators=(",", ":")).encode("utf-8")
def _deserialize_record(data: bytes) -> AttestationRecord:
    """Rebuild an AttestationRecord from bytes written by _serialize_record()."""
    obj = json.loads(data.decode("utf-8"))
    hashes = ImageHashes(
        sha256=obj["sha256"],
        phash=obj["phash"],
        dhash=obj["dhash"],
    )
    return AttestationRecord(
        image_hashes=hashes,
        signature=bytes.fromhex(obj["signature"]),
        attestor_fingerprint=obj["attestor"],
        timestamp=datetime.fromisoformat(obj["timestamp"]),
        metadata=obj.get("metadata", {}),
    )
def _compute_record_id(record: AttestationRecord) -> str:
"""Compute unique ID for a record."""
# Hash of (sha256 || attestor || timestamp)
content = f"{record.image_hashes.sha256}|{record.attestor_fingerprint}|{record.timestamp.isoformat()}"
return hashlib.sha256(content.encode()).hexdigest()[:32]
class LocalStorage:
    """
    High-level storage interface combining binary log and LMDB index.

    The append-only binary log is the durable record store; the LMDB
    index provides fast lookups by attestor and image hash, and is
    rebuilt from the log whenever the two disagree.
    Thread-safe for reads, single-writer for appends.
    """

    # Default root directory for all node state (log, index, keys, config).
    DEFAULT_PATH = Path.home() / ".verisoo"

    def __init__(self, base_path: Path | None = None) -> None:
        """Open storage rooted at *base_path* (default ~/.verisoo), creating it if needed."""
        self.base_path = base_path or self.DEFAULT_PATH
        self._ensure_directories()
        # Initialize storage layers
        self._binlog = BinaryLog(self.base_path / "log.bin")
        self._index = LMDBIndex(self.base_path / "index")
        # Verify consistency between log and index
        self._verify_consistency()

    def _ensure_directories(self) -> None:
        """Create the directory structure if needed."""
        dirs = [
            self.base_path,
            self.base_path / "identity",
        ]
        for d in dirs:
            d.mkdir(parents=True, exist_ok=True)
        # Restrict permissions on sensitive directories (private key lives
        # under identity/).
        try:
            self.base_path.chmod(0o700)
            (self.base_path / "identity").chmod(0o700)
        except OSError:
            # Best-effort: chmod can fail on platforms/filesystems without
            # POSIX permissions.
            pass

    def _verify_consistency(self) -> None:
        """Verify log and index agree on record count; repair the index if not."""
        log_size = self._binlog.size
        index_size = self._index.get_record_count()
        if log_size > index_size:
            # Index is behind - rebuild missing entries
            self._rebuild_index_from(index_size)
        elif index_size > log_size:
            # Index is ahead (shouldn't happen) - reset index count
            self._index.set_record_count(log_size)

    def _rebuild_index_from(self, start_index: int) -> None:
        """Re-index log entries from *start_index* onward.

        Iterates the whole log and skips entries below start_index, then
        records the new count.
        """
        for entry in self._binlog.iterate():
            if entry.index >= start_index:
                record = _deserialize_record(entry.data)
                record_id = _compute_record_id(record)
                self._index.index_record(entry.index, record, record_id)
        self._index.set_record_count(self._binlog.size)

    def close(self) -> None:
        """Close storage (flushes LMDB)."""
        self._index.close()

    def __enter__(self) -> LocalStorage:
        return self

    def __exit__(self, *args) -> None:
        self.close()

    # --- Record operations ---

    def append_record(self, record: AttestationRecord) -> int:
        """
        Append a record to the log and index it.

        Returns the index of the new record.
        """
        # Serialize and append to binary log
        data = _serialize_record(record)
        index = self._binlog.append(data)
        # Index the record
        record_id = _compute_record_id(record)
        self._index.index_record(index, record, record_id)
        self._index.set_record_count(self._binlog.size)
        return index

    def get_record(self, index: int) -> AttestationRecord:
        """Get record by index."""
        data = self._binlog.read(index)
        return _deserialize_record(data)

    def get_records_range(self, start: int, end: int) -> list[AttestationRecord]:
        """Get records in the half-open range [start, end)."""
        data_list = self._binlog.read_range(start, end)
        return [_deserialize_record(d) for d in data_list]

    def iterate_records(self) -> Iterator[AttestationRecord]:
        """Stream all records without materializing the whole log."""
        for entry in self._binlog.iterate():
            yield _deserialize_record(entry.data)

    @property
    def record_count(self) -> int:
        """Number of records in the log."""
        return self._binlog.size

    # --- Query operations (via index) ---

    def get_records_by_attestor(self, fingerprint: str) -> list[AttestationRecord]:
        """Get all records by an attestor."""
        indices = self._index.get_indices_by_attestor(fingerprint)
        return [self.get_record(i) for i in indices]

    def get_records_by_image_sha256(self, sha256: str) -> list[AttestationRecord]:
        """Get all records for an image by exact SHA-256."""
        indices = self._index.get_indices_by_sha256(sha256)
        return [self.get_record(i) for i in indices]

    def get_records_by_image_phash(self, phash: str) -> list[AttestationRecord]:
        """Get all records for an image by perceptual hash."""
        indices = self._index.get_indices_by_phash(phash)
        return [self.get_record(i) for i in indices]

    def find_similar_images(
        self, phash: str, max_distance: int = 10
    ) -> list[tuple[AttestationRecord, int]]:
        """
        Find records for perceptually similar images.

        Returns list of (record, hamming_distance) tuples; every record
        sharing a matched phash is included.
        """
        similar = self._index.find_similar_phash(phash, max_distance)
        results = []
        for found_phash, distance in similar:
            indices = self._index.get_indices_by_phash(found_phash)
            for idx in indices:
                results.append((self.get_record(idx), distance))
        return results

    # --- Merkle tree ---

    def load_merkle_log(self) -> MerkleLog:
        """
        Load records into a MerkleLog for verification.

        Note: This loads all records into memory. For large logs,
        use iterate_records() and build incrementally.
        """
        log = MerkleLog()
        for record in self.iterate_records():
            log.append(record)
        return log

    def get_merkle_root(self) -> str | None:
        """Get stored merkle root (may be stale - use load_merkle_log for current)."""
        return self._index.get_merkle_root()

    def set_merkle_root(self, root: str) -> None:
        """Store merkle root after computing."""
        self._index.set_merkle_root(root)

    # --- Federation sync ---

    def get_sync_bytes(self, from_offset: int) -> bytes:
        """
        Get raw bytes for federation sync.

        Returns bytes from the binary log starting at offset.
        Peer can append these directly to their log.
        """
        return self._binlog.get_bytes_from_offset(from_offset)

    def append_sync_bytes(self, data: bytes) -> int:
        """
        Append pre-formatted records from federation sync.

        Returns number of records added.
        """
        start_index = self._binlog.size
        records_added = self._binlog.append_raw_bytes(data)
        # Index the new records
        self._rebuild_index_from(start_index)
        return records_added

    @property
    def log_file_size(self) -> int:
        """Size of the binary log file in bytes."""
        return self._binlog.file_size

    # --- Identity management ---

    def save_identity(self, identity: Identity) -> None:
        """Save a known identity to the index."""
        self._index.save_identity(identity)

    def load_identity(self, fingerprint: str) -> Identity | None:
        """Load identity by fingerprint."""
        return self._index.get_identity(fingerprint)

    def list_identities(self) -> list[str]:
        """List all known identity fingerprints."""
        return self._index.list_identities()

    # --- Node identity (keypair files) ---

    @property
    def identity_path(self) -> Path:
        # Directory that holds this node's keypair files.
        return self.base_path / "identity"

    @property
    def private_key_path(self) -> Path:
        return self.identity_path / "private.pem"

    @property
    def public_key_path(self) -> Path:
        return self.identity_path / "public.pem"

    def has_node_identity(self) -> bool:
        """Check if this node has an identity configured (private key on disk)."""
        return self.private_key_path.exists()

    # --- Peers ---

    def save_peers(self, peers: list[dict]) -> None:
        """Save the federation peer list to peers.json."""
        path = self.base_path / "peers.json"
        path.write_text(json.dumps(peers, indent=2))

    def load_peers(self) -> list[dict]:
        """Load the federation peer list (empty when no peers.json exists)."""
        path = self.base_path / "peers.json"
        if not path.exists():
            return []
        return json.loads(path.read_text())

    # --- Config ---

    def save_config(self, config: dict) -> None:
        """Save node configuration to config.json."""
        path = self.base_path / "config.json"
        path.write_text(json.dumps(config, indent=2))

    def load_config(self) -> dict:
        """Load node configuration (empty dict when no config.json exists)."""
        path = self.base_path / "config.json"
        if not path.exists():
            return {}
        return json.loads(path.read_text())

    # --- Stats ---

    def get_stats(self) -> IndexStats:
        """Get storage statistics (index counts plus the actual log file size)."""
        stats = self._index.get_stats()
        stats.log_file_size = self._binlog.file_size
        return stats

View File

@ -0,0 +1,489 @@
"""
Attestation Verification Module for Verisoo.
This module answers the fundamental question:
"Was this image attested by this identity at this time?"
Verification is the counterpart to attestation - it validates that an image
matches a known attestation record and that the record's signature is valid.
Verification Architecture:
-------------------------
VERIFICATION PIPELINE
Input Image Attestation Record
Compute Hashes
(SHA + pHash)
STEP 1: HASH COMPARISON
Image Hash Record Hash
??
Exact match: SHA-256 identical
Perceptual match: pHash/dHash within threshold
STEP 2: IDENTITY LOOKUP
Fingerprint Identity Database
(resolve public key)
STEP 3: SIGNATURE VERIFICATION
Public Key Verify Ed25519 Record
Signature Signature
Valid / Invalid
Verification
Result
Match Types:
-----------
Verisoo uses multiple matching strategies to handle real-world image sharing:
1. **Exact Match** (SHA-256)
- Byte-for-byte identical images
- Fails after ANY modification (even metadata changes)
- Use case: Verifying untouched originals
2. **Perceptual Match** (pHash, dHash)
- Based on visual content, not bytes
- Survives: JPEG recompression, resizing, format conversion
- Use case: Verifying social media copies
Perceptual Threshold:
-------------------
The threshold parameter controls how similar images must be for perceptual matching:
threshold=0 : Exact perceptual match (rare for different encodings)
threshold=5 : Very strict - minor differences only
threshold=10 : Default - handles most social media compression
threshold=15 : Looser - catches more heavily modified images
threshold=20+: Very loose - may have false positives
The threshold is measured in Hamming distance (bit differences between hashes).
Lower = more strict, higher = more tolerant.
Usage Examples:
--------------
from .verification import verify_attestation, find_attestations_for_image
# Full verification with identity lookup
result = verify_attestation(
image_data=downloaded_image,
record=attestation_record,
lookup_identity=storage.load_identity,
)
if result.is_valid:
print(f"Verified! Attested by {result.attestor.display_name}")
# Find all attestations for an image
matches = find_attestations_for_image(image_data, all_records)
for record in matches:
print(f"Found attestation from {record.timestamp}")
"""
from __future__ import annotations
from typing import Callable
from .crypto import fingerprint_from_pubkey, verify
from .exceptions import HashMismatchError, SignatureError, VerificationError
from .hashing import hash_image, is_same_image
from .models import (
AttestationRecord,
Identity,
ImageHashes,
VerificationResult,
)
# =============================================================================
# TYPE DEFINITIONS
# =============================================================================
# Identity lookup function type
# Takes a fingerprint string, returns Identity object or None if unknown
IdentityLookup = Callable[[str], Identity | None]
# =============================================================================
# MAIN VERIFICATION FUNCTION
# =============================================================================
def verify_attestation(
    image_data: bytes,
    record: AttestationRecord,
    lookup_identity: IdentityLookup,
    *,
    perceptual_threshold: int = 10,
    require_exact_match: bool = False,
) -> VerificationResult:
    """
    Run the full verification pipeline for an image against an attestation record.

    The pipeline has four stages, each of which can short-circuit into a
    failed VerificationResult:

    1. Hash the candidate image (SHA-256 plus perceptual hashes).
    2. Compare those hashes to the record's hashes — exact first, then
       perceptual within ``perceptual_threshold`` Hamming distance.
    3. Resolve the record's attestor fingerprint to an Identity via
       ``lookup_identity`` and confirm the identity's public key actually
       produces that fingerprint.
    4. Rebuild the canonical signing payload and verify the record's
       Ed25519 signature with the attestor's public key.

    Args:
        image_data: Raw bytes of the image to check (any PIL-supported format).
        record: AttestationRecord holding expected hashes, signature, and
            attestor fingerprint.
        lookup_identity: Callable mapping a fingerprint string to an Identity
            (or None when unknown), typically ``storage.load_identity``.
        perceptual_threshold: Maximum Hamming distance for perceptual hash
            matching (0 = exact perceptual match, 10 = default tolerance for
            social-media recompression, 20+ = loose).
        require_exact_match: When True, only a SHA-256 byte-identical match
            is accepted; perceptual matches are rejected.

    Returns:
        VerificationResult whose ``is_valid`` is True only when every stage
        passed. ``match_type`` is "exact", "phash", "dhash", or None;
        ``attestor`` carries the resolved Identity when one was found;
        ``error`` describes the first failing stage, if any.
    """
    # Stage 1: hash the candidate image once.
    candidate_hashes = hash_image(image_data)

    # Stage 2: exact-then-perceptual comparison against the record.
    matched, raw_match_type = is_same_image(
        candidate_hashes,
        record.image_hashes,
        perceptual_threshold=perceptual_threshold,
    )
    if not matched:
        # Different image — report both SHA-256 values for diagnostics.
        return VerificationResult(
            is_valid=False,
            match_type=None,
            attestor=None,
            record=record,
            error="Image does not match attestation",
            hash_distances={
                "provided_sha256": candidate_hashes.sha256,
                "record_sha256": record.image_hashes.sha256,
            },
        )

    # Map the internal "exact_match" label to the public "exact" label.
    match_label = "exact" if raw_match_type == "exact_match" else raw_match_type

    if require_exact_match and match_label != "exact":
        # Caller demanded byte-identity; a perceptual match is not enough.
        return VerificationResult(
            is_valid=False,
            match_type=match_label,
            attestor=None,
            record=record,
            error="Exact match required but only perceptual match found",
        )

    # Stage 3: resolve the attestor and sanity-check their fingerprint.
    attestor = lookup_identity(record.attestor_fingerprint)
    if attestor is None:
        # Not in the local identity database (possibly an unsynced peer).
        return VerificationResult(
            is_valid=False,
            match_type=match_label,
            attestor=None,
            record=record,
            error=f"Unknown attestor: {record.attestor_fingerprint}",
        )

    # Guard against corrupted or mismatched identity records.
    if fingerprint_from_pubkey(attestor.public_key) != record.attestor_fingerprint:
        return VerificationResult(
            is_valid=False,
            match_type=match_label,
            attestor=attestor,
            record=record,
            error="Identity fingerprint mismatch",
        )

    # Stage 4: rebuild the canonical payload and check the Ed25519 signature.
    # Local import avoids a module-level circular dependency with attestation.
    from .attestation import _build_signing_payload

    payload = _build_signing_payload(
        record.image_hashes,
        record.attestor_fingerprint,
        record.timestamp,
        record.metadata,
    )
    try:
        verify(attestor.public_key, record.signature, payload)
    except SignatureError as exc:
        # Invalid signature — the record may have been tampered with.
        return VerificationResult(
            is_valid=False,
            match_type=match_label,
            attestor=attestor,
            record=record,
            error=str(exc),
        )

    # Every stage passed.
    return VerificationResult(
        is_valid=True,
        match_type=match_label,
        attestor=attestor,
        record=record,
        error=None,
    )
# =============================================================================
# SIGNATURE-ONLY VERIFICATION
# =============================================================================
def verify_record_signature(
    record: AttestationRecord,
    identity: Identity,
) -> bool:
    """
    Check only the Ed25519 signature on an attestation record.

    This is the lightweight counterpart to verify_attestation(): it rebuilds
    the canonical signing payload and checks it against the supplied
    identity's public key. It deliberately does NOT hash or compare any
    image, look up the identity, or confirm that the identity's key matches
    the record's fingerprint — the caller is responsible for those.

    Useful for log validation, peer-sync verification, and batch processing
    where the identity is already in hand and the original image may be
    unavailable.

    Args:
        record: AttestationRecord whose signature is to be checked.
        identity: Identity carrying the attestor's public key.

    Returns:
        True when the signature is valid, False otherwise.

    Example:
        >>> for record in storage.iterate_records():
        ...     identity = storage.load_identity(record.attestor_fingerprint)
        ...     if identity and not verify_record_signature(record, identity):
        ...         print(f"TAMPERED: {record.record_id}")
    """
    # Local import avoids a module-level circular dependency with attestation.
    from .attestation import _build_signing_payload

    payload = _build_signing_payload(
        record.image_hashes,
        record.attestor_fingerprint,
        record.timestamp,
        record.metadata,
    )
    try:
        verify(identity.public_key, record.signature, payload)
    except SignatureError:
        # Tampered record or mismatched key.
        return False
    return True
# =============================================================================
# ATTESTATION SEARCH
# =============================================================================
def find_attestations_for_image(
    image_data: bytes,
    records: list[AttestationRecord],
    *,
    perceptual_threshold: int = 10,
) -> list[AttestationRecord]:
    """
    Return every attestation record that matches an image.

    Each record is tested against the image with the same exact-or-perceptual
    strategy used by verify_attestation(): a SHA-256 byte match, or a
    perceptual hash (pHash/dHash) within ``perceptual_threshold`` Hamming
    distance. Matching records are returned sorted by timestamp, oldest
    first, so the result reads as the image's provenance chain.

    Typical uses: fact-checking (who first attested a viral image),
    building a provenance timeline, and duplicate detection.

    Args:
        image_data: Raw bytes of the query image.
        records: Records to search, e.g. ``list(storage.iterate_records())``.
        perceptual_threshold: Maximum Hamming distance for perceptual
            matching; the default of 10 tolerates typical social-media
            recompression.

    Returns:
        Matching AttestationRecords ordered oldest-first; empty list when
        nothing matches.

    Note:
        This is a linear scan over ``records``. For large databases prefer
        indexed lookups (by SHA-256 or perceptual hash) in the storage layer.
    """
    # Hash the query image once — this is the expensive step.
    query_hashes = hash_image(image_data)

    def _matches(candidate: AttestationRecord) -> bool:
        # Exact-or-perceptual comparison; we only need the boolean here.
        same, _ = is_same_image(
            query_hashes,
            candidate.image_hashes,
            perceptual_threshold=perceptual_threshold,
        )
        return same

    # Oldest-first ordering: the earliest attestation anchors provenance.
    return sorted(
        (record for record in records if _matches(record)),
        key=lambda record: record.timestamp,
    )

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.3 MiB

BIN
test_data/3072_RSA_QR.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 10 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 736 B

BIN
test_data/carrier.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 239 KiB

BIN
test_data/carrier2.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.7 MiB

BIN
test_data/carrier3.JPG Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 MiB

30
test_data/phonebooth.pem Normal file
View File

@ -0,0 +1,30 @@
-----BEGIN ENCRYPTED PRIVATE KEY-----
MIIFJTBfBgkqhkiG9w0BBQ0wUjAxBgkqhkiG9w0BBQwwJAQQZA5S460JEEzHr4Gv
6SHaxwICCAAwDAYIKoZIhvcNAgkFADAdBglghkgBZQMEASoEEIUT3kxmLKusysd+
g2eLYzwEggTAjVjjUGenOSvsc9jyPzq+bvpkml1OXxbPh/014rge+wpSd8Q937eK
6CCfkhp7gGpcK2/Myt9RzATHRFj3Y0t2HNrLXHhBsuQrhO6Nd4RIMhRLWbZL7eyV
hjrACXDTNOJIMHaMj17qu2bWDhoQK9khtYFKTiGnXJgw/qheaq+XoV/dcDXIC3/m
3wlveYLxRB+907u9Ddjqjhyz+58IWZozxaEjCcX7UIdJLul0RvBhAT0RSBGzA1Zr
kvuIya/rx37vtHu4VDBijZyxlieMAXp7oEsi4vC6rEWMBO+mupf9scTuxiO6UJJp
+kh1aH0zBep5X5pseHfsZmtjF+ExfXQDEDDBKIXJteoyozaT3cwXw+0f3+ba2fGl
4gI+SiZeprhOLRAuh6z1HSshSe3+SHubfVQiaZWrrusQOlE/CbxXF7MC6p7YBuw7
UIl4shjqERe9mSj4bRtCw7DBqnKbCxQjqgAN2P1ELuiH6f+z8kd//AFBMp0IBtwR
AlmIl0yT8x209Kd8ztpqRpoO87FJNOVfmTKIIZqVQls5jglPoeL6xgNdruTydMr8
4fTqW+O7V69F7hASe4Zxu6VZYDqb9Qg2DEwbIsgERL9t/7bO6Lhpfsk7J4YLgaqu
Tq+BcP62J73aq9lo4VJlA7NaSOzH3Sqi78JCYq4ZrttGbmOqSAKVxDsXq7sI6sJA
va97f5pxhU+g4o0iu1rkaygGA08Ajs/8AzJ9Oyj65zxNONOfBRDWYvfbia1xKBMl
QGnHuyBFAvOvSFwq2qJ7+yUB7PMkXar/Gx2dQrW7a/2ahqjhO4+ssUKbeOpup4K7
BIXob8guks3s1i3dl0wap8GtwCgPLduEXSvQ2ORiU/avpYdCAA8iqUaxXalZ/lhe
nfTy8Uz/BBXpunTpHJ4A1ruDrdigfoYiI3vnVB1DglX37XillmysO/gu5gwYECHz
OTZSUevcWw88rVVRbUelIs3FwmywCT+NWXJDtfgm1PCXchlJmQx2zjJMBwez3syn
u+SY84ntrB0hyAWmwaHtGbwe4Z9u1FnZ7j+0Y8vTAD4LeWJls34RkboXhzNlJYn5
s4zp619MY+l+YPgQubhFEsCr6yzPOXQEdg1pk/liZFO9sh2tFR1teg3bM4JKn0w1
8qpdUmeY3tTU/+Vk9UUZSqhMk8No59a/8//26KN9AOOUUv7j8yLrjsonUkuvkadX
EnsJHVlOnwe0dt+4ll23Hf5+Ka8KjNYAjdeyMrtS5XVnz0zOC6KLnWori+DbuB4n
jezwLC1cHU5KbVDRCnssEN7di0i1UlFFi3oujvC8DOD0k57+rmwpK26gj61tCiwn
TcIvzIvtSNeFgCjrIVldFt2rd36nvgVK6I6NyK4EAdLdVjqV0gVZ5WVhV9x50ZNi
ADoaidbHoxVTBt3ZkKMXjxJss4YtTDerUS3xD1bHMMtSQKMYhe1u/n1ecwkyGaAv
9s9ldUUwmGU6wbHpIixXTlDeRT/w3DVHLlEjHRnqv1o88wJV4kALZxUCfgLaaiQo
SpBl6v1Q70MXd22N+ywJTPS/mScEMb4NiemlNFSVGpT6EioY0lofHB7YNaB4UZES
mOcTA23IguMFuU/jGYp04cGT+gE4X+7CzA==
-----END ENCRYPTED PRIVATE KEY-----

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 MiB

BIN
test_data/qr_scan.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.5 MiB

BIN
test_data/ref.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 159 KiB

BIN
test_data/ref2.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 422 KiB

BIN
test_data/rpi_20260102.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 236 KiB

BIN
test_data/scandal.txt.gz Normal file

Binary file not shown.

View File

@ -0,0 +1,353 @@
H4sICKvmUGkCA3NjYW5kYWwudHh0AJV927LjRpLke38Fqh727JqRZ6x3bczaWg9lpUtLp1u3VdVO
WT+CJEhCBwQ4AHgo6o/2O/bH1t0jIjPBU5qxadsdSVUkCCQy4+Lh4fGnP/3pT9V/+L/31Yev3v/4
9fvvq6cfqy9/+u6bH57e/2ffqd6P8/EyVl8Nfd1XXw+3rvmT/+8/+WL1sd50TTXsq+3Qz00/T//p
N/S/r471eW7G6s//tY//z//ax//Xn+7+V3zku/c/f/zml+op/fHHofpwbMZu2D5X3w3dqZmq6dhU
7VTV3bW+TdWM/7oOp7p/rJ6qY/3SVFPT7YZTdWzqcVcd25Nd6IRlaIcefzxWl36H/1v3t2qY+d99
fWrwdfwlrtvc/DeabdeeJ/xH3e/sGuexwZXbvp4b/+HjYOvMq0zNb7jIXF3rqeqHGR+oZ/xFtW+6
mT9m12hOg+6jfm77ah6qbsAt74exehqbvqne77pmfKzed118clrxBuxqQ9/4ndTj3G4vXT12t1V1
bcamqjfHYcQ1Zl6VD7Idut2K97xtp6baXHATu1M7Ymv4rWzqru63za7CI+0eq+8a3voKyzjXz1ji
eaVnPA3TXJ2bcd9s52ps6mno2/6QF2XYTM34wj861dtji2fQrdp7GTu8AqzH1DT9yu4B66mHHit9
4tL5dfTuzl3NG8Jbw1vcV1iiutrXHe7/PEwtl0P32Tf8/nQenrX6/K1p2GNzxepMk63cxIte2/mI
6xzaTaOlrKupb7jKH4/NzRfPV4Y3j0fx88LXwmvbIzbjet38tm26jovMv9uN9ZUPzs+8NG1X7Uds
POy0h6niy3tp/EL61a3e5mP15WVOV57HGiu283XFM+Hd8V7marpsj3j+ebzoUew6+G97t8O1r3ZN
126xFXX1PS7T3fDdXy/TjAvOzQnvrOau14ach3SFcdhdtvhWtWsn/D5uC4+wxz9xT9dji589tYcj
3+A4XPmx4bKZq8uZmxbbEj+fT1Td4danSzfjub4dcdt6Y1PT82Vh6dseP3HhJ1cVLl9XW/zgMz+F
p8Wr81fvT3TE767PA95IhTXG0VvZBtFx2nAr4lXxri/jxta91s/hkfrD8njpPnBQL/iGlhJrgJ/B
ycJi3Rrtz1EbXruSdyMrkhYKW7A4ePZ3Wki8tY6rXpzXFTfh7rJph0thLf790ky8F22rE25svD26
VaOl2ulQVF07z2ZD3Lzx4t3tsfrhhgM1jm19aPTp3dju+Wb5E7B8vkO54RqcO7Nk+hZXcjuczl0z
85vnM7bGlKwI/mg4NestXgmsWdoUWIxpnvz9jzQYl3NVj8PFv6THPw7YZyPMAXbbbkqn9FRP6ejR
FvrbxAXx6O10tNevczZd9vt227qVqjfTMG60q07Yu/NsNnpll8K9YGFsVVb68W6ocS+7iqf/xjN0
4u9NAy443+yY88dlle0aXw7H5tTi3ifsoxX26snOGzcgXGs37A487vzvL2H0xuoDVqWZaarGFp+r
T2ln6bGwFzfD8OyrWXd4bmwyHSC+iWvTPPPB9M9NM1/5hrfDlj+aN0Z92siUmX3d4ZRNekVhynbj
5ZBf175tRhxXbLbxcPP19bXGIj/zF2yfhwXHeWg7PCz+jQu1gqFozp3Wl6cdT7W5mcmcL7tbem3b
sT019rPDdns5t7LDWJrTiWeRFgLnvDV/WDW/4WLDuIMzxKvQmZ3Spcxe1nEQ90PXDTKUw4UHb8DV
tt2l8VXcdvDU/FvsOP2lm5cbdxV/zzYlz8CGT1tv8K2Bb5Fnejg3HdfOn2nQ/qo7dwQDTCQW5m98
OTOejy9H/3zy+GDCYahe6sMF72e7xXafYwvvBu6Mvy629XQ5YUPQmlY/7fCrNR+PP7utp+SJPo7N
GbdRnS7jzm1DemXls4bjwn/SkdMXHJrdLf78/YwIAU6h2ow621j32WMiXAEHvO4af184jjhD6Zun
Vu4v1s3MB58ONgHHseFT25XChXB34DowlFs81f7Ci4WLGpv20JuLOLX2IziTiBx2j37Gmttge3Xi
0xz6KZaLvuWlnRGhHLFBtBftnp5gD0f+6nSskxUyNw1bYL9a7/jMcSRq/jRimUkBynPfXAvLeTJj
EDZoj13D08m9hWeue4YNbnl/wkHs6d7W69Y842BvEGcVxqfBLeCCP9Tj9riq/vyXv/xlvX7Sx8YG
Z4zrUBjeuvoVVqRHEEFjhrBjlmX77wrntOo9PKh9kz55qLZYj9ibcr3b5n9wUbCt8RAw63B8iMca
Od/L4biwSnBeE67L6KbRevu6ISJZw7I1pw0WFY5iGNJCnxANRKTM6Geisaxns4D4RUZ+Wvg4c/B0
3Pe2r/RGbPlHOu1tu2MyES/lA+0HL/RhiziUVlNLZZeamvb3Zhexl8wUTkw76gjC84W7qw8wjm7t
BrxYLNiR/7/Jl0Io0w03bkHF55+xPDB9+JtxGE6T+ZnNCBMIuz9j12CfrFIYtqJN7Gk5nmD9ET/C
oZ5541N91eUR0sB2TgiwaXoPjB+44NgfeFd4WL+SLcnUdsfh0swMwvgck8W98Ho5oMbXtxEk8g6r
6QpHjrDdHw6H3oL4cF8wTDyK/bPFXArlcbZmrZE+UNP7brt6OsfZ2TTHVn97emTKRENOh6lzokWT
xzwNgx2LY71heK8zOs8t3mOTA1W4eotEO22ydpSjmWbGL/FMComwAHpq/Sl3O+MGXyBa+jCkcGfr
LQ40t90O/zyZC7nKfHtkKUvIoMQ8+okx/jUOyoAI6sT8bqx9ITfY8+ki05E3SO8xmDE+1jwK2W+k
aP7EpTYvgm3LJIpbHt8O88Bt5M8faVyz3yMEf2mU23luib/7QnGjbdPq0NU7JU54C8/+Yn2bM7P8
pFcLd0M7y4XbWe7iKVGcEbxBfAD558qu+xJv1/wWY93xhIdrR0YOI15tvR2HabIt4h5o2x7q0T0r
Lijzjpfuh/LcMkjXZy0TOiDvODCQDUc2wFRZasRcma99oN/dM/ROJmfPc8wL+BHCDSqja227Jpem
bAO7dKtkYF9Px8IUv/3U7JTXT5cWRuU2XFZvK5l+xL246mP1Nla0+lTPkwVM2Hn4pGeLF/eI2ECT
nWs91LFGVHpm7Kp72TZ+vvHFx7fx6x/4hTdv8Vd1P9Fo7NKNPQEfaPRCsbUuSmFf+PS0yDPTV/M8
TEceq7/LxMJ7t3v/M3zRlwnp9S1u3lIPbjs3/GH6niK/fKz+iSfbtZbvzNziJ1/w9NwM1XF3MpaH
wRPCemT4mB+NL08+F75A36JZfZf++kkbs5W9xqWYDLZwLt/Z55/MCC9XWkfmAEOXHCD+alT0L8ty
RUZliUuRNKVv1wYiIOY7TbccB8NrWOymaBFH/9COXb5NpDI7REuRAbzFK8TSPK2qt7PiUoZhMHVI
7mzhLFXcNuOMZcVB4k+HdcTdb+SEVzJUvLMOW5KbZY9zRMNzUaxZHwYddF59vDiW8ZSNCDJYBol8
5Lp75sb7CIxu2tUWQW2RcSvBsgSU1m6HcArvkYGL4x9PBeCB49wfeIJuWB7GeXRM27p/wC2c6gPz
Br5H3nDxqt4rBEWU4uH73+seu86BMQaHyBsBd3QeH9Ktt/tGSMyhfWkMBcMmwy7UXcUma7h507bc
I+gKMxY3IasPEwL7nvbbd3yMy/a5s20ZaSF/ebxsNp5HdAPDChiXF2bJ5sNm2B7lrWlrauWnlpHq
FpEjforXipd/bL6o3p5uhtH5AQlkLFsn+GEEKjSGNyV5zX7mQcajERxhtGW2TGasE9YBCKF9xjUd
9LLrIFYXPMg7wpJa4jS1vyGgMlCsHhEsNF21vRD/+GnzQgCgUyLyavtt68vkV8D2EN5wHPRGdIDi
LDAmBkZyJiYUybcHCrtDM2W0q9MWG5hicNFhNAkkbkeDf06XnYWoOthNz/eMxYjQDP+FVSSwg8to
XylXK06t51p04/iZF2biV1uOVT7Ai8/XC1ASXhQr2/NUI2Ge1xNtJsMgegMkYL0/iV3o+4EZXTV1
WLNbbG+9vDCWq6rd013hoCIWEBbTPU9m/05+Ciz+m07YF0o2gR8Ou4FufxU+dtMRfqJ/4V/3LULJ
WTsFkdxL5A8p7Bq1N+j6kGEd/NF5lUsHUMajFvtQ7LiU6c3DeS0cdFB84rvu2Dgcinho9oOBF8a0
d4tMlqdOIfvGF2Z3YTTauj/CEjzRRNM9ICrqsUFgEHDe+CsCNy3lEtzECKhc41PDYKDjF/eNEsR0
gJ9o1BxmOzbdGaYcnk4orwW0DQMGLWHKKZvfgNUKS0mPjCtvHcZIewpO/BNDCcu3tVlogezlGupJ
0+4mMVy/bLxjsZG+AMjCFbQzLB7a8Bhg+Xfcczp4vr1pPZLltifDDzGBxNq1DPlkU9yh8xmFoHn2
275kMzITH0+GJIPfiBlOgM/3e5o8uAPYST6Zr4k+vViSDDs+MXRtmxe/v7BlzDZonBWgT+Zcsz//
3wiQ+LAWH0W4sqpku7REFvYxEQn/Oxri4sbYdxPjZB2ZIpbEG6L3lFGgg5KrsT2W4pKPsYkIvsK/
GLIzGZwBeAX5bvNbzWVfZa+/HxtAoErBhHQa5NScpwTvYdE7JjsI3WWshE4KAWAg7xldXoa/pQvm
P2PQQuS/z3HDp0YZHJ3wERYUK2VmE1bnLkaSVzuxDlSbRxibd4sr8+/e6czRITMuev1S3ij40BMX
a7bLrzwtCFeBa1nLydEXpaz7jKQblvrH4bpaBGCGUvPuFN/OXEetId5VI5fiVS/3NIggbLEF46Vb
+fK2xpXWOEcrD4h5U7xsQL+GCAjDcednMa4nX55QLL+r3dr0OkTENrDn+nZLSIbYvp+A+To4RKMI
mXsSx4TAHi412XY51UzKXt0L4Pq3cF7ha5jwKMuoGek0s1m3dov0ABD383rm92XEmvUZHnS8Aw79
uQQkwNb2OfEkTt08KvpQAOdYIhLsmXWnOYcf+NAvTcSCLaGV4bJLewJ7Sj+vnBBbj6lXRlLoS5tW
EQVRMqtNsCiy2xHXeix2JisTADBwN/g/uk0u0zysBV2taLJq1Bfgb837N/JBw8OW+dTqbbo93jVs
6cJvIuowIEamFAVilm90fSvx4G3M853vUIhCGJkwxGkgoK+YmzZRSbtKgFu7pFV2HO+4wePgyacm
o8PfXEYsvx8Jpe4pjuCuitCIm2Kq940Sdgbj2hgFVqXbDHRY32TMibD5PIxmuO2vEE1H8r2h36oP
cOV8NY/x1gRVJvBXYW4TNowwH16CL/akB27tUCkCy7Y+YIeZlqWO+i/+2raA21SvrcKBAKWlR5dr
eWlRMIsjc6Wn5HuYnkt7ZTmPxQL8a+HjNzrPMmX+xF/1tC8yCN1JG9amgQMpk0E3XUC05poGK/If
OJX63LLEh1vVbRu+MoxA9gISSBVphjW8Auvoqv9tiBX3/CDuQRsDEBprRwhR9J9M+/16MATZ59Ij
2P7R36SP6otWPp3joFl6AIQlP3Ug20qWPAZ+V0Q62CkGcNNjKYBRqRq4SMI9+QdmQXQkbBsZXOxv
CJ+GzymPbKqQXUe7McP+WB09seQumJbPshsWryxgQLzWGpmRigP4VIsaMENT5oWBYT9MEU/AkQHA
YFnTbnOby+heKt0YVmFMB+EhrL3ysDFO3z7n14x4/AKolDF7WUiVuZ/b/T5vwZ/9k+v17D7MCt9M
hpGIhJW0fP0xkjneEJbmm/7A4kMU6HnXtGJdR+ABEVo7FwieApsiPCUmgvDHbqpmURpJBkLwt9+8
jch+OvGYvj3gLmrcKv+hWF0f9Af49v7TM/5geGk8KJIrALx8GZMB041mwKo4W6c6uAc1theCNuJg
/ujltiC3JBVmCOavdNZYU/+C1p/HGjWl4YBiPQqUSrHykv/IxfNl4tUevn3IqLw9xMP8UDFQRZTD
usPDt3DhXTfBGu3n1UMZa/kL+7YZuVf14a+0t26PD/nMw86itsSUFNyh0VgLeCHPTQC7+hq+8fDz
6sE4RkSCVoub+BmWA8/xwIgmVZIevjngT75v3BBdhJvSTjN/N6ITzqCxCr6tf2c0y7UgvgCk59nC
15qJxMuNVbGru/SXAcCSn3UlVMhhXrQDvzl0jKj4D+R6v6/X5loVUfGPUV33YxDmB9e39VkjUSVH
5xCADwpGfdSTV9rVe5hp/ehXOD3Tpg6H8vCLzrbq/uQXNAH/E+BOm2HHdJr/8YkgAu1e1EH24QgI
i/Z4tHGwtZqmtfE0ogqr7blGWa6bsLLf1US3lNpvBlYUjlEx/Pye/S6xnZC5E72xn+eN0qeTAFAd
iNsjbb5EGDa2l9P5yLx+y9gnr3rKQBbb3wOxmrD6Tg4zlvCtQNk2o64/G1OJ3If3d6wDs6nZ6k/p
NT2CGKdn418kGCsZNcU3iB1sFwd4wkAa+269fvi87y/wuf8gAHgAqw8pSb898jaxx3+5IHus+5xM
F9dxj6GlTwBjPox8SO7cARab52lu+MqN7JNCsM2kbw4EN43JMK0sQ9gLdaZzaYEjvCj0NQt9rftU
8Y9LFb/J28IGkJtLXAm3ztxfuLRiLK9aW1CSODKM3qIsh43Z2IsLoAN+iwG+QAukyzwxHkj0K4Os
gGJF5sIlpg2QYcxp2vtJ21FEr8zY0UtESHcmraNXtADuG7ziA/457O1swKBmtBroS9NNiyLdFiSi
lXMTbIVqv+j5wuLSnApNj27Xky2dEEvnffseC8aSTLAqeEuLjOGfQq+1IjRwFx40Wb6gRAQPSoEI
AjykmbxszyKB52EblYOxjObV+It87E1TX8jJwMpHspvd9x51xlt1uMCmsmQHi7y1XH5sRFBj4doT
LZWEcpml3ReIsDtxYTFYxOaxiB1VnilAcpJemDwc4KzdGS59WaXC49cDrdgjKsgI8g0Z8/D/0TZL
R3w1kibYsy+HifGTG1gzD63CIWwpixGLBBJ3akxMHUQCY8gQZ1XpLerOscXbLw3ygKXoWLxfr/P9
imR4SnXVJ6UkPFD2BSJlHpJYtnIUBafY/H67H9o5oBdbqlzGs4WwqwgXuxj4g5tmVSkYUfk8ACMl
KU/HjG6QYMDqniFj1JZcV0VBZJy8IljYSPgTFJ+RPRs0TaLPrjU2CJZdiKYc1TBEQTAfQS8oygVo
S17wrkbGrHyKuT6n4/GVVUPeLKPD9Dy0RI3e204WaesFN+VrufakOpH4fgTg90z/8S/htVkfV/Gx
sfw4hUi5cN61p41woJrvCN6hcdaAkm8/2uR5cCU9SuS/i5hl66vtRDRWwazIBNhbiUXA5Dn4nk7u
RUyAtcA74sZQ1KIQiXdB6uUzn8d4cXDuJOZ4QTcl3hM4Pi/u7OGKnHxh9CSC9+sNQUla+C24cSvn
zFl40ZwLvw1nXT/Hc8iEKhkwHq5iJj6cbD3+tlNelHPtfVeTMzjgdfM9AUt9to0PN8s/cGPZNwDX
A2kHdrd1vNhvAa641b3qLbBIjHvlpWlZkDnfYGm/RLEg1huhuNU6mb+QG3M5ZyoeoJGXYJD5Y6ms
M2obxx0BhZ/sjvReFTRWe6TkcUtGk7RoA8jBaMi4TGs94v+1W2BF2CwZThDj4AIMYTLHmm/J2OCG
UzOuFS9iSw6nEnb8eL3DC7MblH/uE59jwXk06nRT7AY8On2yGAu+AehwV75GXEBZGJzpXMzj7m+e
N7DzXCevfby0v9M40I+v7hhiZ5JehNcGjbgo8ziBxLice09cjomCQcohspd2shpgq0InywR2tN0S
/i1Q3k4k33gc1eLqrR7cls+uIsut4Gq/yE1hPMVkHFNdx3FAFlO5FiDuR63Gyo48bcLIyFD3o2Xv
kOYKl1cwclGUeL7oKEYq2vQHi9MBp8Kz1NvE4n37Ty99wUEx/Hxn8LwyQb8vnULc7XRElqKav/HP
9TBRsDAgIMIyRKOC1+hdmRgL+TMH6xVuZoPKitz0MEYNHkSANN7ZgNu+TKKCIgxjtUjFcX/v4pov
scefRxwzYT1kcdfzq5w+YChS7kqaHVIYkih9G38NhxGxhIe24JTWPFoiKt4hx6o5Wc2ceEc9Jdf5
6cjsgl7gybnLPb1jvvMMZP3TgWT/C17PQ8WvFN3/G17tP5Bkk9adY91e8La6RzwrJWKiVDYQeNx8
gk9X5p3t0VeWiPgG9VtLxVGG4siHWl+Dk8cPAjJjeyygViNJkN7WBO0jY5m4vz03WUFKOQn/UYRu
SO7pdOmtI0CXT1gYQOq+KdCUkQRc8Ues4uNMvG0tyCjj30gO3Hn6keBfwYYUvQh8XV5EspyGFQhR
/Xv85jIK9vczCexnxhLWpVxgNspYqGl79FZ8S5W/Bdhub3Y6jjzLVk2Vlc3u7NErPamuCvuQb8u3
61s88KY1M8ofVBnF+ekwCo3Va7fOiUVB48ZK5BepOqoUpLCVxjRW/qr3Kyx/IzvTD4uX+n4WRMh0
uw20LOrHxm0R/MmHNwxXR2lvLQ1Xg/1Z0aw9ViL+3e/h9XH1ophtkHstdyMeXxHCewy9+mx89pah
9tPj4pDpadAIw+KTXp58yduc3SiRo8VFzTyQbXsPvh8vh4v1FE2DpaHG8NSZvZKhbJS2+uB9Alg6
0CF7lQqU7Wc+p58q/DaL2kKS6KpTCxLIjZ02dBCPtUzigdCOJobdzfqNnPL3G7wLOQzGDSwWS6Hv
1YsN7Z15RCh5624L+GPbItQ8WR15iiKFYSrGvd46YcoImmzWqs0LHYumHdY/mDbz6VkBVVzF6rHe
/4FtOokAEHT9CWWPXd1le4QSjdNTGPXYS1/UahY879a4JrZxxCkVHubwbSeG1arc40bZSZ1x9ozf
sfLD6/w0ngzg8miJMQGSBTzys/ov8BE3ya9Wu0N2lZac7xWLDmr9SYFndIdMSDu7ot4d+VYOziLn
8paDYUp84lve7j/lUoxjk+7JVU2OAAmnbzyrR8Y3GT6JLDtiOATJFoU4fTiBv71TfjxLCxgYO+Hc
qi/DyFMpi0g+gcxr8hJym5hYXXIXbAuBl9n6aWn79M5K0IJZo1AjFj7dUhrxIJM48QaNwK5eEX3k
AEwPh3obOQT/NnO1vHb1Q/0r4qhbRCeg8jQEOvU4k4oYll/XMjIFwXOV+YjHyNQdP7BWPHr5F65z
SdyMWst0Fhs444/p7Z7VQ0j4UmW3a5+517EjiLKRm4o+BqG0B2WuQ295bgorGSD6K8RDsaHO/HlY
stlZsQaLpVtRLKsc/TKSIdIWJOeDaF3uD+twgiIROUjEdOELrkptsfI/BGd8Ot5imZ4EDJzOsxdw
t030uxSVLhQrbsEcevfqxNjv53dnvQqz05KjuPeU02I7gRGH8g8szOI+/wQWfNOdqm+HeZ5gRmCe
PoB+OJ3YFvIy+JInE4BGQa7N1xcDo79iO0O3/hugpQyBFwbiH8blWvRzvYJvVOTNoVvY5c84IhIg
jImmnTEkR6nmQts+UzIOSnBSksz+RDqSWR27DYn6qK77z38+fEzQJ8J6VVbsmKu5yLz45uLdXxb9
pqqGuUeU2a1ZMcUStWDi1Py5oDnxh+gKCd2wuDqEn3TrEIgaiNNagnB7loeqlyK1UW8Nt9kO8Apx
Ie1w5Ahsl4qqzhnGcJii25xkAg+j7vnI59EcNT+ydJykrvodJcMkNy6GdelPrXhcW4dHszeu5dT8
tfrAu92LWSaWWM3gdndx1NqSOE+3tBH4Ej7hg7UIN6pN8GHqLXB+RLbB+1IYzP4aRR++mrsX0YWt
F6ns2k71PsURbt3Nndajhy55Vf5hfH9aX+tKP/sW4KH9LWGDr85uvEVP+WDQF65MbCyRmmwlIrvf
DWd1AlQTiQPqnNwNLAQnkJwYPDmtQM7Px8ksi1qe2OrrhXF6a5KrUjzK/bhr2XVHGklsFK0Cfuqy
+ZVt42r99XivbEzLuzYCtz2brKJMjFAW5E03zE8FUk1yspB/rhvabnXHTBL6Hdp0jkYg8v7LIi53
9G9DJhG6vTdt0VkvSGoGpW/NRKpW3TzctRdvggHu5drzMVt15vlrJMzYggxf055FsZNBLShZb+4z
6e8upzewZ6Ng4R9xR3/H+gQib9sYDWj/+hfPhL9SNbYDB2h95De/r9l21bHkx//8GbASW7X7vq6e
2PqNHsjqJ3qsZDttu6/X2CRvql/w3kfHDriH6Nm2XIADymLHGtdv1dOPuzFy7nr97yS8eThhrLfk
P1bWUFUYYNHTTsJ/auJ8qZmsFcAdW872xMoLfKJG8SCnCFVgjkKCaGmZ1FMnBhNLZPFw3pvg3aX+
FWWrBX8hCoz4EeORoH5VAP6fDC8YDfxg8p46sN8VVYwil8Z/4bcOZNWyZjZxoxNjafcWCv/R1zwl
DYK9FaEW0ZTy/bxacXgsAkjcUnFcjmqXtwemURbOh+qgkaBHR4LcTE/WF2JV8nRccSkxUtnsRSSf
VQfS79+9XZLSvGjpZJliZYcBWf8Z//cNzQ/4VUWK+QPzmvaFzipz84o+HKBcTb/4uPrNENQUuYDR
YXbLq6J7zA5iEQ+I9/JYkEG9IZApvR+tz33vp+NK3SZvEj9TCRkA/OVOV17mHCwaChoGYmBhsDLq
c5/IwMeQQgAb1Szz6fC2npPtcmtNStnur6US8FcCs38eW0t4GymgXGvfMV5I24gt1Y4zIcqCvQq0
xgERFrQZ4ZSL+8npb7NB1yw+YD+Vn1isia50rm8qusVlN3dv4sPRSY2KM1nMu9sCK3uITPelN/dI
dypKQXTW8LXqx0RTD/K0MYIn3AN7BCcSnOQcPP9jGooGDYH4VJDA4872gaoDdESJBfXgTibJ8UKN
j138xmSk/aKXA7WIjswFq6VqS0RGZ2oYCwNBsqfBBfk4vXdkqbsJKMutNKEBgbWzDjjtRZleulAS
LG533NwCZSuj4tp2sO+hgIZPzMFyR7KnF/gTXAwkeDHj0mX+jfxC3+1W278aA8oVcvCds2OJYJik
ols+X4X9wNJc+JKaZex+XDamscC9kUqA8IyTFU+KnTJEeKoa52N5/a/QP9UC9IPDmkXJQOpRfah/
a9Y/KETiDoNZZ1KyE9SZkdd/pBgIf/CByAmA/heSgwK1dEp2o06hLVE7JuUoHk0h/2Pt8Y+UK3Jj
wNjLDnLiyVGGwrJJA39YtwfpYCcWe9Jaqa3L4ZbqZmymiGr2GGwiTwiMzY98Oy+GQLscmJZWXG24
RsecGmPZnO6Not70FMdVPROPzkovgqn7D/wz9w1YFzfLMzQ/fn7oVO3pPYYAu6vTchnWFTWgEgw3
esPe1uzK4lMmaLIwn95Z+obXcUwWgCbxF+vhUuk4Q+fYV2zYk2v0JHcQvJ8J9/3gaYMWy0LWKSSL
9JgHxGKL0xs5PQGiJq/S0TuJA+YFx/Zuz/Pzy9d3PRbO90un+sfFdNzzS9AN6W220QhX7YxYUXTF
IRymkARiFRFTUT0GV/8mRimM2Un2TlI3Dlb3qERUP2Dv1belmxS/OQjSpOXr1yY+1h0e6ijKrb72
wn/rzLQLBZf5QjUtjx91RWseEMweFG8az8zqlmjWUFYhBOfWCUp/XHrsq9RQlkTFWw5sl1JPfom8
9l9FP6k3mXJ1KH+j+pJl5t8jwiU1x8i285L0GdH7ogT1KhKc1PYCKZizEWFUgdeTegNJEka4yuwe
xkXDr8p2OHyMa+NihIPVJP8b1XHuk3E3MeQAvftMNEKuGwNJMiseP+O1yv2rPsxXVUqxWQp4mSFm
KxTcukGI+7KVPukt1dET9bnw7L3RIlOxhC0kFIh6V6KC/zDuNNLpIImSkj+w99Q7OTe1A4bOi1YL
E4kRCSWnXy+OkXWGLINgA8K47Z1xFa3m2FIHVmyNGaE2W/9A2yv0naI1g36koD6GsB2gv82mU5Iu
luE5ZLdyw8vRShQgfug5peVAjkRq+TAMeVG/+QEBE5eBUc3DlCunqVqdId2p+hLhArCD76GMhKPy
oRnPjVi51Xs80IV/hPP19+FINvonlHHvQxdff/E0FfTg5FN0xbKQ0MRaVAY97/JUJL19NSCgLLew
lgIc7jczy8nRGnNbpqWqEFnN1YS8Gj8e06Aiy0vKaE/e0ATdCcujCNbIFZaX96q2rd1uJ85scffO
M1y0vyTqHuHmpkCkJ1d1cSQ9LK8ntHfNVdYWtwYiJlU2it31fATZIG5G7/5JTi7auKxvY5tq2SFe
4KVnkUW9XMx1/QNBxqzI+P7+B88prX4KbZB7La1UY3ac2SUEmjmFogaekVQFKY2bIz8hhWM6ivyu
ermNCHCJTjU87qiaBddk2QoVGR9WrTdSILadk/HAsHOGW9a0KIWKIpqV4kLQhwGDt45TntKHxTFJ
NQErym2ayMbqjiIAt1DhWja5iU3RI7xnoL0fxmhKTcfZ2EajigbGUu0LowoC+8k7NQzk3jdqK5sK
SpSjVln5Jz0ZXaxUv6ZldTTumInhqC1uimK5KhXNa/ssfhVhGWqnHX9lsoJK2CyrHUFuhKKAc9JS
CCKPmQrpxhHps3bnWvyg1Bm6+NHw/i+0Jwf7KZc8SswUbhgdcuJRmbrIo24UA3sBQcJhOxW5KSN0
dYKiNl+84hPaO88SsrwryLFxqGy0MKo7UzAkCLUFg9G1LNE1o9wlHJaiCiGBFOGIECE+NjYHqU2A
UnVs2Su+m3JF+yhFxAyzRSQcC0Q+CVINaRCGohqi7qEsSFwVfbm0JNYUtDd9wTuiS2ZDJDLI+9Di
BYxpvmVCmkFOXPYtO8ZV6RABbNmnz7pJUkK0Lc6CbGPObm/01xCcEf20srJlcLqgKNSjTLVmGJgy
twMLfHg1BOtVHLOG1R3aXnF3z9YxbRGpkSRqPnqU6hJnB5nwRV4/9DhWkhrQcXVqlbqSq/d5/bR8
12XWZnuQ7vZU/85lMaWqsEheF8cPonIsWM740x7cmon1hdcJjZKcLZfzuEoQ3kGpY+gN0QfvklWC
vopJv6WngC6HrRjTBOPegXc0HswwqZSCFAvRXrx/gOm7NTvxHBIaGyn71CYzIqeH6Oex+rko5ZjM
R8iWhigfqw7TqnIXOAuwZ9Iv99SYHKPYrokcmPSGDBhJ+whQq0vVyYH7DT8uW8FxSIFqvMnV1ZRE
SuJoe1T5u7i89QlYY38K2IS7IWk62L6GPRO8zC7809mLlh0BgWmVxJREjVp0j/GALeKZh0B3SPjZ
X/r+5sidzIZqi5ZwytOg8Ea9J5iGJyfNZAZLeDnRr64WizfhNVRwLGHEhcaMHCMopgJzXgv/XOt5
ewzYQdphU8HAAZoLldEpu2UlWz+QaF9Wx141zn/h4CTLI4gLLS+1Bjgs/WW6AADmfmdUFDlG8EfM
8fKvlxFBFmf6XDRgbKWhkPHLLyr7odpMSUiY0TgH0ie+35UMhxGghIcfN/ImjjfnaDcgN8AlEF44
KSwYVNURFvFlmRG5vGxi6ek5leaZBmLq/pfQkYkP8DUNZiPx1TJUX7SxscsCyoSUbWGBKMgMAMyi
Mzn2Lt/A5tJ2cyi96NgVnISiKZPEupWChsla0B4h6Q19n8rWdiiaBb7PTZdebV/bkt5LpqysLdar
fslCK6CyTph4JJPc8V/Zd0N0TVjB50zNTXo3Qp3eauoXoOzYTJrMQneCsUcrugoPF90LX49E9HKk
EF0vY+rnM0XpO4fojRTxe9vgsoyUFYkam7PRF+HLlpDr2tHpp/AzpvvTzq6z6k3K7WxOkuZOor90
w2FQWzOVL21zzZJyfKPUeJJY9y337yjW8ij0sWzlYZsFSwevcxRvS2Rjkzk75t9bqRvkjVqQRbDd
r5N1UyKkD+Y8vPbksbe1hl/V9eWhrHZnouFKY1s8ZjZbsaBvXAxck5pSib9hNSvr/lq5W7L+vMT1
AVVRIlvcumeTQqqtm1KRS+1SVfwX29579lOKZXmsD0H02bBL0AOQyZiZRXnaVsXevMtcGo4tM2gi
0VkWsL6lrZXuAI/zO1WRrFgHnlaXNPl6WjFcbTwyJ4zHck5xyBT6ZxlvzkWS4dtBhyQK5W0zlUqd
LD2p4GFubVKb4nAPR1qNfIEq008t4AOig4FReoUh9FxNFJ2R37TshWKMbyhwW+TRO5Iw2kZZu+Ry
TbEfRSVoMzo6Q8YKd9escNY4sv71DFusuRNXps8qmvcHtZezpwZut6EOpzpRlLYa4yE5Nyhu83N8
jfVsEZHRNHf1LfYaH1PfNcDHGgilDd9SRTI6wEw28jDky9mLVmzHLo5tc55zDUp3BAZflPqSygJF
fzOTSXpyhhSgdtlZIH5S2wk7hOrRWiJ2jI0K2a4ddRhNjI0xBaGFqWiwEg+jzg8peR3TP41Le/fX
DzD13w47OL0bGr3HmYmSH+cnaWh+ZIyCb31onF6zo85f7UJmdcgAbkT8Vve5yEs1MdyPJqK2s9fQ
ixkpZb04KhYX5+aHVy+dNy8FVOGgG4swT6TSmSAV8QRtd9v6+FTKcG8RjM+KIKnadJA4vMn+ldzC
nkSN0hXfccqStq31SgpwYamys7YBsCTQzHkotSSwwMtV1SFvXqS+K6nmBIfPIdwfQIdQnceky2fW
uKuvN+MncaM6lMBBFvA8rp0RbbVQoTODdseeiXDlGk5vcGaP5X5wv6bvqr05vas+1VYizLn/KnNx
o30i5A84h4CIpHEeGldqDVuyU6Ezj60g1DGh6WH0bLdADD2FRZJ+NrhnH3r9ahbyTKlLOS3RMSL7
P9nqwVZfPDxT0dWwSvJzLY7G+dSpXUJswXs39WSK4s53KCprC7ATC5F7D2WCveLtDw9TyMikTDHO
kGfKdab+ye87LWaOIHLXRGEXwvHdztF2x5gYZey5Xx2q3wyeYwQmFJjarkF+2blkpcNCRTVCwrap
Dp3oX633e99Kaus8LEmRTYZVHpdVsKwTr7THop3V5xVai64zG2NyVxUt5a2lx0NDCDtIa8M6y0tq
j+ANLl+XK+OmlhRnGlPyMp+pFBMGqOF2tlIZ0WxvjUVnIcdVOHHaZkWEmCOSDnNQw6Ms6m6d295K
3PzJu6Y4BVbGsAfDmNAh9+OcmtYWdlU2q/V9shmtk4d9gv7TrfXdOjE8IS65+VOGu/XUKAkHyF4M
gradL46HT+/Hl8m3sKdmZoEt3iqEkSJu2jLLRGAG21bKRdHu+3U8I8jK+Tm1WIXMdmGaI+KCxTbZ
sS2xi5Cohb5yoBToDaB+p9Xx43a4xz1Kc3v6sxWf2JecsJJV5SCU6YqLRIz+G+v5lDs14ObRpAxS
6ft8Nop6SFbXG+EhlB5Q/UeFJOXdmVZuwElRqzLevkFAjUSAFc88fE2P6dC/yIlIBKHVItUE7RN8
xgZ5BN74rRpN/xsiDgBrtwe9uV8acYYDxU+Qid8ycr9xq8ZIVIQ8HqD+W5qJ8A0Ok1jjvyBxZDCj
MNfkCMJKmCAjv0D5/eiHNBTnzUOKQK+1i6pem7B4ZgFM0FUJufWMLQ20IuPdkHXyC7admA+yDZo7
0VimsjXias+z5eaNRYn6srpL23QQQrKdzdcWqXF7r2E05yHhlVKVb5ui5oh3tej3jSAZkVHq73ZR
Z28PBx3vOatFWAIvxd1olHziydoR0vEthIdyyHCy9z6XX7ZyM8+5JVVq2Y1L2ZN4O2AdJzJ4Lu7/
oxU4GB7XNNgpLKIzKhx9ENBq6gcyIlYm2bXy9sluPHy831m+qVaqOWITTxnEe4i0TdQSUtgQRzmu
Y9tLGfhihyWI8GERb/HuMwyniFpIbBbFeCx3XHI5qeq33HbIdJUEtLODcXd/D8iMzH8X7Q8Kim20
8Fd0VNqNMQqiLBl+1JiY1opPsgI2nyAEf2zkEdORDXLtveSOzJX/Cvme4EdreyQatkV0j9VDatT7
7It4SDrcn13/+6XPONvd+kdAY3++LgFnE4ujiFNkIHHwSOCIME7io1EivUYQW7iKx4IrutVCyPEL
EHpMqp4Wlj8ptSs+EOyldOp9CMu1CanQWEDG0B/NhqcqWLxKD6vaMRl+UzswwMIudg92m/u1HIX8
N4kNcuxHW6hBW6usOozkahKsv9U7C8Ay0AHnXtl9T0a1aLJAZxF0JEEc71QHbZgzdDSmBwOI7Egf
JQp+ioAkcBZCcJfO5iHFo58WY9Zk6Fg8VjToNbia4OacgI3FYqCYWBvO68iUG2vVbiE30Lmfy8P8
WoITKZYyXULwas6xSHVaog8XFPCtj1GdzNHWZ32Ndp91Iaege5EhS+LcNmqDS3WXsenkwgzYrDyZ
81DmmfKRmwc4yB37XksLCB/My5nHlq3DoQQ350E2G6grTO8b+7/Jij188sSsf/fwCpehzBslv/nu
tvpXWXh7Qj9zMlStnOmD+LvitWczadZP7g1j7w6HRfiilfEIM+pUyrxN28e+G0MRtHLeeXS6nDad
QaxoPeunReVaUkvnZkzTapq6VB+HWqJVJYy7FX2kUcrW73sA5wI7RIGlP8AOr2m+rwKYmkk13/KA
psWQN0zP6M0w4LHvcI+YqIjwviMCHtmaXpmN11DZkfs+B1OFnlVKNxK43D9b602g5ryMbftkZDgt
qs9yD4X+Sxo3ZWeWTEoe16F3nQUdsnSbec5jgaMHuGxzF6skG+GmcvEe7f10GDIQ+WhcNx7H9ChV
5nfq4lyLhH1qsty4nKsVOmZZmOkVvpzaZyfrTxMEqzK0E4cFCNNk9XaSPV5JS+GnMBOwqQfqxWrj
hVpk6Ah6zX+L6Wz6Tcysm+9F8PnwiMduhfqLzKy9KHB2d40VlSyYt9RDUHAX4ZIVYPxAmZ+XN0Mj
hDvh2lShBGN+jMsapYJBaxCZ3BFHmEy9V5tJV4/ObNOoKaQWaxYo9WZtPOGiOBGCHa8laJ1Zfumj
CmDoBu9wv6fUVJrVgTbWEjR6JdL9lHaR8LBpQVnvVDWjtdV2iGxHUiaZLsbM1OdVSrakzzpWBG6w
xy6BwaFrDQpFbFpVV3JRQMFLccJA7qc+GXNj8gUznPr9XDhZm7S1qEiuLE2Z+MOm/uyBmiSO6lwn
M1TH76voxJFoy7WPloWHTA0Vn/KSgH5KNiZYmnLTrJdafOwbgW/AUTgVSV3EzMTB+sHH18g5W9jD
sZK8PPGcho3uQYoYm20xifZJWRgn7dmReW68b5XRCgstTAwKjt6nEGh+V9D41eSlMlmzD8AlSjL4
s/Fe6p7pXVsMf3pcyhfLIjB0RzftrQBb99hgwy7um8JyQvwcR2HrbZNCM9WZfViWscW+dB0UIuGh
1BYvJOnARWi6HqI/vIS1Ehd3Z5NG7npoPAYVTkUpr+d4RkC27xbKeWV9p2wvGXOUoWEyxqDHm4Cp
eHUFLxaITf6K1G1/XGnEmk+5fXPPU8YKimBJG3TXk5Q5709+IDXhz7TLl00f18SEcPBxOpZ96/il
H6jc8pElpNHaa0bzH74KM/nOTg04uaC2pQNxjkTepJz+2yDbeuDldSkwc0Ge65L/9KER+9Tlnt5s
4iQ6Ek3kjm3frijDhq+LNAXd+cI91i6RlbS1Q3pbei7tKar1qh2woNvqU1drckU8TnCM2UFqpirH
2yQV2donbGSqhYI40H1rxnijj2uJIpW5ntjvZlCicIUqQP6xO/zaotyGHMzxM1XBpV1Xvwyvg7cR
hG7T7blnF7qx2KXdQOl4cKrGJMvu5IeogEWdO8ch+qFWqnLeqWPxw1yMtd1rppGa2U0zpXYmfwal
75oxzDb0zYX9tosOJmtvcXUimPcXb/oOuf6oU9hokpMrNIcwgLiAsBqa1UXZWiY9vw7mgFsPxqSt
00fAa9LBKDBgLE4xrHfIqCrbvXEe8doXCbTYL5Zb3COlUSgQV0G/Z+yH6F1x1Y7WDq71Lxk9b/au
b36+vEph0Uq9z+KCBmRyU9qxiB4TVotc6WNhJBbXeR8Y/pMJzHFhiLav1xMacBJx2sZVL0h6zoLy
GouPnbHHGKkLbZMZNCvXiUjCW1o7Pe1k8OnWAiNy0Hxr8skMLXIs8VT4tm96dlYvejbnUkI1zS5G
nZOMkmSgVoxgXK+A/BcJKq/hQNKURxLI7+Hg1CMoDeKYiglBbNSQHjgNCVET3js/ugqi8zxnhTpo
/GsCjg2mcNmWsKl8+euYMuNtNdCzfE4NbqrX9lF4oLSxzLcAoNqVW8ZyBa0IVxLdTdmIcOUt9Tn2
lxhfZEyI3IdnzUeswfp7XopvReScIhXJEjY6ZTY+r2IxO0Ao2iTOokgCti8JXtk1kdvIsby2EabA
HHZCOSECRysH52LFKm9/0tcs1CkwBU0C6dKeTHq8BsatEuL+2e1YkPC8ymOlt7kY5ZiKJYYcvm6G
v+tFCsG1xrdx8uKnRf+6mewUKDym4ZdBB1y5WFkQt9pg8j9FzN3YjFVvvGK2Pg5dU1QbkRLcyiFz
cLWpEBaU0qCxFnQMgwtssF/Yw7bwn0uyX89JHCYCJL4ww4A14zFcBk373OA8qxpf6BnkY54Vampz
pqxJaSq7JcAjFPqjfaZugWmDClxum1UmwjpzUNnGhObRZgFTGA9YR8CKG4augLPBZmnfQnCXLRP0
W4xXt0n3JGYQL0fRRagbvec2T7IE3bK2DuenObO2JNBtY5n1okIW0CcUWp0DAB99uaTjfwu51FVJ
oyfxw545d7xmsPCF5kmmx4e44xJHEwL1Po/Ur3A5RVuIZClME5oKG5ylbsF4zPKdthon5B+hhuYl
pCpCMsunLCt98CKPRtHVpIBwf6gT4o6WnofsqGBK6eGrdz8qpVr2uTjwoUwillXq64XJqbLzHtO1
LAk2Z/tiuzWTWbzrRt1XBa7kTSWX6XlVAEKns4PKVqZoTAxVeUeSFWMH4SuFrBKEXlbDo+MlDuqW
xJHcLa2uDc6LD9KOV3tTrST2pMHLVDqTlk07l9Qd+B6xSG3PPajzgHPGSKbDizlbE0bwhfFRh3xy
XTwp5YnIFIIT3vYbQEViethsC2+6SfNAzEYWoyBEEVvy7xKLJBMAsIrgUc/NrkTd69SVYEiiSjGa
QcdDI9yrl3eOo51AMP2yGe9VQnVgq4E7TWskLSZNE6VHdZwZb/FwYcDH66pC36gUDemHgOODHNxz
o605bHWVehVpcCR0FLdnsiOLiyVhu9BhTrtHd2OTh93UEpUBWnGcFmK5HDL3Num/JfWnesqbUu3s
4szeF0YKLYeYARvyLEEbt8GhexIcvXfYtmSmCMU2kT67N9zD4HO45o4QGTtQlIl9NFp40m28a7fe
JDtVwaKOFh3zLNiZ0ww2BDSfY8iR1Jf05qs8L1Jft9OVgptbkwrCUhxQ6vBj6gxyglI7rdefMkRa
24wTlrrbfveHkgyfbNxrSMQto1U57UsfcEk8vKlZK9s1uNX2YTMu+OfetonX8bu0Iwcf18OzBnN8
C9E6AkV+ndrKxQ+ux/6Y7IKI7xkGtiZfyUCd5bVFoDUmh9TRpMxRql4SPTUxjyGriaQxquO8zEoL
hRFmxPUtB6kxHDbGUE+q1TXmOLdCjl8vy/1qL7Pl79ST3NOJkAvHam6i6iW9HTUsalJr0XRl+kWM
RZMGfpcDcWJfvYX0eI5B8l2jd9CpAGLyRu1LGhfZ3Jb9oruhnP1un76TITR2nqsOvIQSxS1xVck1
f6dX6MUudeEGwCljBQVmUFHPmUWQBcFIvIwqnqTSDYbkOp0HrIBmmSat29R1mZAiZfG1T/ByK5qk
9gShf6kG0IkAiY9LLRQbdjGeRCsqXCxR7Li+Hl9S+GChemMFLzsqoNNrKsTobVC6FUv0Aw+yL7X9
He67QMva2QjVJnjjo6wPCzWen+Fdrm8cy/X54UkxwFvKXumuWHKkUdj46zLBSTI9y+99eg33+GcP
hg6lYbf3Ki9JK8RqLX8gCkToydCA2N3HSKZG1gkbK++xwzoAtARnbd0TmHW00+izQzQjecp67mLI
/sGAGYDQKJRlghm6BxU++eSI+BUv7y4HNCMWf0k9sfVdvMYIY7yjFEVbRM0/83Jqzgyt73F/1+Xz
XrY3cXxWpcQClAz2Sa2vHANgieDORkzACBMYEtx29rGs+q1E1jvriiSUKTYJgJsTCLydgHTkbmM0
ARYNNklMRTdh0bZp5znhMEKGPLxMOonRW00dHLjtcdso4kYSwoP8rIrHKg9eSLZutP5SFypfBD/2
05Ih0Pm2n8zLVIj0JlW5GN1zF2WtEtXRMiYc5iFLuRa0EP0Sn2FDNMioqRg65aH5ovabKrdpiZac
wEBp015bpQot5wL4mMvYSCJOEA3otMzl6D5qone+HZyyoXtiXRKXNdKmSDr2CPdcFSgZzlOMAdw+
T49J5bpeNpJyrKC205kiftv8hF+E1Z1doWUy9V/ripJd9PJlLVRD4aRTNfzappa7uts7WOVhl+oe
7LnTbOdrX06imv0/jFvn0XiKkLU/5nByptrAeep9k+tdOa/yzbIYn7OouJeQlUkYp0DaEKx8IAQG
5dEa3HUXDpJOFWdHAZUTCxle2RL7WFwflxCLvICAxC5Pi9f2vyrVsvEGJZPBGpZbpTwxZzbVH1dG
PnZiUfBtOHT4i+Sn0QE4lENWUsAT9DPYik0SwL7MFpuU47eyWQ0+YqbS3g0ZKOCrQqhEYS3tViZq
q76CG+dwu+mVuIl3xVC0963RalLmo9kcOVX5ceCAP9cMeJjEazCxgBNlPJ1r7Rbv0R1q82CY9iEF
QE6FCXliOUmb1Y52LE1WLeZYS5kFRk9D5xqiylFF99DYFPRDZts00OpMBHkIvoYockZwJurY7i2A
2Bmtp+l9eMuSl4VjGCyZOrBm0uo4xIFnBefpyFr2w6QypSHZC0FBjbrhb7Axul0KnPxAMarGpcuM
z0020ngqKsIXYeb0dukz8eqL4sWiERe5EhWkGDME92FfP1ZO6ISBVcWlSUXMDyaCblQA7JjemNw0
rxu6SPvJRenLEvN255X3YniW6b9h70QTvdX+7DzF5O1QPtoiwxI/Sdb9lPo2Gf7dFmPfPiYYJxFd
unbO0AfZUrs7HRV9CqSsa5+VeUsWu9ttM7+d5igNAYNcSIR7ShzpFDgGZdXXZ2qQynnNgP2CF58B
H2OOjQ2cJ1c6Q2GO7xPKFQ/LSKh5wPmTN6Fhy8/Gno+u/sivj0Zc2r+mGhm646UhjoU1ckBIxjFI
INbkF3LbU7RNUov63I7RMm/+JqTbn6nObNmCnroYZGV0Z+kgpJVc2NqY/N4WI+js5VGYMk1oMk84
io2Q1JP0Io0qwkVKvcNazXJeUzKQPhm7qLKO1gTEOhkXMzrQJCvRLEpCBQZn9HR850Ihdpi19+qg
r42n4wSulQ9P9ZbTX01g29LMAn3gWwY4/CIE0zJzm8Qc3wjbudSO4q6mPg98znKLRjmHL5m24TSk
qbS5JyUVpVSV8TFjHE3z3rpcPBL1oVrm2zVTPqLgOIPvy/qMhakWuuUbsHJMUoi3qk5Z0KGcCt1/
Emfw9b6rT3oNzko6UDwdqRP90eZ/7RyVV8t/eFbn8Evt4aJwqPab1/Axi8UIN0qaAyD55C32EY4k
ZIRCKWv3nYwQvdE6IMGRnZprLhzAJdbPoqzi5Yk4CvCjz8ubR7Bos2GNMsfdxtzIxGkylT2qNvm5
orOQ7yPexdNn+xD4KvOEjWnhuNwUgYbsYnw2eAvf8l2mnRhZPMoMIah5Ws5Jt7QN8SPdKfAr+J4P
mCt2tk8XlHyGAyrTa+2fonZIF5SgvT+qx6UEoSwRGLr869A6HrpPSpdJ2+hBTUuyham/zAuQSsiS
2UhkkQsFXkd2k8XjSWxgumJIqDtFkg1bG3WXFFXKIpr0UEz5McJXK7f5JIWUiBpsHhxGM1pdU2e+
c9keZFdSj9CCGGU6fCoScRBqdysU8YshIo8iNx19ULNHRoVftGg8zbQnBrEYLp5FD/8IJ31KHnFs
rKi55CYQ8ODdMs4T4Ir98G6BcBATEeLvkXea65bw5LtOSIsi2pDyHJ/Lv3eRVbKmUq2+dnGnWzHb
KlUUHmNAfQyQSNrr+0ZzjqzqqSL7Uiizdi+tWp3TmAtViHQpG1s3aARQc0/4h4hqfTDgwUBtp9cV
jyRZmyYpK9zr+4WQacUKeYELWEkpJcPUOCTrjO2NbT8vir6I1E5uOGmxaVjcHzgeslLUbX2DMF+1
slD/9ELXSWMXrRbrdUSE8hrXbDZ32yV6FykZAj7bUoZexXONNYogbY+AergXBRWRNKZHygmZeIGF
q959cHTyL/9S8BxB19Qs4yDtbninkMRq5kIrysbJciioq3sVPCGnWk2XKUpo5OJqhpsaxL2nK2Yx
hKhUatFL/cSYbhgANYNSXdi3iiJsI7rX4fCf3Mn7xuG03KI7X5trIVxx58WTuI62iLFQVM0pMhY/
t1537iS9ei81yR28Tt32Tm/xjMyA9ikuMHrlU5S1kp+hrpSe/QZb2xYxpGzQHgz/UI6942t7qYFt
WyUmN/kp8cknl7i7hMVUFUFc3k1NLL7tDZNRq8tJN8a4SVoMIdP51Gc1PzfkX4OeiCuzoQX6c3iC
WcPJoiZpE8a8HqpRk03ZEqM97hbjPRiIiGkQsWDezxwJWwDyFqPZVrelhQ3eaKVMtnVz+6LShJ/4
FI2Rxc9TFvDlZ39tEO5AHfU3Q4IXvWIuwuf0Tu+fIIt5vdMAvV1KgqIQm7uXtZCSuozJ7KEqfXVd
0dp6uVSVi/K4H1lvCrR37W0mrZfLFVsEo8dHK53aMXeWs3/EeRCKpYwy7kGHDkKWCUW29Ewuzfhi
hUMT2c7FNGuzkSZY5EnMupc1Uo+hTYR2mqJPMUrRXbszfKoHSmvA3sbHR7iyEwnaa2LU2WK5lFVf
jEooWmE+H+XZEFmrx6jtqNH7VB++aXi4pbMdcZkXeniL2M20JEA56lRkZkYd893qosfPeXLJNUQU
Vdq0oPRajdfMqQ3o1GDNzMkixGEtPDcfl0ilF5yac3nppP6EnWwzLiL3TsFjMe0rWpSW78sNShZU
Sx3KaU/7NKckYOSvJSm7nVgdpLRsdzPinwg7U+3DRZnuxvD1cLe0PWudCGJZKsJx/AJV/utyugNd
T0+Z/9d6QRdXBK5U3yafeSuYHWGU5MCymrX0bxMAnPX6kyRusDlnJ9p5G+rWGahGAObfSvKkWXIQ
GaD1n0ecErMuwhfD7jeXmMbppN9SFCzp+BjLYDra0ig49LIsB733oTleSO+n0mDUM2tmxO20N7J3
KJyEtrl4iBIrbOfMCUmjqlR3/AyTNlX+uKzFVIrZBfPuZXPvy3QsPAXSlBSVrY6eGs/YKkkFkqLQ
WhDareVmFb17BZWDK6WepXo5o9uJaFEsFQBkDcjUI3wMOvi1HdMM5LxNAltHF0bBKfwUlV6rSJRs
Lr80ix9WkPDk1PTtTN6iZBiElISLW6bd8ty4Dj+zKZHYfboco/S/xsJ/WyhM/0BJovGeElWOPxx9
pk2g164WNkRj0EtjxIqYgHpq7lqZD3xCwTCFrohOiMxSTRuvRg+bmUPHK0xoWbGKCsEmDwx5eojJ
HD40fLCaNXHwVyPf0P9XyrghFjt1iYUwZ3VsdnFIVj7N5DJRi0sIChTMRqZ8/5GKdZaxhmHpqLl1
p1ZtV/RXIYvShI+1LEbAVGoOmYfaRyRv0Q/VNHcnJ2cuPsUwBhiGh6HJyWCTEO5XeanJl6KuQIuz
UDGVVDEvfLdZ0lR6tzUxDTlN+5ReCR5nNEHqIdfNyoahW8kXjkGLVnhAtD2VGXL+0/vcjdvKFU1p
JGLIqIW/Xi5ZDB7SKT56VbDelL1OK/VEhJy6UP67QVSeOcUYC+eD3RIb7C3PvM8n9R736ENlP9vd
NEge4kX5PT1wLqZVYbua3WtqW1YgsE+8UdzyruxzwG0tJmx8acNvCFMvek9wCEPX0jhCGjKxc1bZ
a7pFZtJQk0MFnyXGQDvKF7YkM72aQpI+93ocSYK3JzG5F8Ng+a6lNMXA9iLhbjCDoYfvs8zzplcU
znucLIe6TBsFTwt6lZ7h1Xy01Ov4x59bZRFVTWeqbeT0LVRRkopXahZKMUS6xoMJDd43dAB4bxLW
DwIegt43KqtOx0QiEgRlsIOIVNZY8yaG20sNBRGQhwtFaY8gIs76d6bIdp6yesEJvuIWQN1CdjxI
3EJI+gzXKXC26ZKv5jk8FvMzPsc4sdTuHIN3OGmIlqS7eZ5mhWAaxbIFA7XiCP1N3fmShsFMcA4U
EiG30YmfC+pDnlfj5zyfofHx3tCZMh8qji/NO3cvUzEGS/ucX/NPv00NpRpcj8ACzF1SMsMmer5p
t2oXCZJl0HSDaUqrQejhUP9e/KC4lG+qH25J1M7AvlOhq2xc2tSHyhDMZSlBKY9VzELFqbQeRyNK
hv/6+Od/ZSek0WRPUGsxh/oV6xvLqTZfSaKOfRoLMhc22L3nINX+oBUygWDrW7C6H9RQR4UDQVlx
vQz46K8HaxxRr3k590g0eRquRZ/dj43zBa1jY2l0LIjhwEO8UyUM2YMegWGraQUfpTeZRPlf+A8f
79HAq/D4nO8Ez7yyYe/ysmCyRGNkfc3gWJJD8UX/R+wDq0Za6iz5Ytm4HBVu5XHoYzbete3aocEu
cWUkNLXgHGkr4WZE/5fkTeqx4/1YXSY636vUjuohWFdOvVMlI2n7BJlHdItE4YlS2Jz6qZWur2yg
tYqPiRNv6X7kSRdTUpSFRpeZ8cF9kIUNWnBumpF6yyQ1QwjOwbuHHRxFKp1rDFCz5dNxUCGrmEVh
F/MWZO4aTtGxkPbtK3PxzfTvGma/8QaO3KqxVUeMxKje8vz6DAktSGs9jxEPZhUbcoZTwQiP0cxB
hdZYXGXzrI63u751+QsfMrUVIcACTbcrfUEwMAbDXwsBIw6nrD5jAv9ahIseKZalE9b+rHtO9V8T
/4iUiufo/8hLqF+0VLopMKnA2V1HCEAwZoOSsvel5RQB0KeZvDEm7sldILu1mzgxCzFVAaiP6YNq
7RsLplDq5eakKk5iw1jnxcdlXA31yZP8kjJ+HnydinqmNBgzvbgP0pgfuXwfjpR/4SDxWcaw/yQy
VPRRuFzAylvmdgLiWfRmK0Uy9t4BFALu36hjSZX1p9SLwDJCrOtlyrIYIu6MBaebQn6C9UzeaydB
nOc8CK8rZJr0flZpeNiqWkgl0G1YwUW1G7mqqDGaYfuBUsPe8FX2r7JRL3ENXBtYJagCT847ieS8
nTpxW1Ni43oqW9ccL4m6LSCru4ZJx+VqdYjCOVFdxFIhmyiiOibNdZrWYUrcTvtz1I4rHJEiwrBg
I0qBUBohBKXuhUnc8HvLsHbHLsnOT4O98lL1IBl/P4cE3ZNKbkhyp2pw02EQtSzEZ0517oHvqbTm
QQeymfFiuqErxZl+e+WAqjKlWdBxTHEkyi+sbFk4kR/chuSGVI95Bh+peCEzlzbNHImfeHLNLuYb
pzS4JhqbtbG4IRA7TfMXXLHbYoCedX3iBwhuOkqSpiEs512J9+wLlx2GD/vyRhqBj4318gHV2DZJ
zlBJgdzPSf+uG669MlxZIYdYqaPsMiEnG6h3LXsSdeuBJBESN50fG6EtlXtRdxX3bJEccA9gsDW7
JzU/E6rEaa6TCXANQljFSk3m0euNmnQ3Kgv2tiQP8pPHqrsrxy46JJzAG0TuCsAzWaiQx3F4wjKy
Qmzf0u3SFy+/HDAqO1BgKb5wLNx6oG1ycpYEv/e6xTRZJk14cAkLrBbJdHRH9f/v/76aGOKJkjKO
9Zqjpq/Fn7wJNudncJZV9Fhm8ag7722KL+eWRSkGspQtSg0ohnigjKQhTDnyTVNGXfkSetep2srv
phROh8DLOY1lelC5npI/Mp96Zi9jbjDzNM50hzo4FEC0VZqKPCZdA/uZyVPMFBgqs5V2lo8EMqBF
xdWseZREfuwXkomLvHk5yZNaPYq6jZ6AQ3kr8O6iKCLv4l0cTvi8S6VTE45ns8EA8WFTYu4BhwGh
fSGl86px8+QxEYYtrO53AHSn+gUpZFPCOvmXHhdcRKV+5F3ZFKx2IIfoM8WxeHVXlafqfeOhOfEQ
r3LF0KI7zAVpkJXteeMLvITjC6bhHqKBklWvkR98kxsPJBSwaOKntqiFdK5/eZPv62OXqtFDnxcp
lq8D8+6ZFk2kMlmERCXjmpu3p4UYjVJvGhPRCUD2e4w42nkNLlP0glsROcDjZ4eEHxej1xKvwHrJ
VOsuJJSPLWWlY1xmsRNfw6MbA8w0crWd79TK8nt782pSKL2kxecuMc1JXYLOlzjfw/IyhShjHg6b
6k1Cf8rbeDIpvzvwKhgeCZmyOr0O6i4TbaKn3ilMmad0HHppcQz2gzYzVgG/YoEo3OhVq0+nLEsy
5VYay/0S/swoy3nIk9A/vqOce+dWlXJolw0ctWlejcmCGUXX8JWb882zsH2sruX6LttKIxua5kEs
mH1atXcF7/fRXBKzYyPQTMa+djpWil1MVY7jqF77JTusG/1EQsxzH+q1Nbn3rA1IobNGjWUvTi3c
0naOoQ5oA6pz00thFq0mwn1mitf4tBmXVJJTFxoEwHIK7KtkHRshCasq9t772TQqpgiHQnFDcUGa
Rhx909wxZpnbuUsUD+st+FP6nz68Tv+LM0O8kwOhreApDSy3RZD4m9bt9DYrJvaMyqnuxJ3+Y6hv
25+0Pvqj1wTg3SpUJLiZFOB2Np9tdP/pv6HZejmW1a1IvJq1wl17OfHZT2IlVYMNaOwuyrA3NgWJ
U9rauVAKWN7PiYLMjFY3NtGQtUtTfTR9AOYIHHEAs42gl3PSMnAaS6PwWAOB5gAzOUmRdBFmQdX7
D189PWmLfvfxh+/1dzWF8mJ6CU7dPJ//+i//Mvk+xWSo7vTY+D0QecUBGAWe8nfSoAr9eopNkMFK
sNPaY/OAIoOh5tf3bUXqREjYckqtEKoHSXjQDVf/6/HPsUf+P1sFQeDtyAAA

Binary file not shown.

After

Width:  |  Height:  |  Size: 813 B

View File

@ -0,0 +1,54 @@
-----BEGIN ENCRYPTED PRIVATE KEY-----
MIIJtTBfBgkqhkiG9w0BBQ0wUjAxBgkqhkiG9w0BBQwwJAQQJdD0f2FnF8tXObq2
HeQj8QICCAAwDAYIKoZIhvcNAgkFADAdBglghkgBZQMEASoEEFKD/tvo6am/xKKS
fiNtbagEgglQBJdTsd1JIjihIK+tcV+SbNJggJ0i7R0sh82GxZ21Oca2Ij4FndPU
rwjhyv8977dibIwt1F6oJOkWgt/DLCFVMinQvJaKdKY2Jowgj42MfiRQlFnzXJhY
GI1LHPg4/PWBNUIWKrOYOlVB+Nq4SffjQFlpmQGSxCjLwCNLZCG0ckxWBFrHg1g1
R1LPnQikBEJ1xvtyMHELlyQia2JPDwvn29vhGtT5Jr9y4762R86RgqbelbB7H5wn
4WG4b9agZERx9vwnF7NQEFpOOhe6CMjEsWdfSswAsUoz/zaHmVz2alCOlQYj1yJj
vDPbHR9NZc1UtuH7g0pbEijUIto/PZcYhXPEvb1knwOA/JY7DuCmvW1t1rNsTSqk
2L8kmjDlr2FDDcNvD2XLHVZzqp3F5jYLtXfkWpOH7rqkrvdqHeu+ve5jxCnesZ0D
rDpcmpbEwqWx/W3slpZEqAdTrSgLcXXDi6OjIzAYDEzCHO/u6djDDKzYF1ziZNxq
bq4ZogP4SfzaGehArnCbPIBIObQp8t2BuXk6veDmEHk4aPSSBbbjKhWXVSbposz4
ZvespTu2Z4aIT+xb7Rj32fAjiy+IPEI7Mt/KtsV+W2F5CM+QQxWTOdUkt+3OuAJe
VlgnZk4a7yHYLXbyqc/wpHPdD4EEKyCCBuT2lPwu+L/3XNNy8dWL/1y74PbUOyAW
r5wfIalJZ43Zabvgl+LXxCUXrVRFMG1hASXupCY88uU1evvdBjd+anWTd/IpNHBC
g6pvwnHQDeuf9KhzKIRvb2HqMeYM80yir6PMBcayZj6icKSZa9i2KKs6W4IVhS1p
ZDZBbuP01GlwU3pAX+bX7HIBt9wPYYoabUjYDahsvLCKToK8rhLbHd//3qKOuIh2
7T+DtouVTFu7ipuxaq+VqSAExU5gNXi9xh9fSbJwAf7E//LA9s6UBMTRfJOmC1Wv
gyapSNqeATkvwFNmucTIXbaFTTlR+6WisgEO7eqT7F99k+tDoj/m8HoX84mcesqz
t3zeR0A6L0bq0GAICxdkNMRMXZWuan34T7IvxjdtIsaUm3ReIDf68oW51107Wlts
ZX2IE4P+vrAq4gR0Ra4L2NaDWDawZMIyEFAMRHxNE96TqZzvaNVZW3dOfn0YjRJH
fuvRThuoDGKKM5NzVDuWQJM+PP3dR2I+wamiL4QEeP+czP5FQXxR2C5iwY03Ntcj
ByAp1ZiLoGePEu3PGFIAocntyIy+UTKVMLfvqn1tX3VW19uF4J8eQnp0W4oqOAcZ
DTV6gamXNHrJzI5qtlB5yBf1YZb1bxniLKCiihOyx1O3fY/y178gIePMXX1ZVpQZ
PWdYlyDlw07tk5WnQxxAj4E6iNodlkhm9lfBFf+8GPgFe3esgPyID79KbS5UqN6D
gpnJcV57vsbU8KkjZ1hYEHUCuyR3AWIQOGAjP0Ai/nJADtEF54UZbP6fnOPT6yJR
olek4GiaEFV9SiSReIwKeTHiCZvpN1rMDnGLTn9p2bphOBM8mjBhKfE8Wy1LOYr9
5HjJleAgtppgDh0dnKPc6kV0e+yHeQXDp0o1RC2J0awW4Oeqr65dJOoynARQ95n6
UVlahI07BKqWZNRKcmJVvrWaQisDDLfWrvCaGYocTfOBEb9mpJzLZ3NrtE6UBxSj
/caJH0y0dRBaDLJvH44RXK9hXVW0iRp09lpABID9AvUyAFc/G+aKTbxbHkhc6AwB
pITCXPC+EMQ7Z4TcoRykU6+6EMsYNjvZ0l5xpsh5Pe7zsNeBtmBa8z//71ZkjsFi
Ioy3dmD0ruWgkq0dlU2L0BfNr55tsCZUzfd9/u2/hE6Ye4edtsKKQJD6aqoMi8Nk
qDI4t2GS1RHiCZ8hr4Ux5NXvKCFxD5913n5OY70BtMXKg/H/TwoTBqwzSH6fv1JZ
mWUSdtS9hN3fcezkqDwfR8Dzgz6Aq8ewa2HBoqcZ9T551hEGwvyN9QnT0DzkaZNK
VNwvTAHQ5Xs3lbS0X+Giu75nvHJMpKL70Z/aNX5IwobmfAi89jXaUMuGetcVbO98
SL96j5AxFO6K0PczCgE8CHXJY62Sh/eGYF+Uc7DbRZROxgM035MYBQqa5U17W0/G
h2Mf+qvfrH0jsvTwod9BRbYusnxp0E04+1Y7SdcQfbcbpafc2MAjnQGxU51KQiWf
yZ4D6COBoT1j7eGc/fg6uFKClEH34I97vod89CMj0uJblCieYj+5+pz0aGCgL3yP
6WZb5ogZQkq23p7lMZptmjW+OZGNt5bNEqNTAIhRB5jN1PnvJs81vzQo1rNmoJG6
rokC3A6Mqic7MssU0B9nUXUA92LEB/YhimO/sccRshbBD2/TuY+KhQdApbU7NtJ3
giyj/5JEwUmj4ecGXfxhxWYfPrnLG87hO1mogfp1ndCC0efbLR6u8Qb5vlz56luQ
hSvE45gWcVjxo6hJasZHpoqq4aD4CqVLgCi0zSEgXhPS+vgo2CYpW3u5N5Kw3nJG
WmcQOfGUXIoCLsFoiSoLNt5H5uPXi4+rcgi65pio2QwXpYfxlCZpHbEgyvzr+U85
fiBNPwSvYnQx3DYqx/2mkIZPJO1pSGfDKy68OAnvOMUhQ7jASgmMjK0HeRSpT1E7
n3+cUk1zJgDbu68laxj0xzU+iyJZr4hk05mmqVfux60WSv7NqurLgLQ3++CZ5XPu
SSuYY89gBlbbl9GLlF8EcmsbqXfqYa1F+6A2bqFBe96jbVo7WEdNXJDuZZxwU2GU
FgDo9tyLxnkGfv3XfSBmZDydltOQm2sgGIZ0EXczbso0F4BDeamolCgL6jhgVs0B
rhJ2kooSEA8/MJMhzUVgRjqNUV6iCW+iFRtX5nD4rW/vODYpFKs/zlSQo5qq8P3/
eKw7VFlcc/i2V7ZxA48WIvM9HsNsKs3sHCxEHUZHmT/8KTcHuY1LlUA8aE3UMyAB
iqrpMQwn3x6G5UqLa/3IoxGYH9dYvoDjESVKm9CTZjbQdiCpYENsNiZ+TkjBBUwU
m50oRjC8YWhqAHdJxDcbAiiH0zyDYrMgvozLbDpUMjye8wOV94ga4Pb681Qld1vW
rFfytkJPYFCIP0uVrlEuAnfrcvymLAB/tMEbMeiEoFuoRfy2ra7taOeH6tpQcb1N
18QSzGTAcerjkvrpJLxG/aGyzKQDFvnpbObvsH3XJQScTgjhoY3yXPI=
-----END ENCRYPTED PRIVATE KEY-----

Binary file not shown.

After

Width:  |  Height:  |  Size: 10 KiB

52
test_data/stego.pem Normal file
View File

@ -0,0 +1,52 @@
-----BEGIN PRIVATE KEY-----
MIIJQgIBADANBgkqhkiG9w0BAQEFAASCCSwwggkoAgEAAoICAQDWrgNqtHWzP493
1QrWiv1Kpo2o+8QUG8tvUDduEWcLCoFOo3BwzFZdslYflXsDRx4hOyOOFF1dqTGs
tGmG1My38tdgFjGrSYaqOrDt169GTz5M4wXuIpr3LRDIrE0WGT9zpC25xLjbnVqw
H6wGQg/Gzdk2zvTdlrbiQZ+nnO3oYuSFNfxXBQw72ld9Ck4VjrGfVzen0ZfoAqx6
y129xIKXFojUIyLMK1nDScUvzriSqdgEIcTm8UaCS7H3Obs7QFuVNhSn3Pa7GwtQ
gN+Grmfrixfrwjwm4/4CiK4BW3D50xzHGJ8jLqzQJPhBeI6poaS5L5q/Ow0/j52m
aEIJvm9nG0ozdzvvj0jfRpTd1i6svMDGQn15M40ELfIwSxhaxEyybd2Y0D0zaUaO
7QlnZrIevpKAuuSrEDgMrQZH0B8lxkayE+vx+RvMH4OlZl3pgu2cEMnVrwkjshRu
Jpo4MhAJpT9hoVQL2o458fKO3Mq3NgTxX2K9u/WBKV5FgoeEvFvPBGoeX85VE1u7
iQiqEV/4Tj+kbyUCbNKia4YW+sT6enrMzVPs1xkeFYGfY4a/I9gPYSvjYSYRNrh6
fRYupqb7fRvtI0BACXnXER4oQzk4/QJAeNDhkJmrW0lD3QMYNz3iK/E8JSYrsdDf
IE5wH/pLFkx0TpYD/NROSGg+4kSpWwIDAQABAoICAGKNhBgKPDZLqYszjEQtjlYk
28eFvOAWWnABfVDUQF+7NN1q/oVp6QQeiJb7QnugvF45Dft9sdb7g0hrkj5Jlqmj
m+gKiEa//EhNpUeqylYWrt0RI3nM/SMX1yqonsLUWGK4Lh2oB80aEbXk9E3zKdb0
TRFyNMs9EzwPMEH8MuOJ8qqMvAke0hzwAVXv0yTmWvhxS6WI/JmQEKpWjfQ+ME04
FRx9/M2J4S1bwmBOIEGEIxzSe9ghT2MlaM+7+w4aX3DCYP0m3UXMBoewge0MR7zE
H/klNAQOil+Ta35IAjdOCVoFHZlDJiZvsmAr/iI1iK9TAVFkxJI3VgI8voRGnlD5
ZxNUkwhgkqAHS7L+lpRm/NKNNWJ8GhMRCp4vVAvig8RryW6Ho0CroOsw0qBCN6zx
919MviMRxODjfqBqBne83qCS6MjnSF1XJDUCSstUOl0bZ7yM0PLIIEXZYQT4MjXm
Bq+KXqiR7jB0dpsmkiP0mQYUZ+PgktWH/4dDL38O3tD5Oj4BG5mPA8dn1DalXYeF
a2AB4jYC+YAJ0TD72rFrcYw60rBMGRnTJ/Zw+TQXjPYqaDQqdQRIf/xJu/UhUOGM
BOJTTZxVAmLdxEKggFMO2bbhWjhAj/jcTk/7cVarMyB6GSVv/nJUxks3mj1VGi6r
Zy5FTv7g6GV9Xrai57oRAoIBAQD6nUm3ks7kY+oMqt4/OZFqeFkOPJij6qLr6i9b
AOEt/5MK9EPLj3b0E8OtoAdu1l+NKqOxzb0u67CBzIot32Iug7n2OFOxVzoA9oFv
0v1SiT+mXRbvCHXd8SjMjmCBrG2hbbQZk8DRo8PeRwEONKiibvkKVnYDlBSd+HAy
AQZiwOVTiSAmAuotnOxq/E8pY1m+YvfeSZxfbS3xLuRzG2Wl5rc5AInLUiYbqJRI
0W8ZU8GG0N/Yel9ZTdwoGAb0dpa7hv831H3iWUy8EUXHpUBd5hUeawZ8U7z1il+s
EuvQ9SmLB+S97AjVkJ5dBaeNR7myyxiRv67yIYWsB9GPIQuPAoIBAQDbSwl5FZvH
PNVIyj2VWCjNopdbbduqDu1Sl6QjykEmqrrh7yZHL7dT8u+VRLWLQDXlUOmcWZ+3
vbXmpcm+tmXQGs2gVhtLCqDK/1RFx5PbhsbdUoVcaxQHZ1e3jVh8rcFrD8NFKJIY
s7YgaFHPFfEcvdqsxoEidsfSRKv0fHfLhh5K9ygp0XQH44wNoRZcm918a4KScBWz
aO9erRRu9IjH0uZrcOLpY6LMnbjzd9FnlCIWnNnir2pXxdPfZargRftfsPRz8F1m
1gDK2wTaNNP0e3AP726D9amtR2OsmIwjN+StbD82C+D5Rzae/WLllCiuYqB3/Tdq
G1aPGdqVMw91AoIBAEIi5gRS8tRk5awoH3SR7aUqU14lcaa4UfxuFccvST2JV7Wp
VobOunaZDKeyNqPyYM7xNLfEcquCOVdXshN/AvsBej7XCIchXMZKR0hKZt20KJ7v
rBCyq+BBNyIcZK1z4o6aU8/4JQdt70+mb0nfba1IGeK3hHdXScWSfZFEofnHq/1K
j7hYRpJ4+g8VvJJQjKtolL8Nr/2i41/lnxYdYcrRQwMA+Fw2tqXP7Cn3Uxlc+BJq
5tfLG4SGvpW6GYPEE+0zKAEEy5/DLzPmXwLbwkvFX0diuRhwRMqLlMnlSGQhfrAZ
Ci1wr7gkbRZhWqE3D3N2k2pLKpDtdcg7W/sonlMCggEAEXd0lbRm6AOsaVPSe+a9
uUNOEvY5HN57gKybYk5lyhKnN/+4aMsgKwuakaUbYLAImu9K7C0nT5dIucpSu2ID
0gOQhUjWe6+pBwI3UCeJW17s0AwWmzncKorMrc4QUj30vfZPc1675EPMKeClQ31X
6xYkKMbW2kvjtpfJqnuPfyJ7nzT4efw45bf2G8/M3Ct+BL77c3aCA/xDtKLMkdng
WyotODmRmU1aVjnp1Vqhz0I5RNvRbwDR05PqQEz0w1BPqVuK3QE0L1C0V3QCeu7i
QwLh9iL4BoK2tRqOkdzErLly+W05y7A+hxxo+VjtW6PlaZ5pcaU1mLn8twE0K9YZ
UQKCAQEAvuV8orKNfC7QsQoy6gaYbESqEwIR7X3BmCpOZIW2lWtKFNEna6BOpu+U
8UpWvabZ/g4APO3NXMFiF9GE0iwfnVhWtejMp9cBZb/s1KzOuG7RXjNDsGQIlFsP
I0Ea4JmThKnVy3OsGnAIfb+wkzfyzyoUsa3/i50Ln4TxOIqvnV7eBFITi+IsD7v8
74IUqeN+OwWELAEbwCQQgEQk/50LopS3bdd0PW8K8z1lB05Xwej8wDVcOttxWWIu
/71pLOyDXpGTKL2P55XT9+XaNG1oHkS29wlxZ864fYFodB2+B6Ka7Gvht0sykZob
qLuMBXlezXwx2ayC9XWuX5Ld7uOSGA==
-----END PRIVATE KEY-----

View File

@ -182,7 +182,7 @@ class TestRotateChannelKey:
assert oct(key_file.stat().st_mode & 0o777) == oct(0o600)
def test_archived_key_matches_old_fingerprint(self, tmp_path: Path):
from stegasoo.crypto import get_channel_fingerprint
from soosef.stegasoo.crypto import get_channel_fingerprint
ks = _make_manager(tmp_path)
ks.generate_channel_key()
@ -197,7 +197,7 @@ class TestRotateChannelKey:
assert old_fp == result.old_fingerprint
def test_new_channel_key_active_after_rotation(self, tmp_path: Path):
from stegasoo.crypto import get_channel_fingerprint
from soosef.stegasoo.crypto import get_channel_fingerprint
ks = _make_manager(tmp_path)
ks.generate_channel_key()

681
tests/test_stegasoo.py Normal file
View File

@ -0,0 +1,681 @@
"""
Stegasoo Library Unit Tests
Tests core functionality: encode/decode, LSB/DCT modes, channel keys, validation.
"""
import io
from pathlib import Path
import pytest
from PIL import Image
import soosef.stegasoo as stegasoo
from soosef.stegasoo import (
decode,
decode_text,
encode,
generate_channel_key,
generate_passphrase,
generate_pin,
has_dct_support,
validate_image,
validate_message,
validate_passphrase,
validate_pin,
)
# Test data paths: fixture images live in <repo-root>/test_data.
TEST_DATA = Path(__file__).parent.parent / "test_data"
CARRIER_PATH = TEST_DATA / "carrier.jpg"  # image the message is embedded into
REF_PATH = TEST_DATA / "ref.jpg"  # photo passed as reference_photo to encode/decode
# Test credentials: fixed values so encode/decode round-trips are reproducible.
TEST_PASSPHRASE = "tower booty sunny windy toasty spicy"
TEST_PIN = "727643678"
TEST_MESSAGE = "Hello, Stegasoo!"
@pytest.fixture
def carrier_bytes():
    """Raw bytes of the shared carrier image fixture."""
    with CARRIER_PATH.open("rb") as fh:
        return fh.read()
@pytest.fixture
def ref_bytes():
    """Raw bytes of the shared reference photo fixture."""
    with REF_PATH.open("rb") as fh:
        return fh.read()
@pytest.fixture
def small_image():
    """A 200x200 solid-blue PNG, built entirely in memory."""
    sink = io.BytesIO()
    Image.new("RGB", (200, 200), color="blue").save(sink, format="PNG")
    return sink.getvalue()
class TestVersion:
    """Sanity-check the package version string."""

    def test_version_exists(self):
        """__version__ must be present and non-empty."""
        assert hasattr(stegasoo, "__version__")
        assert stegasoo.__version__

    def test_version_format(self):
        """Version looks like 'MAJOR.MINOR...' with numeric leading parts."""
        components = stegasoo.__version__.split(".")
        assert len(components) >= 2
        assert components[0].isdigit()
        assert components[1].isdigit()
class TestGeneration:
    """Exercise passphrase / PIN / channel-key generators."""

    def test_generate_passphrase_default(self):
        """Default passphrase has DEFAULT_PASSPHRASE_WORDS words."""
        generated = generate_passphrase()
        assert len(generated.split()) == stegasoo.DEFAULT_PASSPHRASE_WORDS

    def test_generate_passphrase_custom_length(self):
        """An explicit word count is honored."""
        generated = generate_passphrase(words=8)
        assert len(generated.split()) == 8

    def test_generate_pin_default(self):
        """Default PIN is numeric and 6 digits long."""
        pin = generate_pin()
        assert pin.isdigit()
        assert len(pin) == 6

    def test_generate_pin_custom_length(self):
        """An explicit PIN length is honored."""
        pin = generate_pin(length=9)
        assert pin.isdigit()
        assert len(pin) == 9

    def test_generate_channel_key(self):
        """Channel key uses the XXXX-XXXX-... 8-group format (39 chars)."""
        key = generate_channel_key()
        assert len(key) == 39
        assert key.count("-") == 7
class TestValidation:
    """Exercise the credential, message, and image validators."""

    def test_validate_passphrase_valid(self):
        assert validate_passphrase(TEST_PASSPHRASE).is_valid

    def test_validate_passphrase_too_short(self):
        assert not validate_passphrase("one two").is_valid

    def test_validate_pin_valid(self):
        assert validate_pin(TEST_PIN).is_valid

    def test_validate_pin_too_short(self):
        assert not validate_pin("123").is_valid

    def test_validate_pin_non_numeric(self):
        assert not validate_pin("abc123").is_valid

    def test_validate_message_valid(self):
        assert validate_message("Hello world").is_valid

    def test_validate_message_empty(self):
        assert not validate_message("").is_valid

    def test_validate_image_valid(self, carrier_bytes):
        assert validate_image(carrier_bytes).is_valid

    def test_validate_image_invalid(self):
        assert not validate_image(b"not an image").is_valid
class TestLSBMode:
    """Exercise LSB (least-significant-bit) encode/decode paths."""

    def _encode_default(self, carrier, ref):
        """Encode TEST_MESSAGE in LSB mode with the standard credentials."""
        return encode(
            message=TEST_MESSAGE,
            reference_photo=ref,
            carrier_image=carrier,
            passphrase=TEST_PASSPHRASE,
            pin=TEST_PIN,
            embed_mode="lsb",
        )

    def test_encode_decode_roundtrip(self, carrier_bytes, ref_bytes):
        """A message embedded with LSB decodes back unchanged."""
        enc = self._encode_default(carrier_bytes, ref_bytes)
        assert enc.stego_image
        assert len(enc.stego_image) > 0
        dec = decode(
            stego_image=enc.stego_image,
            reference_photo=ref_bytes,
            passphrase=TEST_PASSPHRASE,
            pin=TEST_PIN,
            embed_mode="lsb",
        )
        assert dec.message == TEST_MESSAGE

    def test_decode_text_helper(self, carrier_bytes, ref_bytes):
        """decode_text returns the plain-text message directly."""
        enc = self._encode_default(carrier_bytes, ref_bytes)
        recovered = decode_text(
            stego_image=enc.stego_image,
            reference_photo=ref_bytes,
            passphrase=TEST_PASSPHRASE,
            pin=TEST_PIN,
            embed_mode="lsb",
        )
        assert recovered == TEST_MESSAGE

    def test_wrong_passphrase_fails(self, carrier_bytes, ref_bytes):
        """A mismatched passphrase must make decoding raise."""
        enc = self._encode_default(carrier_bytes, ref_bytes)
        with pytest.raises(Exception):
            decode(
                stego_image=enc.stego_image,
                reference_photo=ref_bytes,
                passphrase="wrong passphrase words here now",
                pin=TEST_PIN,
                embed_mode="lsb",
            )

    def test_wrong_pin_fails(self, carrier_bytes, ref_bytes):
        """A mismatched PIN must make decoding raise."""
        enc = self._encode_default(carrier_bytes, ref_bytes)
        with pytest.raises(Exception):
            decode(
                stego_image=enc.stego_image,
                reference_photo=ref_bytes,
                passphrase=TEST_PASSPHRASE,
                pin="999999999",
                embed_mode="lsb",
            )

    def test_wrong_reference_fails(self, carrier_bytes, ref_bytes, small_image):
        """A mismatched reference photo must make decoding raise."""
        enc = self._encode_default(carrier_bytes, ref_bytes)
        with pytest.raises(Exception):
            decode(
                stego_image=enc.stego_image,
                reference_photo=small_image,  # deliberately the wrong reference
                passphrase=TEST_PASSPHRASE,
                pin=TEST_PIN,
                embed_mode="lsb",
            )
class TestDCTMode:
    """Exercise DCT (Discrete Cosine Transform) encode/decode paths."""

    @pytest.fixture(autouse=True)
    def check_dct_support(self):
        """Skip every test in this class when DCT support is unavailable."""
        if not has_dct_support():
            pytest.skip("DCT support not available")

    def test_encode_decode_roundtrip(self, carrier_bytes, ref_bytes):
        """A DCT-embedded message decodes back unchanged."""
        enc = encode(
            message=TEST_MESSAGE,
            reference_photo=ref_bytes,
            carrier_image=carrier_bytes,
            passphrase=TEST_PASSPHRASE,
            pin=TEST_PIN,
            embed_mode="dct",
        )
        assert enc.stego_image
        dec = decode(
            stego_image=enc.stego_image,
            reference_photo=ref_bytes,
            passphrase=TEST_PASSPHRASE,
            pin=TEST_PIN,
            embed_mode="dct",
        )
        assert dec.message == TEST_MESSAGE

    def test_dct_jpeg_output(self, carrier_bytes, ref_bytes):
        """DCT mode can emit a JPEG container, checked via magic bytes."""
        enc = encode(
            message=TEST_MESSAGE,
            reference_photo=ref_bytes,
            carrier_image=carrier_bytes,
            passphrase=TEST_PASSPHRASE,
            pin=TEST_PIN,
            embed_mode="dct",
            dct_output_format="jpeg",
        )
        assert enc.stego_image
        # JPEG streams always begin with the SOI marker FF D8.
        assert enc.stego_image[:2] == b"\xff\xd8"
class TestChannelKey:
    """Exercise channel-key protected encode/decode."""

    @staticmethod
    def _decode_kwargs(stego, ref, key):
        """Keyword arguments shared by the decode calls below."""
        return {
            "stego_image": stego,
            "reference_photo": ref,
            "passphrase": TEST_PASSPHRASE,
            "pin": TEST_PIN,
            "channel_key": key,
            "embed_mode": "lsb",
        }

    def test_encode_with_channel_key(self, carrier_bytes, ref_bytes):
        """Encoding and decoding with the same channel key round-trips."""
        key = generate_channel_key()
        enc = encode(
            message=TEST_MESSAGE,
            reference_photo=ref_bytes,
            carrier_image=carrier_bytes,
            passphrase=TEST_PASSPHRASE,
            pin=TEST_PIN,
            channel_key=key,
            embed_mode="lsb",
        )
        assert enc.stego_image
        dec = decode(**self._decode_kwargs(enc.stego_image, ref_bytes, key))
        assert dec.message == TEST_MESSAGE

    def test_wrong_channel_key_fails(self, carrier_bytes, ref_bytes):
        """A mismatched channel key must make decoding raise."""
        key = generate_channel_key()
        other_key = generate_channel_key()
        enc = encode(
            message=TEST_MESSAGE,
            reference_photo=ref_bytes,
            carrier_image=carrier_bytes,
            passphrase=TEST_PASSPHRASE,
            pin=TEST_PIN,
            channel_key=key,
            embed_mode="lsb",
        )
        with pytest.raises(Exception):
            decode(**self._decode_kwargs(enc.stego_image, ref_bytes, other_key))
class TestCompression:
    """Verify long messages survive the compression path."""

    def test_long_message_compresses(self, carrier_bytes, ref_bytes):
        """A 1000-character message round-trips intact."""
        payload = "A" * 1000
        enc = encode(
            message=payload,
            reference_photo=ref_bytes,
            carrier_image=carrier_bytes,
            passphrase=TEST_PASSPHRASE,
            pin=TEST_PIN,
            embed_mode="lsb",
        )
        assert enc.stego_image
        dec = decode(
            stego_image=enc.stego_image,
            reference_photo=ref_bytes,
            passphrase=TEST_PASSPHRASE,
            pin=TEST_PIN,
            embed_mode="lsb",
        )
        assert dec.message == payload
class TestEdgeCases:
    """Edge cases: Unicode payloads, minimal credentials, control characters."""

    def _roundtrip(self, payload, carrier, ref):
        """Encode *payload* in LSB mode, then decode and return both results."""
        enc = encode(
            message=payload,
            reference_photo=ref,
            carrier_image=carrier,
            passphrase=TEST_PASSPHRASE,
            pin=TEST_PIN,
            embed_mode="lsb",
        )
        dec = decode(
            stego_image=enc.stego_image,
            reference_photo=ref,
            passphrase=TEST_PASSPHRASE,
            pin=TEST_PIN,
            embed_mode="lsb",
        )
        return enc, dec

    def test_unicode_message(self, carrier_bytes, ref_bytes):
        """Emoji, CJK, and accented text survive a round-trip."""
        payload = "Hello 🦖 Stegasoo! 日本語 émojis"
        enc, dec = self._roundtrip(payload, carrier_bytes, ref_bytes)
        assert enc.stego_image
        assert dec.message == payload

    def test_minimum_passphrase(self, carrier_bytes, ref_bytes):
        """Encoding works with the shortest acceptable passphrase."""
        enc = encode(
            message=TEST_MESSAGE,
            reference_photo=ref_bytes,
            carrier_image=carrier_bytes,
            passphrase="one two three four",  # 4 words minimum
            pin=TEST_PIN,
            embed_mode="lsb",
        )
        assert enc.stego_image

    def test_special_characters_in_message(self, carrier_bytes, ref_bytes):
        """Newlines, tabs, and CRLF sequences are preserved."""
        payload = "Line1\nLine2\tTab\r\nCRLF"
        enc, dec = self._roundtrip(payload, carrier_bytes, ref_bytes)
        assert enc.stego_image
        assert dec.message == payload
# =============================================================================
# VIDEO STEGANOGRAPHY TESTS (v4.4.0)
# =============================================================================
@pytest.fixture
def test_video_bytes():
    """Create a minimal test video using ffmpeg.

    Generates a 2-second, 320x240 solid-color MP4 (10 fps, I-frame every
    5 frames via ``-g 5``) and returns its bytes.

    Returns None when ffmpeg is missing or generation fails, letting the
    video tests skip themselves gracefully.
    """
    # Local imports keep heavy/optional modules out of module import time.
    import shutil
    import subprocess
    import tempfile

    if not shutil.which("ffmpeg"):
        return None

    # Reserve a temp path; the file itself is (re)written by ffmpeg.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        output_path = Path(tmp.name)

    try:
        # lavfi (libavfilter) synthesizes the test pattern, so no input
        # media file is needed.
        proc = subprocess.run(
            [
                "ffmpeg",
                "-y",
                "-f",
                "lavfi",
                "-i",
                "color=c=blue:s=320x240:d=2:r=10",
                "-c:v",
                "libx264",
                "-pix_fmt",
                "yuv420p",
                "-g",
                "5",  # GOP size - creates I-frames every 5 frames
                str(output_path),
            ],
            capture_output=True,
            timeout=30,
        )
        if proc.returncode != 0:
            return None
        return output_path.read_bytes()
    except Exception:
        # Best-effort fixture: any failure (timeout, codec missing, ...)
        # just means "no video available" and the tests will skip.
        return None
    finally:
        output_path.unlink(missing_ok=True)
class TestVideoSupport:
    """Verify the video-support feature flags are exported."""

    def test_video_support_flag_exists(self):
        """HAS_VIDEO_SUPPORT must exist and be a real boolean."""
        assert hasattr(stegasoo, "HAS_VIDEO_SUPPORT")
        assert isinstance(stegasoo.HAS_VIDEO_SUPPORT, bool)

    def test_video_constants_exist(self):
        """The video embed-mode constants must be exported."""
        for const_name in ("EMBED_MODE_VIDEO_LSB", "EMBED_MODE_VIDEO_AUTO"):
            assert hasattr(stegasoo, const_name)
@pytest.mark.skipif(
    not stegasoo.HAS_VIDEO_SUPPORT,
    reason="Video support not available (ffmpeg or dependencies missing)",
)
class TestVideoFormatDetection:
    """Verify container-format sniffing from magic bytes."""

    def test_detect_video_format_mp4(self, test_video_bytes):
        """The ffmpeg-generated clip is detected as MP4/MOV."""
        if test_video_bytes is None:
            pytest.skip("Could not create test video")
        from soosef.stegasoo import detect_video_format

        detected = detect_video_format(test_video_bytes)
        assert detected in ("mp4", "mov")

    def test_detect_video_format_unknown(self):
        """Arbitrary non-video bytes are reported as 'unknown'."""
        from soosef.stegasoo import detect_video_format

        assert detect_video_format(b"not a video") == "unknown"
@pytest.mark.skipif(
    not stegasoo.HAS_VIDEO_SUPPORT,
    reason="Video support not available (ffmpeg or dependencies missing)",
)
class TestVideoInfo:
    """Verify video metadata extraction and validation."""

    def test_get_video_info(self, test_video_bytes):
        """get_video_info reports the generated clip's geometry and timing."""
        if test_video_bytes is None:
            pytest.skip("Could not create test video")
        from soosef.stegasoo import get_video_info

        meta = get_video_info(test_video_bytes)
        assert (meta.width, meta.height) == (320, 240)
        assert meta.fps > 0
        assert meta.duration_seconds > 0
        assert meta.total_frames > 0
        assert meta.format in ("mp4", "mov")

    def test_validate_video(self, test_video_bytes):
        """validate_video accepts the generated clip."""
        if test_video_bytes is None:
            pytest.skip("Could not create test video")
        from soosef.stegasoo import validate_video

        verdict = validate_video(test_video_bytes, check_duration=False)
        assert verdict.is_valid
        assert verdict.details.get("format") in ("mp4", "mov")
@pytest.mark.skipif(
    not stegasoo.HAS_VIDEO_SUPPORT,
    reason="Video support not available (ffmpeg or dependencies missing)",
)
class TestVideoCapacity:
    """Verify steganographic capacity calculation for video."""

    def test_calculate_video_capacity(self, test_video_bytes):
        """Capacity report includes frame counts, byte budget, and resolution."""
        if test_video_bytes is None:
            pytest.skip("Could not create test video")
        from soosef.stegasoo import calculate_video_capacity

        cap = calculate_video_capacity(test_video_bytes)
        assert cap.total_frames > 0
        assert cap.i_frames > 0
        assert cap.usable_capacity_bytes > 0
        assert cap.embed_mode == "video_lsb"
        assert cap.resolution == (320, 240)
@pytest.mark.skipif(
    not stegasoo.HAS_VIDEO_SUPPORT,
    reason="Video support not available (ffmpeg or dependencies missing)",
)
class TestVideoEncodeDecode:
    """Test video steganography round-trip."""
    def test_video_roundtrip(self, test_video_bytes, ref_bytes):
        """Test encoding and decoding a message in video."""
        if test_video_bytes is None:
            pytest.skip("Could not create test video")
        from soosef.stegasoo import decode_video, encode_video
        message = "Secret video message!"
        # Encode
        stego_video, stats = encode_video(
            message=message,
            reference_photo=ref_bytes,
            carrier_video=test_video_bytes,
            passphrase=TEST_PASSPHRASE,
            pin=TEST_PIN,
        )
        assert stego_video
        assert len(stego_video) > 0
        assert stats.frames_modified > 0
        assert stats.codec == "ffv1" # Should use lossless codec
        # Decode with the same credentials must recover the exact message.
        result = decode_video(
            stego_video=stego_video,
            reference_photo=ref_bytes,
            passphrase=TEST_PASSPHRASE,
            pin=TEST_PIN,
        )
        assert result.is_text
        assert result.message == message
    def test_video_wrong_passphrase_fails(self, test_video_bytes, ref_bytes):
        """Decoding with wrong passphrase should fail."""
        if test_video_bytes is None:
            pytest.skip("Could not create test video")
        from soosef.stegasoo import decode_video, encode_video
        message = "Secret video message!"
        stego_video, _ = encode_video(
            message=message,
            reference_photo=ref_bytes,
            carrier_video=test_video_bytes,
            passphrase=TEST_PASSPHRASE,
            pin=TEST_PIN,
        )
        # Broad Exception on purpose: the concrete error type raised for bad
        # credentials is an implementation detail of decode_video.
        with pytest.raises(Exception):
            decode_video(
                stego_video=stego_video,
                reference_photo=ref_bytes,
                passphrase="wrong passphrase words here",
                pin=TEST_PIN,
            )

View File

@ -0,0 +1,862 @@
"""
Tests for Stegasoo audio steganography.
Tests cover:
- Audio LSB roundtrip (encode + decode)
- Audio spread spectrum roundtrip (v0 legacy + v2 per-channel)
- Wrong credentials fail to decode
- Capacity calculations (per-tier)
- Format detection
- Audio validation
- Per-channel stereo/multichannel embedding (v4.4.0)
- Chip tier roundtrips (v4.4.0)
- LFE channel skipping (v4.4.0)
- Backward compat: v0 decode from v2 code
- Header v2 build/parse roundtrip
- Round-robin bit distribution
"""
import io
from pathlib import Path
import numpy as np
import pytest
import soundfile as sf
from soosef.stegasoo.constants import AUDIO_ENABLED, EMBED_MODE_AUDIO_LSB, EMBED_MODE_AUDIO_SPREAD
from soosef.stegasoo.models import AudioCapacityInfo, AudioEmbedStats, AudioInfo
# Skip the whole module when audio support is disabled via the STEGASOO_AUDIO
# feature flag (surfaced as constants.AUDIO_ENABLED).
pytestmark = pytest.mark.skipif(not AUDIO_ENABLED, reason="Audio support disabled (STEGASOO_AUDIO)")
# Path to real test data files. Fixtures below either fall back to generated
# data (reference_photo) or skip (speech_wav) when these files are absent.
_TEST_DATA = Path(__file__).parent.parent / "test_data"
_REFERENCE_PNG = _TEST_DATA / "reference.png"
_SPEECH_WAV = _TEST_DATA / "stupid_elitist_speech.wav"
# =============================================================================
# FIXTURES
# =============================================================================
@pytest.fixture
def carrier_wav() -> bytes:
    """One second of a 440 Hz mono sine tone, encoded as 16-bit PCM WAV bytes."""
    rate = 44100
    seconds = 1.0
    timeline = np.linspace(0, seconds, int(rate * seconds), endpoint=False)
    tone = (16000 * np.sin(2 * np.pi * 440 * timeline)).astype(np.int16)
    out = io.BytesIO()
    sf.write(out, tone, rate, format="WAV", subtype="PCM_16")
    return out.getvalue()
@pytest.fixture
def carrier_wav_stereo() -> bytes:
    """Five seconds of stereo 16-bit WAV (440 Hz left, 880 Hz right).

    Long enough to give spread-spectrum tests usable capacity.
    """
    rate = 44100
    seconds = 5.0
    timeline = np.linspace(0, seconds, int(rate * seconds), endpoint=False)
    left = (np.sin(2 * np.pi * 440 * timeline) * 16000).astype(np.int16)
    right = (np.sin(2 * np.pi * 880 * timeline) * 16000).astype(np.int16)
    out = io.BytesIO()
    sf.write(out, np.column_stack([left, right]), rate, format="WAV", subtype="PCM_16")
    return out.getvalue()
@pytest.fixture
def carrier_wav_long() -> bytes:
    """Generate a longer WAV (15 seconds) for spread spectrum tests."""
    sample_rate = 44100
    duration = 15.0
    num_samples = int(sample_rate * duration)
    t = np.linspace(0, duration, num_samples, endpoint=False)
    # Mix of three harmonics (440/880/1320 Hz) at reduced amplitude so the
    # sum stays well inside the int16 range.
    samples = (
        (np.sin(2 * np.pi * 440 * t) + np.sin(2 * np.pi * 880 * t) + np.sin(2 * np.pi * 1320 * t))
        * 5000
    ).astype(np.int16)
    buf = io.BytesIO()
    sf.write(buf, samples, sample_rate, format="WAV", subtype="PCM_16")
    buf.seek(0)
    return buf.read()
@pytest.fixture
def carrier_wav_stereo_long() -> bytes:
    """15 s of stereo WAV (440 Hz left, 660 Hz right) for per-channel spread tests."""
    rate = 48000
    seconds = 15.0
    timeline = np.linspace(0, seconds, int(rate * seconds), endpoint=False)

    def _tone(freq: float) -> np.ndarray:
        # Normalized float64 samples, same amplitude scaling as before.
        return (np.sin(2 * np.pi * freq * timeline) * 10000).astype(np.float64) / 32768.0

    frames = np.column_stack([_tone(440), _tone(660)])
    out = io.BytesIO()
    sf.write(out, frames, rate, format="WAV", subtype="PCM_16")
    return out.getvalue()
@pytest.fixture
def carrier_wav_5_1() -> bytes:
    """15 s of 6-channel (5.1) WAV for LFE-skip tests; channel 3 is the LFE."""
    rate = 48000
    seconds = 15.0
    timeline = np.linspace(0, seconds, int(rate * seconds), endpoint=False)
    # One distinct tone per channel; index 3 (80 Hz) plays the LFE role.
    freqs = [440, 554, 660, 80, 880, 1100]
    frames = np.column_stack(
        [(np.sin(2 * np.pi * f * timeline) * 8000).astype(np.float64) / 32768.0 for f in freqs]
    )
    out = io.BytesIO()
    sf.write(out, frames, rate, format="WAV", subtype="PCM_16")
    return out.getvalue()
@pytest.fixture
def carrier_wav_spread_integration() -> bytes:
    """Generate a very long WAV (150 seconds) for spread spectrum integration tests."""
    sample_rate = 44100
    duration = 150.0
    num_samples = int(sample_rate * duration)
    t = np.linspace(0, duration, num_samples, endpoint=False)
    # Same three-harmonic mix as carrier_wav_long, just 10x longer so the
    # full encode_audio pipeline has enough spread-spectrum capacity.
    samples = (
        (np.sin(2 * np.pi * 440 * t) + np.sin(2 * np.pi * 880 * t) + np.sin(2 * np.pi * 1320 * t))
        * 5000
    ).astype(np.int16)
    buf = io.BytesIO()
    sf.write(buf, samples, sample_rate, format="WAV", subtype="PCM_16")
    buf.seek(0)
    return buf.read()
@pytest.fixture
def reference_photo() -> bytes:
    """Real reference PNG from test_data when present, else a tiny generated one."""
    if _REFERENCE_PNG.exists():
        return _REFERENCE_PNG.read_bytes()
    from PIL import Image
    fallback = Image.new("RGB", (100, 100), color=(128, 64, 32))
    out = io.BytesIO()
    fallback.save(out, "PNG")
    return out.getvalue()
@pytest.fixture
def speech_wav() -> bytes:
    """Load real speech WAV from test_data (48kHz mono, ~68s)."""
    # No synthetic fallback here: tests using this fixture need real speech,
    # so they skip when the data file is not checked out.
    if not _SPEECH_WAV.exists():
        pytest.skip("test_data/stupid_elitist_speech.wav not found")
    return _SPEECH_WAV.read_bytes()
# =============================================================================
# AUDIO LSB TESTS
# =============================================================================
class TestAudioLSB:
    """Tests for audio LSB steganography."""
    def test_calculate_capacity(self, carrier_wav):
        """Capacity for a 1 s mono carrier should exceed 4 KB."""
        from soosef.stegasoo.audio_steganography import calculate_audio_lsb_capacity
        capacity = calculate_audio_lsb_capacity(carrier_wav)
        assert capacity > 0
        # 1 second at 44100 Hz mono should give ~5KB capacity at 1 bit/sample
        assert capacity > 4000
    def test_embed_extract_roundtrip(self, carrier_wav):
        """Test basic LSB embed/extract roundtrip."""
        from soosef.stegasoo.audio_steganography import embed_in_audio_lsb, extract_from_audio_lsb
        payload = b"Hello, audio steganography!"
        key = b"\x42" * 32
        stego_audio, stats = embed_in_audio_lsb(payload, carrier_wav, key)
        # Stats must report the LSB mode and a plausible, non-trivial embed.
        assert isinstance(stats, AudioEmbedStats)
        assert stats.embed_mode == EMBED_MODE_AUDIO_LSB
        assert stats.bytes_embedded > 0
        assert stats.samples_modified > 0
        assert 0 < stats.capacity_used <= 1.0
        extracted = extract_from_audio_lsb(stego_audio, key)
        assert extracted is not None
        assert extracted == payload
    def test_embed_extract_stereo(self, carrier_wav_stereo):
        """Test LSB roundtrip with stereo audio."""
        from soosef.stegasoo.audio_steganography import embed_in_audio_lsb, extract_from_audio_lsb
        payload = b"Stereo test message"
        key = b"\xAB" * 32
        stego_audio, stats = embed_in_audio_lsb(payload, carrier_wav_stereo, key)
        assert stats.channels == 2
        extracted = extract_from_audio_lsb(stego_audio, key)
        assert extracted == payload
    def test_wrong_key_fails(self, carrier_wav):
        """Test that wrong key produces no valid extraction."""
        from soosef.stegasoo.audio_steganography import embed_in_audio_lsb, extract_from_audio_lsb
        payload = b"Secret message"
        correct_key = b"\x42" * 32
        wrong_key = b"\xFF" * 32
        stego_audio, _ = embed_in_audio_lsb(payload, carrier_wav, correct_key)
        extracted = extract_from_audio_lsb(stego_audio, wrong_key)
        # A wrong key may either fail outright (None) or yield garbage; both
        # are acceptable as long as the payload is not recovered.
        assert extracted is None or extracted != payload
    def test_two_bits_per_sample(self, carrier_wav):
        """Test embedding with 2 bits per sample."""
        from soosef.stegasoo.audio_steganography import embed_in_audio_lsb, extract_from_audio_lsb
        payload = b"Two bits per sample test"
        key = b"\x55" * 32
        stego_audio, stats = embed_in_audio_lsb(payload, carrier_wav, key, bits_per_sample=2)
        # Extraction must use the same bits_per_sample setting as embedding.
        extracted = extract_from_audio_lsb(stego_audio, key, bits_per_sample=2)
        assert extracted == payload
    def test_generate_sample_indices(self):
        """Test deterministic sample index generation."""
        from soosef.stegasoo.audio_steganography import generate_sample_indices
        key = b"\x42" * 32
        indices1 = generate_sample_indices(key, 10000, 100)
        indices2 = generate_sample_indices(key, 10000, 100)
        # Same key => same indices; all in range and with no duplicates.
        assert indices1 == indices2
        assert all(0 <= i < 10000 for i in indices1)
        assert len(set(indices1)) == len(indices1)
# =============================================================================
# AUDIO SPREAD SPECTRUM TESTS (v2 per-channel)
# =============================================================================
class TestAudioSpread:
    """Tests for audio spread spectrum steganography (v2 per-channel)."""
    def test_calculate_capacity_default_tier(self, carrier_wav_long):
        """Default capacity calculation should report tier 2 (chip=1024)."""
        from soosef.stegasoo.spread_steganography import calculate_audio_spread_capacity
        capacity = calculate_audio_spread_capacity(carrier_wav_long)
        assert isinstance(capacity, AudioCapacityInfo)
        assert capacity.usable_capacity_bytes > 0
        assert capacity.embed_mode == EMBED_MODE_AUDIO_SPREAD
        assert capacity.chip_tier == 2 # default
        assert capacity.chip_length == 1024
    def test_calculate_capacity_per_tier(self, carrier_wav_long):
        """Capacity should increase as chip length decreases."""
        from soosef.stegasoo.spread_steganography import calculate_audio_spread_capacity
        cap_lossless = calculate_audio_spread_capacity(carrier_wav_long, chip_tier=0)
        cap_high = calculate_audio_spread_capacity(carrier_wav_long, chip_tier=1)
        cap_low = calculate_audio_spread_capacity(carrier_wav_long, chip_tier=2)
        # Tier -> chip length mapping: 0->256, 1->512, 2->1024.
        assert cap_lossless.chip_length == 256
        assert cap_high.chip_length == 512
        assert cap_low.chip_length == 1024
        # Smaller chip = more capacity
        assert cap_lossless.usable_capacity_bytes > cap_high.usable_capacity_bytes
        assert cap_high.usable_capacity_bytes > cap_low.usable_capacity_bytes
    def test_spread_roundtrip_default_tier(self, carrier_wav_long):
        """Test spread spectrum embed/extract roundtrip (default tier 2)."""
        from soosef.stegasoo.spread_steganography import (
            embed_in_audio_spread,
            extract_from_audio_spread,
        )
        payload = b"Spread test v2"
        seed = b"\x42" * 32
        stego_audio, stats = embed_in_audio_spread(payload, carrier_wav_long, seed)
        assert isinstance(stats, AudioEmbedStats)
        assert stats.embed_mode == EMBED_MODE_AUDIO_SPREAD
        assert stats.chip_tier == 2
        assert stats.chip_length == 1024
        extracted = extract_from_audio_spread(stego_audio, seed)
        assert extracted is not None
        assert extracted == payload
    def test_spread_roundtrip_tier_0(self, carrier_wav_long):
        """Test spread spectrum at tier 0 (chip=256, lossless)."""
        from soosef.stegasoo.spread_steganography import (
            embed_in_audio_spread,
            extract_from_audio_spread,
        )
        payload = b"Lossless tier test with more data to embed for coverage"
        seed = b"\x42" * 32
        stego_audio, stats = embed_in_audio_spread(payload, carrier_wav_long, seed, chip_tier=0)
        assert stats.chip_tier == 0
        assert stats.chip_length == 256
        # The tier is recorded in the header, so extraction needs no tier arg.
        extracted = extract_from_audio_spread(stego_audio, seed)
        assert extracted is not None
        assert extracted == payload
    def test_spread_roundtrip_tier_1(self, carrier_wav_long):
        """Test spread spectrum at tier 1 (chip=512, high lossy)."""
        from soosef.stegasoo.spread_steganography import (
            embed_in_audio_spread,
            extract_from_audio_spread,
        )
        payload = b"High lossy tier test"
        seed = b"\x42" * 32
        stego_audio, stats = embed_in_audio_spread(payload, carrier_wav_long, seed, chip_tier=1)
        assert stats.chip_tier == 1
        assert stats.chip_length == 512
        extracted = extract_from_audio_spread(stego_audio, seed)
        assert extracted is not None
        assert extracted == payload
    def test_wrong_seed_fails(self, carrier_wav_long):
        """Test that wrong seed produces no valid extraction."""
        from soosef.stegasoo.spread_steganography import (
            embed_in_audio_spread,
            extract_from_audio_spread,
        )
        payload = b"Secret spread"
        correct_seed = b"\x42" * 32
        wrong_seed = b"\xFF" * 32
        stego_audio, _ = embed_in_audio_spread(payload, carrier_wav_long, correct_seed)
        extracted = extract_from_audio_spread(stego_audio, wrong_seed)
        # Wrong seed may fail (None) or produce garbage; both are acceptable.
        assert extracted is None or extracted != payload
    def test_per_channel_stereo_roundtrip(self, carrier_wav_stereo_long):
        """Test that stereo per-channel embedding/extraction works."""
        from soosef.stegasoo.spread_steganography import (
            embed_in_audio_spread,
            extract_from_audio_spread,
        )
        payload = b"Stereo per-channel test"
        seed = b"\xAB" * 32
        stego_audio, stats = embed_in_audio_spread(
            payload, carrier_wav_stereo_long, seed, chip_tier=0
        )
        # Stereo has no LFE, so both channels carry data.
        assert stats.channels == 2
        assert stats.embeddable_channels == 2
        extracted = extract_from_audio_spread(stego_audio, seed)
        assert extracted is not None
        assert extracted == payload
    def test_per_channel_preserves_spatial_mix(self, carrier_wav_stereo_long):
        """Verify that per-channel embedding doesn't destroy the spatial mix.
        The difference between left and right channels should be preserved
        (not zeroed out as the old mono-broadcast approach would do).
        """
        from soosef.stegasoo.spread_steganography import embed_in_audio_spread
        payload = b"Spatial preservation test"
        seed = b"\xCD" * 32
        # Read original
        orig_samples, _ = sf.read(io.BytesIO(carrier_wav_stereo_long), dtype="float64", always_2d=True)
        orig_diff = orig_samples[:, 0] - orig_samples[:, 1]
        # Embed
        stego_bytes, _ = embed_in_audio_spread(
            payload, carrier_wav_stereo_long, seed, chip_tier=0
        )
        # Read stego
        stego_samples, _ = sf.read(io.BytesIO(stego_bytes), dtype="float64", always_2d=True)
        stego_diff = stego_samples[:, 0] - stego_samples[:, 1]
        # The channel difference should not be identical (embedding adds different
        # noise per channel), but should be very close (embedding is subtle)
        # With the old mono-broadcast approach, stego_diff would equal orig_diff
        # exactly in unmodified regions but differ where data was embedded.
        # With per-channel, both channels get independent modifications.
        correlation = np.corrcoef(orig_diff, stego_diff)[0, 1]
        assert correlation > 0.95, f"Spatial mix correlation too low: {correlation}"
    def test_capacity_scales_with_channels(self, carrier_wav_long, carrier_wav_stereo_long):
        """Stereo should have roughly double the capacity of mono."""
        from soosef.stegasoo.spread_steganography import calculate_audio_spread_capacity
        mono_cap = calculate_audio_spread_capacity(carrier_wav_long, chip_tier=0)
        stereo_cap = calculate_audio_spread_capacity(carrier_wav_stereo_long, chip_tier=0)
        # Stereo should be ~1.5-2.2x mono (not exact because header is ch0 only
        # and the files have slightly different durations/sample rates)
        ratio = stereo_cap.usable_capacity_bytes / mono_cap.usable_capacity_bytes
        assert ratio > 1.3, f"Stereo/mono capacity ratio too low: {ratio}"
    def test_lfe_skip_5_1(self, carrier_wav_5_1):
        """LFE channel (index 3) should be unmodified in 6-channel audio."""
        from soosef.stegasoo.spread_steganography import embed_in_audio_spread
        payload = b"LFE skip test"
        seed = b"\xEE" * 32
        # Read original LFE channel
        orig_samples, _ = sf.read(io.BytesIO(carrier_wav_5_1), dtype="float64", always_2d=True)
        orig_lfe = orig_samples[:, 3].copy()
        stego_bytes, stats = embed_in_audio_spread(
            payload, carrier_wav_5_1, seed, chip_tier=0
        )
        assert stats.embeddable_channels == 5 # 6 channels - 1 LFE = 5
        stego_samples, _ = sf.read(io.BytesIO(stego_bytes), dtype="float64", always_2d=True)
        stego_lfe = stego_samples[:, 3]
        # LFE channel should be completely unmodified
        np.testing.assert_array_equal(orig_lfe, stego_lfe)
    def test_lfe_skip_roundtrip(self, carrier_wav_5_1):
        """5.1 audio embed/extract roundtrip with LFE skipping."""
        from soosef.stegasoo.spread_steganography import (
            embed_in_audio_spread,
            extract_from_audio_spread,
        )
        payload = b"5.1 surround test"
        seed = b"\xEE" * 32
        stego_bytes, stats = embed_in_audio_spread(
            payload, carrier_wav_5_1, seed, chip_tier=0
        )
        assert stats.channels == 6
        assert stats.embeddable_channels == 5
        extracted = extract_from_audio_spread(stego_bytes, seed)
        assert extracted is not None
        assert extracted == payload
# =============================================================================
# HEADER V2 TESTS
# =============================================================================
class TestHeaderV2:
    """Tests for v2 header construction and parsing."""
    def test_header_v2_build_parse_roundtrip(self):
        """A v2 header (20 bytes) should round-trip all of its fields."""
        from soosef.stegasoo.spread_steganography import _build_header_v2, _parse_header
        data_length = 12345
        chip_tier = 1
        num_ch = 2
        lfe_skipped = False
        header = _build_header_v2(data_length, chip_tier, num_ch, lfe_skipped)
        assert len(header) == 20
        magic_valid, version, length, tier, nch, lfe = _parse_header(header)
        assert magic_valid
        assert version == 2
        assert length == data_length
        assert tier == chip_tier
        assert nch == num_ch
        assert lfe is False
    def test_header_v2_with_lfe_flag(self):
        """The LFE-skipped flag should survive a build/parse round-trip."""
        from soosef.stegasoo.spread_steganography import _build_header_v2, _parse_header
        header = _build_header_v2(999, 0, 5, lfe_skipped=True)
        magic_valid, version, length, tier, nch, lfe = _parse_header(header)
        assert magic_valid
        assert version == 2
        assert length == 999
        assert tier == 0
        assert nch == 5
        assert lfe is True
    def test_header_v0_build_parse(self):
        """Legacy v0 headers (16 bytes) parse with tier/channel fields as None."""
        from soosef.stegasoo.spread_steganography import _build_header_v0, _parse_header
        header = _build_header_v0(4567)
        assert len(header) == 16
        magic_valid, version, length, tier, nch, lfe = _parse_header(header)
        assert magic_valid
        assert version == 0
        assert length == 4567
        # v0 headers carry no tier/channel metadata.
        assert tier is None
        assert nch is None
    def test_header_bad_magic(self):
        """A header with the wrong magic bytes must be rejected."""
        from soosef.stegasoo.spread_steganography import _parse_header
        bad_header = b"XXXX" + b"\x00" * 16
        magic_valid, version, length, tier, nch, lfe = _parse_header(bad_header)
        assert not magic_valid
# =============================================================================
# ROUND-ROBIN BIT DISTRIBUTION TESTS
# =============================================================================
class TestRoundRobin:
    """Tests for round-robin bit distribution."""
    def test_distribute_and_collect_identity(self):
        """distribute followed by collect must reproduce the input bits."""
        from soosef.stegasoo.spread_steganography import (
            _collect_bits_round_robin,
            _distribute_bits_round_robin,
        )
        bits = [1, 0, 1, 1, 0, 0, 1, 0, 1, 1]
        # Identity must hold for any channel count.
        for num_ch in [1, 2, 3, 4, 5]:
            per_ch = _distribute_bits_round_robin(bits, num_ch)
            assert len(per_ch) == num_ch
            reassembled = _collect_bits_round_robin(per_ch)
            assert reassembled == bits, f"Failed for {num_ch} channels"
    def test_distribute_round_robin_ordering(self):
        """Bits are dealt to channels in strict round-robin order."""
        from soosef.stegasoo.spread_steganography import _distribute_bits_round_robin
        bits = [0, 1, 2, 3, 4, 5] # using ints for clarity
        per_ch = _distribute_bits_round_robin(bits, 3)
        # ch0: bits 0, 3 ch1: bits 1, 4 ch2: bits 2, 5
        assert per_ch[0] == [0, 3]
        assert per_ch[1] == [1, 4]
        assert per_ch[2] == [2, 5]
    def test_distribute_uneven(self):
        """A bit count not divisible by the channel count still round-trips."""
        from soosef.stegasoo.spread_steganography import (
            _collect_bits_round_robin,
            _distribute_bits_round_robin,
        )
        bits = [0, 1, 2, 3, 4] # 5 bits across 3 channels
        per_ch = _distribute_bits_round_robin(bits, 3)
        assert per_ch[0] == [0, 3]
        assert per_ch[1] == [1, 4]
        assert per_ch[2] == [2]
        reassembled = _collect_bits_round_robin(per_ch)
        assert reassembled == bits
# =============================================================================
# CHANNEL MANAGEMENT TESTS
# =============================================================================
class TestChannelManagement:
    """Tests for embeddable channel selection."""
    def test_mono(self):
        """A single channel is always embeddable."""
        from soosef.stegasoo.spread_steganography import _embeddable_channels
        assert _embeddable_channels(1) == [0]
    def test_stereo(self):
        """Both stereo channels are embeddable."""
        from soosef.stegasoo.spread_steganography import _embeddable_channels
        assert _embeddable_channels(2) == [0, 1]
    def test_5_1_skips_lfe(self):
        """5.1 layouts exclude channel index 3 (the LFE)."""
        from soosef.stegasoo.spread_steganography import _embeddable_channels
        selected = _embeddable_channels(6)
        assert selected == [0, 1, 2, 4, 5]
        assert 3 not in selected  # index 3 is the LFE
    def test_7_1_skips_lfe(self):
        """7.1 layouts also exclude the LFE, leaving seven channels."""
        from soosef.stegasoo.spread_steganography import _embeddable_channels
        selected = _embeddable_channels(8)
        assert 3 not in selected
        assert len(selected) == 7
    def test_quad_no_skip(self):
        """Layouts with fewer than six channels have no LFE to skip."""
        from soosef.stegasoo.spread_steganography import _embeddable_channels
        assert _embeddable_channels(4) == [0, 1, 2, 3]
# =============================================================================
# FORMAT DETECTION TESTS
# =============================================================================
class TestFormatDetection:
    """Tests for audio format detection."""
    def test_detect_wav(self, carrier_wav):
        """A valid WAV carrier should be identified as 'wav'."""
        from soosef.stegasoo.audio_utils import detect_audio_format
        assert detect_audio_format(carrier_wav) == "wav"
    def test_detect_unknown(self):
        """Arbitrary non-audio bytes should report 'unknown'."""
        from soosef.stegasoo.audio_utils import detect_audio_format
        assert detect_audio_format(b"not audio data") == "unknown"
    def test_detect_empty(self):
        """Empty input should report 'unknown' rather than raising."""
        from soosef.stegasoo.audio_utils import detect_audio_format
        assert detect_audio_format(b"") == "unknown"
# =============================================================================
# AUDIO INFO TESTS
# =============================================================================
class TestAudioInfo:
    """Tests for audio info extraction."""
    def test_get_wav_info(self, carrier_wav):
        """Metadata for the 1 s mono fixture should match its generation params."""
        from soosef.stegasoo.audio_utils import get_audio_info
        info = get_audio_info(carrier_wav)
        assert isinstance(info, AudioInfo)
        assert info.sample_rate == 44100
        assert info.channels == 1
        assert info.format == "wav"
        # Duration is derived from frame count, so allow a small tolerance.
        assert abs(info.duration_seconds - 1.0) < 0.1
    def test_get_stereo_info(self, carrier_wav_stereo):
        """The stereo fixture should report two channels."""
        from soosef.stegasoo.audio_utils import get_audio_info
        info = get_audio_info(carrier_wav_stereo)
        assert info.channels == 2
# =============================================================================
# VALIDATION TESTS
# =============================================================================
class TestAudioValidation:
    """Tests for audio validation."""
    def test_validate_valid_audio(self, carrier_wav):
        """A well-formed WAV should pass validation."""
        from soosef.stegasoo.audio_utils import validate_audio
        result = validate_audio(carrier_wav)
        assert result.is_valid
    def test_validate_empty_audio(self):
        """Empty bytes should fail validation."""
        from soosef.stegasoo.audio_utils import validate_audio
        result = validate_audio(b"")
        assert not result.is_valid
    def test_validate_invalid_audio(self):
        """Non-audio bytes should fail validation."""
        from soosef.stegasoo.audio_utils import validate_audio
        result = validate_audio(b"not audio data at all")
        assert not result.is_valid
    def test_validate_audio_embed_mode(self):
        """Only the three known audio embed modes are accepted."""
        from soosef.stegasoo.validation import validate_audio_embed_mode
        assert validate_audio_embed_mode("audio_lsb").is_valid
        assert validate_audio_embed_mode("audio_spread").is_valid
        assert validate_audio_embed_mode("audio_auto").is_valid
        assert not validate_audio_embed_mode("invalid").is_valid
# =============================================================================
# INTEGRATION TESTS
# =============================================================================
class TestIntegration:
    """End-to-end integration tests using encode_audio/decode_audio."""
    def test_lsb_encode_decode(self, carrier_wav, reference_photo):
        """Full LSB pipeline: encode a text message, decode it back verbatim."""
        from soosef.stegasoo.decode import decode_audio
        from soosef.stegasoo.encode import encode_audio
        stego_audio, stats = encode_audio(
            message="Hello from audio steganography!",
            reference_photo=reference_photo,
            carrier_audio=carrier_wav,
            passphrase="test words here now",
            pin="123456",
            embed_mode="audio_lsb",
        )
        assert len(stego_audio) > 0
        result = decode_audio(
            stego_audio=stego_audio,
            reference_photo=reference_photo,
            passphrase="test words here now",
            pin="123456",
            embed_mode="audio_lsb",
        )
        assert result.is_text
        assert result.message == "Hello from audio steganography!"
    def test_lsb_wrong_credentials(self, carrier_wav, reference_photo):
        """Decoding with a wrong passphrase AND wrong PIN must raise."""
        from soosef.stegasoo.decode import decode_audio
        from soosef.stegasoo.encode import encode_audio
        stego_audio, _ = encode_audio(
            message="Secret",
            reference_photo=reference_photo,
            carrier_audio=carrier_wav,
            passphrase="correct horse battery staple",
            pin="123456",
            embed_mode="audio_lsb",
        )
        # Broad Exception on purpose: the concrete error type for bad
        # credentials is an implementation detail of decode_audio.
        with pytest.raises(Exception):
            decode_audio(
                stego_audio=stego_audio,
                reference_photo=reference_photo,
                passphrase="wrong passphrase words here",
                pin="654321",
                embed_mode="audio_lsb",
            )
    def test_spread_encode_decode(self, carrier_wav_spread_integration, reference_photo):
        """Test full spread spectrum encode/decode pipeline."""
        from soosef.stegasoo.decode import decode_audio
        from soosef.stegasoo.encode import encode_audio
        stego_audio, stats = encode_audio(
            message="Spread integration test",
            reference_photo=reference_photo,
            carrier_audio=carrier_wav_spread_integration,
            passphrase="test words here now",
            pin="123456",
            embed_mode="audio_spread",
        )
        result = decode_audio(
            stego_audio=stego_audio,
            reference_photo=reference_photo,
            passphrase="test words here now",
            pin="123456",
            embed_mode="audio_spread",
        )
        assert result.message == "Spread integration test"
    def test_spread_encode_decode_with_chip_tier(
        self, carrier_wav_spread_integration, reference_photo
    ):
        """Test spread spectrum with explicit chip tier."""
        from soosef.stegasoo.decode import decode_audio
        from soosef.stegasoo.encode import encode_audio
        stego_audio, stats = encode_audio(
            message="Tier 0 integration",
            reference_photo=reference_photo,
            carrier_audio=carrier_wav_spread_integration,
            passphrase="test words here now",
            pin="123456",
            embed_mode="audio_spread",
            chip_tier=0,
        )
        assert stats.chip_tier == 0
        assert stats.chip_length == 256
        # No chip_tier on decode: the tier is recovered from the embedded header.
        result = decode_audio(
            stego_audio=stego_audio,
            reference_photo=reference_photo,
            passphrase="test words here now",
            pin="123456",
            embed_mode="audio_spread",
        )
        assert result.message == "Tier 0 integration"
    def test_auto_detect_lsb(self, carrier_wav, reference_photo):
        """Test auto-detection finds LSB encoded audio."""
        from soosef.stegasoo.decode import decode_audio
        from soosef.stegasoo.encode import encode_audio
        stego_audio, _ = encode_audio(
            message="Auto-detect test",
            reference_photo=reference_photo,
            carrier_audio=carrier_wav,
            passphrase="test words here now",
            pin="123456",
            embed_mode="audio_lsb",
        )
        # Decode with "audio_auto" must locate the LSB payload without being
        # told which mode was used at encode time.
        result = decode_audio(
            stego_audio=stego_audio,
            reference_photo=reference_photo,
            passphrase="test words here now",
            pin="123456",
            embed_mode="audio_auto",
        )
        assert result.message == "Auto-detect test"
    def test_spread_with_real_speech(self, speech_wav, reference_photo):
        """Test spread spectrum with real speech audio from test_data."""
        from soosef.stegasoo.decode import decode_audio
        from soosef.stegasoo.encode import encode_audio
        message = "Hidden in a speech about elitism"
        stego_audio, stats = encode_audio(
            message=message,
            reference_photo=reference_photo,
            carrier_audio=speech_wav,
            passphrase="test words here now",
            pin="123456",
            embed_mode="audio_spread",
            chip_tier=0, # lossless tier for max capacity
        )
        assert stats.chip_tier == 0
        result = decode_audio(
            stego_audio=stego_audio,
            reference_photo=reference_photo,
            passphrase="test words here now",
            pin="123456",
            embed_mode="audio_spread",
        )
        assert result.message == message

View File

@ -0,0 +1,85 @@
"""Basic tests for image hashing."""
from io import BytesIO
import pytest
from PIL import Image
from soosef.verisoo.hashing import hash_image, perceptual_distance, is_same_image
def create_test_image(width: int = 100, height: int = 100, color: tuple = (255, 0, 0)) -> bytes:
    """Return PNG bytes for a solid-color RGB image of the given dimensions."""
    out = BytesIO()
    Image.new("RGB", (width, height), color).save(out, format="PNG")
    return out.getvalue()
class TestHashImage:
    """Tests for hash_image function."""
    def test_hash_returns_all_components(self):
        """Hash should return sha256, phash, and dhash."""
        image_data = create_test_image()
        hashes = hash_image(image_data)
        # All three digest components must be populated.
        assert hashes.sha256
        assert hashes.phash
        assert hashes.dhash
        assert len(hashes.sha256) == 64 # SHA-256 hex
    def test_identical_images_same_hash(self):
        """Identical bytes should produce identical hashes."""
        image_data = create_test_image()
        hash1 = hash_image(image_data)
        hash2 = hash_image(image_data)
        # Hashing must be deterministic across all three components.
        assert hash1.sha256 == hash2.sha256
        assert hash1.phash == hash2.phash
        assert hash1.dhash == hash2.dhash
    def test_different_images_different_hash(self):
        """Different images should produce different SHA-256."""
        red = create_test_image(color=(255, 0, 0))
        blue = create_test_image(color=(0, 0, 255))
        hash_red = hash_image(red)
        hash_blue = hash_image(blue)
        # Only the cryptographic hash is asserted here; perceptual hashes of
        # two solid-color images may or may not differ.
        assert hash_red.sha256 != hash_blue.sha256
class TestPerceptualDistance:
    """Tests for perceptual distance calculation."""
    def test_identical_hashes_zero_distance(self):
        """A hash compared with itself has distance zero."""
        sample = "0123456789abcdef"
        assert perceptual_distance(sample, sample) == 0
    def test_different_hashes_nonzero_distance(self):
        """A single flipped bit yields a distance of one."""
        assert perceptual_distance("0000000000000000", "0000000000000001") == 1
    def test_completely_different_max_distance(self):
        """All-zeros vs. all-ones differs in every one of the 64 bits."""
        assert perceptual_distance("0000000000000000", "ffffffffffffffff") == 64
class TestIsSameImage:
"""Tests for image comparison."""
def test_exact_match(self):
"""Identical bytes should be exact match."""
image_data = create_test_image()
hash1 = hash_image(image_data)
hash2 = hash_image(image_data)
is_same, reason = is_same_image(hash1, hash2)
assert is_same
assert reason == "exact"