Version 3.1.0 now with experimental DCT support.

This commit is contained in:
Aaron D. Lee
2025-12-31 13:11:34 -05:00
parent e4a4a5e074
commit 4eefc946c4
10 changed files with 2520 additions and 299 deletions

View File

@@ -1,5 +1,5 @@
"""
Stegasoo - Secure Steganography Library
Stegasoo - Secure Steganography Library (v3.0.1)
A Python library for hiding encrypted messages and files in images using
hybrid photo + passphrase + PIN authentication.
@@ -58,7 +58,7 @@ File Embedding:
else:
print(decoded.message)
Capacity Pre-check (v2.2.1):
Capacity Pre-check:
from stegasoo import will_fit
# Check if payload will fit before encoding
@@ -68,13 +68,52 @@ Capacity Pre-check (v2.2.1):
else:
print(f"Need {-result['headroom']} more bytes")
NEW in v3.0 - DCT Embedding Mode:
from stegasoo import encode, has_dct_support, compare_modes
# Check if DCT mode is available (requires scipy)
if has_dct_support():
# DCT mode: smaller capacity, grayscale output, frequency domain
result = encode(
message="Secret",
reference_photo=ref_photo,
carrier_image=carrier,
day_phrase="apple forest thunder",
pin="123456",
embed_mode='dct', # NEW parameter
)
# Compare mode capacities
info = compare_modes(carrier_image)
print(f"LSB capacity: {info['lsb']['capacity_kb']:.1f} KB")
print(f"DCT capacity: {info['dct']['capacity_kb']:.1f} KB")
NEW in v3.0.1 - DCT Output Format:
# DCT mode can output PNG (lossless) or JPEG (smaller, natural)
result = encode(
message="Secret",
...,
embed_mode='dct',
dct_output_format='jpeg', # 'png' (default) or 'jpeg'
)
Debugging:
from stegasoo.debug import debug
debug.enable(True) # Enable debug output
debug.enable_performance(True) # Enable timing
"""
from .constants import __version__, DAY_NAMES, MAX_MESSAGE_SIZE, MAX_FILE_PAYLOAD_SIZE
from .constants import (
__version__,
DAY_NAMES,
MAX_MESSAGE_SIZE,
MAX_FILE_PAYLOAD_SIZE,
# NEW in v3.0 - Embedding modes
EMBED_MODE_LSB,
EMBED_MODE_DCT,
EMBED_MODE_AUTO,
detect_stego_mode,
)
from .models import (
Credentials,
EncodeInput,
@@ -152,8 +191,13 @@ from .steganography import (
get_image_format,
is_lossless_format,
LOSSLESS_FORMATS,
# NEW in v2.2.1
will_fit,
# NEW in v3.0
has_dct_support,
calculate_capacity_by_mode,
will_fit_by_mode,
get_available_modes,
compare_modes,
)
from .utils import (
generate_filename,
@@ -164,7 +208,6 @@ from .utils import (
secure_delete,
SecureDeleter,
format_file_size,
# NEW in v2.2.1
strip_image_metadata,
)
from .debug import debug # Import debug utilities
@@ -183,7 +226,7 @@ from .compression import (
)
# =============================================================================
# NEW IN v2.2.0 - Batch Processing
# Batch Processing
# =============================================================================
from .batch import (
BatchProcessor,
@@ -191,10 +234,43 @@ from .batch import (
BatchItem,
BatchStatus,
batch_capacity_check,
# NEW in v2.2.1
BatchCredentials,
)
# =============================================================================
# NEW in v3.0 - DCT Steganography (optional, requires scipy)
# =============================================================================
try:
    from .dct_steganography import (
        embed_in_dct,
        extract_from_dct,
        calculate_dct_capacity,
        will_fit_dct,
        estimate_capacity_comparison,
        DCTEmbedStats,
        DCTCapacityInfo,
        HAS_SCIPY as _DCT_HAS_SCIPY,
    )
    # dct_steganography catches a missing scipy itself and only records it in
    # HAS_SCIPY, so a successful import does NOT mean DCT mode is usable.
    # Mirror the real availability instead of hard-coding True.
    HAS_DCT = bool(_DCT_HAS_SCIPY)
except ImportError:
    # The DCT module itself could not be imported at all.
    HAS_DCT = False

    def _dct_unavailable(*args, **kwargs):
        """Stand-in for every DCT entry point when the module is unavailable."""
        raise ImportError("DCT mode requires scipy. Install: pip install scipy")

    # Keep the public names importable so `from stegasoo import embed_in_dct`
    # still works; calling any of them raises the helpful ImportError above.
    embed_in_dct = _dct_unavailable
    extract_from_dct = _dct_unavailable
    calculate_dct_capacity = _dct_unavailable
    will_fit_dct = _dct_unavailable
    estimate_capacity_comparison = _dct_unavailable

    # Stub classes so the names exist for imports and isinstance checks.
    class DCTEmbedStats:
        pass

    class DCTCapacityInfo:
        pass
# QR Code utilities (optional, depends on qrcode and pyzbar)
try:
from .qr_utils import (
@@ -223,16 +299,22 @@ from pathlib import Path
from typing import Optional, Union, Dict, Any
# =============================================================================
# ENCODE FUNCTION (v3.0.1 - with dct_output_format)
# =============================================================================
def encode(
message: Union[str, bytes, FilePayload],
message, # Union[str, bytes, FilePayload]
reference_photo: bytes,
carrier_image: bytes,
day_phrase: str,
pin: str = "",
rsa_key_data: Optional[bytes] = None,
rsa_password: Optional[str] = None,
date_str: Optional[str] = None,
output_format: Optional[str] = None,
rsa_key_data = None, # Optional[bytes]
rsa_password = None, # Optional[str]
date_str = None, # Optional[str]
output_format = None, # Optional[str]
embed_mode: str = EMBED_MODE_LSB,
dct_output_format: str = "png", # NEW in v3.0.1: 'png' or 'jpeg'
) -> EncodeResult:
"""
Encode a secret message or file into an image.
@@ -249,8 +331,9 @@ def encode(
rsa_key_data: RSA private key PEM bytes (optional if using PIN)
rsa_password: Password for RSA key if encrypted
date_str: Date string YYYY-MM-DD (defaults to today)
output_format: Force output format ('PNG', 'BMP'). If None, preserves
carrier format for lossless types, defaults to PNG for lossy.
output_format: Force output format ('PNG', 'BMP') - LSB mode only
embed_mode: Embedding mode - 'lsb' (default) or 'dct' (v3.0+)
dct_output_format: For DCT mode - 'png' (lossless) or 'jpeg' (smaller)
Returns:
EncodeResult with stego image and metadata
@@ -260,14 +343,37 @@ def encode(
SecurityFactorError: If no PIN or RSA key provided
CapacityError: If carrier is too small
EncryptionError: If encryption fails
ImportError: If DCT mode requested but scipy unavailable
Note:
Output format is always lossless (PNG or BMP) to preserve hidden data.
If carrier is JPEG/GIF, output will be PNG to maintain data integrity.
Example:
# Default LSB mode
>>> result = encode(message="Secret", ...)
# DCT mode with PNG output (lossless)
>>> result = encode(message="Secret", ..., embed_mode='dct')
# DCT mode with JPEG output (smaller, natural)
>>> result = encode(message="Secret", ..., embed_mode='dct', dct_output_format='jpeg')
"""
# Debug logging
debug.print(f"encode called: message type={type(message).__name__}, "
f"day_phrase='{day_phrase[:20]}...', pin_length={len(pin)}")
f"day_phrase='{day_phrase[:20]}...', pin_length={len(pin)}, "
f"embed_mode={embed_mode}, dct_output_format={dct_output_format}")
# Validate embed_mode
if embed_mode not in (EMBED_MODE_LSB, EMBED_MODE_DCT):
raise ValidationError(f"Invalid embed_mode: {embed_mode}. Use 'lsb' or 'dct'")
if embed_mode == EMBED_MODE_DCT and not has_dct_support():
raise ImportError(
"DCT embedding mode requires scipy. "
"Install with: pip install scipy"
)
# Validate dct_output_format
if dct_output_format not in ('png', 'jpeg'):
debug.print(f"Invalid dct_output_format '{dct_output_format}', defaulting to 'png'")
dct_output_format = 'png'
# Validate inputs
require_valid_payload(message)
@@ -301,27 +407,54 @@ def encode(
debug.data(pixel_key, "Pixel key")
# Embed in image (returns extension too)
# CRITICAL: Pass dct_output_format to embed_in_image
stego_data, stats, extension = embed_in_image(
carrier_image, encrypted, pixel_key, output_format=output_format
encrypted,
carrier_image,
pixel_key,
output_format=output_format,
embed_mode=embed_mode,
dct_output_format=dct_output_format, # NEW in v3.0.1
)
# Generate filename with correct extension
filename = generate_filename(date_str, extension=extension)
debug.print(f"Encoding complete: {filename}, "
f"modified {stats.pixels_modified}/{stats.total_pixels} pixels "
f"({stats.modification_percent:.2f}%)")
return EncodeResult(
stego_image=stego_data,
filename=filename,
pixels_modified=stats.pixels_modified,
total_pixels=stats.total_pixels,
capacity_used=stats.capacity_used,
date_used=date_str
)
# Handle stats from either LSB or DCT mode
if hasattr(stats, 'pixels_modified'):
# LSB mode stats
debug.print(f"Encoding complete: {filename}, "
f"modified {stats.pixels_modified}/{stats.total_pixels} pixels "
f"({stats.modification_percent:.2f}%)")
return EncodeResult(
stego_image=stego_data,
filename=filename,
pixels_modified=stats.pixels_modified,
total_pixels=stats.total_pixels,
capacity_used=stats.capacity_used,
date_used=date_str
)
else:
# DCT mode stats
debug.print(f"Encoding complete (DCT): {filename}, "
f"embedded {stats.bits_embedded // 8} bytes "
f"({stats.usage_percent:.2f}% capacity)")
return EncodeResult(
stego_image=stego_data,
filename=filename,
pixels_modified=stats.blocks_used * 64, # Approximate
total_pixels=stats.blocks_available * 64,
capacity_used=stats.usage_percent / 100.0,
date_used=date_str
)
# =============================================================================
# ENCODE_FILE FUNCTION (v3.0.1 - with dct_output_format)
# =============================================================================
def encode_file(
filepath: Union[str, Path],
reference_photo: bytes,
@@ -333,6 +466,8 @@ def encode_file(
date_str: Optional[str] = None,
output_format: Optional[str] = None,
filename_override: Optional[str] = None,
embed_mode: str = EMBED_MODE_LSB,
dct_output_format: str = "png", # NEW in v3.0.1
) -> EncodeResult:
"""
Encode a file into an image.
@@ -348,13 +483,16 @@ def encode_file(
rsa_key_data: RSA private key PEM bytes (optional if using PIN)
rsa_password: Password for RSA key if encrypted
date_str: Date string YYYY-MM-DD (defaults to today)
output_format: Force output format ('PNG', 'BMP')
output_format: Force output format ('PNG', 'BMP') - LSB mode only
filename_override: Override the stored filename
embed_mode: 'lsb' (default) or 'dct' (v3.0+)
dct_output_format: For DCT mode - 'png' or 'jpeg' (v3.0.1+)
Returns:
EncodeResult with stego image and metadata
"""
debug.print(f"encode_file called: filepath={filepath}")
debug.print(f"encode_file called: filepath={filepath}, embed_mode={embed_mode}, "
f"dct_output_format={dct_output_format}")
payload = FilePayload.from_file(str(filepath), filename_override)
return encode(
@@ -367,9 +505,15 @@ def encode_file(
rsa_password=rsa_password,
date_str=date_str,
output_format=output_format,
embed_mode=embed_mode,
dct_output_format=dct_output_format, # NEW in v3.0.1
)
# =============================================================================
# ENCODE_BYTES FUNCTION (v3.0.1 - with dct_output_format)
# =============================================================================
def encode_bytes(
data: bytes,
filename: str,
@@ -382,6 +526,8 @@ def encode_bytes(
date_str: Optional[str] = None,
output_format: Optional[str] = None,
mime_type: Optional[str] = None,
embed_mode: str = EMBED_MODE_LSB,
dct_output_format: str = "png", # NEW in v3.0.1
) -> EncodeResult:
"""
Encode raw bytes with a filename into an image.
@@ -398,13 +544,16 @@ def encode_bytes(
rsa_key_data: RSA private key PEM bytes (optional if using PIN)
rsa_password: Password for RSA key if encrypted
date_str: Date string YYYY-MM-DD (defaults to today)
output_format: Force output format ('PNG', 'BMP')
output_format: Force output format ('PNG', 'BMP') - LSB mode only
mime_type: MIME type of the data
embed_mode: 'lsb' (default) or 'dct' (v3.0+)
dct_output_format: For DCT mode - 'png' or 'jpeg' (v3.0.1+)
Returns:
EncodeResult with stego image and metadata
"""
debug.print(f"encode_bytes called: filename={filename}, data_size={len(data)}")
debug.print(f"encode_bytes called: filename={filename}, data_size={len(data)}, "
f"embed_mode={embed_mode}, dct_output_format={dct_output_format}")
payload = FilePayload(data=data, filename=filename, mime_type=mime_type)
return encode(
@@ -417,9 +566,15 @@ def encode_bytes(
rsa_password=rsa_password,
date_str=date_str,
output_format=output_format,
embed_mode=embed_mode,
dct_output_format=dct_output_format, # NEW in v3.0.1
)
# =============================================================================
# DECODE FUNCTION
# =============================================================================
@debug.time
def decode(
stego_image: bytes,
@@ -429,6 +584,7 @@ def decode(
rsa_key_data: Optional[bytes] = None,
rsa_password: Optional[str] = None,
date_str: Optional[str] = None,
embed_mode: str = EMBED_MODE_AUTO,
) -> DecodeResult:
"""
Decode a secret message or file from a stego image.
@@ -443,6 +599,11 @@ def decode(
pin: Static PIN (if used during encoding)
rsa_key_data: RSA private key PEM bytes (if used during encoding)
rsa_password: Password for RSA key if encrypted
date_str: Date override (defaults to today, then checks header)
embed_mode: 'auto' (default), 'lsb', or 'dct' (v3.0+)
- 'auto': Try LSB first, then DCT if available
- 'lsb': Only try LSB extraction
- 'dct': Only try DCT extraction (requires scipy)
Returns:
DecodeResult with:
@@ -457,9 +618,24 @@ def decode(
SecurityFactorError: If no PIN or RSA key provided
ExtractionError: If data cannot be extracted
DecryptionError: If decryption fails
ImportError: If DCT mode explicitly requested but scipy unavailable
Note:
With embed_mode='auto' (default), tries LSB first then DCT.
For best performance, specify the mode if you know it.
"""
debug.print(f"decode called: stego_image_size={len(stego_image)}, "
f"day_phrase='{day_phrase[:20]}...'")
f"day_phrase='{day_phrase[:20]}...', embed_mode={embed_mode}")
# Validate embed_mode
if embed_mode not in (EMBED_MODE_AUTO, EMBED_MODE_LSB, EMBED_MODE_DCT):
raise ValidationError(f"Invalid embed_mode: {embed_mode}. Use 'auto', 'lsb', or 'dct'")
if embed_mode == EMBED_MODE_DCT and not has_dct_support():
raise ImportError(
"DCT extraction mode requires scipy. "
"Install with: pip install scipy"
)
# Validate inputs
require_security_factors(pin, rsa_key_data)
@@ -479,7 +655,12 @@ def decode(
debug.data(pixel_key, "Pixel key for extraction")
encrypted = extract_from_image(stego_image, pixel_key)
# Extract with specified mode
encrypted = extract_from_image(
stego_image,
pixel_key,
embed_mode=embed_mode,
)
# If we got data, check if it's from a different date
if encrypted:
@@ -490,7 +671,11 @@ def decode(
pixel_key = derive_pixel_key(
reference_photo, day_phrase, header['date'], pin, rsa_key_data
)
encrypted = extract_from_image(stego_image, pixel_key)
encrypted = extract_from_image(
stego_image,
pixel_key,
embed_mode=embed_mode,
)
if not encrypted:
debug.print("No data extracted from image")
@@ -503,6 +688,10 @@ def decode(
return decrypt_message(encrypted, reference_photo, day_phrase, pin, rsa_key_data)
# =============================================================================
# DECODE_TEXT FUNCTION
# =============================================================================
def decode_text(
stego_image: bytes,
reference_photo: bytes,
@@ -511,6 +700,7 @@ def decode_text(
rsa_key_data: Optional[bytes] = None,
rsa_password: Optional[str] = None,
date_str: Optional[str] = None,
embed_mode: str = EMBED_MODE_AUTO,
) -> str:
"""
Decode a text message from a stego image.
@@ -525,6 +715,8 @@ def decode_text(
pin: Static PIN (if used during encoding)
rsa_key_data: RSA private key PEM bytes (if used during encoding)
rsa_password: Password for RSA key if encrypted
date_str: Date override
embed_mode: 'auto' (default), 'lsb', or 'dct' (v3.0+)
Returns:
Decrypted message string
@@ -532,8 +724,17 @@ def decode_text(
Raises:
DecryptionError: If content is a binary file, not text
"""
debug.print("decode_text called")
result = decode(stego_image, reference_photo, day_phrase, pin, rsa_key_data, rsa_password)
debug.print(f"decode_text called, embed_mode={embed_mode}")
result = decode(
stego_image,
reference_photo,
day_phrase,
pin,
rsa_key_data,
rsa_password,
date_str,
embed_mode,
)
if result.is_file:
# Try to decode file as text
@@ -553,6 +754,10 @@ def decode_text(
return message
# =============================================================================
# EXPORTS
# =============================================================================
__all__ = [
# Version
'__version__',
@@ -565,6 +770,27 @@ __all__ = [
'decode_text',
'generate_credentials',
# NEW in v3.0 - Embedding modes
'EMBED_MODE_LSB',
'EMBED_MODE_DCT',
'EMBED_MODE_AUTO',
'has_dct_support',
'compare_modes',
'get_available_modes',
'calculate_capacity_by_mode',
'will_fit_by_mode',
'detect_stego_mode',
'HAS_DCT',
# NEW in v3.0 - DCT functions (available if scipy installed)
'embed_in_dct',
'extract_from_dct',
'calculate_dct_capacity',
'will_fit_dct',
'estimate_capacity_comparison',
'DCTEmbedStats',
'DCTCapacityInfo',
# Constants
'DAY_NAMES',
'LOSSLESS_FORMATS',
@@ -646,7 +872,7 @@ __all__ = [
'get_image_dimensions',
'get_image_format',
'is_lossless_format',
'will_fit', # NEW in v2.2.1
'will_fit',
# Utilities
'generate_filename',
@@ -657,12 +883,12 @@ __all__ = [
'secure_delete',
'SecureDeleter',
'format_file_size',
'strip_image_metadata', # NEW in v2.2.1
'strip_image_metadata',
# Debugging
'debug',
# Compression (v2.2.0)
# Compression
'compress',
'decompress',
'CompressionAlgorithm',
@@ -671,11 +897,11 @@ __all__ = [
'estimate_compressed_size',
'get_available_algorithms',
# Batch processing (v2.2.0)
# Batch processing
'BatchProcessor',
'BatchResult',
'BatchItem',
'BatchStatus',
'batch_capacity_check',
'BatchCredentials', # NEW in v2.2.1
'BatchCredentials',
]

View File

@@ -12,7 +12,7 @@ from pathlib import Path
# VERSION
# ============================================================================
__version__ = "2.2.1"
__version__ = "3.1.0"
# ============================================================================
# FILE FORMAT
@@ -181,3 +181,44 @@ def get_wordlist() -> list[str]:
if _bip39_words is None:
_bip39_words = get_bip39_words()
return _bip39_words
# =============================================================================
# DCT STEGANOGRAPHY (v3.0)
# =============================================================================
# Embedding modes
EMBED_MODE_LSB = 'lsb'    # Spatial LSB embedding (default, original mode)
EMBED_MODE_DCT = 'dct'    # DCT domain embedding (new in v3.0)
EMBED_MODE_AUTO = 'auto'  # Auto-detect on decode

# DCT-specific constants
DCT_MAGIC_HEADER = b'\x89DCT'  # Magic header for DCT mode
DCT_FORMAT_VERSION = 1
DCT_STEP_SIZE = 8  # QIM quantization step

# Valid embedding modes
VALID_EMBED_MODES = {EMBED_MODE_LSB, EMBED_MODE_DCT}


def detect_stego_mode(encrypted_data: bytes) -> str:
    """
    Classify a payload by its leading four-byte signature.

    Args:
        encrypted_data: Extracted payload (only the first four bytes are read)

    Returns:
        EMBED_MODE_LSB for a b'\\x89ST3' prefix, EMBED_MODE_DCT for a
        DCT_MAGIC_HEADER prefix, otherwise the string 'unknown' (including
        when fewer than four bytes are supplied).
    """
    # NOTE(review): the DCT embedding module added in this commit frames its
    # payload with b'DCTS', not DCT_MAGIC_HEADER - confirm which signature the
    # callers of this function actually pass in.
    signature = encrypted_data[:4]
    if len(signature) < 4:
        return 'unknown'

    known_signatures = (
        (b'\x89ST3', EMBED_MODE_LSB),
        (DCT_MAGIC_HEADER, EMBED_MODE_DCT),
    )
    for magic, mode in known_signatures:
        if signature == magic:
            return mode
    return 'unknown'

View File

@@ -0,0 +1,554 @@
"""
DCT Domain Steganography Module (v3.0.1)
Embeds data in DCT coefficients of grayscale images.
Supports PNG (lossless) or JPEG (natural, smaller) output.
This provides an alternative to LSB embedding with different trade-offs:
- More resistant to visual inspection
- Survives some image processing
- Lower capacity (roughly 8% of 3-bit-per-pixel LSB with the default 16 coefficients per 8x8 block)
- Works in frequency domain
Requires: scipy (for DCT transforms)
"""
import io
import struct
import hashlib
from dataclasses import dataclass
from typing import Optional, Literal
from enum import Enum
import numpy as np
from PIL import Image
# Check for scipy availability
try:
from scipy.fftpack import dct, idct
HAS_SCIPY = True
except ImportError:
HAS_SCIPY = False
dct = None
idct = None
# ============================================================================
# CONSTANTS
# ============================================================================
# DCT block size (standard 8x8 like JPEG); the image is processed in
# BLOCK_SIZE x BLOCK_SIZE tiles.
BLOCK_SIZE = 8
# Candidate (row, col) coefficient positions inside a block, listed in a
# zig-zag-style order from low to higher frequency.
# The DC coefficient (0, 0) is deliberately absent, as are the
# highest-frequency positions.
# These positions are relatively stable across JPEG compression
EMBED_POSITIONS = [
    (0, 1), (1, 0), (2, 0), (1, 1), (0, 2), (0, 3), (1, 2), (2, 1), (3, 0),
    (4, 0), (3, 1), (2, 2), (1, 3), (0, 4), (0, 5), (1, 4), (2, 3), (3, 2),
    (4, 1), (5, 0), (5, 1), (4, 2), (3, 3), (2, 4), (1, 5), (0, 6), (0, 7),
    (1, 6), (2, 5), (3, 4), (4, 3), (5, 2), (6, 1), (7, 0),
]
# Use subset of mid-frequency coefficients for better robustness.
# The slice keeps list indices 4 through 19 (the end index 20 is exclusive),
# skipping the first four entries and everything from index 20 onwards.
DEFAULT_EMBED_POSITIONS = EMBED_POSITIONS[4:20]  # 16 coefficients per block
# Quantization step for embedding (larger = more robust, more visible).
# Used as the default QIM step when writing and reading coefficient bits.
QUANT_STEP = 25
# Magic bytes for DCT stego identification (first field of the header)
DCT_MAGIC = b'DCTS'
# Header: magic(4) + version(1) + flags(1) + length(4) = 10 bytes
HEADER_SIZE = 10
# Output format options accepted by the embedding/saving functions
OUTPUT_FORMAT_PNG = 'png'
OUTPUT_FORMAT_JPEG = 'jpeg'
# JPEG quality for output (high to preserve coefficients)
JPEG_OUTPUT_QUALITY = 95
# ============================================================================
# DATA CLASSES
# ============================================================================
class DCTOutputFormat(Enum):
    """Output format for DCT stego images.

    The member values are the same strings as the module-level
    OUTPUT_FORMAT_PNG / OUTPUT_FORMAT_JPEG constants.
    """
    PNG = 'png'    # lossless output
    JPEG = 'jpeg'  # lossy output
@dataclass
class DCTEmbedStats:
    """Statistics from DCT embedding operation.

    Returned by embed_in_dct alongside the stego image bytes.
    """
    blocks_used: int        # blocks that received at least one payload bit
    blocks_available: int   # total blocks reported by the capacity calculation
    bits_embedded: int      # header bits + data bits written
    capacity_bits: int      # total bit capacity of the carrier
    usage_percent: float    # bits_embedded / capacity_bits, as a percentage
    image_width: int        # width of the output image in pixels
    image_height: int       # height of the output image in pixels
    output_format: str  # 'png' or 'jpeg'
@dataclass
class DCTCapacityInfo:
    """Capacity information for a carrier image.

    Produced by calculate_dct_capacity.
    """
    width: int                  # carrier width in pixels
    height: int                 # carrier height in pixels
    blocks_x: int               # whole blocks per row (width // BLOCK_SIZE)
    blocks_y: int               # whole blocks per column (height // BLOCK_SIZE)
    total_blocks: int           # blocks_x * blocks_y
    bits_per_block: int         # number of coefficients used per block
    total_capacity_bits: int    # total_blocks * bits_per_block
    total_capacity_bytes: int   # total_capacity_bits // 8
    usable_capacity_bytes: int  # After header overhead
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
def _check_scipy():
    """Guard for DCT entry points: fail with ImportError when scipy is missing."""
    if HAS_SCIPY:
        return
    raise ImportError(
        "DCT steganography requires scipy. "
        "Install with: pip install scipy"
    )
def _dct2(block: np.ndarray) -> np.ndarray:
"""Apply 2D DCT to a block."""
return dct(dct(block.T, norm='ortho').T, norm='ortho')
def _idct2(block: np.ndarray) -> np.ndarray:
"""Apply 2D inverse DCT to a block."""
return idct(idct(block.T, norm='ortho').T, norm='ortho')
def _to_grayscale(image_data: bytes) -> np.ndarray:
    """Decode encoded image bytes into a float64 array of 8-bit luminance ('L') values."""
    luminance = Image.open(io.BytesIO(image_data)).convert('L')
    return np.array(luminance, dtype=np.float64)
def _pad_to_blocks(image: np.ndarray) -> tuple[np.ndarray, tuple[int, int]]:
    """Pad a 2D image so both dimensions are multiples of BLOCK_SIZE.

    Padding is added on the bottom and right edges only and mirrors the
    image content (edge pixel included) for smoother block boundaries.

    Args:
        image: 2D array (height, width).

    Returns:
        Tuple of (padded_image, (original_height, original_width)). When no
        padding is required the input array itself is returned, not a copy.
    """
    h, w = image.shape
    pad_h = -h % BLOCK_SIZE
    pad_w = -w % BLOCK_SIZE
    if pad_h == 0 and pad_w == 0:
        return image, (h, w)

    # np.pad's 'symmetric' mode produces the same mirrored border as slicing
    # the last rows/columns in reverse, but it also copes with images whose
    # side is shorter than the pad width (e.g. a 3-pixel-high image needing
    # 5 rows), where a hand-written slice would start at a negative index
    # and yield too few rows.
    padded = np.pad(image, ((0, pad_h), (0, pad_w)), mode='symmetric')
    return padded, (h, w)
def _unpad_image(image: np.ndarray, original_size: tuple[int, int]) -> np.ndarray:
"""Remove padding from image."""
h, w = original_size
return image[:h, :w]
def _embed_bit_in_coeff(coeff: float, bit: int, quant_step: int = QUANT_STEP) -> float:
    """Embed one bit into a DCT coefficient with Quantization Index Modulation.

    The coefficient is snapped to a multiple of ``quant_step`` whose index
    parity equals ``bit`` (even index = 0, odd index = 1).

    Args:
        coeff: Original coefficient value.
        bit: Bit to embed (0 or 1).
        quant_step: Spacing of the quantization lattice.

    Returns:
        The quantized coefficient carrying the bit.
    """
    index = round(coeff / quant_step)
    parity = index % 2

    # Only a genuine 0/1 mismatch moves the index; any other combination
    # leaves the nearest lattice point untouched.
    mismatch = (parity == 0 and bit == 1) or (parity == 1 and bit == 0)
    if mismatch:
        # Step to the neighbouring lattice point on the side the original
        # coefficient lies on, which keeps the change small.
        if coeff >= index * quant_step:
            index += 1
        else:
            index -= 1

    return index * quant_step
def _extract_bit_from_coeff(coeff: float, quant_step: int = QUANT_STEP) -> int:
    """Read the embedded bit back: the parity of the nearest lattice index."""
    return round(coeff / quant_step) % 2
def _generate_block_order(num_blocks: int, seed: bytes) -> list[int]:
"""Generate pseudo-random block order from seed."""
# Create deterministic RNG from seed
hash_bytes = hashlib.sha256(seed).digest()
rng = np.random.RandomState(int.from_bytes(hash_bytes[:4], 'big'))
order = list(range(num_blocks))
rng.shuffle(order)
return order
def _save_stego_image(
    image: np.ndarray,
    output_format: str = OUTPUT_FORMAT_PNG
) -> bytes:
    """Encode a grayscale float image as PNG or JPEG bytes.

    Args:
        image: 2D array of pixel intensities (may contain fractional or
            out-of-range values after the inverse DCT).
        output_format: OUTPUT_FORMAT_JPEG for JPEG; any other value is
            written as PNG.

    Returns:
        The encoded image file bytes.
    """
    # Clip to the valid 8-bit range and ROUND to the nearest integer before
    # casting. A bare astype(uint8) truncates, which darkens every modified
    # pixel by up to one level and adds a systematic bias to embedded blocks.
    pixels = np.rint(np.clip(image, 0, 255)).astype(np.uint8)
    # A 2D uint8 array is interpreted as mode 'L'; the explicit `mode`
    # argument is deprecated in recent Pillow releases.
    img = Image.fromarray(pixels)

    buffer = io.BytesIO()
    if output_format == OUTPUT_FORMAT_JPEG:
        # High-quality JPEG. The subsampling option only concerns colour
        # channels; it is kept so the save call stays unchanged.
        img.save(
            buffer,
            format='JPEG',
            quality=JPEG_OUTPUT_QUALITY,
            subsampling=0,
            optimize=True
        )
    else:
        # PNG (lossless, default)
        img.save(buffer, format='PNG', optimize=True)
    return buffer.getvalue()
def _create_header(data_length: int, flags: int = 0) -> bytes:
    """Build the big-endian stego header: MAGIC(4) + VERSION(1) + FLAGS(1) + LENGTH(4).

    Args:
        data_length: Payload length in bytes (stored as an unsigned 32-bit value).
        flags: Flag byte, 0 by default.

    Returns:
        The packed header bytes.
    """
    header_format = '>4sBBI'
    format_version = 1
    fields = (DCT_MAGIC, format_version, flags, data_length)
    return struct.pack(header_format, *fields)
def _parse_header(header_bits: list[int]) -> tuple[int, int, int]:
    """Decode the stego header from a most-significant-bit-first bit list.

    Only the first ``HEADER_SIZE * 8`` bits are read.

    Returns:
        Tuple of (version, flags, data_length).

    Raises:
        ValueError: If too few bits are supplied or the magic bytes do not
            match DCT_MAGIC.
    """
    bits_needed = HEADER_SIZE * 8
    if len(header_bits) < bits_needed:
        raise ValueError("Insufficient header data")

    # Pack each group of eight bits into one byte, most significant first.
    raw = []
    for start in range(0, bits_needed, 8):
        value = 0
        for bit in header_bits[start:start + 8]:
            value = value * 2 + bit
        raw.append(value)

    magic, version, flags, length = struct.unpack('>4sBBI', bytes(raw))
    if magic != DCT_MAGIC:
        raise ValueError("Invalid DCT stego magic bytes - not a DCT stego image")
    return version, flags, length
# ============================================================================
# PUBLIC API
# ============================================================================
def has_dct_support() -> bool:
    """Report whether the scipy DCT routines were importable when this module loaded."""
    available = HAS_SCIPY
    return available
def calculate_dct_capacity(image_data: bytes) -> DCTCapacityInfo:
    """
    Work out how much data an image can carry with DCT embedding.

    Only whole BLOCK_SIZE x BLOCK_SIZE blocks are counted; each contributes
    one bit per entry in DEFAULT_EMBED_POSITIONS.

    Args:
        image_data: Image file bytes

    Returns:
        DCTCapacityInfo with capacity details

    Raises:
        ImportError: If scipy is not available
    """
    _check_scipy()

    width, height = Image.open(io.BytesIO(image_data)).size

    across = width // BLOCK_SIZE
    down = height // BLOCK_SIZE
    block_count = across * down

    per_block = len(DEFAULT_EMBED_POSITIONS)
    capacity_bits = block_count * per_block
    capacity_bytes = capacity_bits // 8

    # The header is stored in the same bit stream; never report below zero.
    payload_bytes = capacity_bytes - HEADER_SIZE
    if payload_bytes < 0:
        payload_bytes = 0

    return DCTCapacityInfo(
        width=width,
        height=height,
        blocks_x=across,
        blocks_y=down,
        total_blocks=block_count,
        bits_per_block=per_block,
        total_capacity_bits=capacity_bits,
        total_capacity_bytes=capacity_bytes,
        usable_capacity_bytes=payload_bytes,
    )
def will_fit_dct(data_length: int, image_data: bytes) -> bool:
    """
    Tell whether a payload of the given size fits the carrier in DCT mode.

    Args:
        data_length: Length of data in bytes
        image_data: Carrier image bytes

    Returns:
        True if data fits, False otherwise
    """
    usable = calculate_dct_capacity(image_data).usable_capacity_bytes
    return not data_length > usable
def estimate_capacity_comparison(image_data: bytes) -> dict:
    """
    Compare LSB and DCT capacity for an image.

    Works with or without scipy: when scipy is missing the DCT figure is
    computed with the same block arithmetic as calculate_dct_capacity.

    Args:
        image_data: Image file bytes

    Returns:
        Dict with 'width', 'height' and per-mode 'lsb' / 'dct' sub-dicts
        (capacity in bytes and KB, output description; the 'dct' entry also
        carries 'ratio_vs_lsb' in percent and 'available').
    """
    img = Image.open(io.BytesIO(image_data))
    width, height = img.size
    pixels = width * height

    # LSB capacity (3 bits per pixel for RGB, simplified)
    lsb_bytes = (pixels * 3) // 8

    # DCT capacity
    if HAS_SCIPY:
        dct_bytes = calculate_dct_capacity(image_data).usable_capacity_bytes
    else:
        # Same formula as calculate_dct_capacity, built from the module
        # constants and clamped at zero so that tiny images never report a
        # negative capacity.
        blocks = (width // BLOCK_SIZE) * (height // BLOCK_SIZE)
        total_bytes = (blocks * len(DEFAULT_EMBED_POSITIONS)) // 8
        dct_bytes = max(0, total_bytes - HEADER_SIZE)

    return {
        'width': width,
        'height': height,
        'lsb': {
            'capacity_bytes': lsb_bytes,
            'capacity_kb': lsb_bytes / 1024,
            'output': 'PNG/BMP (color)',
        },
        'dct': {
            'capacity_bytes': dct_bytes,
            'capacity_kb': dct_bytes / 1024,
            'output': 'PNG or JPEG (grayscale)',
            'ratio_vs_lsb': (dct_bytes / lsb_bytes * 100) if lsb_bytes > 0 else 0,
            'available': HAS_SCIPY,
        }
    }
def embed_in_dct(
    data: bytes,
    carrier_image: bytes,
    seed: bytes,
    output_format: str = OUTPUT_FORMAT_PNG,
) -> tuple[bytes, DCTEmbedStats]:
    """
    Embed data into image using DCT coefficient modification.

    The carrier is converted to grayscale and divided into whole
    BLOCK_SIZE x BLOCK_SIZE blocks (partial blocks on the right/bottom edge
    are left untouched). Blocks are visited in a seed-dependent order and
    one bit is written into each coefficient of DEFAULT_EMBED_POSITIONS.

    Args:
        data: Data to embed
        carrier_image: Carrier image bytes
        seed: Seed for pseudo-random block selection
        output_format: Output format - 'png' (default, lossless) or 'jpeg' (smaller)

    Returns:
        Tuple of (stego_image_bytes, stats)

    Raises:
        ImportError: If scipy is not available
        ValueError: If the output format is invalid or data is too large
            for carrier
    """
    _check_scipy()

    # Validate output format
    if output_format not in (OUTPUT_FORMAT_PNG, OUTPUT_FORMAT_JPEG):
        raise ValueError(f"Invalid output format: {output_format}. Use 'png' or 'jpeg'")

    # Header and data share the same bit stream, so both must fit. Checking
    # the sum (rather than only the clamped usable capacity) also rejects
    # carriers too small to hold even the header.
    capacity_info = calculate_dct_capacity(carrier_image)
    if HEADER_SIZE + len(data) > capacity_info.total_capacity_bytes:
        raise ValueError(
            f"Data too large ({len(data)} bytes) for carrier "
            f"(capacity: {capacity_info.usable_capacity_bytes} bytes)"
        )

    image = _to_grayscale(carrier_image)
    height, width = image.shape

    # Header + data, expanded to bits (most significant bit first)
    payload = _create_header(len(data)) + data
    bits = [(byte >> shift) & 1 for byte in payload for shift in range(7, -1, -1)]

    # The block grid MUST be derived the same way here and in
    # extract_from_dct: whole blocks of the original image only. Mixing the
    # whole-block count with a padded grid width yields a different shuffle
    # and different block coordinates on the two sides for any image whose
    # dimensions are not multiples of BLOCK_SIZE.
    blocks_x = capacity_info.blocks_x
    block_order = _generate_block_order(capacity_info.total_blocks, seed)

    bit_idx = 0
    blocks_used = 0
    for block_num in block_order:
        if bit_idx >= len(bits):
            break

        by = (block_num // blocks_x) * BLOCK_SIZE
        bx = (block_num % blocks_x) * BLOCK_SIZE

        dct_block = _dct2(image[by:by + BLOCK_SIZE, bx:bx + BLOCK_SIZE])

        for pos in DEFAULT_EMBED_POSITIONS:
            if bit_idx >= len(bits):
                break
            dct_block[pos] = _embed_bit_in_coeff(dct_block[pos], bits[bit_idx])
            bit_idx += 1

        # Inverse transform and write the modified block back
        image[by:by + BLOCK_SIZE, bx:bx + BLOCK_SIZE] = _idct2(dct_block)
        blocks_used += 1

    stego_bytes = _save_stego_image(image, output_format)

    stats = DCTEmbedStats(
        blocks_used=blocks_used,
        blocks_available=capacity_info.total_blocks,
        bits_embedded=len(bits),
        capacity_bits=capacity_info.total_capacity_bits,
        usage_percent=(len(bits) / capacity_info.total_capacity_bits) * 100,
        image_width=width,
        image_height=height,
        output_format=output_format,
    )
    return stego_bytes, stats


def extract_from_dct(
    stego_image: bytes,
    seed: bytes,
) -> bytes:
    """
    Extract data from DCT stego image.

    Uses the same whole-block grid and seed-dependent block order as
    embed_in_dct. The header is parsed once, as soon as enough bits have
    been read, and reading stops when the declared payload is complete.

    Args:
        stego_image: Stego image bytes
        seed: Same seed used for embedding

    Returns:
        Extracted data bytes

    Raises:
        ImportError: If scipy is not available
        ValueError: If image is not a valid DCT stego image (too small,
            wrong magic bytes, or a declared length exceeding the capacity)
    """
    _check_scipy()

    image = _to_grayscale(stego_image)
    height, width = image.shape

    # Whole blocks only - must mirror the grid used by embed_in_dct.
    blocks_x = width // BLOCK_SIZE
    blocks_y = height // BLOCK_SIZE
    num_blocks = blocks_x * blocks_y
    capacity_bits = num_blocks * len(DEFAULT_EMBED_POSITIONS)

    block_order = _generate_block_order(num_blocks, seed)

    header_bit_count = HEADER_SIZE * 8
    all_bits = []
    data_length = 0
    total_needed = None  # unknown until the header has been read

    for block_num in block_order:
        by = (block_num // blocks_x) * BLOCK_SIZE
        bx = (block_num % blocks_x) * BLOCK_SIZE

        dct_block = _dct2(image[by:by + BLOCK_SIZE, bx:bx + BLOCK_SIZE])
        all_bits.extend(
            _extract_bit_from_coeff(dct_block[pos]) for pos in DEFAULT_EMBED_POSITIONS
        )

        if total_needed is None and len(all_bits) >= header_bit_count:
            # Parse once. A wrong magic raises ValueError right away instead
            # of transforming every remaining block before failing.
            _, _, data_length = _parse_header(all_bits[:header_bit_count])
            total_needed = header_bit_count + data_length * 8
            if total_needed > capacity_bits:
                raise ValueError(
                    f"Declared data length ({data_length} bytes) exceeds "
                    "image capacity - not a valid DCT stego image"
                )

        if total_needed is not None and len(all_bits) >= total_needed:
            break

    if total_needed is None:
        # The image cannot even hold a header.
        raise ValueError("Insufficient header data")

    # Convert the payload bits back to bytes (most significant bit first)
    data_bits = all_bits[header_bit_count:total_needed]
    return bytes(
        sum(bit << (7 - offset) for offset, bit in enumerate(data_bits[i * 8:(i + 1) * 8]))
        for i in range(data_length)
    )
# ============================================================================
# CONVENIENCE FUNCTIONS
# ============================================================================
def get_output_extension(output_format: str) -> str:
    """Return the dotted file extension for an output format.

    JPEG output maps to '.jpg'; every other value maps to '.png'.
    """
    return '.jpg' if output_format == OUTPUT_FORMAT_JPEG else '.png'
def get_output_mimetype(output_format: str) -> str:
    """Return the MIME type for an output format.

    JPEG output maps to 'image/jpeg'; every other value maps to 'image/png'.
    """
    return 'image/jpeg' if output_format == OUTPUT_FORMAT_JPEG else 'image/png'

View File

@@ -1,7 +1,16 @@
"""
Stegasoo Steganography Functions
Stegasoo Steganography Functions (v3.0.1)
LSB embedding and extraction with pseudo-random pixel selection.
LSB and DCT embedding modes with pseudo-random pixel/coefficient selection.
New in v3.0:
- DCT domain embedding mode (requires scipy)
- embed_mode parameter for encode/decode
- Auto-detection of embedding mode
- Comparison utilities
New in v3.0.1:
- dct_output_format parameter for DCT mode ('png' or 'jpeg')
"""
import io
@@ -15,6 +24,12 @@ from cryptography.hazmat.backends import default_backend
from .models import EmbedStats, FilePayload
from .exceptions import CapacityError, ExtractionError, EmbeddingError
from .debug import debug
from .constants import (
EMBED_MODE_LSB,
EMBED_MODE_DCT,
EMBED_MODE_AUTO,
VALID_EMBED_MODES,
)
# Lossless formats that preserve LSB data
@@ -40,6 +55,48 @@ HEADER_OVERHEAD = 104 # Magic + version + date + salt + iv + tag
LENGTH_PREFIX = 4 # 4 bytes for payload length
ENCRYPTION_OVERHEAD = HEADER_OVERHEAD + LENGTH_PREFIX
# DCT output format options (v3.0.1)
DCT_OUTPUT_PNG = 'png'
DCT_OUTPUT_JPEG = 'jpeg'
# =============================================================================
# DCT MODULE LAZY LOADING
# =============================================================================
_dct_module = None
def _get_dct_module():
    """Return the dct_steganography module, importing it on first use.

    The import is deferred to the first call and the module is cached in
    the module-level ``_dct_module`` so later calls skip the import.
    """
    global _dct_module
    if _dct_module is not None:
        return _dct_module

    from . import dct_steganography
    _dct_module = dct_steganography
    return _dct_module
def has_dct_support() -> bool:
    """
    Report whether the DCT embedding mode can be used.

    Delegates to the DCT module's own ``has_dct_support()``; an
    ImportError while loading or querying that module counts as
    "not available".

    Returns:
        The DCT module's answer, or False if an ImportError occurred

    Example:
        >>> if has_dct_support():
        ...     result = encode(..., embed_mode='dct')
    """
    try:
        return _get_dct_module().has_dct_support()
    except ImportError:
        return False
# =============================================================================
# FORMAT UTILITIES
# =============================================================================
def get_output_format(input_format: Optional[str]) -> Tuple[str, str]:
"""
@@ -51,12 +108,6 @@ def get_output_format(input_format: Optional[str]) -> Tuple[str, str]:
Returns:
Tuple of (PIL format string, file extension) for output
Falls back to PNG for lossy or unknown formats.
Example:
>>> get_output_format('JPEG')
('PNG', 'png')
>>> get_output_format('PNG')
('PNG', 'png')
"""
debug.validate(input_format is None or isinstance(input_format, str),
"Input format must be string or None")
@@ -67,11 +118,14 @@ def get_output_format(input_format: Optional[str]) -> Tuple[str, str]:
debug.print(f"Using lossless format: {fmt} -> .{ext}")
return fmt, ext
# Default to PNG for lossy formats (JPEG, GIF) or unknown
debug.print(f"Input format {input_format} is lossy or unknown, defaulting to PNG")
return 'PNG', 'png'
# =============================================================================
# CAPACITY FUNCTIONS
# =============================================================================
def will_fit(
payload: Union[str, bytes, FilePayload, int],
carrier_image: bytes,
@@ -79,38 +133,16 @@ def will_fit(
include_compression_estimate: bool = True,
) -> dict:
"""
Check if a payload will fit in a carrier image without performing encryption.
This is a lightweight pre-check to avoid wasted work on payloads that
are too large. For accurate results with compression, the actual compressed
size may vary.
Check if a payload will fit in a carrier image (LSB mode).
Args:
payload: Message string, raw bytes, FilePayload, or size in bytes
carrier_image: Carrier image bytes
bits_per_channel: Bits to use per color channel (1-2)
include_compression_estimate: Estimate compressed size (requires payload data)
include_compression_estimate: Estimate compressed size
Returns:
Dict with:
- fits: bool - Whether payload will fit
- payload_size: int - Raw payload size in bytes
- estimated_encrypted_size: int - Estimated size after encryption + overhead
- capacity: int - Available capacity in bytes
- usage_percent: float - Estimated capacity usage (0-100)
- headroom: int - Bytes remaining (negative if won't fit)
- compressed_estimate: int | None - Estimated compressed size (if applicable)
Example:
>>> result = will_fit("Hello world", carrier_bytes)
>>> result['fits']
True
>>> result['usage_percent']
0.5
>>> result = will_fit(50000, carrier_bytes) # Check if 50KB would fit
>>> result['fits']
False
Dict with fits, capacity, usage info
"""
# Determine payload size
if isinstance(payload, int):
@@ -121,42 +153,35 @@ def will_fit(
payload_size = len(payload_data)
elif isinstance(payload, FilePayload):
payload_data = payload.data
# Account for filename/mime metadata
filename_overhead = len(payload.filename.encode('utf-8')) if payload.filename else 0
mime_overhead = len(payload.mime_type.encode('utf-8')) if payload.mime_type else 0
payload_size = len(payload.data) + filename_overhead + mime_overhead + 5 # +5 for length prefixes + type byte
payload_size = len(payload.data) + filename_overhead + mime_overhead + 5
else:
payload_data = payload
payload_size = len(payload)
# Calculate capacity
capacity = calculate_capacity(carrier_image, bits_per_channel)
# Estimate encrypted size (payload + random padding + overhead)
# Padding adds 64-319 bytes, averaging ~190
estimated_padding = 190
estimated_encrypted_size = payload_size + estimated_padding + ENCRYPTION_OVERHEAD
# Compression estimate
compressed_estimate = None
if include_compression_estimate and payload_data is not None and len(payload_data) >= 64:
try:
import zlib
compressed = zlib.compress(payload_data, level=6)
# Add compression header overhead (9 bytes)
compressed_size = len(compressed) + 9
if compressed_size < payload_size:
compressed_estimate = compressed_size
# Use compressed size for fit calculation
estimated_encrypted_size = compressed_size + estimated_padding + ENCRYPTION_OVERHEAD
except Exception:
pass # Ignore compression errors
pass
headroom = capacity - estimated_encrypted_size
fits = headroom >= 0
usage_percent = (estimated_encrypted_size / capacity * 100) if capacity > 0 else 100.0
result = {
return {
'fits': fits,
'payload_size': payload_size,
'estimated_encrypted_size': estimated_encrypted_size,
@@ -164,14 +189,206 @@ def will_fit(
'usage_percent': min(usage_percent, 100.0),
'headroom': headroom,
'compressed_estimate': compressed_estimate,
'mode': EMBED_MODE_LSB,
}
debug.print(f"will_fit: payload={payload_size}, encrypted~={estimated_encrypted_size}, "
f"capacity={capacity}, fits={fits}")
return result
def calculate_capacity(image_data: bytes, bits_per_channel: int = 1) -> int:
    """
    Compute how many payload bytes an image can carry in LSB mode.

    Args:
        image_data: Image bytes
        bits_per_channel: Bits to use per color channel

    Returns:
        Raw LSB byte capacity (3 channels per pixel) minus
        ENCRYPTION_OVERHEAD, never below zero
    """
    debug.validate(bits_per_channel in (1, 2),
                   f"bits_per_channel must be 1 or 2, got {bits_per_channel}")

    source = Image.open(io.BytesIO(image_data))
    if source.mode == 'RGB':
        rgb = source
    else:
        rgb = source.convert('RGB')

    width, height = rgb.size
    raw_bytes = (width * height * 3 * bits_per_channel) // 8

    usable = raw_bytes - ENCRYPTION_OVERHEAD
    if usable < 0:
        usable = 0

    debug.print(f"LSB capacity: {usable} bytes at {bits_per_channel} bit(s)/channel")
    return usable
def calculate_capacity_by_mode(
    image_data: bytes,
    embed_mode: str = EMBED_MODE_LSB,
    bits_per_channel: int = 1,
) -> dict:
    """
    Report carrier capacity for one embedding mode.

    Any embed_mode other than the DCT constant is handled as LSB.

    Args:
        image_data: Carrier image bytes
        embed_mode: 'lsb' or 'dct'
        bits_per_channel: Bits per channel for LSB mode

    Returns:
        Dict with 'mode', 'capacity_bytes', 'capacity_bits', 'width' and
        'height', plus 'total_blocks' (DCT) or 'bits_per_channel' (LSB)

    Raises:
        ImportError: If DCT mode is requested but DCT support is missing
    """
    if embed_mode != EMBED_MODE_DCT:
        lsb_capacity = calculate_capacity(image_data, bits_per_channel)
        width, height = Image.open(io.BytesIO(image_data)).size
        return {
            'mode': EMBED_MODE_LSB,
            'capacity_bytes': lsb_capacity,
            'capacity_bits': lsb_capacity * 8,
            'width': width,
            'height': height,
            'bits_per_channel': bits_per_channel,
        }

    if not has_dct_support():
        raise ImportError("scipy required for DCT mode. Install: pip install scipy")

    info = _get_dct_module().calculate_dct_capacity(image_data)
    return {
        'mode': EMBED_MODE_DCT,
        'capacity_bytes': info.usable_capacity_bytes,
        'capacity_bits': info.total_capacity_bits,
        'width': info.width,
        'height': info.height,
        'total_blocks': info.total_blocks,
    }
def will_fit_by_mode(
    payload: Union[str, bytes, FilePayload, int],
    carrier_image: bytes,
    embed_mode: str = EMBED_MODE_LSB,
    bits_per_channel: int = 1,
) -> dict:
    """
    Check if payload fits in specified mode.

    Any embed_mode other than the DCT constant is delegated to will_fit()
    (LSB). In DCT mode no compression estimate is made.

    Args:
        payload: Message, bytes, FilePayload, or size in bytes
        carrier_image: Carrier image bytes
        embed_mode: 'lsb' or 'dct'
        bits_per_channel: For LSB mode

    Returns:
        Dict with fits, capacity, usage info. In DCT mode the keys are
        'fits', 'payload_size', 'estimated_encrypted_size', 'capacity',
        'usage_percent', 'headroom' and 'mode'; if DCT support is missing
        only 'fits' (False), 'error' and 'mode' are present.
    """
    if embed_mode != EMBED_MODE_DCT:
        return will_fit(payload, carrier_image, bits_per_channel)

    if not has_dct_support():
        return {'fits': False, 'error': 'scipy not available', 'mode': EMBED_MODE_DCT}

    if isinstance(payload, int):
        payload_size = payload
    elif isinstance(payload, str):
        payload_size = len(payload.encode('utf-8'))
    elif hasattr(payload, 'data'):
        # File payloads also carry filename, MIME type and 5 framing bytes;
        # size them the same way will_fit() does so both modes agree.
        filename = getattr(payload, 'filename', None)
        mime_type = getattr(payload, 'mime_type', None)
        filename_overhead = len(filename.encode('utf-8')) if filename else 0
        mime_overhead = len(mime_type.encode('utf-8')) if mime_type else 0
        payload_size = len(payload.data) + filename_overhead + mime_overhead + 5
    else:
        payload_size = len(payload)

    # 190 is the same average padding allowance will_fit() applies
    estimated_size = payload_size + ENCRYPTION_OVERHEAD + 190

    dct_mod = _get_dct_module()
    fits = dct_mod.will_fit_dct(estimated_size, carrier_image)

    capacity = dct_mod.calculate_dct_capacity(carrier_image).usable_capacity_bytes
    usage_percent = (estimated_size / capacity * 100) if capacity > 0 else 100.0

    return {
        'fits': fits,
        'payload_size': payload_size,
        'estimated_encrypted_size': estimated_size,
        'capacity': capacity,
        'usage_percent': min(usage_percent, 100.0),
        'headroom': capacity - estimated_size,
        'mode': EMBED_MODE_DCT,
    }
def get_available_modes() -> dict:
    """
    Describe each embedding mode and whether it can be used right now.

    Returns:
        Dict keyed by mode constant; each value holds 'available', 'name'
        and 'description', plus mode-specific output/requirement fields
    """
    lsb_entry = {
        'available': True,
        'name': 'Spatial LSB',
        'description': 'Embed in pixel LSBs, outputs PNG/BMP',
        'output_format': 'PNG (color)',
    }
    dct_entry = {
        'available': has_dct_support(),
        'name': 'DCT Domain',
        'description': 'Embed in DCT coefficients, outputs grayscale PNG or JPEG',
        'output_formats': ['PNG (grayscale)', 'JPEG (grayscale)'],
        'requires': 'scipy',
    }
    return {
        EMBED_MODE_LSB: lsb_entry,
        EMBED_MODE_DCT: dct_entry,
    }
def compare_modes(image_data: bytes) -> dict:
    """
    Put LSB and DCT capacities for one carrier side by side.

    LSB capacity is computed at 1 bit per channel. When DCT support is
    missing, the DCT figure is an estimate of 16 bits per whole 8x8 block.

    Args:
        image_data: Carrier image bytes

    Returns:
        Dict with 'width', 'height' and per-mode 'lsb' / 'dct' sub-dicts;
        the 'dct' entry also has 'ratio_vs_lsb' as a percentage
    """
    width, height = Image.open(io.BytesIO(image_data)).size

    lsb_capacity = calculate_capacity(image_data, 1)

    if has_dct_support():
        dct_capacity = (
            _get_dct_module().calculate_dct_capacity(image_data).usable_capacity_bytes
        )
        dct_available = True
    else:
        whole_blocks = (height // 8) * (width // 8)
        dct_capacity = (whole_blocks * 16) // 8  # Estimated
        dct_available = False

    if lsb_capacity > 0:
        ratio = dct_capacity / lsb_capacity * 100
    else:
        ratio = 0

    lsb_summary = {
        'capacity_bytes': lsb_capacity,
        'capacity_kb': lsb_capacity / 1024,
        'available': True,
        'output': 'PNG (color)',
    }
    dct_summary = {
        'capacity_bytes': dct_capacity,
        'capacity_kb': dct_capacity / 1024,
        'available': dct_available,
        'output': 'PNG or JPEG (grayscale)',
        'ratio_vs_lsb': ratio,
    }
    return {
        'width': width,
        'height': height,
        'lsb': lsb_summary,
        'dct': dct_summary,
    }
# =============================================================================
# PIXEL INDEX GENERATION
# =============================================================================
@debug.time
def generate_pixel_indices(key: bytes, num_pixels: int, num_needed: int) -> List[int]:
"""
@@ -179,17 +396,6 @@ def generate_pixel_indices(key: bytes, num_pixels: int, num_needed: int) -> List
Uses ChaCha20 as a CSPRNG seeded by the key to deterministically
select which pixels will hold hidden data.
Args:
key: 32-byte key for pixel selection
num_pixels: Total pixels in image
num_needed: Number of pixels needed for embedding
Returns:
List of pixel indices
Note:
Optimizes for both small and large num_needed values.
"""
debug.validate(len(key) == 32, f"Pixel key must be 32 bytes, got {len(key)}")
debug.validate(num_pixels > 0, f"Number of pixels must be positive, got {num_pixels}")
@@ -200,7 +406,6 @@ def generate_pixel_indices(key: bytes, num_pixels: int, num_needed: int) -> List
debug.print(f"Generating {num_needed} pixel indices from {num_pixels} total pixels")
if num_needed >= num_pixels // 2:
# If we need many pixels, shuffle all indices
debug.print(f"Using full shuffle (needed {num_needed}/{num_pixels} pixels)")
nonce = b'\x00' * 16
cipher = Cipher(algorithms.ChaCha20(key, nonce), mode=None, backend=default_backend())
@@ -209,7 +414,6 @@ def generate_pixel_indices(key: bytes, num_pixels: int, num_needed: int) -> List
indices = list(range(num_pixels))
random_bytes = encryptor.update(b'\x00' * (num_pixels * 4))
# Fisher-Yates shuffle using CSPRNG
for i in range(num_pixels - 1, 0, -1):
j_bytes = random_bytes[(num_pixels - 1 - i) * 4:(num_pixels - i) * 4]
j = int.from_bytes(j_bytes, 'big') % (i + 1)
@@ -219,7 +423,6 @@ def generate_pixel_indices(key: bytes, num_pixels: int, num_needed: int) -> List
debug.print(f"Generated {len(selected)} indices via shuffle")
return selected
# Optimized path: generate indices directly (for smaller selections)
debug.print(f"Using optimized selection (needed {num_needed}/{num_pixels} pixels)")
selected = []
used = set()
@@ -228,7 +431,6 @@ def generate_pixel_indices(key: bytes, num_pixels: int, num_needed: int) -> List
cipher = Cipher(algorithms.ChaCha20(key, nonce), mode=None, backend=default_backend())
encryptor = cipher.encryptor()
# Generate more than needed to handle collisions
bytes_needed = (num_needed * 2) * 4
random_bytes = encryptor.update(b'\x00' * bytes_needed)
@@ -244,11 +446,10 @@ def generate_pixel_indices(key: bytes, num_pixels: int, num_needed: int) -> List
else:
collisions += 1
# Generate additional if needed (rare)
if len(selected) < num_needed:
debug.print(f"Need {num_needed - len(selected)} more indices, generating...")
extra_needed = num_needed - len(selected)
for _ in range(extra_needed * 2): # Try twice as many to account for collisions
for _ in range(extra_needed * 2):
extra_bytes = encryptor.update(b'\x00' * 4)
idx = int.from_bytes(extra_bytes, 'big') % num_pixels
if idx not in used:
@@ -263,43 +464,91 @@ def generate_pixel_indices(key: bytes, num_pixels: int, num_needed: int) -> List
return selected
# =============================================================================
# EMBEDDING FUNCTIONS
# =============================================================================
@debug.time
def embed_in_image(
carrier_data: bytes,
encrypted_data: bytes,
data: bytes,
image_data: bytes,
pixel_key: bytes,
bits_per_channel: int = 1,
output_format: Optional[str] = None
) -> Tuple[bytes, EmbedStats, str]:
output_format: Optional[str] = None,
embed_mode: str = EMBED_MODE_LSB,
dct_output_format: str = DCT_OUTPUT_PNG, # NEW in v3.0.1
) -> Tuple[bytes, Union[EmbedStats, 'DCTEmbedStats'], str]:
"""
Embed encrypted data in carrier image using LSB steganography.
Uses pseudo-random pixel selection based on pixel_key to scatter
the data across the image, defeating statistical analysis.
Note: Output images have all metadata (EXIF, etc.) stripped automatically.
Embed data into an image using specified mode.
Args:
carrier_data: Carrier image bytes
encrypted_data: Data to embed
pixel_key: Key for pixel selection
bits_per_channel: Bits to use per color channel (1-2)
output_format: Force specific output format (PNG, BMP).
If None, auto-detect from carrier (lossless) or default to PNG.
data: Data to embed (encrypted payload)
image_data: Carrier image bytes
pixel_key: Key for pixel/coefficient selection
bits_per_channel: Bits per channel (LSB mode only)
output_format: Force output format (LSB mode only)
embed_mode: 'lsb' (default) or 'dct'
dct_output_format: For DCT mode - 'png' (lossless) or 'jpeg' (smaller)
Returns:
Tuple of (image bytes, EmbedStats, file extension)
Tuple of (stego image bytes, stats, file extension)
Raises:
CapacityError: If carrier is too small
CapacityError: If data won't fit
EmbeddingError: If embedding fails
Example:
>>> stego_bytes, stats, ext = embed_in_image(carrier, encrypted, key)
>>> stats.pixels_modified
1500
ImportError: If DCT mode requested but scipy unavailable
"""
debug.print(f"Embedding {len(encrypted_data)} bytes into image")
debug.print(f"embed_in_image: mode={embed_mode}, data={len(data)} bytes")
debug.validate(embed_mode in VALID_EMBED_MODES,
f"Invalid embed_mode: {embed_mode}. Use 'lsb' or 'dct'")
# DCT MODE
if embed_mode == EMBED_MODE_DCT:
if not has_dct_support():
raise ImportError(
"scipy is required for DCT embedding mode. "
"Install with: pip install scipy"
)
# Validate DCT output format
if dct_output_format not in (DCT_OUTPUT_PNG, DCT_OUTPUT_JPEG):
debug.print(f"Invalid dct_output_format '{dct_output_format}', defaulting to PNG")
dct_output_format = DCT_OUTPUT_PNG
dct_mod = _get_dct_module()
# Pass output_format to DCT module (v3.0.1)
stego_bytes, dct_stats = dct_mod.embed_in_dct(
data,
image_data,
pixel_key,
output_format=dct_output_format,
)
# Determine extension based on output format
if dct_output_format == DCT_OUTPUT_JPEG:
ext = 'jpg'
else:
ext = 'png'
debug.print(f"DCT embedding complete: {dct_output_format.upper()} output, ext={ext}")
return stego_bytes, dct_stats, ext
# LSB MODE
return _embed_lsb(data, image_data, pixel_key, bits_per_channel, output_format)
def _embed_lsb(
data: bytes,
image_data: bytes,
pixel_key: bytes,
bits_per_channel: int = 1,
output_format: Optional[str] = None,
) -> Tuple[bytes, EmbedStats, str]:
"""
Embed data using LSB steganography (internal implementation).
"""
debug.print(f"LSB embedding {len(data)} bytes into image")
debug.data(pixel_key, "Pixel key for embedding")
debug.validate(bits_per_channel in (1, 2),
f"bits_per_channel must be 1 or 2, got {bits_per_channel}")
@@ -307,13 +556,12 @@ def embed_in_image(
f"Pixel key must be 32 bytes, got {len(pixel_key)}")
try:
img_file = Image.open(io.BytesIO(carrier_data))
img_file = Image.open(io.BytesIO(image_data))
input_format = img_file.format
debug.print(f"Carrier image: {img_file.size[0]}x{img_file.size[1]}, format: {input_format}")
# Convert to RGB - this returns Image.Image, not ImageFile
img: Image.Image = img_file.convert('RGB') if img_file.mode != 'RGB' else img_file.copy()
img = img_file.convert('RGB') if img_file.mode != 'RGB' else img_file.copy()
if img_file.mode != 'RGB':
debug.print(f"Converting image from {img_file.mode} to RGB")
@@ -325,8 +573,7 @@ def embed_in_image(
debug.print(f"Image capacity: {max_bytes} bytes at {bits_per_channel} bit(s)/channel")
# Prepend length
data_with_len = struct.pack('>I', len(encrypted_data)) + encrypted_data
data_with_len = struct.pack('>I', len(data)) + data
if len(data_with_len) > max_bytes:
debug.print(f"Capacity error: need {len(data_with_len)}, have {max_bytes}")
@@ -335,16 +582,13 @@ def embed_in_image(
debug.print(f"Total data to embed: {len(data_with_len)} bytes "
f"({len(data_with_len)/max_bytes*100:.1f}% of capacity)")
# Convert to binary string
binary_data = ''.join(format(b, '08b') for b in data_with_len)
pixels_needed = (len(binary_data) + bits_per_pixel - 1) // bits_per_pixel
debug.print(f"Need {pixels_needed} pixels to embed {len(binary_data)} bits")
# Get pixel indices
selected_indices = generate_pixel_indices(pixel_key, num_pixels, pixels_needed)
# Embed data
new_pixels = list(pixels)
clear_mask = 0xFF ^ ((1 << bits_per_channel) - 1)
@@ -381,11 +625,9 @@ def embed_in_image(
debug.print(f"Modified {modified_pixels} pixels (out of {len(selected_indices)} selected)")
# Create output image (fresh image = no metadata/EXIF carried over)
stego_img = Image.new('RGB', img.size)
stego_img.putdata(new_pixels)
# Determine output format
if output_format:
out_fmt = output_format.upper()
out_ext = FORMAT_TO_EXT.get(out_fmt, 'png')
@@ -405,42 +647,88 @@ def embed_in_image(
bytes_embedded=len(data_with_len)
)
debug.print(f"Embedding complete: {out_fmt} image, {len(output.getvalue())} bytes")
debug.print(f"LSB embedding complete: {out_fmt} image, {len(output.getvalue())} bytes")
return output.getvalue(), stats, out_ext
except CapacityError:
raise
except Exception as e:
debug.exception(e, "embed_in_image")
debug.exception(e, "embed_lsb")
raise EmbeddingError(f"Failed to embed data: {e}") from e
# =============================================================================
# EXTRACTION FUNCTIONS
# =============================================================================
@debug.time
def extract_from_image(
image_data: bytes,
pixel_key: bytes,
bits_per_channel: int = 1
bits_per_channel: int = 1,
embed_mode: str = EMBED_MODE_AUTO,
) -> Optional[bytes]:
"""
Extract hidden data from a stego image.
Args:
image_data: Stego image bytes
pixel_key: Key for pixel selection (must match encoding)
bits_per_channel: Bits per channel (must match encoding)
pixel_key: Key for pixel/coefficient selection (must match encoding)
bits_per_channel: Bits per channel (LSB mode only)
embed_mode: 'auto' (try both), 'lsb', or 'dct'
Returns:
Extracted data bytes, or None if extraction fails
Raises:
ExtractionError: If extraction fails critically
Example:
>>> extracted = extract_from_image(stego_bytes, key)
>>> len(extracted)
1024
"""
debug.print(f"Extracting from {len(image_data)} byte image")
debug.print(f"extract_from_image: mode={embed_mode}")
# AUTO MODE: Try LSB first, then DCT
if embed_mode == EMBED_MODE_AUTO:
result = _extract_lsb(image_data, pixel_key, bits_per_channel)
if result is not None:
debug.print("Auto-detect: LSB extraction succeeded")
return result
if has_dct_support():
debug.print("Auto-detect: LSB failed, trying DCT")
result = _extract_dct(image_data, pixel_key)
if result is not None:
debug.print("Auto-detect: DCT extraction succeeded")
return result
debug.print("Auto-detect: All modes failed")
return None
# EXPLICIT DCT MODE
elif embed_mode == EMBED_MODE_DCT:
if not has_dct_support():
raise ImportError("scipy required for DCT mode")
return _extract_dct(image_data, pixel_key)
# EXPLICIT LSB MODE
else:
return _extract_lsb(image_data, pixel_key, bits_per_channel)
def _extract_dct(image_data: bytes, pixel_key: bytes) -> Optional[bytes]:
    """Best-effort DCT extraction.

    Any exception raised while loading the DCT module or extracting is
    logged through debug.print and reported as None.
    """
    try:
        extracted = _get_dct_module().extract_from_dct(image_data, pixel_key)
    except Exception as exc:
        debug.print(f"DCT extraction failed: {exc}")
        return None
    return extracted
def _extract_lsb(
image_data: bytes,
pixel_key: bytes,
bits_per_channel: int = 1
) -> Optional[bytes]:
"""
Extract using LSB mode (internal implementation).
"""
debug.print(f"LSB extracting from {len(image_data)} byte image")
debug.data(pixel_key, "Pixel key for extraction")
debug.validate(bits_per_channel in (1, 2),
f"bits_per_channel must be 1 or 2, got {bits_per_channel}")
@@ -449,8 +737,7 @@ def extract_from_image(
img_file = Image.open(io.BytesIO(image_data))
debug.print(f"Image: {img_file.size[0]}x{img_file.size[1]}, format: {img_file.format}")
# Convert to RGB
img: Image.Image = img_file.convert('RGB') if img_file.mode != 'RGB' else img_file.copy()
img = img_file.convert('RGB') if img_file.mode != 'RGB' else img_file.copy()
if img_file.mode != 'RGB':
debug.print(f"Converting image from {img_file.mode} to RGB")
@@ -460,7 +747,6 @@ def extract_from_image(
debug.print(f"Image has {num_pixels} pixels, {bits_per_pixel} bits/pixel")
# First, extract enough to get the length (4 bytes = 32 bits)
initial_pixels = (32 + bits_per_pixel - 1) // bits_per_pixel + 10
debug.print(f"Extracting initial {initial_pixels} pixels to find length")
@@ -473,7 +759,6 @@ def extract_from_image(
for bit_pos in range(bits_per_channel - 1, -1, -1):
binary_data += str((channel >> bit_pos) & 1)
# Parse length
try:
length_bits = binary_data[:32]
if len(length_bits) < 32:
@@ -486,13 +771,11 @@ def extract_from_image(
debug.print(f"Failed to parse length: {e}")
return None
# Sanity check
max_possible = (num_pixels * bits_per_pixel) // 8 - 4
if data_length > max_possible or data_length < 10:
debug.print(f"Invalid data length: {data_length} (max possible: {max_possible})")
return None
# Extract full data
total_bits = (4 + data_length) * 8
pixels_needed = (total_bits + bits_per_pixel - 1) // bits_per_pixel
@@ -519,63 +802,21 @@ def extract_from_image(
if len(byte_bits) == 8:
data_bytes.append(int(byte_bits, 2))
debug.print(f"Successfully extracted {len(data_bytes)} bytes")
debug.print(f"LSB successfully extracted {len(data_bytes)} bytes")
return bytes(data_bytes)
except Exception as e:
debug.exception(e, "extract_from_image")
raise ExtractionError(f"Failed to extract data: {e}") from e
debug.exception(e, "extract_lsb")
return None
def calculate_capacity(image_data: bytes, bits_per_channel: int = 1) -> int:
"""
Calculate the maximum message capacity of an image.
Args:
image_data: Image bytes
bits_per_channel: Bits to use per color channel
Returns:
Maximum bytes that can be embedded (minus overhead)
Example:
>>> capacity = calculate_capacity(image_bytes)
>>> capacity
12000
"""
debug.validate(bits_per_channel in (1, 2),
f"bits_per_channel must be 1 or 2, got {bits_per_channel}")
img_file = Image.open(io.BytesIO(image_data))
img: Image.Image = img_file.convert('RGB') if img_file.mode != 'RGB' else img_file
num_pixels = img.size[0] * img.size[1]
bits_per_pixel = 3 * bits_per_channel
max_bytes = (num_pixels * bits_per_pixel) // 8
# Subtract overhead: 4 bytes length + ~100 bytes header
capacity = max(0, max_bytes - ENCRYPTION_OVERHEAD)
debug.print(f"Image capacity: {capacity} bytes at {bits_per_channel} bit(s)/channel")
return capacity
# =============================================================================
# UTILITY FUNCTIONS
# =============================================================================
def get_image_dimensions(image_data: bytes) -> Tuple[int, int]:
"""
Get image dimensions without loading full image.
Args:
image_data: Image bytes
Returns:
Tuple of (width, height)
Example:
>>> width, height = get_image_dimensions(image_bytes)
>>> width, height
(800, 600)
"""
"""Get image dimensions without loading full image."""
debug.validate(len(image_data) > 0, "Image data cannot be empty")
img = Image.open(io.BytesIO(image_data))
dimensions = img.size
debug.print(f"Image dimensions: {dimensions[0]}x{dimensions[1]}")
@@ -583,20 +824,7 @@ def get_image_dimensions(image_data: bytes) -> Tuple[int, int]:
def get_image_format(image_data: bytes) -> Optional[str]:
"""
Get image format (PIL format string like 'PNG', 'JPEG').
Args:
image_data: Image bytes
Returns:
Format string or None if invalid
Example:
>>> format = get_image_format(image_bytes)
>>> format
'PNG'
"""
"""Get image format (PIL format string like 'PNG', 'JPEG')."""
try:
img = Image.open(io.BytesIO(image_data))
format_str = img.format
@@ -608,19 +836,7 @@ def get_image_format(image_data: bytes) -> Optional[str]:
def is_lossless_format(image_data: bytes) -> bool:
"""
Check if image is in a lossless format suitable for steganography.
Args:
image_data: Image bytes
Returns:
True if format is lossless (PNG, BMP, TIFF)
Example:
>>> is_lossless_format(image_bytes)
True
"""
"""Check if image is in a lossless format suitable for steganography."""
fmt = get_image_format(image_data)
is_lossless = fmt is not None and fmt.upper() in LOSSLESS_FORMATS
debug.print(f"Image is lossless: {is_lossless} (format: {fmt})")