diff --git a/frontends/web/templates/about.html b/frontends/web/templates/about.html index b0d80c0..ffd6980 100644 --- a/frontends/web/templates/about.html +++ b/frontends/web/templates/about.html @@ -100,6 +100,7 @@
  • Output: JPEG or PNG
  • Color: Color or grayscale
  • Speed: ~2s
  • +
  • Error Correction: Reed-Solomon v4.1

  • @@ -383,11 +384,18 @@ + + 4.1.0 + + Reed-Solomon error correction for DCT mode (corrects up to 16 byte errors per 223-byte chunk), + majority voting on length headers, improved robustness with problematic carrier images + + 4.0.0 Channel keys for group/deployment isolation, - DCT default, simplified auth, passphrase replaces day_phrase, + DCT default, simplified auth, passphrase replaces day_phrase, 4-word default, JPEG fix, large image support, subprocess isolation, Python 3.10-3.12 diff --git a/pyproject.toml b/pyproject.toml index b4a2911..52edb98 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,7 @@ dct = [ "numpy>=2.0.0", "scipy>=1.10.0", "jpegio>=0.2.0", + "reedsolo>=1.7.0", ] cli = [ "click>=8.0.0", @@ -67,6 +68,7 @@ web = [ "numpy>=2.0.0", "scipy>=1.10.0", "jpegio>=0.2.0", + "reedsolo>=1.7.0", ] api = [ "fastapi>=0.100.0", @@ -78,6 +80,7 @@ api = [ "numpy>=2.0.0", "scipy>=1.10.0", "jpegio>=0.2.0", + "reedsolo>=1.7.0", ] all = [ "stegasoo[cli,web,api,dct,compression]", diff --git a/src/stegasoo/dct_steganography.py b/src/stegasoo/dct_steganography.py index 9ce7777..2c1727f 100644 --- a/src/stegasoo/dct_steganography.py +++ b/src/stegasoo/dct_steganography.py @@ -1,17 +1,22 @@ """ -DCT Domain Steganography Module (v3.2.0-patch2) +DCT Domain Steganography Module (v4.1.0) Embeds data in DCT coefficients with two approaches: 1. PNG output: Scipy-based DCT transform (grayscale or color) 2. JPEG output: jpegio-based coefficient manipulation (if available) +v4.1.0 Changes: +- Reed-Solomon error correction protects against bit errors in problematic blocks +- Majority voting on length headers (3 copies) for additional robustness +- RS can correct up to 16 byte errors per 223-byte chunk + v3.2.0-patch2 Changes: - Chunked processing for large images to avoid heap corruption - Process image in vertical strips to limit memory per operation - Isolated DCT operations with fresh array allocations - Workaround for scipy.fftpack memory issues -Requires: scipy (for PNG mode), optionally jpegio (for JPEG mode) +Requires: scipy (for PNG mode), optionally jpegio (for JPEG mode), reedsolo (for error correction) """ import gc @@ -102,6 +107,13 @@ JPEGIO_MAGIC = b"JPGS" JPEGIO_MIN_COEF_MAGNITUDE = 2 JPEGIO_EMBED_CHANNEL = 0 FLAG_COLOR_MODE = 0x01 +FLAG_RS_PROTECTED = 0x02 # Reed-Solomon error correction enabled + +# Reed-Solomon settings - 32 symbols can correct up to 16 byte errors per 223-byte chunk +RS_NSYM = 32 +RS_LENGTH_HEADER_SIZE = 8 # 8 bytes: 4 for raw_payload_length + 4 for rs_payload_length +RS_LENGTH_COPIES = 3 # Store length header 3 times for majority voting +RS_LENGTH_PREFIX_SIZE = RS_LENGTH_HEADER_SIZE * RS_LENGTH_COPIES # Total: 24 bytes # Chunking settings for large images MAX_CHUNK_HEIGHT = 512 # Process in 512-pixel tall strips @@ -167,6 +179,44 @@ def has_jpegio_support() -> bool: return HAS_JPEGIO +# ============================================================================ +# REED-SOLOMON ERROR CORRECTION +# Protects against bit errors in problematic image blocks +# ============================================================================ + +# Check for reedsolo availability +try: + from reedsolo import RSCodec, ReedSolomonError + + HAS_REEDSOLO = True +except ImportError: + HAS_REEDSOLO = False + RSCodec = None + ReedSolomonError = None + + +def _rs_encode(data: bytes) -> bytes: + """Add Reed-Solomon error correction symbols to data.""" + if not HAS_REEDSOLO: + return data # No protection if reedsolo not available + rs = RSCodec(RS_NSYM) + return bytes(rs.encode(data)) + + +def _rs_decode(data: bytes) -> bytes: + """Decode Reed-Solomon protected data, correcting errors if possible.""" + if not HAS_REEDSOLO: + return data # No decoding if reedsolo not available + rs = RSCodec(RS_NSYM) + try: + decoded, _, errata_pos = rs.decode(data) + if errata_pos: + pass # Errors were corrected + return bytes(decoded) + except ReedSolomonError as e: + raise ValueError(f"Reed-Solomon decoding failed: {e}") from e + + # ============================================================================ # SAFE DCT FUNCTIONS # These create fresh arrays to avoid scipy memory corruption issues @@ -436,7 +486,17 @@ def calculate_dct_capacity(image_data: bytes) -> DCTCapacityInfo: bits_per_block = len(DEFAULT_EMBED_POSITIONS) total_bits = total_blocks * bits_per_block total_bytes = total_bits // 8 - usable_bytes = max(0, total_bytes - HEADER_SIZE) + # Account for header and RS overhead + # RS format: [24-byte length prefix (3 copies)] + RS(header + data) + # RS adds RS_NSYM bytes per 223-byte chunk (255 - RS_NSYM = 223) + # Conservatively estimate RS overhead as ~15% + one chunk minimum + if HAS_REEDSOLO: + # Overhead = 24 (prefix) + 10 (header) + RS overhead + # Simplify: base overhead = 24 + 10 + 32 + 15% margin for larger data + overhead = RS_LENGTH_PREFIX_SIZE + HEADER_SIZE + RS_NSYM + 20 + else: + overhead = HEADER_SIZE + usable_bytes = max(0, total_bytes - overhead) return DCTCapacityInfo( width=width, @@ -538,9 +598,20 @@ def _embed_scipy_dct_safe( flags = FLAG_COLOR_MODE if color_mode == "color" else 0 - # Prepare payload bits + # Build raw payload (header + data) header = _create_header(len(data), flags) - payload = header + data + raw_payload = header + data + + # Apply Reed-Solomon error correction to entire payload if available + if HAS_REEDSOLO: + rs_payload = _rs_encode(raw_payload) + # Format: [length_header x 3 for majority voting] + [RS-encoded payload] + # Each length_header is 8 bytes: 4 for raw_payload_length + 4 for rs_payload_length + length_header = struct.pack(">II", len(raw_payload), len(rs_payload)) + length_prefix = length_header * RS_LENGTH_COPIES # Repeat 3 times + payload = length_prefix + rs_payload + else: + payload = raw_payload bits = [] for byte in payload: for i in range(7, -1, -1): @@ -761,8 +832,19 @@ def _embed_jpegio( all_positions = _jpegio_get_usable_positions(coef_array) order = _jpegio_generate_order(len(all_positions), seed) + # Build raw payload (header + data) header = _jpegio_create_header(len(data), flags) - payload = header + data + raw_payload = header + data + + # Apply Reed-Solomon error correction to entire payload if available + if HAS_REEDSOLO: + rs_payload = _rs_encode(raw_payload) + # Format: [length_header x 3 for majority voting] + [RS-encoded payload] + length_header = struct.pack(">II", len(raw_payload), len(rs_payload)) + length_prefix = length_header * RS_LENGTH_COPIES + payload = length_prefix + rs_payload + else: + payload = raw_payload bits = [] for byte in payload: @@ -892,6 +974,69 @@ def _extract_scipy_dct_safe(stego_image: bytes, seed: bytes) -> bytes: del padded gc.collect() + # Try RS-protected format first (has 24-byte length prefix: 3 copies of 8-byte header) + if HAS_REEDSOLO and len(all_bits) >= RS_LENGTH_PREFIX_SIZE * 8: + # Extract length prefix (24 bytes: 3 copies of 8-byte header for majority voting) + length_prefix_bits = all_bits[: RS_LENGTH_PREFIX_SIZE * 8] + length_prefix_bytes = bytes( + [ + sum(length_prefix_bits[i * 8 : (i + 1) * 8][j] << (7 - j) for j in range(8)) + for i in range(RS_LENGTH_PREFIX_SIZE) + ] + ) + + # Extract 3 copies and use majority voting + copies = [] + for i in range(RS_LENGTH_COPIES): + start = i * RS_LENGTH_HEADER_SIZE + end = start + RS_LENGTH_HEADER_SIZE + copies.append(length_prefix_bytes[start:end]) + + # Count occurrences of each unique copy + from collections import Counter + counter = Counter(copies) + best_header, count = counter.most_common(1)[0] + + # Only proceed if we have at least 2 matching copies (majority) + if count >= 2: + raw_payload_length, rs_encoded_length = struct.unpack(">II", best_header) + else: + # No majority - try first copy as fallback + raw_payload_length, rs_encoded_length = struct.unpack(">II", copies[0]) + + # Sanity check: both lengths should be reasonable + max_reasonable = (len(all_bits) // 8) - RS_LENGTH_PREFIX_SIZE + if (raw_payload_length > 0 and raw_payload_length <= max_reasonable and + rs_encoded_length > 0 and rs_encoded_length <= max_reasonable and + rs_encoded_length >= raw_payload_length): + # This looks like RS-protected format + total_bits_needed = (RS_LENGTH_PREFIX_SIZE + rs_encoded_length) * 8 + + if len(all_bits) >= total_bits_needed: + rs_bits = all_bits[RS_LENGTH_PREFIX_SIZE * 8 : total_bits_needed] + rs_encoded = bytes( + [ + sum(rs_bits[i * 8 : (i + 1) * 8][j] << (7 - j) for j in range(8)) + for i in range(rs_encoded_length) + ] + ) + + try: + # RS decode to get header + data + raw_payload = _rs_decode(rs_encoded) + + # Parse header from decoded payload + _, flags, data_length = _parse_header( + [((raw_payload[i // 8] >> (7 - i % 8)) & 1) for i in range(HEADER_SIZE * 8)] + ) + + # Extract data + data = raw_payload[HEADER_SIZE : HEADER_SIZE + data_length] + return data + except (ValueError, struct.error): + pass # Fall through to legacy format + + # Legacy format: header not protected by RS _, flags, data_length = _parse_header(all_bits) data_bits = all_bits[HEADER_SIZE * 8 : (HEADER_SIZE + data_length) * 8] @@ -922,6 +1067,72 @@ def _extract_jpegio(stego_image: bytes, seed: bytes) -> bytes: all_positions = _jpegio_get_usable_positions(coef_array) order = _jpegio_generate_order(len(all_positions), seed) + # Try RS-protected format first (has 24-byte length prefix: 3 copies for majority voting) + if HAS_REEDSOLO and len(all_positions) >= RS_LENGTH_PREFIX_SIZE * 8: + # Extract length prefix (24 bytes: 3 copies of 8-byte header) + length_prefix_bits = [] + for pos_idx in order[: RS_LENGTH_PREFIX_SIZE * 8]: + row, col = all_positions[pos_idx] + coef = coef_array[row, col] + length_prefix_bits.append(coef & 1) + + length_prefix_bytes = bytes( + [ + sum(length_prefix_bits[i * 8 : (i + 1) * 8][j] << (7 - j) for j in range(8)) + for i in range(RS_LENGTH_PREFIX_SIZE) + ] + ) + + # Extract 3 copies and use majority voting + from collections import Counter + copies = [] + for i in range(RS_LENGTH_COPIES): + start = i * RS_LENGTH_HEADER_SIZE + end = start + RS_LENGTH_HEADER_SIZE + copies.append(length_prefix_bytes[start:end]) + + counter = Counter(copies) + best_header, count = counter.most_common(1)[0] + + if count >= 2: + raw_payload_length, rs_encoded_length = struct.unpack(">II", best_header) + else: + raw_payload_length, rs_encoded_length = struct.unpack(">II", copies[0]) + + # Sanity check + max_reasonable = (len(all_positions) // 8) - RS_LENGTH_PREFIX_SIZE + if (raw_payload_length > 0 and raw_payload_length <= max_reasonable and + rs_encoded_length > 0 and rs_encoded_length <= max_reasonable and + rs_encoded_length >= raw_payload_length): + total_bits_needed = (RS_LENGTH_PREFIX_SIZE + rs_encoded_length) * 8 + + if len(all_positions) >= total_bits_needed: + # Extract RS-encoded data + all_bits = [] + for bit_idx, pos_idx in enumerate(order): + if bit_idx >= total_bits_needed: + break + row, col = all_positions[pos_idx] + coef = coef_array[row, col] + all_bits.append(coef & 1) + + rs_bits = all_bits[RS_LENGTH_PREFIX_SIZE * 8 :] + rs_encoded = bytes( + [ + sum(rs_bits[i * 8 : (i + 1) * 8][j] << (7 - j) for j in range(8)) + for i in range(rs_encoded_length) + ] + ) + + try: + raw_payload = _rs_decode(rs_encoded) + _, flags, data_length = _jpegio_parse_header(raw_payload[:HEADER_SIZE]) + data = raw_payload[HEADER_SIZE : HEADER_SIZE + data_length] + return data + except (ValueError, struct.error): + pass # Fall through to legacy format + + # Legacy format: header not protected by RS header_bits = [] for pos_idx in order[: HEADER_SIZE * 8]: row, col = all_positions[pos_idx] @@ -936,7 +1147,6 @@ def _extract_jpegio(stego_image: bytes, seed: bytes) -> bytes: ) _, flags, data_length = _jpegio_parse_header(header_bytes) - total_bits_needed = (HEADER_SIZE + data_length) * 8 all_bits = [] @@ -948,7 +1158,6 @@ def _extract_jpegio(stego_image: bytes, seed: bytes) -> bytes: all_bits.append(coef & 1) data_bits = all_bits[HEADER_SIZE * 8 :] - data = bytes( [ sum(data_bits[i * 8 : (i + 1) * 8][j] << (7 - j) for j in range(8))