Files
relicario/crates/relicario-core/src/imgsecret.rs
adlee-was-taken c8535e11f5 fix(core): correct off-by-one in imgsecret SOF bounds guard
peek_jpeg_dimensions reads jpeg[i+8] as the last byte, so the guard
should be `i + 8 >= jpeg.len()`, not `i + 9 >= jpeg.len()`. The old
guard would reject a valid SOF marker ending exactly at len()-1.
Caught in Task 2 code-quality review.
2026-04-19 21:34:53 -04:00

1139 lines
45 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
//! DCT-based steganographic embedding of a 256-bit secret in JPEG images.
//!
//! This is the novel component of relicario. It hides a 32-byte secret inside a
//! JPEG image's luminance channel using Quantization Index Modulation (QIM) on
//! mid-frequency DCT coefficients, with majority voting across multiple redundant
//! copies for robustness.
//!
//! ## High-level algorithm
//!
//! ### Embedding (`embed`)
//!
//! 1. Decode the carrier JPEG and extract the luminance (Y) channel.
//! 2. Compute the "embed region" -- the central 70% of the image (15% margin
//! on each side acts as a crumple zone for mild cropping).
//! 3. Divide the embed region into 8x8 pixel blocks and select evenly-spaced
//! blocks for embedding.
//! 4. For each copy of the secret (5-50 copies depending on image size):
//! - For each of the 22 blocks needed to hold 256 bits (12 bits per block):
//! - Apply the 2D DCT to the 8x8 block.
//! - Embed bits into 12 mid-frequency DCT coefficients using QIM.
//! - Apply the inverse DCT to write the modified block back.
//! 5. Reconstruct the JPEG by replacing only the Y channel and re-encoding.
//!
//! ### Extraction (`extract`)
//!
//! 1. Decode the JPEG and extract the Y channel.
//! 2. Try the canonical extraction (assuming the image is uncropped).
//! 3. If that fails, try crop-recovery: search for plausible original dimensions
//! and pixel offsets, reconstructing the block grid accordingly.
//! 4. For each copy of the secret, extract bits from DCT coefficients via QIM.
//! 5. Majority-vote each bit position across all copies. Require >= 60% confidence.
//!
//! ## Robustness
//!
//! The combination of QIM with a high quantization step (50.0), mid-frequency
//! coefficient placement, and majority voting across many copies makes the
//! watermark survive:
//! - JPEG recompression down to quality ~85
//! - Mild cropping (up to ~10% from edges, within the 15% crumple zone)
//! - Color space conversions (embedding is in luminance only)
use crate::error::{RelicarioError, Result};
use image::codecs::jpeg::JpegEncoder;
use image::ImageReader;
use image::{ImageEncoder, Rgb, RgbImage};
use std::f64::consts::PI;
use std::io::Cursor;
// ─── Constants ───────────────────────────────────────────────────────────────
/// DCT block size. JPEG uses 8x8 blocks, so we match that to minimize
/// interference with the JPEG codec's own quantization.
const BLOCK_SIZE: usize = 8;
/// QIM quantization step. Higher values make the watermark more robust to
/// recompression but introduce more visible artifacts. A value of 50.0 is
/// higher than the typical academic value of 25 -- this is intentional because
/// we need to survive JPEG recompression at Q85 and below, which applies
/// aggressive quantization to mid-frequency coefficients. The trade-off is
/// acceptable because the reference image is a personal photo, not a
/// publication-quality image.
const QUANT_STEP: f64 = 50.0;
/// Minimum image dimension (width or height) in pixels. Images smaller than
/// this cannot hold enough 8x8 blocks for reliable embedding.
const MIN_DIMENSION: u32 = 100;
/// Maximum image dimension (width or height) in pixels. Images larger than
/// this are rejected before full decode to prevent DoS via attacker-supplied
/// oversized JPEGs (audit M3).
pub const MAX_DIMENSION: u32 = 10_000;
/// Number of secret bits to embed: 256 bits = 32 bytes.
const SECRET_BITS: usize = 256;
/// Minimum number of redundant copies of the secret. More copies improve
/// extraction reliability via majority voting, but require more blocks.
const MIN_COPIES: usize = 5;
/// Number of mid-frequency DCT positions used per block. Each block carries
/// 12 bits of the secret. `EMBED_POSITIONS` is declared with this length, so
/// the two cannot drift apart without a compile error.
const BITS_PER_BLOCK: usize = 12;
/// Number of 8x8 blocks needed to hold one complete copy of the 256-bit secret.
/// ceil(256 / 12) = 22 blocks per copy.
const BLOCKS_PER_COPY: usize = (SECRET_BITS + BITS_PER_BLOCK - 1) / BITS_PER_BLOCK; // 22
/// Mid-frequency DCT coefficient positions for embedding, specified as
/// (row, col) indices into the 8x8 DCT coefficient matrix.
///
/// The set covers zig-zag scan positions 6 through 17 -- the "sweet spot"
/// between low-frequency coefficients (which carry visible image structure and
/// are heavily quantized by JPEG) and high-frequency coefficients (which carry
/// noise/detail and are aggressively zeroed by JPEG compression).
///
/// Mid-frequency coefficients survive JPEG recompression better than high-frequency
/// ones, while causing less visible distortion than modifying low-frequency ones.
///
/// For reference, the standard JPEG zig-zag scan visits these cells as:
/// ```text
/// Zig-zag positions 6-9: (0,3) (1,2) (2,1) (3,0)
/// Zig-zag positions 10-13: (4,0) (3,1) (2,2) (1,3)
/// Zig-zag positions 14-17: (0,4) (0,5) (1,4) (2,3)
/// ```
///
/// NOTE: the array below is NOT in strict zig-zag order (its entries are
/// zig-zag positions 6, 7, 8, 9, 14, 13, 12, 11, 10, 15, 16, 17). The array
/// order defines which secret bit lands in which coefficient, so reordering
/// it would make previously embedded images unreadable. Do not "fix" it.
const EMBED_POSITIONS: [(usize, usize); BITS_PER_BLOCK] = [
    (0, 3),
    (1, 2),
    (2, 1),
    (3, 0), // zig-zag 6, 7, 8, 9
    (0, 4),
    (1, 3),
    (2, 2),
    (3, 1), // zig-zag 14, 13, 12, 11
    (4, 0),
    (0, 5),
    (1, 4),
    (2, 3), // zig-zag 10, 15, 16, 17
];
// ─── Dimension guard ─────────────────────────────────────────────────────────
/// Walk JPEG markers until we hit an SOF (start-of-frame) marker and return
/// the `(width, height)` it declares.
///
/// Relative to the `0xFF` byte that starts the SOF marker, the height is the
/// big-endian `u16` at offsets 5..=6 and the width the one at offsets 7..=8.
///
/// This peek does NOT decode any pixel data, so an oversized JPEG header is
/// rejected in O(marker-count) time without allocating a frame buffer.
///
/// Marker handling:
/// - `0xFF 0xFF`: the first byte is a fill byte (any number may precede a
///   marker); scanning resumes at the second `0xFF`.
/// - Stuffed zero (`0xFF 0x00`), TEM (`0x01`), RST0..=RST7, SOI and EOI have
///   no length field and are skipped as two bytes.
/// - Every other non-SOF marker is skipped using its 16-bit segment length,
///   which includes the two length bytes and therefore must be at least 2.
///
/// # Errors
///
/// Returns [`RelicarioError::ImgSecret`] if a segment is truncated, declares
/// an impossible length, or no SOF marker is present.
fn peek_jpeg_dimensions(jpeg: &[u8]) -> Result<(u32, u32)> {
    let mut i = 0;
    while i + 1 < jpeg.len() {
        if jpeg[i] != 0xFF {
            i += 1;
            continue;
        }
        match jpeg[i + 1] {
            // Fill byte: the real marker code (if any) follows the next 0xFF.
            0xFF => i += 1,
            // Stuffed zero / TEM / RSTn / SOI / EOI: no length field follows.
            0x00 | 0x01 | 0xD0..=0xD9 => i += 2,
            0xC0..=0xC3 | 0xC5..=0xC7 | 0xC9..=0xCB | 0xCD..=0xCF => {
                // SOFn -- height in [i+5..i+7], width in [i+7..i+9]
                let dims = jpeg
                    .get(i + 5..i + 9)
                    .ok_or_else(|| RelicarioError::ImgSecret("truncated SOF marker".into()))?;
                let height = u32::from(u16::from_be_bytes([dims[0], dims[1]]));
                let width = u32::from(u16::from_be_bytes([dims[2], dims[3]]));
                return Ok((width, height));
            }
            _ => {
                let len_bytes = jpeg.get(i + 2..i + 4).ok_or_else(|| {
                    RelicarioError::ImgSecret("truncated marker segment".into())
                })?;
                let seg_len = usize::from(u16::from_be_bytes([len_bytes[0], len_bytes[1]]));
                // The length counts its own two bytes; anything smaller is
                // malformed and would desynchronise the marker walk.
                if seg_len < 2 {
                    return Err(RelicarioError::ImgSecret(
                        "invalid marker segment length".into(),
                    ));
                }
                i += 2 + seg_len;
            }
        }
    }
    Err(RelicarioError::ImgSecret(
        "no SOF marker found in JPEG".into(),
    ))
}
/// Fail fast on JPEGs whose header declares a width or height above
/// [`MAX_DIMENSION`].
///
/// Both `embed` and `extract` call this first, so that an attacker-supplied
/// 32000×32000 image cannot wedge the WASM service worker during the
/// expensive DCT extraction pass (audit M3).
///
/// # Errors
///
/// Propagates header-parsing errors from `peek_jpeg_dimensions`, and returns
/// [`RelicarioError::ImgSecret`] when either dimension exceeds the cap.
fn enforce_dimension_cap(jpeg: &[u8]) -> Result<()> {
    let (w, h) = peek_jpeg_dimensions(jpeg)?;
    // The larger side decides: both must be within the cap.
    if w.max(h) <= MAX_DIMENSION {
        return Ok(());
    }
    Err(RelicarioError::ImgSecret(format!(
        "image dimensions {w}x{h} exceed {MAX_DIMENSION}x{MAX_DIMENSION} cap"
    )))
}
// ─── YChannel ────────────────────────────────────────────────────────────────
/// Luminance (Y) plane of an image, held as one flat `f64` buffer.
///
/// Embedding touches luminance only, because:
/// - JPEG does not spatially subsample luminance (chrominance is typically
///   4:2:0), so the full DCT block grid is available for embedding.
/// - Chrominance subsampling would halve the spatial resolution before the
///   DCT and misalign our block positions, destroying embedded data.
/// - A single channel keeps the DCT operations simple and fast.
struct YChannel {
    /// Row-major samples: pixel (x, y) lives at `data[y * width + x]`.
    /// Values start in [0, 255] when derived from RGB, but may drift slightly
    /// outside that range while DCT coefficients are being manipulated.
    data: Vec<f64>,
    width: usize,
    height: usize,
}

impl YChannel {
    /// Flat buffer index of pixel (x, y).
    fn offset(&self, x: usize, y: usize) -> usize {
        y * self.width + x
    }

    /// Read the luminance at pixel (x, y).
    fn get(&self, x: usize, y: usize) -> f64 {
        self.data[self.offset(x, y)]
    }

    /// Overwrite the luminance at pixel (x, y) with `val`.
    fn set(&mut self, x: usize, y: usize, val: f64) {
        let at = self.offset(x, y);
        self.data[at] = val;
    }
}
// ─── EmbedRegion ─────────────────────────────────────────────────────────────
/// Defines the central region of the image where embedding occurs.
///
/// The embed region is the central 70% of the image -- a 15% margin is excluded
/// on each side. This margin acts as a "crumple zone": if the image is mildly
/// cropped (e.g., a social media platform trims edges), the embedded data in the
/// center remains intact. The 15% margin is sufficient to tolerate up to ~10%
/// cropping from any single edge.
///
/// Block coordinates `(bx, by)` used elsewhere in this file are relative to
/// this region: block `(bx, by)` starts at pixel
/// `(x_offset + bx * 8, y_offset + by * 8)`.
struct EmbedRegion {
/// Pixel offset from the left edge to the start of the embed region.
x_offset: usize,
/// Pixel offset from the top edge to the start of the embed region.
y_offset: usize,
/// Width of the embed region in pixels.
// Only used to derive `blocks_x` when the region is built; nothing visible in
// this file reads the field afterwards, hence the lint suppression.
#[allow(dead_code)]
region_width: usize,
/// Height of the embed region in pixels.
// Same as `region_width`: kept for completeness, not read after construction.
#[allow(dead_code)]
region_height: usize,
/// Number of complete 8x8 blocks that fit horizontally in the embed region.
blocks_x: usize,
/// Number of complete 8x8 blocks that fit vertically in the embed region.
blocks_y: usize,
}
// ─── Helper functions ────────────────────────────────────────────────────────
/// Decode a JPEG from raw bytes and extract the luminance (Y) channel.
///
/// Converts each RGB pixel to luminance using the ITU-R BT.601 formula:
/// `Y = 0.299*R + 0.587*G + 0.114*B`
fn extract_y_channel(jpeg_bytes: &[u8]) -> Result<YChannel> {
let reader = ImageReader::new(Cursor::new(jpeg_bytes))
.with_guessed_format()
.map_err(|e| RelicarioError::ImgSecret(format!("failed to read image: {e}")))?;
let img = reader
.decode()
.map_err(|e| RelicarioError::ImgSecret(format!("failed to decode image: {e}")))?;
let rgb = img.to_rgb8();
let (width, height) = (rgb.width() as usize, rgb.height() as usize);
let mut data = Vec::with_capacity(width * height);
for y in 0..height {
for x in 0..width {
let p = rgb.get_pixel(x as u32, y as u32);
// ITU-R BT.601 luma coefficients
let luma = 0.299 * p[0] as f64 + 0.587 * p[1] as f64 + 0.114 * p[2] as f64;
data.push(luma);
}
}
Ok(YChannel {
data,
width,
height,
})
}
/// Convenience wrapper: the embed region for the dimensions of `y`.
fn central_region(y: &YChannel) -> EmbedRegion {
    let (width, height) = (y.width, y.height);
    compute_region(width, height)
}
/// Derive the central embed region from image dimensions.
///
/// A margin of 15% of each dimension (truncated to whole pixels) is removed
/// from every side, leaving roughly the central 70%. The margin is the
/// crumple zone that gives the watermark its crop tolerance. Only complete
/// 8x8 blocks inside the remaining area are counted.
fn compute_region(width: usize, height: usize) -> EmbedRegion {
    // Margin for one side of an axis of the given length.
    let margin_for = |extent: usize| (extent as f64 * 0.15) as usize;

    let x_offset = margin_for(width);
    let y_offset = margin_for(height);
    let region_width = width - 2 * x_offset;
    let region_height = height - 2 * y_offset;

    EmbedRegion {
        x_offset,
        y_offset,
        region_width,
        region_height,
        blocks_x: region_width / BLOCK_SIZE,
        blocks_y: region_height / BLOCK_SIZE,
    }
}
/// Copy the 8x8 pixel block whose top-left corner is at absolute pixel
/// coordinates `(px, py)` out of the Y channel.
///
/// Yields `None` when the block would reach past the right or bottom edge of
/// the image. Crop-recovery extraction relies on this to skip blocks that
/// have been cropped away.
fn read_block_abs(y: &YChannel, px: usize, py: usize) -> Option<[[f64; 8]; 8]> {
    let fits = px + 8 <= y.width && py + 8 <= y.height;
    if !fits {
        return None;
    }
    let mut block = [[0.0f64; 8]; 8];
    for (row, line) in block.iter_mut().enumerate() {
        for (col, cell) in line.iter_mut().enumerate() {
            *cell = y.get(px + col, py + row);
        }
    }
    Some(block)
}
/// Read an 8x8 block from the Y channel using block coordinates relative to
/// the embed region.
///
/// # Panics
///
/// Panics if the addressed block does not lie fully inside the image. For a
/// `region` computed from the same image and `bx < blocks_x`, `by < blocks_y`
/// this cannot happen, because the region only counts complete blocks.
fn read_block(y: &YChannel, bx: usize, by: usize, region: &EmbedRegion) -> [[f64; 8]; 8] {
    let start_x = region.x_offset + bx * BLOCK_SIZE;
    let start_y = region.y_offset + by * BLOCK_SIZE;
    read_block_abs(y, start_x, start_y)
        .expect("embed-region block must lie fully inside the image it was computed for")
}
/// Store an 8x8 block into the Y channel at block coordinates `(bx, by)`
/// relative to the embed region.
///
/// No bounds check is made here beyond the indexing done by `YChannel::set`.
fn write_block(y: &mut YChannel, bx: usize, by: usize, region: &EmbedRegion, block: &[[f64; 8]; 8]) {
    let left = region.x_offset + bx * BLOCK_SIZE;
    let top = region.y_offset + by * BLOCK_SIZE;
    for (row, line) in block.iter().enumerate() {
        for (col, &value) in line.iter().enumerate() {
            y.set(left + col, top + row, value);
        }
    }
}
// ─── DCT ─────────────────────────────────────────────────────────────────────
//
// The Discrete Cosine Transform (DCT) converts a spatial-domain signal (pixel
// values) into a frequency-domain representation (coefficients). JPEG compression
// itself uses the 8x8 Type-II DCT, so working in the same domain lets us embed
// data where JPEG's own quantization is least destructive.
//
// We implement the DCT from scratch (rather than depending on a library) to keep
// the crate dependency-light and WASM-friendly. The 8x8 size is small enough
// that the naive O(N^2) computation is fast.
/// Orthonormal 1D DCT-II of an 8-sample signal.
///
/// X[k] = c(k) * sum_{i=0}^{7} x[i] * cos((2i+1)*k*pi/16)
///
/// with c(0) = sqrt(1/8) and c(k) = sqrt(2/8) for k > 0.
fn dct1d(input: &[f64; 8]) -> [f64; 8] {
    let mut spectrum = [0.0f64; 8];
    for (k, slot) in spectrum.iter_mut().enumerate() {
        let scale = match k {
            0 => (1.0 / 8.0_f64).sqrt(),
            _ => (2.0 / 8.0_f64).sqrt(),
        };
        // Accumulate left to right from 0.0 so the floating-point summation
        // order is fixed.
        let acc = input.iter().enumerate().fold(0.0, |acc, (i, &sample)| {
            acc + sample * ((2 * i + 1) as f64 * k as f64 * PI / 16.0).cos()
        });
        *slot = scale * acc;
    }
    spectrum
}
/// 1D DCT-III of 8 coefficients, i.e. the inverse of [`dct1d`].
///
/// x[i] = sum_{k=0}^{7} c(k) * X[k] * cos((2i+1)*k*pi/16)
///
/// with the same scale factors as the forward transform:
/// c(0) = sqrt(1/8), c(k) = sqrt(2/8) for k > 0.
fn idct1d(input: &[f64; 8]) -> [f64; 8] {
    let mut samples = [0.0f64; 8];
    for (i, slot) in samples.iter_mut().enumerate() {
        // Accumulate left to right from 0.0 so the floating-point summation
        // order is fixed.
        *slot = input.iter().enumerate().fold(0.0, |acc, (k, &coef)| {
            let scale = match k {
                0 => (1.0 / 8.0_f64).sqrt(),
                _ => (2.0 / 8.0_f64).sqrt(),
            };
            acc + scale * coef * ((2 * i + 1) as f64 * k as f64 * PI / 16.0).cos()
        });
    }
    samples
}
/// Separable 2D DCT of an 8x8 block.
///
/// Runs the 1D DCT over every row, then over every column of that
/// intermediate result. This equals the full 2D DCT but costs O(N^3)
/// instead of O(N^4) for the naive 2D formulation.
fn dct2_8x8(block: &[[f64; 8]; 8]) -> [[f64; 8]; 8] {
    // Pass 1: transform each row.
    let mut row_pass = [[0.0f64; 8]; 8];
    for (dst, src) in row_pass.iter_mut().zip(block.iter()) {
        *dst = dct1d(src);
    }

    // Pass 2: gather each column, transform it, scatter it back.
    let mut coeffs = [[0.0f64; 8]; 8];
    for col in 0..8 {
        let mut column = [0.0f64; 8];
        for (cell, line) in column.iter_mut().zip(row_pass.iter()) {
            *cell = line[col];
        }
        let transformed = dct1d(&column);
        for (line, &value) in coeffs.iter_mut().zip(transformed.iter()) {
            line[col] = value;
        }
    }
    coeffs
}
/// Separable 2D inverse DCT of an 8x8 coefficient block.
///
/// Undoes [`dct2_8x8`] in the opposite order: the 1D inverse DCT is applied
/// along columns first, then along rows.
fn idct2_8x8(block: &[[f64; 8]; 8]) -> [[f64; 8]; 8] {
    // Pass 1: gather each column, inverse-transform it, scatter it back.
    let mut column_pass = [[0.0f64; 8]; 8];
    for col in 0..8 {
        let mut column = [0.0f64; 8];
        for (cell, line) in column.iter_mut().zip(block.iter()) {
            *cell = line[col];
        }
        let restored = idct1d(&column);
        for (line, &value) in column_pass.iter_mut().zip(restored.iter()) {
            line[col] = value;
        }
    }

    // Pass 2: inverse-transform each row.
    let mut pixels = [[0.0f64; 8]; 8];
    for (dst, src) in pixels.iter_mut().zip(column_pass.iter()) {
        *dst = idct1d(src);
    }
    pixels
}
// ─── QIM ─────────────────────────────────────────────────────────────────────
//
// Quantization Index Modulation (QIM) is the core technique for encoding bits
// into DCT coefficients. It works by quantizing each coefficient to one of two
// interleaved grids, where the grid selection encodes the bit value.
//
// For bit 0: quantize to the nearest multiple of Q (grid: ..., -Q, 0, Q, 2Q, ...)
// For bit 1: quantize to the nearest multiple of Q, offset by Q/2 (grid: ..., -Q/2, Q/2, 3Q/2, ...)
//
// Extraction simply measures which grid the coefficient is closest to.
//
// QIM is preferred over spread-spectrum or LSB methods because it is:
// - Robust to recompression (the quantization step is larger than JPEG's own)
// - Simple to implement and analyze
// - Deterministic (no pseudo-random spreading sequence to synchronize)
/// Encode one bit into a DCT coefficient with QIM.
///
/// The coefficient is snapped to the nearest point of the grid chosen by
/// `bit`:
/// - `bit == 1`: multiples of `q` shifted by `q/2` (i.e., q/2, 3q/2, ...)
/// - any other value: plain multiples of `q` (i.e., 0, q, 2q, ...)
///
/// Returns the snapped coefficient.
fn qim_embed(coef: f64, bit: u8, q: f64) -> f64 {
    let grid_shift = match bit {
        1 => q / 2.0,
        _ => 0.0,
    };
    // Move into the unshifted grid, snap to the nearest multiple of q, move back.
    ((coef - grid_shift) / q).round() * q + grid_shift
}
/// Decode one bit from a DCT coefficient with QIM.
///
/// Measures how far the coefficient is from the nearest point of the bit-0
/// grid (multiples of `q`) and of the bit-1 grid (multiples of `q` shifted by
/// `q/2`), and reports the closer grid. Ties resolve to 0. This is the ML
/// (maximum likelihood) decoder for QIM under additive noise.
fn qim_extract(coef: f64, q: f64) -> u8 {
    // Distance from `v` to the nearest multiple of q.
    let grid_distance = |v: f64| (v - (v / q).round() * q).abs();

    let to_zero_grid = grid_distance(coef);
    let to_one_grid = grid_distance(coef - q / 2.0);
    if to_one_grid < to_zero_grid {
        1
    } else {
        0
    }
}
// ─── Bit conversion ──────────────────────────────────────────────────────────
/// Expand bytes into individual bits, most significant bit first.
///
/// Every input byte contributes exactly 8 entries, each 0 or 1.
/// Example: `[0xCA]` -> `[1, 1, 0, 0, 1, 0, 1, 0]`
fn bytes_to_bits(bytes: &[u8]) -> Vec<u8> {
    bytes
        .iter()
        .flat_map(|&byte| (0..8).rev().map(move |shift| (byte >> shift) & 1))
        .collect()
}
/// Pack individual bits (most significant bit first) into bytes.
///
/// Bits are consumed in groups of 8. If the final group is short, the
/// missing low-order bits of the last byte are left as zero.
fn bits_to_bytes(bits: &[u8]) -> Vec<u8> {
    bits.chunks(8)
        .map(|group| {
            group
                .iter()
                .enumerate()
                .fold(0u8, |byte, (pos, &bit)| byte | (bit << (7 - pos)))
        })
        .collect()
}
// ─── Block selection ─────────────────────────────────────────────────────────
/// Compute the absolute pixel positions of embed blocks for a given image size.
///
/// This function deterministically maps image dimensions to a list of block
/// positions. Both the embedder and extractor call this function with the same
/// dimensions to agree on where blocks are. During crop recovery, the extractor
/// tries different assumed original dimensions to find the correct grid.
///
/// Returns `Vec<(px, py)>` -- top-left corners of 8x8 blocks in pixel coordinates.
/// Returns an empty vec if the image is too small to embed.
///
/// Blocks are selected with even spacing (stride) across the embed region to
/// spread the watermark uniformly, making it more resilient to localized damage.
/// The number of copies is capped at 50 to avoid diminishing returns.
fn compute_embed_positions(img_width: usize, img_height: usize) -> Vec<(usize, usize)> {
let region = compute_region(img_width, img_height);
let total_blocks = region.blocks_x * region.blocks_y;
if total_blocks < BLOCKS_PER_COPY * MIN_COPIES {
return Vec::new();
}
let num_copies = (total_blocks / BLOCKS_PER_COPY).min(50);
let target_count = num_copies * BLOCKS_PER_COPY;
// Stride ensures blocks are evenly distributed across the embed region
let stride = (total_blocks / target_count).max(1);
let mut positions = Vec::with_capacity(target_count);
let mut idx = 0;
while positions.len() < target_count && idx < total_blocks {
let bx = idx % region.blocks_x;
let by = idx / region.blocks_x;
let px = region.x_offset + bx * BLOCK_SIZE;
let py = region.y_offset + by * BLOCK_SIZE;
positions.push((px, py));
idx += stride;
}
positions
}
/// Pick up to `target_count` blocks of the embed region, returned as `(bx, by)`
/// block indices relative to the region.
///
/// Blocks are numbered row by row and every `stride`-th one is taken, where
/// `stride = max(total_blocks / target_count, 1)`, which spreads the
/// selection across the region. An empty vec is returned when the region has
/// no blocks or `target_count` is zero.
fn select_embed_blocks(region: &EmbedRegion, target_count: usize) -> Vec<(usize, usize)> {
    let columns = region.blocks_x;
    let total_blocks = columns * region.blocks_y;
    if total_blocks == 0 || target_count == 0 {
        return Vec::new();
    }
    let stride = std::cmp::max(total_blocks / target_count, 1);
    (0..total_blocks)
        .step_by(stride)
        .take(target_count)
        .map(|idx| (idx % columns, idx / columns))
        .collect()
}
// ─── Reconstruct JPEG ────────────────────────────────────────────────────────
/// Reconstruct a JPEG image after modifying its luminance channel.
///
/// This function takes the original JPEG (for its Cb/Cr chrominance data) and
/// the modified Y channel, then:
///
/// 1. Decodes the original JPEG to get per-pixel Cb and Cr values.
/// 2. For each pixel, combines the modified Y with the original Cb/Cr.
/// 3. Converts YCbCr back to RGB using the ITU-R BT.601 inverse formula.
/// 4. Re-encodes as JPEG at quality 92 (high enough to preserve the watermark).
///
/// Only the luminance changes; chrominance is preserved from the original.
///
/// # Errors
///
/// Returns [`RelicarioError::ImgSecret`] if the original cannot be decoded,
/// if `y_modified` does not have the same dimensions as the decoded image,
/// or if re-encoding fails.
fn reconstruct_jpeg(original_jpeg: &[u8], y_modified: &YChannel) -> Result<Vec<u8>> {
    let reader = ImageReader::new(Cursor::new(original_jpeg))
        .with_guessed_format()
        .map_err(|e| RelicarioError::ImgSecret(format!("failed to read image: {e}")))?;
    let img = reader
        .decode()
        .map_err(|e| RelicarioError::ImgSecret(format!("failed to decode image: {e}")))?;
    let rgb = img.to_rgb8();
    let (width, height) = (rgb.width(), rgb.height());

    // A mismatched Y plane would either index out of bounds or silently read
    // luminance from the wrong pixel, so refuse it up front.
    if y_modified.width != width as usize || y_modified.height != height as usize {
        return Err(RelicarioError::ImgSecret(
            "luminance channel dimensions do not match carrier image".into(),
        ));
    }

    let mut output = RgbImage::new(width, height);
    for py in 0..height {
        for px in 0..width {
            let orig = rgb.get_pixel(px, py);
            let r = orig[0] as f64;
            let g = orig[1] as f64;
            let b = orig[2] as f64;
            // Chrominance of the original pixel (we only modify Y).
            let cb = -0.168736 * r - 0.331264 * g + 0.5 * b + 128.0;
            let cr = 0.5 * r - 0.418688 * g - 0.081312 * b + 128.0;
            // Luminance comes from the watermarked channel.
            let y_new = y_modified.get(px as usize, py as usize);
            // Convert YCbCr -> RGB using ITU-R BT.601 inverse
            let r_new = y_new + 1.402 * (cr - 128.0);
            let g_new = y_new - 0.344136 * (cb - 128.0) - 0.714136 * (cr - 128.0);
            let b_new = y_new + 1.772 * (cb - 128.0);
            output.put_pixel(
                px,
                py,
                Rgb([
                    r_new.round().clamp(0.0, 255.0) as u8,
                    g_new.round().clamp(0.0, 255.0) as u8,
                    b_new.round().clamp(0.0, 255.0) as u8,
                ]),
            );
        }
    }

    let mut buf = Vec::new();
    let encoder = JpegEncoder::new_with_quality(&mut buf, 92);
    encoder
        .write_image(output.as_raw(), width, height, image::ExtendedColorType::Rgb8)
        .map_err(|e| RelicarioError::ImgSecret(format!("failed to encode JPEG: {e}")))?;
    Ok(buf)
}
// ─── Public API ──────────────────────────────────────────────────────────────
/// Hide a 256-bit secret inside a carrier JPEG.
///
/// The returned bytes are a re-encoded JPEG whose luminance channel carries
/// the secret in mid-frequency DCT coefficients.
///
/// ## Pipeline
///
/// 1. Reject oversized headers, then decode and extract the Y (luminance)
///    channel.
/// 2. Check that the image is at least 100x100 pixels and that the central
///    region holds enough blocks for at least 5 redundant copies.
/// 3. Work out how many copies fit (at most 50) and select evenly-spaced
///    blocks for them.
/// 4. For every copy, walk its 22 blocks; for each block:
///    - forward-DCT the 8x8 block,
///    - embed up to 12 bits into the mid-frequency coefficients via QIM,
///    - inverse-DCT and write the spatial-domain values back.
/// 5. Rebuild the JPEG from the modified Y channel and the original Cb/Cr.
///
/// # Errors
///
/// - [`RelicarioError::ImageTooSmall`] if the image is below minimum dimensions
///   or does not have enough blocks for reliable embedding.
/// - [`RelicarioError::ImgSecret`] if the image exceeds the dimension cap or
///   cannot be decoded or re-encoded.
pub fn embed(carrier_jpeg: &[u8], secret: &[u8; 32]) -> Result<Vec<u8>> {
    enforce_dimension_cap(carrier_jpeg)?;
    let mut y = extract_y_channel(carrier_jpeg)?;

    let (actual_width, actual_height) = (y.width as u32, y.height as u32);
    // Both size checks report the same error shape.
    let too_small = move || RelicarioError::ImageTooSmall {
        min_width: MIN_DIMENSION,
        min_height: MIN_DIMENSION,
        actual_width,
        actual_height,
    };
    if actual_width < MIN_DIMENSION || actual_height < MIN_DIMENSION {
        return Err(too_small());
    }

    let region = central_region(&y);
    let total_blocks = region.blocks_x * region.blocks_y;
    if total_blocks < BLOCKS_PER_COPY * MIN_COPIES {
        return Err(too_small());
    }

    // Cap at 50 copies -- beyond that, additional redundancy has diminishing
    // returns and the image modification becomes more visible.
    let num_copies = (total_blocks / BLOCKS_PER_COPY).min(50);
    let bits = bytes_to_bits(secret);
    let embed_blocks = select_embed_blocks(&region, num_copies * BLOCKS_PER_COPY);

    // Each run of BLOCKS_PER_COPY selected blocks holds one copy of the
    // secret; within a copy, the n-th block carries the n-th group of
    // BITS_PER_BLOCK bits (the final group is shorter).
    for copy_blocks in embed_blocks.chunks(BLOCKS_PER_COPY).take(num_copies) {
        for (&(bx, by), block_bits) in copy_blocks.iter().zip(bits.chunks(BITS_PER_BLOCK)) {
            let pixels = read_block(&y, bx, by, &region);
            let mut coeffs = dct2_8x8(&pixels);
            for (&(row, col), &bit) in EMBED_POSITIONS.iter().zip(block_bits.iter()) {
                coeffs[row][col] = qim_embed(coeffs[row][col], bit, QUANT_STEP);
            }
            let restored = idct2_8x8(&coeffs);
            write_block(&mut y, bx, by, &region, &restored);
        }
    }

    reconstruct_jpeg(carrier_jpeg, &y)
}
/// Recover the 256-bit secret from a (possibly re-encoded or mildly cropped)
/// JPEG.
///
/// After the header dimension cap has been checked, the work is handed to
/// [`extract_with_crop_recovery`], which tries canonical extraction first
/// (image at its original dimensions) and then searches plausible original
/// dimensions in case the image was cropped.
///
/// # Errors
///
/// - [`RelicarioError::ImgSecret`] if the header cannot be parsed, the image
///   exceeds the dimension cap, or decoding fails.
/// - [`RelicarioError::ExtractionFailed`] if no valid secret could be recovered
///   (image was never watermarked, or was too heavily recompressed/cropped).
pub fn extract(jpeg_bytes: &[u8]) -> Result<[u8; 32]> {
    enforce_dimension_cap(jpeg_bytes).and_then(|()| extract_with_crop_recovery(jpeg_bytes))
}
/// Try to read the secret under the assumption that the image originally
/// measured `orig_w` x `orig_h` and has since been shifted by `(dx, dy)`
/// pixels (for crop recovery).
///
/// The block grid is derived from the assumed original dimensions; each block
/// position is then offset by `dx`/`dy` before being read from the actual
/// (possibly cropped) image. Blocks that end up at negative coordinates or
/// outside the image are skipped.
///
/// Every readable block votes on the bits it carries. For each of the 256 bit
/// positions the majority wins, provided at least 60% of the votes agree.
///
/// # Errors
///
/// Returns [`RelicarioError::ExtractionFailed`] when the assumed layout has no
/// embed positions, when some bit received no votes at all, or when any bit's
/// majority falls below the 60% confidence threshold.
fn try_extract_with_layout(
    y: &YChannel,
    orig_w: usize,
    orig_h: usize,
    dx: isize,
    dy: isize,
) -> Result<[u8; 32]> {
    let positions = compute_embed_positions(orig_w, orig_h);
    if positions.is_empty() {
        return Err(RelicarioError::ExtractionFailed);
    }
    let layout = compute_region(orig_w, orig_h);
    let num_copies = (layout.blocks_x * layout.blocks_y / BLOCKS_PER_COPY).min(50);

    // Per bit position: (votes for 1, votes cast).
    let mut tally = vec![(0usize, 0usize); SECRET_BITS];

    for copy_positions in positions.chunks(BLOCKS_PER_COPY).take(num_copies) {
        for (block_idx, &(orig_px, orig_py)) in copy_positions.iter().enumerate() {
            // Shift the original grid position into the (possibly cropped) image.
            let (shifted_x, shifted_y) = (orig_px as isize + dx, orig_py as isize + dy);
            if shifted_x < 0 || shifted_y < 0 {
                continue;
            }
            let block = match read_block_abs(y, shifted_x as usize, shifted_y as usize) {
                Some(pixels) => pixels,
                None => continue, // block out of bounds (cropped away)
            };
            let coeffs = dct2_8x8(&block);

            // This block carries the bits starting at `first_bit`; the final
            // block of a copy carries fewer than BITS_PER_BLOCK.
            let first_bit = block_idx * BITS_PER_BLOCK;
            for (bit_idx, &(row, col)) in (first_bit..SECRET_BITS).zip(EMBED_POSITIONS.iter()) {
                let (ones, seen) = &mut tally[bit_idx];
                *seen += 1;
                if qim_extract(coeffs[row][col], QUANT_STEP) == 1 {
                    *ones += 1;
                }
            }
        }
    }

    // Majority vote with confidence check: each bit must have >= 60% agreement
    // across copies, otherwise the watermark is considered too degraded.
    let mut bits = vec![0u8; SECRET_BITS];
    for (bit, &(ones, seen)) in bits.iter_mut().zip(tally.iter()) {
        if seen == 0 {
            return Err(RelicarioError::ExtractionFailed);
        }
        let zeros = seen - ones;
        let confidence = ones.max(zeros) as f64 / seen as f64;
        if confidence < 0.60 {
            return Err(RelicarioError::ExtractionFailed);
        }
        *bit = u8::from(ones > zeros);
    }

    let packed = bits_to_bytes(&bits);
    let mut secret = [0u8; 32];
    secret.copy_from_slice(&packed[..32]);
    Ok(secret)
}
/// Extract with automatic crop recovery.
///
/// Tries extraction in order of decreasing likelihood:
///
/// 1. **Uncropped**: assume the image has its original dimensions (most common case).
/// 2. **Width-only crop (8-pixel steps)**: try original widths above the current
///    width up to +20%, stepping by 8 pixels from the current width. Assumes
///    right-side crop (left edge unchanged, dx=0).
/// 3. **Height-only crop (8-pixel steps)**: same strategy for vertical crops.
/// 4. **Width crop (remaining widths)**: every other candidate width in the
///    same range, i.e. those whose distance from the current width is not a
///    multiple of 8.
///
/// Step 4 decides what was "already tried" by the distance from the current
/// width, not by the candidate's own alignment. Step 2 walks
/// `width + 8, width + 16, ...`, so when the current width is not itself a
/// multiple of 8, the 8-aligned candidate widths (the most likely originals)
/// are only reached in step 4 and must not be skipped there.
///
/// The search space is limited to 20% expansion in each dimension, which covers
/// the 15% crumple zone plus some margin for measurement error.
///
/// # Errors
///
/// Returns [`RelicarioError::ExtractionFailed`] if the image is smaller than
/// the minimum dimension or no candidate layout yields a confident result;
/// decode errors from the Y-channel extraction are propagated.
fn extract_with_crop_recovery(jpeg_bytes: &[u8]) -> Result<[u8; 32]> {
    let y = extract_y_channel(jpeg_bytes)?;
    let (cur_w, cur_h) = (y.width, y.height);
    if (cur_w as u32) < MIN_DIMENSION || (cur_h as u32) < MIN_DIMENSION {
        return Err(RelicarioError::ExtractionFailed);
    }

    // All candidates assume the left/top edges are intact (dx = dy = 0).
    let attempt =
        |orig_w: usize, orig_h: usize| try_extract_with_layout(&y, orig_w, orig_h, 0, 0).ok();

    // Try 1: assume the image is uncropped (original size = current size)
    if let Some(secret) = attempt(cur_w, cur_h) {
        return Ok(secret);
    }

    let max_orig_w = (cur_w as f64 * 1.20) as usize;
    let max_orig_h = (cur_h as f64 * 1.20) as usize;

    // Try 2: width-only crops in 8-pixel steps. `skip(1)` drops the current
    // width itself, which try 1 has just covered.
    let stepped_widths = (cur_w..=max_orig_w)
        .step_by(BLOCK_SIZE)
        .skip(1)
        .map(|w| (w, cur_h));
    // Try 3: height-only crops in 8-pixel steps.
    let stepped_heights = (cur_h..=max_orig_h)
        .step_by(BLOCK_SIZE)
        .skip(1)
        .map(|h| (cur_w, h));
    // Try 4: every width that try 2 did not visit.
    let remaining_widths = (cur_w..=max_orig_w)
        .filter(|&w| (w - cur_w) % BLOCK_SIZE != 0)
        .map(|w| (w, cur_h));

    stepped_widths
        .chain(stepped_heights)
        .chain(remaining_widths)
        .find_map(|(orig_w, orig_h)| attempt(orig_w, orig_h))
        .ok_or(RelicarioError::ExtractionFailed)
}
// ─── Tests ───────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
use super::*;
use image::GenericImageView;
/// Builds a deterministic synthetic RGB image of the requested size and returns
/// it encoded as a JPEG at quality 92.
fn make_test_jpeg(width: u32, height: u32) -> Vec<u8> {
    use image::codecs::jpeg::JpegEncoder;
    use image::{ImageBuffer, ImageEncoder, Rgb};

    // Each channel is a different linear combination of the pixel coordinates,
    // wrapped into the 0..=255 range.
    let pixels = ImageBuffer::from_fn(width, height, |col, row| {
        let red = (col * 7 + row * 13) % 256;
        let green = (col * 11 + row * 3) % 256;
        let blue = (col * 5 + row * 17) % 256;
        Rgb([red as u8, green as u8, blue as u8])
    });

    let mut jpeg = Vec::new();
    JpegEncoder::new_with_quality(&mut jpeg, 92)
        .write_image(pixels.as_raw(), width, height, image::ExtendedColorType::Rgb8)
        .unwrap();
    jpeg
}
/// A forward DCT followed by the inverse DCT must reproduce the input block to
/// within 1e-6 per sample.
#[test]
fn dct2_idct2_round_trip() {
    let input: [[f64; 8]; 8] = [
        [52.0, 55.0, 61.0, 66.0, 70.0, 61.0, 64.0, 73.0],
        [63.0, 59.0, 55.0, 90.0, 109.0, 85.0, 69.0, 72.0],
        [62.0, 59.0, 68.0, 113.0, 144.0, 104.0, 66.0, 73.0],
        [63.0, 58.0, 71.0, 122.0, 154.0, 106.0, 70.0, 69.0],
        [67.0, 61.0, 68.0, 104.0, 126.0, 88.0, 68.0, 70.0],
        [79.0, 65.0, 60.0, 70.0, 77.0, 68.0, 58.0, 75.0],
        [85.0, 71.0, 64.0, 59.0, 55.0, 61.0, 65.0, 83.0],
        [87.0, 79.0, 69.0, 68.0, 65.0, 76.0, 78.0, 94.0],
    ];

    let restored = idct2_8x8(&dct2_8x8(&input));

    for (row, input_row) in input.iter().enumerate() {
        for (col, &expected) in input_row.iter().enumerate() {
            let actual = restored[row][col];
            assert!(
                (expected - actual).abs() < 1e-6,
                "Mismatch at ({}, {}): {} vs {}",
                row,
                col,
                expected,
                actual
            );
        }
    }
}
/// Embedding a bit with QIM and immediately extracting it must return the same
/// bit for a spread of negative, zero and positive coefficients.
#[test]
fn qim_embed_extract_single_bit() {
    for coef in [-50.0, -10.0, 0.0, 10.0, 50.0, 100.0, -100.0] {
        for bit in [0u8, 1u8] {
            let recovered = qim_extract(qim_embed(coef, bit, QUANT_STEP), QUANT_STEP);
            assert_eq!(recovered, bit, "Failed for coef={}, bit={}", coef, bit);
        }
    }
}
/// A QIM-embedded bit must still be extracted correctly after the coefficient
/// is perturbed by a small additive offset.
#[test]
fn qim_survives_small_noise() {
    let coefficients = [-50.0, 0.0, 30.0, 75.0, -75.0];
    // All offsets have magnitude < QUANT_STEP/4 = 12.5.
    let noise_levels = [-10.0, -5.0, 5.0, 10.0];

    for coef in coefficients {
        for bit in [0u8, 1u8] {
            let clean = qim_embed(coef, bit, QUANT_STEP);
            for noise in noise_levels {
                let recovered = qim_extract(clean + noise, QUANT_STEP);
                assert_eq!(
                    recovered, bit,
                    "Failed for coef={}, bit={}, noise={}",
                    coef, bit, noise
                );
            }
        }
    }
}
/// The extracted Y channel must report the carrier's dimensions and hold one
/// sample per pixel.
#[test]
fn extract_y_channel_from_synthetic_jpeg() {
    let luma = extract_y_channel(&make_test_jpeg(200, 150)).unwrap();

    assert_eq!((luma.width, luma.height), (200, 150));
    assert_eq!(luma.data.len(), 200 * 150);
}
/// Reading block (0, 0), writing it back unchanged, and reading it again must
/// yield identical values in all 64 positions.
#[test]
fn read_write_block_round_trip() {
    let mut luma = extract_y_channel(&make_test_jpeg(200, 150)).unwrap();
    let region = central_region(&luma);

    let before = read_block(&luma, 0, 0, &region);
    write_block(&mut luma, 0, 0, &region, &before);
    let after = read_block(&luma, 0, 0, &region);

    for (row, col) in (0..8usize).flat_map(|r| (0..8usize).map(move |c| (r, c))) {
        assert_eq!(before[row][col], after[row][col]);
    }
}
/// A fixed 32-byte secret embedded into a 400x300 carrier must be recovered
/// exactly by `extract`.
#[test]
fn embed_extract_round_trip() {
    let secret: [u8; 32] = [
        0xDE, 0xAD, 0xBE, 0xEF, 0x01, 0x02, 0x03, 0x04,
        0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C,
        0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14,
        0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C,
    ];
    let carrier = make_test_jpeg(400, 300);

    let stego = embed(&carrier, &secret).unwrap();

    assert_eq!(extract(&stego).unwrap(), secret);
}
/// Same round trip as the fixed-secret case, but with 32 freshly generated
/// random bytes as the secret.
#[test]
fn embed_extract_random_secret() {
    use rand::RngCore;

    let mut secret = [0u8; 32];
    rand::thread_rng().fill_bytes(&mut secret);

    let carrier = make_test_jpeg(400, 300);
    let stego = embed(&carrier, &secret).unwrap();
    let recovered = extract(&stego).unwrap();

    assert_eq!(recovered, secret);
}
/// Extracting from a carrier that never went through `embed` must return an error.
#[test]
fn extract_from_non_embedded_image_fails() {
    let plain = make_test_jpeg(400, 300);
    assert!(extract(&plain).is_err());
}
/// Embedding into a 32x32 carrier must be rejected with an error.
#[test]
fn image_too_small_fails() {
    let tiny = make_test_jpeg(32, 32);
    let zero_secret = [0u8; 32];
    assert!(embed(&tiny, &zero_secret).is_err());
}
/// The secret must still be recoverable after the stego image is decoded to RGB
/// and re-encoded as a JPEG at quality 85.
#[test]
fn embed_extract_survives_recompression_q85() {
    let secret: [u8; 32] = [
        0xCA, 0xFE, 0xBA, 0xBE, 0x01, 0x23, 0x45, 0x67,
        0x89, 0xAB, 0xCD, 0xEF, 0xFE, 0xDC, 0xBA, 0x98,
        0x76, 0x54, 0x32, 0x10, 0x11, 0x22, 0x33, 0x44,
        0x55, 0x66, 0x77, 0x88, 0x99, 0xAA, 0xBB, 0xCC,
    ];
    let carrier = make_test_jpeg(400, 300);
    let stego = embed(&carrier, &secret).unwrap();

    // Decode the stego JPEG back into raw RGB pixels.
    let pixels = ImageReader::new(Cursor::new(&stego))
        .with_guessed_format()
        .unwrap()
        .decode()
        .unwrap()
        .to_rgb8();

    // Re-encode those pixels at Q85.
    let mut recompressed = Vec::new();
    JpegEncoder::new_with_quality(&mut recompressed, 85)
        .write_image(
            pixels.as_raw(),
            pixels.width(),
            pixels.height(),
            image::ExtendedColorType::Rgb8,
        )
        .unwrap();

    assert_eq!(extract(&recompressed).unwrap(), secret);
}
/// A JPEG whose header claims 20000x20000 pixels must be rejected with an
/// `ImgSecret` error whose message mentions "dimension".
#[test]
fn rejects_oversized_image_without_full_decode() {
    // Only the header matters here: the stream carries no pixel data, so the
    // dimension check is expected to bail out before any decoding happens.
    let oversized = build_oversized_jpeg_header(20_000, 20_000);

    assert!(matches!(
        extract(&oversized),
        Err(RelicarioError::ImgSecret(ref msg)) if msg.contains("dimension")
    ));
}
/// Assembles a minimal JPEG-like byte stream whose SOF0 segment declares the
/// given `width` and `height`.
///
/// Layout: SOI, an APP0 "JFIF" segment, the SOF0 segment, then EOI. No scan
/// (SOS) segment or pixel data is emitted.
fn build_oversized_jpeg_header(width: u16, height: u16) -> Vec<u8> {
    let height_be = height.to_be_bytes();
    let width_be = width.to_be_bytes();

    let segments: [&[u8]; 9] = [
        // SOI
        &[0xFF, 0xD8],
        // APP0 marker + segment length (0x0010)
        &[0xFF, 0xE0, 0x00, 0x10],
        b"JFIF\0",
        &[0x01, 0x01, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00],
        // SOF0 marker + segment length (0x0011) + one more header byte (0x08)
        &[0xFF, 0xC0, 0x00, 0x11, 0x08],
        // Height comes before width, both big-endian.
        &height_be,
        &width_be,
        // Remaining SOF0 payload, copied verbatim.
        &[0x03, 0x01, 0x22, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01],
        // EOI
        &[0xFF, 0xD9],
    ];

    segments.concat()
}
/// The secret must still be recoverable after 10% of the width is cropped from
/// the right edge and the result is re-encoded as a JPEG at quality 92.
#[test]
fn embed_extract_survives_10pct_crop() {
    // Secret bytes are 0x01 through 0x20 in order.
    let secret: [u8; 32] = std::array::from_fn(|i| i as u8 + 1);
    let carrier = make_test_jpeg(400, 300);
    let stego = embed(&carrier, &secret).unwrap();

    let decoded = ImageReader::new(Cursor::new(&stego))
        .with_guessed_format()
        .unwrap()
        .decode()
        .unwrap();

    // Keep the left 90% of the image; the top-left corner stays at (0, 0).
    let (full_w, full_h) = decoded.dimensions();
    let removed = (full_w as f64 * 0.10) as u32;
    let cropped_rgb = decoded.crop_imm(0, 0, full_w - removed, full_h).to_rgb8();

    let mut cropped_jpeg = Vec::new();
    JpegEncoder::new_with_quality(&mut cropped_jpeg, 92)
        .write_image(
            cropped_rgb.as_raw(),
            cropped_rgb.width(),
            cropped_rgb.height(),
            image::ExtendedColorType::Rgb8,
        )
        .unwrap();

    assert_eq!(extract(&cropped_jpeg).unwrap(), secret);
}
}