//! DCT-based steganographic embedding of a 256-bit secret in JPEG images.
//!
//! This is the novel component of relicario. It hides a 32-byte secret inside a
//! JPEG image's luminance channel using Quantization Index Modulation (QIM) on
//! mid-frequency DCT coefficients, with majority voting across multiple redundant
//! copies for robustness.
//!
//! ## High-level algorithm
//!
//! ### Embedding (`embed`)
//!
//! 1. Decode the carrier JPEG and extract the luminance (Y) channel.
//! 2. Compute the "embed region" -- the central 70% of the image (15% margin
//!    on each side acts as a crumple zone for mild cropping).
//! 3. Divide the embed region into 8x8 pixel blocks and select evenly-spaced
//!    blocks for embedding.
//! 4. For each copy of the secret (5-50 copies depending on image size):
//!    - For each of the 22 blocks needed to hold 256 bits (12 bits per block):
//!      - Apply the 2D DCT to the 8x8 block.
//!      - Embed bits into 12 mid-frequency DCT coefficients using QIM.
//!      - Apply the inverse DCT to write the modified block back.
//! 5. Reconstruct the JPEG by replacing only the Y channel and re-encoding.
//!
//! ### Extraction (`extract`)
//!
//! 1. Decode the JPEG and extract the Y channel.
//! 2. Try the canonical extraction (assuming the image is uncropped).
//! 3. If that fails, try crop-recovery: search for plausible original dimensions
//!    and pixel offsets, reconstructing the block grid accordingly.
//! 4. For each copy of the secret, extract bits from DCT coefficients via QIM.
//! 5. Majority-vote each bit position across all copies. Require >= 60% confidence.
//!
//! ## Robustness
//!
//! The combination of QIM with a high quantization step (50.0), mid-frequency
//! coefficient placement, and majority voting across many copies makes the
//! watermark survive:
//! - JPEG recompression down to quality ~85
//! - Mild cropping (up to ~10% from edges, within the 15% crumple zone)
//! - Color space conversions (embedding is in luminance only)

use crate::error::{RelicarioError, Result};
use image::codecs::jpeg::JpegEncoder;
use image::ImageReader;
use image::{ImageEncoder, Rgb, RgbImage};
use std::f64::consts::PI;
use std::io::Cursor;

// ─── Constants ───────────────────────────────────────────────────────────────

/// DCT block size. JPEG uses 8x8 blocks, so we match that to minimize
/// interference with the JPEG codec's own quantization.
const BLOCK_SIZE: usize = 8;

/// QIM quantization step. Higher values make the watermark more robust to
/// recompression but introduce more visible artifacts. A value of 50.0 is
/// higher than the typical academic value of 25 -- this is intentional because
/// we need to survive JPEG recompression at Q85 and below, which applies
/// aggressive quantization to mid-frequency coefficients. The trade-off is
/// acceptable because the reference image is a personal photo, not a
/// publication-quality image.
const QUANT_STEP: f64 = 50.0;

/// Minimum image dimension (width or height) in pixels. Images smaller than
/// this cannot hold enough 8x8 blocks for reliable embedding.
const MIN_DIMENSION: u32 = 100;

/// Maximum image dimension (width or height) in pixels. Images larger than
/// this are rejected before full decode to prevent DoS via attacker-supplied
/// oversized JPEGs (audit M3).
pub const MAX_DIMENSION: u32 = 10_000;

/// Number of secret bits to embed: 256 bits = 32 bytes.
const SECRET_BITS: usize = 256;

/// Minimum number of redundant copies of the secret. More copies improve
/// extraction reliability via majority voting, but require more blocks.
const MIN_COPIES: usize = 5;

/// Number of mid-frequency DCT positions used per block. Each block carries
/// 12 bits of the secret. This matches `EMBED_POSITIONS.len()`.
const BITS_PER_BLOCK: usize = 12; // EMBED_POSITIONS.len()

/// Number of 8x8 blocks needed to hold one complete copy of the 256-bit secret.
/// ceil(256 / 12) = 22 blocks per copy.
const BLOCKS_PER_COPY: usize = SECRET_BITS.div_ceil(BITS_PER_BLOCK); // 22

/// Mid-frequency DCT coefficient positions for embedding, specified as
/// (row, col) indices into the 8x8 DCT coefficient matrix.
///
/// These are the twelve coefficients at zig-zag scan positions 6 through 17 --
/// the "sweet spot" between low-frequency coefficients (which carry visible
/// image structure) and high-frequency coefficients (which carry noise/detail
/// and are aggressively zeroed by JPEG compression).
///
/// Mid-frequency coefficients survive JPEG recompression better than high-frequency
/// ones, while causing less visible distortion than modifying low-frequency ones.
///
/// The zig-zag ordering is the standard JPEG scan order:
/// ```text
/// Zig-zag positions 6-9:   (0,3) (1,2) (2,1) (3,0)
/// Zig-zag positions 10-13: (4,0) (3,1) (2,2) (1,3)
/// Zig-zag positions 14-17: (0,4) (0,5) (1,4) (2,3)
/// ```
///
/// NOTE: the array below contains exactly these twelve positions, but its
/// second and third groups are NOT listed in zig-zag order (see the per-line
/// comments). The array index decides which secret bit lands on which
/// coefficient (`bit_idx = block_idx * BITS_PER_BLOCK + pos_idx`), so
/// reordering the entries would make previously embedded images unreadable.
const EMBED_POSITIONS: [(usize, usize); 12] = [
    (0, 3),
    (1, 2),
    (2, 1),
    (3, 0), // zig-zag 6, 7, 8, 9
    (0, 4),
    (1, 3),
    (2, 2),
    (3, 1), // zig-zag 14, 13, 12, 11
    (4, 0),
    (0, 5),
    (1, 4),
    (2, 3), // zig-zag 10, 15, 16, 17
];

// ─── Dimension guard ─────────────────────────────────────────────────────────

/// Walk JPEG markers until we hit an SOF (start-of-frame) marker, which
/// carries the image dimensions in bytes 5..=8 of its segment.
///
/// This peek does NOT decode any pixel data, so an oversized JPEG header is
/// rejected in O(marker-count) time without allocating a frame buffer.
///
/// Marker handling:
/// - `0xFF 0xFF`: the first byte is a fill byte that may precede any marker;
///   it is skipped and the following `0xFF` is re-examined as a marker prefix.
/// - `0xFF 0x00`, TEM (`0x01`), RST0-7 (`0xD0..=0xD7`), SOI (`0xD8`) and EOI
///   (`0xD9`) are standalone: they carry no length field and are skipped as
///   two bytes. Treating them as length-bearing segments would let the
///   scanner jump over the real SOF and report a different one.
/// - SOFn (`0xC0..=0xCF` except DHT `0xC4`, JPG `0xC8`, DAC `0xCC`) yields the
///   dimensions.
/// - Every other marker is skipped using its big-endian 16-bit length.
///
/// Returns `(width, height)` in pixels.
///
/// # Errors
///
/// Returns [`RelicarioError::ImgSecret`] if an SOF or marker segment is
/// truncated, if a segment declares a length below 2 (the length field counts
/// its own two bytes), or if no SOF marker is found.
fn peek_jpeg_dimensions(jpeg: &[u8]) -> Result<(u32, u32)> {
    let mut i = 0;
    while i + 1 < jpeg.len() {
        if jpeg[i] != 0xFF {
            i += 1;
            continue;
        }
        match jpeg[i + 1] {
            // Fill byte: stay on the next 0xFF so it is parsed as the prefix.
            0xFF => i += 1,
            // Stuffed zero / TEM / RSTn / SOI / EOI: no length field follows.
            0x00 | 0x01 | 0xD0..=0xD9 => i += 2,
            0xC0..=0xC3 | 0xC5..=0xC7 | 0xC9..=0xCB | 0xCD..=0xCF => {
                // SOFn layout: FF Cn | len(2) | precision(1) | height(2) | width(2)
                let Some(dims) = jpeg.get(i + 5..i + 9) else {
                    return Err(RelicarioError::ImgSecret("truncated SOF marker".into()));
                };
                let height = u32::from(u16::from_be_bytes([dims[0], dims[1]]));
                let width = u32::from(u16::from_be_bytes([dims[2], dims[3]]));
                return Ok((width, height));
            }
            _ => {
                let Some(len_bytes) = jpeg.get(i + 2..i + 4) else {
                    return Err(RelicarioError::ImgSecret("truncated marker segment".into()));
                };
                let seg_len = usize::from(u16::from_be_bytes([len_bytes[0], len_bytes[1]]));
                if seg_len < 2 {
                    return Err(RelicarioError::ImgSecret(
                        "invalid marker segment length".into(),
                    ));
                }
                i += 2 + seg_len;
            }
        }
    }
    Err(RelicarioError::ImgSecret(
        "no SOF marker found in JPEG".into(),
    ))
}

/// Guard against JPEGs whose header declares a side longer than
/// [`MAX_DIMENSION`].
///
/// Only the header is inspected (via `peek_jpeg_dimensions`), so the check
/// runs before any pixel data is decoded. Both `embed` and `extract` call it
/// first, so that an attacker-supplied 32000×32000 image cannot wedge the
/// WASM service worker during the expensive DCT pass (audit M3).
///
/// # Errors
///
/// Propagates header-parsing errors, and returns
/// [`RelicarioError::ImgSecret`] when either dimension exceeds the cap.
fn enforce_dimension_cap(jpeg: &[u8]) -> Result<()> {
    let (w, h) = peek_jpeg_dimensions(jpeg)?;
    let within_cap = w <= MAX_DIMENSION && h <= MAX_DIMENSION;
    if within_cap {
        return Ok(());
    }
    Err(RelicarioError::ImgSecret(format!(
        "image dimensions {w}x{h} exceed {MAX_DIMENSION}x{MAX_DIMENSION} cap"
    )))
}

// ─── YChannel ────────────────────────────────────────────────────────────────

/// Luminance (Y) plane of an image, held as a flat row-major `f64` buffer.
///
/// Embedding is restricted to luminance because:
/// - JPEG does not spatially subsample luminance (chrominance is typically
///   4:2:0), so the complete DCT block grid is available.
/// - Chrominance subsampling halves the spatial resolution before the DCT,
///   which would misalign our block positions and destroy embedded data.
/// - A single channel keeps the DCT work simple and fast.
struct YChannel {
    /// Row-major samples: the value for pixel (x, y) lives at
    /// `data[y * width + x]`. Values start in [0, 255] when derived from RGB
    /// but may drift slightly outside that range while DCT coefficients are
    /// being manipulated.
    data: Vec<f64>,
    width: usize,
    height: usize,
}

impl YChannel {
    /// Flat buffer index of pixel (x, y) in the row-major layout.
    fn flat_index(&self, x: usize, y: usize) -> usize {
        y * self.width + x
    }

    /// Read the luminance at pixel (x, y). Panics if the computed index is
    /// outside the buffer.
    fn get(&self, x: usize, y: usize) -> f64 {
        let at = self.flat_index(x, y);
        self.data[at]
    }

    /// Overwrite the luminance at pixel (x, y). Panics if the computed index
    /// is outside the buffer.
    fn set(&mut self, x: usize, y: usize, val: f64) {
        let at = self.flat_index(x, y);
        self.data[at] = val;
    }
}

// ─── EmbedRegion ─────────────────────────────────────────────────────────────

/// Defines the central region of the image where embedding occurs.
///
/// The embed region is the central 70% of the image -- a 15% margin is excluded
/// on each side. This margin acts as a "crumple zone": if the image is mildly
/// cropped (e.g., a social media platform trims edges), the embedded data in the
/// center remains intact. The 15% margin is sufficient to tolerate up to ~10%
/// cropping from any single edge.
///
/// Instances are produced by `compute_region`; offsets and sizes are in
/// pixels, block counts are in whole 8x8 blocks.
struct EmbedRegion {
    /// Pixel offset from the left edge to the start of the embed region.
    x_offset: usize,
    /// Pixel offset from the top edge to the start of the embed region.
    y_offset: usize,
    /// Width of the embed region in pixels.
    // NOTE(review): only written (by `compute_region`) in the visible part of
    // this file, hence the lint suppression -- confirm before removing.
    #[allow(dead_code)]
    region_width: usize,
    /// Height of the embed region in pixels.
    // NOTE(review): same as `region_width` -- stored but not read in the
    // visible part of this file.
    #[allow(dead_code)]
    region_height: usize,
    /// Number of complete 8x8 blocks that fit horizontally in the embed region.
    blocks_x: usize,
    /// Number of complete 8x8 blocks that fit vertically in the embed region.
    blocks_y: usize,
}

// ─── Helper functions ────────────────────────────────────────────────────────

/// Decode an encoded image and return its luminance (Y) plane.
///
/// The image is decoded to 8-bit RGB and every pixel is reduced to luma with
/// the ITU-R BT.601 weights: `Y = 0.299*R + 0.587*G + 0.114*B`. Samples are
/// stored row-major, matching [`YChannel`]'s layout.
///
/// # Errors
///
/// Returns [`RelicarioError::ImgSecret`] if the format cannot be determined
/// or the image fails to decode.
fn extract_y_channel(jpeg_bytes: &[u8]) -> Result<YChannel> {
    let decoded = ImageReader::new(Cursor::new(jpeg_bytes))
        .with_guessed_format()
        .map_err(|e| RelicarioError::ImgSecret(format!("failed to read image: {e}")))?
        .decode()
        .map_err(|e| RelicarioError::ImgSecret(format!("failed to decode image: {e}")))?;
    let rgb = decoded.to_rgb8();
    let width = rgb.width() as usize;
    let height = rgb.height() as usize;

    // `pixels()` walks the buffer in row-major order, the same order the
    // nested y/x loops would visit it.
    let data: Vec<f64> = rgb
        .pixels()
        .map(|pixel| {
            let [r, g, b] = pixel.0;
            // ITU-R BT.601 luma coefficients
            0.299 * f64::from(r) + 0.587 * f64::from(g) + 0.114 * f64::from(b)
        })
        .collect();

    Ok(YChannel {
        data,
        width,
        height,
    })
}

/// Convenience wrapper: the embed region for a channel's own dimensions.
fn central_region(y: &YChannel) -> EmbedRegion {
    let (w, h) = (y.width, y.height);
    compute_region(w, h)
}

/// Derive the central embed region from image dimensions.
///
/// A margin of 15% of each dimension (rounded down to whole pixels) is cut
/// from every side, leaving roughly the central 70%. The margin serves as a
/// crumple zone for crop tolerance. Block counts are the number of complete
/// 8x8 blocks that fit in the remaining area.
fn compute_region(width: usize, height: usize) -> EmbedRegion {
    // Truncating cast: the margin is floor(15% of the extent).
    let margin_of = |extent: usize| (extent as f64 * 0.15) as usize;

    let (x_offset, y_offset) = (margin_of(width), margin_of(height));
    let region_width = width - 2 * x_offset;
    let region_height = height - 2 * y_offset;

    EmbedRegion {
        x_offset,
        y_offset,
        region_width,
        region_height,
        blocks_x: region_width / BLOCK_SIZE,
        blocks_y: region_height / BLOCK_SIZE,
    }
}

/// Copy the 8x8 block whose top-left corner is at pixel (`px`, `py`).
///
/// The result is indexed `[row][col]`. Yields `None` when the block would
/// reach past the right or bottom edge of the channel; crop-recovery
/// extraction relies on this to skip blocks that were cropped away.
fn read_block_abs(y: &YChannel, px: usize, py: usize) -> Option<[[f64; 8]; 8]> {
    let fits = px + 8 <= y.width && py + 8 <= y.height;
    if !fits {
        return None;
    }
    Some(std::array::from_fn(|row| {
        std::array::from_fn(|col| y.get(px + col, py + row))
    }))
}

/// Read an 8x8 block from the Y channel using block coordinates relative to
/// the embed region.
///
/// `(bx, by)` are block indices inside `region`; they are converted to
/// absolute pixel coordinates by adding the region offset.
///
/// # Panics
///
/// Panics if the addressed block does not lie fully inside `y`. Callers must
/// pass block indices below `region.blocks_x` / `region.blocks_y` for a
/// region computed from the same channel dimensions.
fn read_block(y: &YChannel, bx: usize, by: usize, region: &EmbedRegion) -> [[f64; 8]; 8] {
    let start_x = region.x_offset + bx * BLOCK_SIZE;
    let start_y = region.y_offset + by * BLOCK_SIZE;
    read_block_abs(y, start_x, start_y)
        .expect("embed-region block lies outside the image; region/channel mismatch")
}

/// Store an 8x8 block into the Y channel at block coordinates (`bx`, `by`)
/// relative to the embed region.
///
/// `block` is indexed `[row][col]`; each value is written to the pixel at the
/// region offset plus the block origin plus (col, row).
fn write_block(y: &mut YChannel, bx: usize, by: usize, region: &EmbedRegion, block: &[[f64; 8]; 8]) {
    let left = region.x_offset + bx * BLOCK_SIZE;
    let top = region.y_offset + by * BLOCK_SIZE;
    for row in 0..8 {
        for col in 0..8 {
            y.set(left + col, top + row, block[row][col]);
        }
    }
}

// ─── DCT ─────────────────────────────────────────────────────────────────────
//
// The Discrete Cosine Transform (DCT) converts a spatial-domain signal (pixel
// values) into a frequency-domain representation (coefficients). JPEG compression
// itself uses the 8x8 Type-II DCT, so working in the same domain lets us embed
// data where JPEG's own quantization is least destructive.
//
// We implement the DCT from scratch (rather than depending on a library) to keep
// the crate dependency-light and WASM-friendly. The 8x8 size is small enough
// that the naive O(N^2) computation is fast.

/// Orthonormal 1D DCT-II of eight samples.
///
/// Computes
///   X[k] = c(k) * sum_{i=0}^{7} x[i] * cos((2i+1)*k*pi/16)
///
/// with c(0) = sqrt(1/8) and c(k) = sqrt(2/8) for k > 0.
fn dct1d(input: &[f64; 8]) -> [f64; 8] {
    let scale_dc = (1.0 / 8.0_f64).sqrt();
    let scale_ac = (2.0 / 8.0_f64).sqrt();
    std::array::from_fn(|k| {
        // Explicit fold from 0.0 keeps the left-to-right accumulation order.
        let projection = input.iter().enumerate().fold(0.0, |acc, (i, &sample)| {
            acc + sample * ((2 * i + 1) as f64 * k as f64 * PI / 16.0).cos()
        });
        let scale = if k == 0 { scale_dc } else { scale_ac };
        scale * projection
    })
}

/// Inverse of `dct1d`: 1D DCT-III of eight coefficients.
///
/// Rebuilds the spatial samples as
///   x[i] = sum_{k=0}^{7} c(k) * X[k] * cos((2i+1)*k*pi/16)
///
/// with c(0) = sqrt(1/8) and c(k) = sqrt(2/8) for k > 0.
fn idct1d(input: &[f64; 8]) -> [f64; 8] {
    let weight = |k: usize| {
        if k == 0 {
            (1.0 / 8.0_f64).sqrt()
        } else {
            (2.0 / 8.0_f64).sqrt()
        }
    };
    std::array::from_fn(|i| {
        // Explicit fold from 0.0 keeps the left-to-right accumulation order.
        input.iter().enumerate().fold(0.0, |acc, (k, &coef)| {
            acc + weight(k) * coef * ((2 * i + 1) as f64 * k as f64 * PI / 16.0).cos()
        })
    })
}

/// Forward 2D DCT of an 8x8 block via two separable 1D passes.
///
/// The 1D DCT is applied to every row first, then to every column of the
/// intermediate result. This equals the full 2D DCT but costs O(N^3) rather
/// than the O(N^4) of the naive 2D formula. The output is indexed
/// `[row][col]`, where `row` is the vertical and `col` the horizontal
/// frequency.
fn dct2_8x8(block: &[[f64; 8]; 8]) -> [[f64; 8]; 8] {
    // Pass 1: transform each row.
    let row_pass: [[f64; 8]; 8] = std::array::from_fn(|r| dct1d(&block[r]));

    // Pass 2: transform each column of the row-transformed data.
    let mut coeffs = [[0.0f64; 8]; 8];
    for c in 0..8 {
        let column: [f64; 8] = std::array::from_fn(|r| row_pass[r][c]);
        for (r, &value) in dct1d(&column).iter().enumerate() {
            coeffs[r][c] = value;
        }
    }
    coeffs
}

/// Inverse 2D DCT of an 8x8 coefficient block via two separable 1D passes.
///
/// Undoes `dct2_8x8` in mirrored order: the inverse 1D transform runs down
/// every column first, then along every row of the intermediate result.
fn idct2_8x8(block: &[[f64; 8]; 8]) -> [[f64; 8]; 8] {
    // Pass 1: inverse-transform each column.
    let mut column_pass = [[0.0f64; 8]; 8];
    for c in 0..8 {
        let column: [f64; 8] = std::array::from_fn(|r| block[r][c]);
        for (r, &value) in idct1d(&column).iter().enumerate() {
            column_pass[r][c] = value;
        }
    }

    // Pass 2: inverse-transform each row.
    std::array::from_fn(|r| idct1d(&column_pass[r]))
}

// ─── QIM ─────────────────────────────────────────────────────────────────────
//
// Quantization Index Modulation (QIM) is the core technique for encoding bits
// into DCT coefficients. It works by quantizing each coefficient to one of two
// interleaved grids, where the grid selection encodes the bit value.
//
// For bit 0: quantize to the nearest multiple of Q  (grid: ..., -Q, 0, Q, 2Q, ...)
// For bit 1: quantize to the nearest multiple of Q, offset by Q/2  (grid: ..., -Q/2, Q/2, 3Q/2, ...)
//
// Extraction simply measures which grid the coefficient is closest to.
//
// QIM is preferred over spread-spectrum or LSB methods because it is:
// - Robust to recompression (the quantization step is larger than JPEG's own)
// - Simple to implement and analyze
// - Deterministic (no pseudo-random spreading sequence to synchronize)

/// Embed a single bit into a DCT coefficient using QIM.
///
/// Quantizes the coefficient to the nearest point on the grid selected by `bit`:
/// - `bit=0`: grid at multiples of `q` (i.e., 0, q, 2q, ...)
/// - `bit=1`: grid at multiples of `q` offset by `q/2` (i.e., q/2, 3q/2, ...)
///
/// The returned value is the modified coefficient.
fn qim_embed(coef: f64, bit: u8, q: f64) -> f64 {
    let offset = if bit == 1 { q / 2.0 } else { 0.0 };
    let shifted = coef - offset;
    let quantized = (shifted / q).round() * q;
    quantized + offset
}

/// Decode one bit from a DCT coefficient with QIM.
///
/// Measures how far the coefficient is from the nearest point of the bit-0
/// grid (multiples of `q`) and of the bit-1 grid (multiples of `q` shifted by
/// `q/2`), and reports the closer grid. Ties resolve to 0. This is the ML
/// (maximum likelihood) decoder for QIM under additive noise.
fn qim_extract(coef: f64, q: f64) -> u8 {
    // Distance from `v` to its nearest multiple of `q`.
    let gap_to_grid = |v: f64| (v - (v / q).round() * q).abs();

    let dist_zero = gap_to_grid(coef);
    let dist_one = gap_to_grid(coef - q / 2.0);
    match dist_zero <= dist_one {
        true => 0,
        false => 1,
    }
}

// ─── Bit conversion ──────────────────────────────────────────────────────────

/// Expand bytes into individual bits, most significant bit first.
///
/// Every input byte contributes eight output elements, each 0 or 1.
/// Example: `[0xCA]` -> `[1, 1, 0, 0, 1, 0, 1, 0]`
fn bytes_to_bits(bytes: &[u8]) -> Vec<u8> {
    bytes
        .iter()
        .flat_map(|&byte| (0..8).rev().map(move |shift| (byte >> shift) & 1))
        .collect()
}

/// Pack individual bits (most significant bit first) into bytes.
///
/// Bits are consumed in groups of eight. A final group shorter than eight is
/// left-aligned, i.e. the missing low bits of the last byte are zero.
fn bits_to_bytes(bits: &[u8]) -> Vec<u8> {
    bits.chunks(8)
        .map(|group| {
            group
                .iter()
                .enumerate()
                .fold(0u8, |packed, (slot, &bit)| packed | (bit << (7 - slot)))
        })
        .collect()
}

// ─── Block selection ─────────────────────────────────────────────────────────

/// Compute the absolute pixel positions of embed blocks for a given image size.
///
/// This function deterministically maps image dimensions to a list of block
/// positions. Both the embedder and extractor call this function with the same
/// dimensions to agree on where blocks are. During crop recovery, the extractor
/// tries different assumed original dimensions to find the correct grid.
///
/// Returns `Vec<(px, py)>` -- top-left corners of 8x8 blocks in pixel coordinates.
/// Returns an empty vec if the image is too small to embed.
///
/// Blocks are selected with even spacing (stride) across the embed region to
/// spread the watermark uniformly, making it more resilient to localized damage.
/// The number of copies is capped at 50 to avoid diminishing returns.
fn compute_embed_positions(img_width: usize, img_height: usize) -> Vec<(usize, usize)> {
    let region = compute_region(img_width, img_height);
    let total_blocks = region.blocks_x * region.blocks_y;
    if total_blocks < BLOCKS_PER_COPY * MIN_COPIES {
        return Vec::new();
    }
    let num_copies = (total_blocks / BLOCKS_PER_COPY).min(50);
    let target_count = num_copies * BLOCKS_PER_COPY;

    // Stride ensures blocks are evenly distributed across the embed region
    let stride = (total_blocks / target_count).max(1);
    let mut positions = Vec::with_capacity(target_count);
    let mut idx = 0;
    while positions.len() < target_count && idx < total_blocks {
        let bx = idx % region.blocks_x;
        let by = idx / region.blocks_x;
        let px = region.x_offset + bx * BLOCK_SIZE;
        let py = region.y_offset + by * BLOCK_SIZE;
        positions.push((px, py));
        idx += stride;
    }
    positions
}

/// Pick up to `target_count` evenly strided blocks from the embed region.
///
/// Blocks are numbered row-major across the region; every `stride`-th block
/// is taken, with `stride = max(total_blocks / target_count, 1)`. Results are
/// `(bx, by)` block indices relative to the region, which is what the
/// read_block/write_block API expects. [`compute_embed_positions`] produces
/// the same walk expressed in absolute pixel coordinates.
///
/// Returns an empty vec when the region has no blocks or `target_count` is 0.
fn select_embed_blocks(region: &EmbedRegion, target_count: usize) -> Vec<(usize, usize)> {
    let total_blocks = region.blocks_x * region.blocks_y;
    if total_blocks == 0 || target_count == 0 {
        return Vec::new();
    }
    // Even stride distributes blocks uniformly across the region
    let stride = (total_blocks / target_count).max(1);
    (0..total_blocks)
        .step_by(stride)
        .take(target_count)
        .map(|linear| (linear % region.blocks_x, linear / region.blocks_x))
        .collect()
}

// ─── Reconstruct JPEG ────────────────────────────────────────────────────────

/// Reconstruct a JPEG image after modifying its luminance channel.
///
/// This function takes the original JPEG (for its Cb/Cr chrominance data) and
/// the modified Y channel, then:
///
/// 1. Decodes the original JPEG to RGB and derives per-pixel Cb and Cr.
/// 2. For each pixel, combines the modified Y with the original Cb/Cr.
/// 3. Converts YCbCr back to RGB using the ITU-R BT.601 inverse formula,
///    rounding and clamping each component to [0, 255].
/// 4. Re-encodes as JPEG at quality 92 (high enough to preserve the watermark).
///
/// Only the luminance changes; chrominance is preserved from the original.
///
/// # Errors
///
/// Returns [`RelicarioError::ImgSecret`] if the original cannot be read or
/// decoded, if `y_modified` does not have the same dimensions as the decoded
/// original, or if JPEG encoding fails.
fn reconstruct_jpeg(original_jpeg: &[u8], y_modified: &YChannel) -> Result<Vec<u8>> {
    let reader = ImageReader::new(Cursor::new(original_jpeg))
        .with_guessed_format()
        .map_err(|e| RelicarioError::ImgSecret(format!("failed to read image: {e}")))?;
    let img = reader
        .decode()
        .map_err(|e| RelicarioError::ImgSecret(format!("failed to decode image: {e}")))?;
    let rgb = img.to_rgb8();
    let (width, height) = (rgb.width(), rgb.height());

    // The per-pixel lookup below indexes `y_modified` with the carrier's
    // coordinates; a size mismatch would read the wrong samples or panic.
    if y_modified.width != width as usize || y_modified.height != height as usize {
        return Err(RelicarioError::ImgSecret(format!(
            "luminance channel is {}x{} but carrier image is {}x{}",
            y_modified.width, y_modified.height, width, height
        )));
    }

    let to_channel = |v: f64| v.round().clamp(0.0, 255.0) as u8;
    let mut output = RgbImage::new(width, height);

    for py in 0..height {
        for px in 0..width {
            let orig = rgb.get_pixel(px, py);
            let r = orig[0] as f64;
            let g = orig[1] as f64;
            let b = orig[2] as f64;

            // Derive Cb and Cr from the original pixel (we only modify Y)
            let cb = -0.168736 * r - 0.331264 * g + 0.5 * b + 128.0;
            let cr = 0.5 * r - 0.418688 * g - 0.081312 * b + 128.0;

            // Use the modified Y value from our watermarked luminance channel
            let y_new = y_modified.get(px as usize, py as usize);

            // Convert YCbCr -> RGB using ITU-R BT.601 inverse
            let r_new = y_new + 1.402 * (cr - 128.0);
            let g_new = y_new - 0.344136 * (cb - 128.0) - 0.714136 * (cr - 128.0);
            let b_new = y_new + 1.772 * (cb - 128.0);

            output.put_pixel(
                px,
                py,
                Rgb([to_channel(r_new), to_channel(g_new), to_channel(b_new)]),
            );
        }
    }

    let mut buf = Vec::new();
    let encoder = JpegEncoder::new_with_quality(&mut buf, 92);
    encoder
        .write_image(output.as_raw(), width, height, image::ExtendedColorType::Rgb8)
        .map_err(|e| RelicarioError::ImgSecret(format!("failed to encode JPEG: {e}")))?;
    Ok(buf)
}

// ─── Public API ──────────────────────────────────────────────────────────────

/// Embed a 256-bit secret into a carrier JPEG image.
///
/// Returns the modified JPEG bytes with the secret hidden in the luminance
/// channel's mid-frequency DCT coefficients.
///
/// ## Pipeline
///
/// 1. Decode the carrier and extract the Y (luminance) channel.
/// 2. Validate that the image is large enough (>= 100x100 pixels, and enough
///    blocks in the central region for at least 5 redundant copies).
/// 3. Compute how many copies fit (up to 50) and select evenly-spaced blocks.
/// 4. For each copy, iterate through the 22 blocks that hold 256 bits:
///    - Forward DCT the 8x8 block.
///    - Embed 12 bits per block into the mid-frequency coefficients via QIM.
///    - Inverse DCT to write the modified spatial-domain values back.
/// 5. Reconstruct the JPEG with the modified Y channel and original Cb/Cr.
///
/// # Errors
///
/// - [`RelicarioError::ImageTooSmall`] if the image is below minimum dimensions
///   or does not have enough blocks for reliable embedding.
/// - [`RelicarioError::ImgSecret`] if the image cannot be decoded or re-encoded.
pub fn embed(carrier_jpeg: &[u8], secret: &[u8; 32]) -> Result<Vec<u8>> {
    enforce_dimension_cap(carrier_jpeg)?;
    let mut y = extract_y_channel(carrier_jpeg)?;

    if (y.width as u32) < MIN_DIMENSION || (y.height as u32) < MIN_DIMENSION {
        return Err(RelicarioError::ImageTooSmall {
            min_width: MIN_DIMENSION,
            min_height: MIN_DIMENSION,
            actual_width: y.width as u32,
            actual_height: y.height as u32,
        });
    }

    let region = central_region(&y);
    let total_blocks = region.blocks_x * region.blocks_y;

    if total_blocks < BLOCKS_PER_COPY * MIN_COPIES {
        return Err(RelicarioError::ImageTooSmall {
            min_width: MIN_DIMENSION,
            min_height: MIN_DIMENSION,
            actual_width: y.width as u32,
            actual_height: y.height as u32,
        });
    }

    // Cap at 50 copies -- beyond that, additional redundancy has diminishing
    // returns and the image modification becomes more visible.
    let num_copies = (total_blocks / BLOCKS_PER_COPY).min(50);
    let bits = bytes_to_bits(secret);

    let blocks_needed = num_copies * BLOCKS_PER_COPY;
    let embed_blocks = select_embed_blocks(&region, blocks_needed);

    // Embed each copy of the secret into its assigned blocks
    for copy in 0..num_copies {
        for block_idx in 0..BLOCKS_PER_COPY {
            let global_idx = copy * BLOCKS_PER_COPY + block_idx;
            if global_idx >= embed_blocks.len() {
                break;
            }
            let (bx, by) = embed_blocks[global_idx];
            let mut block = read_block(&y, bx, by, &region);
            let mut dct = dct2_8x8(&block);

            // Embed up to 12 bits (BITS_PER_BLOCK) in this block's
            // mid-frequency DCT coefficients
            for (pos_idx, &(row, col)) in EMBED_POSITIONS.iter().enumerate() {
                let bit_idx = block_idx * BITS_PER_BLOCK + pos_idx;
                if bit_idx >= SECRET_BITS {
                    break;
                }
                dct[row][col] = qim_embed(dct[row][col], bits[bit_idx], QUANT_STEP);
            }

            block = idct2_8x8(&dct);
            write_block(&mut y, bx, by, &region, &block);
        }
    }

    reconstruct_jpeg(carrier_jpeg, &y)
}

/// Recover a 256-bit secret from a (possibly re-encoded or mildly cropped) JPEG.
///
/// After enforcing the dimension cap, the work is handed to
/// [`extract_with_crop_recovery`], which first tries canonical extraction
/// (image assumed to have its original dimensions) and then searches for
/// plausible original dimensions in case the image was cropped.
///
/// # Errors
///
/// - [`RelicarioError::ImgSecret`] if the header cannot be parsed or declares
///   dimensions above the cap.
/// - [`RelicarioError::ExtractionFailed`] if no valid secret could be recovered
///   (image was never watermarked, or was too heavily recompressed/cropped).
pub fn extract(jpeg_bytes: &[u8]) -> Result<[u8; 32]> {
    match enforce_dimension_cap(jpeg_bytes) {
        Ok(()) => extract_with_crop_recovery(jpeg_bytes),
        Err(cap_violation) => Err(cap_violation),
    }
}

/// Try to recover the secret for one hypothesis about the original layout.
///
/// The block grid is derived from the assumed original dimensions
/// `orig_w`/`orig_h`; every grid position is then shifted by `dx`/`dy` pixels
/// before being read from the actual (possibly cropped) channel `y`. Blocks
/// that end up at negative coordinates or outside `y` are skipped.
///
/// Each readable block votes on the bits it carries. For every one of the 256
/// bit positions the majority value wins, provided it reaches at least 60%
/// of the votes cast for that position.
///
/// # Errors
///
/// Returns [`RelicarioError::ExtractionFailed`] if the assumed dimensions
/// yield no embed grid, if any bit position received no votes, or if any bit
/// position falls below the 60% confidence threshold.
fn try_extract_with_layout(
    y: &YChannel,
    orig_w: usize,
    orig_h: usize,
    dx: isize,
    dy: isize,
) -> Result<[u8; 32]> {
    let positions = compute_embed_positions(orig_w, orig_h);
    if positions.is_empty() {
        return Err(RelicarioError::ExtractionFailed);
    }

    let grid = compute_region(orig_w, orig_h);
    let num_copies = (grid.blocks_x * grid.blocks_y / BLOCKS_PER_COPY).min(50);
    let block_budget = num_copies * BLOCKS_PER_COPY;

    // Per-bit tallies: how many blocks voted, and how many of them voted 1.
    let mut ones_tally = vec![0usize; SECRET_BITS];
    let mut seen_tally = vec![0usize; SECRET_BITS];

    for (global_idx, &(orig_px, orig_py)) in positions.iter().enumerate().take(block_budget) {
        // Shift the grid position by the crop offset to find the block in
        // the (possibly cropped) image.
        let shifted_x = orig_px as isize + dx;
        let shifted_y = orig_py as isize + dy;
        if shifted_x < 0 || shifted_y < 0 {
            continue;
        }
        let Some(block) = read_block_abs(y, shifted_x as usize, shifted_y as usize) else {
            continue; // block out of bounds (cropped away)
        };
        let dct = dct2_8x8(&block);

        // Positions are laid out copy after copy; the index within a copy
        // decides which slice of the secret this block carries. The range
        // end makes the last block of a copy contribute fewer than 12 bits.
        let first_bit = (global_idx % BLOCKS_PER_COPY) * BITS_PER_BLOCK;
        for (bit_idx, &(row, col)) in (first_bit..SECRET_BITS).zip(EMBED_POSITIONS.iter()) {
            seen_tally[bit_idx] += 1;
            if qim_extract(dct[row][col], QUANT_STEP) == 1 {
                ones_tally[bit_idx] += 1;
            }
        }
    }

    // Majority vote with confidence check: each bit must have >= 60% agreement
    // across copies. Below that threshold, the watermark is considered too
    // degraded for reliable extraction.
    let mut result_bits = Vec::with_capacity(SECRET_BITS);
    for (&ones, &total) in ones_tally.iter().zip(seen_tally.iter()) {
        if total == 0 {
            return Err(RelicarioError::ExtractionFailed);
        }
        let zeros = total - ones;
        let confidence = ones.max(zeros) as f64 / total as f64;
        if confidence < 0.60 {
            return Err(RelicarioError::ExtractionFailed);
        }
        result_bits.push(u8::from(ones > zeros));
    }

    let packed = bits_to_bytes(&result_bits);
    let mut secret = [0u8; 32];
    secret.copy_from_slice(&packed[..32]);
    Ok(secret)
}

/// Extract with automatic crop recovery.
///
/// Candidate layouts are tried in order of decreasing likelihood; the first one
/// that extracts successfully wins:
///
/// 1. **Uncropped**: assume the image has its original dimensions (most common case).
/// 2. **Width-only crop (8-pixel aligned)**: try original widths `current + 8k`
///    up to +20% of the current width, i.e. the crop removed a whole number of
///    JPEG blocks.
/// 3. **Height-only crop (8-pixel aligned)**: same strategy for vertical crops.
/// 4. **Width crop (non-aligned)**: every remaining original width up to +20%,
///    in 1-pixel steps.
///
/// The search space is limited to 20% expansion in each dimension, which covers
/// the 15% crumple zone plus some margin for measurement error.
///
/// Every attempt passes zero for the two trailing offset arguments, so only
/// crops that leave the top-left corner in place (right-side / bottom crops)
/// are searched. Non-block-aligned height crops are not searched either.
///
/// # Errors
///
/// Propagates any error from `extract_y_channel`. Returns
/// `RelicarioError::ExtractionFailed` when either dimension is below
/// `MIN_DIMENSION` or when no candidate layout yields a secret.
fn extract_with_crop_recovery(jpeg_bytes: &[u8]) -> Result<[u8; 32]> {
    let y = extract_y_channel(jpeg_bytes)?;

    if (y.width as u32) < MIN_DIMENSION || (y.height as u32) < MIN_DIMENSION {
        return Err(RelicarioError::ExtractionFailed);
    }

    // Try 1: assume the image is uncropped (original size = current size)
    if let Ok(secret) = try_extract_with_layout(&y, y.width, y.height, 0, 0) {
        return Ok(secret);
    }

    // The image may have been cropped. A crop removes pixels from the edges,
    // but the embed grid was derived from the *original* dimensions, so we
    // search for those. Each candidate original size fully determines the grid.
    let max_orig_w = (y.width as f64 * 1.20) as usize;
    let max_orig_h = (y.height as f64 * 1.20) as usize;

    // Try 2: width-only crops that removed a whole number of blocks (most
    // common crop scenario). `skip(1)` drops the first candidate, which equals
    // the current width and was already rejected in Try 1.
    for orig_w in (y.width..=max_orig_w).step_by(BLOCK_SIZE).skip(1) {
        if let Ok(secret) = try_extract_with_layout(&y, orig_w, y.height, 0, 0) {
            return Ok(secret);
        }
    }

    // Try 3: height-only crops, block-aligned steps (again skipping the
    // already-tested current height).
    for orig_h in (y.height..=max_orig_h).step_by(BLOCK_SIZE).skip(1) {
        if let Ok(secret) = try_extract_with_layout(&y, y.width, orig_h, 0, 0) {
            return Ok(secret);
        }
    }

    // Try 4: width crops with a 1-pixel step (non-8-aligned crops are rarer
    // but possible).
    for orig_w in (y.width + 1)..=max_orig_w {
        // Try 2 covered exactly the widths that differ from the current width
        // by a multiple of BLOCK_SIZE. Testing `orig_w % BLOCK_SIZE` instead
        // would skip untested candidates whenever the current width is not
        // itself a multiple of BLOCK_SIZE.
        if (orig_w - y.width) % BLOCK_SIZE == 0 {
            continue;
        }
        if let Ok(secret) = try_extract_with_layout(&y, orig_w, y.height, 0, 0) {
            return Ok(secret);
        }
    }

    Err(RelicarioError::ExtractionFailed)
}

// ─── Tests ───────────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;
    use image::GenericImageView;

    /// JPEG quality used for synthetic carriers and for re-encoding after a crop.
    const CARRIER_QUALITY: u8 = 92;

    /// Encode tightly packed RGB8 pixel data as a JPEG at the given quality.
    fn encode_rgb_jpeg(raw: &[u8], width: u32, height: u32, quality: u8) -> Vec<u8> {
        use image::codecs::jpeg::JpegEncoder;
        use image::ImageEncoder;
        let mut buf = Vec::new();
        JpegEncoder::new_with_quality(&mut buf, quality)
            .write_image(raw, width, height, image::ExtendedColorType::Rgb8)
            .unwrap();
        buf
    }

    /// Decode JPEG bytes into an in-memory image, panicking on any decode error.
    fn decode_jpeg(bytes: &[u8]) -> image::DynamicImage {
        ImageReader::new(Cursor::new(bytes))
            .with_guessed_format()
            .unwrap()
            .decode()
            .unwrap()
    }

    /// Build a deterministic synthetic carrier: an RGB pattern derived from the
    /// pixel coordinates, encoded as a JPEG at `CARRIER_QUALITY`.
    fn make_test_jpeg(width: u32, height: u32) -> Vec<u8> {
        use image::{ImageBuffer, Rgb};
        let img = ImageBuffer::from_fn(width, height, |x, y| {
            Rgb([
                ((x * 7 + y * 13) % 256) as u8,
                ((x * 11 + y * 3) % 256) as u8,
                ((x * 5 + y * 17) % 256) as u8,
            ])
        });
        encode_rgb_jpeg(img.as_raw(), width, height, CARRIER_QUALITY)
    }

    /// Build a truncated JPEG whose frame header declares `width` x `height`.
    ///
    /// Layout: SOI, APP0 (JFIF), SOF0 declaring the dimensions, EOI. There are
    /// no quantization/Huffman tables and no SOS/scan data, so the bytes are
    /// only useful for exercising code that inspects the header.
    fn build_oversized_jpeg_header(width: u16, height: u16) -> Vec<u8> {
        // SOI
        let mut v = vec![0xFF, 0xD8];
        // APP0 marker, segment length 16
        v.extend_from_slice(&[0xFF, 0xE0, 0x00, 0x10]);
        v.extend_from_slice(b"JFIF\0");
        // version 1.1, density units 0, X/Y density 1, no thumbnail
        v.extend_from_slice(&[0x01, 0x01, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00]);
        // SOF0 marker, segment length 17, 8-bit sample precision
        v.extend_from_slice(&[0xFF, 0xC0, 0x00, 0x11, 0x08]);
        // SOF0 stores height before width, both big-endian
        v.extend_from_slice(&height.to_be_bytes());
        v.extend_from_slice(&width.to_be_bytes());
        // 3 components as (id, sampling factors, quant table):
        // (1, 2x2, 0), (2, 1x1, 1), (3, 1x1, 1)
        v.extend_from_slice(&[0x03, 0x01, 0x22, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01]);
        // EOI
        v.extend_from_slice(&[0xFF, 0xD9]);
        v
    }

    /// The inverse DCT must undo the forward DCT to within floating-point noise.
    #[test]
    fn dct2_idct2_round_trip() {
        let block: [[f64; 8]; 8] = [
            [52.0, 55.0, 61.0, 66.0, 70.0, 61.0, 64.0, 73.0],
            [63.0, 59.0, 55.0, 90.0, 109.0, 85.0, 69.0, 72.0],
            [62.0, 59.0, 68.0, 113.0, 144.0, 104.0, 66.0, 73.0],
            [63.0, 58.0, 71.0, 122.0, 154.0, 106.0, 70.0, 69.0],
            [67.0, 61.0, 68.0, 104.0, 126.0, 88.0, 68.0, 70.0],
            [79.0, 65.0, 60.0, 70.0, 77.0, 68.0, 58.0, 75.0],
            [85.0, 71.0, 64.0, 59.0, 55.0, 61.0, 65.0, 83.0],
            [87.0, 79.0, 69.0, 68.0, 65.0, 76.0, 78.0, 94.0],
        ];

        let dct = dct2_8x8(&block);
        let recovered = idct2_8x8(&dct);

        for (row, (expected_row, actual_row)) in block.iter().zip(recovered.iter()).enumerate() {
            for (col, (&expected, &actual)) in
                expected_row.iter().zip(actual_row.iter()).enumerate()
            {
                assert!(
                    (expected - actual).abs() < 1e-6,
                    "Mismatch at ({}, {}): {} vs {}",
                    row,
                    col,
                    expected,
                    actual
                );
            }
        }
    }

    /// A bit embedded with QIM must read back unchanged when nothing disturbs it.
    #[test]
    fn qim_embed_extract_single_bit() {
        let coefficients = [-50.0, -10.0, 0.0, 10.0, 50.0, 100.0, -100.0];
        for &coef in &coefficients {
            for bit in 0..=1u8 {
                let embedded = qim_embed(coef, bit, QUANT_STEP);
                let extracted = qim_extract(embedded, QUANT_STEP);
                assert_eq!(extracted, bit, "Failed for coef={}, bit={}", coef, bit);
            }
        }
    }

    /// Perturbations smaller than a quarter of the quantization step must not
    /// flip the extracted bit.
    #[test]
    fn qim_survives_small_noise() {
        let coefficients = [-50.0, 0.0, 30.0, 75.0, -75.0];
        let noise_levels = [-10.0, -5.0, 5.0, 10.0]; // < QUANT_STEP/4 = 12.5
        for &coef in &coefficients {
            for bit in 0..=1u8 {
                let embedded = qim_embed(coef, bit, QUANT_STEP);
                for &noise in &noise_levels {
                    let extracted = qim_extract(embedded + noise, QUANT_STEP);
                    assert_eq!(
                        extracted, bit,
                        "Failed for coef={}, bit={}, noise={}",
                        coef, bit, noise
                    );
                }
            }
        }
    }

    /// The extracted Y channel must match the carrier's dimensions, one sample per pixel.
    #[test]
    fn extract_y_channel_from_synthetic_jpeg() {
        let jpeg = make_test_jpeg(200, 150);
        let y = extract_y_channel(&jpeg).unwrap();
        assert_eq!(y.width, 200);
        assert_eq!(y.height, 150);
        assert_eq!(y.data.len(), 200 * 150);
    }

    /// Writing back a block that was just read must leave it unchanged.
    #[test]
    fn read_write_block_round_trip() {
        let jpeg = make_test_jpeg(200, 150);
        let mut y = extract_y_channel(&jpeg).unwrap();
        let region = central_region(&y);

        let original = read_block(&y, 0, 0, &region);
        write_block(&mut y, 0, 0, &region, &original);
        let after = read_block(&y, 0, 0, &region);

        for (row, (before_row, after_row)) in original.iter().zip(after.iter()).enumerate() {
            for (col, (before, now)) in before_row.iter().zip(after_row.iter()).enumerate() {
                assert_eq!(before, now, "block changed at ({}, {})", row, col);
            }
        }
    }

    /// A fixed secret must survive embed followed by extract.
    #[test]
    fn embed_extract_round_trip() {
        let jpeg = make_test_jpeg(400, 300);
        let secret: [u8; 32] = [
            0xDE, 0xAD, 0xBE, 0xEF, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A,
            0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
            0x19, 0x1A, 0x1B, 0x1C,
        ];

        let stego = embed(&jpeg, &secret).unwrap();
        let extracted = extract(&stego).unwrap();
        assert_eq!(extracted, secret);
    }

    /// A freshly generated random secret must survive embed followed by extract.
    ///
    /// The secret is included in every failure message so that a failing run
    /// can be reproduced with a fixed-secret test.
    #[test]
    fn embed_extract_random_secret() {
        use rand::RngCore;
        let jpeg = make_test_jpeg(400, 300);
        let mut secret = [0u8; 32];
        rand::thread_rng().fill_bytes(&mut secret);

        let stego = embed(&jpeg, &secret)
            .unwrap_or_else(|e| panic!("embed failed for secret {:02x?}: {:?}", secret, e));
        let extracted = extract(&stego)
            .unwrap_or_else(|e| panic!("extract failed for secret {:02x?}: {:?}", secret, e));
        assert_eq!(
            extracted, secret,
            "round trip mismatch for secret {:02x?}",
            secret
        );
    }

    /// Extraction from a carrier that never went through `embed` must fail.
    #[test]
    fn extract_from_non_embedded_image_fails() {
        let jpeg = make_test_jpeg(400, 300);
        assert!(extract(&jpeg).is_err());
    }

    /// Embedding into a 32x32 carrier must be rejected.
    #[test]
    fn image_too_small_fails() {
        let jpeg = make_test_jpeg(32, 32);
        let secret = [0u8; 32];
        assert!(embed(&jpeg, &secret).is_err());
    }

    /// The secret must survive decoding the stego image and re-encoding it at quality 85.
    #[test]
    fn embed_extract_survives_recompression_q85() {
        let jpeg = make_test_jpeg(400, 300);
        let secret: [u8; 32] = [
            0xCA, 0xFE, 0xBA, 0xBE, 0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 0xFE, 0xDC,
            0xBA, 0x98, 0x76, 0x54, 0x32, 0x10, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88,
            0x99, 0xAA, 0xBB, 0xCC,
        ];

        let stego = embed(&jpeg, &secret).unwrap();

        // Re-encode at Q85
        let rgb = decode_jpeg(&stego).to_rgb8();
        let recompressed = encode_rgb_jpeg(rgb.as_raw(), rgb.width(), rgb.height(), 85);

        let extracted = extract(&recompressed).unwrap();
        assert_eq!(extracted, secret);
    }

    /// A header declaring 20000x20000 must be rejected with a dimension error.
    #[test]
    fn rejects_oversized_image_without_full_decode() {
        // The synthesized file carries no scan data at all, so the expected
        // dimension error has to come from inspecting the declared size.
        let jpeg = build_oversized_jpeg_header(20_000, 20_000);
        let result = extract(&jpeg);
        assert!(
            matches!(
                result,
                Err(RelicarioError::ImgSecret(ref msg)) if msg.contains("dimension")
            ),
            "expected a dimension error, got {:?}",
            result
        );
    }

    /// The secret must survive cropping 10% off the right edge and re-encoding.
    #[test]
    fn embed_extract_survives_10pct_crop() {
        let jpeg = make_test_jpeg(400, 300);
        let secret: [u8; 32] = [
            0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E,
            0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C,
            0x1D, 0x1E, 0x1F, 0x20,
        ];

        let stego = embed(&jpeg, &secret).unwrap();

        // Crop 10% from right edge; the top-left corner stays in place.
        let img = decode_jpeg(&stego);
        let (w, h) = img.dimensions();
        let crop_pixels = (w as f64 * 0.10) as u32;
        let rgb = img.crop_imm(0, 0, w - crop_pixels, h).to_rgb8();

        let cropped_jpeg =
            encode_rgb_jpeg(rgb.as_raw(), rgb.width(), rgb.height(), CARRIER_QUALITY);

        let extracted = extract(&cropped_jpeg).unwrap();
        assert_eq!(extracted, secret);
    }
}