# stegasoo/tests/test_audio.py

"""
Tests for Stegasoo audio steganography.

Tests cover:
- Audio LSB roundtrip (encode + decode)
- Audio spread spectrum roundtrip (encode + decode)
- Wrong credentials fail to decode
- Capacity calculations
- Format detection
- Audio validation
"""

import io

import numpy as np
import pytest
import soundfile as sf

from stegasoo.constants import (
    EMBED_MODE_AUDIO_LSB,
    EMBED_MODE_AUDIO_SPREAD,
)
from stegasoo.models import AudioCapacityInfo, AudioEmbedStats, AudioInfo

# =============================================================================
# FIXTURES
# =============================================================================


@pytest.fixture
def carrier_wav() -> bytes:
    """Return WAV bytes for a 1-second, 44100 Hz, mono, 16-bit PCM carrier.

    The signal is a single 440 Hz sine with a peak amplitude of 16000.
    """
    rate = 44100
    seconds = 1.0
    frame_count = int(rate * seconds)

    # endpoint=False keeps the sample spacing at exactly 1/rate.
    timeline = np.linspace(0, seconds, frame_count, endpoint=False)
    tone = np.sin(2 * np.pi * 440 * timeline) * 16000
    pcm = tone.astype(np.int16)

    with io.BytesIO() as wav_buffer:
        sf.write(wav_buffer, pcm, rate, format="WAV", subtype="PCM_16")
        return wav_buffer.getvalue()


@pytest.fixture
def carrier_wav_stereo() -> bytes:
    """Return WAV bytes for a 1-second, 44100 Hz, two-channel, 16-bit PCM carrier.

    The left channel carries a 440 Hz sine and the right channel an 880 Hz
    sine, both with a peak amplitude of 16000.
    """
    rate = 44100
    seconds = 1.0
    frame_count = int(rate * seconds)
    timeline = np.linspace(0, seconds, frame_count, endpoint=False)

    # One column per channel: index 0 is left, index 1 is right.
    channel_left = (np.sin(2 * np.pi * 440 * timeline) * 16000).astype(np.int16)
    channel_right = (np.sin(2 * np.pi * 880 * timeline) * 16000).astype(np.int16)
    frames = np.column_stack([channel_left, channel_right])

    with io.BytesIO() as wav_buffer:
        sf.write(wav_buffer, frames, rate, format="WAV", subtype="PCM_16")
        return wav_buffer.getvalue()


@pytest.fixture
def carrier_wav_long() -> bytes:
    """Return WAV bytes for a 15-second mono carrier used by the spread spectrum tests.

    The signal is 44100 Hz, 16-bit PCM: the sum of 440, 880 and 1320 Hz
    sines, scaled by 5000.
    """
    rate = 44100
    seconds = 15.0
    frame_count = int(rate * seconds)
    timeline = np.linspace(0, seconds, frame_count, endpoint=False)

    # Three tones instead of one give the carrier a richer spectrum.
    tone_440 = np.sin(2 * np.pi * 440 * timeline)
    tone_880 = np.sin(2 * np.pi * 880 * timeline)
    tone_1320 = np.sin(2 * np.pi * 1320 * timeline)
    # Peak of the sum is at most 3 * 5000 = 15000, well inside the int16 range.
    pcm = ((tone_440 + tone_880 + tone_1320) * 5000).astype(np.int16)

    wav_buffer = io.BytesIO()
    sf.write(wav_buffer, pcm, rate, format="WAV", subtype="PCM_16")
    return wav_buffer.getvalue()


@pytest.fixture
def carrier_wav_spread_integration() -> bytes:
    """Return WAV bytes for a 150-second mono carrier for spread spectrum integration tests.

    Sizing rationale: spread spectrum needs 1024 samples per bit. With
    encryption + RS overhead (~690 bytes) the payload requires at least
    690 * 8 * 1024 = ~5.7M samples, i.e. roughly 130 seconds at 44.1 kHz;
    150 seconds leaves some margin.
    """
    rate = 44100
    seconds = 150.0
    frame_count = int(rate * seconds)
    timeline = np.linspace(0, seconds, frame_count, endpoint=False)

    # Same three-tone mix (440/880/1320 Hz, scaled by 5000) as the shorter carrier.
    tone_440 = np.sin(2 * np.pi * 440 * timeline)
    tone_880 = np.sin(2 * np.pi * 880 * timeline)
    tone_1320 = np.sin(2 * np.pi * 1320 * timeline)
    pcm = ((tone_440 + tone_880 + tone_1320) * 5000).astype(np.int16)

    wav_buffer = io.BytesIO()
    sf.write(wav_buffer, pcm, rate, format="WAV", subtype="PCM_16")
    return wav_buffer.getvalue()


@pytest.fixture
def reference_photo() -> bytes:
    """Return PNG bytes for a 100x100 solid-colour RGB image used as the reference photo."""
    # Imported lazily so Pillow is only loaded by tests that request this fixture.
    from PIL import Image

    solid = Image.new("RGB", (100, 100), color=(128, 64, 32))
    with io.BytesIO() as png_buffer:
        solid.save(png_buffer, format="PNG")
        return png_buffer.getvalue()


# =============================================================================
# AUDIO LSB TESTS
# =============================================================================


class TestAudioLSB:
    """Tests for audio LSB steganography.

    These tests call the low-level LSB functions directly with a raw 32-byte
    key, bypassing the passphrase/PIN pipeline covered by the integration
    tests.
    """

    def test_calculate_capacity(self, carrier_wav):
        """Capacity of the 1-second mono carrier exceeds 4000 bytes."""
        from stegasoo.audio_steganography import calculate_audio_lsb_capacity

        capacity = calculate_audio_lsb_capacity(carrier_wav)
        assert capacity > 0
        # 1 second at 44100 Hz mono should give ~5KB capacity at 1 bit/sample
        assert capacity > 4000

    def test_embed_extract_roundtrip(self, carrier_wav):
        """Test basic LSB embed/extract roundtrip."""
        from stegasoo.audio_steganography import embed_in_audio_lsb, extract_from_audio_lsb

        payload = b"Hello, audio steganography!"
        key = b"\x42" * 32

        stego_audio, stats = embed_in_audio_lsb(payload, carrier_wav, key)

        # The returned stats must describe an LSB embed that actually wrote data.
        assert isinstance(stats, AudioEmbedStats)
        assert stats.embed_mode == EMBED_MODE_AUDIO_LSB
        assert stats.bytes_embedded > 0
        assert stats.samples_modified > 0
        assert 0 < stats.capacity_used <= 1.0

        # Extracting with the same key must return the payload byte-for-byte.
        extracted = extract_from_audio_lsb(stego_audio, key)
        assert extracted is not None
        assert extracted == payload

    def test_embed_extract_stereo(self, carrier_wav_stereo):
        """Test LSB roundtrip with stereo audio."""
        from stegasoo.audio_steganography import embed_in_audio_lsb, extract_from_audio_lsb

        payload = b"Stereo test message"
        key = b"\xAB" * 32

        stego_audio, stats = embed_in_audio_lsb(payload, carrier_wav_stereo, key)
        assert stats.channels == 2

        extracted = extract_from_audio_lsb(stego_audio, key)
        assert extracted == payload

    def test_wrong_key_fails(self, carrier_wav):
        """Test that wrong key produces no valid extraction."""
        from stegasoo.audio_steganography import embed_in_audio_lsb, extract_from_audio_lsb

        payload = b"Secret message"
        correct_key = b"\x42" * 32
        wrong_key = b"\xFF" * 32

        stego_audio, _ = embed_in_audio_lsb(payload, carrier_wav, correct_key)

        extracted = extract_from_audio_lsb(stego_audio, wrong_key)
        # Should return None or garbage (not the original message)
        assert extracted is None or extracted != payload

    def test_two_bits_per_sample(self, carrier_wav):
        """Test embedding with 2 bits per sample."""
        from stegasoo.audio_steganography import embed_in_audio_lsb, extract_from_audio_lsb

        payload = b"Two bits per sample test"
        key = b"\x55" * 32

        # Stats are not inspected here; only the roundtrip matters.
        stego_audio, _ = embed_in_audio_lsb(payload, carrier_wav, key, bits_per_sample=2)

        # The extractor must be told the same bit depth used for embedding.
        extracted = extract_from_audio_lsb(stego_audio, key, bits_per_sample=2)
        assert extracted == payload

    def test_generate_sample_indices(self):
        """Test deterministic sample index generation."""
        from stegasoo.audio_steganography import generate_sample_indices

        key = b"\x42" * 32
        indices1 = generate_sample_indices(key, 10000, 100)
        indices2 = generate_sample_indices(key, 10000, 100)

        # Same key should produce same indices
        assert indices1 == indices2

        # All indices should be valid
        assert all(0 <= i < 10000 for i in indices1)

        # No duplicates
        assert len(set(indices1)) == len(indices1)


# =============================================================================
# AUDIO SPREAD SPECTRUM TESTS
# =============================================================================


class TestAudioSpread:
    """Direct tests of the spread spectrum embed/extract functions."""

    def test_calculate_capacity(self, carrier_wav_long):
        """Capacity report is an AudioCapacityInfo with positive usable bytes and spread mode."""
        from stegasoo.spread_steganography import calculate_audio_spread_capacity

        report = calculate_audio_spread_capacity(carrier_wav_long)

        assert isinstance(report, AudioCapacityInfo)
        assert report.usable_capacity_bytes > 0
        assert report.embed_mode == EMBED_MODE_AUDIO_SPREAD

    def test_spread_roundtrip(self, carrier_wav_long):
        """Embedding then extracting with the same seed returns the payload."""
        from stegasoo.spread_steganography import (
            embed_in_audio_spread,
            extract_from_audio_spread,
        )

        shared_seed = b"\x42" * 32
        secret = b"Spread test"

        stego_bytes, embed_stats = embed_in_audio_spread(secret, carrier_wav_long, shared_seed)

        assert isinstance(embed_stats, AudioEmbedStats)
        assert embed_stats.embed_mode == EMBED_MODE_AUDIO_SPREAD

        recovered = extract_from_audio_spread(stego_bytes, shared_seed)
        assert recovered is not None
        assert recovered == secret

    def test_wrong_seed_fails(self, carrier_wav_long):
        """Extracting with a different seed must not return the embedded payload."""
        from stegasoo.spread_steganography import (
            embed_in_audio_spread,
            extract_from_audio_spread,
        )

        embed_seed = b"\x42" * 32
        other_seed = b"\xFF" * 32
        secret = b"Secret spread"

        stego_bytes, _ = embed_in_audio_spread(secret, carrier_wav_long, embed_seed)
        recovered = extract_from_audio_spread(stego_bytes, other_seed)

        # Either nothing comes back, or whatever comes back is not the payload.
        assert recovered is None or recovered != secret


# =============================================================================
# FORMAT DETECTION TESTS
# =============================================================================


class TestFormatDetection:
    """Checks on detect_audio_format for WAV, non-audio and empty input."""

    def test_detect_wav(self, carrier_wav):
        """WAV bytes are reported as "wav"."""
        from stegasoo import audio_utils

        detected = audio_utils.detect_audio_format(carrier_wav)
        assert detected == "wav"

    def test_detect_unknown(self):
        """Arbitrary non-audio bytes are reported as "unknown"."""
        from stegasoo import audio_utils

        detected = audio_utils.detect_audio_format(b"not audio data")
        assert detected == "unknown"

    def test_detect_empty(self):
        """An empty byte string is reported as "unknown"."""
        from stegasoo import audio_utils

        detected = audio_utils.detect_audio_format(b"")
        assert detected == "unknown"


# =============================================================================
# AUDIO INFO TESTS
# =============================================================================


class TestAudioInfo:
    """Checks on the metadata returned by get_audio_info."""

    def test_get_wav_info(self, carrier_wav):
        """Mono carrier reports 44100 Hz, 1 channel, "wav" and ~1 s duration."""
        from stegasoo import audio_utils

        metadata = audio_utils.get_audio_info(carrier_wav)

        assert isinstance(metadata, AudioInfo)
        assert metadata.sample_rate == 44100
        assert metadata.channels == 1
        assert metadata.format == "wav"
        # The carrier is exactly one second long; allow 0.1 s of slack.
        duration_error = abs(metadata.duration_seconds - 1.0)
        assert duration_error < 0.1

    def test_get_stereo_info(self, carrier_wav_stereo):
        """Stereo carrier reports two channels."""
        from stegasoo import audio_utils

        metadata = audio_utils.get_audio_info(carrier_wav_stereo)
        assert metadata.channels == 2


# =============================================================================
# VALIDATION TESTS
# =============================================================================


class TestAudioValidation:
    """Checks on validate_audio and validate_audio_embed_mode results."""

    def test_validate_valid_audio(self, carrier_wav):
        """A well-formed WAV carrier validates successfully."""
        from stegasoo import audio_utils

        outcome = audio_utils.validate_audio(carrier_wav)
        assert outcome.is_valid

    def test_validate_empty_audio(self):
        """An empty byte string is rejected."""
        from stegasoo import audio_utils

        outcome = audio_utils.validate_audio(b"")
        assert not outcome.is_valid

    def test_validate_invalid_audio(self):
        """Bytes that are not audio are rejected."""
        from stegasoo import audio_utils

        outcome = audio_utils.validate_audio(b"not audio data at all")
        assert not outcome.is_valid

    def test_validate_audio_embed_mode(self):
        """The three audio mode strings are accepted; an arbitrary string is not."""
        from stegasoo.validation import validate_audio_embed_mode

        for accepted_mode in ("audio_lsb", "audio_spread", "audio_auto"):
            assert validate_audio_embed_mode(accepted_mode).is_valid

        assert not validate_audio_embed_mode("invalid").is_valid


# =============================================================================
# INTEGRATION TESTS
# =============================================================================


class TestIntegration:
    """End-to-end integration tests using encode_audio/decode_audio.

    Each test encodes a text message into a carrier with a reference photo,
    passphrase and PIN, then decodes it. In the roundtrip tests the
    credentials are built once per test and passed to both calls so the
    encode and decode sides cannot drift apart.
    """

    def test_lsb_encode_decode(self, carrier_wav, reference_photo):
        """LSB mode: a message encoded with given credentials decodes back as text."""
        from stegasoo.decode import decode_audio
        from stegasoo.encode import encode_audio

        message = "Hello from audio steganography!"
        credentials = {
            "reference_photo": reference_photo,
            "passphrase": "test words here now",
            "pin": "123456",
        }

        stego_audio, _ = encode_audio(
            message=message,
            carrier_audio=carrier_wav,
            embed_mode="audio_lsb",
            **credentials,
        )

        assert len(stego_audio) > 0

        result = decode_audio(
            stego_audio=stego_audio,
            embed_mode="audio_lsb",
            **credentials,
        )

        assert result.is_text
        assert result.message == message

    def test_lsb_wrong_credentials(self, carrier_wav, reference_photo):
        """LSB mode: decoding with a different passphrase and PIN raises."""
        from stegasoo.decode import decode_audio
        from stegasoo.encode import encode_audio

        stego_audio, _ = encode_audio(
            message="Secret",
            reference_photo=reference_photo,
            carrier_audio=carrier_wav,
            passphrase="correct horse battery staple",
            pin="123456",
            embed_mode="audio_lsb",
        )

        # NOTE(review): Exception is very broad and would also accept an
        # unrelated bug (e.g. a TypeError). Narrow this to the project's
        # decode/decryption error type once that type is confirmed.
        with pytest.raises(Exception):
            decode_audio(
                stego_audio=stego_audio,
                reference_photo=reference_photo,
                passphrase="wrong passphrase words here",
                pin="654321",
                embed_mode="audio_lsb",
            )

    def test_spread_encode_decode(self, carrier_wav_spread_integration, reference_photo):
        """Test full spread spectrum encode/decode pipeline."""
        from stegasoo.decode import decode_audio
        from stegasoo.encode import encode_audio

        message = "Spread integration test"
        credentials = {
            "reference_photo": reference_photo,
            "passphrase": "test words here now",
            "pin": "123456",
        }

        stego_audio, _ = encode_audio(
            message=message,
            carrier_audio=carrier_wav_spread_integration,
            embed_mode="audio_spread",
            **credentials,
        )

        result = decode_audio(
            stego_audio=stego_audio,
            embed_mode="audio_spread",
            **credentials,
        )

        assert result.message == message

    def test_auto_detect_lsb(self, carrier_wav, reference_photo):
        """Test auto-detection finds LSB encoded audio."""
        from stegasoo.decode import decode_audio
        from stegasoo.encode import encode_audio

        message = "Auto-detect test"
        credentials = {
            "reference_photo": reference_photo,
            "passphrase": "test words here now",
            "pin": "123456",
        }

        stego_audio, _ = encode_audio(
            message=message,
            carrier_audio=carrier_wav,
            embed_mode="audio_lsb",
            **credentials,
        )

        # Decode without naming the mode used for encoding.
        result = decode_audio(
            stego_audio=stego_audio,
            embed_mode="audio_auto",
            **credentials,
        )

        assert result.message == message