Files
stegasoo/tests/test_audio.py
adlee-was-taken 0248bec813 Add audio steganography with LSB and spread spectrum modes
Implement two audio embedding modes following the same multi-factor
authentication pipeline as image steganography (passphrase + PIN +
optional RSA key + optional channel key):

- audio_lsb: High-capacity LSB embedding in PCM samples for lossless
  formats (WAV/FLAC). Uses ChaCha20-keyed sample index selection.
- audio_spread: Direct-sequence spread spectrum (DSSS) with ChaCha20-
  keyed bipolar chip codes, Reed-Solomon error correction, and 3-copy
  majority-voted length headers. Designed to survive lossy compression.

New files:
- audio_steganography.py: LSB embed/extract on PCM samples
- spread_steganography.py: Spread spectrum embed/extract
- audio_utils.py: Format detection, transcoding, validation helpers
- tests/test_audio.py: 22 tests covering both modes end-to-end

Updated encode.py, decode.py, cli.py (audio-encode/audio-decode
commands), constants.py, models.py, exceptions.py, validation.py,
__init__.py, and pyproject.toml ([audio] extra).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-27 20:26:07 -05:00

449 lines
15 KiB
Python

"""
Tests for Stegasoo audio steganography.
Tests cover:
- Audio LSB roundtrip (encode + decode)
- Audio spread spectrum roundtrip (encode + decode)
- Wrong credentials fail to decode
- Capacity calculations
- Format detection
- Audio validation
"""
import io
import numpy as np
import pytest
import soundfile as sf
from stegasoo.constants import (
EMBED_MODE_AUDIO_LSB,
EMBED_MODE_AUDIO_SPREAD,
)
from stegasoo.models import AudioCapacityInfo, AudioEmbedStats, AudioInfo
# =============================================================================
# FIXTURES
# =============================================================================
@pytest.fixture
def carrier_wav() -> bytes:
    """Return WAV bytes for a 1-second, 44100 Hz, mono, 16-bit 440 Hz sine carrier."""
    rate = 44100
    seconds = 1.0
    n_frames = int(rate * seconds)

    # endpoint=False keeps the time axis at exactly n_frames evenly spaced samples.
    timeline = np.linspace(0, seconds, n_frames, endpoint=False)
    tone = np.sin(2 * np.pi * 440 * timeline) * 16000
    pcm = tone.astype(np.int16)

    wav_buffer = io.BytesIO()
    sf.write(wav_buffer, pcm, rate, format="WAV", subtype="PCM_16")
    return wav_buffer.getvalue()
@pytest.fixture
def carrier_wav_stereo() -> bytes:
    """Return WAV bytes for a 1-second, 44100 Hz, 16-bit stereo carrier.

    The left channel is a 440 Hz sine and the right channel an 880 Hz sine.
    """
    rate = 44100
    seconds = 1.0
    n_frames = int(rate * seconds)
    timeline = np.linspace(0, seconds, n_frames, endpoint=False)

    def _tone(freq_hz):
        # One int16 sine channel at the given frequency, peak amplitude 16000.
        return (np.sin(2 * np.pi * freq_hz * timeline) * 16000).astype(np.int16)

    # Columns are channels: [left, right].
    frames = np.column_stack([_tone(440), _tone(880)])

    wav_buffer = io.BytesIO()
    sf.write(wav_buffer, frames, rate, format="WAV", subtype="PCM_16")
    return wav_buffer.getvalue()
@pytest.fixture
def carrier_wav_long() -> bytes:
    """Return WAV bytes for a 15-second mono carrier used by the spread spectrum tests."""
    rate = 44100
    seconds = 15.0
    n_frames = int(rate * seconds)
    timeline = np.linspace(0, seconds, n_frames, endpoint=False)

    # Sum of three tones (440, 880 and 1320 Hz) so the carrier is not a single
    # pure sine; the 5000 scale keeps the summed peak within the int16 range.
    first = np.sin(2 * np.pi * 440 * timeline)
    second = np.sin(2 * np.pi * 880 * timeline)
    third = np.sin(2 * np.pi * 1320 * timeline)
    pcm = ((first + second + third) * 5000).astype(np.int16)

    wav_buffer = io.BytesIO()
    sf.write(wav_buffer, pcm, rate, format="WAV", subtype="PCM_16")
    return wav_buffer.getvalue()
@pytest.fixture
def carrier_wav_spread_integration() -> bytes:
    """Return WAV bytes for a 150-second mono carrier for spread spectrum integration tests.

    Sizing rationale: spread spectrum needs 1024 samples per bit. With encryption
    plus Reed-Solomon overhead (~690 bytes) the payload needs at least
    690 * 8 * 1024 = ~5.7M samples, i.e. roughly 130 seconds at 44.1 kHz, so
    150 seconds leaves some headroom.
    """
    rate = 44100
    seconds = 150.0
    timeline = np.linspace(0, seconds, int(rate * seconds), endpoint=False)

    # Same three-tone mix (440/880/1320 Hz) as the shorter spread carrier.
    mix = (
        np.sin(2 * np.pi * 440 * timeline)
        + np.sin(2 * np.pi * 880 * timeline)
        + np.sin(2 * np.pi * 1320 * timeline)
    )
    pcm = (mix * 5000).astype(np.int16)

    wav_buffer = io.BytesIO()
    sf.write(wav_buffer, pcm, rate, format="WAV", subtype="PCM_16")
    return wav_buffer.getvalue()
@pytest.fixture
def reference_photo() -> bytes:
    """Return PNG bytes for a 100x100 single-colour RGB reference photo."""
    # Imported here, as in the rest of this module's tests, rather than at module level.
    from PIL import Image

    png_buffer = io.BytesIO()
    Image.new("RGB", (100, 100), color=(128, 64, 32)).save(png_buffer, "PNG")
    return png_buffer.getvalue()
# =============================================================================
# AUDIO LSB TESTS
# =============================================================================
class TestAudioLSB:
    """Tests for audio LSB steganography.

    The functions under test are imported inside each test method from
    ``stegasoo.audio_steganography``.
    """

    def test_calculate_capacity(self, carrier_wav):
        """The 1-second mono carrier reports a capacity above 4000."""
        from stegasoo.audio_steganography import calculate_audio_lsb_capacity

        capacity = calculate_audio_lsb_capacity(carrier_wav)

        assert capacity > 0
        # 1 second at 44100 Hz mono should give ~5KB capacity at 1 bit/sample
        assert capacity > 4000

    def test_embed_extract_roundtrip(self, carrier_wav):
        """Test basic LSB embed/extract roundtrip and the returned stats."""
        from stegasoo.audio_steganography import embed_in_audio_lsb, extract_from_audio_lsb

        payload = b"Hello, audio steganography!"
        key = b"\x42" * 32

        stego_audio, stats = embed_in_audio_lsb(payload, carrier_wav, key)

        assert isinstance(stats, AudioEmbedStats)
        assert stats.embed_mode == EMBED_MODE_AUDIO_LSB
        assert stats.bytes_embedded > 0
        assert stats.samples_modified > 0
        assert 0 < stats.capacity_used <= 1.0

        # Extract with the same key and expect the exact payload back.
        extracted = extract_from_audio_lsb(stego_audio, key)
        assert extracted is not None
        assert extracted == payload

    def test_embed_extract_stereo(self, carrier_wav_stereo):
        """Test LSB roundtrip with stereo audio."""
        from stegasoo.audio_steganography import embed_in_audio_lsb, extract_from_audio_lsb

        payload = b"Stereo test message"
        key = b"\xAB" * 32

        stego_audio, stats = embed_in_audio_lsb(payload, carrier_wav_stereo, key)
        assert stats.channels == 2

        extracted = extract_from_audio_lsb(stego_audio, key)
        assert extracted == payload

    def test_wrong_key_fails(self, carrier_wav):
        """Test that wrong key produces no valid extraction."""
        from stegasoo.audio_steganography import embed_in_audio_lsb, extract_from_audio_lsb

        payload = b"Secret message"
        correct_key = b"\x42" * 32
        wrong_key = b"\xFF" * 32

        stego_audio, _ = embed_in_audio_lsb(payload, carrier_wav, correct_key)
        extracted = extract_from_audio_lsb(stego_audio, wrong_key)

        # Should return None or garbage (not the original message)
        assert extracted is None or extracted != payload

    def test_two_bits_per_sample(self, carrier_wav):
        """Test embedding with 2 bits per sample."""
        from stegasoo.audio_steganography import embed_in_audio_lsb, extract_from_audio_lsb

        payload = b"Two bits per sample test"
        key = b"\x55" * 32

        # The embed stats are not inspected in this test.
        stego_audio, _ = embed_in_audio_lsb(payload, carrier_wav, key, bits_per_sample=2)
        extracted = extract_from_audio_lsb(stego_audio, key, bits_per_sample=2)

        assert extracted == payload

    def test_generate_sample_indices(self):
        """Test deterministic sample index generation."""
        from stegasoo.audio_steganography import generate_sample_indices

        key = b"\x42" * 32
        indices1 = generate_sample_indices(key, 10000, 100)
        indices2 = generate_sample_indices(key, 10000, 100)

        # Same key should produce same indices
        assert indices1 == indices2
        # All indices should be valid
        assert all(0 <= i < 10000 for i in indices1)
        # No duplicates
        assert len(set(indices1)) == len(indices1)
# =============================================================================
# AUDIO SPREAD SPECTRUM TESTS
# =============================================================================
class TestAudioSpread:
    """Unit tests for the spread spectrum embed/extract functions."""

    def test_calculate_capacity(self, carrier_wav_long):
        """The capacity report is an AudioCapacityInfo with positive usable bytes."""
        from stegasoo.spread_steganography import calculate_audio_spread_capacity

        report = calculate_audio_spread_capacity(carrier_wav_long)

        assert isinstance(report, AudioCapacityInfo)
        assert report.usable_capacity_bytes > 0
        assert report.embed_mode == EMBED_MODE_AUDIO_SPREAD

    def test_spread_roundtrip(self, carrier_wav_long):
        """Embedding then extracting with the same seed returns the payload."""
        from stegasoo.spread_steganography import (
            embed_in_audio_spread,
            extract_from_audio_spread,
        )

        message = b"Spread test"
        shared_seed = b"\x42" * 32

        stego_bytes, embed_stats = embed_in_audio_spread(message, carrier_wav_long, shared_seed)
        assert isinstance(embed_stats, AudioEmbedStats)
        assert embed_stats.embed_mode == EMBED_MODE_AUDIO_SPREAD

        recovered = extract_from_audio_spread(stego_bytes, shared_seed)
        assert recovered is not None
        assert recovered == message

    def test_wrong_seed_fails(self, carrier_wav_long):
        """Extracting with a different seed must not return the original payload."""
        from stegasoo.spread_steganography import (
            embed_in_audio_spread,
            extract_from_audio_spread,
        )

        message = b"Secret spread"
        embed_seed = b"\x42" * 32
        other_seed = b"\xFF" * 32

        stego_bytes, _ = embed_in_audio_spread(message, carrier_wav_long, embed_seed)
        recovered = extract_from_audio_spread(stego_bytes, other_seed)

        # Either nothing is recovered, or what is recovered differs from the message.
        assert recovered is None or recovered != message
# =============================================================================
# FORMAT DETECTION TESTS
# =============================================================================
class TestFormatDetection:
    """Checks for ``detect_audio_format`` on WAV, non-audio and empty input."""

    def test_detect_wav(self, carrier_wav):
        """Generated WAV bytes are reported as "wav"."""
        from stegasoo.audio_utils import detect_audio_format

        detected = detect_audio_format(carrier_wav)
        assert detected == "wav"

    def test_detect_unknown(self):
        """Arbitrary non-audio bytes are reported as "unknown"."""
        from stegasoo.audio_utils import detect_audio_format

        detected = detect_audio_format(b"not audio data")
        assert detected == "unknown"

    def test_detect_empty(self):
        """Empty input is reported as "unknown"."""
        from stegasoo.audio_utils import detect_audio_format

        detected = detect_audio_format(b"")
        assert detected == "unknown"
# =============================================================================
# AUDIO INFO TESTS
# =============================================================================
class TestAudioInfo:
    """Checks for the metadata returned by ``get_audio_info``."""

    def test_get_wav_info(self, carrier_wav):
        """The mono carrier reports 44100 Hz, 1 channel, "wav" and about 1 s duration."""
        from stegasoo.audio_utils import get_audio_info

        wav_info = get_audio_info(carrier_wav)

        assert isinstance(wav_info, AudioInfo)
        assert wav_info.sample_rate == 44100
        assert wav_info.channels == 1
        assert wav_info.format == "wav"
        # The fixture is 1.0 s long; allow a small tolerance on the reported duration.
        duration_error = abs(wav_info.duration_seconds - 1.0)
        assert duration_error < 0.1

    def test_get_stereo_info(self, carrier_wav_stereo):
        """The stereo carrier reports two channels."""
        from stegasoo.audio_utils import get_audio_info

        stereo_info = get_audio_info(carrier_wav_stereo)
        assert stereo_info.channels == 2
# =============================================================================
# VALIDATION TESTS
# =============================================================================
class TestAudioValidation:
    """Checks for ``validate_audio`` and ``validate_audio_embed_mode``."""

    def test_validate_valid_audio(self, carrier_wav):
        """A generated WAV carrier validates successfully."""
        from stegasoo.audio_utils import validate_audio

        outcome = validate_audio(carrier_wav)
        assert outcome.is_valid

    def test_validate_empty_audio(self):
        """Empty bytes are rejected."""
        from stegasoo.audio_utils import validate_audio

        outcome = validate_audio(b"")
        assert not outcome.is_valid

    def test_validate_invalid_audio(self):
        """Non-audio bytes are rejected."""
        from stegasoo.audio_utils import validate_audio

        outcome = validate_audio(b"not audio data at all")
        assert not outcome.is_valid

    def test_validate_audio_embed_mode(self):
        """The three audio mode names are accepted; an unknown name is rejected."""
        from stegasoo.validation import validate_audio_embed_mode

        for accepted_mode in ("audio_lsb", "audio_spread", "audio_auto"):
            assert validate_audio_embed_mode(accepted_mode).is_valid

        assert not validate_audio_embed_mode("invalid").is_valid
# =============================================================================
# INTEGRATION TESTS
# =============================================================================
class TestIntegration:
    """End-to-end integration tests using encode_audio/decode_audio.

    Each test encodes a text message into a generated carrier with a reference
    photo, passphrase and PIN, then decodes the result through ``decode_audio``.
    """

    def test_lsb_encode_decode(self, carrier_wav, reference_photo):
        """Test full LSB encode/decode pipeline with matching credentials."""
        from stegasoo.decode import decode_audio
        from stegasoo.encode import encode_audio

        # The embed stats are not inspected here; only the stego bytes matter.
        stego_audio, _ = encode_audio(
            message="Hello from audio steganography!",
            reference_photo=reference_photo,
            carrier_audio=carrier_wav,
            passphrase="test words here now",
            pin="123456",
            embed_mode="audio_lsb",
        )
        assert len(stego_audio) > 0

        result = decode_audio(
            stego_audio=stego_audio,
            reference_photo=reference_photo,
            passphrase="test words here now",
            pin="123456",
            embed_mode="audio_lsb",
        )
        assert result.is_text
        assert result.message == "Hello from audio steganography!"

    def test_lsb_wrong_credentials(self, carrier_wav, reference_photo):
        """Test that decoding with a wrong passphrase and PIN raises."""
        from stegasoo.decode import decode_audio
        from stegasoo.encode import encode_audio

        stego_audio, _ = encode_audio(
            message="Secret",
            reference_photo=reference_photo,
            carrier_audio=carrier_wav,
            passphrase="correct horse battery staple",
            pin="123456",
            embed_mode="audio_lsb",
        )

        # NOTE(review): Exception is very broad and would also pass on an unrelated
        # error such as a TypeError. Narrow it to the specific stegasoo exception
        # once the type raised on bad credentials is confirmed.
        with pytest.raises(Exception):
            decode_audio(
                stego_audio=stego_audio,
                reference_photo=reference_photo,
                passphrase="wrong passphrase words here",
                pin="654321",
                embed_mode="audio_lsb",
            )

    def test_spread_encode_decode(self, carrier_wav_spread_integration, reference_photo):
        """Test full spread spectrum encode/decode pipeline."""
        from stegasoo.decode import decode_audio
        from stegasoo.encode import encode_audio

        # The embed stats are not inspected here; only the stego bytes matter.
        stego_audio, _ = encode_audio(
            message="Spread integration test",
            reference_photo=reference_photo,
            carrier_audio=carrier_wav_spread_integration,
            passphrase="test words here now",
            pin="123456",
            embed_mode="audio_spread",
        )

        result = decode_audio(
            stego_audio=stego_audio,
            reference_photo=reference_photo,
            passphrase="test words here now",
            pin="123456",
            embed_mode="audio_spread",
        )
        assert result.message == "Spread integration test"

    def test_auto_detect_lsb(self, carrier_wav, reference_photo):
        """Test auto-detection finds LSB encoded audio."""
        from stegasoo.decode import decode_audio
        from stegasoo.encode import encode_audio

        stego_audio, _ = encode_audio(
            message="Auto-detect test",
            reference_photo=reference_photo,
            carrier_audio=carrier_wav,
            passphrase="test words here now",
            pin="123456",
            embed_mode="audio_lsb",
        )

        # Decode without naming the mode that was used for encoding.
        result = decode_audio(
            stego_audio=stego_audio,
            reference_photo=reference_photo,
            passphrase="test words here now",
            pin="123456",
            embed_mode="audio_auto",
        )
        assert result.message == "Auto-detect test"