refactor(core): extract base32 module, dedupe two RFC 4648 impls
New crates/relicario-core/src/base32.rs hosts encode_rfc4648 and decode_rfc4648_lenient (case-insensitive, optional padding, whitespace stripped). Folds the inline base32_encode (item.rs:255-275) and decode_base32_totp (import_lastpass.rs:202-220) into the shared module; both call sites are updated.

- New RelicarioError::InvalidBase32(String) variant for the decoder error path
- Module is pub(crate); public API surface unchanged
- Steam alphabet (item_types/totp.rs:13) intentionally kept separate, with a neighbouring comment pointing at crate::base32

Plan B Phase 7 sub-step 1 (DEV-A P2 base32 dedup half).
Spec: docs/superpowers/specs/2026-05-04-cli-restructure-design.md.

cargo test --workspace: green
cargo clippy --workspace: silent

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
132
crates/relicario-core/src/base32.rs
Normal file
132
crates/relicario-core/src/base32.rs
Normal file
@@ -0,0 +1,132 @@
|
||||
//! RFC 4648 base32 codec, no-padding form, lenient on input.
|
||||
//!
|
||||
//! The encoder produces canonical no-padding RFC 4648 output (uppercase ASCII).
|
||||
//! The decoder is lenient: case-insensitive, optional `=` padding, whitespace
|
||||
//! anywhere is stripped before decoding.
|
||||
//!
|
||||
//! Steam Guard's authenticator uses a different (de-ambiguated) alphabet —
|
||||
//! see `crate::item_types::totp::STEAM_ALPHABET`. That codec is intentionally
|
||||
//! NOT routed through this module.
|
||||
|
||||
use crate::error::{RelicarioError, Result};
|
||||
|
||||
const ALPHA: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";
|
||||
|
||||
/// RFC 4648 base32 encoder, no-padding form. Output is uppercase ASCII.
|
||||
pub fn encode_rfc4648(bytes: &[u8]) -> String {
|
||||
let mut out = String::new();
|
||||
let mut buffer: u32 = 0;
|
||||
let mut bits: u32 = 0;
|
||||
for &b in bytes {
|
||||
buffer = (buffer << 8) | (b as u32);
|
||||
bits += 8;
|
||||
while bits >= 5 {
|
||||
let idx = ((buffer >> (bits - 5)) & 0x1f) as usize;
|
||||
out.push(ALPHA[idx] as char);
|
||||
bits -= 5;
|
||||
}
|
||||
}
|
||||
if bits > 0 {
|
||||
let idx = ((buffer << (5 - bits)) & 0x1f) as usize;
|
||||
out.push(ALPHA[idx] as char);
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// RFC 4648 base32 decoder, lenient on input.
|
||||
///
|
||||
/// Accepts upper- or lower-case letters, optional `=` padding, and whitespace
|
||||
/// anywhere. Trailing bits less than a full byte are silently discarded
|
||||
/// (canonical RFC 4648 decode).
|
||||
pub fn decode_rfc4648_lenient(s: &str) -> Result<Vec<u8>> {
|
||||
let cleaned: String = s
|
||||
.chars()
|
||||
.filter(|c| !c.is_whitespace())
|
||||
.collect::<String>()
|
||||
.to_ascii_uppercase();
|
||||
let trimmed = cleaned.trim_end_matches('=');
|
||||
let mut out: Vec<u8> = Vec::with_capacity(trimmed.len() * 5 / 8);
|
||||
let mut buffer: u32 = 0;
|
||||
let mut bits: u32 = 0;
|
||||
for ch in trimmed.bytes() {
|
||||
let idx = ALPHA.iter().position(|&a| a == ch).ok_or_else(|| {
|
||||
RelicarioError::InvalidBase32(format!("non-alphabet character {:?}", ch as char))
|
||||
})?;
|
||||
buffer = (buffer << 5) | (idx as u32);
|
||||
bits += 5;
|
||||
if bits >= 8 {
|
||||
bits -= 8;
|
||||
out.push(((buffer >> bits) & 0xff) as u8);
|
||||
}
|
||||
}
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// RFC 4648 §10 test vectors in no-padding form, as `(encoded, raw)` pairs.
    const RFC_VECTORS: &[(&str, &[u8])] = &[
        ("", b""),
        ("MY", b"f"),
        ("MZXQ", b"fo"),
        ("MZXW6", b"foo"),
        ("MZXW6YQ", b"foob"),
        ("MZXW6YTB", b"fooba"),
        ("MZXW6YTBOI", b"foobar"),
    ];

    #[test]
    fn encode_rfc4648_matches_rfc_test_vectors() {
        for &(encoded, raw) in RFC_VECTORS {
            assert_eq!(encode_rfc4648(raw), encoded);
        }
    }

    #[test]
    fn decode_rfc4648_lenient_inverts_encoder_on_known_vectors() {
        for &(encoded, raw) in RFC_VECTORS {
            assert_eq!(decode_rfc4648_lenient(encoded).unwrap(), raw);
        }
    }

    #[test]
    fn decode_rfc4648_lenient_accepts_lowercase_and_mixed_case() {
        assert_eq!(decode_rfc4648_lenient("mzxw6").unwrap(), b"foo");
        assert_eq!(decode_rfc4648_lenient("MzXw6yTbOi").unwrap(), b"foobar");
    }

    #[test]
    fn decode_rfc4648_lenient_strips_optional_padding() {
        assert_eq!(decode_rfc4648_lenient("MY======").unwrap(), b"f");
        assert_eq!(decode_rfc4648_lenient("MZXW6===").unwrap(), b"foo");
        assert_eq!(decode_rfc4648_lenient("MZXW6YTBOI======").unwrap(), b"foobar");
    }

    #[test]
    fn decode_rfc4648_lenient_strips_whitespace_anywhere() {
        assert_eq!(decode_rfc4648_lenient(" MZXW 6YTB OI ").unwrap(), b"foobar");
        assert_eq!(decode_rfc4648_lenient("MZXW\n6YTB\tOI").unwrap(), b"foobar");
    }

    #[test]
    fn decode_rfc4648_lenient_rejects_non_alphabet_chars() {
        // '1' is deliberately absent from the RFC 4648 base32 alphabet.
        assert!(matches!(
            decode_rfc4648_lenient("MY1"),
            Err(RelicarioError::InvalidBase32(_))
        ));
        assert!(decode_rfc4648_lenient("???").is_err());
        assert!(decode_rfc4648_lenient("MZ!XW").is_err());
    }

    #[test]
    fn encode_decode_round_trips_arbitrary_bytes() {
        // Cover every byte value and every prefix length, so that each
        // possible count of leftover bits in the encoder's final symbol
        // (0 through 4) is exercised against the decoder.
        let bytes: Vec<u8> = (0u8..=255).collect();
        for len in 0..=bytes.len() {
            let prefix = &bytes[..len];
            let encoded = encode_rfc4648(prefix);
            assert_eq!(
                decode_rfc4648_lenient(&encoded).unwrap(),
                prefix,
                "round-trip failed for input length {}",
                len
            );
        }
    }
}
|
||||
Reference in New Issue
Block a user