cmd_backup_restore previously called tar::Archive::unpack with default settings, allowing malicious .relbak archives to escape the target directory via `..` entries, absolute paths, or symlinks. No size cap meant tar bombs could exhaust disk space.

Replaced with relicario_core::safe_unpack_git_archive, which:
- Rejects `..` (ParentDir), absolute (RootDir), and drive-prefix (Prefix) components with a "path traversal blocked" error.
- Rejects symlinks and hardlinks outright.
- Checks the declared header size before reading the body; rejects entries or cumulative totals exceeding the caller's cap.
- Returns (relative-path, bytes) pairs; the CLI re-checks dest.starts_with(git_dir) after OS-level path resolution.
- CLI cap: min(100 × compressed size, 1 GiB).

Acceptance: 5 unit tests in relicario-core (traversal, absolute path, symlink, size bomb, happy path); existing CLI backup roundtrip tests remain green.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
139 lines
5.2 KiB
Rust
139 lines
5.2 KiB
Rust
//! Safe tar unpacking for backup restore.
|
|
//!
|
|
//! The standard `tar::Archive::unpack` has no guards against path traversal,
|
|
//! absolute paths, symlinks, hardlinks, or tar bombs. This module replaces it
|
|
//! with `safe_unpack_git_archive`, which validates every entry before returning
|
|
//! `(relative_path, bytes)` pairs to the caller.
|
|
|
|
use std::io::Read;
|
|
use std::path::{Component, PathBuf};
|
|
|
|
use tar::EntryType;
|
|
|
|
use crate::error::{RelicarioError, Result};
|
|
|
|
/// Default cap on total uncompressed bytes extracted in one restore (1 GiB).
|
|
// 1 << 30 bytes, i.e. 1024 * 1024 * 1024.
pub const DEFAULT_MAX_UNCOMPRESSED: u64 = 1 << 30;
|
|
|
|
/// Decode `tar_bytes` and return `(relative_path, file_bytes)` pairs for
|
|
/// regular files only.
|
|
///
|
|
/// # Errors
|
|
///
|
|
/// Returns `Err(RelicarioError::BackupRestore(...))` if:
|
|
///
|
|
/// - Any path component is `..` (`Component::ParentDir`) — "path traversal blocked".
|
|
/// - Any path starts with `/` (`Component::RootDir`) — "path traversal blocked".
|
|
/// - Any path has a Windows drive prefix (`Component::Prefix`) — "path traversal blocked".
|
|
/// - An entry is a symlink or hardlink — "symlink/link rejected".
|
|
/// - An entry's declared size exceeds `max_uncompressed_bytes` — "size cap exceeded".
|
|
/// - The running total of all entry sizes exceeds `max_uncompressed_bytes` — "size cap exceeded".
|
|
/// - An entry has an unexpected type (not regular file, not directory) — "unexpected entry type".
|
|
pub fn safe_unpack_git_archive(
|
|
tar_bytes: &[u8],
|
|
max_uncompressed_bytes: u64,
|
|
) -> Result<Vec<(PathBuf, Vec<u8>)>> {
|
|
let mut archive = tar::Archive::new(tar_bytes);
|
|
let entries = archive
|
|
.entries()
|
|
.map_err(|e| RelicarioError::BackupRestore(format!("failed to read tar entries: {e}")))?;
|
|
|
|
let mut result: Vec<(PathBuf, Vec<u8>)> = Vec::new();
|
|
let mut cumulative: u64 = 0;
|
|
|
|
for entry in entries {
|
|
let mut entry = entry.map_err(|e| {
|
|
RelicarioError::BackupRestore(format!("failed to read tar entry: {e}"))
|
|
})?;
|
|
|
|
let header = entry.header();
|
|
let entry_type = header.entry_type();
|
|
|
|
// Reject symlinks and hardlinks.
|
|
match entry_type {
|
|
EntryType::Symlink => {
|
|
return Err(RelicarioError::BackupRestore(
|
|
"symlink entry rejected".to_string(),
|
|
));
|
|
}
|
|
EntryType::Link => {
|
|
return Err(RelicarioError::BackupRestore(
|
|
"hardlink entry rejected".to_string(),
|
|
));
|
|
}
|
|
EntryType::Directory => {
|
|
// Directories are implicit — skip without reading body.
|
|
continue;
|
|
}
|
|
EntryType::Regular | EntryType::Continuous | EntryType::GNUSparse => {
|
|
// These are normal file types; fall through to path checks.
|
|
}
|
|
_ => {
|
|
return Err(RelicarioError::BackupRestore(format!(
|
|
"unexpected entry type: {:?}",
|
|
entry_type
|
|
)));
|
|
}
|
|
}
|
|
|
|
// Validate the path.
|
|
let path = entry.path().map_err(|e| {
|
|
RelicarioError::BackupRestore(format!("invalid path in tar entry: {e}"))
|
|
})?;
|
|
let path = path.into_owned();
|
|
|
|
for component in path.components() {
|
|
match component {
|
|
Component::ParentDir => {
|
|
return Err(RelicarioError::BackupRestore(
|
|
"path traversal blocked: entry contains '..' component".to_string(),
|
|
));
|
|
}
|
|
Component::RootDir => {
|
|
return Err(RelicarioError::BackupRestore(
|
|
"path traversal blocked: entry has absolute path".to_string(),
|
|
));
|
|
}
|
|
Component::Prefix(_) => {
|
|
return Err(RelicarioError::BackupRestore(
|
|
"path traversal blocked: entry has Windows drive prefix".to_string(),
|
|
));
|
|
}
|
|
Component::Normal(_) | Component::CurDir => {
|
|
// Acceptable components.
|
|
}
|
|
}
|
|
}
|
|
|
|
// Check declared size before reading body.
|
|
let claimed = header.size().map_err(|e| {
|
|
RelicarioError::BackupRestore(format!("could not read entry size: {e}"))
|
|
})?;
|
|
|
|
if claimed > max_uncompressed_bytes {
|
|
return Err(RelicarioError::BackupRestore(format!(
|
|
"size cap exceeded: entry claims {claimed} bytes (cap {max_uncompressed_bytes})"
|
|
)));
|
|
}
|
|
|
|
let new_total = cumulative.saturating_add(claimed);
|
|
if new_total > max_uncompressed_bytes {
|
|
return Err(RelicarioError::BackupRestore(format!(
|
|
"size cap exceeded: cumulative size would reach {new_total} bytes (cap {max_uncompressed_bytes})"
|
|
)));
|
|
}
|
|
|
|
// Read the file body.
|
|
let mut body = Vec::with_capacity(claimed as usize);
|
|
entry.read_to_end(&mut body).map_err(|e| {
|
|
RelicarioError::BackupRestore(format!("failed to read entry body: {e}"))
|
|
})?;
|
|
|
|
cumulative += body.len() as u64;
|
|
|
|
result.push((path, body));
|
|
}
|
|
|
|
Ok(result)
|
|
}
|