//! Safe tar unpacking for backup restore. //! //! The standard `tar::Archive::unpack` has no guards against path traversal, //! absolute paths, symlinks, hardlinks, or tar bombs. This module replaces it //! with `safe_unpack_git_archive`, which validates every entry before returning //! `(relative_path, bytes)` pairs to the caller. use std::io::Read; use std::path::{Component, PathBuf}; use tar::EntryType; use crate::error::{RelicarioError, Result}; /// Default cap on total uncompressed bytes extracted in one restore (1 GiB). pub const DEFAULT_MAX_UNCOMPRESSED: u64 = 1024 * 1024 * 1024; /// Decode `tar_bytes` and return `(relative_path, file_bytes)` pairs for /// regular files only. /// /// # Errors /// /// Returns `Err(RelicarioError::BackupRestore(...))` if: /// /// - Any path component is `..` (`Component::ParentDir`) — "path traversal blocked". /// - Any path starts with `/` (`Component::RootDir`) — "path traversal blocked". /// - Any path has a Windows drive prefix (`Component::Prefix`) — "path traversal blocked". /// - An entry is a symlink or hardlink — "symlink/link rejected". /// - An entry's declared size exceeds `max_uncompressed_bytes` — "size cap exceeded". /// - The running total of all entry sizes exceeds `max_uncompressed_bytes` — "size cap exceeded". /// - An entry has an unexpected type (not regular file, not directory) — "unexpected entry type". pub fn safe_unpack_git_archive( tar_bytes: &[u8], max_uncompressed_bytes: u64, ) -> Result)>> { let mut archive = tar::Archive::new(tar_bytes); let entries = archive .entries() .map_err(|e| RelicarioError::BackupRestore(format!("failed to read tar entries: {e}")))?; let mut result: Vec<(PathBuf, Vec)> = Vec::new(); let mut cumulative: u64 = 0; for entry in entries { let mut entry = entry.map_err(|e| { RelicarioError::BackupRestore(format!("failed to read tar entry: {e}")) })?; let header = entry.header(); let entry_type = header.entry_type(); // Reject symlinks and hardlinks. match entry_type { EntryType::Symlink => { return Err(RelicarioError::BackupRestore( "symlink entry rejected".to_string(), )); } EntryType::Link => { return Err(RelicarioError::BackupRestore( "hardlink entry rejected".to_string(), )); } EntryType::Directory => { // Directories are implicit — skip without reading body. continue; } EntryType::Regular | EntryType::Continuous | EntryType::GNUSparse => { // These are normal file types; fall through to path checks. } _ => { return Err(RelicarioError::BackupRestore(format!( "unexpected entry type: {:?}", entry_type ))); } } // Validate the path. let path = entry.path().map_err(|e| { RelicarioError::BackupRestore(format!("invalid path in tar entry: {e}")) })?; let path = path.into_owned(); for component in path.components() { match component { Component::ParentDir => { return Err(RelicarioError::BackupRestore( "path traversal blocked: entry contains '..' component".to_string(), )); } Component::RootDir => { return Err(RelicarioError::BackupRestore( "path traversal blocked: entry has absolute path".to_string(), )); } Component::Prefix(_) => { return Err(RelicarioError::BackupRestore( "path traversal blocked: entry has Windows drive prefix".to_string(), )); } Component::Normal(_) | Component::CurDir => { // Acceptable components. } } } // Check declared size before reading body. let claimed = header.size().map_err(|e| { RelicarioError::BackupRestore(format!("could not read entry size: {e}")) })?; if claimed > max_uncompressed_bytes { return Err(RelicarioError::BackupRestore(format!( "size cap exceeded: entry claims {claimed} bytes (cap {max_uncompressed_bytes})" ))); } let new_total = cumulative.saturating_add(claimed); if new_total > max_uncompressed_bytes { return Err(RelicarioError::BackupRestore(format!( "size cap exceeded: cumulative size would reach {new_total} bytes (cap {max_uncompressed_bytes})" ))); } // Read the file body. let mut body = Vec::with_capacity(claimed as usize); entry.read_to_end(&mut body).map_err(|e| { RelicarioError::BackupRestore(format!("failed to read entry body: {e}")) })?; cumulative += body.len() as u64; result.push((path, body)); } Ok(result) }