From d6f9dbecee129d1ab25ec31d75c19b90990ed1c2 Mon Sep 17 00:00:00 2001 From: Alexander Weiss Date: Mon, 8 Aug 2022 23:14:11 +0200 Subject: [PATCH] Add escaping of filenames --- Cargo.lock | 66 +++++++++++ Cargo.toml | 5 + src/archiver/archiver_impl.rs | 2 +- src/backend/ignore.rs | 23 ++-- src/backend/node.rs | 206 +++++++++++++++++++++------------- 5 files changed, 216 insertions(+), 86 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ec912ca..0b473cf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -481,6 +481,16 @@ dependencies = [ "syn", ] +[[package]] +name = "env_logger" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3" +dependencies = [ + "log", + "regex", +] + [[package]] name = "fastrand" version = "1.8.0" @@ -589,6 +599,12 @@ version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c66a976bf5909d801bbef33416c41372779507e7a6b3a5e25e4749c58f776a" +[[package]] +name = "futures-timer" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c" + [[package]] name = "futures-util" version = "0.3.21" @@ -1185,6 +1201,28 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "quickcheck" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6" +dependencies = [ + "env_logger", + "log", + "rand", +] + +[[package]] +name = "quickcheck_macros" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b22a693222d716a9587786f37ac3f6b4faedb5b80c23914e7303ff5a1d8016e9" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "quote" version = "1.0.20" @@ -1354,6 +1392,31 @@ dependencies = [ "winapi", ] +[[package]] +name = "rstest" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9c9dc66cc29792b663ffb5269be669f1613664e69ad56441fdb895c2347b930" +dependencies = [ + "futures", + "futures-timer", + "rstest_macros", + "rustc_version", +] + +[[package]] +name = "rstest_macros" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5015e68a0685a95ade3eee617ff7101ab6a3fc689203101ca16ebc16f2b89c66" +dependencies = [ + "cfg-if", + "proc-macro2", + "quote", + "rustc_version", + "syn", +] + [[package]] name = "rust-argon2" version = "0.8.3" @@ -1409,9 +1472,12 @@ dependencies = [ "nix", "path-absolutize", "prettytable-rs", + "quickcheck", + "quickcheck_macros", "rand", "reqwest", "rpassword", + "rstest", "scrypt", "serde", "serde-aux", diff --git a/Cargo.toml b/Cargo.toml index 886ec95..3e6afed 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -73,3 +73,8 @@ gethostname = "0.2" humantime = "2" users = "0.11" itertools = "0.10" + +[dev-dependencies] +rstest = "0.15" +quickcheck = "1" +quickcheck_macros = "1" diff --git a/src/archiver/archiver_impl.rs b/src/archiver/archiver_impl.rs index 755d820..05c8f77 100644 --- a/src/archiver/archiver_impl.rs +++ b/src/archiver/archiver_impl.rs @@ -126,7 +126,7 @@ impl Archiver { self.stack.push((node, tree, parent)); return Ok(()); } else { - let node = Node::new_dir(p.to_os_string(), Metadata::default()); + let node = Node::new_node(p, NodeType::Dir, Metadata::default()); let new_parent = self.parent.sub_parent(&node).await?; let parent = std::mem::replace(&mut self.parent, new_parent); self.stack.push((node, tree, parent)); diff --git a/src/backend/ignore.rs b/src/backend/ignore.rs index 65f372e..de9512b 100644 --- a/src/backend/ignore.rs +++ b/src/backend/ignore.rs @@ -8,7 +8,7 @@ use clap::Parser; use ignore::{overrides::OverrideBuilder, DirEntry, Walk, WalkBuilder}; use users::{Groups, Users, UsersCache}; -use super::{node::Metadata, Node, ReadSource}; +use super::{node::Metadata, node::NodeType, Node, ReadSource}; pub struct LocalSource { builder: WalkBuilder, @@ -152,7 +152,7 @@ impl Iterator for LocalSource { // map_entry: turn entry into (Path, Node) fn map_entry(entry: DirEntry, with_atime: bool, cache: &UsersCache) -> Result<(PathBuf, Node)> { - let name = entry.file_name().to_os_string(); + let name = entry.file_name(); let m = entry.metadata()?; let uid = m.uid(); @@ -195,20 +195,25 @@ fn map_entry(entry: DirEntry, with_atime: bool, cache: &UsersCache) -> Result<(P let filetype = m.file_type(); let node = if m.is_dir() { - Node::new_dir(name, meta) + Node::new_node(name, NodeType::Dir, meta) } else if m.is_symlink() { let target = read_link(entry.path())?; - Node::new_symlink(name, target, meta) + let node_type = NodeType::Symlink { + linktarget: target.to_str().expect("no unicode").to_string(), + }; + Node::new_node(name, node_type, meta) } else if filetype.is_block_device() { - Node::new_dev(name, meta, m.rdev()) + let node_type = NodeType::Dev { device: m.rdev() }; + Node::new_node(name, node_type, meta) } else if filetype.is_char_device() { - Node::new_chardev(name, meta, m.rdev()) + let node_type = NodeType::Chardev { device: m.rdev() }; + Node::new_node(name, node_type, meta) } else if filetype.is_fifo() { - Node::new_fifo(name, meta) + Node::new_node(name, NodeType::Fifo, meta) } else if filetype.is_socket() { - Node::new_socket(name, meta) + Node::new_node(name, NodeType::Socket, meta) } else { - Node::new_file(name, meta) + Node::new_node(name, NodeType::File, meta) }; Ok((entry.path().to_path_buf(), node)) } diff --git a/src/backend/node.rs b/src/backend/node.rs index 28f1a19..5d28151 100644 --- a/src/backend/node.rs +++ b/src/backend/node.rs @@ -1,7 +1,9 @@ -use std::ffi::OsString; +use std::ffi::{OsStr, OsString}; use std::fmt::Debug; -use std::path::PathBuf; +use std::os::unix::ffi::OsStrExt; +use std::str::FromStr; +use anyhow::{anyhow, bail, Result}; use chrono::{DateTime, Local}; use derive_getters::Getters; use derive_more::{Constructor, IsVariant}; @@ -12,15 +14,15 @@ use crate::id::Id; #[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Constructor)] pub struct Node { - name: String, + pub name: String, #[serde(flatten)] - node_type: NodeType, + pub node_type: NodeType, #[serde(flatten)] - meta: Metadata, + pub meta: Metadata, #[serde(default, deserialize_with = "deserialize_default_from_null")] - content: Option>, + pub content: Option>, #[serde(default, skip_serializing_if = "Option::is_none")] - subtree: Option, + pub subtree: Option, } #[derive(Clone, Debug, PartialEq, Serialize, Deserialize, IsVariant)] @@ -76,78 +78,15 @@ fn is_default(t: &T) -> bool { } impl Node { - pub fn new_file(name: OsString, meta: Metadata) -> Self { - Self { - name: name.to_str().expect("no unicode").to_string(), - node_type: NodeType::File, + pub fn new_node(name: &OsStr, node_type: NodeType, meta: Metadata) -> Self { + Node { + name: escape_filename(name), + node_type, content: None, subtree: None, meta, } } - - pub fn new_dir(name: OsString, meta: Metadata) -> Self { - Self { - name: name.to_str().expect("no unicode").to_string(), - node_type: NodeType::Dir, - content: None, - subtree: None, - meta, - } - } - - pub fn new_symlink(name: OsString, target: PathBuf, meta: Metadata) -> Self { - Self { - name: name.to_str().expect("no unicode").to_string(), - node_type: NodeType::Symlink { - linktarget: target.to_str().expect("no unicode").to_string(), - }, - content: None, - subtree: None, - meta, - } - } - - pub fn new_dev(name: OsString, meta: Metadata, device: u64) -> Self { - Self { - name: name.to_str().expect("no unicode").to_string(), - node_type: NodeType::Dev { device }, - content: None, - subtree: None, - meta, - } - } - - pub fn new_chardev(name: OsString, meta: Metadata, device: u64) -> Self { - Self { - name: name.to_str().expect("no unicode").to_string(), - node_type: NodeType::Chardev { device }, - content: None, - subtree: None, - meta, - } - } - - pub fn new_fifo(name: OsString, meta: Metadata) -> Self { - Self { - name: name.to_str().expect("no unicode").to_string(), - node_type: NodeType::Fifo, - content: None, - subtree: None, - meta, - } - } - - pub fn new_socket(name: OsString, meta: Metadata) -> Self { - Self { - name: name.to_str().expect("no unicode").to_string(), - node_type: NodeType::Socket, - content: None, - subtree: None, - meta, - } - } - pub fn is_dir(&self) -> bool { self.node_type == NodeType::Dir } @@ -160,8 +99,8 @@ impl Node { self.content = Some(content); } - pub fn name(&self) -> &String { - &self.name + pub fn name(&self) -> OsString { + unescape_filename(&self.name).unwrap_or_else(|_| OsString::from_str(&self.name).unwrap()) } pub fn node_type(&self) -> &NodeType { @@ -180,3 +119,118 @@ impl Node { &self.subtree } } + +pub fn escape_filename(name: &OsStr) -> String { + name.as_bytes().escape_ascii().to_string() +} + +// inspired by the enquote crate +pub fn unescape_filename(s: &str) -> Result { + let mut chars = s.chars(); + let mut u = Vec::new(); + loop { + match chars.next() { + None => break, + Some(c) => match c { + '\\' => match chars.next() { + None => bail!("UnexpectedEOF"), + Some(c) => match c { + '\\' => u.push(b'\\'), + '"' => u.push(b'"'), + '\'' => u.push(b'\''), + '`' => u.push(b'`'), + 'a' => u.push(b'\x07'), + 'b' => u.push(b'\x08'), + 'f' => u.push(b'\x0c'), + 'n' => u.push(b'\n'), + 'r' => u.push(b'\r'), + 't' => u.push(b'\t'), + 'v' => u.push(b'\x0b'), + // hex + 'x' => { + let hex = take(&mut chars, 2); + u.push(u8::from_str_radix(&hex, 16)?) + } + // unicode + 'u' => { + let n = u32::from_str_radix(&take(&mut chars, 4), 16)?; + let c = + std::char::from_u32(n).ok_or_else(|| anyhow!("invalid unicode"))?; + let mut bytes = vec![0u8; c.len_utf8()]; + c.encode_utf8(&mut bytes); + u.extend_from_slice(&bytes); + } + 'U' => { + let n = u32::from_str_radix(&take(&mut chars, 8), 16)?; + let c = + std::char::from_u32(n).ok_or_else(|| anyhow!("invalid unicode"))?; + let mut bytes = vec![0u8; c.len_utf8()]; + c.encode_utf8(&mut bytes); + u.extend_from_slice(&bytes); + } + _ => bail!("UnrecognizedEscape"), + }, + }, + // normal char + _ => { + let mut bytes = vec![0u8; c.len_utf8()]; + c.encode_utf8(&mut bytes); + u.extend_from_slice(&bytes); + } + }, + } + } + + Ok(OsStr::from_bytes(&u).to_os_string()) +} + +#[inline] +// Iterator#take cannot be used because it consumes the iterator +fn take>(iterator: &mut I, n: usize) -> String { + let mut s = String::with_capacity(n); + for _ in 0..n { + s.push(iterator.next().unwrap_or_default()); + } + s +} + +#[cfg(test)] +mod tests { + use super::*; + + use quickcheck_macros::quickcheck; + use rstest::rstest; + + #[quickcheck] + fn escape_unescape_is_identity(bytes: Vec) -> bool { + let name = OsStr::from_bytes(&bytes); + name == &match unescape_filename(&escape_filename(name)) { + Ok(s) => s, + Err(_) => return false, + } + } + + #[rstest] + #[case(r#"\\"#, b"\\")] + #[case(r#"\""#, b"\"")] + #[case(r#"\'"#, b"\'")] + #[case(r#"\`"#, b"`")] + #[case(r#"\a"#, b"\x07")] + #[case(r#"\b"#, b"\x08")] + #[case(r#"\v"#, b"\x0b")] + #[case(r#"\f"#, b"\x0c")] + #[case(r#"\n"#, b"\n")] + #[case(r#"\r"#, b"\r")] + #[case(r#"\t"#, b"\t")] + #[case(r#"\xab"#, b"\xab")] + #[case(r#"\xAB"#, b"\xab")] + #[case(r#"\xFF"#, b"\xff")] + #[case(r#"\u00df"#, b"\xc3\x9f")] + #[case(r#"\u00DF"#, b"\xc3\x9f")] + #[case(r#"\u2764"#, b"\xe2\x9d\xa4")] + #[case(r#"\U0001f4af"#, b"\xf0\x9f\x92\xaf")] + fn unescape_cases(#[case] input: &str, #[case] expected: &[u8]) { + let expected = OsStr::from_bytes(expected); + assert_eq!(expected, unescape_filename(input).unwrap()) + } +}