Add escaping of filenames

This commit is contained in:
Alexander Weiss 2022-08-08 23:14:11 +02:00
parent effd7240bd
commit d6f9dbecee
5 changed files with 216 additions and 86 deletions

66
Cargo.lock generated
View File

@ -481,6 +481,16 @@ dependencies = [
"syn",
]
[[package]]
name = "env_logger"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3"
dependencies = [
"log",
"regex",
]
[[package]]
name = "fastrand"
version = "1.8.0"
@ -589,6 +599,12 @@ version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57c66a976bf5909d801bbef33416c41372779507e7a6b3a5e25e4749c58f776a"
[[package]]
name = "futures-timer"
version = "3.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c"
[[package]]
name = "futures-util"
version = "0.3.21"
@ -1185,6 +1201,28 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "quickcheck"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6"
dependencies = [
"env_logger",
"log",
"rand",
]
[[package]]
name = "quickcheck_macros"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b22a693222d716a9587786f37ac3f6b4faedb5b80c23914e7303ff5a1d8016e9"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "quote"
version = "1.0.20"
@ -1354,6 +1392,31 @@ dependencies = [
"winapi",
]
[[package]]
name = "rstest"
version = "0.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e9c9dc66cc29792b663ffb5269be669f1613664e69ad56441fdb895c2347b930"
dependencies = [
"futures",
"futures-timer",
"rstest_macros",
"rustc_version",
]
[[package]]
name = "rstest_macros"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5015e68a0685a95ade3eee617ff7101ab6a3fc689203101ca16ebc16f2b89c66"
dependencies = [
"cfg-if",
"proc-macro2",
"quote",
"rustc_version",
"syn",
]
[[package]]
name = "rust-argon2"
version = "0.8.3"
@ -1409,9 +1472,12 @@ dependencies = [
"nix",
"path-absolutize",
"prettytable-rs",
"quickcheck",
"quickcheck_macros",
"rand",
"reqwest",
"rpassword",
"rstest",
"scrypt",
"serde",
"serde-aux",

View File

@ -73,3 +73,8 @@ gethostname = "0.2"
humantime = "2"
users = "0.11"
itertools = "0.10"
[dev-dependencies]
rstest = "0.15"
quickcheck = "1"
quickcheck_macros = "1"

View File

@ -126,7 +126,7 @@ impl<BE: DecryptWriteBackend, I: IndexedBackend> Archiver<BE, I> {
self.stack.push((node, tree, parent));
return Ok(());
} else {
let node = Node::new_dir(p.to_os_string(), Metadata::default());
let node = Node::new_node(p, NodeType::Dir, Metadata::default());
let new_parent = self.parent.sub_parent(&node).await?;
let parent = std::mem::replace(&mut self.parent, new_parent);
self.stack.push((node, tree, parent));

View File

@ -8,7 +8,7 @@ use clap::Parser;
use ignore::{overrides::OverrideBuilder, DirEntry, Walk, WalkBuilder};
use users::{Groups, Users, UsersCache};
use super::{node::Metadata, Node, ReadSource};
use super::{node::Metadata, node::NodeType, Node, ReadSource};
pub struct LocalSource {
builder: WalkBuilder,
@ -152,7 +152,7 @@ impl Iterator for LocalSource {
// map_entry: turn entry into (Path, Node)
fn map_entry(entry: DirEntry, with_atime: bool, cache: &UsersCache) -> Result<(PathBuf, Node)> {
let name = entry.file_name().to_os_string();
let name = entry.file_name();
let m = entry.metadata()?;
let uid = m.uid();
@ -195,20 +195,25 @@ fn map_entry(entry: DirEntry, with_atime: bool, cache: &UsersCache) -> Result<(P
let filetype = m.file_type();
let node = if m.is_dir() {
Node::new_dir(name, meta)
Node::new_node(name, NodeType::Dir, meta)
} else if m.is_symlink() {
let target = read_link(entry.path())?;
Node::new_symlink(name, target, meta)
let node_type = NodeType::Symlink {
linktarget: target.to_str().expect("no unicode").to_string(),
};
Node::new_node(name, node_type, meta)
} else if filetype.is_block_device() {
Node::new_dev(name, meta, m.rdev())
let node_type = NodeType::Dev { device: m.rdev() };
Node::new_node(name, node_type, meta)
} else if filetype.is_char_device() {
Node::new_chardev(name, meta, m.rdev())
let node_type = NodeType::Chardev { device: m.rdev() };
Node::new_node(name, node_type, meta)
} else if filetype.is_fifo() {
Node::new_fifo(name, meta)
Node::new_node(name, NodeType::Fifo, meta)
} else if filetype.is_socket() {
Node::new_socket(name, meta)
Node::new_node(name, NodeType::Socket, meta)
} else {
Node::new_file(name, meta)
Node::new_node(name, NodeType::File, meta)
};
Ok((entry.path().to_path_buf(), node))
}

View File

@ -1,7 +1,9 @@
use std::ffi::OsString;
use std::ffi::{OsStr, OsString};
use std::fmt::Debug;
use std::path::PathBuf;
use std::os::unix::ffi::OsStrExt;
use std::str::FromStr;
use anyhow::{anyhow, bail, Result};
use chrono::{DateTime, Local};
use derive_getters::Getters;
use derive_more::{Constructor, IsVariant};
@ -12,15 +14,15 @@ use crate::id::Id;
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Constructor)]
pub struct Node {
name: String,
pub name: String,
#[serde(flatten)]
node_type: NodeType,
pub node_type: NodeType,
#[serde(flatten)]
meta: Metadata,
pub meta: Metadata,
#[serde(default, deserialize_with = "deserialize_default_from_null")]
content: Option<Vec<Id>>,
pub content: Option<Vec<Id>>,
#[serde(default, skip_serializing_if = "Option::is_none")]
subtree: Option<Id>,
pub subtree: Option<Id>,
}
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, IsVariant)]
@ -76,78 +78,15 @@ fn is_default<T: Default + PartialEq>(t: &T) -> bool {
}
impl Node {
pub fn new_file(name: OsString, meta: Metadata) -> Self {
Self {
name: name.to_str().expect("no unicode").to_string(),
node_type: NodeType::File,
pub fn new_node(name: &OsStr, node_type: NodeType, meta: Metadata) -> Self {
Node {
name: escape_filename(name),
node_type,
content: None,
subtree: None,
meta,
}
}
pub fn new_dir(name: OsString, meta: Metadata) -> Self {
Self {
name: name.to_str().expect("no unicode").to_string(),
node_type: NodeType::Dir,
content: None,
subtree: None,
meta,
}
}
pub fn new_symlink(name: OsString, target: PathBuf, meta: Metadata) -> Self {
Self {
name: name.to_str().expect("no unicode").to_string(),
node_type: NodeType::Symlink {
linktarget: target.to_str().expect("no unicode").to_string(),
},
content: None,
subtree: None,
meta,
}
}
pub fn new_dev(name: OsString, meta: Metadata, device: u64) -> Self {
Self {
name: name.to_str().expect("no unicode").to_string(),
node_type: NodeType::Dev { device },
content: None,
subtree: None,
meta,
}
}
pub fn new_chardev(name: OsString, meta: Metadata, device: u64) -> Self {
Self {
name: name.to_str().expect("no unicode").to_string(),
node_type: NodeType::Chardev { device },
content: None,
subtree: None,
meta,
}
}
pub fn new_fifo(name: OsString, meta: Metadata) -> Self {
Self {
name: name.to_str().expect("no unicode").to_string(),
node_type: NodeType::Fifo,
content: None,
subtree: None,
meta,
}
}
pub fn new_socket(name: OsString, meta: Metadata) -> Self {
Self {
name: name.to_str().expect("no unicode").to_string(),
node_type: NodeType::Socket,
content: None,
subtree: None,
meta,
}
}
pub fn is_dir(&self) -> bool {
self.node_type == NodeType::Dir
}
@ -160,8 +99,8 @@ impl Node {
self.content = Some(content);
}
pub fn name(&self) -> &String {
&self.name
pub fn name(&self) -> OsString {
unescape_filename(&self.name).unwrap_or_else(|_| OsString::from_str(&self.name).unwrap())
}
pub fn node_type(&self) -> &NodeType {
@ -180,3 +119,118 @@ impl Node {
&self.subtree
}
}
/// Turns the raw bytes of a file name into a printable, pure-ASCII string.
///
/// Each byte is rendered the way `std::ascii::escape_default` renders it:
/// printable ASCII is kept, `\t`, `\n`, `\r`, `\\`, `\'` and `\"` get their
/// short escapes, and every other byte becomes `\xNN`.
pub fn escape_filename(name: &OsStr) -> String {
    let raw = name.as_bytes();
    let mut escaped = String::with_capacity(raw.len());
    for &byte in raw {
        // Each escape sequence consists of ASCII bytes only, so the
        // byte-to-char conversion is lossless.
        escaped.extend(std::ascii::escape_default(byte).map(char::from));
    }
    escaped
}
/// Decodes an escaped name back into the raw OS string.
///
/// Recognised escapes are `\\`, `\"`, `\'`, `` \` ``, `\a`, `\b`, `\f`, `\n`, `\r`, `\t`,
/// `\v`, `\xNN` (one raw byte), and `\uNNNN` / `\UNNNNNNNN` (a Unicode scalar value that
/// is appended as UTF-8). Every other character is copied through as its UTF-8 encoding.
///
/// # Errors
///
/// Fails when the input ends directly after a backslash, when the character following a
/// backslash is not one of the escapes above, when a hex escape is truncated or contains
/// a non-hex character, or when a `\u`/`\U` value is not a valid Unicode scalar value.
// inspired by the enquote crate
pub fn unescape_filename(s: &str) -> Result<OsString> {
    // Reads exactly `digits` characters and checks that all of them are hex digits.
    fn hex_digits(chars: &mut std::str::Chars<'_>, digits: usize) -> Result<String> {
        let hex = take(chars, digits);
        // `from_str_radix` on its own also accepts a leading '+', which is never part of
        // a valid escape. A truncated escape is padded by `take` and is rejected here too.
        if !hex.chars().all(|c| c.is_ascii_hexdigit()) {
            bail!("InvalidHexEscape");
        }
        Ok(hex)
    }

    // Appends the UTF-8 encoding of `c` using a stack buffer instead of a temporary Vec.
    fn push_utf8(out: &mut Vec<u8>, c: char) {
        let mut buf = [0u8; 4];
        out.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
    }

    // Appends the UTF-8 encoding of the code point `n`, rejecting surrogates and
    // out-of-range values.
    fn push_code_point(out: &mut Vec<u8>, n: u32) -> Result<()> {
        let c = std::char::from_u32(n).ok_or_else(|| anyhow!("invalid unicode"))?;
        push_utf8(out, c);
        Ok(())
    }

    let mut chars = s.chars();
    // Every escape decodes to at most as many bytes as it occupies in the input.
    let mut u = Vec::with_capacity(s.len());

    while let Some(c) = chars.next() {
        // normal char
        if c != '\\' {
            push_utf8(&mut u, c);
            continue;
        }

        match chars.next() {
            None => bail!("UnexpectedEOF"),
            Some('\\') => u.push(b'\\'),
            Some('"') => u.push(b'"'),
            Some('\'') => u.push(b'\''),
            Some('`') => u.push(b'`'),
            Some('a') => u.push(b'\x07'),
            Some('b') => u.push(b'\x08'),
            Some('f') => u.push(b'\x0c'),
            Some('n') => u.push(b'\n'),
            Some('r') => u.push(b'\r'),
            Some('t') => u.push(b'\t'),
            Some('v') => u.push(b'\x0b'),
            // hex: a single raw byte
            Some('x') => u.push(u8::from_str_radix(&hex_digits(&mut chars, 2)?, 16)?),
            // unicode: four or eight hex digits naming a scalar value
            Some('u') => {
                push_code_point(&mut u, u32::from_str_radix(&hex_digits(&mut chars, 4)?, 16)?)?
            }
            Some('U') => {
                push_code_point(&mut u, u32::from_str_radix(&hex_digits(&mut chars, 8)?, 16)?)?
            }
            Some(_) => bail!("UnrecognizedEscape"),
        }
    }

    Ok(OsStr::from_bytes(&u).to_os_string())
}
/// Pulls `n` characters from `iterator` and returns them as a `String`.
///
/// `Iterator::take` is not used because it would consume the iterator, while callers
/// need to keep reading from it afterwards. If the iterator yields `None`, the default
/// `char` (NUL) is pushed in its place, so the result always holds `n` characters.
#[inline]
fn take<I: Iterator<Item = char>>(iterator: &mut I, n: usize) -> String {
    (0..n)
        .map(|_| iterator.next().unwrap_or('\0'))
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;
    use quickcheck_macros::quickcheck;
    use rstest::rstest;

    /// Property: unescaping an escaped name yields the original bytes, for any byte string.
    #[quickcheck]
    fn escape_unescape_is_identity(bytes: Vec<u8>) -> bool {
        let name = OsStr::from_bytes(&bytes);
        matches!(unescape_filename(&escape_filename(name)), Ok(s) if s == name)
    }

    /// Every supported escape decodes to the expected raw bytes.
    #[rstest]
    #[case(r#"\\"#, b"\\")]
    #[case(r#"\""#, b"\"")]
    #[case(r#"\'"#, b"\'")]
    #[case(r#"\`"#, b"`")]
    #[case(r#"\a"#, b"\x07")]
    #[case(r#"\b"#, b"\x08")]
    #[case(r#"\v"#, b"\x0b")]
    #[case(r#"\f"#, b"\x0c")]
    #[case(r#"\n"#, b"\n")]
    #[case(r#"\r"#, b"\r")]
    #[case(r#"\t"#, b"\t")]
    #[case(r#"\xab"#, b"\xab")]
    #[case(r#"\xAB"#, b"\xab")]
    #[case(r#"\xFF"#, b"\xff")]
    #[case(r#"\u00df"#, b"\xc3\x9f")]
    #[case(r#"\u00DF"#, b"\xc3\x9f")]
    #[case(r#"\u2764"#, b"\xe2\x9d\xa4")]
    #[case(r#"\U0001f4af"#, b"\xf0\x9f\x92\xaf")]
    fn unescape_cases(#[case] input: &str, #[case] expected: &[u8]) {
        let expected = OsStr::from_bytes(expected);
        assert_eq!(expected, unescape_filename(input).unwrap())
    }

    /// Malformed escapes must be reported as errors instead of being decoded.
    #[rstest]
    // backslash at end of input
    #[case(r#"\"#)]
    // unknown escape letter
    #[case(r#"\q"#)]
    // truncated hex escapes
    #[case(r#"\x"#)]
    #[case(r#"\x4"#)]
    #[case(r#"\u12"#)]
    // non-hex digits
    #[case(r#"\xzz"#)]
    // surrogate and out-of-range code points
    #[case(r#"\ud800"#)]
    #[case(r#"\U00110000"#)]
    fn unescape_rejects_malformed(#[case] input: &str) {
        assert!(unescape_filename(input).is_err());
    }
}