diff --git a/src/backend/ignore.rs b/src/backend/ignore.rs new file mode 100644 index 0000000..e771739 --- /dev/null +++ b/src/backend/ignore.rs @@ -0,0 +1,213 @@ +use std::fs::{read_link, File}; +use std::os::linux::fs::MetadataExt; +use std::path::{Path, PathBuf}; + +use anyhow::Result; +use chrono::{TimeZone, Utc}; +use clap::Parser; +use ignore::{overrides::OverrideBuilder, DirEntry, Walk, WalkBuilder}; +use users::{Groups, Users, UsersCache}; + +use super::{node::Metadata, Node, ReadSource}; + +pub struct LocalSource { + builder: WalkBuilder, + walker: Walk, + with_atime: bool, + cache: UsersCache, +} + +#[derive(Parser)] +pub struct LocalSourceOptions { + /// Save access time for files and directories + #[clap(long)] + with_atime: bool, + + /// Exclude other file systems, don't cross filesystem boundaries and subvolumes + #[clap(long, short = 'x')] + one_file_system: bool, + + /// Glob pattern to include/exclue (can be specified multiple times) + #[clap(long, short = 'g')] + glob: Vec, + + /// Read glob patterns to exclude/include from a file (can be specified multiple times) + #[clap(long, value_name = "FILE")] + glob_file: Vec, + + /// Exclude contents of directories containing filename (can be specified multiple times) + #[clap(long, value_name = "FILE")] + exclude_if_present: Vec, + + /// Ignore files based on .gitignore files + #[clap(long)] + git_ignore: bool, + + /// Same as --glob pattern but ignores the casing of filenames + #[clap(long, value_name = "GLOB")] + iglob: Vec, + + /// Same as --glob-file ignores the casing of filenames in patterns + #[clap(long, value_name = "FILE")] + iglob_file: Vec, +} + +impl LocalSource { + pub fn new(opts: LocalSourceOptions, backup_path: PathBuf) -> Result { + let mut walk_builder = WalkBuilder::new(backup_path.clone()); + /* + for path in &paths[1..] { + wb.add(path); + } + */ + + let mut override_builder = OverrideBuilder::new("/"); + + for g in opts.glob { + override_builder.add(&g)?; + } + + for file in opts.glob_file { + for line in std::fs::read_to_string(file)?.lines() { + override_builder.add(line)?; + } + } + + override_builder.case_insensitive(true)?; + for g in opts.iglob { + override_builder.add(&g)?; + } + + for file in opts.iglob_file { + for line in std::fs::read_to_string(file)?.lines() { + override_builder.add(line)?; + } + } + + walk_builder + .follow_links(false) + .hidden(false) + .ignore(false) + .git_ignore(opts.git_ignore) + .sort_by_file_path(Path::cmp) + .same_file_system(opts.one_file_system) + .overrides(override_builder.build()?); + + if !opts.exclude_if_present.is_empty() { + walk_builder.filter_entry(move |entry| match entry.file_type() { + None => true, + Some(tpe) if tpe.is_dir() => { + for file in &opts.exclude_if_present { + if entry.path().join(file).exists() { + return false; + } + } + true + } + Some(_) => true, + }); + } + + let with_atime = opts.with_atime; + let cache = UsersCache::new(); + let builder = walk_builder; + let walker = builder.build(); + + Ok(Self { + builder, + walker, + with_atime, + cache, + }) + } +} + +impl ReadSource for LocalSource { + type Reader = File; + fn read(path: &Path) -> Result { + Ok(File::open(path)?) + } + fn size(&self) -> Result { + let mut size = 0; + for entry in self.builder.build() { + if let Err(e) = entry.and_then(|e| e.metadata()).map(|m| { + size += if m.is_dir() { 0 } else { m.len() }; + }) { + eprintln!("ignoring error {}", e); + } + } + Ok(size) + } +} + +impl Iterator for LocalSource { + type Item = Result<(PathBuf, Node)>; + + fn next(&mut self) -> std::option::Option { + self.walker + .next() + .map(|e| map_entry(e?, self.with_atime, &self.cache)) + } +} + +// map_entry: turn entry into (Path, Node) +fn map_entry(entry: DirEntry, with_atime: bool, cache: &UsersCache) -> Result<(PathBuf, Node)> { + let name = entry.file_name().to_os_string(); + let m = entry.metadata()?; + + let uid = m.st_uid(); + let gid = m.st_gid(); + let user = cache + .get_user_by_uid(uid) + .map(|u| u.name().to_str().unwrap().to_string()); + let group = cache + .get_group_by_gid(gid) + .map(|g| g.name().to_str().unwrap().to_string()); + + let mtime = Some( + Utc.timestamp(m.st_mtime(), m.st_mtime_nsec().try_into()?) + .into(), + ); + let atime = if with_atime { + Some( + Utc.timestamp(m.st_atime(), m.st_atime_nsec().try_into()?) + .into(), + ) + } else { + // TODO: Use None here? + mtime + }; + let ctime = Some( + Utc.timestamp(m.st_ctime(), m.st_ctime_nsec().try_into()?) + .into(), + ); + let size = if m.is_dir() { 0 } else { m.len() }; + let mode = m.st_mode(); + let inode = m.st_ino(); + let device_id = m.st_dev(); + let links = m.st_nlink(); + + let meta = Metadata { + size, + mtime, + atime, + ctime, + mode, + uid, + gid, + user, + group, + inode, + device_id, + links, + }; + let node = if m.is_dir() { + Node::new_dir(name, meta) + } else if m.is_symlink() { + let target = read_link(entry.path())?; + Node::new_symlink(name, target, meta) + } else { + Node::new_file(name, meta) + }; + Ok((entry.path().to_path_buf(), node)) +} diff --git a/src/backend/mod.rs b/src/backend/mod.rs index ee31ed3..1ce5b70 100644 --- a/src/backend/mod.rs +++ b/src/backend/mod.rs @@ -1,16 +1,18 @@ use std::io::{Cursor, Read}; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; -use anyhow::anyhow; +use anyhow::{anyhow, Result}; use crate::crypto::hash; use crate::id::Id; pub mod decrypt; pub mod dry_run; +pub mod ignore; pub mod local; pub mod node; +pub use self::ignore::*; pub use decrypt::*; pub use dry_run::*; pub use local::*; @@ -121,13 +123,12 @@ pub trait WriteBackend: Clone { } } -pub trait WalkerItem { - fn node(&self) -> Node; - fn read(&self) -> Box; +pub trait ReadSource: Iterator> { + type Reader: Read; + fn read(path: &Path) -> Result; + fn size(&self) -> Result; } -pub trait ReadSource: Iterator {} - pub trait WriteSource: Clone { fn create(&self, path: PathBuf, node: Node); fn set_metadata(&self, path: PathBuf, node: Node); diff --git a/src/commands/backup.rs b/src/commands/backup.rs index 5e975ee..3957a3a 100644 --- a/src/commands/backup.rs +++ b/src/commands/backup.rs @@ -1,33 +1,26 @@ use std::ffi::OsString; -use std::fs::read_link; -use std::os::linux::fs::MetadataExt; -use std::path::{Path, PathBuf}; +use std::path::PathBuf; use anyhow::{anyhow, Result}; -use chrono::{TimeZone, Utc}; use clap::Parser; use gethostname::gethostname; -use ignore::{overrides::OverrideBuilder, DirEntry, WalkBuilder}; use indicatif::{ProgressBar, ProgressStyle}; use path_absolutize::*; -use users::{cache::UsersCache, Groups, Users}; use vlog::*; use crate::archiver::{Archiver, Parent}; -use crate::backend::{DecryptFullBackend, DryRunBackend}; -use crate::blob::{Metadata, Node}; +use crate::backend::{ + DecryptFullBackend, DryRunBackend, LocalSource, LocalSourceOptions, ReadSource, +}; use crate::index::IndexBackend; use crate::repo::{ConfigFile, SnapshotFile}; + #[derive(Parser)] pub(super) struct Opts { /// Do not upload or write any data, just show what would be done #[clap(long, short = 'n')] dry_run: bool, - /// Save access time for files and directories - #[clap(long)] - with_atime: bool, - /// Snapshot to use as parent #[clap(long, value_name = "SNAPSHOT", conflicts_with = "force")] parent: Option, @@ -36,33 +29,8 @@ pub(super) struct Opts { #[clap(long, short, conflicts_with = "parent")] force: bool, - /// Exclude other file systems, don't cross filesystem boundaries and subvolumes - #[clap(long, short = 'x')] - one_file_system: bool, - - /// Glob pattern to include/exclue (can be specified multiple times) - #[clap(long, short = 'g')] - glob: Vec, - - /// Read glob patterns to exclude/include from a file (can be specified multiple times) - #[clap(long, value_name = "FILE")] - glob_file: Vec, - - /// Exclude contents of directories containing filename (can be specified multiple times) - #[clap(long, value_name = "FILE")] - exclude_if_present: Vec, - - /// Ignore files based on .gitignore files - #[clap(long)] - git_ignore: bool, - - /// Same as --glob pattern but ignores the casing of filenames - #[clap(long, value_name = "GLOB")] - iglob: Vec, - - /// Same as --glob-file ignores the casing of filenames in patterns - #[clap(long, value_name = "FILE")] - iglob_file: Vec, + #[clap(flatten)] + ignore_opts: LocalSourceOptions, /// backup source source: String, @@ -106,83 +74,11 @@ pub(super) fn execute(opts: Opts, be: &impl DecryptFullBackend) -> Result<()> { let parent = Parent::new(&index, parent_tree.as_ref()); let mut archiver = Archiver::new(be, index, poly, parent)?; - let mut walk_builder = WalkBuilder::new(backup_path.clone()); - /* - for path in &paths[1..] { - wb.add(path); - } - */ - - let mut override_builder = OverrideBuilder::new("/"); - - for g in opts.glob { - override_builder.add(&g)?; - } - - for file in opts.glob_file { - for line in std::fs::read_to_string(file)?.lines() { - override_builder.add(line)?; - } - } - - override_builder.case_insensitive(true)?; - for g in opts.iglob { - override_builder.add(&g)?; - } - - for file in opts.iglob_file { - for line in std::fs::read_to_string(file)?.lines() { - override_builder.add(line)?; - } - } - - walk_builder - .follow_links(false) - .hidden(false) - .ignore(false) - .git_ignore(opts.git_ignore) - .sort_by_file_path(Path::cmp) - .same_file_system(opts.one_file_system) - .overrides(override_builder.build()?); - - if !opts.exclude_if_present.is_empty() { - walk_builder.filter_entry(move |entry| match entry.file_type() { - None => true, - Some(tpe) if tpe.is_dir() => { - for file in &opts.exclude_if_present { - if entry.path().join(file).exists() { - return false; - } - } - true - } - Some(_) => true, - }); - } - - // total size to backup => only used in progress bar - let mut size = 0; - if get_verbosity_level() == 1 { - v1!("scanning backup source..."); - for entry in walk_builder.build() { - if let Err(e) = entry.and_then(|e| e.metadata()).map(|m| { - size += if m.is_dir() { 0 } else { m.len() }; - }) { - eprintln!("ignoring error {}", e); - } - } - } - - let cache = UsersCache::new(); - - v1!("starting backup..."); - - let nodes = walk_builder - .build() - .map(|entry| map_entry(entry?, opts.with_atime, &cache)); + let src = LocalSource::new(opts.ignore_opts, backup_path.to_path_buf())?; let p = if get_verbosity_level() == 1 { - ProgressBar::new(size).with_style( + v1!("determining size of backup source..."); + ProgressBar::new(src.size()?).with_style( ProgressStyle::default_bar() .template("[{elapsed_precise}] {bar:40.cyan/blue} {bytes:>10}/{total_bytes:10}"), ) @@ -190,9 +86,9 @@ pub(super) fn execute(opts: Opts, be: &impl DecryptFullBackend) -> Result<()> { ProgressBar::hidden() }; - p.reset(); - for res in nodes { - if let Err(e) = res.and_then(|(path, node)| { + v1!("starting backup..."); + for item in src { + if let Err(e) = item.and_then(|(path, node)| { let size = *node.meta().size(); archiver.add_entry(&path, node)?; p.inc(size); @@ -210,65 +106,3 @@ pub(super) fn execute(opts: Opts, be: &impl DecryptFullBackend) -> Result<()> { Ok(()) } - -// map_entry: turn entry into a Path, a Node and a Reader -fn map_entry(entry: DirEntry, with_atime: bool, cache: &UsersCache) -> Result<(PathBuf, Node)> { - let name = entry.file_name().to_os_string(); - let m = entry.metadata()?; - - let uid = m.st_uid(); - let gid = m.st_gid(); - let user = cache - .get_user_by_uid(uid) - .map(|u| u.name().to_str().unwrap().to_string()); - let group = cache - .get_group_by_gid(gid) - .map(|g| g.name().to_str().unwrap().to_string()); - - let mtime = Some( - Utc.timestamp(m.st_mtime(), m.st_mtime_nsec().try_into()?) - .into(), - ); - let atime = if with_atime { - Some( - Utc.timestamp(m.st_atime(), m.st_atime_nsec().try_into()?) - .into(), - ) - } else { - // TODO: Use None here? - mtime - }; - let ctime = Some( - Utc.timestamp(m.st_ctime(), m.st_ctime_nsec().try_into()?) - .into(), - ); - let size = if m.is_dir() { 0 } else { m.len() }; - let mode = m.st_mode(); - let inode = m.st_ino(); - let device_id = m.st_dev(); - let links = m.st_nlink(); - - let meta = Metadata { - size, - mtime, - atime, - ctime, - mode, - uid, - gid, - user, - group, - inode, - device_id, - links, - }; - let node = if m.is_dir() { - Node::new_dir(name, meta) - } else if m.is_symlink() { - let target = read_link(entry.path())?; - Node::new_symlink(name, target, meta) - } else { - Node::new_file(name, meta) - }; - Ok((entry.path().to_path_buf(), node)) -}