From 7fe4ae6c056aab314219dd0dbbe694fa5dd0baf2 Mon Sep 17 00:00:00 2001 From: Alexander Weiss Date: Tue, 22 Feb 2022 01:26:45 +0100 Subject: [PATCH] correct handling of trees in backup --- src/archiver.rs | 151 +++++++++++++++++++++++++++++++++++++++++ src/backend/node.rs | 18 ++--- src/commands/backup.rs | 135 ++++-------------------------------- src/main.rs | 1 + 4 files changed, 176 insertions(+), 129 deletions(-) create mode 100644 src/archiver.rs diff --git a/src/archiver.rs b/src/archiver.rs new file mode 100644 index 0000000..17452c9 --- /dev/null +++ b/src/archiver.rs @@ -0,0 +1,151 @@ +use std::cell::RefCell; +use std::ffi::OsString; +use std::fs::{File, FileType}; +use std::io::{BufRead, BufReader}; +use std::path::{Path, PathBuf}; +use std::rc::Rc; + +use anyhow::Result; + +use crate::backend::DecryptWriteBackend; +use crate::blob::{BlobType, Node, Packer, Tree}; +use crate::chunker::ChunkIter; +use crate::crypto::{hash, CryptoKey}; +use crate::index::{Indexer, ReadIndex}; +use crate::repo::{SnapshotFile, TagList}; + +pub type SharedIndexer = Rc>>; + +pub struct Archiver { + path: PathBuf, + tree: Tree, + names: Vec, + trees: Vec, + size: u64, + count: u64, + be: BE, + index: I, + indexer: SharedIndexer, + data_packer: Packer, + tree_packer: Packer, + poly: u64, +} + +impl Archiver { + pub fn new(be: BE, key: C, index: I, poly: u64) -> Result { + let indexer = Rc::new(RefCell::new(Indexer::new(be.clone()))); + Ok(Self { + path: PathBuf::from("/"), + tree: Tree::new(), + names: Vec::new(), + trees: Vec::new(), + size: 0, + count: 0, + index, + data_packer: Packer::new(be.clone(), indexer.clone(), key.clone())?, + tree_packer: Packer::new(be.clone(), indexer.clone(), key.clone())?, + poly, + be, + indexer, + }) + } + + pub fn add_entry(&mut self, path: &Path, name: OsString, file_type: FileType) -> Result<()> { + let basepath = if file_type.is_dir() { + path + } else { + path.parent().unwrap() + }; + + self.finish_trees(&basepath)?; + + let missing_dirs = basepath.strip_prefix(&self.path)?; + println!("missing_dirs: {:?}", missing_dirs); + + for p in missing_dirs.iter() { + // new subdir + let tree = std::mem::replace(&mut self.tree, Tree::new()); + self.trees.push(tree); + self.names.push(p.to_os_string()); + self.path.push(p); + println!("add tree {:?}, path: {:?}", p, self.path); + } + + if file_type.is_file() { + let f = File::open(&path)?; + let reader: BufReader = BufReader::new(f); + self.backup_file(name, reader)?; + } + Ok(()) + } + + pub fn finish_trees(&mut self, path: &Path) -> Result<()> { + while !path.starts_with(&self.path) { + // go back to parent dir + let chunk = self.tree.serialize()?; + let id = hash(&chunk); + if !self.index.has(&id) { + self.tree_packer.add(&chunk, &id, BlobType::Tree)?; + } + self.tree = self.trees.pop().unwrap(); + let name = self.names.pop().unwrap(); + println!("finish: {:?}", name); + let node = Node::from_tree(name, id)?; + + self.tree.add(node); + self.path.pop(); + } + Ok(()) + } + + pub fn backup_file(&mut self, name: OsString, reader: impl BufRead) -> Result<()> { + let chunk_iter = ChunkIter::new(reader, &self.poly); + let mut content = Vec::new(); + let mut filesize: u64 = 0; + + for chunk in chunk_iter { + let chunk = chunk?; + filesize += chunk.len() as u64; + let id = hash(&chunk); + if !self.index.has(&id) { + self.data_packer.add(&chunk, &id, BlobType::Data)?; + } + content.push(id); + } + let node = Node::from_content(name, content, filesize)?; + self.tree.add(node); + self.count += 1; + self.size += filesize; + Ok(()) + } + + pub fn finalize_snapshot(&mut self, backup_path: PathBuf) -> Result<()> { + self.finish_trees(&PathBuf::from("/"))?; + + let chunk = self.tree.serialize()?; + let id = hash(&chunk); + if !self.index.has(&id) { + self.tree_packer.add(&chunk, &id, BlobType::Tree)?; + } + + self.data_packer.finalize()?; + self.tree_packer.finalize()?; + self.indexer.borrow().finalize()?; + + // save snapshot + let snap = SnapshotFile::new( + id, + vec![backup_path.to_str().unwrap().to_string()], + "host".to_string(), + "user".to_string(), + 0, + 0, + TagList::default(), + Some(self.count), + Some(self.size), + ); + let id = snap.save_to_backend(&self.be)?; + println!("snapshot {} successfully saved.", id); + Ok(()) + } +} diff --git a/src/backend/node.rs b/src/backend/node.rs index 80b641a..71c6275 100644 --- a/src/backend/node.rs +++ b/src/backend/node.rs @@ -1,5 +1,7 @@ +use std::ffi::OsString; use std::fmt::Debug; +use anyhow::Result; use chrono::{DateTime, Local}; use derive_getters::Getters; use derive_more::{Constructor, IsVariant}; @@ -55,23 +57,23 @@ pub struct Metadata { } impl Node { - pub fn from_content(name: String, content: Vec, _size: u64) -> Self { - Self { - name, + pub fn from_content(name: OsString, content: Vec, _size: u64) -> Result { + Ok(Self { + name: name.to_str().expect("no unicode").to_string(), node_type: NodeType::File, content, subtree: None, meta: Metadata::default(), - } + }) } - pub fn from_tree(name: String, id: Id) -> Self { - Self { - name, + pub fn from_tree(name: OsString, id: Id) -> Result { + Ok(Self { + name: name.to_str().expect("no unicode").to_string(), node_type: NodeType::Dir, content: Vec::new(), subtree: Some(id), meta: Metadata::default(), - } + }) } } diff --git a/src/commands/backup.rs b/src/commands/backup.rs index d3826ac..33a00ae 100644 --- a/src/commands/backup.rs +++ b/src/commands/backup.rs @@ -1,20 +1,15 @@ -use std::cell::RefCell; -use std::fs::File; -use std::io::BufReader; -use std::path::{Path, PathBuf}; -use std::rc::Rc; +use std::path::PathBuf; use anyhow::Result; use clap::Parser; use ignore::WalkBuilder; use path_absolutize::*; +use crate::archiver::Archiver; use crate::backend::{DecryptWriteBackend, ReadBackend}; -use crate::blob::{BlobType, Node, Packer, Tree}; -use crate::chunker::ChunkIter; -use crate::crypto::{hash, Key}; -use crate::index::{AllIndexFiles, BoomIndex, Indexer, ReadIndex}; -use crate::repo::{ConfigFile, SnapshotFile, TagList}; +use crate::crypto::Key; +use crate::index::{AllIndexFiles, BoomIndex}; +use crate::repo::ConfigFile; #[derive(Parser)] pub(super) struct Opts { @@ -31,12 +26,13 @@ pub(super) fn execute( let poly = u64::from_str_radix(config.chunker_polynomial(), 16)?; let path = PathBuf::from(&opts.sources[0]); - backup_file(path.absolutize()?, &poly, be, key)?; + let path = path.absolutize()?; + backup_file(path.into(), &poly, be, key)?; Ok(()) } fn backup_file( - path: impl AsRef, + backup_path: PathBuf, poly: &u64, be: &(impl ReadBackend + DecryptWriteBackend), key: &Key, @@ -44,129 +40,26 @@ fn backup_file( println! {"reading index..."} let index: BoomIndex = AllIndexFiles::new(be.clone()).into_iter().collect(); - let indexer = Rc::new(RefCell::new(Indexer::new(be.clone()))); - let mut data_packer = Packer::new(be.clone(), indexer.clone(), key.clone())?; - let mut tree_packer = Packer::new(be.clone(), indexer.clone(), key.clone())?; + let mut archiver = Archiver::new(be.clone(), key.clone(), index, *poly)?; - let mut wb = WalkBuilder::new(path); + let mut wb = WalkBuilder::new(backup_path.clone()); /* for path in paths[1..].into_iter() { wb.add(path); } */ - wb.follow_links(false).hidden(false); - let mut path = PathBuf::new(); - let mut tree = Tree::new(); - let mut names = Vec::new(); - let mut trees = Vec::new(); - let mut size: u64 = 0; - let mut count: u64 = 0; - for entry in wb.build() { let entry = entry?; // TODO - let name = entry.file_name().to_string_lossy().to_string(); + let name = entry.file_name().to_os_string(); let file_type = entry.file_type().unwrap(); - println!("{:?}, {:?}", entry.path(), path); + println!("entry: {:?}", entry.path()); - if file_type.is_dir() { - for p in entry.path().strip_prefix(&path).iter() { - // new subdir - trees.push(tree); - tree = Tree::new(); - names.push(name.clone()); - path.push(p); - println!("{:?}, {:?}", entry.path(), path); - } - continue; - } - - while !entry.path().starts_with(&path) { - // go back to parent dir - // 1. finish tree - let chunk = tree.serialize()?; - let id = hash(&chunk); - if !index.has(&id) { - tree_packer.add(&chunk, &id, BlobType::Tree)?; - } - tree = trees.pop().unwrap(); - let name = names.pop().unwrap(); - let node = Node::from_tree(name, id); - - tree.add(node); - path.pop(); - println!("{:?}, {:?}", entry.path(), path); - } - - if file_type.is_file() { - let f = File::open(&entry.path())?; - let reader: BufReader = BufReader::new(f); - - let chunk_iter = ChunkIter::new(reader, poly); - let mut content = Vec::new(); - let mut filesize: u64 = 0; - - for chunk in chunk_iter { - let chunk = chunk?; - filesize += chunk.len() as u64; - let id = hash(&chunk); - if !index.has(&id) { - data_packer.add(&chunk, &id, BlobType::Data)?; - } - content.push(id); - } - let node = Node::from_content(name, content, filesize); - tree.add(node); - count += 1; - size += filesize; - } + archiver.add_entry(entry.path(), name, file_type)?; } - - loop { - // go back to parent dir - // 1. finish tree - let chunk = tree.serialize()?; - let id = hash(&chunk); - if !index.has(&id) { - tree_packer.add(&chunk, &id, BlobType::Tree)?; - } - tree = match trees.pop() { - Some(tree) => tree, - None => break, - }; - let name = names.pop().unwrap(); - let node = Node::from_tree(name, id); - - tree.add(node); - path.pop(); - } - - let chunk = tree.serialize()?; - let id = hash(&chunk); - if !index.has(&id) { - tree_packer.add(&chunk, &id, BlobType::Tree)?; - } - - data_packer.finalize()?; - tree_packer.finalize()?; - indexer.borrow().finalize()?; - - // save snapshot - let snap = SnapshotFile::new( - id, - vec![path.to_str().unwrap().to_string()], - "host".to_string(), - "user".to_string(), - 0, - 0, - TagList::default(), - Some(count), - Some(size), - ); - let id = snap.save_to_backend(be)?; - println!("snapshot {} successfully saved.", id); + archiver.finalize_snapshot(backup_path)?; Ok(()) } diff --git a/src/main.rs b/src/main.rs index 884e59c..1e8bdb2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,6 @@ use anyhow::Result; +mod archiver; mod backend; mod blob; mod chunker;