correct handling of trees in backup

This commit is contained in:
Alexander Weiss 2022-02-22 01:26:45 +01:00
parent efee1acbcb
commit 7fe4ae6c05
4 changed files with 176 additions and 129 deletions

151
src/archiver.rs Normal file
View File

@ -0,0 +1,151 @@
use std::cell::RefCell;
use std::ffi::OsString;
use std::fs::{File, FileType};
use std::io::{BufRead, BufReader};
use std::path::{Path, PathBuf};
use std::rc::Rc;
use anyhow::Result;
use crate::backend::DecryptWriteBackend;
use crate::blob::{BlobType, Node, Packer, Tree};
use crate::chunker::ChunkIter;
use crate::crypto::{hash, CryptoKey};
use crate::index::{Indexer, ReadIndex};
use crate::repo::{SnapshotFile, TagList};
/// Reference-counted, interior-mutable handle to an [`Indexer`], so that one indexer can be
/// handed to several owners (the archiver clones it into both of its packers).
pub type SharedIndexer<BE> = Rc<RefCell<Indexer<BE>>>;
/// Builds a snapshot from a stream of file system entries.
///
/// The archiver keeps a stack of partially built directory trees: `tree` belongs to the
/// directory `path`, while `trees`/`names` hold the unfinished parent trees and the names of
/// the directories that were entered to get from the root to `path`.
pub struct Archiver<BE: DecryptWriteBackend, C: CryptoKey, I: ReadIndex> {
/// Directory whose tree is currently being filled; starts out as `/`.
path: PathBuf,
/// Tree of the directory `path`.
tree: Tree,
/// Names of the directories entered so far, pushed and popped together with `trees`.
names: Vec<OsString>,
/// Unfinished trees of the parent directories of `path`.
trees: Vec<Tree>,
/// Sum of the sizes (in bytes of chunk data read) of all files backed up so far.
size: u64,
/// Number of files backed up so far.
count: u64,
/// Backend the snapshot file is saved to.
be: BE,
/// Index that is asked whether a blob id is already known before the blob is packed.
index: I,
/// Indexer shared with both packers; finalized when the snapshot is finalized.
indexer: SharedIndexer<BE>,
/// Packer receiving file content chunks (`BlobType::Data`).
data_packer: Packer<BE, C>,
/// Packer receiving serialized trees (`BlobType::Tree`).
tree_packer: Packer<BE, C>,
/// Value passed to `ChunkIter::new` for every file — presumably the chunker polynomial.
poly: u64,
}
impl<BE: DecryptWriteBackend, C: CryptoKey, I: ReadIndex> Archiver<BE, C, I> {
pub fn new(be: BE, key: C, index: I, poly: u64) -> Result<Self> {
let indexer = Rc::new(RefCell::new(Indexer::new(be.clone())));
Ok(Self {
path: PathBuf::from("/"),
tree: Tree::new(),
names: Vec::new(),
trees: Vec::new(),
size: 0,
count: 0,
index,
data_packer: Packer::new(be.clone(), indexer.clone(), key.clone())?,
tree_packer: Packer::new(be.clone(), indexer.clone(), key.clone())?,
poly,
be,
indexer,
})
}
pub fn add_entry(&mut self, path: &Path, name: OsString, file_type: FileType) -> Result<()> {
let basepath = if file_type.is_dir() {
path
} else {
path.parent().unwrap()
};
self.finish_trees(&basepath)?;
let missing_dirs = basepath.strip_prefix(&self.path)?;
println!("missing_dirs: {:?}", missing_dirs);
for p in missing_dirs.iter() {
// new subdir
let tree = std::mem::replace(&mut self.tree, Tree::new());
self.trees.push(tree);
self.names.push(p.to_os_string());
self.path.push(p);
println!("add tree {:?}, path: {:?}", p, self.path);
}
if file_type.is_file() {
let f = File::open(&path)?;
let reader: BufReader<File> = BufReader::new(f);
self.backup_file(name, reader)?;
}
Ok(())
}
pub fn finish_trees(&mut self, path: &Path) -> Result<()> {
while !path.starts_with(&self.path) {
// go back to parent dir
let chunk = self.tree.serialize()?;
let id = hash(&chunk);
if !self.index.has(&id) {
self.tree_packer.add(&chunk, &id, BlobType::Tree)?;
}
self.tree = self.trees.pop().unwrap();
let name = self.names.pop().unwrap();
println!("finish: {:?}", name);
let node = Node::from_tree(name, id)?;
self.tree.add(node);
self.path.pop();
}
Ok(())
}
pub fn backup_file(&mut self, name: OsString, reader: impl BufRead) -> Result<()> {
let chunk_iter = ChunkIter::new(reader, &self.poly);
let mut content = Vec::new();
let mut filesize: u64 = 0;
for chunk in chunk_iter {
let chunk = chunk?;
filesize += chunk.len() as u64;
let id = hash(&chunk);
if !self.index.has(&id) {
self.data_packer.add(&chunk, &id, BlobType::Data)?;
}
content.push(id);
}
let node = Node::from_content(name, content, filesize)?;
self.tree.add(node);
self.count += 1;
self.size += filesize;
Ok(())
}
pub fn finalize_snapshot(&mut self, backup_path: PathBuf) -> Result<()> {
self.finish_trees(&PathBuf::from("/"))?;
let chunk = self.tree.serialize()?;
let id = hash(&chunk);
if !self.index.has(&id) {
self.tree_packer.add(&chunk, &id, BlobType::Tree)?;
}
self.data_packer.finalize()?;
self.tree_packer.finalize()?;
self.indexer.borrow().finalize()?;
// save snapshot
let snap = SnapshotFile::new(
id,
vec![backup_path.to_str().unwrap().to_string()],
"host".to_string(),
"user".to_string(),
0,
0,
TagList::default(),
Some(self.count),
Some(self.size),
);
let id = snap.save_to_backend(&self.be)?;
println!("snapshot {} successfully saved.", id);
Ok(())
}
}

View File

@ -1,5 +1,7 @@
use std::ffi::OsString;
use std::fmt::Debug;
use anyhow::Result;
use chrono::{DateTime, Local};
use derive_getters::Getters;
use derive_more::{Constructor, IsVariant};
@ -55,23 +57,23 @@ pub struct Metadata {
}
impl Node {
pub fn from_content(name: String, content: Vec<Id>, _size: u64) -> Self {
Self {
name,
pub fn from_content(name: OsString, content: Vec<Id>, _size: u64) -> Result<Self> {
Ok(Self {
name: name.to_str().expect("no unicode").to_string(),
node_type: NodeType::File,
content,
subtree: None,
meta: Metadata::default(),
}
})
}
pub fn from_tree(name: String, id: Id) -> Self {
Self {
name,
pub fn from_tree(name: OsString, id: Id) -> Result<Self> {
Ok(Self {
name: name.to_str().expect("no unicode").to_string(),
node_type: NodeType::Dir,
content: Vec::new(),
subtree: Some(id),
meta: Metadata::default(),
}
})
}
}

View File

@ -1,20 +1,15 @@
use std::cell::RefCell;
use std::fs::File;
use std::io::BufReader;
use std::path::{Path, PathBuf};
use std::rc::Rc;
use std::path::PathBuf;
use anyhow::Result;
use clap::Parser;
use ignore::WalkBuilder;
use path_absolutize::*;
use crate::archiver::Archiver;
use crate::backend::{DecryptWriteBackend, ReadBackend};
use crate::blob::{BlobType, Node, Packer, Tree};
use crate::chunker::ChunkIter;
use crate::crypto::{hash, Key};
use crate::index::{AllIndexFiles, BoomIndex, Indexer, ReadIndex};
use crate::repo::{ConfigFile, SnapshotFile, TagList};
use crate::crypto::Key;
use crate::index::{AllIndexFiles, BoomIndex};
use crate::repo::ConfigFile;
#[derive(Parser)]
pub(super) struct Opts {
@ -31,12 +26,13 @@ pub(super) fn execute(
let poly = u64::from_str_radix(config.chunker_polynomial(), 16)?;
let path = PathBuf::from(&opts.sources[0]);
backup_file(path.absolutize()?, &poly, be, key)?;
let path = path.absolutize()?;
backup_file(path.into(), &poly, be, key)?;
Ok(())
}
fn backup_file(
path: impl AsRef<Path>,
backup_path: PathBuf,
poly: &u64,
be: &(impl ReadBackend + DecryptWriteBackend),
key: &Key,
@ -44,129 +40,26 @@ fn backup_file(
println! {"reading index..."}
let index: BoomIndex = AllIndexFiles::new(be.clone()).into_iter().collect();
let indexer = Rc::new(RefCell::new(Indexer::new(be.clone())));
let mut data_packer = Packer::new(be.clone(), indexer.clone(), key.clone())?;
let mut tree_packer = Packer::new(be.clone(), indexer.clone(), key.clone())?;
let mut archiver = Archiver::new(be.clone(), key.clone(), index, *poly)?;
let mut wb = WalkBuilder::new(path);
let mut wb = WalkBuilder::new(backup_path.clone());
/*
for path in paths[1..].into_iter() {
wb.add(path);
}
*/
wb.follow_links(false).hidden(false);
let mut path = PathBuf::new();
let mut tree = Tree::new();
let mut names = Vec::new();
let mut trees = Vec::new();
let mut size: u64 = 0;
let mut count: u64 = 0;
for entry in wb.build() {
let entry = entry?;
// TODO
let name = entry.file_name().to_string_lossy().to_string();
let name = entry.file_name().to_os_string();
let file_type = entry.file_type().unwrap();
println!("{:?}, {:?}", entry.path(), path);
println!("entry: {:?}", entry.path());
if file_type.is_dir() {
for p in entry.path().strip_prefix(&path).iter() {
// new subdir
trees.push(tree);
tree = Tree::new();
names.push(name.clone());
path.push(p);
println!("{:?}, {:?}", entry.path(), path);
}
continue;
}
while !entry.path().starts_with(&path) {
// go back to parent dir
// 1. finish tree
let chunk = tree.serialize()?;
let id = hash(&chunk);
if !index.has(&id) {
tree_packer.add(&chunk, &id, BlobType::Tree)?;
}
tree = trees.pop().unwrap();
let name = names.pop().unwrap();
let node = Node::from_tree(name, id);
tree.add(node);
path.pop();
println!("{:?}, {:?}", entry.path(), path);
}
if file_type.is_file() {
let f = File::open(&entry.path())?;
let reader: BufReader<File> = BufReader::new(f);
let chunk_iter = ChunkIter::new(reader, poly);
let mut content = Vec::new();
let mut filesize: u64 = 0;
for chunk in chunk_iter {
let chunk = chunk?;
filesize += chunk.len() as u64;
let id = hash(&chunk);
if !index.has(&id) {
data_packer.add(&chunk, &id, BlobType::Data)?;
}
content.push(id);
}
let node = Node::from_content(name, content, filesize);
tree.add(node);
count += 1;
size += filesize;
}
archiver.add_entry(entry.path(), name, file_type)?;
}
loop {
// go back to parent dir
// 1. finish tree
let chunk = tree.serialize()?;
let id = hash(&chunk);
if !index.has(&id) {
tree_packer.add(&chunk, &id, BlobType::Tree)?;
}
tree = match trees.pop() {
Some(tree) => tree,
None => break,
};
let name = names.pop().unwrap();
let node = Node::from_tree(name, id);
tree.add(node);
path.pop();
}
let chunk = tree.serialize()?;
let id = hash(&chunk);
if !index.has(&id) {
tree_packer.add(&chunk, &id, BlobType::Tree)?;
}
data_packer.finalize()?;
tree_packer.finalize()?;
indexer.borrow().finalize()?;
// save snapshot
let snap = SnapshotFile::new(
id,
vec![path.to_str().unwrap().to_string()],
"host".to_string(),
"user".to_string(),
0,
0,
TagList::default(),
Some(count),
Some(size),
);
let id = snap.save_to_backend(be)?;
println!("snapshot {} successfully saved.", id);
archiver.finalize_snapshot(backup_path)?;
Ok(())
}

View File

@ -1,5 +1,6 @@
use anyhow::Result;
mod archiver;
mod backend;
mod blob;
mod chunker;