diff --git a/Cargo.toml b/Cargo.toml index 2da1535..b566ce8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,13 +11,16 @@ thiserror = "1.0" derive_more = "0.99" derive-getters = "0.2" aes256ctr_poly1305aes = { path = "../aes256ctr_poly1305aes" } -base64 = "0.13" +sha2 = "0.10" +rand = "0.8" scrypt = { version = "0.8", default-features = false } +base64 = "0.13" +hex = { version = "0.4", features = ["serde"] } serde = { version = "1", features = ["derive"] } serde_json = "1" serde-aux = "3" -hex = { version = "0.4", features = ["serde"] } chrono = { version = "0.4", features = ["serde"] } +tempfile = "3" # index boomphf = "0.5" # chunker @@ -25,6 +28,15 @@ cdc = "0.1" # local backend walkdir = "2" # commands +bytesize = "1" clap = { version = "3", features = ["derive"] } +rpassword = "5" prettytable-rs = "0.8" itertools = "0.10" +ignore = "0.4" + + +# usefule crates: +# ambassador -> derive traits from struct/enum element(s) +# delegate -> impl methods from struct/enum element(s) + diff --git a/src/backend/decrypt.rs b/src/backend/decrypt.rs index 35162e2..8838cc1 100644 --- a/src/backend/decrypt.rs +++ b/src/backend/decrypt.rs @@ -1,28 +1,35 @@ +use std::io::{Cursor, Read}; + use thiserror::Error; -use super::{FileType, Id, ReadBackend}; -use crate::crypto::{CryptoError, Key}; +use super::{FileType, Id, ReadBackend, WriteBackend}; +use crate::crypto::{hash, CryptoKey}; + +pub trait DecryptWriteBackend: WriteBackend { + type Key; + fn key(&self) -> &Self::Key; +} /// RepoError describes the errors that can be returned by accessing this repository #[derive(Error, Debug)] -pub enum RepoError { - /// Represents an error while decrypting. - #[error("Decryption error")] - CryptoError(CryptoError), +pub enum RepoError { + /// Represents an error while encrypting/decrypting. + #[error("Crypto error")] + CryptoError(C), /// Represents another error from the embedded repository. 
#[error("Repo error")] - RepoError(#[from] E), + RepoError(#[from] R), } #[derive(Clone)] -pub struct DecryptBackend { +pub struct DecryptBackend { backend: R, - key: Key, + key: C, } -impl DecryptBackend { - pub fn new(be: &R, key: Key) -> Self { +impl DecryptBackend { + pub fn new(be: &R, key: C) -> Self { Self { backend: be.clone(), key, @@ -30,8 +37,15 @@ impl DecryptBackend { } } -impl ReadBackend for DecryptBackend { - type Error = RepoError; +impl DecryptWriteBackend for DecryptBackend { + type Key = C; + fn key(&self) -> &Self::Key { + &self.key + } +} + +impl ReadBackend for DecryptBackend { + type Error = RepoError; fn location(&self) -> &str { self.backend.location() @@ -65,3 +79,22 @@ impl ReadBackend for DecryptBackend { .map_err(RepoError::CryptoError) } } + +impl WriteBackend for DecryptBackend { + type Error = RepoError; + + fn write_full(&self, tpe: FileType, id: &Id, r: &mut impl Read) -> Result<(), Self::Error> { + self.backend.write_full(tpe, id, r)?; + Ok(()) + } + + fn hash_write_full(&self, tpe: FileType, data: &[u8]) -> Result { + let data = self + .key + .encrypt_data(data) + .map_err(RepoError::CryptoError)?; + let id = hash(&data); + self.write_full(tpe, &id, &mut Cursor::new(data))?; + Ok(id) + } +} diff --git a/src/backend/local.rs b/src/backend/local.rs index 8d6e65a..bb25a8e 100644 --- a/src/backend/local.rs +++ b/src/backend/local.rs @@ -1,10 +1,10 @@ use std::fs::{self, File}; -use std::io::{Read, Seek, SeekFrom}; +use std::io::{copy, Read, Seek, SeekFrom}; use std::os::unix::fs::FileExt; use std::path::{Path, PathBuf}; use walkdir::WalkDir; -use super::{FileType, Id, ReadBackend}; +use super::{FileType, Id, ReadBackend, WriteBackend}; #[derive(Clone)] pub struct LocalBackend { @@ -84,6 +84,21 @@ impl ReadBackend for LocalBackend { } } +impl WriteBackend for LocalBackend { + type Error = std::io::Error; + + fn write_full(&self, tpe: FileType, id: &Id, r: &mut impl Read) -> Result<(), Self::Error> { + println!("writing tpe: {:?}, id: 
{}", &tpe, &id); + let filename = self.path(tpe, *id); + let mut file = fs::OpenOptions::new() + .create_new(true) + .write(true) + .open(&filename)?; + copy(r, &mut file)?; + file.sync_all() + } +} + impl LocalBackend { pub fn walker(&self) -> impl Iterator { let path = self.path.clone(); @@ -129,4 +144,5 @@ impl LocalBackend { .unwrap(); file.write_all_at(data, offset).unwrap(); } + } diff --git a/src/backend/mod.rs b/src/backend/mod.rs index 871d280..b720659 100644 --- a/src/backend/mod.rs +++ b/src/backend/mod.rs @@ -1,13 +1,18 @@ -use crate::id::*; +use std::io::{Cursor, Read}; + use anyhow::anyhow; +use crate::crypto::hash; +use crate::id::Id; + pub mod decrypt; pub mod local; +pub mod node; -pub use decrypt::DecryptBackend; -pub use local::LocalBackend; +pub use decrypt::*; +pub use local::*; -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Debug)] pub enum FileType { Config, Index, @@ -92,6 +97,17 @@ pub enum MapResult { NonUnique, } +pub trait WriteBackend: Clone { + type Error: Send + Sync + std::error::Error + 'static; + + fn write_full(&self, tpe: FileType, id: &Id, r: &mut impl Read) -> Result<(), Self::Error>; + + fn hash_write_full(&self, tpe: FileType, data: &[u8]) -> Result { + let id = hash(data); + self.write_full(tpe, &id, &mut Cursor::new(data))?; + Ok(id) + } +} /* pub trait ReadSource: Clone { fn walker(&self) -> &dyn Iterator; diff --git a/src/backend/node.rs b/src/backend/node.rs new file mode 100644 index 0000000..80b641a --- /dev/null +++ b/src/backend/node.rs @@ -0,0 +1,77 @@ +use std::fmt::Debug; + +use chrono::{DateTime, Local}; +use derive_getters::Getters; +use derive_more::{Constructor, IsVariant}; +use serde::{Deserialize, Serialize}; +use serde_aux::prelude::*; + +use crate::id::Id; + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Constructor, Getters)] +pub struct Node { + name: String, + #[serde(flatten)] + node_type: NodeType, + #[serde(deserialize_with = "deserialize_default_from_null")] + content: Vec, + subtree: 
Option, + #[serde(flatten)] + meta: Metadata, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, IsVariant)] +#[serde(tag = "type", rename_all = "lowercase")] +pub enum NodeType { + File, + Dir, + Symlink { linktarget: String }, + Device { device: u64 }, +} + +#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize, Getters)] +pub struct Metadata { + #[serde(default)] + size: u64, + mtime: Option>, + atime: Option>, + ctime: Option>, + #[serde(default)] + mode: u32, + #[serde(default)] + uid: u32, + #[serde(default)] + gid: u32, + #[serde(default)] + user: String, + #[serde(default)] + group: String, + #[serde(default)] + inode: u64, + #[serde(default)] + device_id: u64, + #[serde(default)] + links: u64, +} + +impl Node { + pub fn from_content(name: String, content: Vec, _size: u64) -> Self { + Self { + name, + node_type: NodeType::File, + content, + subtree: None, + meta: Metadata::default(), + } + } + + pub fn from_tree(name: String, id: Id) -> Self { + Self { + name, + node_type: NodeType::Dir, + content: Vec::new(), + subtree: Some(id), + meta: Metadata::default(), + } + } +} diff --git a/src/blob/mod.rs b/src/blob/mod.rs index dd41577..e87a980 100644 --- a/src/blob/mod.rs +++ b/src/blob/mod.rs @@ -1,4 +1,7 @@ +mod packer; mod tree; +pub use crate::backend::node::*; +pub use packer::*; pub use tree::*; use derive_more::Constructor; diff --git a/src/blob/packer.rs b/src/blob/packer.rs new file mode 100644 index 0000000..2b83c70 --- /dev/null +++ b/src/blob/packer.rs @@ -0,0 +1,110 @@ +use std::fs::File; +use std::io::{Seek, SeekFrom, Write}; +use std::time::{Duration, SystemTime}; + +use anyhow::{anyhow, Result}; +use tempfile::tempfile; + +use super::BlobType; +use crate::backend::{DecryptWriteBackend, FileType}; +use crate::crypto::{CryptoKey, Hasher}; +use crate::id::Id; +use crate::index::SharedIndexer; +use crate::repo::IndexPack; + +const MAX_SIZE: u32 = 50000; +const MAX_AGE: Duration = Duration::from_secs(300); + +pub struct Packer 
{ + be: BE, + file: File, + count: u32, + created: SystemTime, + index: IndexPack, + indexer: SharedIndexer, + hasher: Hasher, + key: C, +} + +impl Packer { + pub fn new(be: BE, indexer: SharedIndexer, key: C) -> Result { + Ok(Self { + be, + file: tempfile()?, + count: 0, + created: SystemTime::now(), + index: IndexPack::new(), + indexer, + hasher: Hasher::new(), + key, + }) + } + + pub fn reset(&mut self) -> Result<()> { + self.file = tempfile()?; + self.count = 0; + self.created = SystemTime::now(); + self.hasher.reset(); + Ok(()) + } + + pub fn finalize(&mut self) -> Result<()> { + self.save() + } + + pub fn save(&mut self) -> Result<()> { + if self.count == 0 { + return Ok(()); + } + let id = self.hasher.finalize(); + self.index.set_id(id); + + self.file.flush()?; + self.file.seek(SeekFrom::Start(0))?; + self.be.write_full(FileType::Pack, &id, &mut self.file)?; + + let index = std::mem::replace(&mut self.index, IndexPack::new()); + self.indexer.borrow_mut().add(index)?; + Ok(()) + } + + pub fn add(&mut self, data: &[u8], id: &Id, tpe: BlobType) -> Result<()> { + // only add if this blob is not present + if self.has(id) { + return Ok(()); + } + if self.indexer.borrow().has(id) { + return Ok(()); + } + + let data = self + .key + .encrypt_data(data) + .map_err(|_| anyhow!("crypto error"))?; + + self.hasher.update(&data); + let len = self.file.write(&data)?.try_into()?; + self.index.add(*id, tpe, self.count, len); + self.count += len; + + // check if IndexFile needs to be saved + if self.count >= MAX_SIZE || self.created.elapsed()? 
>= MAX_AGE { + self.save()?; + self.reset()?; + } + Ok(()) + } + + fn has(&self, id: &Id) -> bool { + self.index.blobs().iter().find(|b| b.id() == id).is_some() + } +} + +/* +impl Drop for Packer { + fn drop(&mut self) { + // ignore error when dropping Indexer + let _ = self.finalize(); + } +} +*/ diff --git a/src/blob/tree.rs b/src/blob/tree.rs index 11dd1d6..6df346a 100644 --- a/src/blob/tree.rs +++ b/src/blob/tree.rs @@ -3,64 +3,13 @@ use std::mem; use std::path::PathBuf; use anyhow::{anyhow, Result}; -use chrono::{DateTime, Local}; -use derive_getters::Getters; use serde::{Deserialize, Serialize}; -use serde_aux::prelude::*; use crate::backend::ReadBackend; use crate::id::Id; use crate::index::ReadIndex; -#[derive(Clone, Debug, Serialize, Deserialize, Getters)] -pub struct Node { - name: String, - #[serde(rename = "type")] - tpe: String, - #[serde(default)] - mode: u32, - mtime: DateTime, - atime: DateTime, - ctime: DateTime, - #[serde(default)] - uid: u32, - #[serde(default)] - gid: u32, - #[serde(default)] - user: String, - #[serde(default)] - group: String, - #[serde(default)] - inode: u64, - #[serde(default)] - device_id: u64, - #[serde(default)] - size: u64, - #[serde(default)] - links: u64, - #[serde(default)] - linktarget: String, - #[serde(default)] - device: u64, - #[serde(deserialize_with = "deserialize_default_from_null")] - content: Vec, - #[serde(default)] - subtree: Id, -} - -impl Node { - pub fn is_tree(&self) -> bool { - &self.tpe == "dir" - } - - pub fn is_file(&self) -> bool { - &self.tpe == "file" - } - - pub fn is_symlink(&self) -> bool { - &self.tpe == "symlink" - } -} +use super::Node; #[derive(Clone, Debug, Serialize, Deserialize)] pub struct Tree { @@ -68,9 +17,21 @@ pub struct Tree { } impl Tree { - pub fn from_backend(be: &impl ReadBackend, index: &impl ReadIndex, id: Id) -> Result { + pub fn new() -> Self { + Self { nodes: Vec::new() } + } + + pub fn add(&mut self, node: Node) { + self.nodes.push(node) + } + + pub fn serialize(&self) 
-> Result> { + Ok(serde_json::to_vec(&self)?) + } + + pub fn from_backend(be: &impl ReadBackend, index: &impl ReadIndex, id: &Id) -> Result { let data = index - .get_id(&id) + .get_id(id) .ok_or(anyhow!("blob not found in index"))? .read_data(be)?; @@ -101,7 +62,7 @@ pub fn tree_iterator_once<'a>( let mut visited = HashSet::new(); TreeIterator::new( move |i| { - if visited.insert(i) { + if visited.insert(*i) { Tree::from_backend(be, index, i).unwrap().nodes.into_iter() } else { Vec::new().into_iter() @@ -116,7 +77,7 @@ pub fn tree_iterator_once<'a>( pub struct TreeIterator where IT: Iterator, - F: FnMut(Id) -> IT, + F: FnMut(&Id) -> IT, { open_iterators: Vec, inner: IT, @@ -127,10 +88,10 @@ where impl TreeIterator where IT: Iterator, - F: FnMut(Id) -> IT, + F: FnMut(&Id) -> IT, { fn new(mut getter: F, ids: Vec) -> Self { - let mut iters = ids.into_iter().map(&mut getter).collect::>(); + let mut iters = ids.iter().map(&mut getter).collect::>(); iters.rotate_right(1); Self { inner: iters.pop().unwrap(), @@ -144,7 +105,7 @@ where impl Iterator for TreeIterator where IT: Iterator, - F: FnMut(Id) -> IT, + F: FnMut(&Id) -> IT, { type Item = (PathBuf, Node); @@ -152,11 +113,11 @@ where loop { match self.inner.next() { Some(node) => { - let path = self.path.join(node.name.clone()); - if node.is_tree() { - let old_inner = mem::replace(&mut self.inner, (self.getter)(node.subtree)); + let path = self.path.join(node.name()); + if let Some(subtree) = node.subtree() { + let old_inner = mem::replace(&mut self.inner, (self.getter)(subtree)); self.open_iterators.push(old_inner); - self.path.push(node.name.clone()); + self.path.push(node.name()); } return Some((path, node)); diff --git a/src/commands/backup.rs b/src/commands/backup.rs new file mode 100644 index 0000000..7e7fc17 --- /dev/null +++ b/src/commands/backup.rs @@ -0,0 +1,171 @@ +use std::cell::RefCell; +use std::fs::File; +use std::io::BufReader; +use std::path::PathBuf; +use std::rc::Rc; + +use anyhow::Result; +use 
clap::Parser; +use ignore::WalkBuilder; + +use crate::backend::{DecryptWriteBackend, ReadBackend}; +use crate::blob::{BlobType, Node, Packer, Tree}; +use crate::chunker::ChunkIter; +use crate::crypto::{hash, Key}; +use crate::index::{AllIndexFiles, BoomIndex, Indexer, ReadIndex}; +use crate::repo::{ConfigFile, SnapshotFile, TagList}; + +#[derive(Parser)] +pub(super) struct Opts { + /// backup sources + sources: Vec, +} + +pub(super) fn execute( + opts: Opts, + be: &(impl ReadBackend + DecryptWriteBackend), + key: &Key, +) -> Result<()> { + let config = ConfigFile::from_backend_no_id(be)?; + + let poly = "37ffea04120bf1"; + let poly = u64::from_str_radix(config.chunker_polynomial(), 16)?; + backup_file(opts.sources, &poly, be, key)?; + Ok(()) +} + +fn backup_file( + paths: Vec, + poly: &u64, + be: &(impl ReadBackend + DecryptWriteBackend), + key: &Key, +) -> Result<()> { + let index: BoomIndex = AllIndexFiles::new(be.clone()).into_iter().collect(); + + let indexer = Rc::new(RefCell::new(Indexer::new(be.clone()))); + let mut data_packer = Packer::new(be.clone(), indexer.clone(), key.clone())?; + let mut tree_packer = Packer::new(be.clone(), indexer.clone(), key.clone())?; + + let path = &paths[0]; + let mut wb = WalkBuilder::new(path); + /* + for path in paths[1..].into_iter() { + wb.add(path); + } + */ + + wb.follow_links(false).hidden(false); + + let mut path = PathBuf::new(); + let mut tree = Tree::new(); + let mut names = Vec::new(); + let mut trees = Vec::new(); + let mut size: u64 = 0; + let mut count: u64 = 0; + + for entry in wb.build() { + let entry = entry?; + // TODO + let name = entry.file_name().to_string_lossy().to_string(); + let file_type = entry.file_type().unwrap(); + println!("{:?}, {:?}", entry.path(), path); + + if file_type.is_dir() { + for p in entry.path().strip_prefix(&path).iter() { + // new subdir + trees.push(tree); + tree = Tree::new(); + names.push(name.clone()); + path.push(p); + println!("{:?}, {:?}", entry.path(), path); + } + 
continue; + } + + while !entry.path().starts_with(&path) { + // go back to parent dir + // 1. finish tree + let chunk = tree.serialize()?; + let id = hash(&chunk); + if !index.has(&id) { + tree_packer.add(&chunk, &id, BlobType::Tree)?; + } + tree = trees.pop().unwrap(); + let name = names.pop().unwrap(); + let node = Node::from_tree(name, id); + + tree.add(node); + path.pop(); + println!("{:?}, {:?}", entry.path(), path); + } + + if file_type.is_file() { + let f = File::open(&entry.path())?; + let reader: BufReader = BufReader::new(f); + + let chunk_iter = ChunkIter::new(reader, poly); + let mut content = Vec::new(); + let mut filesize: u64 = 0; + + for chunk in chunk_iter { + let chunk = chunk?; + filesize += chunk.len() as u64; + let id = hash(&chunk); + if !index.has(&id) { + data_packer.add(&chunk, &id, BlobType::Data)?; + } + content.push(id); + } + let node = Node::from_content(name, content, filesize); + tree.add(node); + count += 1; + size += filesize; + } + } + + loop { + // go back to parent dir + // 1. 
finish tree + let chunk = tree.serialize()?; + let id = hash(&chunk); + if !index.has(&id) { + tree_packer.add(&chunk, &id, BlobType::Tree)?; + } + tree = match trees.pop() { + Some(tree) => tree, + None => break, + }; + let name = names.pop().unwrap(); + let node = Node::from_tree(name, id); + + tree.add(node); + path.pop(); + } + + let chunk = tree.serialize()?; + let id = hash(&chunk); + if !index.has(&id) { + tree_packer.add(&chunk, &id, BlobType::Tree)?; + } + + data_packer.finalize()?; + tree_packer.finalize()?; + indexer.borrow().finalize()?; + + // save snapshot + let snap = SnapshotFile::new( + id, + paths, + "host".to_string(), + "user".to_string(), + 0, + 0, + TagList::default(), + Some(count), + Some(size), + ); + let id = snap.save_to_backend(be)?; + println!("snapshot {} successfully saved.", id); + + Ok(()) +} diff --git a/src/commands/cat.rs b/src/commands/cat.rs index 911d206..b927539 100644 --- a/src/commands/cat.rs +++ b/src/commands/cat.rs @@ -19,11 +19,11 @@ pub(super) fn execute(be: &impl ReadBackend, dbe: &impl ReadBackend, opts: Opts) // special treatment for catingg blobs: read the index and use it to locate the blob "blob" => { let id = Id::from_hex(&opts.id)?; - let index = BoomIndex::from_iter(AllIndexFiles::new(be.clone()).into_iter()); + let index = BoomIndex::from_iter(AllIndexFiles::new(dbe.clone()).into_iter()); let dec = index .get_id(&id) .ok_or(anyhow!("blob not found in index"))? 
- .read_data(be)?; + .read_data(dbe)?; print!("{}", String::from_utf8_lossy(&dec)); return Ok(()); } diff --git a/src/commands/check.rs b/src/commands/check.rs index cdf7846..68264aa 100644 --- a/src/commands/check.rs +++ b/src/commands/check.rs @@ -3,7 +3,7 @@ use clap::Parser; use std::collections::HashMap; use crate::backend::{FileType, ReadBackend}; -use crate::blob::tree_iterator_once; +use crate::blob::{tree_iterator_once, NodeType}; use crate::index::{AllIndexFiles, BoomIndex, ReadIndex}; use crate::repo::{IndexBlob, SnapshotFile}; @@ -79,8 +79,8 @@ fn check_snapshots(be: &impl ReadBackend, index: &impl ReadIndex) -> Result<()> .collect(); for (path, node) in tree_iterator_once(be, index, snap_ids) { - match node.tpe() as &str { - "file" => { + match node.node_type() { + NodeType::File => { for (i, id) in node.content().iter().enumerate() { if id.is_null() { println!("file {:?} blob {} has null ID", path, i); @@ -92,15 +92,15 @@ fn check_snapshots(be: &impl ReadBackend, index: &impl ReadIndex) -> Result<()> } } - "dir" => { - if node.subtree().is_null() { - println!("dir {:?} subtree has null ID", path); + NodeType::Dir => { + match node.subtree() { + None => println!("dir {:?} subtree does not exist", path), + Some(tree) if tree.is_null() => println!("dir {:?} subtree has null ID", path), + _ => {} // subtree is ok } } - "symlink" | "socket" | "chardev" | "dev" | "fifo" => {} // nothing to check - - tpe => println!("file {:?} unkown type {}", path, tpe), + _ => {} // nothing to check } } diff --git a/src/commands/diff.rs b/src/commands/diff.rs index e42018b..ab61c4e 100644 --- a/src/commands/diff.rs +++ b/src/commands/diff.rs @@ -6,7 +6,7 @@ use itertools::{ }; use crate::backend::{FileType, ReadBackend}; -use crate::blob::tree_iterator; +use crate::blob::{tree_iterator, NodeType}; use crate::id::Id; use crate::index::{AllIndexFiles, BoomIndex}; use crate::repo::SnapshotFile; @@ -43,11 +43,21 @@ pub(super) fn execute(be: &impl ReadBackend, opts: Opts) -> 
Result<()> { match file { Left((path, _)) => println!("- {:?}", path), Right((path, _)) => println!("+ {:?}", path), - Both((path, node1), (_, node2)) => { - if node1.content() != node2.content() { - println!("M {:?}", path); + Both((path, node1), (_, node2)) => match node1.node_type() { + tpe if tpe != node2.node_type() => println!("M {:?}", path), // type was changed + NodeType::File if node1.content() != node2.content() => println!("M {:?}", path), + NodeType::Symlink { linktarget } => { + if let NodeType::Symlink { + linktarget: linktarget2, + } = node2.node_type() + { + if *linktarget != *linktarget2 { + println!("M {:?}", path) + } + } } - } + _ => {} // no difference to show + }, } } diff --git a/src/commands/ls.rs b/src/commands/ls.rs index cdab612..99f109d 100644 --- a/src/commands/ls.rs +++ b/src/commands/ls.rs @@ -16,7 +16,8 @@ pub(super) struct Opts { pub(super) fn execute(be: &impl ReadBackend, opts: Opts) -> Result<()> { let id = Id::from_hex(&opts.id).or_else(|_| { // if the given id param is not a full Id, search for a suitable one - be.find_starts_with(FileType::Index, &[&opts.id])?.remove(0) + be.find_starts_with(FileType::Snapshot, &[&opts.id])? 
+ .remove(0) })?; let index = BoomIndex::from_iter(AllIndexFiles::new(be.clone()).into_iter()); diff --git a/src/commands/mod.rs b/src/commands/mod.rs index a247904..5f70c2d 100644 --- a/src/commands/mod.rs +++ b/src/commands/mod.rs @@ -1,11 +1,14 @@ use std::fs; +use std::path::PathBuf; -use anyhow::Result; +use anyhow::{bail, Result}; use clap::{Parser, Subcommand}; +use rpassword::{prompt_password_stderr, read_password_with_reader}; use crate::backend::{DecryptBackend, LocalBackend}; use crate::repo; +mod backup; mod cat; mod check; mod diff; @@ -22,8 +25,8 @@ struct Opts { repository: String, /// file to read the password from - #[clap(short, long)] - password_file: String, + #[clap(short, long, parse(from_os_str))] + password_file: Option, #[clap(subcommand)] command: Command, @@ -31,6 +34,9 @@ struct Opts { #[derive(Subcommand)] enum Command { + /// backup to the repository + Backup(backup::Opts), + /// cat repository files and blobs Cat(cat::Opts), @@ -53,15 +59,32 @@ enum Command { Restore(restore::Opts), } +const MAX_PASSWORD_RETRIES: usize = 5; + pub fn execute() -> Result<()> { let args = Opts::parse(); let be = LocalBackend::new(&args.repository); - let passwd = fs::read_to_string(&args.password_file)?.replace("\n", ""); - let key = repo::find_key_in_backend(&be, &passwd, None)?; - let dbe = DecryptBackend::new(&be, key); + + let key = match args.password_file { + None => (0..MAX_PASSWORD_RETRIES) + .map(|_| { + let pass = prompt_password_stderr("enter repository password: ")?; + repo::find_key_in_backend(&be, &pass, None) + }) + .find(Result::is_ok) + .unwrap_or_else(|| bail!("tried too often...aborting!"))?, + Some(file) => { + let pass = fs::read_to_string(file)?.replace("\n", ""); + repo::find_key_in_backend(&be, &pass, None)? 
+ } + }; + eprintln!("password is correct"); + + let dbe = DecryptBackend::new(&be, key.clone()); match args.command { + Command::Backup(opts) => backup::execute(opts, &dbe, &key), Command::Cat(opts) => cat::execute(&be, &dbe, opts), Command::Check(opts) => check::execute(&dbe, opts), Command::Diff(opts) => diff::execute(&dbe, opts), diff --git a/src/commands/restore.rs b/src/commands/restore.rs index 35362ab..2867ae1 100644 --- a/src/commands/restore.rs +++ b/src/commands/restore.rs @@ -10,7 +10,7 @@ use itertools::{ }; use crate::backend::{FileType, LocalBackend, ReadBackend}; -use crate::blob::{tree_iterator, Node}; +use crate::blob::{tree_iterator, Node, NodeType}; use crate::id::Id; use crate::index::{AllIndexFiles, BoomIndex, ReadIndex}; use crate::repo::SnapshotFile; @@ -32,7 +32,8 @@ pub(super) fn execute(be: &impl ReadBackend, opts: Opts) -> Result<()> { println!("getting snapshot..."); let id = Id::from_hex(&opts.id).or_else(|_| { // if the given id param is not a full Id, search for a suitable one - be.find_starts_with(FileType::Index, &[&opts.id])?.remove(0) + be.find_starts_with(FileType::Snapshot, &[&opts.id])? + .remove(0) })?; let snap = SnapshotFile::from_backend(be, id)?; @@ -75,16 +76,19 @@ fn allocate_and_collect( match file { // node is only in snapshot Left((path, node)) => { - if node.is_tree() && !opts.dry_run { - dest.create_dir(&path); - } - if node.is_file() { - // collect blobs needed for restoring - let size = file_infos.add_file(&node, path.clone(), index); - // create the file - if !opts.dry_run { - dest.create_file(&path, size); + match node.node_type() { + NodeType::Dir => { + dest.create_dir(&path); } + NodeType::File => { + // collect blobs needed for restoring + let size = file_infos.add_file(&node, path.clone(), index); + // create the file + if !opts.dry_run { + dest.create_file(&path, size); + } + } + _ => {} // nothing to do for symlink, device, etc. 
} } // node is in snapshot but already exists @@ -147,8 +151,10 @@ fn restore_metadata( ) -> Result<()> { // walk over tree in repository and compare with tree in dest for (path, node) in tree_iterator(be, index, vec![tree]) { - if node.is_symlink() && !opts.dry_run { - dest.create_symlink(&path, node.linktarget()); + if !opts.dry_run { + if let NodeType::Symlink { linktarget } = node.node_type() { + dest.create_symlink(&path, linktarget); + } } // TODO: metadata } @@ -188,12 +194,12 @@ impl FileInfos { /// Add the file to FilesInfos using index to get blob information. /// Returns the computed length of the file - fn add_file(&mut self, node: &Node, name: PathBuf, index: &impl ReadIndex) -> u64 { + fn add_file(&mut self, file: &Node, name: PathBuf, index: &impl ReadIndex) -> u64 { let mut file_pos = 0; - if !node.content().is_empty() { + if !file.content().is_empty() { let file_idx = self.names.len(); self.names.push(name); - for id in node.content().iter() { + for id in file.content().iter() { let ie = index.get_id(id).unwrap(); let bl = BlobLocation { offset: *ie.offset(), diff --git a/src/commands/snapshots.rs b/src/commands/snapshots.rs index fffec28..c64689b 100644 --- a/src/commands/snapshots.rs +++ b/src/commands/snapshots.rs @@ -1,26 +1,35 @@ use anyhow::Result; +use bytesize::ByteSize; use clap::Parser; use prettytable::{cell, format, row, Table}; -use crate::backend::{FileType, ReadBackend}; +use crate::backend::ReadBackend; use crate::repo::SnapshotFile; #[derive(Parser)] pub(super) struct Opts {} pub(super) fn execute(be: &impl ReadBackend, _opts: Opts) -> Result<()> { - let mut table = Table::new(); - table.set_titles(row!["ID", "Time", "Host", "Tags", "Paths"]); - table.set_format(*format::consts::FORMAT_NO_BORDER_LINE_SEPARATOR); + let mut snapshots = SnapshotFile::all_from_backend(be)?; + snapshots.sort(); - for id in be.list(FileType::Snapshot)? 
{ - let sn = SnapshotFile::from_backend(be, id)?; - let paths = sn - .paths - .iter() - .map(|p| p.to_string_lossy() + "\n") - .collect::(); - table.add_row(row![id, sn.time, sn.hostname, "", paths,]); + let mut table = Table::new(); + table.set_titles( + row![b->"ID", b->"Time", b->"Host", b->"Tags", b->"Paths", br->"Files", br->"Size"], + ); + table.set_format(*format::consts::FORMAT_NO_BORDER_LINE_SEPARATOR); + for sn in snapshots { + let paths = sn.paths.into_iter().map(|p| p + "\n").collect::(); + let time = sn.time.format("%Y-%m-%d %H:%M:%S"); + let size = sn + .size + .map(|b| ByteSize(b).to_string_as(true)) + .unwrap_or("?".to_string()); + let files = sn + .file_count + .map(|c| c.to_string()) + .unwrap_or("?".to_string()); + table.add_row(row![sn.id, time, sn.hostname, "", paths, r->files, r->size]); } table.printstd(); diff --git a/src/crypto/aespoly1305.rs b/src/crypto/aespoly1305.rs new file mode 100644 index 0000000..ecedb6e --- /dev/null +++ b/src/crypto/aespoly1305.rs @@ -0,0 +1,89 @@ +use aes256ctr_poly1305aes::{ + aead::{self, Aead, AeadInPlace, NewAead}, + Aes256CtrPoly1305Aes, +}; +use rand::{thread_rng, RngCore}; + +use super::CryptoKey; + +type Nonce = aead::Nonce; +type AeadKey = aead::Key; + +#[derive(Clone, Default)] +pub struct Key(AeadKey); + +impl Key { + pub fn from_slice(key: &[u8]) -> Self { + Self(*AeadKey::from_slice(key)) + } + + pub fn from_keys(encrypt: &[u8], k: &[u8], r: &[u8]) -> Self { + let mut key = AeadKey::default(); + key[0..32].copy_from_slice(encrypt); + key[32..48].copy_from_slice(k); + key[48..64].copy_from_slice(r); + + Self(key) + } +} + +impl CryptoKey for Key { + type CryptoError = aead::Error; + + fn decrypt_data(&self, data: &[u8]) -> Result, Self::CryptoError> { + /* TODO + if data.len() < 16 { + return Err(CryptoError); + } + */ + let nonce = Nonce::from_slice(&data[0..16]); + Aes256CtrPoly1305Aes::new(&self.0).decrypt(nonce, &data[16..]) + } + + fn encrypt_data(&self, data: &[u8]) -> Result, 
Self::CryptoError> { + let mut nonce = Nonce::default(); + thread_rng().fill_bytes(&mut nonce); + + let mut res = Vec::with_capacity(data.len() + 32); + res.extend_from_slice(&nonce); + res.extend_from_slice(data); + let tag = Aes256CtrPoly1305Aes::new(&self.0).encrypt_in_place_detached( + &nonce, + &[], + &mut res[16..], + )?; + res.extend_from_slice(&tag); + Ok(res) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn encrypt_decrypt_hello() { + let key = Key::default(); + let data: Vec = b"Hello!".to_vec(); + let enc = key.encrypt_data(&data).unwrap(); + let dec = key.decrypt_data(&enc).unwrap(); + assert_eq!(data, dec); + } + + #[test] + fn encrypt_decrypt_empty() { + let key = Key::default(); + let data = Vec::::new(); + let enc = key.encrypt_data(&data).unwrap(); + let dec = key.decrypt_data(&enc).unwrap(); + assert_eq!(data, dec); + } + + #[test] + fn decrypt_empty() { + let key = Key::default(); + let data = Vec::::new(); + let res = key.decrypt_data(&data); + assert!(res.is_err()); + } +} diff --git a/src/crypto/hasher.rs b/src/crypto/hasher.rs new file mode 100644 index 0000000..619638a --- /dev/null +++ b/src/crypto/hasher.rs @@ -0,0 +1,27 @@ +use sha2::{Digest, Sha256}; + +use crate::id::Id; + +pub fn hash(data: &[u8]) -> Id { + Id::new(Sha256::digest(data).into()) +} + +pub struct Hasher(Sha256); + +impl Hasher { + pub fn new() -> Self { + Self(Sha256::new()) + } + + pub fn reset(&mut self) { + self.0.reset(); + } + + pub fn update(&mut self, data: &[u8]) { + self.0.update(data) + } + + pub fn finalize(&mut self) -> Id { + Id::new(self.0.finalize_reset().into()) + } +} diff --git a/src/crypto/mod.rs b/src/crypto/mod.rs index a8f6da3..76b1261 100644 --- a/src/crypto/mod.rs +++ b/src/crypto/mod.rs @@ -1,32 +1,12 @@ -use aes256ctr_poly1305aes::{ - aead::{self, Aead, NewAead}, - Aes256CtrPoly1305Aes, -}; +use std::fmt::Debug; -pub type CryptoError = aead::Error; +mod aespoly1305; +mod hasher; +pub use aespoly1305::*; +pub use hasher::*; 
-type Nonce = aead::Nonce; -type AeadKey = aead::Key; - -#[derive(Clone)] -pub struct Key(AeadKey); - -impl Key { - pub fn from_slice(key: &[u8]) -> Self { - Self(*AeadKey::from_slice(key)) - } - - pub fn from_keys(encrypt: &[u8], k: &[u8], r: &[u8]) -> Self { - let mut key = AeadKey::default(); - key[0..32].copy_from_slice(encrypt); - key[32..48].copy_from_slice(k); - key[48..64].copy_from_slice(r); - - Self(key) - } - - pub fn decrypt_data(&self, data: &[u8]) -> Result, CryptoError> { - let nonce = Nonce::from_slice(&data[0..16]); - Aes256CtrPoly1305Aes::new(&self.0).decrypt(nonce, &data[16..]) - } +pub trait CryptoKey: Clone + Sized { + type CryptoError: Debug + Send + Sync + 'static; + fn decrypt_data(&self, data: &[u8]) -> Result, Self::CryptoError>; + fn encrypt_data(&self, data: &[u8]) -> Result, Self::CryptoError>; } diff --git a/src/id.rs b/src/id.rs index 326c1f8..94b608d 100644 --- a/src/id.rs +++ b/src/id.rs @@ -1,10 +1,12 @@ use std::fmt; -use derive_more::Display; +use derive_more::{Constructor, Display}; use serde::{Deserialize, Serialize}; use thiserror::Error; -#[derive(Clone, Copy, Default, PartialEq, Eq, Hash, Serialize, Deserialize, Display)] +#[derive( + Clone, Copy, Default, PartialEq, Eq, Hash, Constructor, Serialize, Deserialize, Display, +)] #[display(fmt = "{}", "&self.to_hex()[0..8]")] pub struct Id( #[serde(serialize_with = "hex::serde::serialize")] @@ -36,8 +38,8 @@ impl Id { hex::encode(self.0) } - pub fn is_null(self) -> bool { - self == Id::default() + pub fn is_null(&self) -> bool { + self == &Id::default() } } diff --git a/src/index/indexer.rs b/src/index/indexer.rs new file mode 100644 index 0000000..95731dc --- /dev/null +++ b/src/index/indexer.rs @@ -0,0 +1,99 @@ +use std::cell::RefCell; +use std::collections::HashSet; +use std::rc::Rc; +use std::time::{Duration, SystemTime}; + +use anyhow::Result; + +use crate::backend::WriteBackend; +use crate::id::Id; +use crate::repo::{IndexFile, IndexPack}; + +pub type SharedIndexer = Rc>>; 
+ +pub struct Indexer { + be: BE, + file: IndexFile, + count: usize, + created: SystemTime, + indexed: HashSet, +} + +const MAX_SIZE: usize = 50000; +const MAX_AGE: Duration = Duration::from_secs(300); + +impl Indexer { + pub fn new(be: BE) -> Self { + Self { + be, + file: IndexFile::new(), + count: 0, + created: SystemTime::now(), + indexed: HashSet::new(), + } + } + + pub fn reset(&mut self) { + self.file = IndexFile::new(); + self.count = 0; + self.created = SystemTime::now(); + } + + pub fn finalize(&self) -> Result<()> { + self.save() + } + + pub fn save(&self) -> Result<()> { + if self.count > 0 { + self.file.save_to_backend(&self.be)?; + } + Ok(()) + } + + pub fn add(&mut self, pack: IndexPack) -> Result<()> { + self.count += pack.blobs().len(); + + for blob in pack.blobs() { + self.indexed.insert(*blob.id()); + } + + self.file.add(pack); + + // check if IndexFile needs to be saved + if self.count >= MAX_SIZE || self.created.elapsed()? >= MAX_AGE { + self.save()?; + self.reset(); + } + Ok(()) + } + + pub fn has(&self, id: &Id) -> bool { + self.indexed.contains(id) + } +} + +/* +impl Drop for Indexer { + fn drop(&mut self) { + // ignore error when dropping Indexer + let _ = self.save(); + } +} +*/ +/* +impl ReadIndex for Indexer { + fn get_id(&self, id: &Id) -> Option { + for pack in self.file.packs() { + if let Some(blob) = pack.blobs().iter().find(|b| b.id() == id) { + return Some(IndexEntry { + pack: *pack.id(), + tpe: *blob.tpe(), + offset: *blob.offset(), + length: *blob.length(), + }); + } + } + None + } +} +*/ diff --git a/src/index/mod.rs b/src/index/mod.rs index 9ff46a3..c05dca9 100644 --- a/src/index/mod.rs +++ b/src/index/mod.rs @@ -5,10 +5,12 @@ use anyhow::Result; use derive_getters::{Dissolve, Getters}; use derive_more::Constructor; -pub mod boom; -pub mod indexfiles; +mod boom; +mod indexer; +mod indexfiles; pub use boom::*; +pub use indexer::*; pub use indexfiles::*; #[derive(Debug, Clone, Constructor, Getters, Dissolve)] @@ -25,6 +27,11 @@ 
impl IndexEntry { Ok(be.read_partial(FileType::Pack, self.pack, self.offset, self.length)?) } } + pub trait ReadIndex { fn get_id(&self, id: &Id) -> Option; + + fn has(&self, id: &Id) -> bool { + self.get_id(id).is_some() + } } diff --git a/src/repo/config.rs b/src/repo/config.rs index c828bb8..9f56540 100644 --- a/src/repo/config.rs +++ b/src/repo/config.rs @@ -1,10 +1,11 @@ use anyhow::Result; +use derive_getters::Getters; use serde::{Deserialize, Serialize}; use crate::backend::{FileType, ReadBackend}; use crate::id::Id; -#[derive(Debug, Default, Serialize, Deserialize)] +#[derive(Debug, Default, Serialize, Deserialize, Getters)] pub struct ConfigFile { version: u32, id: Id, @@ -12,7 +13,7 @@ pub struct ConfigFile { } impl ConfigFile { - pub fn from_backend_no_id(b: B) -> Result { + pub fn from_backend_no_id(b: &B) -> Result { let data = b.read_full(FileType::Config, Id::default())?; Ok(serde_json::from_slice::(&data)?) } diff --git a/src/repo/index.rs b/src/repo/index.rs index 1d0d40c..f4bf293 100644 --- a/src/repo/index.rs +++ b/src/repo/index.rs @@ -2,7 +2,7 @@ use anyhow::Result; use derive_getters::{Dissolve, Getters}; use serde::{Deserialize, Serialize}; -use crate::backend::{FileType, ReadBackend}; +use crate::backend::{FileType, ReadBackend, WriteBackend}; use crate::blob::BlobType; use crate::id::Id; @@ -14,11 +14,29 @@ pub struct IndexFile { } impl IndexFile { + pub fn new() -> Self { + Self { + supersedes: None, + packs: Vec::new(), + } + } + /// Get an IndexFile from the backend pub fn from_backend(be: &B, id: Id) -> Result { let data = be.read_full(FileType::Index, id)?; Ok(serde_json::from_slice(&data)?) 
} + + /// Save an IndexFile to the backend + pub fn save_to_backend(&self, be: &B) -> Result<()> { + let data = serde_json::to_vec(&self)?; + be.hash_write_full(FileType::Index, &data)?; + Ok(()) + } + + pub fn add(&mut self, p: IndexPack) { + self.packs.push(p); + } } #[derive(Debug, Serialize, Deserialize, Getters, Dissolve)] @@ -27,6 +45,28 @@ pub struct IndexPack { blobs: Vec, } +impl IndexPack { + pub fn new() -> Self { + Self { + id: Id::default(), + blobs: Vec::new(), + } + } + + pub fn set_id(&mut self, id: Id) { + self.id = id; + } + + pub fn add(&mut self, id: Id, tpe: BlobType, offset: u32, length: u32) { + self.blobs.push(IndexBlob { + id, + tpe, + offset, + length, + }); + } +} + #[derive(Debug, Clone, Serialize, Deserialize, Getters, Dissolve)] pub struct IndexBlob { id: Id, diff --git a/src/repo/key.rs b/src/repo/key.rs index 1229a6b..297f804 100644 --- a/src/repo/key.rs +++ b/src/repo/key.rs @@ -1,5 +1,5 @@ use crate::backend::{FileType, ReadBackend}; -use crate::crypto::Key; +use crate::crypto::{CryptoKey, Key}; use crate::id::Id; use anyhow::{anyhow, Result}; diff --git a/src/repo/snapshot.rs b/src/repo/snapshot.rs index 0e05df0..b70f190 100644 --- a/src/repo/snapshot.rs +++ b/src/repo/snapshot.rs @@ -1,16 +1,15 @@ use anyhow::Result; use chrono::{DateTime, Local}; use serde::{Deserialize, Serialize}; -use std::path::PathBuf; use super::Id; -use crate::backend::{FileType, ReadBackend}; +use crate::backend::{FileType, ReadBackend, WriteBackend}; -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)] pub struct SnapshotFile { pub time: DateTime, pub tree: Id, - pub paths: Vec, + pub paths: Vec, #[serde(default)] pub hostname: String, #[serde(default)] @@ -21,18 +20,82 @@ pub struct SnapshotFile { pub gid: u32, #[serde(default)] pub tags: TagList, + pub file_count: Option, + pub size: Option, + + #[serde(skip)] + pub id: Id, } impl SnapshotFile { - /// Get an IndexFile from the backend + pub fn new( + tree: 
Id, + paths: Vec, + hostname: String, + username: String, + uid: u32, + gid: u32, + tags: TagList, + file_count: Option, + size: Option, + ) -> Self { + Self { + time: Local::now(), + tree, + paths, + hostname, + username, + uid, + gid, + tags, + file_count, + size, + id: Id::default(), + } + } + + /// Get a SnapshotFile from the backend pub fn from_backend(be: &B, id: Id) -> Result { let data = be.read_full(FileType::Snapshot, id)?; - Ok(serde_json::from_slice(&data)?) + let mut snap: Self = serde_json::from_slice(&data)?; + snap.set_id(id); + Ok(snap) + } + + /// Get all SnapshotFiles from the backend + pub fn all_from_backend(be: &B) -> Result> { + let snapshots: Vec<_> = be + .list(FileType::Snapshot)? + .into_iter() + .map(|id| SnapshotFile::from_backend(be, id)) + .collect::>()?; + Ok(snapshots) + } + + /// Save a SnapshotFile to the backend + pub fn save_to_backend(&self, be: &B) -> Result { + let data = serde_json::to_vec(&self)?; + Ok(be.hash_write_full(FileType::Snapshot, &data)?) + } + + pub fn set_id(&mut self, id: Id) { + self.id = id; } } -#[derive(Default, Debug, Serialize, Deserialize)] +impl PartialOrd for SnapshotFile { + fn partial_cmp(&self, other: &Self) -> Option { + self.time.partial_cmp(&other.time) + } +} +impl Ord for SnapshotFile { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.time.cmp(&other.time) + } +} + +#[derive(Default, Debug, PartialEq, Eq, PartialOrd, Serialize, Deserialize)] pub struct TagList(Vec); -#[derive(Default, Debug, Serialize, Deserialize)] +#[derive(Default, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] pub struct Tag(String);