diff --git a/src/backend/cache.rs b/src/backend/cache.rs index 8bf698f..0bb4f6c 100644 --- a/src/backend/cache.rs +++ b/src/backend/cache.rs @@ -183,10 +183,6 @@ impl Cache { Ok(walker.collect()) } - // TODO: this function is yet only called from list_with_size. This cleans up - // index and snapshot files. - // It should also be called when reading the index to clean up pack files. - pub async fn remove_not_in_list(&self, tpe: FileType, list: &Vec<(Id, u32)>) -> Result<()> { let mut list_cache = self.list_with_size(tpe).await?; // remove present files from the cache list diff --git a/src/commands/forget.rs b/src/commands/forget.rs index 98b8521..957cd9c 100644 --- a/src/commands/forget.rs +++ b/src/commands/forget.rs @@ -5,7 +5,7 @@ use derivative::Derivative; use prettytable::{cell, format, row, Table}; use super::{progress_counter, prune}; -use crate::backend::{DecryptFullBackend, FileType}; +use crate::backend::{Cache, DecryptFullBackend, FileType}; use crate::repo::{ ConfigFile, SnapshotFile, SnapshotFilter, SnapshotGroup, SnapshotGroupCriterion, StringList, }; @@ -44,6 +44,7 @@ pub(super) struct Opts { pub(super) async fn execute( be: &(impl DecryptFullBackend + Unpin), + cache: Option, mut opts: Opts, config: ConfigFile, ) -> Result<()> { @@ -127,7 +128,7 @@ pub(super) async fn execute( } if opts.prune { - prune::execute(be, opts.prune_opts, config, forget_snaps).await?; + prune::execute(be, cache, opts.prune_opts, config, forget_snaps).await?; } Ok(()) diff --git a/src/commands/mod.rs b/src/commands/mod.rs index d0d3fcf..3509dfe 100644 --- a/src/commands/mod.rs +++ b/src/commands/mod.rs @@ -205,14 +205,14 @@ pub async fn execute() -> Result<()> { Command::Cat(opts) => cat::execute(&dbe, opts).await?, Command::Check(opts) => check::execute(&dbe, &cache, &be_hot, &be, opts).await?, Command::Diff(opts) => diff::execute(&dbe, opts).await?, - Command::Forget(opts) => forget::execute(&dbe, opts, config).await?, + Command::Forget(opts) => forget::execute(&dbe, cache, opts, config).await?, Command::Init(_) => {} // already handled above Command::Key(opts) => key::execute(&dbe, key, opts).await?, Command::List(opts) => list::execute(&dbe, opts).await?, Command::Ls(opts) => ls::execute(&dbe, opts).await?, Command::SelfUpdate(_) => {} // already handled above Command::Snapshots(opts) => snapshots::execute(&dbe, opts).await?, - Command::Prune(opts) => prune::execute(&dbe, opts, config, vec![]).await?, + Command::Prune(opts) => prune::execute(&dbe, cache, opts, config, vec![]).await?, Command::Restore(opts) => restore::execute(&dbe, opts).await?, Command::Repoinfo(opts) => repoinfo::execute(&dbe, &be_hot, opts).await?, Command::Tag(opts) => tag::execute(&dbe, opts).await?, diff --git a/src/commands/prune.rs b/src/commands/prune.rs index fcf98a3..259236a 100644 --- a/src/commands/prune.rs +++ b/src/commands/prune.rs @@ -11,7 +11,7 @@ use futures::{future, TryStreamExt}; use vlog::*; use super::{bytes, no_progress, progress_bytes, progress_counter, wait, warm_up, warm_up_command}; -use crate::backend::{DecryptFullBackend, DecryptReadBackend, FileType}; +use crate::backend::{Cache, DecryptFullBackend, DecryptReadBackend, FileType}; use crate::blob::{BlobType, BlobTypeMap, NodeType, Repacker, TreeStreamerOnce}; use crate::id::Id; use crate::index::{IndexBackend, IndexCollector, IndexType, IndexedBackend, Indexer}; @@ -44,6 +44,10 @@ pub(super) struct Opts { #[clap(long, value_name = "DURATION", default_value = "0d")] keep_pack: humantime::Duration, + /// only remove unneded pack file from local cache + #[clap(long)] + cache_only: bool, + /// simply copy blobs when repacking instead of decrypting; possibly compressing; encrypting #[clap(long)] fast_repack: bool, @@ -72,6 +76,7 @@ pub(super) struct Opts { pub(super) async fn execute( be: &(impl DecryptFullBackend + Unpin), + cache: Option, opts: Opts, config: ConfigFile, ignore_snaps: Vec, @@ -97,6 +102,21 @@ pub(super) async fn execute( } p.finish(); + if let Some(cache) = &cache { + v1!("cleaning up packs from cache..."); + cache + .remove_not_in_list(FileType::Pack, index_collector.tree_packs()) + .await?; + } + match (cache.is_some(), opts.cache_only) { + (true, true) => return Ok(()), + (false, true) => { + ve1!("Warning: option --cache-only used without a cache."); + return Ok(()); + } + _ => {} + } + let used_ids = { let indexed_be = IndexBackend::new_from_index(&be.clone(), index_collector.into_index()); find_used_blobs(&indexed_be, ignore_snaps).await? diff --git a/src/index/binarysorted.rs b/src/index/binarysorted.rs index b966287..061b355 100644 --- a/src/index/binarysorted.rs +++ b/src/index/binarysorted.rs @@ -1,6 +1,7 @@ use std::num::NonZeroU32; use super::{BlobType, IndexEntry, ReadIndex}; +use crate::blob::BlobTypeMap; use crate::id::Id; use crate::repo::IndexPack; @@ -19,50 +20,69 @@ pub(crate) enum IndexType { OnlyTrees, } -enum SortedHashSetMap { +enum EntriesVariants { None, - Set(Vec), - Map(Vec), + Ids(Vec), + FullEntries(Vec), } -pub(crate) struct IndexCollector { - packs: Vec, - tree: Vec, - data: SortedHashSetMap, - total_tree_size: u64, - total_data_size: u64, +impl Default for EntriesVariants { + fn default() -> Self { + Self::None + } } +#[derive(Default)] +pub(crate) struct TypeIndexCollector { + packs: Vec<(Id, u32)>, + entries: EntriesVariants, + total_size: u64, +} + +#[derive(Default)] +pub(crate) struct IndexCollector(BlobTypeMap); + +pub(crate) struct TypeIndex { + packs: Vec, + entries: EntriesVariants, + total_size: u64, +} +pub struct Index(BlobTypeMap); + impl IndexCollector { pub fn new(tpe: IndexType) -> Self { - let data = match tpe { - IndexType::OnlyTrees => SortedHashSetMap::None, - IndexType::FullTrees => SortedHashSetMap::Set(Vec::new()), - IndexType::Full => SortedHashSetMap::Map(Vec::new()), + let mut collector = Self::default(); + + collector.0[BlobType::Tree].entries = EntriesVariants::FullEntries(Vec::new()); + collector.0[BlobType::Data].entries = match tpe { + IndexType::OnlyTrees => EntriesVariants::None, + IndexType::FullTrees => EntriesVariants::Ids(Vec::new()), + IndexType::Full => EntriesVariants::FullEntries(Vec::new()), }; - Self { - packs: Vec::new(), - tree: Vec::new(), - data, - total_tree_size: 0, - total_data_size: 0, - } + + collector } - pub fn into_index(mut self) -> Index { - self.tree.sort_unstable_by_key(|e| e.id); - match &mut self.data { - SortedHashSetMap::None => {} - SortedHashSetMap::Set(ids) => ids.sort_unstable(), - SortedHashSetMap::Map(data) => data.sort_unstable_by_key(|e| e.id), - }; - Index { - packs: self.packs, - tree: self.tree, - data: self.data, - total_tree_size: self.total_tree_size, - total_data_size: self.total_data_size, - } + pub fn tree_packs(&self) -> &Vec<(Id, u32)> { + &self.0[BlobType::Tree].packs + } + + // Turns Collector into an index by sorting the entries. + pub fn into_index(self) -> Index { + Index(self.0.map(|_, mut tc| { + match &mut tc.entries { + EntriesVariants::None => {} + EntriesVariants::Ids(ids) => ids.sort_unstable(), + EntriesVariants::FullEntries(entries) => entries.sort_unstable_by_key(|e| e.id), + }; + + let packs = tc.packs.into_iter().map(|(id, _)| id).collect(); + TypeIndex { + packs, + entries: tc.entries, + total_size: tc.total_size, + } + })) } } @@ -72,21 +92,19 @@ impl Extend for IndexCollector { T: IntoIterator, { for p in iter { - let idx = self.packs.len(); - self.packs.push(p.id); let len = p.blobs.len(); let blob_type = p.blob_type(); + let size = p.pack_size(); - match blob_type { - BlobType::Tree => self.total_tree_size += p.pack_size() as u64, - BlobType::Data => self.total_data_size += p.pack_size() as u64, - } + let idx = self.0[blob_type].packs.len(); + self.0[blob_type].packs.push((p.id, size)); - match (p.blob_type(), &mut self.data) { - (BlobType::Tree, _) => self.tree.reserve(len), - (BlobType::Data, SortedHashSetMap::None) => {} - (BlobType::Data, SortedHashSetMap::Set(ids)) => ids.reserve(len), - (BlobType::Data, SortedHashSetMap::Map(data)) => data.reserve(len), + self.0[blob_type].total_size += size as u64; + + match &mut self.0[blob_type].entries { + EntriesVariants::None => {} + EntriesVariants::Ids(ids) => ids.reserve(len), + EntriesVariants::FullEntries(entries) => entries.reserve(len), }; for blob in &p.blobs { @@ -97,39 +115,31 @@ impl Extend for IndexCollector { length: blob.length, uncompressed_length: blob.uncompressed_length, }; - match (blob.tpe, &mut self.data) { - (BlobType::Tree, _) => self.tree.push(be), - (BlobType::Data, SortedHashSetMap::None) => {} - (BlobType::Data, SortedHashSetMap::Set(ids)) => ids.push(blob.id), - (BlobType::Data, SortedHashSetMap::Map(data)) => data.push(be), + match &mut self.0[blob_type].entries { + EntriesVariants::None => {} + EntriesVariants::Ids(ids) => ids.push(blob.id), + EntriesVariants::FullEntries(entries) => entries.push(be), }; } } } } -pub struct Index { - packs: Vec, - tree: Vec, - data: SortedHashSetMap, - total_tree_size: u64, - total_data_size: u64, -} - impl ReadIndex for Index { - fn get_id(&self, tpe: &BlobType, id: &Id) -> Option { - let vec = match (tpe, &self.data) { - (BlobType::Tree, _) => &self.tree, - (BlobType::Data, SortedHashSetMap::Map(data)) => data, - (BlobType::Data, _) => { + fn get_id(&self, blob_type: &BlobType, id: &Id) -> Option { + let vec = match &self.0[*blob_type].entries { + EntriesVariants::FullEntries(entries) => entries, + _ => { + // get_id() only gives results if index contains full entries return None; } }; + vec.binary_search_by_key(id, |e| e.id).ok().map(|index| { let be = &vec[index]; IndexEntry::new( - *tpe, - self.packs[be.pack_idx], + *blob_type, + self.0[*blob_type].packs[be.pack_idx], be.offset, be.length, be.uncompressed_length, @@ -137,21 +147,192 @@ impl ReadIndex for Index { }) } - fn total_size(&self, tpe: &BlobType) -> u64 { - match tpe { - BlobType::Tree => self.total_tree_size, - BlobType::Data => self.total_data_size, - } + fn total_size(&self, blob_type: &BlobType) -> u64 { + self.0[*blob_type].total_size } - fn has(&self, tpe: &BlobType, id: &Id) -> bool { - match (tpe, &self.data) { - (BlobType::Tree, _) => self.tree.binary_search_by_key(id, |e| e.id).is_ok(), - (BlobType::Data, SortedHashSetMap::Map(data)) => { - data.binary_search_by_key(id, |e| e.id).is_ok() + fn has(&self, blob_type: &BlobType, id: &Id) -> bool { + match &self.0[*blob_type].entries { + EntriesVariants::FullEntries(entries) => { + entries.binary_search_by_key(id, |e| e.id).is_ok() } - (BlobType::Data, SortedHashSetMap::Set(data)) => data.binary_search(id).is_ok(), - (BlobType::Data, SortedHashSetMap::None) => false, + EntriesVariants::Ids(ids) => ids.binary_search(id).is_ok(), + // has() only gives results if index contains full entries or ids + EntriesVariants::None => false, } } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::repo::IndexFile; + + const JSON_INDEX: &str = r#" +{"packs":[{"id":"217f145b63fbc10267f5a686186689ea3389bed0d6a54b50ffc84d71f99eb7fa", + "blobs":[{"id":"a3e048f1073299310981d8f5447861df0eca26a706645b5e2fa355c31c2205ed", + "type":"data", + "offset":0, + "length":2869, + "uncompressed_length":9987}, + {"id":"458c0b9b656a6593b7ba85ecdbfe85d6cb32af70c2e9c5fd1871cf3dccc39044", + "type":"data", + "offset":2869, + "length":2316, + "uncompressed_length":7370}, + {"id":"fac5e908151e565267570108127b96e6bae22bcdda1d3d867f63ed1555fc8aef", + "type":"data", + "offset":5185, + "length":2095, + "uncompressed_length":6411} + ]}, + {"id":"3b25ec6d16401c31099c259311562160b1b5efbcf70bd69d0463104d3b8148fc", + "blobs":[{"id":"620b2cef43d4c7aab3d7c911a3c0e872d2e0e70f170201002b8af8fb98c59da5", + "type":"data", + "offset":6324, + "length":1413, + "uncompressed_length":3752}, + {"id":"ee67585c7c53324e74537ab7aa44f889c0767c1b67e7e336fae6204aef2d4c73", + "type":"data", + "offset":7737, + "length":7686, + "uncompressed_length":29928}, + {"id":"8aaa5f7f6c7b4a5ea5c70a744bf40002c54542e5a573c13d41ac9c8b17f426c1", + "type":"data", + "offset":15423, + "length":1419, + "uncompressed_length":3905}, + {"id":"f2ca1bb6c7e907d06dafe4687e579fce76b37e4e93b7605022da52e6ccc26fd2", + "type":"data", + "offset":16842, + "length":46, + "uncompressed_length":5} + ]}, + {"id":"8431a27d38dd7d192dc37abd43a85d6dc4298de72fc8f583c5d7cdd09fa47274", + "blobs":[{"id":"3b25ec6d16401c31099c259311562160b1b5efbcf70bd69d0463104d3b8148fc", + "type":"tree", + "offset":0, + "length":794, + "uncompressed_length":3030}, + {"id":"2ef8decbd2a17d9bfb1b35cfbdcd368175ea86d05dd93a4751fdacbe5213e611", + "type":"tree", + "offset":794, + "length":592, + "uncompressed_length":1912} + ]} + ]}"#; + + fn index(it: IndexType) -> Index { + let index: IndexFile = serde_json::from_str(JSON_INDEX).unwrap(); + let mut collector = IndexCollector::new(it); + collector.extend(index.packs); + collector.into_index() + } + + fn parse(s: &str) -> Id { + Id::from_hex(s).unwrap() + } + + #[test] + fn all_index_types() { + for it in [IndexType::OnlyTrees, IndexType::FullTrees, IndexType::Full] { + let index = index(it); + + let id = parse("0000000000000000000000000000000000000000000000000000000000000000"); + assert!(!index.has(&BlobType::Data, &id)); + assert!(index.get_id(&BlobType::Data, &id).is_none()); + assert!(!index.has(&BlobType::Tree, &id)); + assert!(index.get_id(&BlobType::Tree, &id).is_none()); + + let id = parse("aac5e908151e5652b7570108127b96e6bae22bcdda1d3d867f63ed1555fc8aef"); + assert!(!index.has(&BlobType::Data, &id,)); + assert!(index.get_id(&BlobType::Data, &id).is_none()); + assert!(!index.has(&BlobType::Tree, &id)); + assert!(index.get_id(&BlobType::Tree, &id).is_none()); + + let id = parse("2ef8decbd2a17d9bfb1b35cfbdcd368175ea86d05dd93a4751fdacbe5213e611"); + assert!(!index.has(&BlobType::Data, &id)); + assert!(index.get_id(&BlobType::Data, &id).is_none()); + assert!(index.has(&BlobType::Tree, &id)); + assert_eq!( + index.get_id(&BlobType::Tree, &id), + Some(IndexEntry { + blob_type: BlobType::Tree, + pack: parse("8431a27d38dd7d192dc37abd43a85d6dc4298de72fc8f583c5d7cdd09fa47274"), + offset: 794, + length: 592, + uncompressed_length: Some(NonZeroU32::new(1912).unwrap()), + }), + ); + } + } + + #[test] + fn only_trees() { + let index = index(IndexType::OnlyTrees); + + let id = parse("fac5e908151e565267570108127b96e6bae22bcdda1d3d867f63ed1555fc8aef"); + assert!(!index.has(&BlobType::Data, &id)); + assert!(index.get_id(&BlobType::Data, &id).is_none()); + assert!(!index.has(&BlobType::Tree, &id)); + assert!(index.get_id(&BlobType::Tree, &id).is_none()); + + let id = parse("620b2cef43d4c7aab3d7c911a3c0e872d2e0e70f170201002b8af8fb98c59da5"); + assert!(!index.has(&BlobType::Data, &id)); + assert!(index.get_id(&BlobType::Data, &id).is_none()); + assert!(!index.has(&BlobType::Tree, &id)); + assert!(index.get_id(&BlobType::Tree, &id).is_none()); + } + + #[test] + fn full_trees() { + let index = index(IndexType::FullTrees); + + let id = parse("fac5e908151e565267570108127b96e6bae22bcdda1d3d867f63ed1555fc8aef"); + assert!(index.has(&BlobType::Data, &id)); + assert!(index.get_id(&BlobType::Data, &id).is_none()); + assert!(!index.has(&BlobType::Tree, &id)); + assert!(index.get_id(&BlobType::Tree, &id).is_none()); + + let id = parse("620b2cef43d4c7aab3d7c911a3c0e872d2e0e70f170201002b8af8fb98c59da5"); + assert!(index.has(&BlobType::Data, &id)); + assert!(index.get_id(&BlobType::Data, &id).is_none()); + assert!(!index.has(&BlobType::Tree, &id)); + assert!(index.get_id(&BlobType::Tree, &id).is_none()); + } + + #[test] + fn full() { + let index = index(IndexType::Full); + + let id = parse("fac5e908151e565267570108127b96e6bae22bcdda1d3d867f63ed1555fc8aef"); + assert!(index.has(&BlobType::Data, &id)); + assert_eq!( + index.get_id(&BlobType::Data, &id), + Some(IndexEntry { + blob_type: BlobType::Data, + pack: parse("217f145b63fbc10267f5a686186689ea3389bed0d6a54b50ffc84d71f99eb7fa"), + offset: 5185, + length: 2095, + uncompressed_length: Some(NonZeroU32::new(6411).unwrap()), + }), + ); + assert!(!index.has(&BlobType::Tree, &id)); + assert!(index.get_id(&BlobType::Tree, &id).is_none()); + + let id = parse("620b2cef43d4c7aab3d7c911a3c0e872d2e0e70f170201002b8af8fb98c59da5"); + assert!(index.has(&BlobType::Data, &id)); + assert_eq!( + index.get_id(&BlobType::Data, &id), + Some(IndexEntry { + blob_type: BlobType::Data, + pack: parse("3b25ec6d16401c31099c259311562160b1b5efbcf70bd69d0463104d3b8148fc"), + offset: 6324, + length: 1413, + uncompressed_length: Some(NonZeroU32::new(3752).unwrap()), + }), + ); + assert!(!index.has(&BlobType::Tree, &id)); + assert!(index.get_id(&BlobType::Tree, &id).is_none()); + } +} diff --git a/src/index/mod.rs b/src/index/mod.rs index b11d4de..c6a9d72 100644 --- a/src/index/mod.rs +++ b/src/index/mod.rs @@ -22,7 +22,7 @@ mod indexer; pub use binarysorted::*; pub use indexer::*; -#[derive(Debug, Clone, Constructor, Getters)] +#[derive(Debug, Clone, PartialEq, Eq, Constructor, Getters)] pub struct IndexEntry { blob_type: BlobType, pack: Id,