Merge pull request #122 from rustic-rs/cache-remove-packs

Clear packs in cache within prune and add option `--cache-only`
This commit is contained in:
aawsome 2022-08-15 20:52:40 +02:00 committed by GitHub
commit 7c189697c1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 285 additions and 87 deletions

View File

@ -183,10 +183,6 @@ impl Cache {
Ok(walker.collect())
}
// TODO: this function is yet only called from list_with_size. This cleans up
// index and snapshot files.
// It should also be called when reading the index to clean up pack files.
pub async fn remove_not_in_list(&self, tpe: FileType, list: &Vec<(Id, u32)>) -> Result<()> {
let mut list_cache = self.list_with_size(tpe).await?;
// remove present files from the cache list

View File

@ -5,7 +5,7 @@ use derivative::Derivative;
use prettytable::{cell, format, row, Table};
use super::{progress_counter, prune};
use crate::backend::{DecryptFullBackend, FileType};
use crate::backend::{Cache, DecryptFullBackend, FileType};
use crate::repo::{
ConfigFile, SnapshotFile, SnapshotFilter, SnapshotGroup, SnapshotGroupCriterion, StringList,
};
@ -44,6 +44,7 @@ pub(super) struct Opts {
pub(super) async fn execute(
be: &(impl DecryptFullBackend + Unpin),
cache: Option<Cache>,
mut opts: Opts,
config: ConfigFile,
) -> Result<()> {
@ -127,7 +128,7 @@ pub(super) async fn execute(
}
if opts.prune {
prune::execute(be, opts.prune_opts, config, forget_snaps).await?;
prune::execute(be, cache, opts.prune_opts, config, forget_snaps).await?;
}
Ok(())

View File

@ -205,14 +205,14 @@ pub async fn execute() -> Result<()> {
Command::Cat(opts) => cat::execute(&dbe, opts).await?,
Command::Check(opts) => check::execute(&dbe, &cache, &be_hot, &be, opts).await?,
Command::Diff(opts) => diff::execute(&dbe, opts).await?,
Command::Forget(opts) => forget::execute(&dbe, opts, config).await?,
Command::Forget(opts) => forget::execute(&dbe, cache, opts, config).await?,
Command::Init(_) => {} // already handled above
Command::Key(opts) => key::execute(&dbe, key, opts).await?,
Command::List(opts) => list::execute(&dbe, opts).await?,
Command::Ls(opts) => ls::execute(&dbe, opts).await?,
Command::SelfUpdate(_) => {} // already handled above
Command::Snapshots(opts) => snapshots::execute(&dbe, opts).await?,
Command::Prune(opts) => prune::execute(&dbe, opts, config, vec![]).await?,
Command::Prune(opts) => prune::execute(&dbe, cache, opts, config, vec![]).await?,
Command::Restore(opts) => restore::execute(&dbe, opts).await?,
Command::Repoinfo(opts) => repoinfo::execute(&dbe, &be_hot, opts).await?,
Command::Tag(opts) => tag::execute(&dbe, opts).await?,

View File

@ -11,7 +11,7 @@ use futures::{future, TryStreamExt};
use vlog::*;
use super::{bytes, no_progress, progress_bytes, progress_counter, wait, warm_up, warm_up_command};
use crate::backend::{DecryptFullBackend, DecryptReadBackend, FileType};
use crate::backend::{Cache, DecryptFullBackend, DecryptReadBackend, FileType};
use crate::blob::{BlobType, BlobTypeMap, NodeType, Repacker, TreeStreamerOnce};
use crate::id::Id;
use crate::index::{IndexBackend, IndexCollector, IndexType, IndexedBackend, Indexer};
@ -44,6 +44,10 @@ pub(super) struct Opts {
#[clap(long, value_name = "DURATION", default_value = "0d")]
keep_pack: humantime::Duration,
/// only remove unneeded pack files from local cache
#[clap(long)]
cache_only: bool,
/// simply copy blobs when repacking instead of decrypting; possibly compressing; encrypting
#[clap(long)]
fast_repack: bool,
@ -72,6 +76,7 @@ pub(super) struct Opts {
pub(super) async fn execute(
be: &(impl DecryptFullBackend + Unpin),
cache: Option<Cache>,
opts: Opts,
config: ConfigFile,
ignore_snaps: Vec<Id>,
@ -97,6 +102,21 @@ pub(super) async fn execute(
}
p.finish();
if let Some(cache) = &cache {
v1!("cleaning up packs from cache...");
cache
.remove_not_in_list(FileType::Pack, index_collector.tree_packs())
.await?;
}
match (cache.is_some(), opts.cache_only) {
(true, true) => return Ok(()),
(false, true) => {
ve1!("Warning: option --cache-only used without a cache.");
return Ok(());
}
_ => {}
}
let used_ids = {
let indexed_be = IndexBackend::new_from_index(&be.clone(), index_collector.into_index());
find_used_blobs(&indexed_be, ignore_snaps).await?

View File

@ -1,6 +1,7 @@
use std::num::NonZeroU32;
use super::{BlobType, IndexEntry, ReadIndex};
use crate::blob::BlobTypeMap;
use crate::id::Id;
use crate::repo::IndexPack;
@ -19,50 +20,69 @@ pub(crate) enum IndexType {
OnlyTrees,
}
enum SortedHashSetMap {
enum EntriesVariants {
None,
Set(Vec<Id>),
Map(Vec<SortedEntry>),
Ids(Vec<Id>),
FullEntries(Vec<SortedEntry>),
}
pub(crate) struct IndexCollector {
packs: Vec<Id>,
tree: Vec<SortedEntry>,
data: SortedHashSetMap,
total_tree_size: u64,
total_data_size: u64,
impl Default for EntriesVariants {
fn default() -> Self {
Self::None
}
}
/// State gathered for a single blob type while index packs are being collected.
#[derive(Default)]
pub(crate) struct TypeIndexCollector {
// every pack seen for this blob type, paired with its pack size
packs: Vec<(Id, u32)>,
// blob entries collected so far; the variant decides how much per-blob detail is kept
entries: EntriesVariants,
// running sum of the sizes of all collected packs
total_size: u64,
}
/// Collects index packs, keeping separate state for every blob type.
/// Use `into_index` to turn the collected data into an `Index`.
#[derive(Default)]
pub(crate) struct IndexCollector(BlobTypeMap<TypeIndexCollector>);
/// Finished index data for a single blob type.
pub(crate) struct TypeIndex {
// ids of the packs; full entries refer to a pack by its position (`pack_idx`) in this list
packs: Vec<Id>,
// blob entries, sorted by id so they can be binary-searched
entries: EntriesVariants,
// sum of the sizes of all packs of this blob type
total_size: u64,
}
/// Index built by `IndexCollector::into_index`, holding one `TypeIndex` per blob type.
pub struct Index(BlobTypeMap<TypeIndex>);
impl IndexCollector {
pub fn new(tpe: IndexType) -> Self {
let data = match tpe {
IndexType::OnlyTrees => SortedHashSetMap::None,
IndexType::FullTrees => SortedHashSetMap::Set(Vec::new()),
IndexType::Full => SortedHashSetMap::Map(Vec::new()),
let mut collector = Self::default();
collector.0[BlobType::Tree].entries = EntriesVariants::FullEntries(Vec::new());
collector.0[BlobType::Data].entries = match tpe {
IndexType::OnlyTrees => EntriesVariants::None,
IndexType::FullTrees => EntriesVariants::Ids(Vec::new()),
IndexType::Full => EntriesVariants::FullEntries(Vec::new()),
};
Self {
packs: Vec::new(),
tree: Vec::new(),
data,
total_tree_size: 0,
total_data_size: 0,
}
collector
}
pub fn into_index(mut self) -> Index {
self.tree.sort_unstable_by_key(|e| e.id);
match &mut self.data {
SortedHashSetMap::None => {}
SortedHashSetMap::Set(ids) => ids.sort_unstable(),
SortedHashSetMap::Map(data) => data.sort_unstable_by_key(|e| e.id),
};
Index {
packs: self.packs,
tree: self.tree,
data: self.data,
total_tree_size: self.total_tree_size,
total_data_size: self.total_data_size,
}
/// Returns all tree packs collected so far, each paired with its pack size.
pub fn tree_packs(&self) -> &Vec<(Id, u32)> {
    let tree_collector = &self.0[BlobType::Tree];
    &tree_collector.packs
}
// Turns Collector into an index by sorting the entries.
pub fn into_index(self) -> Index {
Index(self.0.map(|_, mut tc| {
match &mut tc.entries {
EntriesVariants::None => {}
EntriesVariants::Ids(ids) => ids.sort_unstable(),
EntriesVariants::FullEntries(entries) => entries.sort_unstable_by_key(|e| e.id),
};
let packs = tc.packs.into_iter().map(|(id, _)| id).collect();
TypeIndex {
packs,
entries: tc.entries,
total_size: tc.total_size,
}
}))
}
}
@ -72,21 +92,19 @@ impl Extend<IndexPack> for IndexCollector {
T: IntoIterator<Item = IndexPack>,
{
for p in iter {
let idx = self.packs.len();
self.packs.push(p.id);
let len = p.blobs.len();
let blob_type = p.blob_type();
let size = p.pack_size();
match blob_type {
BlobType::Tree => self.total_tree_size += p.pack_size() as u64,
BlobType::Data => self.total_data_size += p.pack_size() as u64,
}
let idx = self.0[blob_type].packs.len();
self.0[blob_type].packs.push((p.id, size));
match (p.blob_type(), &mut self.data) {
(BlobType::Tree, _) => self.tree.reserve(len),
(BlobType::Data, SortedHashSetMap::None) => {}
(BlobType::Data, SortedHashSetMap::Set(ids)) => ids.reserve(len),
(BlobType::Data, SortedHashSetMap::Map(data)) => data.reserve(len),
self.0[blob_type].total_size += size as u64;
match &mut self.0[blob_type].entries {
EntriesVariants::None => {}
EntriesVariants::Ids(ids) => ids.reserve(len),
EntriesVariants::FullEntries(entries) => entries.reserve(len),
};
for blob in &p.blobs {
@ -97,39 +115,31 @@ impl Extend<IndexPack> for IndexCollector {
length: blob.length,
uncompressed_length: blob.uncompressed_length,
};
match (blob.tpe, &mut self.data) {
(BlobType::Tree, _) => self.tree.push(be),
(BlobType::Data, SortedHashSetMap::None) => {}
(BlobType::Data, SortedHashSetMap::Set(ids)) => ids.push(blob.id),
(BlobType::Data, SortedHashSetMap::Map(data)) => data.push(be),
match &mut self.0[blob_type].entries {
EntriesVariants::None => {}
EntriesVariants::Ids(ids) => ids.push(blob.id),
EntriesVariants::FullEntries(entries) => entries.push(be),
};
}
}
}
}
pub struct Index {
packs: Vec<Id>,
tree: Vec<SortedEntry>,
data: SortedHashSetMap,
total_tree_size: u64,
total_data_size: u64,
}
impl ReadIndex for Index {
fn get_id(&self, tpe: &BlobType, id: &Id) -> Option<IndexEntry> {
let vec = match (tpe, &self.data) {
(BlobType::Tree, _) => &self.tree,
(BlobType::Data, SortedHashSetMap::Map(data)) => data,
(BlobType::Data, _) => {
fn get_id(&self, blob_type: &BlobType, id: &Id) -> Option<IndexEntry> {
let vec = match &self.0[*blob_type].entries {
EntriesVariants::FullEntries(entries) => entries,
_ => {
// get_id() only gives results if index contains full entries
return None;
}
};
vec.binary_search_by_key(id, |e| e.id).ok().map(|index| {
let be = &vec[index];
IndexEntry::new(
*tpe,
self.packs[be.pack_idx],
*blob_type,
self.0[*blob_type].packs[be.pack_idx],
be.offset,
be.length,
be.uncompressed_length,
@ -137,21 +147,192 @@ impl ReadIndex for Index {
})
}
fn total_size(&self, tpe: &BlobType) -> u64 {
match tpe {
BlobType::Tree => self.total_tree_size,
BlobType::Data => self.total_data_size,
}
fn total_size(&self, blob_type: &BlobType) -> u64 {
self.0[*blob_type].total_size
}
fn has(&self, tpe: &BlobType, id: &Id) -> bool {
match (tpe, &self.data) {
(BlobType::Tree, _) => self.tree.binary_search_by_key(id, |e| e.id).is_ok(),
(BlobType::Data, SortedHashSetMap::Map(data)) => {
data.binary_search_by_key(id, |e| e.id).is_ok()
fn has(&self, blob_type: &BlobType, id: &Id) -> bool {
match &self.0[*blob_type].entries {
EntriesVariants::FullEntries(entries) => {
entries.binary_search_by_key(id, |e| e.id).is_ok()
}
(BlobType::Data, SortedHashSetMap::Set(data)) => data.binary_search(id).is_ok(),
(BlobType::Data, SortedHashSetMap::None) => false,
EntriesVariants::Ids(ids) => ids.binary_search(id).is_ok(),
// has() only gives results if index contains full entries or ids
EntriesVariants::None => false,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::repo::IndexFile;
const JSON_INDEX: &str = r#"
{"packs":[{"id":"217f145b63fbc10267f5a686186689ea3389bed0d6a54b50ffc84d71f99eb7fa",
"blobs":[{"id":"a3e048f1073299310981d8f5447861df0eca26a706645b5e2fa355c31c2205ed",
"type":"data",
"offset":0,
"length":2869,
"uncompressed_length":9987},
{"id":"458c0b9b656a6593b7ba85ecdbfe85d6cb32af70c2e9c5fd1871cf3dccc39044",
"type":"data",
"offset":2869,
"length":2316,
"uncompressed_length":7370},
{"id":"fac5e908151e565267570108127b96e6bae22bcdda1d3d867f63ed1555fc8aef",
"type":"data",
"offset":5185,
"length":2095,
"uncompressed_length":6411}
]},
{"id":"3b25ec6d16401c31099c259311562160b1b5efbcf70bd69d0463104d3b8148fc",
"blobs":[{"id":"620b2cef43d4c7aab3d7c911a3c0e872d2e0e70f170201002b8af8fb98c59da5",
"type":"data",
"offset":6324,
"length":1413,
"uncompressed_length":3752},
{"id":"ee67585c7c53324e74537ab7aa44f889c0767c1b67e7e336fae6204aef2d4c73",
"type":"data",
"offset":7737,
"length":7686,
"uncompressed_length":29928},
{"id":"8aaa5f7f6c7b4a5ea5c70a744bf40002c54542e5a573c13d41ac9c8b17f426c1",
"type":"data",
"offset":15423,
"length":1419,
"uncompressed_length":3905},
{"id":"f2ca1bb6c7e907d06dafe4687e579fce76b37e4e93b7605022da52e6ccc26fd2",
"type":"data",
"offset":16842,
"length":46,
"uncompressed_length":5}
]},
{"id":"8431a27d38dd7d192dc37abd43a85d6dc4298de72fc8f583c5d7cdd09fa47274",
"blobs":[{"id":"3b25ec6d16401c31099c259311562160b1b5efbcf70bd69d0463104d3b8148fc",
"type":"tree",
"offset":0,
"length":794,
"uncompressed_length":3030},
{"id":"2ef8decbd2a17d9bfb1b35cfbdcd368175ea86d05dd93a4751fdacbe5213e611",
"type":"tree",
"offset":794,
"length":592,
"uncompressed_length":1912}
]}
]}"#;
/// Builds an [`Index`] of the requested type from the `JSON_INDEX` fixture.
fn index(it: IndexType) -> Index {
    let packs = serde_json::from_str::<IndexFile>(JSON_INDEX)
        .unwrap()
        .packs;
    let mut collector = IndexCollector::new(it);
    collector.extend(packs);
    collector.into_index()
}
/// Parses a hex string into an [`Id`]; panics if the string is not a valid id.
fn parse(s: &str) -> Id {
    let parsed = Id::from_hex(s);
    parsed.unwrap()
}
/// Checks the lookups whose outcome must not depend on the chosen [`IndexType`].
#[test]
fn all_index_types() {
    // Ids which do not occur in the fixture at all.
    const UNKNOWN: [&str; 2] = [
        "0000000000000000000000000000000000000000000000000000000000000000",
        "aac5e908151e5652b7570108127b96e6bae22bcdda1d3d867f63ed1555fc8aef",
    ];

    for it in [IndexType::OnlyTrees, IndexType::FullTrees, IndexType::Full] {
        let index = index(it);

        for hex in UNKNOWN {
            let id = parse(hex);
            for blob_type in [BlobType::Data, BlobType::Tree] {
                assert!(!index.has(&blob_type, &id));
                assert!(index.get_id(&blob_type, &id).is_none());
            }
        }

        // A tree blob: found with its full entry as tree, never as data.
        let id = parse("2ef8decbd2a17d9bfb1b35cfbdcd368175ea86d05dd93a4751fdacbe5213e611");
        assert!(!index.has(&BlobType::Data, &id));
        assert!(index.get_id(&BlobType::Data, &id).is_none());
        assert!(index.has(&BlobType::Tree, &id));

        let expected = IndexEntry {
            blob_type: BlobType::Tree,
            pack: parse("8431a27d38dd7d192dc37abd43a85d6dc4298de72fc8f583c5d7cdd09fa47274"),
            offset: 794,
            length: 592,
            uncompressed_length: Some(NonZeroU32::new(1912).unwrap()),
        };
        assert_eq!(index.get_id(&BlobType::Tree, &id), Some(expected));
    }
}
/// With `IndexType::OnlyTrees`, data blobs must not be found by any lookup.
#[test]
fn only_trees() {
    let index = index(IndexType::OnlyTrees);

    // Both ids belong to data blobs of the fixture.
    let data_blobs = [
        "fac5e908151e565267570108127b96e6bae22bcdda1d3d867f63ed1555fc8aef",
        "620b2cef43d4c7aab3d7c911a3c0e872d2e0e70f170201002b8af8fb98c59da5",
    ];

    for hex in data_blobs {
        let id = parse(hex);
        for blob_type in [BlobType::Data, BlobType::Tree] {
            assert!(!index.has(&blob_type, &id));
            assert!(index.get_id(&blob_type, &id).is_none());
        }
    }
}
/// With `IndexType::FullTrees`, data blobs are known to `has()` but yield no entry from `get_id()`.
#[test]
fn full_trees() {
    let index = index(IndexType::FullTrees);

    // Both ids belong to data blobs of the fixture.
    let data_blobs = [
        "fac5e908151e565267570108127b96e6bae22bcdda1d3d867f63ed1555fc8aef",
        "620b2cef43d4c7aab3d7c911a3c0e872d2e0e70f170201002b8af8fb98c59da5",
    ];

    for hex in data_blobs {
        let id = parse(hex);

        assert!(index.has(&BlobType::Data, &id));
        assert!(index.get_id(&BlobType::Data, &id).is_none());

        assert!(!index.has(&BlobType::Tree, &id));
        assert!(index.get_id(&BlobType::Tree, &id).is_none());
    }
}
/// With `IndexType::Full`, data blobs are found together with their complete entry.
#[test]
fn full() {
    let index = index(IndexType::Full);

    // (blob id, containing pack, offset, length, uncompressed length)
    let data_blobs = [
        (
            "fac5e908151e565267570108127b96e6bae22bcdda1d3d867f63ed1555fc8aef",
            "217f145b63fbc10267f5a686186689ea3389bed0d6a54b50ffc84d71f99eb7fa",
            5185,
            2095,
            6411,
        ),
        (
            "620b2cef43d4c7aab3d7c911a3c0e872d2e0e70f170201002b8af8fb98c59da5",
            "3b25ec6d16401c31099c259311562160b1b5efbcf70bd69d0463104d3b8148fc",
            6324,
            1413,
            3752,
        ),
    ];

    for (blob_hex, pack_hex, offset, length, uncompressed) in data_blobs {
        let id = parse(blob_hex);

        assert!(index.has(&BlobType::Data, &id));
        let expected = IndexEntry {
            blob_type: BlobType::Data,
            pack: parse(pack_hex),
            offset,
            length,
            uncompressed_length: Some(NonZeroU32::new(uncompressed).unwrap()),
        };
        assert_eq!(index.get_id(&BlobType::Data, &id), Some(expected));

        // a data blob must never show up as a tree blob
        assert!(!index.has(&BlobType::Tree, &id));
        assert!(index.get_id(&BlobType::Tree, &id).is_none());
    }
}
}

View File

@ -22,7 +22,7 @@ mod indexer;
pub use binarysorted::*;
pub use indexer::*;
#[derive(Debug, Clone, Constructor, Getters)]
#[derive(Debug, Clone, PartialEq, Eq, Constructor, Getters)]
pub struct IndexEntry {
blob_type: BlobType,
pack: Id,