From e17cc2dbf41cc7861418bc9f0493e9fec90c3f67 Mon Sep 17 00:00:00 2001 From: Alexander Weiss Date: Fri, 22 Apr 2022 21:31:01 +0200 Subject: [PATCH] make IndexFile structs public --- src/blob/packer.rs | 14 +++---- src/commands/check.rs | 15 +++----- src/commands/list.rs | 6 +-- src/commands/prune.rs | 77 ++++++++++++++++++++------------------- src/commands/repoinfo.rs | 10 ++--- src/index/binarysorted.rs | 20 +++++----- src/index/indexer.rs | 12 +++--- src/repo/indexfile.rs | 42 +++++++-------------- 8 files changed, 89 insertions(+), 107 deletions(-) diff --git a/src/blob/packer.rs b/src/blob/packer.rs index 979cd0f..b9c096f 100644 --- a/src/blob/packer.rs +++ b/src/blob/packer.rs @@ -38,7 +38,7 @@ impl Packer { size: 0, count: 0, created: SystemTime::now(), - index: IndexPack::new(), + index: IndexPack::default(), indexer, hasher: Hasher::new(), }) @@ -108,11 +108,11 @@ impl Packer { // collect header entries let mut writer = Cursor::new(Vec::new()); - for blob in self.index.blobs() { + for blob in &self.index.blobs { PackHeaderEntry { - tpe: *blob.tpe(), - len: *blob.length(), - id: *blob.id(), + tpe: blob.tpe, + len: blob.length, + id: blob.id, } .write_to(&mut writer)?; } @@ -154,12 +154,12 @@ impl Packer { let file = std::mem::replace(&mut self.file, tempfile()?); self.be.write_file(FileType::Pack, &id, file).await?; - let index = std::mem::replace(&mut self.index, IndexPack::new()); + let index = std::mem::take(&mut self.index); self.indexer.borrow_mut().add(index).await?; Ok(()) } fn has(&self, id: &Id) -> bool { - self.index.blobs().iter().any(|b| b.id() == id) + self.index.blobs.iter().any(|b| &b.id == id) } } diff --git a/src/commands/check.rs b/src/commands/check.rs index 1f16a92..10d506d 100644 --- a/src/commands/check.rs +++ b/src/commands/check.rs @@ -41,24 +41,21 @@ async fn check_packs(be: &impl DecryptReadBackend) -> Result<()> { // TODO: only read index files once let mut stream = be.stream_all::(progress_counter()).await?; while let Some(index) = stream.next().await { - for p in index?.1.dissolve().1 { - packs.insert(*p.id(), p.pack_size()); + for p in index?.1.packs { + packs.insert(p.id, p.pack_size()); // check offsests in index let mut expected_offset: u32 = 0; - let (id, mut blobs) = p.dissolve(); + let mut blobs = p.blobs; blobs.sort_unstable(); for blob in blobs { - if blob.offset() != &expected_offset { + if blob.offset != expected_offset { eprintln!( "pack {}: blob {} offset in index: {}, expected: {}", - id, - blob.id(), - blob.offset(), - expected_offset + p.id, blob.id, blob.offset, expected_offset ); } - expected_offset += blob.length(); + expected_offset += blob.length; } } } diff --git a/src/commands/list.rs b/src/commands/list.rs index 22c9b57..d5dde65 100644 --- a/src/commands/list.rs +++ b/src/commands/list.rs @@ -19,9 +19,9 @@ pub(super) async fn execute(be: &impl DecryptReadBackend, opts: Opts) -> Result< "blobs" => { let mut stream = be.stream_all::(ProgressBar::hidden()).await?; while let Some(index) = stream.next().await { - for pack in index?.1.dissolve().1 { - for blob in pack.blobs() { - println!("{:?} {}", blob.tpe(), blob.id().to_hex()); + for pack in index?.1.packs { + for blob in pack.blobs { + println!("{:?} {}", blob.tpe, blob.id.to_hex()); } } } diff --git a/src/commands/prune.rs b/src/commands/prune.rs index 37a6d8c..6fbca05 100644 --- a/src/commands/prune.rs +++ b/src/commands/prune.rs @@ -1,6 +1,6 @@ use std::cell::RefCell; use std::cmp::Ordering; -use std::collections::{hash_map::Entry, HashMap, HashSet}; +use std::collections::{HashMap, HashSet}; use std::rc::Rc; use std::str::FromStr; @@ -57,15 +57,16 @@ pub(super) async fn execute(be: &(impl DecryptFullBackend + Unpin), opts: Opts) v1!("finding duplicate blobs..."); for pack in index_files .iter() - .flat_map(|(_, index)| index.packs()) - .unique_by(|p| p.id()) + .flat_map(|(_, index)| &index.packs) + .unique_by(|p| p.id) { - for blob in pack.blobs() { - let id = *blob.id(); - // note that duplicates are only counted up to 255. If there are more - // duplicates, the number is set to 255. This may imply that later on - // not the "best" pack is chosen to have that blob marked as used. - used_ids.entry(id).and_modify(|e| *e = e.saturating_add(1)); + for blob in &pack.blobs { + if let Some(count) = used_ids.get_mut(&blob.id) { + // note that duplicates are only counted up to 255. If there are more + // duplicates, the number is set to 255. This may imply that later on + // not the "best" pack is chosen to have that blob marked as used. + *count = count.saturating_add(1); + } } } @@ -79,7 +80,7 @@ pub(super) async fn execute(be: &(impl DecryptFullBackend + Unpin), opts: Opts) let mut pruner = Pruner::new(used_ids, existing_packs); pruner.check()?; - pruner.decide_packs(index_files.iter().flat_map(|(_, index)| index.packs()))?; + pruner.decide_packs(index_files.iter().flat_map(|(_, index)| &index.packs))?; pruner.decide_repack(&opts.max_repack, &opts.max_unused); pruner.filter_index_files(index_files); pruner.print_stats(); @@ -188,7 +189,7 @@ impl Pruner { // search used and unused blobs within packs for pack in pack_iter { - if !processed_packs.insert(pack.id()) { + if !processed_packs.insert(pack.id) { // ignore duplicate packs continue; } @@ -198,14 +199,14 @@ impl Pruner { // check if the pack has used blobs which are no duplicates let has_used = pack - .blobs() + .blobs .iter() - .any(|blob| self.used_ids.get(blob.id()) == Some(&1)); + .any(|blob| self.used_ids.get(&blob.id) == Some(&1)); - for blob in pack.blobs() { - match self.used_ids.entry(*blob.id()) { - Entry::Vacant(_) => pi.add_unused_blob(blob), - Entry::Occupied(mut count) => pi.add_blob(blob, has_used, count.get_mut()), + for blob in &pack.blobs { + match self.used_ids.get_mut(&blob.id) { + None => pi.add_unused_blob(blob), + Some(count) => pi.add_blob(blob, has_used, count), } } @@ -217,28 +218,28 @@ impl Pruner { if pi.used_blobs == 0 { // unused pack self.stats.packs.unused += 1; - self.packs_remove.insert(*pack.id()); + self.packs_remove.insert(pack.id); self.stats.blobs.remove += pi.unused_blobs as u64; self.stats.size.remove += pi.unused_size as u64; - self.existing_packs.remove(pack.id()); + self.existing_packs.remove(&pack.id); } else { - if self.existing_packs.remove(pack.id()).is_none() { - bail!("used pack {} does not exist!", pack.id()); + if self.existing_packs.remove(&pack.id).is_none() { + bail!("used pack {} does not exist!", pack.id); } if pi.unused_blobs == 0 { // used pack self.stats.packs.used += 1; self.stats.packs.keep += 1; - for blob in pack.blobs() { - self.used_ids.remove(blob.id()); + for blob in &pack.blobs { + self.used_ids.remove(&blob.id); } } else { // partly used pack => candidate for repacking self.stats.packs.partly_used += 1; self.repack_candidates - .push(RepackCandidate { id: *pack.id(), pi }) + .push(RepackCandidate { id: pack.id, pi }) } } } @@ -290,12 +291,12 @@ impl Pruner { // filter out only the index files which need processing self.index_files .extend(index_files.into_iter().filter(|(_, index)| { - let must_modify = index.packs().iter().any(|p| { + let must_modify = index.packs.iter().any(|p| { // index must be processed if this is a duplicate pack // or the packs needs to be removed or repacked. - !processed_packs.insert(*p.id()) - || self.packs_repack.contains(p.id()) - || self.packs_remove.contains(p.id()) + !processed_packs.insert(p.id) + || self.packs_repack.contains(&p.id) + || self.packs_remove.contains(&p.id) }); any_must_modify |= must_modify; @@ -405,26 +406,26 @@ impl Pruner { } for (index_id, index) in self.index_files { - for pack in index.dissolve().1 { - if !processed_packs.insert(*pack.id()) { + for pack in index.packs { + if !processed_packs.insert(pack.id) { // ignore duplicate packs continue; } - if self.packs_repack.contains(pack.id()) { + if self.packs_repack.contains(&pack.id) { // TODO: repack in parallel - for blob in pack.blobs() { - if self.used_ids.remove(blob.id()).is_none() { + for blob in pack.blobs { + if self.used_ids.remove(&blob.id).is_none() { // don't save duplicate blobs continue; } let data = be - .read_partial(FileType::Pack, pack.id(), *blob.offset(), *blob.length()) + .read_partial(FileType::Pack, &pack.id, blob.offset, blob.length) .await?; - packer.add_raw(&data, blob.id(), *blob.tpe()).await?; + packer.add_raw(&data, &blob.id, blob.tpe).await?; } - } else if !self.packs_remove.contains(pack.id()) { + } else if !self.packs_remove.contains(&pack.id) { // keep pack: add to new index indexer.borrow_mut().add(pack).await?; } @@ -501,13 +502,13 @@ impl Ord for PackInfo { impl PackInfo { fn add_unused_blob(&mut self, blob: &IndexBlob) { // used duplicate exists, mark as unused - self.unused_size += blob.length(); + self.unused_size += blob.length; self.unused_blobs += 1; } fn add_used_blob(&mut self, blob: &IndexBlob) { // used duplicate exists, mark as unused - self.used_size += blob.length(); + self.used_size += blob.length; self.used_blobs += 1; } diff --git a/src/commands/repoinfo.rs b/src/commands/repoinfo.rs index 7504200..e143121 100644 --- a/src/commands/repoinfo.rs +++ b/src/commands/repoinfo.rs @@ -47,16 +47,16 @@ pub(super) async fn execute(be: &impl DecryptReadBackend, _opts: Opts) -> Result let mut data_count = 0; let mut data_size = 0; while let Some(index) = stream.next().await { - for pack in index?.1.dissolve().1 { - for blob in pack.blobs() { - match blob.tpe() { + for pack in index?.1.packs { + for blob in pack.blobs { + match blob.tpe { BlobType::Tree => { tree_count += 1; - tree_size += *blob.length() as u64; + tree_size += blob.length as u64; } BlobType::Data => { data_count += 1; - data_size += *blob.length() as u64; + data_size += blob.length as u64; } } } diff --git a/src/index/binarysorted.rs b/src/index/binarysorted.rs index ea0a68a..4685ae2 100644 --- a/src/index/binarysorted.rs +++ b/src/index/binarysorted.rs @@ -41,11 +41,11 @@ where let mut data_id = Vec::new(); while let Some(index) = stream.next().await { - for i in index.dissolve().1 { + for p in index.packs { let idx = packs.len(); - packs.push(*i.id()); - let len = i.blobs().len(); - if i.blobs()[0].tpe() == &BlobType::Data { + packs.push(p.id); + let len = p.blobs.len(); + if p.blob_type() == BlobType::Data { if full_data { data.reserve(len); } else { @@ -55,14 +55,14 @@ where tree.reserve(len); } - for blob in i.blobs() { + for blob in p.blobs { let be = BinarySortedEntry { - id: *blob.id(), + id: blob.id, pack_idx: idx, - offset: *blob.offset(), - length: *blob.length(), + offset: blob.offset, + length: blob.length, }; - match blob.tpe() { + match blob.tpe { BlobType::Tree => { tree.push(be); } @@ -70,7 +70,7 @@ where if full_data { data.push(be); } else { - data_id.push(*blob.id()); + data_id.push(blob.id); } } } diff --git a/src/index/indexer.rs b/src/index/indexer.rs index 53a5096..c6d1790 100644 --- a/src/index/indexer.rs +++ b/src/index/indexer.rs @@ -26,7 +26,7 @@ impl Indexer { pub fn new(be: BE) -> Self { Self { be, - file: IndexFile::new(), + file: IndexFile::default(), count: 0, created: SystemTime::now(), indexed: Some(HashSet::new()), @@ -36,7 +36,7 @@ impl Indexer { pub fn new_unindexed(be: BE) -> Self { Self { be, - file: IndexFile::new(), + file: IndexFile::default(), count: 0, created: SystemTime::now(), indexed: None, @@ -44,7 +44,7 @@ impl Indexer { } pub fn reset(&mut self) { - self.file = IndexFile::new(); + self.file = IndexFile::default(); self.count = 0; self.created = SystemTime::now(); } @@ -61,11 +61,11 @@ impl Indexer { } pub async fn add(&mut self, pack: IndexPack) -> Result<()> { - self.count += pack.blobs().len(); + self.count += pack.blobs.len(); if let Some(indexed) = &mut self.indexed { - for blob in pack.blobs() { - indexed.insert(*blob.id()); + for blob in &pack.blobs { + indexed.insert(blob.id); } } diff --git a/src/repo/indexfile.rs b/src/repo/indexfile.rs index f4f912e..e6281be 100644 --- a/src/repo/indexfile.rs +++ b/src/repo/indexfile.rs @@ -1,19 +1,18 @@ use std::cmp::Ordering; -use derive_getters::{Dissolve, Getters}; use serde::{Deserialize, Serialize}; use crate::backend::{FileType, RepoFile}; use crate::blob::BlobType; use crate::id::Id; -#[derive(Debug, Default, Serialize, Deserialize, Getters, Dissolve)] +#[derive(Debug, Default, Serialize, Deserialize)] pub struct IndexFile { #[serde(skip_serializing_if = "Option::is_none")] - supersedes: Option>, - packs: Vec, + pub(crate) supersedes: Option>, + pub(crate) packs: Vec, #[serde(default, skip_serializing_if = "Vec::is_empty")] - packs_to_delete: Vec, + pub(crate) packs_to_delete: Vec, } impl RepoFile for IndexFile { @@ -21,14 +20,6 @@ impl RepoFile for IndexFile { } impl IndexFile { - pub fn new() -> Self { - Self { - supersedes: None, - packs: Vec::new(), - packs_to_delete: Vec::new(), - } - } - pub fn add(&mut self, p: IndexPack) { self.packs.push(p); } @@ -38,20 +29,13 @@ impl IndexFile { } } -#[derive(Debug, Serialize, Deserialize, Getters, Dissolve)] +#[derive(Default, Debug, Serialize, Deserialize)] pub struct IndexPack { - id: Id, - blobs: Vec, + pub(crate) id: Id, + pub(crate) blobs: Vec, } impl IndexPack { - pub fn new() -> Self { - Self { - id: Id::default(), - blobs: Vec::new(), - } - } - pub fn set_id(&mut self, id: Id) { self.id = id; } @@ -69,7 +53,7 @@ impl IndexPack { pub fn pack_size(&self) -> u32 { let mut size = 4 + 32; // 4 + crypto overhead for blob in &self.blobs { - size += blob.length() + 37 // 37 = length of blob description + size += blob.length + 37 // 37 = length of blob description } size } @@ -81,13 +65,13 @@ impl IndexPack { } } -#[derive(Debug, Clone, Serialize, Deserialize, Getters, Dissolve, Eq, PartialEq)] +#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)] pub struct IndexBlob { - id: Id, + pub(crate) id: Id, #[serde(rename = "type")] - tpe: BlobType, - offset: u32, - length: u32, + pub(crate) tpe: BlobType, + pub(crate) offset: u32, + pub(crate) length: u32, } impl PartialOrd for IndexBlob {