make IndexFile structs public

This commit is contained in:
Alexander Weiss 2022-04-22 21:31:01 +02:00
parent ac2862050d
commit e17cc2dbf4
8 changed files with 89 additions and 107 deletions

View File

@ -38,7 +38,7 @@ impl<BE: DecryptWriteBackend> Packer<BE> {
size: 0,
count: 0,
created: SystemTime::now(),
index: IndexPack::new(),
index: IndexPack::default(),
indexer,
hasher: Hasher::new(),
})
@ -108,11 +108,11 @@ impl<BE: DecryptWriteBackend> Packer<BE> {
// collect header entries
let mut writer = Cursor::new(Vec::new());
for blob in self.index.blobs() {
for blob in &self.index.blobs {
PackHeaderEntry {
tpe: *blob.tpe(),
len: *blob.length(),
id: *blob.id(),
tpe: blob.tpe,
len: blob.length,
id: blob.id,
}
.write_to(&mut writer)?;
}
@ -154,12 +154,12 @@ impl<BE: DecryptWriteBackend> Packer<BE> {
let file = std::mem::replace(&mut self.file, tempfile()?);
self.be.write_file(FileType::Pack, &id, file).await?;
let index = std::mem::replace(&mut self.index, IndexPack::new());
let index = std::mem::take(&mut self.index);
self.indexer.borrow_mut().add(index).await?;
Ok(())
}
fn has(&self, id: &Id) -> bool {
self.index.blobs().iter().any(|b| b.id() == id)
self.index.blobs.iter().any(|b| &b.id == id)
}
}

View File

@ -41,24 +41,21 @@ async fn check_packs(be: &impl DecryptReadBackend) -> Result<()> {
// TODO: only read index files once
let mut stream = be.stream_all::<IndexFile>(progress_counter()).await?;
while let Some(index) = stream.next().await {
for p in index?.1.dissolve().1 {
packs.insert(*p.id(), p.pack_size());
for p in index?.1.packs {
packs.insert(p.id, p.pack_size());
// check offsets in index
let mut expected_offset: u32 = 0;
let (id, mut blobs) = p.dissolve();
let mut blobs = p.blobs;
blobs.sort_unstable();
for blob in blobs {
if blob.offset() != &expected_offset {
if blob.offset != expected_offset {
eprintln!(
"pack {}: blob {} offset in index: {}, expected: {}",
id,
blob.id(),
blob.offset(),
expected_offset
p.id, blob.id, blob.offset, expected_offset
);
}
expected_offset += blob.length();
expected_offset += blob.length;
}
}
}

View File

@ -19,9 +19,9 @@ pub(super) async fn execute(be: &impl DecryptReadBackend, opts: Opts) -> Result<
"blobs" => {
let mut stream = be.stream_all::<IndexFile>(ProgressBar::hidden()).await?;
while let Some(index) = stream.next().await {
for pack in index?.1.dissolve().1 {
for blob in pack.blobs() {
println!("{:?} {}", blob.tpe(), blob.id().to_hex());
for pack in index?.1.packs {
for blob in pack.blobs {
println!("{:?} {}", blob.tpe, blob.id.to_hex());
}
}
}

View File

@ -1,6 +1,6 @@
use std::cell::RefCell;
use std::cmp::Ordering;
use std::collections::{hash_map::Entry, HashMap, HashSet};
use std::collections::{HashMap, HashSet};
use std::rc::Rc;
use std::str::FromStr;
@ -57,15 +57,16 @@ pub(super) async fn execute(be: &(impl DecryptFullBackend + Unpin), opts: Opts)
v1!("finding duplicate blobs...");
for pack in index_files
.iter()
.flat_map(|(_, index)| index.packs())
.unique_by(|p| p.id())
.flat_map(|(_, index)| &index.packs)
.unique_by(|p| p.id)
{
for blob in pack.blobs() {
let id = *blob.id();
// note that duplicates are only counted up to 255. If there are more
// duplicates, the number is set to 255. This may imply that later on
// not the "best" pack is chosen to have that blob marked as used.
used_ids.entry(id).and_modify(|e| *e = e.saturating_add(1));
for blob in &pack.blobs {
if let Some(count) = used_ids.get_mut(&blob.id) {
// note that duplicates are only counted up to 255. If there are more
// duplicates, the number is set to 255. This may imply that later on
// not the "best" pack is chosen to have that blob marked as used.
*count = count.saturating_add(1);
}
}
}
@ -79,7 +80,7 @@ pub(super) async fn execute(be: &(impl DecryptFullBackend + Unpin), opts: Opts)
let mut pruner = Pruner::new(used_ids, existing_packs);
pruner.check()?;
pruner.decide_packs(index_files.iter().flat_map(|(_, index)| index.packs()))?;
pruner.decide_packs(index_files.iter().flat_map(|(_, index)| &index.packs))?;
pruner.decide_repack(&opts.max_repack, &opts.max_unused);
pruner.filter_index_files(index_files);
pruner.print_stats();
@ -188,7 +189,7 @@ impl Pruner {
// search used and unused blobs within packs
for pack in pack_iter {
if !processed_packs.insert(pack.id()) {
if !processed_packs.insert(pack.id) {
// ignore duplicate packs
continue;
}
@ -198,14 +199,14 @@ impl Pruner {
// check if the pack has used blobs which are no duplicates
let has_used = pack
.blobs()
.blobs
.iter()
.any(|blob| self.used_ids.get(blob.id()) == Some(&1));
.any(|blob| self.used_ids.get(&blob.id) == Some(&1));
for blob in pack.blobs() {
match self.used_ids.entry(*blob.id()) {
Entry::Vacant(_) => pi.add_unused_blob(blob),
Entry::Occupied(mut count) => pi.add_blob(blob, has_used, count.get_mut()),
for blob in &pack.blobs {
match self.used_ids.get_mut(&blob.id) {
None => pi.add_unused_blob(blob),
Some(count) => pi.add_blob(blob, has_used, count),
}
}
@ -217,28 +218,28 @@ impl Pruner {
if pi.used_blobs == 0 {
// unused pack
self.stats.packs.unused += 1;
self.packs_remove.insert(*pack.id());
self.packs_remove.insert(pack.id);
self.stats.blobs.remove += pi.unused_blobs as u64;
self.stats.size.remove += pi.unused_size as u64;
self.existing_packs.remove(pack.id());
self.existing_packs.remove(&pack.id);
} else {
if self.existing_packs.remove(pack.id()).is_none() {
bail!("used pack {} does not exist!", pack.id());
if self.existing_packs.remove(&pack.id).is_none() {
bail!("used pack {} does not exist!", pack.id);
}
if pi.unused_blobs == 0 {
// used pack
self.stats.packs.used += 1;
self.stats.packs.keep += 1;
for blob in pack.blobs() {
self.used_ids.remove(blob.id());
for blob in &pack.blobs {
self.used_ids.remove(&blob.id);
}
} else {
// partly used pack => candidate for repacking
self.stats.packs.partly_used += 1;
self.repack_candidates
.push(RepackCandidate { id: *pack.id(), pi })
.push(RepackCandidate { id: pack.id, pi })
}
}
}
@ -290,12 +291,12 @@ impl Pruner {
// filter out only the index files which need processing
self.index_files
.extend(index_files.into_iter().filter(|(_, index)| {
let must_modify = index.packs().iter().any(|p| {
let must_modify = index.packs.iter().any(|p| {
// index must be processed if this is a duplicate pack
// or the packs needs to be removed or repacked.
!processed_packs.insert(*p.id())
|| self.packs_repack.contains(p.id())
|| self.packs_remove.contains(p.id())
!processed_packs.insert(p.id)
|| self.packs_repack.contains(&p.id)
|| self.packs_remove.contains(&p.id)
});
any_must_modify |= must_modify;
@ -405,26 +406,26 @@ impl Pruner {
}
for (index_id, index) in self.index_files {
for pack in index.dissolve().1 {
if !processed_packs.insert(*pack.id()) {
for pack in index.packs {
if !processed_packs.insert(pack.id) {
// ignore duplicate packs
continue;
}
if self.packs_repack.contains(pack.id()) {
if self.packs_repack.contains(&pack.id) {
// TODO: repack in parallel
for blob in pack.blobs() {
if self.used_ids.remove(blob.id()).is_none() {
for blob in pack.blobs {
if self.used_ids.remove(&blob.id).is_none() {
// don't save duplicate blobs
continue;
}
let data = be
.read_partial(FileType::Pack, pack.id(), *blob.offset(), *blob.length())
.read_partial(FileType::Pack, &pack.id, blob.offset, blob.length)
.await?;
packer.add_raw(&data, blob.id(), *blob.tpe()).await?;
packer.add_raw(&data, &blob.id, blob.tpe).await?;
}
} else if !self.packs_remove.contains(pack.id()) {
} else if !self.packs_remove.contains(&pack.id) {
// keep pack: add to new index
indexer.borrow_mut().add(pack).await?;
}
@ -501,13 +502,13 @@ impl Ord for PackInfo {
impl PackInfo {
fn add_unused_blob(&mut self, blob: &IndexBlob) {
// used duplicate exists, mark as unused
self.unused_size += blob.length();
self.unused_size += blob.length;
self.unused_blobs += 1;
}
fn add_used_blob(&mut self, blob: &IndexBlob) {
// blob is in use: account for its size and count as used
self.used_size += blob.length();
self.used_size += blob.length;
self.used_blobs += 1;
}

View File

@ -47,16 +47,16 @@ pub(super) async fn execute(be: &impl DecryptReadBackend, _opts: Opts) -> Result
let mut data_count = 0;
let mut data_size = 0;
while let Some(index) = stream.next().await {
for pack in index?.1.dissolve().1 {
for blob in pack.blobs() {
match blob.tpe() {
for pack in index?.1.packs {
for blob in pack.blobs {
match blob.tpe {
BlobType::Tree => {
tree_count += 1;
tree_size += *blob.length() as u64;
tree_size += blob.length as u64;
}
BlobType::Data => {
data_count += 1;
data_size += *blob.length() as u64;
data_size += blob.length as u64;
}
}
}

View File

@ -41,11 +41,11 @@ where
let mut data_id = Vec::new();
while let Some(index) = stream.next().await {
for i in index.dissolve().1 {
for p in index.packs {
let idx = packs.len();
packs.push(*i.id());
let len = i.blobs().len();
if i.blobs()[0].tpe() == &BlobType::Data {
packs.push(p.id);
let len = p.blobs.len();
if p.blob_type() == BlobType::Data {
if full_data {
data.reserve(len);
} else {
@ -55,14 +55,14 @@ where
tree.reserve(len);
}
for blob in i.blobs() {
for blob in p.blobs {
let be = BinarySortedEntry {
id: *blob.id(),
id: blob.id,
pack_idx: idx,
offset: *blob.offset(),
length: *blob.length(),
offset: blob.offset,
length: blob.length,
};
match blob.tpe() {
match blob.tpe {
BlobType::Tree => {
tree.push(be);
}
@ -70,7 +70,7 @@ where
if full_data {
data.push(be);
} else {
data_id.push(*blob.id());
data_id.push(blob.id);
}
}
}

View File

@ -26,7 +26,7 @@ impl<BE: DecryptWriteBackend> Indexer<BE> {
pub fn new(be: BE) -> Self {
Self {
be,
file: IndexFile::new(),
file: IndexFile::default(),
count: 0,
created: SystemTime::now(),
indexed: Some(HashSet::new()),
@ -36,7 +36,7 @@ impl<BE: DecryptWriteBackend> Indexer<BE> {
pub fn new_unindexed(be: BE) -> Self {
Self {
be,
file: IndexFile::new(),
file: IndexFile::default(),
count: 0,
created: SystemTime::now(),
indexed: None,
@ -44,7 +44,7 @@ impl<BE: DecryptWriteBackend> Indexer<BE> {
}
pub fn reset(&mut self) {
self.file = IndexFile::new();
self.file = IndexFile::default();
self.count = 0;
self.created = SystemTime::now();
}
@ -61,11 +61,11 @@ impl<BE: DecryptWriteBackend> Indexer<BE> {
}
pub async fn add(&mut self, pack: IndexPack) -> Result<()> {
self.count += pack.blobs().len();
self.count += pack.blobs.len();
if let Some(indexed) = &mut self.indexed {
for blob in pack.blobs() {
indexed.insert(*blob.id());
for blob in &pack.blobs {
indexed.insert(blob.id);
}
}

View File

@ -1,19 +1,18 @@
use std::cmp::Ordering;
use derive_getters::{Dissolve, Getters};
use serde::{Deserialize, Serialize};
use crate::backend::{FileType, RepoFile};
use crate::blob::BlobType;
use crate::id::Id;
#[derive(Debug, Default, Serialize, Deserialize, Getters, Dissolve)]
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct IndexFile {
#[serde(skip_serializing_if = "Option::is_none")]
supersedes: Option<Vec<Id>>,
packs: Vec<IndexPack>,
pub(crate) supersedes: Option<Vec<Id>>,
pub(crate) packs: Vec<IndexPack>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
packs_to_delete: Vec<IndexPack>,
pub(crate) packs_to_delete: Vec<IndexPack>,
}
impl RepoFile for IndexFile {
@ -21,14 +20,6 @@ impl RepoFile for IndexFile {
}
impl IndexFile {
pub fn new() -> Self {
Self {
supersedes: None,
packs: Vec::new(),
packs_to_delete: Vec::new(),
}
}
pub fn add(&mut self, p: IndexPack) {
self.packs.push(p);
}
@ -38,20 +29,13 @@ impl IndexFile {
}
}
#[derive(Debug, Serialize, Deserialize, Getters, Dissolve)]
#[derive(Default, Debug, Serialize, Deserialize)]
pub struct IndexPack {
id: Id,
blobs: Vec<IndexBlob>,
pub(crate) id: Id,
pub(crate) blobs: Vec<IndexBlob>,
}
impl IndexPack {
pub fn new() -> Self {
Self {
id: Id::default(),
blobs: Vec::new(),
}
}
pub fn set_id(&mut self, id: Id) {
self.id = id;
}
@ -69,7 +53,7 @@ impl IndexPack {
pub fn pack_size(&self) -> u32 {
let mut size = 4 + 32; // 4 + crypto overhead
for blob in &self.blobs {
size += blob.length() + 37 // 37 = length of blob description
size += blob.length + 37 // 37 = length of blob description
}
size
}
@ -81,13 +65,13 @@ impl IndexPack {
}
}
#[derive(Debug, Clone, Serialize, Deserialize, Getters, Dissolve, Eq, PartialEq)]
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
pub struct IndexBlob {
id: Id,
pub(crate) id: Id,
#[serde(rename = "type")]
tpe: BlobType,
offset: u32,
length: u32,
pub(crate) tpe: BlobType,
pub(crate) offset: u32,
pub(crate) length: u32,
}
impl PartialOrd<IndexBlob> for IndexBlob {