mirror of
https://github.com/rustic-rs/rustic.git
synced 2025-10-26 11:18:51 +00:00
make IndexFile structs public
This commit is contained in:
parent
ac2862050d
commit
e17cc2dbf4
@ -38,7 +38,7 @@ impl<BE: DecryptWriteBackend> Packer<BE> {
|
||||
size: 0,
|
||||
count: 0,
|
||||
created: SystemTime::now(),
|
||||
index: IndexPack::new(),
|
||||
index: IndexPack::default(),
|
||||
indexer,
|
||||
hasher: Hasher::new(),
|
||||
})
|
||||
@ -108,11 +108,11 @@ impl<BE: DecryptWriteBackend> Packer<BE> {
|
||||
|
||||
// collect header entries
|
||||
let mut writer = Cursor::new(Vec::new());
|
||||
for blob in self.index.blobs() {
|
||||
for blob in &self.index.blobs {
|
||||
PackHeaderEntry {
|
||||
tpe: *blob.tpe(),
|
||||
len: *blob.length(),
|
||||
id: *blob.id(),
|
||||
tpe: blob.tpe,
|
||||
len: blob.length,
|
||||
id: blob.id,
|
||||
}
|
||||
.write_to(&mut writer)?;
|
||||
}
|
||||
@ -154,12 +154,12 @@ impl<BE: DecryptWriteBackend> Packer<BE> {
|
||||
let file = std::mem::replace(&mut self.file, tempfile()?);
|
||||
self.be.write_file(FileType::Pack, &id, file).await?;
|
||||
|
||||
let index = std::mem::replace(&mut self.index, IndexPack::new());
|
||||
let index = std::mem::take(&mut self.index);
|
||||
self.indexer.borrow_mut().add(index).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn has(&self, id: &Id) -> bool {
|
||||
self.index.blobs().iter().any(|b| b.id() == id)
|
||||
self.index.blobs.iter().any(|b| &b.id == id)
|
||||
}
|
||||
}
|
||||
|
||||
@ -41,24 +41,21 @@ async fn check_packs(be: &impl DecryptReadBackend) -> Result<()> {
|
||||
// TODO: only read index files once
|
||||
let mut stream = be.stream_all::<IndexFile>(progress_counter()).await?;
|
||||
while let Some(index) = stream.next().await {
|
||||
for p in index?.1.dissolve().1 {
|
||||
packs.insert(*p.id(), p.pack_size());
|
||||
for p in index?.1.packs {
|
||||
packs.insert(p.id, p.pack_size());
|
||||
|
||||
// check offsets in index
|
||||
let mut expected_offset: u32 = 0;
|
||||
let (id, mut blobs) = p.dissolve();
|
||||
let mut blobs = p.blobs;
|
||||
blobs.sort_unstable();
|
||||
for blob in blobs {
|
||||
if blob.offset() != &expected_offset {
|
||||
if blob.offset != expected_offset {
|
||||
eprintln!(
|
||||
"pack {}: blob {} offset in index: {}, expected: {}",
|
||||
id,
|
||||
blob.id(),
|
||||
blob.offset(),
|
||||
expected_offset
|
||||
p.id, blob.id, blob.offset, expected_offset
|
||||
);
|
||||
}
|
||||
expected_offset += blob.length();
|
||||
expected_offset += blob.length;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -19,9 +19,9 @@ pub(super) async fn execute(be: &impl DecryptReadBackend, opts: Opts) -> Result<
|
||||
"blobs" => {
|
||||
let mut stream = be.stream_all::<IndexFile>(ProgressBar::hidden()).await?;
|
||||
while let Some(index) = stream.next().await {
|
||||
for pack in index?.1.dissolve().1 {
|
||||
for blob in pack.blobs() {
|
||||
println!("{:?} {}", blob.tpe(), blob.id().to_hex());
|
||||
for pack in index?.1.packs {
|
||||
for blob in pack.blobs {
|
||||
println!("{:?} {}", blob.tpe, blob.id.to_hex());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
use std::cell::RefCell;
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::{hash_map::Entry, HashMap, HashSet};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::rc::Rc;
|
||||
use std::str::FromStr;
|
||||
|
||||
@ -57,15 +57,16 @@ pub(super) async fn execute(be: &(impl DecryptFullBackend + Unpin), opts: Opts)
|
||||
v1!("finding duplicate blobs...");
|
||||
for pack in index_files
|
||||
.iter()
|
||||
.flat_map(|(_, index)| index.packs())
|
||||
.unique_by(|p| p.id())
|
||||
.flat_map(|(_, index)| &index.packs)
|
||||
.unique_by(|p| p.id)
|
||||
{
|
||||
for blob in pack.blobs() {
|
||||
let id = *blob.id();
|
||||
// note that duplicates are only counted up to 255. If there are more
|
||||
// duplicates, the number is set to 255. This may imply that later on
|
||||
// not the "best" pack is chosen to have that blob marked as used.
|
||||
used_ids.entry(id).and_modify(|e| *e = e.saturating_add(1));
|
||||
for blob in &pack.blobs {
|
||||
if let Some(count) = used_ids.get_mut(&blob.id) {
|
||||
// note that duplicates are only counted up to 255. If there are more
|
||||
// duplicates, the number is set to 255. This may imply that later on
|
||||
// not the "best" pack is chosen to have that blob marked as used.
|
||||
*count = count.saturating_add(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -79,7 +80,7 @@ pub(super) async fn execute(be: &(impl DecryptFullBackend + Unpin), opts: Opts)
|
||||
|
||||
let mut pruner = Pruner::new(used_ids, existing_packs);
|
||||
pruner.check()?;
|
||||
pruner.decide_packs(index_files.iter().flat_map(|(_, index)| index.packs()))?;
|
||||
pruner.decide_packs(index_files.iter().flat_map(|(_, index)| &index.packs))?;
|
||||
pruner.decide_repack(&opts.max_repack, &opts.max_unused);
|
||||
pruner.filter_index_files(index_files);
|
||||
pruner.print_stats();
|
||||
@ -188,7 +189,7 @@ impl Pruner {
|
||||
|
||||
// search used and unused blobs within packs
|
||||
for pack in pack_iter {
|
||||
if !processed_packs.insert(pack.id()) {
|
||||
if !processed_packs.insert(pack.id) {
|
||||
// ignore duplicate packs
|
||||
continue;
|
||||
}
|
||||
@ -198,14 +199,14 @@ impl Pruner {
|
||||
|
||||
// check if the pack has used blobs which are no duplicates
|
||||
let has_used = pack
|
||||
.blobs()
|
||||
.blobs
|
||||
.iter()
|
||||
.any(|blob| self.used_ids.get(blob.id()) == Some(&1));
|
||||
.any(|blob| self.used_ids.get(&blob.id) == Some(&1));
|
||||
|
||||
for blob in pack.blobs() {
|
||||
match self.used_ids.entry(*blob.id()) {
|
||||
Entry::Vacant(_) => pi.add_unused_blob(blob),
|
||||
Entry::Occupied(mut count) => pi.add_blob(blob, has_used, count.get_mut()),
|
||||
for blob in &pack.blobs {
|
||||
match self.used_ids.get_mut(&blob.id) {
|
||||
None => pi.add_unused_blob(blob),
|
||||
Some(count) => pi.add_blob(blob, has_used, count),
|
||||
}
|
||||
}
|
||||
|
||||
@ -217,28 +218,28 @@ impl Pruner {
|
||||
if pi.used_blobs == 0 {
|
||||
// unused pack
|
||||
self.stats.packs.unused += 1;
|
||||
self.packs_remove.insert(*pack.id());
|
||||
self.packs_remove.insert(pack.id);
|
||||
self.stats.blobs.remove += pi.unused_blobs as u64;
|
||||
self.stats.size.remove += pi.unused_size as u64;
|
||||
|
||||
self.existing_packs.remove(pack.id());
|
||||
self.existing_packs.remove(&pack.id);
|
||||
} else {
|
||||
if self.existing_packs.remove(pack.id()).is_none() {
|
||||
bail!("used pack {} does not exist!", pack.id());
|
||||
if self.existing_packs.remove(&pack.id).is_none() {
|
||||
bail!("used pack {} does not exist!", pack.id);
|
||||
}
|
||||
|
||||
if pi.unused_blobs == 0 {
|
||||
// used pack
|
||||
self.stats.packs.used += 1;
|
||||
self.stats.packs.keep += 1;
|
||||
for blob in pack.blobs() {
|
||||
self.used_ids.remove(blob.id());
|
||||
for blob in &pack.blobs {
|
||||
self.used_ids.remove(&blob.id);
|
||||
}
|
||||
} else {
|
||||
// partly used pack => candidate for repacking
|
||||
self.stats.packs.partly_used += 1;
|
||||
self.repack_candidates
|
||||
.push(RepackCandidate { id: *pack.id(), pi })
|
||||
.push(RepackCandidate { id: pack.id, pi })
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -290,12 +291,12 @@ impl Pruner {
|
||||
// filter out only the index files which need processing
|
||||
self.index_files
|
||||
.extend(index_files.into_iter().filter(|(_, index)| {
|
||||
let must_modify = index.packs().iter().any(|p| {
|
||||
let must_modify = index.packs.iter().any(|p| {
|
||||
// index must be processed if this is a duplicate pack
|
||||
// or the packs needs to be removed or repacked.
|
||||
!processed_packs.insert(*p.id())
|
||||
|| self.packs_repack.contains(p.id())
|
||||
|| self.packs_remove.contains(p.id())
|
||||
!processed_packs.insert(p.id)
|
||||
|| self.packs_repack.contains(&p.id)
|
||||
|| self.packs_remove.contains(&p.id)
|
||||
});
|
||||
any_must_modify |= must_modify;
|
||||
|
||||
@ -405,26 +406,26 @@ impl Pruner {
|
||||
}
|
||||
|
||||
for (index_id, index) in self.index_files {
|
||||
for pack in index.dissolve().1 {
|
||||
if !processed_packs.insert(*pack.id()) {
|
||||
for pack in index.packs {
|
||||
if !processed_packs.insert(pack.id) {
|
||||
// ignore duplicate packs
|
||||
continue;
|
||||
}
|
||||
|
||||
if self.packs_repack.contains(pack.id()) {
|
||||
if self.packs_repack.contains(&pack.id) {
|
||||
// TODO: repack in parallel
|
||||
for blob in pack.blobs() {
|
||||
if self.used_ids.remove(blob.id()).is_none() {
|
||||
for blob in pack.blobs {
|
||||
if self.used_ids.remove(&blob.id).is_none() {
|
||||
// don't save duplicate blobs
|
||||
continue;
|
||||
}
|
||||
|
||||
let data = be
|
||||
.read_partial(FileType::Pack, pack.id(), *blob.offset(), *blob.length())
|
||||
.read_partial(FileType::Pack, &pack.id, blob.offset, blob.length)
|
||||
.await?;
|
||||
packer.add_raw(&data, blob.id(), *blob.tpe()).await?;
|
||||
packer.add_raw(&data, &blob.id, blob.tpe).await?;
|
||||
}
|
||||
} else if !self.packs_remove.contains(pack.id()) {
|
||||
} else if !self.packs_remove.contains(&pack.id) {
|
||||
// keep pack: add to new index
|
||||
indexer.borrow_mut().add(pack).await?;
|
||||
}
|
||||
@ -501,13 +502,13 @@ impl Ord for PackInfo {
|
||||
impl PackInfo {
|
||||
fn add_unused_blob(&mut self, blob: &IndexBlob) {
|
||||
// used duplicate exists, mark as unused
|
||||
self.unused_size += blob.length();
|
||||
self.unused_size += blob.length;
|
||||
self.unused_blobs += 1;
|
||||
}
|
||||
|
||||
fn add_used_blob(&mut self, blob: &IndexBlob) {
|
||||
// count this blob (size and number) as used
|
||||
self.used_size += blob.length();
|
||||
self.used_size += blob.length;
|
||||
self.used_blobs += 1;
|
||||
}
|
||||
|
||||
|
||||
@ -47,16 +47,16 @@ pub(super) async fn execute(be: &impl DecryptReadBackend, _opts: Opts) -> Result
|
||||
let mut data_count = 0;
|
||||
let mut data_size = 0;
|
||||
while let Some(index) = stream.next().await {
|
||||
for pack in index?.1.dissolve().1 {
|
||||
for blob in pack.blobs() {
|
||||
match blob.tpe() {
|
||||
for pack in index?.1.packs {
|
||||
for blob in pack.blobs {
|
||||
match blob.tpe {
|
||||
BlobType::Tree => {
|
||||
tree_count += 1;
|
||||
tree_size += *blob.length() as u64;
|
||||
tree_size += blob.length as u64;
|
||||
}
|
||||
BlobType::Data => {
|
||||
data_count += 1;
|
||||
data_size += *blob.length() as u64;
|
||||
data_size += blob.length as u64;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -41,11 +41,11 @@ where
|
||||
let mut data_id = Vec::new();
|
||||
|
||||
while let Some(index) = stream.next().await {
|
||||
for i in index.dissolve().1 {
|
||||
for p in index.packs {
|
||||
let idx = packs.len();
|
||||
packs.push(*i.id());
|
||||
let len = i.blobs().len();
|
||||
if i.blobs()[0].tpe() == &BlobType::Data {
|
||||
packs.push(p.id);
|
||||
let len = p.blobs.len();
|
||||
if p.blob_type() == BlobType::Data {
|
||||
if full_data {
|
||||
data.reserve(len);
|
||||
} else {
|
||||
@ -55,14 +55,14 @@ where
|
||||
tree.reserve(len);
|
||||
}
|
||||
|
||||
for blob in i.blobs() {
|
||||
for blob in p.blobs {
|
||||
let be = BinarySortedEntry {
|
||||
id: *blob.id(),
|
||||
id: blob.id,
|
||||
pack_idx: idx,
|
||||
offset: *blob.offset(),
|
||||
length: *blob.length(),
|
||||
offset: blob.offset,
|
||||
length: blob.length,
|
||||
};
|
||||
match blob.tpe() {
|
||||
match blob.tpe {
|
||||
BlobType::Tree => {
|
||||
tree.push(be);
|
||||
}
|
||||
@ -70,7 +70,7 @@ where
|
||||
if full_data {
|
||||
data.push(be);
|
||||
} else {
|
||||
data_id.push(*blob.id());
|
||||
data_id.push(blob.id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -26,7 +26,7 @@ impl<BE: DecryptWriteBackend> Indexer<BE> {
|
||||
pub fn new(be: BE) -> Self {
|
||||
Self {
|
||||
be,
|
||||
file: IndexFile::new(),
|
||||
file: IndexFile::default(),
|
||||
count: 0,
|
||||
created: SystemTime::now(),
|
||||
indexed: Some(HashSet::new()),
|
||||
@ -36,7 +36,7 @@ impl<BE: DecryptWriteBackend> Indexer<BE> {
|
||||
pub fn new_unindexed(be: BE) -> Self {
|
||||
Self {
|
||||
be,
|
||||
file: IndexFile::new(),
|
||||
file: IndexFile::default(),
|
||||
count: 0,
|
||||
created: SystemTime::now(),
|
||||
indexed: None,
|
||||
@ -44,7 +44,7 @@ impl<BE: DecryptWriteBackend> Indexer<BE> {
|
||||
}
|
||||
|
||||
pub fn reset(&mut self) {
|
||||
self.file = IndexFile::new();
|
||||
self.file = IndexFile::default();
|
||||
self.count = 0;
|
||||
self.created = SystemTime::now();
|
||||
}
|
||||
@ -61,11 +61,11 @@ impl<BE: DecryptWriteBackend> Indexer<BE> {
|
||||
}
|
||||
|
||||
pub async fn add(&mut self, pack: IndexPack) -> Result<()> {
|
||||
self.count += pack.blobs().len();
|
||||
self.count += pack.blobs.len();
|
||||
|
||||
if let Some(indexed) = &mut self.indexed {
|
||||
for blob in pack.blobs() {
|
||||
indexed.insert(*blob.id());
|
||||
for blob in &pack.blobs {
|
||||
indexed.insert(blob.id);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1,19 +1,18 @@
|
||||
use std::cmp::Ordering;
|
||||
|
||||
use derive_getters::{Dissolve, Getters};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::backend::{FileType, RepoFile};
|
||||
use crate::blob::BlobType;
|
||||
use crate::id::Id;
|
||||
|
||||
#[derive(Debug, Default, Serialize, Deserialize, Getters, Dissolve)]
|
||||
#[derive(Debug, Default, Serialize, Deserialize)]
|
||||
pub struct IndexFile {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
supersedes: Option<Vec<Id>>,
|
||||
packs: Vec<IndexPack>,
|
||||
pub(crate) supersedes: Option<Vec<Id>>,
|
||||
pub(crate) packs: Vec<IndexPack>,
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
packs_to_delete: Vec<IndexPack>,
|
||||
pub(crate) packs_to_delete: Vec<IndexPack>,
|
||||
}
|
||||
|
||||
impl RepoFile for IndexFile {
|
||||
@ -21,14 +20,6 @@ impl RepoFile for IndexFile {
|
||||
}
|
||||
|
||||
impl IndexFile {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
supersedes: None,
|
||||
packs: Vec::new(),
|
||||
packs_to_delete: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add(&mut self, p: IndexPack) {
|
||||
self.packs.push(p);
|
||||
}
|
||||
@ -38,20 +29,13 @@ impl IndexFile {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Getters, Dissolve)]
|
||||
#[derive(Default, Debug, Serialize, Deserialize)]
|
||||
pub struct IndexPack {
|
||||
id: Id,
|
||||
blobs: Vec<IndexBlob>,
|
||||
pub(crate) id: Id,
|
||||
pub(crate) blobs: Vec<IndexBlob>,
|
||||
}
|
||||
|
||||
impl IndexPack {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
id: Id::default(),
|
||||
blobs: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_id(&mut self, id: Id) {
|
||||
self.id = id;
|
||||
}
|
||||
@ -69,7 +53,7 @@ impl IndexPack {
|
||||
pub fn pack_size(&self) -> u32 {
|
||||
let mut size = 4 + 32; // 4 + crypto overhead
|
||||
for blob in &self.blobs {
|
||||
size += blob.length() + 37 // 37 = length of blob description
|
||||
size += blob.length + 37 // 37 = length of blob description
|
||||
}
|
||||
size
|
||||
}
|
||||
@ -81,13 +65,13 @@ impl IndexPack {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Getters, Dissolve, Eq, PartialEq)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
|
||||
pub struct IndexBlob {
|
||||
id: Id,
|
||||
pub(crate) id: Id,
|
||||
#[serde(rename = "type")]
|
||||
tpe: BlobType,
|
||||
offset: u32,
|
||||
length: u32,
|
||||
pub(crate) tpe: BlobType,
|
||||
pub(crate) offset: u32,
|
||||
pub(crate) length: u32,
|
||||
}
|
||||
|
||||
impl PartialOrd<IndexBlob> for IndexBlob {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user