simplify index

This commit is contained in:
Alexander Weiss 2022-08-13 13:10:07 +02:00
parent 53db8f81b8
commit fab6d8f377

View File

@ -1,6 +1,7 @@
use std::num::NonZeroU32;
use super::{BlobType, IndexEntry, ReadIndex};
use crate::blob::BlobTypeMap;
use crate::id::Id;
use crate::repo::IndexPack;
@ -19,50 +20,69 @@ pub(crate) enum IndexType {
OnlyTrees,
}
enum SortedHashSetMap {
enum EntriesVariants {
None,
Set(Vec<Id>),
Map(Vec<SortedEntry>),
Ids(Vec<Id>),
FullEntries(Vec<SortedEntry>),
}
pub(crate) struct IndexCollector {
packs: Vec<Id>,
tree: Vec<SortedEntry>,
data: SortedHashSetMap,
total_tree_size: u64,
total_data_size: u64,
impl Default for EntriesVariants {
fn default() -> Self {
Self::None
}
}
#[derive(Default)]
pub(crate) struct TypeIndexCollector {
packs: Vec<(Id, u32)>,
entries: EntriesVariants,
total_size: u64,
}
#[derive(Default)]
pub(crate) struct IndexCollector(BlobTypeMap<TypeIndexCollector>);
pub(crate) struct TypeIndex {
packs: Vec<Id>,
entries: EntriesVariants,
total_size: u64,
}
pub struct Index(BlobTypeMap<TypeIndex>);
impl IndexCollector {
pub fn new(tpe: IndexType) -> Self {
let data = match tpe {
IndexType::OnlyTrees => SortedHashSetMap::None,
IndexType::FullTrees => SortedHashSetMap::Set(Vec::new()),
IndexType::Full => SortedHashSetMap::Map(Vec::new()),
let mut collector = Self::default();
collector.0[BlobType::Tree].entries = EntriesVariants::FullEntries(Vec::new());
collector.0[BlobType::Data].entries = match tpe {
IndexType::OnlyTrees => EntriesVariants::None,
IndexType::FullTrees => EntriesVariants::Ids(Vec::new()),
IndexType::Full => EntriesVariants::FullEntries(Vec::new()),
};
Self {
packs: Vec::new(),
tree: Vec::new(),
data,
total_tree_size: 0,
total_data_size: 0,
}
collector
}
pub fn into_index(mut self) -> Index {
self.tree.sort_unstable_by_key(|e| e.id);
match &mut self.data {
SortedHashSetMap::None => {}
SortedHashSetMap::Set(ids) => ids.sort_unstable(),
SortedHashSetMap::Map(data) => data.sort_unstable_by_key(|e| e.id),
};
Index {
packs: self.packs,
tree: self.tree,
data: self.data,
total_tree_size: self.total_tree_size,
total_data_size: self.total_data_size,
}
pub fn tree_packs(&self) -> &Vec<(Id, u32)> {
&self.0[BlobType::Tree].packs
}
// Turns Collector into an index by sorting the entries.
pub fn into_index(self) -> Index {
Index(self.0.map(|_, mut tc| {
match &mut tc.entries {
EntriesVariants::None => {}
EntriesVariants::Ids(ids) => ids.sort_unstable(),
EntriesVariants::FullEntries(entries) => entries.sort_unstable_by_key(|e| e.id),
};
let packs = tc.packs.into_iter().map(|(id, _)| id).collect();
TypeIndex {
packs,
entries: tc.entries,
total_size: tc.total_size,
}
}))
}
}
@ -72,21 +92,19 @@ impl Extend<IndexPack> for IndexCollector {
T: IntoIterator<Item = IndexPack>,
{
for p in iter {
let idx = self.packs.len();
self.packs.push(p.id);
let len = p.blobs.len();
let blob_type = p.blob_type();
let size = p.pack_size();
match blob_type {
BlobType::Tree => self.total_tree_size += p.pack_size() as u64,
BlobType::Data => self.total_data_size += p.pack_size() as u64,
}
let idx = self.0[blob_type].packs.len();
self.0[blob_type].packs.push((p.id, size));
match (p.blob_type(), &mut self.data) {
(BlobType::Tree, _) => self.tree.reserve(len),
(BlobType::Data, SortedHashSetMap::None) => {}
(BlobType::Data, SortedHashSetMap::Set(ids)) => ids.reserve(len),
(BlobType::Data, SortedHashSetMap::Map(data)) => data.reserve(len),
self.0[blob_type].total_size += size as u64;
match &mut self.0[blob_type].entries {
EntriesVariants::None => {}
EntriesVariants::Ids(ids) => ids.reserve(len),
EntriesVariants::FullEntries(entries) => entries.reserve(len),
};
for blob in &p.blobs {
@ -97,39 +115,31 @@ impl Extend<IndexPack> for IndexCollector {
length: blob.length,
uncompressed_length: blob.uncompressed_length,
};
match (blob.tpe, &mut self.data) {
(BlobType::Tree, _) => self.tree.push(be),
(BlobType::Data, SortedHashSetMap::None) => {}
(BlobType::Data, SortedHashSetMap::Set(ids)) => ids.push(blob.id),
(BlobType::Data, SortedHashSetMap::Map(data)) => data.push(be),
match &mut self.0[blob_type].entries {
EntriesVariants::None => {}
EntriesVariants::Ids(ids) => ids.push(blob.id),
EntriesVariants::FullEntries(entries) => entries.push(be),
};
}
}
}
}
pub struct Index {
packs: Vec<Id>,
tree: Vec<SortedEntry>,
data: SortedHashSetMap,
total_tree_size: u64,
total_data_size: u64,
}
impl ReadIndex for Index {
fn get_id(&self, tpe: &BlobType, id: &Id) -> Option<IndexEntry> {
let vec = match (tpe, &self.data) {
(BlobType::Tree, _) => &self.tree,
(BlobType::Data, SortedHashSetMap::Map(data)) => data,
(BlobType::Data, _) => {
fn get_id(&self, blob_type: &BlobType, id: &Id) -> Option<IndexEntry> {
let vec = match &self.0[*blob_type].entries {
EntriesVariants::FullEntries(entries) => entries,
_ => {
// get_id() only gives results if index contains full entries
return None;
}
};
vec.binary_search_by_key(id, |e| e.id).ok().map(|index| {
let be = &vec[index];
IndexEntry::new(
*tpe,
self.packs[be.pack_idx],
*blob_type,
self.0[*blob_type].packs[be.pack_idx],
be.offset,
be.length,
be.uncompressed_length,
@ -137,21 +147,18 @@ impl ReadIndex for Index {
})
}
fn total_size(&self, tpe: &BlobType) -> u64 {
match tpe {
BlobType::Tree => self.total_tree_size,
BlobType::Data => self.total_data_size,
}
fn total_size(&self, blob_type: &BlobType) -> u64 {
self.0[*blob_type].total_size
}
fn has(&self, tpe: &BlobType, id: &Id) -> bool {
match (tpe, &self.data) {
(BlobType::Tree, _) => self.tree.binary_search_by_key(id, |e| e.id).is_ok(),
(BlobType::Data, SortedHashSetMap::Map(data)) => {
data.binary_search_by_key(id, |e| e.id).is_ok()
fn has(&self, blob_type: &BlobType, id: &Id) -> bool {
match &self.0[*blob_type].entries {
EntriesVariants::FullEntries(entries) => {
entries.binary_search_by_key(id, |e| e.id).is_ok()
}
(BlobType::Data, SortedHashSetMap::Set(data)) => data.binary_search(id).is_ok(),
(BlobType::Data, SortedHashSetMap::None) => false,
EntriesVariants::Ids(ids) => ids.binary_search(id).is_ok(),
// has() only gives results if index contains full entries or ids
EntriesVariants::None => false,
}
}
}