From fab6d8f377c354faa4e70a6cd2ba09c3bbbd2c3f Mon Sep 17 00:00:00 2001 From: Alexander Weiss Date: Sat, 13 Aug 2022 13:10:07 +0200 Subject: [PATCH] simplify index --- src/index/binarysorted.rs | 161 ++++++++++++++++++++------------------ 1 file changed, 84 insertions(+), 77 deletions(-) diff --git a/src/index/binarysorted.rs b/src/index/binarysorted.rs index 7528063..061b355 100644 --- a/src/index/binarysorted.rs +++ b/src/index/binarysorted.rs @@ -1,6 +1,7 @@ use std::num::NonZeroU32; use super::{BlobType, IndexEntry, ReadIndex}; +use crate::blob::BlobTypeMap; use crate::id::Id; use crate::repo::IndexPack; @@ -19,50 +20,69 @@ pub(crate) enum IndexType { OnlyTrees, } -enum SortedHashSetMap { +enum EntriesVariants { None, - Set(Vec), - Map(Vec), + Ids(Vec), + FullEntries(Vec), } -pub(crate) struct IndexCollector { - packs: Vec, - tree: Vec, - data: SortedHashSetMap, - total_tree_size: u64, - total_data_size: u64, +impl Default for EntriesVariants { + fn default() -> Self { + Self::None + } } +#[derive(Default)] +pub(crate) struct TypeIndexCollector { + packs: Vec<(Id, u32)>, + entries: EntriesVariants, + total_size: u64, +} + +#[derive(Default)] +pub(crate) struct IndexCollector(BlobTypeMap); + +pub(crate) struct TypeIndex { + packs: Vec, + entries: EntriesVariants, + total_size: u64, +} +pub struct Index(BlobTypeMap); + impl IndexCollector { pub fn new(tpe: IndexType) -> Self { - let data = match tpe { - IndexType::OnlyTrees => SortedHashSetMap::None, - IndexType::FullTrees => SortedHashSetMap::Set(Vec::new()), - IndexType::Full => SortedHashSetMap::Map(Vec::new()), + let mut collector = Self::default(); + + collector.0[BlobType::Tree].entries = EntriesVariants::FullEntries(Vec::new()); + collector.0[BlobType::Data].entries = match tpe { + IndexType::OnlyTrees => EntriesVariants::None, + IndexType::FullTrees => EntriesVariants::Ids(Vec::new()), + IndexType::Full => EntriesVariants::FullEntries(Vec::new()), }; - Self { - packs: Vec::new(), - tree: Vec::new(), - data, - total_tree_size: 0, - total_data_size: 0, - } + + collector } - pub fn into_index(mut self) -> Index { - self.tree.sort_unstable_by_key(|e| e.id); - match &mut self.data { - SortedHashSetMap::None => {} - SortedHashSetMap::Set(ids) => ids.sort_unstable(), - SortedHashSetMap::Map(data) => data.sort_unstable_by_key(|e| e.id), - }; - Index { - packs: self.packs, - tree: self.tree, - data: self.data, - total_tree_size: self.total_tree_size, - total_data_size: self.total_data_size, - } + pub fn tree_packs(&self) -> &Vec<(Id, u32)> { + &self.0[BlobType::Tree].packs + } + + // Turns Collector into an index by sorting the entries. + pub fn into_index(self) -> Index { + Index(self.0.map(|_, mut tc| { + match &mut tc.entries { + EntriesVariants::None => {} + EntriesVariants::Ids(ids) => ids.sort_unstable(), + EntriesVariants::FullEntries(entries) => entries.sort_unstable_by_key(|e| e.id), + }; + + let packs = tc.packs.into_iter().map(|(id, _)| id).collect(); + TypeIndex { + packs, + entries: tc.entries, + total_size: tc.total_size, + } + })) } } @@ -72,21 +92,19 @@ impl Extend for IndexCollector { T: IntoIterator, { for p in iter { - let idx = self.packs.len(); - self.packs.push(p.id); let len = p.blobs.len(); let blob_type = p.blob_type(); + let size = p.pack_size(); - match blob_type { - BlobType::Tree => self.total_tree_size += p.pack_size() as u64, - BlobType::Data => self.total_data_size += p.pack_size() as u64, - } + let idx = self.0[blob_type].packs.len(); + self.0[blob_type].packs.push((p.id, size)); - match (p.blob_type(), &mut self.data) { - (BlobType::Tree, _) => self.tree.reserve(len), - (BlobType::Data, SortedHashSetMap::None) => {} - (BlobType::Data, SortedHashSetMap::Set(ids)) => ids.reserve(len), - (BlobType::Data, SortedHashSetMap::Map(data)) => data.reserve(len), + self.0[blob_type].total_size += size as u64; + + match &mut self.0[blob_type].entries { + EntriesVariants::None => {} + EntriesVariants::Ids(ids) => ids.reserve(len), + EntriesVariants::FullEntries(entries) => entries.reserve(len), }; for blob in &p.blobs { @@ -97,39 +115,31 @@ impl Extend for IndexCollector { length: blob.length, uncompressed_length: blob.uncompressed_length, }; - match (blob.tpe, &mut self.data) { - (BlobType::Tree, _) => self.tree.push(be), - (BlobType::Data, SortedHashSetMap::None) => {} - (BlobType::Data, SortedHashSetMap::Set(ids)) => ids.push(blob.id), - (BlobType::Data, SortedHashSetMap::Map(data)) => data.push(be), + match &mut self.0[blob_type].entries { + EntriesVariants::None => {} + EntriesVariants::Ids(ids) => ids.push(blob.id), + EntriesVariants::FullEntries(entries) => entries.push(be), }; } } } } -pub struct Index { - packs: Vec, - tree: Vec, - data: SortedHashSetMap, - total_tree_size: u64, - total_data_size: u64, -} - impl ReadIndex for Index { - fn get_id(&self, tpe: &BlobType, id: &Id) -> Option { - let vec = match (tpe, &self.data) { - (BlobType::Tree, _) => &self.tree, - (BlobType::Data, SortedHashSetMap::Map(data)) => data, - (BlobType::Data, _) => { + fn get_id(&self, blob_type: &BlobType, id: &Id) -> Option { + let vec = match &self.0[*blob_type].entries { + EntriesVariants::FullEntries(entries) => entries, + _ => { + // get_id() only gives results if index contains full entries return None; } }; + vec.binary_search_by_key(id, |e| e.id).ok().map(|index| { let be = &vec[index]; IndexEntry::new( - *tpe, - self.packs[be.pack_idx], + *blob_type, + self.0[*blob_type].packs[be.pack_idx], be.offset, be.length, be.uncompressed_length, @@ -137,21 +147,18 @@ impl ReadIndex for Index { }) } - fn total_size(&self, tpe: &BlobType) -> u64 { - match tpe { - BlobType::Tree => self.total_tree_size, - BlobType::Data => self.total_data_size, - } + fn total_size(&self, blob_type: &BlobType) -> u64 { + self.0[*blob_type].total_size } - fn has(&self, tpe: &BlobType, id: &Id) -> bool { - match (tpe, &self.data) { - (BlobType::Tree, _) => self.tree.binary_search_by_key(id, |e| e.id).is_ok(), - (BlobType::Data, SortedHashSetMap::Map(data)) => { - data.binary_search_by_key(id, |e| e.id).is_ok() + fn has(&self, blob_type: &BlobType, id: &Id) -> bool { + match &self.0[*blob_type].entries { + EntriesVariants::FullEntries(entries) => { + entries.binary_search_by_key(id, |e| e.id).is_ok() } - (BlobType::Data, SortedHashSetMap::Set(data)) => data.binary_search(id).is_ok(), - (BlobType::Data, SortedHashSetMap::None) => false, + EntriesVariants::Ids(ids) => ids.binary_search(id).is_ok(), + // has() only gives results if index contains full entries or ids + EntriesVariants::None => false, } } }