|             Line data    Source code 
       1              : //! Simple hash table with chaining.
       2              : 
       3              : use std::hash::Hash;
       4              : use std::mem::MaybeUninit;
       5              : 
       6              : use crate::hash::entry::*;
       7              : 
       8              : /// Invalid position within the map (either within the dictionary or bucket array).
       9              : pub(crate) const INVALID_POS: u32 = u32::MAX;
      10              : 
      11              : /// Fundamental storage unit within the hash table. Either empty or contains a key-value pair.
      12              : /// Always part of a chain of some kind (either a freelist if empty or a hash chain if full).
      13              : pub(crate) struct Bucket<K, V> {
      14              :     /// Index of next bucket in the chain.
      15              :     pub(crate) next: u32,
      16              :     /// Key-value pair contained within bucket.
      17              :     pub(crate) inner: Option<(K, V)>,
      18              : }
      19              : 
      20              : /// Core hash table implementation.
      21              : pub(crate) struct CoreHashMap<'a, K, V> {
      22              :     /// Dictionary used to map hashes to bucket indices.
      23              :     pub(crate) dictionary: &'a mut [u32],
      24              :     /// Buckets containing key-value pairs.
      25              :     pub(crate) buckets: &'a mut [Bucket<K, V>],
      26              :     /// Head of the freelist.
      27              :     pub(crate) free_head: u32,
      28              :     /// Maximum index of a bucket allowed to be allocated. [`INVALID_POS`] if no limit.
      29              :     pub(crate) alloc_limit: u32,
      30              :     /// The number of currently occupied buckets.
      31              :     pub(crate) buckets_in_use: u32,
      32              : }
      33              : 
      34              : /// Error for when there are no empty buckets left but one is needed.
      35              : #[derive(Debug, PartialEq)]
      36              : pub struct FullError;
      37              : 
      38              : impl<'a, K: Clone + Hash + Eq, V> CoreHashMap<'a, K, V> {
      39              :     const FILL_FACTOR: f32 = 0.60;
      40              : 
      41              :     /// Estimate the size of data contained within the the hash map.
      42           60 :     pub fn estimate_size(num_buckets: u32) -> usize {
      43           60 :         let mut size = 0;
      44              : 
      45              :         // buckets
      46           60 :         size += size_of::<Bucket<K, V>>() * num_buckets as usize;
      47              : 
      48              :         // dictionary
      49           60 :         size += (f32::ceil((size_of::<u32>() * num_buckets as usize) as f32 / Self::FILL_FACTOR))
      50           60 :             as usize;
      51              : 
      52           60 :         size
      53           60 :     }
      54              : 
      55           26 :     pub fn new(
      56           26 :         buckets: &'a mut [MaybeUninit<Bucket<K, V>>],
      57           26 :         dictionary: &'a mut [MaybeUninit<u32>],
      58           26 :     ) -> Self {
      59              :         // Initialize the buckets
      60      1316003 :         for i in 0..buckets.len() {
      61      1316003 :             buckets[i].write(Bucket {
      62      1316003 :                 next: if i < buckets.len() - 1 {
      63      1315977 :                     i as u32 + 1
      64              :                 } else {
      65           26 :                     INVALID_POS
      66              :                 },
      67      1316003 :                 inner: None,
      68              :             });
      69              :         }
      70              : 
      71              :         // Initialize the dictionary
      72      2201664 :         for e in dictionary.iter_mut() {
      73      2201664 :             e.write(INVALID_POS);
      74      2201664 :         }
      75              : 
      76              :         // TODO: use std::slice::assume_init_mut() once it stabilizes
      77           26 :         let buckets =
      78           26 :             unsafe { std::slice::from_raw_parts_mut(buckets.as_mut_ptr().cast(), buckets.len()) };
      79           26 :         let dictionary = unsafe {
      80           26 :             std::slice::from_raw_parts_mut(dictionary.as_mut_ptr().cast(), dictionary.len())
      81              :         };
      82              : 
      83           26 :         Self {
      84           26 :             dictionary,
      85           26 :             buckets,
      86           26 :             free_head: 0,
      87           26 :             buckets_in_use: 0,
      88           26 :             alloc_limit: INVALID_POS,
      89           26 :         }
      90           26 :     }
      91              : 
      92              :     /// Get the value associated with a key (if it exists) given its hash.
      93       111131 :     pub fn get_with_hash(&self, key: &K, hash: u64) -> Option<&V> {
      94       111131 :         let mut next = self.dictionary[hash as usize % self.dictionary.len()];
      95              :         loop {
      96       114808 :             if next == INVALID_POS {
      97          842 :                 return None;
      98       113966 :             }
      99              : 
     100       113966 :             let bucket = &self.buckets[next as usize];
     101       113966 :             let (bucket_key, bucket_value) = bucket.inner.as_ref().expect("entry is in use");
     102       113966 :             if bucket_key == key {
     103       110289 :                 return Some(bucket_value);
     104         3677 :             }
     105         3677 :             next = bucket.next;
     106              :         }
     107       111131 :     }
     108              : 
     109              :     /// Get number of buckets in map.
     110           15 :     pub fn get_num_buckets(&self) -> usize {
     111           15 :         self.buckets.len()
     112           15 :     }
     113              : 
     114              :     /// Clears all entries from the hashmap.
     115              :     ///
     116              :     /// Does not reset any allocation limits, but does clear any entries beyond them.
     117            2 :     pub fn clear(&mut self) {
     118         3000 :         for i in 0..self.buckets.len() {
     119         3000 :             self.buckets[i] = Bucket {
     120         3000 :                 next: if i < self.buckets.len() - 1 {
     121         2998 :                     i as u32 + 1
     122              :                 } else {
     123            2 :                     INVALID_POS
     124              :                 },
     125         3000 :                 inner: None,
     126              :             }
     127              :         }
     128         5472 :         for i in 0..self.dictionary.len() {
     129         5472 :             self.dictionary[i] = INVALID_POS;
     130         5472 :         }
     131              : 
     132            2 :         self.free_head = 0;
     133            2 :         self.buckets_in_use = 0;
     134            2 :     }
     135              : 
     136              :     /// Find the position of an unused bucket via the freelist and initialize it.
     137       172806 :     pub(crate) fn alloc_bucket(&mut self, key: K, value: V) -> Result<u32, FullError> {
     138       172806 :         let mut pos = self.free_head;
     139              : 
     140              :         // Find the first bucket we're *allowed* to use.
     141       172806 :         let mut prev = PrevPos::First(self.free_head);
     142       172806 :         while pos != INVALID_POS && pos >= self.alloc_limit {
     143            0 :             let bucket = &mut self.buckets[pos as usize];
     144            0 :             prev = PrevPos::Chained(pos);
     145            0 :             pos = bucket.next;
     146            0 :         }
     147       172806 :         if pos == INVALID_POS {
     148            2 :             return Err(FullError);
     149       172804 :         }
     150              : 
     151              :         // Repair the freelist.
     152       172804 :         match prev {
     153       172804 :             PrevPos::First(_) => {
     154       172804 :                 let next_pos = self.buckets[pos as usize].next;
     155       172804 :                 self.free_head = next_pos;
     156       172804 :             }
     157            0 :             PrevPos::Chained(p) => {
     158            0 :                 if p != INVALID_POS {
     159            0 :                     let next_pos = self.buckets[pos as usize].next;
     160            0 :                     self.buckets[p as usize].next = next_pos;
     161            0 :                 }
     162              :             }
     163            0 :             _ => unreachable!(),
     164              :         }
     165              : 
     166              :         // Initialize the bucket.
     167       172804 :         let bucket = &mut self.buckets[pos as usize];
     168       172804 :         self.buckets_in_use += 1;
     169       172804 :         bucket.next = INVALID_POS;
     170       172804 :         bucket.inner = Some((key, value));
     171              : 
     172       172804 :         Ok(pos)
     173       172806 :     }
     174              : }
         |