Line data Source code
1 : //! Simple hash table with chaining.
2 :
3 : use std::hash::Hash;
4 : use std::mem::MaybeUninit;
5 :
6 : use crate::hash::entry::*;
7 :
8 : /// Invalid position within the map (either within the dictionary or bucket array).
9 : pub(crate) const INVALID_POS: u32 = u32::MAX;
10 :
11 : /// Fundamental storage unit within the hash table. Either empty or contains a key-value pair.
12 : /// Always part of a chain of some kind (either a freelist if empty or a hash chain if full).
13 : pub(crate) struct Bucket<K, V> {
14 : /// Index of next bucket in the chain.
15 : pub(crate) next: u32,
16 : /// Key-value pair contained within bucket.
17 : pub(crate) inner: Option<(K, V)>,
18 : }
19 :
20 : /// Core hash table implementation.
21 : pub(crate) struct CoreHashMap<'a, K, V> {
22 : /// Dictionary used to map hashes to bucket indices.
23 : pub(crate) dictionary: &'a mut [u32],
24 : /// Buckets containing key-value pairs.
25 : pub(crate) buckets: &'a mut [Bucket<K, V>],
26 : /// Head of the freelist.
27 : pub(crate) free_head: u32,
28 : /// Maximum index of a bucket allowed to be allocated. [`INVALID_POS`] if no limit.
29 : pub(crate) alloc_limit: u32,
30 : /// The number of currently occupied buckets.
31 : pub(crate) buckets_in_use: u32,
32 : }
33 :
34 : /// Error for when there are no empty buckets left but one is needed.
35 : #[derive(Debug, PartialEq)]
36 : pub struct FullError;
37 :
38 : impl<'a, K: Clone + Hash + Eq, V> CoreHashMap<'a, K, V> {
39 : const FILL_FACTOR: f32 = 0.60;
40 :
41 : /// Estimate the size of data contained within the the hash map.
42 60 : pub fn estimate_size(num_buckets: u32) -> usize {
43 60 : let mut size = 0;
44 :
45 : // buckets
46 60 : size += size_of::<Bucket<K, V>>() * num_buckets as usize;
47 :
48 : // dictionary
49 60 : size += (f32::ceil((size_of::<u32>() * num_buckets as usize) as f32 / Self::FILL_FACTOR))
50 60 : as usize;
51 :
52 60 : size
53 60 : }
54 :
55 26 : pub fn new(
56 26 : buckets: &'a mut [MaybeUninit<Bucket<K, V>>],
57 26 : dictionary: &'a mut [MaybeUninit<u32>],
58 26 : ) -> Self {
59 : // Initialize the buckets
60 1316003 : for i in 0..buckets.len() {
61 1316003 : buckets[i].write(Bucket {
62 1316003 : next: if i < buckets.len() - 1 {
63 1315977 : i as u32 + 1
64 : } else {
65 26 : INVALID_POS
66 : },
67 1316003 : inner: None,
68 : });
69 : }
70 :
71 : // Initialize the dictionary
72 2201664 : for e in dictionary.iter_mut() {
73 2201664 : e.write(INVALID_POS);
74 2201664 : }
75 :
76 : // TODO: use std::slice::assume_init_mut() once it stabilizes
77 26 : let buckets =
78 26 : unsafe { std::slice::from_raw_parts_mut(buckets.as_mut_ptr().cast(), buckets.len()) };
79 26 : let dictionary = unsafe {
80 26 : std::slice::from_raw_parts_mut(dictionary.as_mut_ptr().cast(), dictionary.len())
81 : };
82 :
83 26 : Self {
84 26 : dictionary,
85 26 : buckets,
86 26 : free_head: 0,
87 26 : buckets_in_use: 0,
88 26 : alloc_limit: INVALID_POS,
89 26 : }
90 26 : }
91 :
92 : /// Get the value associated with a key (if it exists) given its hash.
93 111088 : pub fn get_with_hash(&self, key: &K, hash: u64) -> Option<&V> {
94 111088 : let mut next = self.dictionary[hash as usize % self.dictionary.len()];
95 : loop {
96 114713 : if next == INVALID_POS {
97 809 : return None;
98 113904 : }
99 :
100 113904 : let bucket = &self.buckets[next as usize];
101 113904 : let (bucket_key, bucket_value) = bucket.inner.as_ref().expect("entry is in use");
102 113904 : if bucket_key == key {
103 110279 : return Some(bucket_value);
104 3625 : }
105 3625 : next = bucket.next;
106 : }
107 111088 : }
108 :
109 : /// Get number of buckets in map.
110 15 : pub fn get_num_buckets(&self) -> usize {
111 15 : self.buckets.len()
112 15 : }
113 :
114 : /// Clears all entries from the hashmap.
115 : ///
116 : /// Does not reset any allocation limits, but does clear any entries beyond them.
117 2 : pub fn clear(&mut self) {
118 3000 : for i in 0..self.buckets.len() {
119 3000 : self.buckets[i] = Bucket {
120 3000 : next: if i < self.buckets.len() - 1 {
121 2998 : i as u32 + 1
122 : } else {
123 2 : INVALID_POS
124 : },
125 3000 : inner: None,
126 : }
127 : }
128 5472 : for i in 0..self.dictionary.len() {
129 5472 : self.dictionary[i] = INVALID_POS;
130 5472 : }
131 :
132 2 : self.free_head = 0;
133 2 : self.buckets_in_use = 0;
134 2 : }
135 :
136 : /// Find the position of an unused bucket via the freelist and initialize it.
137 172328 : pub(crate) fn alloc_bucket(&mut self, key: K, value: V) -> Result<u32, FullError> {
138 172328 : let mut pos = self.free_head;
139 :
140 : // Find the first bucket we're *allowed* to use.
141 172328 : let mut prev = PrevPos::First(self.free_head);
142 172328 : while pos != INVALID_POS && pos >= self.alloc_limit {
143 0 : let bucket = &mut self.buckets[pos as usize];
144 0 : prev = PrevPos::Chained(pos);
145 0 : pos = bucket.next;
146 0 : }
147 172328 : if pos == INVALID_POS {
148 2 : return Err(FullError);
149 172326 : }
150 :
151 : // Repair the freelist.
152 172326 : match prev {
153 172326 : PrevPos::First(_) => {
154 172326 : let next_pos = self.buckets[pos as usize].next;
155 172326 : self.free_head = next_pos;
156 172326 : }
157 0 : PrevPos::Chained(p) => {
158 0 : if p != INVALID_POS {
159 0 : let next_pos = self.buckets[pos as usize].next;
160 0 : self.buckets[p as usize].next = next_pos;
161 0 : }
162 : }
163 0 : _ => unreachable!(),
164 : }
165 :
166 : // Initialize the bucket.
167 172326 : let bucket = &mut self.buckets[pos as usize];
168 172326 : self.buckets_in_use += 1;
169 172326 : bucket.next = INVALID_POS;
170 172326 : bucket.inner = Some((key, value));
171 :
172 172326 : Ok(pos)
173 172328 : }
174 : }
|