1 /// Copyright: Copyright (c) 2017-2019 Andrey Penechko.
2 /// License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
3 /// Authors: Andrey Penechko.
4 module vox.utils.hash;
5 
6 import vox.utils : write, writef, writeln, isPowerOfTwo, format, swap, formattedWrite, enforce;
7 
8 enum StoreValues : bool { no = false, yes = true }
9 
10 struct HashMap(Key, Value, Key emptyKey) {
11 	mixin HashTablePart!(KeyBucket!(Key, emptyKey), StoreValues.yes);
12 }
13 
14 /// Uses special key value to mark empty buckets
15 struct KeyBucket(Key, Key emptyKey)
16 {
17 	static assert(isPowerOfTwo(Key.sizeof), format("%s == %s NPOT", Key.stringof, Key.sizeof));
18 	Key key = emptyKey;
19 
20 	bool empty() const { return key == emptyKey; }
21 	bool used() const { return key != emptyKey; }
22 	bool canInsert(Key _key) const { return key == emptyKey || key == _key; }
23 	void assignKey(Key _key) { key = _key; }
24 	void clear() { key = emptyKey; }
25 	static bool isValidKey(Key _key) { return _key != emptyKey; }
26 	static void clearBuckets(typeof(this)[] keyBuckets) { keyBuckets[] = typeof(this).init; }
27 }
28 
29 /// If store_values = no then table works like hashset
30 // Compiler doesn't optimize `index % _capacity` into `index & (_capacity - 1)`
31 mixin template HashTablePart(KeyBucketT, StoreValues store_values)
32 {
33 	import vox.utils : isPowerOfTwo, max, writefln, ArrayArena;
34 
35 	private uint _length; // num of used buckets
36 	private uint _capacity; // capacity. Always power of two
37 	private enum MIN_CAPACITY = 4;
38 	private enum bool SINGLE_ALLOC = Key.sizeof == Value.sizeof;
39 
40 	bool empty() const { return _length == 0; }
41 	uint length() const { return _length; }
42 	uint capacity() const { return _capacity; }
43 	private uint maxLength() const { return (_capacity / 4) * 3; } // Should be optimized into shr + lea
44 
45 	static if (store_values) {
46 		enum Bucket_size = KeyBucketT.sizeof + Value.sizeof;
47 		mixin HashMapImpl;
48 	} else {
49 		enum Bucket_size = KeyBucketT.sizeof;
50 		mixin HashSetImpl;
51 	}
52 
53 	/// Returns false if no value was deleted, true otherwise
54 	bool remove(ref ArrayArena arena, Key key) {
55 		if (_length == 0) return false;
56 		size_t index = getHash(key) & (_capacity - 1); // % capacity
57 		size_t searched_dib = 0;
58 		while (true) { // Find entry to delete
59 			if (keyBuckets[index].empty) return false;
60 			if (keyBuckets[index].key == key) break; // found the item
61 			size_t current_initial_bucket = getHash(keyBuckets[index].key) & (_capacity - 1); // % capacity
62 			ptrdiff_t current_dib = index - current_initial_bucket;
63 			if (current_dib < 0) current_dib += _capacity;
64 			if (searched_dib > current_dib) return false; // item must have been inserted here
65 			++searched_dib;
66 			index = (index + 1) & (_capacity - 1); // % capacity
67 		}
68 		while (true) { // Backward shift
69 			size_t nextIndex = (index + 1) & (_capacity - 1); // % capacity
70 			if (keyBuckets[nextIndex].empty) break; // Found stop bucket (empty)
71 			size_t current_initial_bucket = getHash(keyBuckets[nextIndex].key) & (_capacity - 1); // % capacity
72 			if (current_initial_bucket == nextIndex) break; // Found stop bucket (0 DIB)
73 			keyBuckets[index].key = keyBuckets[nextIndex].key; // shift item left
74 			values[index] = values[nextIndex]; // shift item left
75 			index = nextIndex;
76 		}
77 		keyBuckets[index].clear; // Mark empty
78 		--_length;
79 		return true;
80 	}
81 
82 	pragma(inline, true)
83 	private static size_t getHash(Key key) {
84 		import std.traits : isIntegral;
85 		static if (isIntegral!Key) return cast(size_t)key;
86 		else return hashOf(key);
87 	}
88 
89 	private size_t findIndex(Key key) inout
90 	{
91 		if (_length == 0) return size_t.max;
92 		auto index = getHash(key) & (_capacity - 1); // % capacity
93 		size_t searched_dib = 0;
94 		while (true) {
95 			if (keyBuckets[index].empty) return size_t.max;
96 			if (keyBuckets[index].key == key) return index;
97 			size_t current_initial_bucket = getHash(keyBuckets[index].key) & (_capacity - 1); // % capacity
98 			ptrdiff_t current_dib = index - current_initial_bucket;
99 			if (current_dib < 0) current_dib += _capacity;
100 			if (searched_dib > current_dib) return size_t.max; // item must have been inserted here
101 			++searched_dib;
102 			index = (index + 1) & (_capacity - 1); // % capacity
103 		}
104 	}
105 
106 	void free(ref ArrayArena arena) {
107 		static if (SINGLE_ALLOC) {
108 			size_t size = max(ArrayArena.MIN_BLOCK_BYTES, Bucket_size * _capacity);
109 			arena.freeBlock((cast(ubyte*)keyBuckets)[0..size]);
110 			//values = null; // not needed, because values ptr is derived from keys
111 		} else {
112 			size_t keySize = max(ArrayArena.MIN_BLOCK_BYTES, KeyBucketT.sizeof * _capacity);
113 			arena.freeBlock((cast(ubyte*)keyBuckets)[0..keySize]);
114 			static if (store_values) {
115 				size_t valSize = max(ArrayArena.MIN_BLOCK_BYTES, Value.sizeof * _capacity);
116 				arena.freeBlock((cast(ubyte*)values)[0..valSize]);
117 			}
118 			values = null;
119 		}
120 		keyBuckets = null;
121 		_capacity = 0;
122 		_length = 0;
123 	}
124 
125 	void clear() {
126 		KeyBucketT.clearBuckets(keyBuckets[0.._capacity]);
127 		_length = 0;
128 	}
129 
130 	private void extend(ref ArrayArena arena)
131 	{
132 		uint newCapacity = _capacity ? _capacity * 2 : MIN_CAPACITY;
133 		auto oldKeyBuckets = keyBuckets;
134 
135 		static if (store_values) {
136 			auto oldValues = values[0.._capacity];
137 		}
138 
139 		auto oldCapacity = _capacity;
140 
141 		static if (SINGLE_ALLOC) {
142 			size_t newSize = max(ArrayArena.MIN_BLOCK_BYTES, Bucket_size * newCapacity);
143 			ubyte[] newBlock = arena.allocBlock(newSize);
144 			keyBuckets = cast(KeyBucketT*)(newBlock.ptr);
145 			// values is based on keyBuckets ptr
146 		} else {
147 			size_t newKeySize = max(ArrayArena.MIN_BLOCK_BYTES, KeyBucketT.sizeof * newCapacity);
148 			ubyte[] newKeysBlock = arena.allocBlock(newKeySize);
149 			keyBuckets = cast(KeyBucketT*)(newKeysBlock.ptr);
150 			static if (store_values) {
151 				size_t newValSize = max(ArrayArena.MIN_BLOCK_BYTES, Value.sizeof * newCapacity);
152 				ubyte[] newValuesBlock = arena.allocBlock(newValSize);
153 				values = cast(Value*)newValuesBlock.ptr;
154 			}
155 		}
156 
157 		KeyBucketT.clearBuckets(keyBuckets[0..newCapacity]);
158 		_capacity = newCapacity; // put() below uses new capacity
159 
160 		if (oldKeyBuckets) {
161 			_length = 0; // needed because `put` will increment per item
162 			foreach (i, ref bucket; oldKeyBuckets[0..oldCapacity]) {
163 				if (bucket.used) {
164 					static if (store_values) put(arena, bucket.key, oldValues[i]);
165 					else put(arena, bucket.key);
166 				}
167 			}
168 
169 			static if (SINGLE_ALLOC) {
170 				size_t oldSize = max(ArrayArena.MIN_BLOCK_BYTES, Bucket_size * oldCapacity);
171 				arena.freeBlock((cast(ubyte*)oldKeyBuckets)[0..oldSize]);
172 			} else {
173 				size_t oldKeySize = max(ArrayArena.MIN_BLOCK_BYTES, KeyBucketT.sizeof * oldCapacity);
174 				arena.freeBlock((cast(ubyte*)oldKeyBuckets)[0..oldKeySize]);
175 				static if (store_values) {
176 					size_t oldValSize = max(ArrayArena.MIN_BLOCK_BYTES, Value.sizeof * oldCapacity);
177 					arena.freeBlock((cast(ubyte*)oldValues)[0..oldValSize]);
178 				}
179 			}
180 		}
181 	}
182 }
183 
184 // http://codecapsule.com/2013/11/11/robin-hood-hashing
185 // http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/
186 // DIB - distance to initial bucket
187 mixin template HashMapImpl()
188 {
189 	private enum bool SINGLE_ALLOC = Key.sizeof == Value.sizeof;
190 
191 	static assert(isPowerOfTwo(Value.sizeof));
192 	KeyBucketT* keyBuckets;
193 
194 	static if(SINGLE_ALLOC) {
195 		// Save space in the hashmap
196 		inout(Value)* values() pure inout {
197 			return cast(Value*)(cast(void*)keyBuckets + _capacity*KeyBucketT.sizeof);
198 		}
199 	} else {
200 		Value* values;
201 	}
202 
203 	alias KeyT = Key;
204 	alias ValueT = Value;
205 
206 	Value* put(ref ArrayArena arena, Key key, Value value)
207 	{
208 		assert(KeyBucketT.isValidKey(key), "Invalid key");
209 		if (_length == maxLength) extend(arena);
210 		size_t index = getHash(key) & (_capacity - 1); // % capacity
211 		size_t inserted_dib = 0;
212 		while (true) {
213 			if (keyBuckets[index].key == key) { // same key
214 				values[index] = value;
215 				return &values[index];
216 			}
217 			if (keyBuckets[index].empty) { // bucket is empty
218 				++_length;
219 				keyBuckets[index].assignKey(key);
220 				values[index] = value;
221 				return &values[index];
222 			}
223 			size_t current_initial_bucket = getHash(keyBuckets[index].key) & (_capacity - 1); // % capacity
224 			ptrdiff_t current_dib = index - current_initial_bucket;
225 			if (current_dib < 0) current_dib += _capacity;
226 			// swap inserted item with current only if DIB(current) < DIB(inserted item)
227 			if (inserted_dib > current_dib) {
228 				swap(key, keyBuckets[index].key);
229 				swap(value, values[index]);
230 				inserted_dib = current_dib;
231 			}
232 			++inserted_dib;
233 			index = (index + 1) & (_capacity - 1); // % capacity
234 		}
235 	}
236 
237 	Value* getOrCreate(ref ArrayArena arena, Key key, out bool wasCreated, Value default_value = Value.init)
238 	{
239 		if (_length == 0) {
240 			wasCreated = true;
241 			return put(arena, key, default_value);
242 		}
243 
244 		if (_length == maxLength) extend(arena);
245 		auto index = getHash(key) & (_capacity - 1); // % capacity
246 		size_t inserted_dib = 0;
247 		Value value;
248 		while (true) {
249 			// no item was found, insert default and return
250 			if (keyBuckets[index].empty) { // bucket is empty
251 				++_length;
252 				keyBuckets[index].assignKey(key);
253 				values[index] = default_value;
254 				wasCreated = true;
255 				return &values[index];
256 			}
257 
258 			// found existing item
259 			if (keyBuckets[index].key == key) return &values[index];
260 
261 			size_t current_initial_bucket = getHash(keyBuckets[index].key) & (_capacity - 1); // % capacity
262 			ptrdiff_t current_dib = index - current_initial_bucket;
263 			if (current_dib < 0) current_dib += _capacity;
264 
265 			// no item was found, cell is occupied, insert default and shift other items
266 			if (inserted_dib > current_dib) {
267 				value = default_value;
268 				wasCreated = true;
269 				swap(key, keyBuckets[index].key);
270 				swap(value, values[index]);
271 				inserted_dib = current_dib;
272 				++inserted_dib;
273 				index = (index + 1) & (_capacity - 1); // % capacity
274 				break; // item must have been inserted here
275 			}
276 			++inserted_dib;
277 			index = (index + 1) & (_capacity - 1); // % capacity
278 		}
279 
280 		size_t numIters = 0;
281 		// fixup invariant after insertion
282 		while (true) {
283 			if (keyBuckets[index].empty) { // bucket is empty
284 				++_length;
285 				keyBuckets[index].assignKey(key);
286 				values[index] = value;
287 				return &values[index];
288 			}
289 			size_t current_initial_bucket = getHash(keyBuckets[index].key) & (_capacity - 1); // % capacity
290 			ptrdiff_t current_dib = index - current_initial_bucket;
291 			if (current_dib < 0) current_dib += _capacity;
292 			// swap inserted item with current only if DIB(current) < DIB(inserted item)
293 			if (inserted_dib > current_dib) {
294 				swap(key, keyBuckets[index].key);
295 				swap(value, values[index]);
296 				inserted_dib = current_dib;
297 			}
298 			++inserted_dib;
299 			assert(numIters < _capacity, format("bug %s %s %s", _capacity, numIters, _length));
300 			++numIters;
301 			index = (index + 1) & (_capacity - 1); // % capacity
302 		}
303 	}
304 
305 	inout(Value)* opBinaryRight(string op)(Key key) inout if (op == "in") {
306 		size_t index = findIndex(key);
307 		if (index == size_t.max) return null;
308 		return &values[index];
309 	}
310 
311 	ref inout(Value) opIndex(Key key) inout {
312 		size_t index = findIndex(key);
313 		enforce(index != size_t.max, "Non-existing key access");
314 		return values[index];
315 	}
316 
317 	Value get(Key key, Value default_value = Value.init) {
318 		size_t index = findIndex(key);
319 		if (index == size_t.max) return default_value;
320 		return values[index];
321 	}
322 
323 	int opApply(scope int delegate(ref Value) del) {
324 		foreach (i, ref bucket; keyBuckets[0.._capacity])
325 			if (bucket.used)
326 				if (int ret = del(values[i]))
327 					return ret;
328 		return 0;
329 	}
330 
331 	int opApply(scope int delegate(in Key, ref Value) del) {
332 		foreach (i, ref bucket; keyBuckets[0.._capacity])
333 			if (bucket.used)
334 				if (int ret = del(bucket.key, values[i]))
335 					return ret;
336 		return 0;
337 	}
338 
339 	void toString()(scope void delegate(const(char)[]) sink) {
340 		sink.formattedWrite("[",);
341 		size_t index;
342 		foreach(key, value; this) {
343 			if (index > 0) sink(", ");
344 			sink.formattedWrite("%s:%s", key, value);
345 			++index;
346 		}
347 		sink.formattedWrite("]");
348 	}
349 
350 	void dump() {
351 		//writef("{%08X}[", cast(size_t)keyBuckets & 0xFFFFFFFF);
352 		foreach (i, ref bucket; keyBuckets[0.._capacity]) {
353 			if (i > 0) write(", ");
354 			if (bucket.empty)
355 				writef("%s:<E>", i);
356 			else
357 				writef("%s:%s:%s", i, bucket.key, values[i]);
358 		}
359 		writeln("]");
360 	}
361 }