1 /// Copyright: Copyright (c) 2017-2019 Andrey Penechko. 2 /// License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0). 3 /// Authors: Andrey Penechko. 4 module vox.utils.hash; 5 6 import vox.utils : write, writef, writeln, isPowerOfTwo, format, swap, formattedWrite, enforce; 7 8 enum StoreValues : bool { no = false, yes = true } 9 10 struct HashMap(Key, Value, Key emptyKey) { 11 mixin HashTablePart!(KeyBucket!(Key, emptyKey), StoreValues.yes); 12 } 13 14 /// Uses special key value to mark empty buckets 15 struct KeyBucket(Key, Key emptyKey) 16 { 17 static assert(isPowerOfTwo(Key.sizeof), format("%s == %s NPOT", Key.stringof, Key.sizeof)); 18 Key key = emptyKey; 19 20 bool empty() const { return key == emptyKey; } 21 bool used() const { return key != emptyKey; } 22 bool canInsert(Key _key) const { return key == emptyKey || key == _key; } 23 void assignKey(Key _key) { key = _key; } 24 void clear() { key = emptyKey; } 25 static bool isValidKey(Key _key) { return _key != emptyKey; } 26 static void clearBuckets(typeof(this)[] keyBuckets) { keyBuckets[] = typeof(this).init; } 27 } 28 29 /// If store_values = no then table works like hashset 30 // Compiler doesn't optimize `index % _capacity` into `index & (_capacity - 1)` 31 mixin template HashTablePart(KeyBucketT, StoreValues store_values) 32 { 33 import vox.utils : isPowerOfTwo, max, writefln, ArrayArena; 34 35 private uint _length; // num of used buckets 36 private uint _capacity; // capacity. Always power of two 37 private enum MIN_CAPACITY = 4; 38 private enum bool SINGLE_ALLOC = Key.sizeof == Value.sizeof; 39 40 bool empty() const { return _length == 0; } 41 uint length() const { return _length; } 42 uint capacity() const { return _capacity; } 43 private uint maxLength() const { return (_capacity / 4) * 3; } // Should be optimized into shr + lea 44 45 static if (store_values) { 46 enum Bucket_size = KeyBucketT.sizeof + Value.sizeof; 47 mixin HashMapImpl; 48 } else { 49 enum Bucket_size = KeyBucketT.sizeof; 50 mixin HashSetImpl; 51 } 52 53 /// Returns false if no value was deleted, true otherwise 54 bool remove(ref ArrayArena arena, Key key) { 55 if (_length == 0) return false; 56 size_t index = getHash(key) & (_capacity - 1); // % capacity 57 size_t searched_dib = 0; 58 while (true) { // Find entry to delete 59 if (keyBuckets[index].empty) return false; 60 if (keyBuckets[index].key == key) break; // found the item 61 size_t current_initial_bucket = getHash(keyBuckets[index].key) & (_capacity - 1); // % capacity 62 ptrdiff_t current_dib = index - current_initial_bucket; 63 if (current_dib < 0) current_dib += _capacity; 64 if (searched_dib > current_dib) return false; // item must have been inserted here 65 ++searched_dib; 66 index = (index + 1) & (_capacity - 1); // % capacity 67 } 68 while (true) { // Backward shift 69 size_t nextIndex = (index + 1) & (_capacity - 1); // % capacity 70 if (keyBuckets[nextIndex].empty) break; // Found stop bucket (empty) 71 size_t current_initial_bucket = getHash(keyBuckets[nextIndex].key) & (_capacity - 1); // % capacity 72 if (current_initial_bucket == nextIndex) break; // Found stop bucket (0 DIB) 73 keyBuckets[index].key = keyBuckets[nextIndex].key; // shift item left 74 values[index] = values[nextIndex]; // shift item left 75 index = nextIndex; 76 } 77 keyBuckets[index].clear; // Mark empty 78 --_length; 79 return true; 80 } 81 82 pragma(inline, true) 83 private static size_t getHash(Key key) { 84 import std.traits : isIntegral; 85 static if (isIntegral!Key) return cast(size_t)key; 86 else return hashOf(key); 87 } 88 89 private size_t findIndex(Key key) inout 90 { 91 if (_length == 0) return size_t.max; 92 auto index = getHash(key) & (_capacity - 1); // % capacity 93 size_t searched_dib = 0; 94 while (true) { 95 if (keyBuckets[index].empty) return size_t.max; 96 if (keyBuckets[index].key == key) return index; 97 size_t current_initial_bucket = getHash(keyBuckets[index].key) & (_capacity - 1); // % capacity 98 ptrdiff_t current_dib = index - current_initial_bucket; 99 if (current_dib < 0) current_dib += _capacity; 100 if (searched_dib > current_dib) return size_t.max; // item must have been inserted here 101 ++searched_dib; 102 index = (index + 1) & (_capacity - 1); // % capacity 103 } 104 } 105 106 void free(ref ArrayArena arena) { 107 static if (SINGLE_ALLOC) { 108 size_t size = max(ArrayArena.MIN_BLOCK_BYTES, Bucket_size * _capacity); 109 arena.freeBlock((cast(ubyte*)keyBuckets)[0..size]); 110 //values = null; // not needed, because values ptr is derived from keys 111 } else { 112 size_t keySize = max(ArrayArena.MIN_BLOCK_BYTES, KeyBucketT.sizeof * _capacity); 113 arena.freeBlock((cast(ubyte*)keyBuckets)[0..keySize]); 114 static if (store_values) { 115 size_t valSize = max(ArrayArena.MIN_BLOCK_BYTES, Value.sizeof * _capacity); 116 arena.freeBlock((cast(ubyte*)values)[0..valSize]); 117 } 118 values = null; 119 } 120 keyBuckets = null; 121 _capacity = 0; 122 _length = 0; 123 } 124 125 void clear() { 126 KeyBucketT.clearBuckets(keyBuckets[0.._capacity]); 127 _length = 0; 128 } 129 130 private void extend(ref ArrayArena arena) 131 { 132 uint newCapacity = _capacity ? _capacity * 2 : MIN_CAPACITY; 133 auto oldKeyBuckets = keyBuckets; 134 135 static if (store_values) { 136 auto oldValues = values[0.._capacity]; 137 } 138 139 auto oldCapacity = _capacity; 140 141 static if (SINGLE_ALLOC) { 142 size_t newSize = max(ArrayArena.MIN_BLOCK_BYTES, Bucket_size * newCapacity); 143 ubyte[] newBlock = arena.allocBlock(newSize); 144 keyBuckets = cast(KeyBucketT*)(newBlock.ptr); 145 // values is based on keyBuckets ptr 146 } else { 147 size_t newKeySize = max(ArrayArena.MIN_BLOCK_BYTES, KeyBucketT.sizeof * newCapacity); 148 ubyte[] newKeysBlock = arena.allocBlock(newKeySize); 149 keyBuckets = cast(KeyBucketT*)(newKeysBlock.ptr); 150 static if (store_values) { 151 size_t newValSize = max(ArrayArena.MIN_BLOCK_BYTES, Value.sizeof * newCapacity); 152 ubyte[] newValuesBlock = arena.allocBlock(newValSize); 153 values = cast(Value*)newValuesBlock.ptr; 154 } 155 } 156 157 KeyBucketT.clearBuckets(keyBuckets[0..newCapacity]); 158 _capacity = newCapacity; // put() below uses new capacity 159 160 if (oldKeyBuckets) { 161 _length = 0; // needed because `put` will increment per item 162 foreach (i, ref bucket; oldKeyBuckets[0..oldCapacity]) { 163 if (bucket.used) { 164 static if (store_values) put(arena, bucket.key, oldValues[i]); 165 else put(arena, bucket.key); 166 } 167 } 168 169 static if (SINGLE_ALLOC) { 170 size_t oldSize = max(ArrayArena.MIN_BLOCK_BYTES, Bucket_size * oldCapacity); 171 arena.freeBlock((cast(ubyte*)oldKeyBuckets)[0..oldSize]); 172 } else { 173 size_t oldKeySize = max(ArrayArena.MIN_BLOCK_BYTES, KeyBucketT.sizeof * oldCapacity); 174 arena.freeBlock((cast(ubyte*)oldKeyBuckets)[0..oldKeySize]); 175 static if (store_values) { 176 size_t oldValSize = max(ArrayArena.MIN_BLOCK_BYTES, Value.sizeof * oldCapacity); 177 arena.freeBlock((cast(ubyte*)oldValues)[0..oldValSize]); 178 } 179 } 180 } 181 } 182 } 183 184 // http://codecapsule.com/2013/11/11/robin-hood-hashing 185 // http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/ 186 // DIB - distance to initial bucket 187 mixin template HashMapImpl() 188 { 189 private enum bool SINGLE_ALLOC = Key.sizeof == Value.sizeof; 190 191 static assert(isPowerOfTwo(Value.sizeof)); 192 KeyBucketT* keyBuckets; 193 194 static if(SINGLE_ALLOC) { 195 // Save space in the hashmap 196 inout(Value)* values() pure inout { 197 return cast(Value*)(cast(void*)keyBuckets + _capacity*KeyBucketT.sizeof); 198 } 199 } else { 200 Value* values; 201 } 202 203 alias KeyT = Key; 204 alias ValueT = Value; 205 206 Value* put(ref ArrayArena arena, Key key, Value value) 207 { 208 assert(KeyBucketT.isValidKey(key), "Invalid key"); 209 if (_length == maxLength) extend(arena); 210 size_t index = getHash(key) & (_capacity - 1); // % capacity 211 size_t inserted_dib = 0; 212 while (true) { 213 if (keyBuckets[index].key == key) { // same key 214 values[index] = value; 215 return &values[index]; 216 } 217 if (keyBuckets[index].empty) { // bucket is empty 218 ++_length; 219 keyBuckets[index].assignKey(key); 220 values[index] = value; 221 return &values[index]; 222 } 223 size_t current_initial_bucket = getHash(keyBuckets[index].key) & (_capacity - 1); // % capacity 224 ptrdiff_t current_dib = index - current_initial_bucket; 225 if (current_dib < 0) current_dib += _capacity; 226 // swap inserted item with current only if DIB(current) < DIB(inserted item) 227 if (inserted_dib > current_dib) { 228 swap(key, keyBuckets[index].key); 229 swap(value, values[index]); 230 inserted_dib = current_dib; 231 } 232 ++inserted_dib; 233 index = (index + 1) & (_capacity - 1); // % capacity 234 } 235 } 236 237 Value* getOrCreate(ref ArrayArena arena, Key key, out bool wasCreated, Value default_value = Value.init) 238 { 239 if (_length == 0) { 240 wasCreated = true; 241 return put(arena, key, default_value); 242 } 243 244 if (_length == maxLength) extend(arena); 245 auto index = getHash(key) & (_capacity - 1); // % capacity 246 size_t inserted_dib = 0; 247 Value value; 248 while (true) { 249 // no item was found, insert default and return 250 if (keyBuckets[index].empty) { // bucket is empty 251 ++_length; 252 keyBuckets[index].assignKey(key); 253 values[index] = default_value; 254 wasCreated = true; 255 return &values[index]; 256 } 257 258 // found existing item 259 if (keyBuckets[index].key == key) return &values[index]; 260 261 size_t current_initial_bucket = getHash(keyBuckets[index].key) & (_capacity - 1); // % capacity 262 ptrdiff_t current_dib = index - current_initial_bucket; 263 if (current_dib < 0) current_dib += _capacity; 264 265 // no item was found, cell is occupied, insert default and shift other items 266 if (inserted_dib > current_dib) { 267 value = default_value; 268 wasCreated = true; 269 swap(key, keyBuckets[index].key); 270 swap(value, values[index]); 271 inserted_dib = current_dib; 272 ++inserted_dib; 273 index = (index + 1) & (_capacity - 1); // % capacity 274 break; // item must have been inserted here 275 } 276 ++inserted_dib; 277 index = (index + 1) & (_capacity - 1); // % capacity 278 } 279 280 size_t numIters = 0; 281 // fixup invariant after insertion 282 while (true) { 283 if (keyBuckets[index].empty) { // bucket is empty 284 ++_length; 285 keyBuckets[index].assignKey(key); 286 values[index] = value; 287 return &values[index]; 288 } 289 size_t current_initial_bucket = getHash(keyBuckets[index].key) & (_capacity - 1); // % capacity 290 ptrdiff_t current_dib = index - current_initial_bucket; 291 if (current_dib < 0) current_dib += _capacity; 292 // swap inserted item with current only if DIB(current) < DIB(inserted item) 293 if (inserted_dib > current_dib) { 294 swap(key, keyBuckets[index].key); 295 swap(value, values[index]); 296 inserted_dib = current_dib; 297 } 298 ++inserted_dib; 299 assert(numIters < _capacity, format("bug %s %s %s", _capacity, numIters, _length)); 300 ++numIters; 301 index = (index + 1) & (_capacity - 1); // % capacity 302 } 303 } 304 305 inout(Value)* opBinaryRight(string op)(Key key) inout if (op == "in") { 306 size_t index = findIndex(key); 307 if (index == size_t.max) return null; 308 return &values[index]; 309 } 310 311 ref inout(Value) opIndex(Key key) inout { 312 size_t index = findIndex(key); 313 enforce(index != size_t.max, "Non-existing key access"); 314 return values[index]; 315 } 316 317 Value get(Key key, Value default_value = Value.init) { 318 size_t index = findIndex(key); 319 if (index == size_t.max) return default_value; 320 return values[index]; 321 } 322 323 int opApply(scope int delegate(ref Value) del) { 324 foreach (i, ref bucket; keyBuckets[0.._capacity]) 325 if (bucket.used) 326 if (int ret = del(values[i])) 327 return ret; 328 return 0; 329 } 330 331 int opApply(scope int delegate(in Key, ref Value) del) { 332 foreach (i, ref bucket; keyBuckets[0.._capacity]) 333 if (bucket.used) 334 if (int ret = del(bucket.key, values[i])) 335 return ret; 336 return 0; 337 } 338 339 void toString()(scope void delegate(const(char)[]) sink) { 340 sink.formattedWrite("[",); 341 size_t index; 342 foreach(key, value; this) { 343 if (index > 0) sink(", "); 344 sink.formattedWrite("%s:%s", key, value); 345 ++index; 346 } 347 sink.formattedWrite("]"); 348 } 349 350 void dump() { 351 //writef("{%08X}[", cast(size_t)keyBuckets & 0xFFFFFFFF); 352 foreach (i, ref bucket; keyBuckets[0.._capacity]) { 353 if (i > 0) write(", "); 354 if (bucket.empty) 355 writef("%s:<E>", i); 356 else 357 writef("%s:%s:%s", i, bucket.key, values[i]); 358 } 359 writeln("]"); 360 } 361 }