1 /// Copyright: Copyright (c) 2017-2020 Andrey Penechko. 2 /// License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0). 3 /// Authors: Andrey Penechko. 4 5 module vox.ir.ir_small_set; 6 7 import std.bitmanip : bitfields; 8 import std.format : formattedWrite; 9 import vox.all; 10 11 /// Stores 0-4 items inline 12 /// Allocates memory from IrFuncStorage.arrayBuffer for sets with > 4 slots 13 /// IrIndex.init is used as an empty key, it is illegal key 14 struct IrSmallSet 15 { 16 union { 17 // this is not deduplicated, but equal items are sequential 18 IrIndex[4] items; // items[0].kind != IrValueKind.array 19 struct { 20 // .kind == IrValueKind.array 21 // offset into IrFuncStorage.arrayBuffer 22 IrIndex arrayIndex; 23 mixin(bitfields!( 24 // Number of buckets with items in multihashset 25 uint, "numUniqueKeys", 23, 26 // capacity is bigCapacity 27 uint, "capacityPower", 5, 28 // 4 bits need to be clear, so IrIndex.kind == IrvalueKind.none 29 // important for automatic index fixing after moving 30 // when switching from built-in items to external array 31 // item[1] must cleared 32 uint, "", 4 33 )); 34 mixin(bitfields!( 35 // Total number of items inside multihashset (sum of values of all unique keys) 36 uint, "numItems", 28, 37 // 4 bits need to be clear, so IrIndex.kind == IrvalueKind.none 38 // important for automatic index fixing after moving 39 // when switching from built-in items to external array 40 // item[2] must cleared 41 uint, "", 4 42 )); 43 } 44 } 45 46 struct Bucket 47 { 48 IrIndex key; 49 // stores number of items with this key 50 // always >= 1 for used items 51 uint value; 52 53 bool used() const { return key.isDefined; } 54 bool empty() const { return key.isUndefined; } 55 } 56 57 bool isBig() const { 58 return items[0].kind == IrValueKind.array; 59 } 60 61 private uint bigCapacity() const { return 1 << capacityPower; } 62 63 uint capacity() const { 64 if (isBig) return bigCapacity; 65 else return 3; 66 } 67 68 uint length() const { 69 if (isBig) return numItems; 70 foreach_reverse(i, item; items) { 71 if (item.isDefined) return cast(uint)(i+1); 72 } 73 return 0; 74 } 75 76 bool empty() const { return length == 0; } 77 78 void free(IrFunction* ir) { 79 if (isBig) { 80 ir.freeIrArray(arrayIndex, bigCapacity); 81 } 82 items[] = IrIndex.init; 83 } 84 85 IrSmallSetIterator range(IrFunction* ir) const { 86 return IrSmallSetIterator(this, ir); 87 } 88 89 void put(IrBuilder* builder, IrIndex key) { 90 assert(key.isDefined, "Invalid key"); 91 if (isBig) { 92 Bucket* buckets = cast(Bucket*)builder.ir.getArray(arrayIndex); 93 uint _capacity = bigCapacity; 94 uint maxLength = (_capacity / 4) * 3; // Should be optimized into shr + lea 95 96 if (numUniqueKeys == maxLength) { 97 // prevent unnecessary extend 98 auto index = findIndex(buckets, key); 99 if (index != size_t.max) return; 100 101 uint newCapacity = _capacity * 2; 102 IrIndex newArrayIndex = builder.allocateIrArray(newCapacity * 2); // Buckets are twice bigger than IrIndex 103 Bucket* newBuckets = cast(Bucket*)builder.ir.getArray(newArrayIndex); 104 newBuckets[0..newCapacity] = Bucket.init; 105 106 // copy buckets 107 numUniqueKeys = 0; // needed because `putKey` will increment per item 108 numItems = 0; 109 capacityPower = capacityPower + 1; // used by putKey 110 foreach (Bucket oldBucket; buckets[0.._capacity]) { 111 if (oldBucket.used) { 112 putKey(newBuckets, oldBucket); 113 } 114 } 115 116 // free old array 117 builder.ir.freeIrArray(arrayIndex, _capacity * 2); // Buckets are twice bigger than IrIndex 118 119 // update instance 120 arrayIndex = newArrayIndex; // old index used for freeIrArray 121 buckets = newBuckets; 122 } 123 124 putKey(buckets, Bucket(key, 1)); 125 } 126 else 127 { 128 foreach (ref item; items) { 129 if (item.isUndefined) { 130 item = key; 131 return; 132 } 133 // equal keys will be sequential in the array 134 if (key.asUint < item.asUint) { 135 swap(key, item); 136 } 137 } 138 139 // no free slots, switch to multihashset 140 IrIndex newArrayIndex = builder.allocateIrArray(16); // 8 Buckets are twice bigger than IrIndex 141 Bucket* newBuckets = cast(Bucket*)builder.ir.getArray(newArrayIndex); 142 auto itemsCopy = items; 143 144 // update instance 145 // clear bitfields and all lengths 146 items[] = IrIndex.init; 147 arrayIndex = newArrayIndex; 148 capacityPower = 3; // 1 << 3 == 8 149 150 newBuckets[0..8] = Bucket.init; 151 //writefln("newBuckets %s %s %s", newArrayIndex, newBuckets, newBuckets[0..8]); 152 foreach(item; itemsCopy) 153 putKey(newBuckets, Bucket(item, 1)); 154 putKey(newBuckets, Bucket(key, 1)); 155 } 156 } 157 158 private void putKey(Bucket* buckets, Bucket bucket) { 159 uint _capacity = bigCapacity; 160 size_t index = getHash(bucket.key) & (_capacity - 1); // % capacity 161 size_t inserted_dib = 0; 162 while (true) { 163 if (buckets[index].key == bucket.key) { 164 //writefln("add %s %s %s %s, bigCap %s same", bucket.key, bucket.value, numItems, numUniqueKeys, _capacity); 165 ++buckets[index].value; // same key, bump the value 166 numItems = numItems + 1; 167 return; 168 } 169 if (buckets[index].empty) { // bucket is empty, we add new key 170 //writefln("add %s %s %s %s, bigCap %s new", bucket.key, bucket.value, numItems, numUniqueKeys, _capacity); 171 numUniqueKeys = numUniqueKeys + 1; 172 numItems = numItems + bucket.value; 173 buckets[index] = bucket; 174 return; 175 } 176 size_t current_initial_bucket = getHash(buckets[index].key) & (_capacity - 1); // % capacity 177 ptrdiff_t current_dib = index - current_initial_bucket; 178 if (current_dib < 0) current_dib += _capacity; 179 // swap inserted item with current only if DIB(current) < DIB(inserted item) 180 if (inserted_dib > current_dib) { 181 swap(bucket, buckets[index]); 182 inserted_dib = current_dib; 183 } 184 ++inserted_dib; 185 index = (index + 1) & (_capacity - 1); // % capacity 186 } 187 } 188 189 // Returns number of keys 190 uint contains(IrFunction* ir, IrIndex key) { 191 assert(key.isDefined, "Invalid key"); 192 if (isBig) { 193 Bucket* buckets = cast(Bucket*)ir.getArray(arrayIndex); 194 auto index = findIndex(buckets, key); 195 if (index == size_t.max) return 0; 196 return buckets[index].value; 197 } else { 198 uint count = 0; 199 foreach (item; items) 200 if (item == key) ++count; 201 return count; 202 } 203 } 204 205 /// Returns number of replaced keys 206 uint replace(IrFunction* ir, IrIndex what, IrIndex byWhat) { 207 assert(what.isDefined, "Invalid what"); 208 assert(byWhat.isDefined, "Invalid byWhat"); 209 if (isBig) { 210 Bucket* buckets = cast(Bucket*)ir.getArray(arrayIndex); 211 if (auto numRemoved = removeBig(buckets, what)) { 212 putKey(buckets, Bucket(byWhat, numRemoved)); 213 return true; 214 } 215 } else { 216 uint numReplaced = 0; 217 foreach (i, ref item; items) { 218 if (item == what) { 219 ++numReplaced; 220 item = byWhat; 221 } 222 } 223 return numReplaced; 224 } 225 return 0; 226 } 227 228 /// Returns number of deleted keys 229 uint remove(IrFunction* ir, IrIndex what) 230 { 231 if (isBig) { // external array 232 Bucket* buckets = cast(Bucket*)ir.getArray(arrayIndex); 233 return removeBig(buckets, what); 234 } 235 else 236 return removeSmall(what); 237 } 238 239 private uint removeSmall(IrIndex key) { 240 // no harm if key or items[i] is null here 241 uint numRemoved = 0; 242 foreach (i, ref item; items) { 243 if (item == key) { 244 ++numRemoved; 245 item = IrIndex.init; 246 } else { 247 swap(items[i - numRemoved], item); 248 } 249 } 250 return numRemoved; 251 } 252 253 private uint removeBig(Bucket* buckets, IrIndex key) { 254 if (numUniqueKeys == 0) return 0; 255 uint _capacity = bigCapacity; 256 size_t index = getHash(key) & (_capacity - 1); // % capacity 257 size_t searched_dib = 0; 258 while (true) { // Find entry to delete 259 if (buckets[index].empty) return 0; // empty bucket 260 if (buckets[index].key == key) { 261 //writefln("remove %s %s %s %s bigCap %s", buckets[index].key, buckets[index].value, numItems, numUniqueKeys, _capacity); 262 numUniqueKeys = numUniqueKeys - 1; 263 numItems = numItems - buckets[index].value; 264 break; // found the item 265 } 266 size_t current_initial_bucket = getHash(buckets[index].key) & (_capacity - 1); // % capacity 267 ptrdiff_t current_dib = index - current_initial_bucket; 268 if (current_dib < 0) current_dib += _capacity; 269 if (searched_dib > current_dib) return 0; // item must have been inserted here 270 ++searched_dib; 271 index = (index + 1) & (_capacity - 1); // % capacity 272 } 273 uint value = buckets[index].value; 274 while (true) { // Backward shift 275 size_t nextIndex = (index + 1) & (_capacity - 1); // % capacity 276 if (buckets[nextIndex].empty) break; // Found stop bucket (empty) 277 size_t current_initial_bucket = getHash(buckets[nextIndex].key) & (_capacity - 1); // % capacity 278 if (current_initial_bucket == nextIndex) break; // Found stop bucket (0 DIB) 279 buckets[index] = buckets[nextIndex]; // shift item left 280 index = nextIndex; 281 } 282 buckets[index] = Bucket.init; // Mark empty 283 return value; 284 } 285 286 pragma(inline, true) 287 private static uint getHash(IrIndex key) { 288 return key.asUint;// * 0xdeece66d + 0xb; 289 } 290 291 private size_t findIndex(Bucket* buckets, IrIndex key) const 292 { 293 if (numUniqueKeys == 0) return size_t.max; 294 uint _capacity = bigCapacity; 295 auto index = getHash(key) & (_capacity - 1); // % capacity 296 size_t searched_dib = 0; 297 while (true) { 298 if (buckets[index].empty) return size_t.max; // empty key 299 if (buckets[index].key == key) return index; 300 size_t current_initial_bucket = getHash(buckets[index].key) & (_capacity - 1); // % capacity 301 ptrdiff_t current_dib = index - current_initial_bucket; 302 if (current_dib < 0) current_dib += _capacity; 303 if (searched_dib > current_dib) return size_t.max; // item must have been inserted here 304 ++searched_dib; 305 index = (index + 1) & (_capacity - 1); // % capacity 306 } 307 } 308 } 309 310 struct IrSmallSetIterator 311 { 312 IrSmallSet array; 313 IrFunction* ir; 314 315 int opApply(scope int delegate(IrIndex, uint) dg) 316 { 317 if (array.isBig) { // external hashset 318 IrSmallSet.Bucket* buckets = cast(IrSmallSet.Bucket*)ir.getArray(array.arrayIndex); 319 uint _capacity = array.bigCapacity; 320 foreach(IrSmallSet.Bucket bucket; buckets[0.._capacity]) { 321 if (bucket.used) 322 if (int res = dg(bucket.key, bucket.value)) 323 return res; 324 } 325 } else { // small array 326 uint index = 0; 327 while(index < array.items.length) { 328 IrIndex item = array.items[index]; 329 if (item.isUndefined) return 0; // No more items 330 331 // count equal keys 332 uint startIndex = index; 333 ++index; 334 335 while(index < array.items.length) { 336 IrIndex item2 = array.items[index]; // will read extra item in the `copy` if we have max items 337 if (item == item2) ++index; // will be false when we reach undefined item at the end 338 else break; 339 } 340 uint numItems = index - startIndex; 341 342 if (int res = dg(item, numItems)) return res; 343 } 344 } 345 return 0; 346 } 347 }