/// Copyright: Copyright (c) 2017-2020 Andrey Penechko.
/// License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
/// Authors: Andrey Penechko.

module vox.ir.ir_small_set;

import std.bitmanip : bitfields;
import std.format : formattedWrite;
import vox.all;

/// A multiset of IrIndex values.
/// Stores 0-4 items inline.
/// Allocates memory from IrFuncStorage.arrayBuffer for sets with more than 4 items.
/// IrIndex.init is used as the empty key; it is an illegal key.
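///
/// A minimal usage sketch (assumes an `IrBuilder* builder` and two IrIndex values `a` and `b`
/// taken from the surrounding compilation context):
/// ---
/// IrSmallSet set;
/// set.put(builder, a);
/// set.put(builder, a);
/// set.put(builder, b);
/// assert(set.contains(builder.ir, a) == 2); // `a` was added twice
/// assert(set.remove(builder.ir, b) == 1);   // the single `b` is removed
/// set.free(builder.ir);
/// ---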
struct IrSmallSet
{
	union {
		// items are not deduplicated, but equal items are stored sequentially
		IrIndex[4] items; // items[0].kind != IrValueKind.array
		struct {
			// .kind == IrValueKind.array
			// offset into IrFuncStorage.arrayBuffer
			IrIndex arrayIndex;
			mixin(bitfields!(
				// Number of buckets with items in the multihashset
				uint, "numUniqueKeys",   23,
				// capacity is bigCapacity
				uint, "capacityPower",  5,
				// 4 bits need to be clear, so that IrIndex.kind == IrValueKind.none
				// important for automatic index fixing after moving
				// when switching from built-in items to external array
				// items[1] must be cleared
				uint, "", 4
			));
			mixin(bitfields!(
				// Total number of items inside the multihashset (sum of the values of all unique keys)
				uint, "numItems",   28,
				// 4 bits need to be clear, so that IrIndex.kind == IrValueKind.none
				// important for automatic index fixing after moving
				// when switching from built-in items to external array
				// items[2] must be cleared
				uint, "", 4
			));
		}
	}

	struct Bucket
	{
		IrIndex key;
		// stores the number of items with this key
		// always >= 1 for used buckets
		uint value;

		bool used() const { return key.isDefined; }
		bool empty() const { return key.isUndefined; }
	}

	bool isBig() const {
		return items[0].kind == IrValueKind.array;
	}

	private uint bigCapacity() const { return 1 << capacityPower; }

	uint capacity() const {
		if (isBig) return bigCapacity;
		else return 4;
	}

	uint length() const {
		if (isBig) return numItems;
		foreach_reverse(i, item; items) {
			if (item.isDefined) return cast(uint)(i+1);
		}
		return 0;
	}

	bool empty() const { return length == 0; }

	void free(IrFunction* ir) {
		if (isBig) {
			ir.freeIrArray(arrayIndex, bigCapacity);
		}
		items[] = IrIndex.init;
	}

	IrSmallSetIterator range(IrFunction* ir) const {
		return IrSmallSetIterator(this, ir);
	}

	void put(IrBuilder* builder, IrIndex key) {
		assert(key.isDefined, "Invalid key");
		if (isBig) {
			Bucket* buckets = cast(Bucket*)builder.ir.getArray(arrayIndex);
			uint _capacity = bigCapacity;
			uint maxLength = (_capacity / 4) * 3; // Should be optimized into shr + lea

			if (numUniqueKeys == maxLength) {
				// prevent an unnecessary grow when the key is already present
				auto index = findIndex(buckets, key);
				if (index != size_t.max) return;

				uint newCapacity = _capacity * 2;
				IrIndex newArrayIndex = builder.allocateIrArray(newCapacity * 2); // a Bucket is twice the size of an IrIndex
				Bucket* newBuckets = cast(Bucket*)builder.ir.getArray(newArrayIndex);
				newBuckets[0..newCapacity] = Bucket.init;

				// copy buckets
				numUniqueKeys = 0; // needed because `putKey` will increment per item
				numItems = 0;
				capacityPower = capacityPower + 1; // used by putKey
				foreach (Bucket oldBucket; buckets[0.._capacity]) {
					if (oldBucket.used) {
						putKey(newBuckets, oldBucket);
					}
				}

				// free old array
				builder.ir.freeIrArray(arrayIndex, _capacity * 2); // a Bucket is twice the size of an IrIndex

				// update instance
				arrayIndex = newArrayIndex; // old index used for freeIrArray
				buckets = newBuckets;
			}

			putKey(buckets, Bucket(key, 1));
		}
		else
		{
			foreach (ref item; items) {
				if (item.isUndefined) {
					item = key;
					return;
				}
				// equal keys will be sequential in the array
				if (key.asUint < item.asUint) {
					swap(key, item);
				}
			}

			// no free slots, switch to multihashset
			IrIndex newArrayIndex = builder.allocateIrArray(16); // 8 buckets; a Bucket is twice the size of an IrIndex
			Bucket* newBuckets = cast(Bucket*)builder.ir.getArray(newArrayIndex);
			auto itemsCopy = items;

			// update instance
			// clear bitfields and all lengths
			items[] = IrIndex.init;
			arrayIndex = newArrayIndex;
			capacityPower = 3; // 1 << 3 == 8

			newBuckets[0..8] = Bucket.init;
			//writefln("newBuckets %s %s %s", newArrayIndex, newBuckets, newBuckets[0..8]);
			foreach(item; itemsCopy)
				putKey(newBuckets, Bucket(item, 1));
			putKey(newBuckets, Bucket(key, 1));
		}
	}

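	// Robin Hood insertion into the external bucket array.
	// DIB = distance of a bucket from the slot its key hashes to.
	// While probing, the entry being inserted is swapped with a resident entry that has a
	// smaller DIB, which keeps probe sequences short; an existing key only accumulates its count.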
	private void putKey(Bucket* buckets, Bucket bucket) {
		uint _capacity = bigCapacity;
		size_t index = getHash(bucket.key) & (_capacity - 1); // % capacity
		size_t inserted_dib = 0;
		while (true) {
			if (buckets[index].key == bucket.key) {
				//writefln("add %s %s %s %s, bigCap %s same", bucket.key, bucket.value, numItems, numUniqueKeys, _capacity);
				buckets[index].value += bucket.value; // same key, accumulate the count
				numItems = numItems + bucket.value;
				return;
			}
			if (buckets[index].empty) { // bucket is empty, we add new key
				//writefln("add %s %s %s %s, bigCap %s new", bucket.key, bucket.value, numItems, numUniqueKeys, _capacity);
				numUniqueKeys = numUniqueKeys + 1;
				numItems = numItems + bucket.value;
				buckets[index] = bucket;
				return;
			}
			size_t current_initial_bucket = getHash(buckets[index].key) & (_capacity - 1); // % capacity
			ptrdiff_t current_dib = index - current_initial_bucket;
			if (current_dib < 0) current_dib += _capacity;
			// swap inserted item with current only if DIB(current) < DIB(inserted item)
			if (inserted_dib > current_dib) {
				swap(bucket, buckets[index]);
				inserted_dib = current_dib;
			}
			++inserted_dib;
			index = (index + 1) & (_capacity - 1); // % capacity
		}
	}

	/// Returns number of items equal to key
	uint contains(IrFunction* ir, IrIndex key) {
		assert(key.isDefined, "Invalid key");
		if (isBig) {
			Bucket* buckets = cast(Bucket*)ir.getArray(arrayIndex);
			auto index = findIndex(buckets, key);
			if (index == size_t.max) return 0;
			return buckets[index].value;
		} else {
			uint count = 0;
			foreach (item; items)
				if (item == key) ++count;
			return count;
		}
	}

	/// Returns number of replaced keys
	uint replace(IrFunction* ir, IrIndex what, IrIndex byWhat) {
		assert(what.isDefined, "Invalid what");
		assert(byWhat.isDefined, "Invalid byWhat");
		if (isBig) {
			Bucket* buckets = cast(Bucket*)ir.getArray(arrayIndex);
			if (auto numRemoved = removeBig(buckets, what)) {
				putKey(buckets, Bucket(byWhat, numRemoved));
				return numRemoved;
			}
		} else {
			uint numReplaced = 0;
			foreach (i, ref item; items) {
				if (item == what) {
					++numReplaced;
					item = byWhat;
				}
			}
			return numReplaced;
		}
		return 0;
	}

	/// Returns number of deleted keys
	uint remove(IrFunction* ir, IrIndex what)
	{
		if (isBig) { // external array
			Bucket* buckets = cast(Bucket*)ir.getArray(arrayIndex);
			return removeBig(buckets, what);
		}
		else
			return removeSmall(what);
	}

	private uint removeSmall(IrIndex key) {
		// no harm if key or items[i] is null here
		uint numRemoved = 0;
		foreach (i, ref item; items) {
			if (item == key) {
				++numRemoved;
				item = IrIndex.init;
			} else {
				swap(items[i - numRemoved], item);
			}
		}
		return numRemoved;
	}

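	// Robin Hood removal: probe for the key (stopping early once the searched DIB exceeds the
	// resident bucket's DIB), then backward-shift the following cluster until an empty bucket
	// or a bucket with DIB 0 is reached, so no tombstones are needed.
	// Returns the removed bucket's value (item count), or 0 if the key is not present.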
	private uint removeBig(Bucket* buckets, IrIndex key) {
		if (numUniqueKeys == 0) return 0;
		uint _capacity = bigCapacity;
		size_t index = getHash(key) & (_capacity - 1); // % capacity
		size_t searched_dib = 0;
		while (true) { // Find entry to delete
			if (buckets[index].empty) return 0; // empty bucket
			if (buckets[index].key == key) {
				//writefln("remove %s %s %s %s bigCap %s", buckets[index].key, buckets[index].value, numItems, numUniqueKeys, _capacity);
				numUniqueKeys = numUniqueKeys - 1;
				numItems = numItems - buckets[index].value;
				break; // found the item
			}
			size_t current_initial_bucket = getHash(buckets[index].key) & (_capacity - 1); // % capacity
			ptrdiff_t current_dib = index - current_initial_bucket;
			if (current_dib < 0) current_dib += _capacity;
			if (searched_dib > current_dib) return 0; // item must have been inserted here
			++searched_dib;
			index = (index + 1) & (_capacity - 1); // % capacity
		}
		uint value = buckets[index].value;
		while (true) { // Backward shift
			size_t nextIndex = (index + 1) & (_capacity - 1); // % capacity
			if (buckets[nextIndex].empty) break; // Found stop bucket (empty)
			size_t current_initial_bucket = getHash(buckets[nextIndex].key) & (_capacity - 1); // % capacity
			if (current_initial_bucket == nextIndex) break; // Found stop bucket (0 DIB)
			buckets[index] = buckets[nextIndex]; // shift item left
			index = nextIndex;
		}
		buckets[index] = Bucket.init; // Mark empty
		return value;
	}

	pragma(inline, true)
	private static uint getHash(IrIndex key) {
		return key.asUint; // * 0xdeece66d + 0xb;
	}

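	// Robin Hood lookup: probing stops as soon as the searched DIB exceeds the resident
	// bucket's DIB, because the key would have been placed there on insertion.
	// Returns the bucket index, or size_t.max if the key is not present.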
	private size_t findIndex(Bucket* buckets, IrIndex key) const
	{
		if (numUniqueKeys == 0) return size_t.max;
		uint _capacity = bigCapacity;
		auto index = getHash(key) & (_capacity - 1); // % capacity
		size_t searched_dib = 0;
		while (true) {
			if (buckets[index].empty) return size_t.max; // empty key
			if (buckets[index].key == key) return index;
			size_t current_initial_bucket = getHash(buckets[index].key) & (_capacity - 1); // % capacity
			ptrdiff_t current_dib = index - current_initial_bucket;
			if (current_dib < 0) current_dib += _capacity;
			if (searched_dib > current_dib) return size_t.max; // item must have been inserted here
			++searched_dib;
			index = (index + 1) & (_capacity - 1); // % capacity
		}
	}
}

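/// Iterates over the unique keys of an IrSmallSet together with their multiplicities.
/// A hypothetical usage sketch (`set` is an IrSmallSet, `ir` an IrFunction*):
/// ---
/// foreach (IrIndex key, uint count; set.range(ir)) {
///     // `count` is the number of times `key` was put into the set
/// }
/// ---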
struct IrSmallSetIterator
{
	IrSmallSet array;
	IrFunction* ir;

	int opApply(scope int delegate(IrIndex, uint) dg)
	{
		if (array.isBig) { // external hashset
			IrSmallSet.Bucket* buckets = cast(IrSmallSet.Bucket*)ir.getArray(array.arrayIndex);
			uint _capacity = array.bigCapacity;
			foreach(IrSmallSet.Bucket bucket; buckets[0.._capacity]) {
				if (bucket.used)
					if (int res = dg(bucket.key, bucket.value))
						return res;
			}
		} else { // small array
			uint index = 0;
			while(index < array.items.length) {
				IrIndex item = array.items[index];
				if (item.isUndefined) return 0; // No more items

				// count equal keys
				uint startIndex = index;
				++index;

				while(index < array.items.length) {
					IrIndex item2 = array.items[index]; // will read extra item in the `copy` if we have max items
					if (item == item2) ++index; // will be false when we reach undefined item at the end
					else break;
				}
				uint numItems = index - startIndex;

				if (int res = dg(item, numItems)) return res;
			}
		}
		return 0;
	}
}