1 /// Copyright: Copyright (c) 2017-2020 Andrey Penechko.
2 /// License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
3 /// Authors: Andrey Penechko.
4 
// Missing cases of the System V ABI implementation:
6 // - Aggregates with misaligned members
7 // - Aggregates with alignment > 16 bytes
8 // - xmm/ymm/zmm register passing
9 // - x87
10 module vox.be.ir_lower;
11 
12 import std.bitmanip : bitfields;
13 import std.stdio;
14 import vox.all;
15 
16 
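/// Lowers the optimized IR of `func` into a form closer to the target machine:
/// the optimized IR is copied, then the ABI, aggregate and GEP lowering passes
/// run over the copy, each optionally validated and dumped.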
17 void pass_ir_lower(CompilationContext* c, ModuleDeclNode* mod, FunctionDeclNode* func)
18 {
19 	IrFunction* optimizedIrData = c.getAst!IrFunction(func.backendData.optimizedIrData);
20 	func.backendData.loweredIrData = c.appendAst!IrFunction;
21 	IrFunction* loweredIrData = c.getAst!IrFunction(func.backendData.loweredIrData);
22 	*loweredIrData = *optimizedIrData; // copy
23 
24 	IrBuilder builder;
25 	builder.beginDup(loweredIrData, c);
26 	IrIndex funcIndex = func.getIrIndex(c);
27 
28 	void doPass(FuncPassIr pass, string passName) {
29 		pass(c, loweredIrData, funcIndex, builder);
30 		if (c.validateIr)
31 			validateIrFunction(c, loweredIrData, passName);
32 		if (c.printIrLowerEach && c.printDumpOf(func)) dumpFunction(c, loweredIrData, passName);
33 	}
34 
35 	doPass(&func_pass_lower_abi, "Lower ABI");
36 	doPass(&func_pass_lower_aggregates, "Lower aggregates");
37 	doPass(&func_pass_lower_gep, "Lower GEP");
38 
39 	if (!c.printIrLowerEach && c.printIrLower && c.printDumpOf(func)) dumpFunction(c, loweredIrData, "IR lowering all");
40 	builder.finalizeIr;
41 }
42 
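/// Returns true if a value of `type` can live in a single general-purpose register.
/// Aggregates qualify only when their size is exactly 1, 2, 4 or 8 bytes;
/// all non-aggregate types are assumed to fit.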
43 bool fitsIntoRegister(IrIndex type, CompilationContext* c) {
44 	if (type.isTypeAggregate) {
45 		switch(c.types.typeSize(type)) {
46 			case 1: return true;
47 			case 2: return true;
48 			case 4: return true;
49 			case 8: return true;
50 			default: return false;
51 		}
52 	}
53 	return true;
54 }
55 
/// Converts a complex constant that fits into a single register into a plain integer constant
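/// E.g. a zero-initialized 4-byte aggregate constant becomes the i32 constant 0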
57 IrIndex simplifyConstant(IrIndex index, CompilationContext* c)
58 {
59 	union U {
60 		ulong bufferValue;
61 		ubyte[8] buffer;
62 	}
63 	U data;
64 	uint typeSize;
65 	if (index.isConstantZero)
66 	{
67 		typeSize = c.types.typeSize(index.typeOfConstantZero);
68 	}
69 	else if (index.isConstantAggregate)
70 	{
71 		IrAggregateConstant* con = &c.constants.getAggregate(index);
72 		typeSize = c.types.typeSize(con.type);
73 	}
74 	else
75 	{
76 		return index;
77 	}
78 
79 	constantToMem(data.buffer[0..typeSize], index, c);
80 	return c.constants.add(sizeToIntType(typeSize, c), data.bufferValue);
81 }
82 
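/// Emits `ptr + offset` before `beforeInstr` and returns a pointer of type `ptrType`.
/// A zero offset is emitted as a plain move so the result is still a fresh value.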
83 IrIndex genAddressOffset(IrIndex ptr, uint offset, IrIndex ptrType, IrIndex beforeInstr, ref IrBuilder builder) {
84 	if (offset == 0) {
85 		ExtraInstrArgs extra = { type : ptrType };
86 		InstrWithResult movInstr = builder.emitInstrBefore!(IrOpcode.move)(beforeInstr, extra, ptr);
87 		return movInstr.result;
88 	} else {
89 		IrIndex offsetIndex = builder.context.constants.add(makeIrType(IrBasicType.i64), offset);
90 		ExtraInstrArgs extra = { type : ptrType };
91 		InstrWithResult addressInstr = builder.emitInstrBefore!(IrOpcode.add)(beforeInstr, extra, ptr, offsetIndex);
92 		return addressInstr.result;
93 	}
94 }
95 
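/// Copies `src` into the memory pointed to by `dst`: constants are written with a
/// store, everything else with a memory-to-memory copy instruction.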
96 IrIndex genCopy(IrIndex dst, IrIndex src, IrIndex beforeInstr, ref IrBuilder builder) {
97 	if (src.isSomeConstant)
98 		return builder.emitInstrBefore!(IrOpcode.store)(beforeInstr, ExtraInstrArgs(), dst, src);
99 	else
100 		return builder.emitInstrBefore!(IrOpcode.copy)(beforeInstr, ExtraInstrArgs(), dst, src);
101 }
102 
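/// Loads a value of the pointee type of `ptrType` from `ptr + offset`.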
103 IrIndex genLoad(IrIndex ptr, uint offset, IrIndex ptrType, IrIndex beforeInstr, ref IrBuilder builder) {
104 	ptr = genAddressOffset(ptr, offset, ptrType, beforeInstr, builder);
105 	IrIndex valType = builder.context.types.getPointerBaseType(ptrType);
106 	IrArgSize argSize = typeToIrArgSize(valType, builder.context);
107 	ExtraInstrArgs extra = { type : valType, argSize : argSize };
108 	auto instr = builder.emitInstrBefore!(IrOpcode.load)(beforeInstr, extra, ptr);
109 	return instr.result;
110 }
111 
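/// Lowering decision for a single virtual register, recorded while scanning the
/// instructions and applied in a separate fixup loop at the end of the pass.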
112 struct LowerVreg
113 {
114 	IrIndex redirectTo;
115 	LowerAggregateAs status;
116 }
117 
118 enum LowerAggregateAs : ubyte
119 {
120 	// no action needed
121 	none,
122 	// redirect all users to the value of `redirectTo` (possibly recursively, if target is also redirectToPointer)
123 	redirectToPointer,
	// Will remain in a register (used when the aggregate is <= 8 bytes)
125 	value,
126 	// No suitable memory found for the whole value graph
127 	newSlot,
128 	// same as newSlot, but no redirect needed
129 	newPhiSlot,
130 	// value graph starts from a unique load
131 	//uniqueLoadPtr,
132 	// value graph ends in a unique store
133 	//uniqueStorePtr,
134 }
135 
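/// Rewrites instructions that produce or consume aggregate values: small aggregates
/// are packed into integer registers, big ones are given stack slots and accessed
/// through pointers. Also unrolls branch_switch into a chain of compare branches.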
136 void func_pass_lower_aggregates(CompilationContext* c, IrFunction* ir, IrIndex funcIndex, ref IrBuilder builder)
137 {
138 	//writefln("lower_aggregates %s", c.idString(ir.name));
139 
140 	// buffer for call/instruction arguments
141 	//enum MAX_ARGS = 255;
142 	//IrIndex[MAX_ARGS] argBuffer = void;
143 
144 	LowerVreg[] vregInfos = makeParallelArray!LowerVreg(c, ir.numVirtualRegisters);
145 
146 	/*foreach (IrIndex vregIndex, ref IrVirtualRegister vreg; ir.virtualRegisters)
147 	{
148 		if (!vreg.type.isTypeAggregate) continue;
149 
150 		//writefln("- vreg %s", vregIndex);
151 
152 		//if (!vreg.definition.isInstruction) continue;
153 
154 		IrInstrHeader* definition = ir.getInstr(vreg.definition);
155 		if (definition.op != IrOpcode.load_aggregate) continue;
156 
157 		// we can omit stack allocation and reuse source memory
158 		if (!definition.isUniqueLoad) continue;
159 
160 		vregInfos[vregIndex.storageUintIndex].redirectTo = definition.arg(ir, 0);
161 		removeInstruction(ir, vreg.definition);
162 	}*/
163 
164 	// transforms instructions
165 	// gathers all registers to be promoted to pointer
166 	foreach (IrIndex blockIndex, ref IrBasicBlock block; ir.blocks)
167 	{
168 		foreach(IrIndex phiIndex, ref IrPhi phi; block.phis(ir))
169 		{
170 			IrIndex type = ir.getVirtReg(phi.result).type;
171 			if (!type.fitsIntoRegister(c)) {
172 				vregInfos[phi.result.storageUintIndex] = LowerVreg(phiIndex, LowerAggregateAs.newPhiSlot);
173 			}
174 		}
175 
176 		foreach(IrIndex instrIndex, ref IrInstrHeader instrHeader; block.instructions(ir))
177 		{
178 			switch(instrHeader.op)
179 			{
180 				case IrOpcode.store:
181 					IrIndex ptr = instrHeader.arg(ir, 0);
182 					IrIndex val = instrHeader.arg(ir, 1);
183 					if (ptr.isPhysReg || val.isPhysReg) break;
184 
185 					//writefln("- store %s %s %s", instrIndex, ptr, val);
186 					IrIndex ptrType = ir.getValueType(c, ptr);
187 					IrIndex valType = ir.getValueType(c, val);
188 
189 					// value will be replaced with pointer, replace store with copy
190 					if (!valType.fitsIntoRegister(c) && !val.isSomeConstant)
191 					{
192 						instrHeader.op = IrOpcode.copy;
193 					}
194 					break;
195 
196 				case IrOpcode.load_aggregate:
197 					//writefln("- load_aggregate %s", instrIndex);
198 					IrIndex ptr = instrHeader.arg(ir, 0);
199 					IrIndex ptrType = ir.getValueType(c, ptr);
200 					IrIndex base = c.types.getPointerBaseType(ptrType);
201 
202 					if (base.fitsIntoRegister(c))
203 					{
204 						IrArgSize argSize = typeToIrArgSize(base, c);
205 						ExtraInstrArgs extra = { result : instrHeader.result(ir), argSize : argSize };
206 						builder.emitInstrBefore!(IrOpcode.load)(instrIndex, extra, ptr);
207 					}
208 					else
209 					{
210 						// we can omit stack allocation and reuse source memory
211 						if (instrHeader.isUniqueLoad)
212 						{
213 							vregInfos[instrHeader.result(ir).storageUintIndex] = LowerVreg(instrHeader.arg(ir, 0), LowerAggregateAs.redirectToPointer);
214 						}
215 						else
216 						{
217 							genCopy(instrHeader.result(ir), instrHeader.arg(ir, 0), instrIndex, builder);
218 							vregInfos[instrHeader.result(ir).storageUintIndex] = LowerVreg(base, LowerAggregateAs.newSlot);
219 						}
220 					}
221 					removeInstruction(ir, instrIndex);
222 					break;
223 
224 				case IrOpcode.create_aggregate:
225 					//writefln("- create_aggregate %s", instrIndex);
226 					IrIndex type = ir.getVirtReg(instrHeader.result(ir)).type;
227 
228 					if (!type.fitsIntoRegister(c)) {
229 						IrTypeStruct* structType = &c.types.get!IrTypeStruct(type);
230 						IrIndex slot = builder.appendStackSlot(type, c.types.typeSizeAndAlignment(type), StackSlotKind.local);
231 						vregInfos[instrHeader.result(ir).storageUintIndex] = LowerVreg(slot, LowerAggregateAs.redirectToPointer);
232 
233 						IrIndex[] members = instrHeader.args(ir);
234 						c.assertf(members.length == structType.numMembers, "%s != %s", members.length, structType.numMembers);
235 
236 						foreach (i, IrTypeStructMember member; structType.members)
237 						{
238 							IrIndex ptrType = c.types.appendPtr(member.type);
239 							IrIndex ptr = genAddressOffset(slot, member.offset, ptrType, instrIndex, builder);
240 							if (member.type.fitsIntoRegister(c))
241 							{
242 								IrArgSize argSize = getTypeArgSize(member.type, c);
243 								ExtraInstrArgs extra = { argSize : argSize };
244 								builder.emitInstrBefore!(IrOpcode.store)(instrIndex, extra, ptr, members[i]);
245 							}
246 							else
247 							{
248 								if (members[i].isSomeConstant)
249 									builder.emitInstrBefore!(IrOpcode.store)(instrIndex, ExtraInstrArgs(), ptr, members[i]);
250 								else
251 									builder.emitInstrBefore!(IrOpcode.copy)(instrIndex, ExtraInstrArgs(), ptr, members[i]);
252 							}
253 						}
254 						//convertAggregateVregToPointer(instrHeader.result(ir), ir, builder);
255 						removeInstruction(ir, instrIndex);
256 					}
257 					else
258 						createSmallAggregate(instrIndex, type, instrHeader, ir, builder);
259 					break;
260 
261 				case IrOpcode.insert_element:
262 					//writefln("- insert_element %s", instrIndex);
263 					IrIndex[] args = instrHeader.args(ir);
264 					IrIndex aggrType = getValueType(args[0], ir, c);
265 					uint targetTypeSize = c.types.typeSize(aggrType);
266 					IrTypeStructMember member = c.types.getAggregateMember(aggrType, c, args[2..$]);
267 					IrIndex memberType = member.type;
268 					uint memberSize = c.types.typeSize(memberType);
269 					//writefln("insert %s into %s at %s", memberSize, targetTypeSize, member.offset);
270 
271 					if (!aggrType.fitsIntoRegister(c)) {
272 						IrIndex ptrType = c.types.appendPtr(memberType);
273 
274 						IrIndex ptr = args[0];
275 
276 						if (ptr.isSomeConstant) {
277 							// TODO: delay slot alloc
278 							IrIndex slot = builder.appendStackSlot(aggrType, c.types.typeSizeAndAlignment(aggrType), StackSlotKind.local);
							builder.emitInstrBefore!(IrOpcode.store)(instrIndex, ExtraInstrArgs(), slot, ptr);
280 							ptr = slot;
281 							vregInfos[instrHeader.result(ir).storageUintIndex] = LowerVreg(slot, LowerAggregateAs.redirectToPointer);
282 						} else {
283 							vregInfos[instrHeader.result(ir).storageUintIndex] = LowerVreg(ptr, LowerAggregateAs.redirectToPointer);
284 						}
285 
286 						IrIndex memberPtr = genAddressOffset(ptr, member.offset, ptrType, instrIndex, builder);
287 						if (memberType.fitsIntoRegister(c))
288 						{
289 							builder.emitInstrBefore!(IrOpcode.store)(instrIndex, ExtraInstrArgs(), memberPtr, args[1]);
290 						}
291 						else
292 						{
293 							builder.emitInstrBefore!(IrOpcode.copy)(instrIndex, ExtraInstrArgs(), memberPtr, args[1]);
294 						}
295 					}
296 					else
297 					{
298 						IrArgSize argSize = sizeToIrArgSize(targetTypeSize, c);
299 						IrIndex intType = sizeToIntType(targetTypeSize, c);
300 
301 						ulong aggregateMask = bitmask(targetTypeSize * 8);
302 						ulong headerMask = bitmask((memberSize + member.offset) * 8);
303 						ulong rightMask = bitmask(member.offset * 8);
						ulong holeMask = (aggregateMask ^ headerMask) | rightMask;
305 						//writefln("target %064b", aggregateMask);
306 						//writefln("header %064b", headerMask);
307 						//writefln("member %064b", rightMask);
308 						//writefln("hole   %064b", holeMask);
309 
310 						if (holeMask == 0) {
311 							// we will replace the whole aggregate
312 							// rewrite instruction into move
313 
314 							instrHeader.op = IrOpcode.move;
315 							args[0] = args[1];
316 							instrHeader.numArgs = 1;
317 							break;
318 						}
319 
320 						bool isBigConstant = argSizeIntUnsigned(holeMask) == IrArgSize.size64;
321 						IrIndex constIndex = c.constants.add(intType, holeMask);
322 
323 						if (isBigConstant)
324 						{
325 							// copy to temp register
326 							ExtraInstrArgs extra = { argSize : argSize, type : aggrType };
327 							constIndex = builder.emitInstrBefore!(IrOpcode.move)(instrIndex, extra, constIndex).result;
328 						}
329 
330 						ExtraInstrArgs extra3 = { argSize : argSize, type : aggrType };
331 						IrIndex maskedAggregate = builder.emitInstrBefore!(IrOpcode.and)(instrIndex, extra3, args[0], constIndex).result;
332 
333 						IrIndex memberValue = args[1];
334 						uint bit_offset = member.offset * 8;
335 
336 						if (memberSize < targetTypeSize) {
337 							ExtraInstrArgs extra = { argSize : argSize, type : memberType };
338 							switch(memberSize) { // zero extend 8 and 16 bit args to 32bit
339 								case 1: memberValue = builder.emitInstrBefore!(IrOpcode.zext)(instrIndex, extra, memberValue).result; break;
340 								case 2: memberValue = builder.emitInstrBefore!(IrOpcode.zext)(instrIndex, extra, memberValue).result; break;
341 								default: break;
342 							}
343 						}
344 
345 						// shift
346 						if (bit_offset != 0)
347 						{
348 							IrIndex rightArg = c.constants.add(intType, bit_offset);
349 							ExtraInstrArgs extra1 = { argSize : argSize, type : memberType };
350 							memberValue = builder.emitInstrBefore!(IrOpcode.shl)(instrIndex, extra1, memberValue, rightArg).result;
351 						}
352 
353 						// or
354 						ExtraInstrArgs extra2 = { argSize : argSize, type : aggrType, result : instrHeader.result(ir) };
355 						IrIndex result = builder.emitInstrBefore!(IrOpcode.or)(instrIndex, extra2, maskedAggregate, memberValue).result;
356 					}
357 					removeInstruction(ir, instrIndex);
358 					break;
359 
360 				case IrOpcode.get_element:
					// if the source is stored in a register, extract with bit manipulation; otherwise lower to GEP + load
362 
363 					//writefln("- get_element %s", instrIndex);
364 					// instruction is reused
365 
366 					IrIndex[] args = instrHeader.args(ir);
367 					IrIndex sourceType = getValueType(args[0], ir, c);
368 					uint sourceSize = c.types.typeSize(sourceType);
369 					IrTypeStructMember member = c.types.getAggregateMember(sourceType, c, args[1..$]);
370 					IrIndex resultType = member.type;
371 					uint resultSize = c.types.typeSize(resultType);
372 
373 					if (args[0].isSomeConstant) {
374 						IrIndex value = args[0];
375 						foreach (IrIndex memberIndex; args[1..$]) {
376 							value = c.constants.getAggregateMember(value, memberIndex, c);
377 						}
378 						vregInfos[instrHeader.result(ir).storageUintIndex] = LowerVreg(value, LowerAggregateAs.value);
379 						removeInstruction(ir, instrIndex);
380 						break;
381 					}
382 
383 					if (sourceType.fitsIntoRegister(c))
384 					{
385 						IrIndex intType = sizeToIntType(sourceSize, c);
386 						// do simple variant where all indices are constant
387 						IrIndex value = args[0];
388 						if (member.offset > 0)
389 						{
390 							// shift right
391 							IrIndex rightArg = c.constants.add(intType, member.offset * 8);
392 							ExtraInstrArgs extra = { argSize : getTypeArgSize(sourceType, c), type : sourceType };
393 							value = builder.emitInstrBefore!(IrOpcode.lshr)(instrIndex, extra, value, rightArg).result;
394 						}
395 
396 						// mask if not 1, 2, 4 or 8 bytes in size
397 						if (!resultType.fitsIntoRegister(c))
398 						{
399 							// and
							IrIndex mask = c.constants.add(intType, bitmask(resultSize * 8));
401 							ExtraInstrArgs extra = { type : member.type };
402 							value = builder.emitInstrBefore!(IrOpcode.and)(instrIndex, extra, value, mask).result;
403 						}
404 						else
405 						{
406 							if (resultSize == sourceSize) {
407 								ExtraInstrArgs extra = { type : member.type };
408 								value = builder.emitInstrBefore!(IrOpcode.move)(instrIndex, extra, value).result;
409 							} else {
410 								ExtraInstrArgs extra = { argSize : sizeToIrArgSize(resultSize, c), type : member.type };
411 								value = builder.emitInstrBefore!(IrOpcode.trunc)(instrIndex, extra, value).result;
412 							}
413 						}
414 
415 						vregInfos[instrHeader.result(ir).storageUintIndex] = LowerVreg(value, LowerAggregateAs.value);
416 						removeInstruction(ir, instrIndex);
417 						break;
418 					}
419 
420 					// reuse the same indices from get_element and perform GEP on them, then do load
421 					instrHeader.op = IrOpcode.get_element_ptr_0;
422 
423 					IrIndex ptrType = c.types.appendPtr(resultType);
424 					if (resultType.fitsIntoRegister(c))
425 					{
426 						IrIndex loadResult = instrHeader.result(ir);
427 						IrIndex gepResult = builder.addVirtualRegister(instrIndex, ptrType);
428 						instrHeader.result(ir) = gepResult;
429 
430 						ExtraInstrArgs extra2 = { argSize : getTypeArgSize(resultType, c), result : loadResult };
431 						IrIndex loadInstr = builder.emitInstr!(IrOpcode.load)(extra2, gepResult).instruction;
432 						builder.insertAfterInstr(instrIndex, loadInstr);
433 					}
434 
435 					ir.getVirtReg(instrHeader.result(ir)).type = ptrType;
436 					break;
437 
438 				case IrOpcode.get_aggregate_slice:
439 					//writefln("- get_aggregate_slice %s", instrIndex);
440 					IrIndex[] args = instrHeader.args(ir);
441 
442 					long indexVal = c.constants.get(args[1]).i64;
443 					IrIndex addr;
444 					if (indexVal == 0) {
445 						ExtraInstrArgs extra1 = { type : c.i64PtrType };
446 						InstrWithResult movInstr = builder.emitInstrBefore!(IrOpcode.move)(instrIndex, extra1, args[0]);
447 						addr = movInstr.result;
448 					} else {
449 						ExtraInstrArgs extra1 = { type : c.i64PtrType };
450 						InstrWithResult addressInstr = builder.emitInstrBefore!(IrOpcode.add)(instrIndex, extra1, args[0], args[1]);
451 						addr = addressInstr.result;
452 					}
453 
454 					ExtraInstrArgs extra2 = { argSize : IrArgSize.size64, type : makeIrType(IrBasicType.i64) };
455 					InstrWithResult loadInstr = builder.emitInstrBefore!(IrOpcode.load)(instrIndex, extra2, addr);
456 
457 					instrHeader.numArgs = 1;
458 					instrHeader.op = IrOpcode.move;
459 					removeUser(c, ir, instrIndex, args[0]);
460 					args[0] = loadInstr.result;
461 					builder.addUser(instrIndex, args[0]);
462 					break;
463 
464 				case IrOpcode.branch_switch:
465 					// unroll switch into a chain of compare branches
466 					IrIndex[] args = instrHeader.args(ir);
467 					IrSmallArray successors = block.successors;
468 					block.successors = IrSmallArray.init;
469 					IrIndex[] succ = successors.data(ir);
470 
471 					assert(args.length == succ.length);
472 					assert(args.length > 0);
473 					IrIndex value = args[0];
474 					IrIndex valueType = ir.getValueType(c, value);
475 					IrArgSize argSize = typeToIrArgSize(valueType, c);
476 					IrIndex defaultBlock = succ[0];
477 
478 					// replace switch with branch to first case block
479 					ExtraInstrArgs extra = { cond : IrBinaryCondition.eq, argSize : argSize };
480 					IrIndex firstInstr = builder.emitInstr!(IrOpcode.branch_binary)(extra, value, args[1]);
481 					replaceInstruction(ir, instrIndex, firstInstr);
482 					block.successors.append(&builder, succ[1]);
483 					// predecessor is already correct for this block
484 
485 					// build a chain
486 					IrIndex lastBlock = blockIndex;
487 					foreach(i; 2..args.length)
488 					{
489 						IrIndex branchBlockIndex = builder.addBasicBlock;
490 						IrBasicBlock* branchBlock = ir.getBlock(branchBlockIndex);
491 
492 						builder.addBlockTarget(lastBlock, branchBlockIndex);
493 						ir.getBlock(succ[i]).predecessors[0, ir] = branchBlockIndex;
494 						branchBlock.successors.append(&builder, succ[i]);
495 
496 						branchBlock.isSealed = true;
497 						branchBlock.isFinished = true;
498 
499 						builder.emitInstr!(IrOpcode.branch_binary)(branchBlockIndex, extra, value, args[i]);
500 						moveBlockAfter(ir, branchBlockIndex, lastBlock);
501 						lastBlock = branchBlockIndex;
502 					}
503 
					successors.free(ir);
506 
507 					ir.getBlock(lastBlock).successors.append(&builder, defaultBlock);
508 					ir.getBlock(defaultBlock).predecessors[0, ir] = lastBlock;
509 					break;
510 
511 				default:
512 					//c.internal_error("IR lower unimplemented IR instr %s", cast(IrOpcode)instrHeader.op);
513 					break;
514 			}
515 		}
516 	}
517 
518 	foreach(i, ref info; vregInfos)
519 	{
520 		IrIndex vregIndex = IrIndex(cast(uint)i, IrValueKind.virtualRegister);
521 		final switch (info.status) with(LowerAggregateAs) {
522 			case none: break;
523 			case value:
524 				//writefln("Redirect value %s -> %s", IrIndex(cast(uint)i, IrValueKind.virtualRegister), info.redirectTo);
525 				builder.redirectVregUsersTo(vregIndex, info.redirectTo);
526 				break;
527 			case redirectToPointer:
528 				IrIndex redirectTo = info.redirectTo;
529 				//writef("Redirect ptr %s -> %s", vregIndex, info.redirectTo);
530 				while (redirectTo.isVirtReg)
531 				{
532 					if (vregInfos[redirectTo.storageUintIndex].redirectTo.isDefined) {
533 						redirectTo = vregInfos[redirectTo.storageUintIndex].redirectTo;
534 						//writef(" -> %s", redirectTo);
535 					}
536 					else break;
537 				}
538 				//writeln;
539 				builder.redirectVregUsersTo(vregIndex, redirectTo);
540 				break;
541 			case newSlot:
542 				IrIndex type = info.redirectTo;
543 				IrIndex slot = builder.appendStackSlot(type, c.types.typeSizeAndAlignment(type), StackSlotKind.local);
544 				builder.redirectVregUsersTo(vregIndex, slot);
545 				break;
546 			case newPhiSlot:
547 				IrVirtualRegister* vreg = ir.getVirtReg(vregIndex);
548 				IrPhi* phi = ir.getPhi(vreg.definition);
549 				IrIndex type = vreg.type;
550 				IrIndex slot = builder.appendStackSlot(type, c.types.typeSizeAndAlignment(type), StackSlotKind.local);
551 				IrIndex[] predecessors = ir.getBlock(phi.blockIndex).predecessors.data(ir);
552 				foreach(size_t arg_i, ref IrIndex phiArg; phi.args(ir)) {
553 					if (phiArg.isSomeConstant) {
554 						builder.emitInstrBefore!(IrOpcode.store)(ir.getBlock(predecessors[arg_i]).lastInstr, ExtraInstrArgs(), slot, phiArg);
555 						phiArg = slot;
556 					}
557 				}
558 				vreg.type = c.types.appendPtr(type);
559 				break;
560 		}
561 	}
562 }
563 
564 // pack values and constants into a register via `shift` and `binary or` instructions
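// E.g. for a 4-byte struct with members at offsets 0 and 2, constant members are
// folded into a single integer constant, while each non-constant member is
// zero-extended, shifted to its byte offset (no shift for offset 0) and or-ed in.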
565 void createSmallAggregate(IrIndex instrIndex, IrIndex type, ref IrInstrHeader instrHeader, IrFunction* ir, ref IrBuilder builder)
566 {
567 	CompilationContext* c = builder.context;
568 
569 	uint targetTypeSize = c.types.typeSize(type);
570 	IrArgSize argSize = sizeToIrArgSize(targetTypeSize, c);
571 	c.assertf(targetTypeSize <= 8, "aggregate is too big (%s) expected <= 8 bytes", targetTypeSize);
	c.assertf(instrHeader.numArgs <= 8, "too many args %s", instrHeader.numArgs);
573 	ulong constant = 0;
	// number of non-constant values currently buffered in argBuffer;
	// after each insertAt call this is either 0 or 1
576 	uint numBufferedValues = 0;
577 
578 	IrIndex[2] argBuffer;
579 
580 	void insertNonConstant(IrIndex value, uint bit_offset, uint size)
581 	{
582 		if (size < targetTypeSize) {
583 			ExtraInstrArgs extra = { argSize : argSize, type : type };
584 			switch(size) { // zero extend 8 and 16 bit args to 32bit
585 				case 1: value = builder.emitInstrBefore!(IrOpcode.zext)(instrIndex, extra, value).result; break;
586 				case 2: value = builder.emitInstrBefore!(IrOpcode.zext)(instrIndex, extra, value).result; break;
587 				default: break;
588 			}
589 		}
590 
591 		// shift
592 		if (bit_offset == 0)
593 			argBuffer[numBufferedValues] = value;
594 		else
595 		{
596 			IrIndex rightArg = c.constants.add(makeIrType(IrBasicType.i8), bit_offset);
597 			ExtraInstrArgs extra1 = { argSize : argSize, type : type };
598 			IrIndex shiftRes = builder.emitInstrBefore!(IrOpcode.shl)(instrIndex, extra1, value, rightArg).result;
599 			argBuffer[numBufferedValues] = shiftRes;
600 		}
601 		++numBufferedValues;
602 
603 		if (numBufferedValues == 2)
604 		{
605 			// or
606 			ExtraInstrArgs extra2 = { argSize : argSize, type : type };
607 			argBuffer[0] = builder.emitInstrBefore!(IrOpcode.or)(instrIndex, extra2, argBuffer[0], argBuffer[1]).result;
608 			numBufferedValues = 1;
609 		}
610 	}
611 
612 	void insertAt(IrIndex value, uint offset, uint size)
613 	{
614 		if (value.isSimpleConstant) {
615 			constant |= c.constants.get(value).i64 << (offset * 8);
616 		} else {
617 			insertNonConstant(value, offset * 8, size);
618 		}
619 	}
620 
621 	switch(type.typeKind) with(IrTypeKind) {
622 		case struct_t:
623 			IrTypeStruct* structType = &c.types.get!IrTypeStruct(type);
624 			IrIndex[] args = instrHeader.args(ir);
625 			foreach_reverse (i, IrTypeStructMember member; structType.members)
626 			{
627 				uint memberSize = c.types.typeSize(member.type);
628 				insertAt(args[i], member.offset, memberSize);
629 			}
630 			break;
631 		case array:
632 			IrTypeArray* arrayType = &c.types.get!IrTypeArray(type);
633 			uint elemSize = c.types.typeSize(arrayType.elemType);
634 			IrIndex[] args = instrHeader.args(ir);
635 			foreach_reverse (i; 0..arrayType.numElements)
636 			{
637 				insertAt(args[i], i * elemSize, elemSize);
638 			}
639 			break;
640 		default: assert(false);
641 	}
642 
643 	IrIndex targetIntType = sizeToIntType(targetTypeSize, c);
644 	IrIndex constIndex = c.constants.add(targetIntType, constant);
645 	IrIndex result;
646 	if (numBufferedValues == 1)
647 	{
648 		if (constant == 0)
649 		{
650 			result = argBuffer[0]; // only non-constant data
651 			if (!c.types.isSameType(getValueType(result, ir, c), type)) {
652 				ExtraInstrArgs extra3 = { type : type };
653 				result = builder.emitInstrBefore!(IrOpcode.move)(instrIndex, extra3, result).result; // bitcast
654 			}
655 		}
656 		else
657 		{
658 			ExtraInstrArgs extra3 = { argSize : argSize, type : type };
659 			result = builder.emitInstrBefore!(IrOpcode.or)(instrIndex, extra3, argBuffer[0], constIndex).result; // both
660 		}
661 	}
662 	else
663 	{
664 		assert(numBufferedValues == 0);
665 		result = constIndex;
666 	}
667 	builder.redirectVregUsersTo(instrHeader.result(ir), result);
668 	removeInstruction(ir, instrIndex);
669 }
670 
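/// Replaces every get_element_ptr / get_element_ptr_0 instruction with explicit
/// pointer arithmetic (see lowerGEP below).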
671 void func_pass_lower_gep(CompilationContext* context, IrFunction* ir, IrIndex funcIndex, ref IrBuilder builder)
672 {
673 	foreach (IrIndex blockIndex, ref IrBasicBlock block; ir.blocks)
674 	{
675 		foreach(IrIndex instrIndex, ref IrInstrHeader instrHeader; block.instructions(ir))
676 		{
677 			switch(cast(IrOpcode)instrHeader.op) with(IrOpcode)
678 			{
679 				case get_element_ptr, get_element_ptr_0:
680 					lowerGEP(context, builder, instrIndex, instrHeader);
681 					break;
682 				default: break;
683 			}
684 		}
685 	}
686 }
687 
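/// Lowers a single GEP into add/umul address arithmetic: constant indices turn into
/// `base + byteOffset`, dynamic array indices into `base + index * elemSize`.
/// E.g. indexing member 1 of a naturally aligned struct { i32, i64 } yields `add ptr, 8`.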
688 // TODO some typecasts are needed for correct typing
689 void lowerGEP(CompilationContext* context, ref IrBuilder builder, IrIndex instrIndex, ref IrInstrHeader instrHeader)
690 {
691 	IrIndex buildOffset(IrIndex basePtr, long offsetVal, IrIndex resultType) {
692 		if (offsetVal == 0) {
693 			// Shortcut for 0-th index
694 			IrIndex basePtrType = getValueType(basePtr, builder.ir, context);
695 			// TODO: prefer proper typing for now, until IR lowering is implemented
696 			if (basePtrType == resultType) return basePtr;
697 
698 			ExtraInstrArgs extra = { type : resultType };
699 			InstrWithResult instr = builder.emitInstr!(IrOpcode.conv)(extra, basePtr);
700 			builder.insertBeforeInstr(instrIndex, instr.instruction);
701 			return instr.result;
702 		} else {
703 			IrIndex offset = context.constants.add(makeIrType(IrBasicType.i64), offsetVal);
704 
705 			ExtraInstrArgs extra = { type : resultType };
706 			InstrWithResult addressInstr = builder.emitInstr!(IrOpcode.add)(extra, basePtr, offset);
707 			builder.insertBeforeInstr(instrIndex, addressInstr.instruction);
708 
709 			return addressInstr.result;
710 		}
711 	}
712 
713 	IrIndex buildIndex(IrIndex basePtr, IrIndex index, uint elemSize, IrIndex resultType)
714 	{
715 		IrIndex scale = context.constants.add(makeIrType(IrBasicType.i64), elemSize);
716 		IrIndex indexVal = index;
717 
718 		if (elemSize > 1) {
719 			ExtraInstrArgs extra1 = { type : makeIrType(IrBasicType.i64) };
720 			InstrWithResult offsetInstr = builder.emitInstr!(IrOpcode.umul)(extra1, index, scale);
721 			builder.insertBeforeInstr(instrIndex, offsetInstr.instruction);
722 			indexVal = offsetInstr.result;
723 		}
724 
725 		ExtraInstrArgs extra2 = { type : resultType };
726 		InstrWithResult addressInstr = builder.emitInstr!(IrOpcode.add)(extra2, basePtr, indexVal);
727 		builder.insertBeforeInstr(instrIndex, addressInstr.instruction);
728 
729 		return addressInstr.result;
730 	}
731 
732 	IrIndex aggrPtr = instrHeader.arg(builder.ir, 0); // aggregate ptr
733 	IrIndex aggrPtrType = getValueType(aggrPtr, builder.ir, context);
734 
735 	context.assertf(aggrPtrType.isTypePointer,
736 		"First argument to GEP instruction must be pointer, not %s %s", IrIndexDump(aggrPtrType, &builder), IrIndexDump(aggrPtr, &builder));
737 
738 	IrIndex aggrType = context.types.getPointerBaseType(aggrPtrType);
739 	uint aggrSize = context.types.typeSize(aggrType);
740 
741 	IrIndex[] args;
742 
743 	// get_element_ptr_0 first index is zero, hence no op
744 	if (cast(IrOpcode)instrHeader.op == IrOpcode.get_element_ptr)
745 	{
746 		IrIndex firstIndex = instrHeader.arg(builder.ir, 1);
747 
748 		if (firstIndex.isSimpleConstant) {
749 			long indexVal = context.constants.get(firstIndex).i64;
750 			long offset = indexVal * aggrSize;
751 			aggrPtr = buildOffset(aggrPtr, offset, aggrPtrType);
752 		} else {
753 			aggrPtr = buildIndex(aggrPtr, firstIndex, aggrSize, aggrPtrType);
754 		}
755 
756 		args = instrHeader.args(builder.ir)[2..$]; // 0 is ptr, 1 is first index
757 	}
758 	else
759 	{
760 		args = instrHeader.args(builder.ir)[1..$]; // 0 is ptr
761 	}
762 
763 	foreach(IrIndex memberIndex; args)
764 	{
765 		final switch(aggrType.typeKind)
766 		{
767 			case IrTypeKind.basic:
768 				context.internal_error("Cannot index basic type %s", aggrType.typeKind);
769 
770 			case IrTypeKind.pointer:
771 				context.internal_error("Cannot index pointer with GEP instruction, use load first");
772 
773 			case IrTypeKind.array:
774 				IrIndex elemType = context.types.getArrayElementType(aggrType);
775 				IrIndex elemPtrType = context.types.appendPtr(elemType);
776 				uint elemSize = context.types.typeSize(elemType);
777 
778 				if (memberIndex.isSimpleConstant) {
779 					long indexVal = context.constants.get(memberIndex).i64;
780 					long offset = indexVal * elemSize;
781 					aggrPtr = buildOffset(aggrPtr, offset, elemPtrType);
782 				} else {
783 					aggrPtr = buildIndex(aggrPtr, memberIndex, elemSize, elemPtrType);
784 				}
785 
786 				aggrType = elemType;
787 				break;
788 
789 			case IrTypeKind.struct_t:
790 				context.assertf(memberIndex.isSimpleConstant, "Structs can only be indexed with constants, not with %s", memberIndex);
791 
792 				long memberIndexVal = context.constants.get(memberIndex).i64;
793 				IrTypeStructMember[] members = context.types.get!IrTypeStruct(aggrType).members;
794 
795 				context.assertf(memberIndexVal < members.length,
796 					"Indexing member %s of %s-member struct",
797 					memberIndexVal, members.length);
798 
799 				IrTypeStructMember member = members[memberIndexVal];
800 				IrIndex memberPtrType = context.types.appendPtr(member.type);
801 
802 				aggrPtr = buildOffset(aggrPtr, member.offset, memberPtrType);
803 				aggrType = member.type;
804 				break;
805 
806 			case IrTypeKind.func_t:
807 				context.internal_error("Cannot index function type");
808 		}
809 	}
810 
811 	builder.redirectVregUsersTo(instrHeader.result(builder.ir), aggrPtr);
812 	removeInstruction(builder.ir, instrIndex);
813 }