1 /// Copyright: Copyright (c) 2017-2020 Andrey Penechko.
2 /// License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
3 /// Authors: Andrey Penechko.
4 
// Missing cases in the System V ABI implementation:
6 // - Aggregates with alignment > 16 bytes
7 // - xmm/ymm/zmm register passing
8 // - x87
9 module vox.be.abi;
10 
11 import std.bitmanip : bitfields;
12 import std.stdio;
13 import vox.all;
14 
15 /// 2 AbiClass slots is enough to classify structs containing m128/m256/m512 types.
16 /// They can be the only value in the struct.
/// The only rules needed to find types that are passed in registers are:
18 /// - m128/m256/m512 must have offset of 0
19 /// - basic types must have offset < 16
20 
/// Register class of a single eightbyte (8-byte chunk) of a value,
/// following the System V AMD64 ABI classification terminology.
enum AbiClass : ubyte {
	/// This class is used as initializer in the algorithms. It will be used for padding and empty structures and unions.
	no_class,
	/// This class consists of integral types that fit into one of the general purpose registers.
	integer,
	/// The class consists of types that fit into a vector register.
	sse,
	/// The class consists of types that fit into a vector register and can be passed and returned in the upper bytes of it.
	sse_up,
	/// These classes consist of types that will be returned via the x87 FPU.
	x87,
	/// ditto
	x87_up,
	/// This class consists of types that will be returned via the x87 FPU.
	complex_x87,
	/// This class consists of types that will be passed and returned in memory via the stack.
	memory,
}
39 
// Platform-independent classification of function parameter/result
enum PassClass : ubyte {
	// A single register stores the value
	// sysv64 & win64 ABI: parameters, return value
	byValueReg,
	// 2 registers, possibly of different classes store value
	// sysv64: parameters, return value
	byValueRegMulti,
	// Pointer to stack allocated value is passed via GPR
	// win64 ABI: parameters, return value
	// sysv64 ABI: return value
	byPtrReg,
	// Value is pushed on the stack
	// sysv64 & win64 ABI: parameters
	byValueMemory,
	// Pointer to stack allocated value is passed via stack
	// win64 ABI: parameters
	byPtrMemory,
	// Value is ignored. Used for empty structs
	// sysv64 ABI: parameters, return value
	ignore
}
62 
/// True for classes whose value occupies the stack: passed by value in memory,
/// or passed as a pointer stored in memory.
bool isMemory(PassClass passClass) {
	with(PassClass) return passClass == byValueMemory || passClass == byPtrMemory;
}
64 
/// SysV classification result for one value: the AbiClass of each eightbyte
/// (packed into one byte via bitfields: low nibble = first eightbyte,
/// high nibble = second; high stays no_class when only one is used)
/// plus the derived platform-independent PassClass.
struct Sysv_AbiParamClass {
	mixin(bitfields!(
		AbiClass,      "low", 4,
		AbiClass,     "high", 4,
	));
	PassClass passClass;
	/// Number of eightbytes occupied: 1 when `high` is unused, otherwise 2.
	ubyte len() { return high == AbiClass.no_class ? 1 : 2; }

	this(PassClass passClass, AbiClass a) {
		this.passClass = passClass;
		low = a;
	}
	this(PassClass passClass, AbiClass a, AbiClass b) {
		this.passClass = passClass;
		low = a;
		high = b;
	}

	/// Prints as "(low)" when one eightbyte is used, "(low,high)" when two are.
	void toString(scope void delegate(const(char)[]) sink) {
		if (high == AbiClass.no_class)
			sink.formattedWrite("(%s)", low);
		else
			sink.formattedWrite("(%s,%s)", low, high);
	}
}
90 
/// Maximum number of registers a single argument/result can occupy
/// (byValueRegMulti and byPtrReg use two slots of ParamLocation.regs)
enum MAX_ARG_REGS = 2;
92 
/// Computes and holds the ABI lowering info (FunctionAbi) for one function type.
/// Temp arrays come from the compilation context; call `free` when done.
struct AbiState {
	FunctionAbi abi;

	// Parameter registers available for assignment, indexed by AMD64_REG_CLASS (GPR/XMM)
	PhysReg[][2] abiRegs;
	PhysReg[] retRegs;
	IrTypeFunction* type;

	// Backing storage for abi.paramClasses, allocated with uint granularity
	uint[] paramClassBuf;

	/// Classifies the result and all parameters of `type` according to its
	/// calling convention, assigning registers first, then stack offsets.
	void run(CompilationContext* c, IrTypeFunction* type) {
		this.type = type;
		auto cc = callConventions[type.callConv];
		abi.stackSizealign.alignmentPower = cc.minStackAlignmentPower;
		// one PassClass byte per parameter, rounded up to a whole number of uints
		paramClassBuf = c.allocateTempArray!uint(alignValue(type.numParameters, 4) / 4);
		PassClass[] paramClasses = (cast(PassClass[])paramClassBuf)[0..type.numParameters];
		abi.paramClasses = paramClasses[0..type.numParameters];
		abi.paramData = c.allocateTempArray!ParamLocation(type.numParameters);

		abiRegs[AMD64_REG_CLASS.GPR] = cc.gprParamRegs;
		abiRegs[AMD64_REG_CLASS.XMM] = cc.sseParamRegs;
		// dispatch to the classifier matching this calling convention
		ArgClassifier classify = classify_abis[type.callConv];
		classify(c, this);

		calc_stack(c);
	}

	/// Releases the temp arrays allocated in `run`
	void free(CompilationContext* c) {
		c.freeTempArray(abi.paramData);
		c.freeTempArray(paramClassBuf);
	}

	/// Assigns stack offsets to byValueMemory/byPtrMemory parameters and
	/// accumulates the total size/alignment of the stack argument area.
	void calc_stack(CompilationContext* c) {
		// choose stack ordering
		int start = 0;
		int end = cast(int)abi.paramClasses.length;
		int inc = 1;

		if (abi.reverseStackOrder) {
			// iterate [length-1 .. 0] inclusive: swap bounds, then shift both down by one
			swap(start, end);
			--start;
			--end;
			inc = -1;
		}

		enum MIN_STACK_SLOT_SIZE = 8;

		// returns assigned register to the pool
		// item added will have increasing stack offset
		void assignToMem(ref ParamLocation loc, SizeAndAlignment item) {
			abi.stackSizealign.alignmentPower = max(abi.stackSizealign.alignmentPower, item.alignmentPower);
			// each item takes a whole number of stack slots (8 byte slots on 64 bit arch)
			uint itemAlignment = max(MIN_STACK_SLOT_SIZE, item.alignment);
			abi.stackSizealign.size = alignValue!uint(abi.stackSizealign.size, itemAlignment);
			loc.stackOffset = abi.stackSizealign.size;
			loc.stackSizealign = SizeAndAlignment(max(MIN_STACK_SLOT_SIZE, item.size), item.alignmentPower);

			abi.stackSizealign.size += loc.stackSizealign.size;
		}

		// actually assign memory offsets
		for (int i = start; i != end; i += inc) {
			PassClass paramClass = abi.paramClasses[i];
			if (paramClass == PassClass.byValueMemory) {
				SizeAndAlignment sizeAlign = c.types.typeSizeAndAlignment(type.parameterTypes[i]);
				assignToMem(abi.paramData[i], sizeAlign);
				//writefln("offset %s %s %s", i, abi.paramData[i].stackOffset, sizeAlign.size);
			} else if (paramClass == PassClass.byPtrMemory) {
				// only the 8-byte pointer lives on the stack; the pointee is stored elsewhere
				assignToMem(abi.paramData[i], SizeAndAlignment(8, 3));
			}
		}
		// keep the total a multiple of the 8-byte stack slot size
		abi.stackSizealign.size = alignValue!uint(abi.stackSizealign.size, MIN_STACK_SLOT_SIZE);
	}
}
166 
/// Location of one parameter or return value. Which union member is valid
/// depends on the matching PassClass: register classes use `regs`
/// (1 or 2 physical registers), memory classes use the stack offset/size pair.
union ParamLocation {
	PhysReg[MAX_ARG_REGS] regs;
	struct {
		// offset from the first byte of first parameter on the stack
		// first parameter will have offset of 0
		uint stackOffset;
		SizeAndAlignment stackSizealign;
	}
}
176 
/// Result of ABI classification for one function signature
struct FunctionAbi
{
	// PassClass.ignore when the function has no result (both classifiers initialize it so)
	PassClass returnClass;
	// if is pass by ptr, then first slot is hidden parameter and second is return register
	ParamLocation returnLoc;
	// length is number of function parameters
	// hidden parameter is not included here, instead it is handled immediately
	PassClass[] paramClasses;
	// when same index is memory, this is int offset
	// when same index is register, this is register
	ParamLocation[] paramData;
	// total size/alignment of the stack argument area
	SizeAndAlignment stackSizealign;
	bool reverseStackOrder;
	// number of registers consumed by parameters plus the hidden return pointer
	ubyte numRegistersUsed;
	// if defined, syscall instruction is used instead of call
	PhysReg syscallRegister;
	bool useSyscall;
}
196 
// Win64 classifier. Classifies each argument and immediately tries to grab a
// register for it; when registers run out the argument is reclassified to be
// passed via memory. Win64 uses a single positional counter shared between the
// GPR and XMM register files.
void win64_classify(CompilationContext* c, ref AbiState state)
{
	// shared positional counter for gpr and sse regs
	ubyte freeRegs = cast(ubyte)state.abiRegs[AMD64_REG_CLASS.GPR].length;
	scope(exit) {
		state.abi.numRegistersUsed = cast(ubyte)(state.abiRegs[AMD64_REG_CLASS.GPR].length - freeRegs);
	}

	// classify the result
	state.abi.returnClass = PassClass.ignore;
	if (state.type.numResults == 1) {
		IrIndex resType = state.type.resultTypes[0];
		if (resType.isTypeFloat) {
			state.abi.returnClass = PassClass.byValueReg;
			state.abi.returnLoc.regs[0] = amd64_reg.xmm0;
		} else if (resType.fitsIntoRegister(c)) {
			state.abi.returnClass = PassClass.byValueReg;
			state.abi.returnLoc.regs[0] = amd64_reg.ax;
		} else {
			// hidden pointer is passed as first parameter
			state.abi.returnClass = PassClass.byPtrReg;
			state.abi.returnLoc.regs[0] = state.abiRegs[AMD64_REG_CLASS.GPR][0];
			state.abi.returnLoc.regs[1] = amd64_reg.ax; // we store return register in second slot
			--freeRegs;
		}
	}
	//writefln("result %s %s", state.abi.returnClass, state.abi.returnLoc.regs);

	// classify each parameter
	foreach(uint i; 0..cast(uint)state.type.numParameters) {
		IrIndex paramType = state.type.parameterTypes[i];

		// pick register file and the by-register / by-memory classes for this parameter
		ubyte regFile;
		PassClass regPass;
		PassClass memPass;
		if (paramType.isTypeFloat) {
			regFile = AMD64_REG_CLASS.XMM;
			regPass = PassClass.byValueReg;
			memPass = PassClass.byValueMemory;
		} else if (fitsIntoRegister(paramType, c)) {
			regFile = AMD64_REG_CLASS.GPR;
			regPass = PassClass.byValueReg;
			memPass = PassClass.byValueMemory;
		} else {
			// too big for a register: pass a pointer to a stack copy
			regFile = AMD64_REG_CLASS.GPR;
			regPass = PassClass.byPtrReg;
			memPass = PassClass.byPtrMemory;
		}

		if (freeRegs) {
			state.abi.paramClasses[i] = regPass;
			state.abi.paramData[i].regs[0] = state.abiRegs[regFile][$-freeRegs];
			--freeRegs;
		} else {
			state.abi.paramClasses[i] = memPass;
		}

		//writefln("param %s %s %s", i, state.abi.paramClasses[i], state.abi.returnLoc.regs);
	}
}
261 
/// Result of SysV classification: whether the whole value must be passed in memory.
enum InMemory : bool {
	no = false,
	yes = true,
}
266 
/// Classifies a value's eightbytes per the SysV rules and maps the result to a
/// platform-independent PassClass.
Sysv_AbiParamClass classify_value(CompilationContext* c, IrIndex paramType) {
	AbiClass[2] slots = [AbiClass.no_class, AbiClass.no_class];
	InMemory inMemory = classify_value_impl(c, paramType, slots, 0);
	if (inMemory) return Sysv_AbiParamClass(PassClass.byValueMemory, AbiClass.memory);

	if (slots[1] != AbiClass.no_class) {
		// two eightbytes were classified: value spans two registers
		assert(slots[0] != AbiClass.no_class);
		return Sysv_AbiParamClass(PassClass.byValueRegMulti, slots[0], slots[1]);
	}

	// at most one eightbyte; empty values are ignored entirely
	if (slots[0] == AbiClass.no_class)
		return Sysv_AbiParamClass(PassClass.ignore, AbiClass.no_class);
	return Sysv_AbiParamClass(PassClass.byValueReg, slots[0]);
}
282 
/// Recursively classifies `paramType` starting at byte `offset`, merging each
/// scalar's class into the eightbyte slot it lands in.
/// Returns InMemory.yes when the whole value must be passed via the stack.
InMemory classify_value_impl(CompilationContext* c, IrIndex paramType, ref AbiClass[2] resultClasses, uint offset) {
	assert(paramType.isType);
	// anything that starts at or after byte 16 forces the whole value into memory
	if (offset >= 16) return InMemory.yes;

	SizeAndAlignment sizealign = c.types.typeSizeAndAlignment(paramType);
	if (sizealign.size == 0) return InMemory.no;
	// no m256/m512 support yet, so 16 bytes is the register-passing limit
	if (sizealign.size > 16) return InMemory.yes;
	// a misaligned member forces the whole value into memory
	if (paddingSize(offset, sizealign.alignment) > 0) return InMemory.yes;

	AbiClass valueClass;
	final switch (paramType.typeKind) with(IrTypeKind) {
		case array:
			return classify_array(c, paramType, resultClasses, offset);
		case struct_t:
			return classify_struct(c, paramType, resultClasses, offset);
		case pointer:
		case func_t:
			valueClass = AbiClass.integer;
			break;
		case basic:
			final switch(paramType.basicType(c)) with(IrBasicType) {
				case noreturn_t, void_t:
					valueClass = AbiClass.no_class;
					break;
				case i8, i16, i32, i64:
					valueClass = AbiClass.integer;
					break;
				case f32, f64:
					valueClass = AbiClass.sse;
					break;
			}
			break;
	}

	// merge this scalar into the eightbyte it occupies
	uint slotIndex = offset / 8; // index of 8-byte
	c.assertf(slotIndex < 2, "Incorrect 8-byte index: %s", slotIndex); // must be 0 or 1
	resultClasses[slotIndex] = merge_classes(resultClasses[slotIndex], valueClass);
	return InMemory.no;
}
324 
/// Merges the classes of two fields sharing an eightbyte, following the
/// System V AMD64 psABI merge rules (in priority order):
/// 1. If both classes are equal, this is the resulting class.
/// 2. If one of the classes is NO_CLASS, the result is the other class.
/// 3. If one of the classes is MEMORY, the result is MEMORY.
/// 4. If one of the classes is INTEGER, the result is INTEGER.
/// 5. If one of the classes is X87, X87UP or COMPLEX_X87, the result is MEMORY.
/// 6. Otherwise the result is SSE.
///
/// Fix over the previous version: MEMORY now takes precedence over INTEGER
/// (rule 3 before rule 4), x87-family classes merge to MEMORY instead of SSE
/// (rule 5), and merging sse_up with an x87 class no longer hits assert(false).
AbiClass merge_classes(AbiClass a, AbiClass b) {
	if (a == b) return a; // rule 1
	if (a > b) swap(a, b); // order so that a is the smaller class in enum order
	if (a == AbiClass.no_class) return b; // rule 2
	// memory is the largest enum member, so "either is memory" means b is
	if (b == AbiClass.memory) return AbiClass.memory; // rule 3
	if (a == AbiClass.integer) return AbiClass.integer; // rule 4
	// here b >= x87 can only mean x87, x87_up or complex_x87 (memory handled above)
	if (b >= AbiClass.x87) return AbiClass.memory; // rule 5
	return AbiClass.sse; // rule 6: remaining combinations are sse/sse_up
}
336 
/// Classifies an array by classifying each element at its byte offset.
InMemory classify_array(CompilationContext* c, IrIndex paramType, ref AbiClass[2] resultClasses, uint offset) {
	IrTypeArray type = c.types.get!IrTypeArray(paramType);
	uint elemSize = c.types.typeSizeAndAlignment(type.elemType).size;
	foreach(i; 0..type.numElements) {
		// a single element classified as memory sends the whole array to memory
		if (classify_value_impl(c, type.elemType, resultClasses, offset + elemSize * i))
			return InMemory.yes;
	}
	return InMemory.no;
}
348 
/// Classifies a struct by classifying each member at its byte offset.
InMemory classify_struct(CompilationContext* c, IrIndex paramType, ref AbiClass[2] resultClasses, uint offset) {
	foreach(member; c.types.get!IrTypeStruct(paramType).members) {
		InMemory memberInMem = classify_value_impl(c, member.type, resultClasses, offset + member.offset);
		// a single member classified as memory sends the whole struct to memory
		if (memberInMem) return InMemory.yes;
	}
	return InMemory.no;
}
359 
// System V AMD64 classifier. Classifies result and parameters via
// classify_value, then greedily assigns GPR/XMM registers; a parameter whose
// registers cannot all be satisfied is demoted to byValueMemory.
void sysv64_classify(CompilationContext* c, ref AbiState state)
{
	// free-register counters, tracked separately per register class (unlike win64)
	ubyte[2] regsRemaining;

	regsRemaining[AMD64_REG_CLASS.GPR] = cast(ubyte)state.abiRegs[AMD64_REG_CLASS.GPR].length;
	regsRemaining[AMD64_REG_CLASS.XMM] = cast(ubyte)state.abiRegs[AMD64_REG_CLASS.XMM].length;
	scope(exit) {
		// record the total number of registers consumed by parameters + hidden pointer
		size_t gpr = state.abiRegs[AMD64_REG_CLASS.GPR].length - regsRemaining[AMD64_REG_CLASS.GPR];
		size_t xmm = state.abiRegs[AMD64_REG_CLASS.XMM].length - regsRemaining[AMD64_REG_CLASS.XMM];
		state.abi.numRegistersUsed = cast(ubyte)(gpr + xmm);
	}

	state.abi.returnClass = PassClass.ignore;
	if (state.type.numResults == 1) {
		IrIndex resType = state.type.resultTypes[0];
		Sysv_AbiParamClass resClass = classify_value(c, resType);
		state.abi.returnClass = resClass.passClass;
		if (resClass.low == AbiClass.sse) {
			// first eightbyte in xmm0; second (if any) in xmm1 or rax depending on its class
			state.abi.returnLoc.regs[0] = amd64_reg.xmm0;
			if (resClass.high == AbiClass.sse)
				state.abi.returnLoc.regs[1] = amd64_reg.xmm1;
			else if (resClass.high == AbiClass.integer)
				state.abi.returnLoc.regs[1] = amd64_reg.ax;
		} else if (resClass.low == AbiClass.integer) {
			// first eightbyte in rax; second (if any) in xmm0 or rdx depending on its class
			state.abi.returnLoc.regs[0] = amd64_reg.ax;
			if (resClass.high == AbiClass.sse)
				state.abi.returnLoc.regs[1] = amd64_reg.xmm0;
			else if (resClass.high == AbiClass.integer)
				state.abi.returnLoc.regs[1] = amd64_reg.dx;
		} else if (resClass.passClass == PassClass.byValueMemory) {
			// hidden pointer is passed as first parameter
			state.abi.returnClass = PassClass.byPtrReg;
			PhysReg reg = state.abiRegs[AMD64_REG_CLASS.GPR][0];
			state.abi.returnLoc.regs[0] = reg;
			state.abi.returnLoc.regs[1] = amd64_reg.ax; // we store return register in second slot
			--regsRemaining[AMD64_REG_CLASS.GPR];
		}
		//writefln("res %s %s", state.abi.returnClass, state.abi.returnLoc.regs);
	}

	// assign register or fallback to memory class
	foreach(uint i; 0..cast(uint)state.type.numParameters) {
		IrIndex paramType = state.type.parameterTypes[i];
		Sysv_AbiParamClass paramClass_sysv = classify_value(c, paramType);
		state.abi.paramClasses[i] = paramClass_sysv.passClass;
		switch(paramClass_sysv.low)
		{
			case AbiClass.integer:
			case AbiClass.sse:
				// 1 or 2 registers of same or different class
				ubyte getRegClass(AbiClass abiClass) {
					switch(abiClass) {
						case AbiClass.integer: return AMD64_REG_CLASS.GPR;
						case AbiClass.sse: return AMD64_REG_CLASS.XMM;
						default: assert(false);
					}
				}

				AbiClass[2] classes = [paramClass_sysv.low, paramClass_sysv.high];
				// count how many registers of each class this parameter needs
				ubyte[2] regsNeeded;

				foreach(uint j; 0..paramClass_sysv.len) {
					ubyte regClass = getRegClass(classes[j]);
					++regsNeeded[regClass];
				}

				// SysV rule: either every eightbyte gets a register, or the whole value goes to memory
				foreach(ubyte regClass; 0..2) {
					if (regsRemaining[regClass] < regsNeeded[regClass]) {
						state.abi.paramClasses[i] = PassClass.byValueMemory;
						goto case AbiClass.memory;
					}
				}

				// assign regs
				foreach(uint j; 0..paramClass_sysv.len) {
					ubyte regClass = getRegClass(classes[j]);
					assert(regsRemaining[regClass]);
					PhysReg reg = state.abiRegs[regClass][$-regsRemaining[regClass]];
					state.abi.paramData[i].regs[j] = reg;
					--regsRemaining[regClass];
				}
				break;

			case AbiClass.memory: break;
			case AbiClass.no_class: break; // for empty structs
			default:
				c.internal_error("%s not implemented", paramClass_sysv.low);
		}
	}
}
450 
/// Linux syscall variant of the SysV classifier: runs the regular SysV
/// classification, then enforces the stricter syscall constraints
/// (at most 6 params, everything in registers, syscall number in rax).
void sysv64_syscall_classify(CompilationContext* c, ref AbiState state)
{
	state.abi.syscallRegister = amd64_reg.ax;
	state.abi.useSyscall = true;
	if (c.targetOs != TargetOs.linux)
		c.error("Cannot use System V syscall calling convention on %s", c.targetOs);

	sysv64_classify(c, state);

	if (state.type.numParameters > 6) {
		c.error("Cannot have more than 6 parameters in System V syscall calling convention");
	}

	// syscalls only support register-passed (or empty) values
	bool isRegOrIgnored(PassClass pc) {
		return pc == PassClass.byValueReg || pc == PassClass.byPtrReg || pc == PassClass.ignore;
	}

	if (!isRegOrIgnored(state.abi.returnClass))
		c.error("Cannot have return of class %s in System V syscall calling convention", state.abi.returnClass);

	foreach(PassClass paramClass; state.abi.paramClasses) {
		if (isRegOrIgnored(paramClass)) continue;
		c.error("Cannot have parameters of class %s in System V syscall calling convention", paramClass);
	}
}
472 
alias ArgClassifier = void function(CompilationContext* c, ref AbiState state);
// Dispatch table indexed by the function's calling convention
// (see AbiState.run: `classify_abis[type.callConv]`).
// Entry order must stay in sync with the calling convention enum values.
__gshared ArgClassifier[] classify_abis = [
	&win64_classify,
	&sysv64_classify,
	&sysv64_syscall_classify
];
479 
480 // Handle ABI
481 void func_pass_lower_abi(CompilationContext* c, IrFunction* ir, IrIndex funcIndex, ref IrBuilder builder)
482 {
483 	//writefln("lower_abi %s %s", builder.context.idString(ir.name), ir.getCallConvEnum(c));
484 	IrTypeFunction* irFuncType = &c.types.get!IrTypeFunction(ir.type);
485 	c.assertf(irFuncType.numResults <= 1, "%s results is not implemented", irFuncType.numResults);
486 
487 	AbiState state;
488 	state.run(c, irFuncType);
489 	scope(exit) state.free(c);
490 
491 	IrIndex hiddenParameter;
492 	if (state.abi.returnClass == PassClass.byPtrReg) {
493 		// pointer to return value is passed via hidden parameter, read it into virt reg
494 		IrIndex retType = c.types.appendPtr(state.type.resultTypes[0]);
495 		IrIndex paramReg = IrIndex(state.abi.returnLoc.regs[0], typeToRegSize(retType, c));
496 		ExtraInstrArgs extra = { type : retType };
497 		auto moveInstr = builder.emitInstr!(IrOpcode.move)(extra, paramReg);
498 		builder.prependBlockInstr(ir.entryBasicBlock, moveInstr.instruction);
499 		hiddenParameter = moveInstr.result;
500 	}
501 
	// Rewrites one `parameter` pseudo-instruction according to its PassClass:
	// reads the incoming value from its register(s), or loads it from the
	// stack slot computed by AbiState.calc_stack.
	void convParam(IrIndex instrIndex, ref IrInstrHeader instrHeader)
	{
		IrInstr_parameter* param = ir.get!IrInstr_parameter(instrIndex);
		uint paramIndex = param.index(ir);

		IrIndex type = ir.getVirtReg(instrHeader.result(ir)).type;

		PassClass paramClass = state.abi.paramClasses[paramIndex];
		final switch(paramClass) {
			case PassClass.byValueReg:
				// value arrives in one register: replace parameter with a move from it
				IrIndex paramReg = IrIndex(state.abi.paramData[paramIndex].regs[0], typeToRegSize(type, c));
				ExtraInstrArgs extra = { result : instrHeader.result(ir) };
				auto moveInstr = builder.emitInstr!(IrOpcode.move)(extra, paramReg).instruction;
				replaceInstruction(ir, instrIndex, moveInstr);
				break;

			case PassClass.byValueRegMulti:
				// value is split across two registers; recombine into one virtual register
				PhysReg[2] paramRegs = state.abi.paramData[paramIndex].regs;
				IrIndex instr = receiveMultiValue(ir.nextInstr(instrIndex), paramRegs, instrHeader.result(ir), builder);
				replaceInstruction(ir, instrIndex, instr);
				break;

			case PassClass.byPtrReg:
				// register holds a pointer to the value: move pointer in, then load the aggregate
				type = c.types.appendPtr(type);
				IrIndex paramReg = IrIndex(state.abi.paramData[paramIndex].regs[0], typeToRegSize(type, c));
				ExtraInstrArgs extra1 = { type : type };
				auto moveInstr = builder.emitInstr!(IrOpcode.move)(extra1, paramReg);
				replaceInstruction(ir, instrIndex, moveInstr.instruction);

				ExtraInstrArgs extra2 = { result : instrHeader.result(ir) };
				IrIndex loadInstr = builder.emitInstr!(IrOpcode.load_aggregate)(extra2, moveInstr.result).instruction;
				// no other stores can alias this freshly-made stack copy
				ir.getInstr(loadInstr).isUniqueLoad = true;
				builder.insertAfterInstr(moveInstr.instruction, loadInstr);
				break;

			case PassClass.byValueMemory:
				IrIndex slot = builder.appendStackSlot(type, c.types.typeSizeAndAlignment(type), StackSlotKind.parameter);
				ir.getStackSlot(slot).displacement = state.abi.paramData[paramIndex].stackOffset;
				ir.getStackSlot(slot).sizealign = state.abi.paramData[paramIndex].stackSizealign;

				// is directly in stack
				ExtraInstrArgs extra = { result : instrHeader.result(ir) };
				IrIndex loadInstr;
				if (type.isTypeAggregate) {
					// happens on sysv64
					loadInstr = builder.emitInstr!(IrOpcode.load_aggregate)(extra, slot).instruction;
				} else {
					extra.argSize = getTypeArgSize(type, c);
					loadInstr = builder.emitInstr!(IrOpcode.load)(extra, slot).instruction;
				}
				replaceInstruction(ir, instrIndex, loadInstr);
				break;

			case PassClass.byPtrMemory:
				// stack contains pointer to data
				type = c.types.appendPtr(type);
				IrIndex slot = builder.appendStackSlot(type, c.types.typeSizeAndAlignment(type), StackSlotKind.parameter);
				ir.getStackSlot(slot).displacement = state.abi.paramData[paramIndex].stackOffset;
				ir.getStackSlot(slot).sizealign = state.abi.paramData[paramIndex].stackSizealign;

				// first load the pointer from the stack slot
				IrArgSize argSize = getTypeArgSize(type, c);
				ExtraInstrArgs extra = { argSize : argSize, type : type };
				InstrWithResult loadInstr = builder.emitInstr!(IrOpcode.load)(extra, slot);
				// remove parameter instruction
				replaceInstruction(ir, instrIndex, loadInstr.instruction);

				// load aggregate
				ExtraInstrArgs extra2 = { result : instrHeader.result(ir) };
				InstrWithResult loadInstr2 = builder.emitInstr!(IrOpcode.load_aggregate)(extra2, loadInstr.result);
				ir.getInstr(loadInstr2.instruction).isUniqueLoad = true;
				builder.insertAfterInstr(loadInstr.instruction, loadInstr2.instruction);
				break;

			case PassClass.ignore:
				// use zeroinited struct
				builder.redirectVregUsersTo(instrHeader.result(ir), c.constants.addZeroConstant(type));
				removeInstruction(ir, instrIndex);
				break;
		}
	}
582 
583 	void convCall(IrIndex instrIndex, ref IrInstrHeader instrHeader)
584 	{
585 		ir.numCalls += 1;
586 
587 		IrIndex callee = instrHeader.arg(ir, 0);
588 		IrIndex calleeTypeIndex = ir.getValueType(c, callee);
589 		if (calleeTypeIndex.isTypePointer)
590 			calleeTypeIndex = c.types.getPointerBaseType(calleeTypeIndex);
591 		IrTypeFunction* calleeType = &c.types.get!IrTypeFunction(calleeTypeIndex);
592 
593 		AbiState callee_state;
594 		callee_state.run(c, calleeType);
595 		scope(exit) callee_state.free(c);
596 
597 		CallConv* callConv = c.types.getCalleeCallConv(callee, ir, c);
598 		IrIndex[] args = instrHeader.args(ir)[1..$]; // exclude callee
599 		IrIndex originalResult;
600 		IrIndex hiddenPtr;
601 		bool hasHiddenPtr = false;
602 
603 		// allocate stack slot for big return value
604 		if (callee_state.abi.returnClass == PassClass.byPtrReg)
605 		{
606 			IrIndex resType = callee_state.type.resultTypes[0];
607 			originalResult = instrHeader.result(ir); // we reuse result slot
608 
609 			// reuse result slot of instruction as first argument
610 			instrHeader._payloadOffset -= 1;
611 			instrHeader.hasResult = false;
612 			instrHeader.numArgs += 1;
613 
614 			args = instrHeader.args(ir)[1..$];
615 			// move callee in first arg
616 			instrHeader.arg(ir, 0) = callee;
617 			// place return arg slot in second arg
618 			hiddenPtr = builder.appendStackSlot(resType, c.types.typeSizeAndAlignment(resType), StackSlotKind.argument);
619 			args[0] = hiddenPtr;
620 			hasHiddenPtr = true;
621 		}
622 
623 		enum STACK_ITEM_SIZE = 8;
624 		size_t numArgs = args.length;
625 		// how many bytes are allocated on the stack before func call
626 		size_t stackReserve;
627 		if (callConv.hasShadowSpace)
628 		{
629 			stackReserve = 4 * STACK_ITEM_SIZE;
630 		}
631 
632 		// Copy args to stack if necessary (big structs or run out of regs)
633 		foreach (size_t i; cast(size_t)hasHiddenPtr..args.length)
634 		{
635 			IrIndex arg = args[i];
636 			removeUser(c, ir, instrIndex, arg);
637 
638 			PassClass paramClass = callee_state.abi.paramClasses[i-cast(size_t)hasHiddenPtr];
639 			IrIndex type = callee_state.type.parameterTypes[i-cast(size_t)hasHiddenPtr];
640 			final switch(paramClass) {
641 				case PassClass.byValueReg:
642 					args[i] = simplifyConstant(arg, c);
643 					break;
644 				case PassClass.byValueRegMulti: break;
645 				case PassClass.byPtrReg, PassClass.byPtrMemory:
646 					//allocate stack slot, store value there and use slot pointer as argument
647 					args[i] = builder.appendStackSlot(type, c.types.typeSizeAndAlignment(type), StackSlotKind.argument);
648 					IrIndex instr = builder.emitInstr!(IrOpcode.store)(ExtraInstrArgs(), args[i], arg);
649 					builder.insertBeforeInstr(instrIndex, instr);
650 					break;
651 				case PassClass.byValueMemory:
652 					if (type.fitsIntoRegister(c)) args[i] = simplifyConstant(arg, c);
653 					break; // handled later
654 				case PassClass.ignore: break;
655 			}
656 		}
657 
658 		// Stack layouting code makes sure that local data has 16 byte alignment if we have calls in IR.
659 		// align stack and push args that didn't fit into registers (register size args)
660 		if (callee_state.abi.stackSizealign.size > 0)
661 		{
662 			if (callee_state.abi.stackSizealign.alignment > 16) {
663 				c.unrecoverable_error(TokenIndex(), "Stack alignment of %s > 16 is not implemented", callee_state.abi.stackSizealign.alignment);
664 			}
665 
666 			if (callee_state.abi.stackSizealign.size % callee_state.abi.stackSizealign.alignment != 0)
667 			{
668 				uint padding = paddingSize(callee_state.abi.stackSizealign.size, callee_state.abi.stackSizealign.alignment);
669 				// align stack to 16 bytes
670 				// TODO: SysV ABI needs 32byte alignment if __m256 is passed
671 				stackReserve += padding;
672 				IrIndex paddingSize = c.constants.add(makeIrType(IrBasicType.i32), padding);
673 				builder.emitInstrBefore!(IrOpcode.grow_stack)(instrIndex, ExtraInstrArgs(), paddingSize);
674 			}
675 
676 			// choose stack ordering
677 			int start = cast(int)callee_state.abi.paramClasses.length-1;
678 			int end = 0;
679 			int inc = -1;
680 
681 			if (callee_state.abi.reverseStackOrder) {
682 				swap(start, end);
683 				++end;
684 				inc = 1;
685 			}
686 			uint stackOffset = 0;
687 			// push args to stack
688 			for (int i = start; i != end; i += inc) {
689 				PassClass paramClass = callee_state.abi.paramClasses[i];
690 				IrIndex arg = args[i + cast(int)hasHiddenPtr];
691 				if (paramClass == PassClass.byValueMemory || paramClass == PassClass.byPtrMemory) {
692 					ParamLocation paramData = callee_state.abi.paramData[i];
693 
694 					IrIndex type = ir.getValueType(c, arg);
695 					uint size = c.types.typeSize(type);
696 					//writefln("param %s %s %s %s", i, stackOffset, paramData.stackOffset, size);
697 
698 					// push cannot be used with xmm registers. Convert those to grow_stack + store
699 					if (size <= 8 && !type.isTypeFloat) {
700 						auto pushInstr = builder.emitInstr!(IrOpcode.push)(ExtraInstrArgs(), arg);
701 						builder.insertBeforeInstr(instrIndex, pushInstr);
702 						stackOffset += 8;
703 					} else {
704 						// this must be multiple of 8
705 						uint allocSize = paramData.stackSizealign.size;
706 						if (allocSize > 0) {
707 							IrIndex paddingSize = c.constants.add(makeIrType(IrBasicType.i32), allocSize);
708 							builder.emitInstrBefore!(IrOpcode.grow_stack)(instrIndex, ExtraInstrArgs(), paddingSize);
709 						}
710 
711 						IrIndex ptrType = c.types.appendPtr(type);
712 						ExtraInstrArgs extra = { type : ptrType };
713 						IrIndex ptr = builder.emitInstrBefore!(IrOpcode.move)(instrIndex, extra, IrIndex(amd64_reg.sp, ArgType.QWORD)).result;
714 						builder.emitInstrBefore!(IrOpcode.store)(instrIndex, ExtraInstrArgs(), ptr, arg);
715 
716 						stackOffset += allocSize;
717 					}
718 				}
719 			}
720 			assert(stackOffset == callee_state.abi.stackSizealign.size);
721 			stackReserve += callee_state.abi.stackSizealign.size;
722 		}
723 
724 		if (callee_state.abi.returnClass == PassClass.byPtrReg) {
725 			IrIndex type = ir.getValueType(c, args[0]);
726 			IrIndex argRegister = IrIndex(callee_state.abi.returnLoc.regs[0], typeToRegSize(type, c));
727 			ExtraInstrArgs extra = { result : argRegister };
728 			builder.emitInstrBefore!(IrOpcode.move)(instrIndex, extra, args[0]);
729 		}
730 
731 		// move args to registers
732 		foreach(i, paramClass; callee_state.abi.paramClasses) {
733 			IrIndex arg = args[i + cast(int)hasHiddenPtr];
734 			ParamLocation paramData = callee_state.abi.paramData[i];
735 
736 			IrIndex type = ir.getValueType(c, arg);
737 			final switch(paramClass) {
738 				case PassClass.byValueReg, PassClass.byPtrReg:
739 					IrIndex argRegister = IrIndex(paramData.regs[0], typeToRegSize(type, c));
740 					ExtraInstrArgs extra = { result : argRegister };
741 					builder.emitInstrBefore!(IrOpcode.move)(instrIndex, extra, arg);
742 					break;
743 				case PassClass.byValueRegMulti:
744 					IrIndex[2] vals = simplifyConstant128(instrIndex, arg, builder, c);
745 
746 					IrIndex reg1 = IrIndex(paramData.regs[0], ArgType.QWORD);
747 					ExtraInstrArgs extra3 = { result : reg1 };
748 					builder.emitInstrBefore!(IrOpcode.move)(instrIndex, extra3, vals[0]);
749 
750 					IrIndex reg2 = IrIndex(paramData.regs[1], ArgType.QWORD);
751 					ExtraInstrArgs extra4 = { result : reg2 };
752 					builder.emitInstrBefore!(IrOpcode.move)(instrIndex, extra4, vals[1]);
753 					break;
754 				case PassClass.byValueMemory: break; // handled below
755 				case PassClass.byPtrMemory: break; // handled below
756 				case PassClass.ignore:
757 					break;
758 			}
759 		}
760 
761 		if (callConv.hasShadowSpace)
762 		{	// Allocate shadow space for 4 physical registers
763 			IrIndex const_32 = c.constants.add(makeIrType(IrBasicType.i32), 32);
764 			auto growStackInstr = builder.emitInstr!(IrOpcode.grow_stack)(ExtraInstrArgs(), const_32);
765 			builder.insertBeforeInstr(instrIndex, growStackInstr);
766 			ir.getInstr(instrIndex).extendFixedArgRange = true;
767 		}
768 
769 		// fix arguments
770 		scope(exit) {
771 			ubyte regsUsed = callee_state.abi.numRegistersUsed;
772 
773 			void fillRegs(IrIndex[] instrArgs) {
774 				assert(instrArgs.length == regsUsed);
775 				uint nextIndex = 0;
776 				// handle return by ptr reg
777 				if (callee_state.abi.returnClass == PassClass.byPtrReg) {
778 					// size is irrelevant here because register is only mentioned here to aid register allocation
779 					instrArgs[nextIndex] = IrIndex(callee_state.abi.returnLoc.regs[0], ArgType.QWORD);
780 					++nextIndex;
781 				}
782 				// order must be preserved, because liveness analisys relies on that
783 				foreach(i, PassClass paramClass; callee_state.abi.paramClasses) {
784 					final switch(paramClass) with(PassClass) {
785 						case byValueReg, byPtrReg:
786 							ParamLocation loc = callee_state.abi.paramData[i];
787 							// size is irrelevant here because register is only mentioned here to aid register allocation
788 							instrArgs[nextIndex] = IrIndex(loc.regs[0], ArgType.QWORD);
789 							++nextIndex;
790 							break;
791 						case byValueRegMulti:
792 							ParamLocation loc = callee_state.abi.paramData[i];
793 							// size is irrelevant here because register is only mentioned here to aid register allocation
794 							instrArgs[nextIndex] = IrIndex(loc.regs[0], ArgType.QWORD);
795 							instrArgs[nextIndex+1] = IrIndex(loc.regs[1], ArgType.QWORD);
796 							nextIndex += 2;
797 							break;
798 						case byValueMemory, byPtrMemory, ignore: break; // skip, non register param
799 					}
800 				}
801 				assert(nextIndex == regsUsed);
802 			}
803 
804 			if (regsUsed + 1 <= instrHeader.numArgs) { // +1 includes callee
805 				if (callee_state.abi.useSyscall) {
806 					// Put syscall number into the correct register
807 					IrIndex syscallRegister = IrIndex(callee_state.abi.syscallRegister, ArgType.DWORD);
808 					ExtraInstrArgs extra = { result : syscallRegister };
809 					builder.emitInstrBefore!(IrOpcode.move)(instrIndex, extra, c.constants.add(makeIrType(IrBasicType.i32), callee_state.type.syscallNumber));
810 
811 					// We need bigger instruction for syscall because of extra syscallRegister at the end
812 					ExtraInstrArgs callExtra = { hasResult : instrHeader.hasResult, result : instrHeader.tryGetResult(ir), extraArgSlots : cast(ubyte)(regsUsed + 1) };
813 
814 					// We leave callee argument in here, so that liveness analysis can get calling convention info
815 					IrIndex newCallInstr = builder.emitInstr!(IrOpcode.syscall)(callExtra, instrHeader.arg(ir, 0)).instruction;
816 					IrInstrHeader* callHeader = ir.getInstr(newCallInstr);
817 					fillRegs(callHeader.args(ir)[1..$-1]); // fill with regs
818 
819 					// Since we did the mov to the syscallRegister after all other movs, syscallRegister needs to be passed at the end of the list
820 					// to preserve the order invariant for the liveness analysis
821 					callHeader.args(ir)[$-1] = syscallRegister;
822 
823 					replaceInstruction(ir, instrIndex, newCallInstr);
824 					instrIndex = newCallInstr;
825 				} else {
826 					// Reuse instruction
827 					instrHeader.numArgs = cast(ubyte)(regsUsed + 1); // include callee
828 
829 					fillRegs(instrHeader.args(ir)[1..$]); // fill with regs
830 				}
831 			} else {
832 				assert(!callee_state.abi.useSyscall); // Syscalls are handled in the other case
833 
834 				// Make bigger instruction
835 				ExtraInstrArgs callExtra = { hasResult : instrHeader.hasResult, result : instrHeader.tryGetResult(ir), extraArgSlots : regsUsed };
836 				IrIndex newCallInstr = builder.emitInstr!(IrOpcode.call)(callExtra, instrHeader.arg(ir, 0)).instruction;
837 				IrInstrHeader* callHeader = ir.getInstr(newCallInstr);
838 				fillRegs(callHeader.args(ir)[1..$]); // fill with regs
839 
840 				replaceInstruction(ir, instrIndex, newCallInstr);
841 				instrIndex = newCallInstr;
842 			}
843 		}
844 
845 		{
846 			// If function is noreturn we don't need to insert cleanup code
847 			if (calleeType.numResults == 1)
848 			{
849 				IrIndex resType = callee_state.type.resultTypes[0];
850 				if (resType.isTypeNoreturn) return;
851 			}
852 
853 			// Instructions will be added after this one
854 			IrIndex lastInstr = instrIndex;
855 
856 			// Deallocate stack after call
857 			if (stackReserve > 0)
858 			{
859 				IrIndex conReservedBytes = c.constants.add(makeIrType(IrBasicType.i32), stackReserve);
860 				auto shrinkStackInstr = builder.emitInstr!(IrOpcode.shrink_stack)(ExtraInstrArgs(), conReservedBytes);
861 				builder.insertAfterInstr(lastInstr, shrinkStackInstr);
862 				lastInstr = shrinkStackInstr; // insert next instr after this one
863 				ir.getInstr(instrIndex).extendFixedResultRange = true;
864 			}
865 
866 			// for calls that return in register
867 			final switch (callee_state.abi.returnClass) {
868 				case PassClass.byValueReg:
869 					// mov result to virt reg
870 					IrIndex returnReg = IrIndex(callee_state.abi.returnLoc.regs[0], typeToIrArgSize(ir.getVirtReg(instrHeader.result(ir)).type, c));
871 					ExtraInstrArgs extra = { result : instrHeader.result(ir) };
872 					auto moveInstr = builder.emitInstr!(IrOpcode.move)(extra, returnReg).instruction;
873 					builder.insertAfterInstr(lastInstr, moveInstr);
874 					instrHeader.result(ir) = returnReg;
875 					break;
876 				case PassClass.byValueRegMulti:
877 					PhysReg[2] retRegs = callee_state.abi.returnLoc.regs;
878 					IrIndex instr = receiveMultiValue(ir.nextInstr(instrIndex), retRegs, instrHeader.result(ir), builder);
879 					builder.insertAfterInstr(lastInstr, instr);
880 					instrHeader.result(ir) = IrIndex(retRegs[0], ArgType.QWORD); // TODO: need to put both registers as result
881 					break;
882 				case PassClass.byPtrReg:
883 					ExtraInstrArgs extra = { result : originalResult };
884 					IrIndex loadInstr = builder.emitInstr!(IrOpcode.load_aggregate)(extra, hiddenPtr).instruction;
885 					ir.getInstr(loadInstr).isUniqueLoad = true;
886 					builder.insertAfterInstr(lastInstr, loadInstr);
887 					break;
888 				case PassClass.byPtrMemory:
889 				case PassClass.byValueMemory:
890 					c.internal_error("invalid return class", callee_state.abi.returnClass);
891 				case PassClass.ignore: break; // no result, or empty struct
892 			}
893 		}
894 	}
895 
896 	void convReturn(IrIndex instrIndex, ref IrInstrHeader instrHeader)
897 	{
898 		// rewrite ret_val as ret in-place
899 		instrHeader.op = IrOpcode.ret;
900 
901 		removeUser(c, ir, instrIndex, instrHeader.arg(ir, 0));
902 		PhysReg[2] resRegs = state.abi.returnLoc.regs;
903 
904 		final switch (state.abi.returnClass)
905 		{
906 			case PassClass.byPtrReg:
907 				// store struct into pointer, then return pointer
908 				IrIndex value = instrHeader.arg(ir, 0);
909 				IrIndex instr = builder.emitInstr!(IrOpcode.store)(ExtraInstrArgs(), hiddenParameter, value);
910 				builder.insertBeforeInstr(instrIndex, instr);
911 				IrIndex result = IrIndex(resRegs[1], ArgType.QWORD); // we store return register in second slot
912 				ExtraInstrArgs extra = { result : result };
913 				IrIndex copyInstr = builder.emitInstr!(IrOpcode.move)(extra, hiddenParameter).instruction;
914 				builder.insertBeforeInstr(instrIndex, copyInstr);
915 				break;
916 			case PassClass.byValueReg:
917 				IrIndex value = simplifyConstant(instrHeader.arg(ir, 0), c);
918 				IrIndex type = irFuncType.resultTypes[0];
919 				IrIndex result = IrIndex(resRegs[0], typeToRegSize(type, c));
920 				ExtraInstrArgs extra = { result : result };
921 				IrIndex copyInstr = builder.emitInstr!(IrOpcode.move)(extra, value).instruction;
922 				builder.insertBeforeInstr(instrIndex, copyInstr);
923 				break;
924 			case PassClass.byValueRegMulti:
925 				IrIndex[2] vals = simplifyConstant128(instrIndex, instrHeader.arg(ir, 0), builder, c);
926 
927 				IrIndex result1 = IrIndex(resRegs[0], ArgType.QWORD);
928 				ExtraInstrArgs extra3 = { result : result1 };
929 				builder.emitInstrBefore!(IrOpcode.move)(instrIndex, extra3, vals[0]);
930 
931 				IrIndex result2 = IrIndex(resRegs[1], ArgType.QWORD);
932 				ExtraInstrArgs extra4 = { result : result2 };
933 				builder.emitInstrBefore!(IrOpcode.move)(instrIndex, extra4, vals[1]);
934 				break;
935 			case PassClass.byValueMemory, PassClass.byPtrMemory:
936 				c.internal_error("Invalid return class %s", state.abi.returnClass);
937 			case PassClass.ignore:
938 				break;
939 		}
940 		// rewrite ret_val as ret in-place
941 		instrHeader.op = IrOpcode.ret;
942 		instrHeader.numArgs = 0;
943 	}
944 
945 	foreach (IrIndex blockIndex, ref IrBasicBlock block; ir.blocks)
946 	{
947 		foreach(IrIndex instrIndex, ref IrInstrHeader instrHeader; block.instructions(ir))
948 		{
949 			switch(instrHeader.op)
950 			{
951 				case IrOpcode.parameter: convParam(instrIndex, instrHeader); break;
952 				case IrOpcode.call: convCall(instrIndex, instrHeader); break;
953 				case IrOpcode.ret_val: convReturn(instrIndex, instrHeader); break;
954 				default: break;
955 			}
956 		}
957 	}
958 }
959 
960 // glue 2 registers into aggregate
// Glues 2 physical registers back into a single aggregate virtual register.
// Emits: two moves (phys reg -> i64 virt reg), two stores into a 16-byte stack
// slot, and a unique aggregate load into `result`. Returns the load instruction.
IrIndex receiveMultiValue(IrIndex beforeInstr, PhysReg[2] regs, IrIndex result, ref IrBuilder builder) {
	IrIndex aggType = builder.ir.getVirtReg(result).type;
	auto sizeAlign = builder.context.types.typeSizeAndAlignment(aggType);
	IrIndex loReg = IrIndex(regs[0], ArgType.QWORD);
	IrIndex hiReg = IrIndex(regs[1], ArgType.QWORD);

	// Copy each physical register into a fresh i64 virtual register
	ExtraInstrArgs loExtra = { type : makeIrType(IrBasicType.i64) };
	auto loMove = builder.emitInstr!(IrOpcode.move)(loExtra, loReg);
	builder.insertBeforeInstr(beforeInstr, loMove.instruction);

	ExtraInstrArgs hiExtra = { type : makeIrType(IrBasicType.i64) };
	auto hiMove = builder.emitInstr!(IrOpcode.move)(hiExtra, hiReg);
	builder.insertBeforeInstr(beforeInstr, hiMove.instruction);

	// Spill both halves into a 16-byte stack slot, then load the aggregate back
	IrIndex slot = builder.appendStackSlot(aggType, SizeAndAlignment(16, sizeAlign.alignmentPower), StackSlotKind.local);

	IrIndex loAddr = genAddressOffset(slot, 0, builder.context.i64PtrType, beforeInstr, builder);
	IrIndex loStore = builder.emitInstr!(IrOpcode.store)(ExtraInstrArgs(), loAddr, loMove.result);
	builder.insertBeforeInstr(beforeInstr, loStore);

	IrIndex hiAddr = genAddressOffset(slot, 8, builder.context.i64PtrType, beforeInstr, builder);
	IrIndex hiStore = builder.emitInstr!(IrOpcode.store)(ExtraInstrArgs(), hiAddr, hiMove.result);
	builder.insertBeforeInstr(beforeInstr, hiStore);

	ExtraInstrArgs loadExtra = { result : result };
	IrIndex loadInstr = builder.emitInstr!(IrOpcode.load_aggregate)(loadExtra, slot).instruction;
	builder.ir.getInstr(loadInstr).isUniqueLoad = true;
	return loadInstr;
}
991 
992 // For sysv64 ABI
993 // Given aggregate constant of (size > 8 && size <= 16) with all members aligned, produces 2 64bit values
// For sysv64 ABI
// Given aggregate constant of (size > 8 && size <= 16) with all members aligned, produces 2 64bit values.
// Three cases:
//  - zero constant: both halves are a zero i64 constant
//  - constant aggregate: serialized into a 16-byte buffer; members that are
//    global references are reported via callback and kept as IrIndex instead
//  - anything else: two get_aggregate_slice instructions are emitted before `insertBefore`
IrIndex[2] simplifyConstant128(IrIndex insertBefore, IrIndex value, ref IrBuilder builder, CompilationContext* c)
{
	IrIndex[2] vals;
	if (value.isConstantZero) {
		vals[] = c.constants.addZeroConstant(makeIrType(IrBasicType.i64));
	} else if (value.isConstantAggregate) {
		// View the serialized constant both as raw bytes and as two qwords
		union Repr {
			ubyte[16] buf;
			ulong[2] items;
		}
		Repr repr;
		// Global references cannot be serialized to raw bytes; record the
		// IrIndex for whichever 8-byte half the reference lands in.
		void onGlobal(ubyte[] subbuffer, IrIndex index, CompilationContext* c) {
			if (subbuffer.ptr == repr.buf.ptr) {
				vals[0] = index;
			} else {
				assert(subbuffer.ptr == repr.buf.ptr + 8);
				vals[1] = index;
			}
		}
		constantToMem(repr.buf[], value, c, &onGlobal);
		// Halves not claimed by a global become plain i64 constants
		if (vals[0].isUndefined) vals[0] = c.constants.add(makeIrType(IrBasicType.i64), repr.items[0]);
		if (vals[1].isUndefined) vals[1] = c.constants.add(makeIrType(IrBasicType.i64), repr.items[1]);
	} else {
		// Non-constant: extract each 8-byte slice with an instruction
		ExtraInstrArgs extra1 = { type : makeIrType(IrBasicType.i64) };
		vals[0] = builder.emitInstrBefore!(IrOpcode.get_aggregate_slice)(insertBefore, extra1, value, c.constants.addZeroConstant(makeIrType(IrBasicType.i32))).result;

		ExtraInstrArgs extra2 = { type : makeIrType(IrBasicType.i64) };
		vals[1] = builder.emitInstrBefore!(IrOpcode.get_aggregate_slice)(insertBefore, extra2, value, c.constants.add(makeIrType(IrBasicType.i32), 8)).result;
	}
	return vals;
}