/// Copyright: Copyright (c) 2017-2020 Andrey Penechko.
/// License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
/// Authors: Andrey Penechko.

// Missing case of System V ABI implementation:
// - Aggregates with alignment > 16 bytes
// - xmm/ymm/zmm register passing
// - x87
module vox.be.abi;

import std.bitmanip : bitfields;
import std.stdio;
import vox.all;

/// 2 AbiClass slots is enough to classify structs containing m128/m256/m512 types.
/// They can be the only value in the struct.
/// The only rules needed to find types that are passed in registers, are:
/// - m128/m256/m512 must have offset of 0
/// - basic types must have offset < 16

/// Per-eightbyte classification, mirroring the classes defined by the System V
/// x86-64 psABI classification algorithm.
enum AbiClass : ubyte {
	/// This class is used as initializer in the algorithms. It will be used for padding and empty structures and unions.
	no_class,
	/// This class consists of integral types that fit into one of the general purpose registers.
	integer,
	/// The class consists of types that fit into a vector register.
	sse,
	/// The class consists of types that fit into a vector register and can be passed and returned in the upper bytes of it.
	sse_up,
	/// These classes consists of types that will be returned via the x87 FPU.
	x87,
	/// ditto
	x87_up,
	/// This class consists of types that will be returned via the x87 FPU.
	complex_x87,
	/// This class consists of types that will be passed and returned in memory via the stack.
	memory,
}

// Platform-independent classification of function parameter/result
enum PassClass : ubyte {
	// Register stores value
	// sysv64 & win64 ABI: parameters, return value
	byValueReg,
	// 2 registers, possibly of different classes store value
	// sysv64: parameters, return value
	byValueRegMulti,
	// Pointer to stack allocated value is passed via GPR
	// win64 ABI: parameters, return value
	// sysv64 ABI: return value
	byPtrReg,
	// Value is pushed on the stack
	// sysv64 & win64 ABI: parameters
	byValueMemory,
	// Pointer to stack allocated value is passed via stack
	// win64 ABI: parameters
	byPtrMemory,
	// Value is ignored. Used for empty structs
	// sysv64 ABI: parameters, return value
	ignore
}

/// True when the value (or a pointer to it) is passed on the stack rather than in a register.
bool isMemory(PassClass passClass) { return passClass == PassClass.byValueMemory || passClass == PassClass.byPtrMemory; }

/// Result of SysV classification for a single value: up to two eightbyte
/// classes packed into one byte (`low` for bytes 0..7, `high` for bytes 8..15),
/// plus the derived platform-independent PassClass.
struct Sysv_AbiParamClass {
	mixin(bitfields!(
		AbiClass, "low", 4,
		AbiClass, "high", 4,
	));
	PassClass passClass;
	// Number of eightbytes occupied: 1 when only `low` is used, 2 otherwise.
	ubyte len() { return high == AbiClass.no_class ?
1 : 2; }

	this(PassClass passClass, AbiClass a) {
		this.passClass = passClass;
		low = a;
	}
	this(PassClass passClass, AbiClass a, AbiClass b) {
		this.passClass = passClass;
		low = a;
		high = b;
	}

	void toString(scope void delegate(const(char)[]) sink) {
		if (high == AbiClass.no_class)
			sink.formattedWrite("(%s)", low);
		else
			sink.formattedWrite("(%s,%s)", low, high);
	}
}

/// Maximum number of registers a single parameter/result can occupy (two eightbytes).
enum MAX_ARG_REGS = 2;

/// Driver for ABI lowering of one function signature: runs the calling-convention
/// specific classifier and then lays out stack-passed parameters.
struct AbiState {
	FunctionAbi abi;

	PhysReg[][2] abiRegs;
	PhysReg[] retRegs;
	IrTypeFunction* type;

	// Backing storage for abi.paramClasses (allocated as uints, reinterpreted as PassClass[]).
	uint[] paramClassBuf;

	/// Classifies all parameters and the result of `type`, then assigns stack offsets.
	/// Must be paired with a call to free().
	void run(CompilationContext* c, IrTypeFunction* type) {
		this.type = type;
		auto cc = callConventions[type.callConv];
		abi.stackSizealign.alignmentPower = cc.minStackAlignmentPower;
		paramClassBuf = c.allocateTempArray!uint(alignValue(type.numParameters, 4) / 4);
		PassClass[] paramClasses = (cast(PassClass[])paramClassBuf)[0..type.numParameters];
		abi.paramClasses = paramClasses[0..type.numParameters];
		abi.paramData = c.allocateTempArray!ParamLocation(type.numParameters);

		abiRegs[AMD64_REG_CLASS.GPR] = cc.gprParamRegs;
		abiRegs[AMD64_REG_CLASS.XMM] = cc.sseParamRegs;
		// classifier is selected by the calling convention of the function type
		ArgClassifier classify = classify_abis[type.callConv];
		classify(c, this);

		calc_stack(c);
	}

	/// Releases the temporary arrays allocated by run().
	void free(CompilationContext* c) {
		c.freeTempArray(abi.paramData);
		c.freeTempArray(paramClassBuf);
	}

	/// Assigns stack offsets to every parameter classified as byValueMemory or
	/// byPtrMemory, and accumulates total stack size/alignment in abi.stackSizealign.
	void calc_stack(CompilationContext* c) {
		// choose stack ordering
		int start = 0;
		int end = cast(int)abi.paramClasses.length;
		int inc = 1;

		if (abi.reverseStackOrder) {
			swap(start, end);
			--start;
			--end;
			inc = -1;
		}

		enum MIN_STACK_SLOT_SIZE = 8;

		// returns assigned register to the pool
		// item added will have increasing stack offset
		void assignToMem(ref ParamLocation loc, SizeAndAlignment item) {
			abi.stackSizealign.alignmentPower = max(abi.stackSizealign.alignmentPower, item.alignmentPower);
			// each item takes a whole number of stack slots (8 byte slots on 64 bit arch)
			uint itemAlignment = max(MIN_STACK_SLOT_SIZE, item.alignment);
			abi.stackSizealign.size = alignValue!uint(abi.stackSizealign.size, itemAlignment);
			loc.stackOffset = abi.stackSizealign.size;
			loc.stackSizealign = SizeAndAlignment(max(MIN_STACK_SLOT_SIZE, item.size), item.alignmentPower);

			abi.stackSizealign.size += loc.stackSizealign.size;
		}

		// actually assign memory offsets
		for (int i = start; i != end; i += inc) {
			PassClass paramClass = abi.paramClasses[i];
			if (paramClass == PassClass.byValueMemory) {
				SizeAndAlignment sizeAlign = c.types.typeSizeAndAlignment(type.parameterTypes[i]);
				assignToMem(abi.paramData[i], sizeAlign);
				//writefln("offset %s %s %s", i, abi.paramData[i].stackOffset, sizeAlign.size);
			} else if (paramClass == PassClass.byPtrMemory) {
				// only the pointer lives on the stack: 8 bytes, 8-byte aligned (2^3)
				assignToMem(abi.paramData[i], SizeAndAlignment(8, 3));
			}
		}
		abi.stackSizealign.size = alignValue!uint(abi.stackSizealign.size, MIN_STACK_SLOT_SIZE);
	}
}

/// Where a parameter/result lives: either in up to MAX_ARG_REGS registers,
/// or at an offset in the stack argument area (the two views alias).
union ParamLocation {
	PhysReg[MAX_ARG_REGS] regs;
	struct {
		// offset from the first byte of first parameter on the stack
		// first parameter will have offset of 0
		uint stackOffset;
		SizeAndAlignment stackSizealign;
	}
}

/// Complete ABI lowering decision for one function signature.
struct FunctionAbi
{
	// must be byValueReg when function has no result
	PassClass returnClass;
	// if is pass by ptr, then first slot is hidden parameter and second is return register
	ParamLocation returnLoc;
	// length is number of function parameters
	// hidden parameter is not included here, instead it is handled immediately
	PassClass[] paramClasses;
	// when same index is memory, this is int offset
	// when same index is register, this is register
	ParamLocation[] paramData;
	SizeAndAlignment stackSizealign;
	bool reverseStackOrder;
	ubyte numRegistersUsed;
	// if defined, syscall instruction is used instead of call
	PhysReg syscallRegister;
	bool useSyscall;
}

// classification callback will classify the argument and immediately try to assign register if available
// If there is no free registers left, it will reclassify the argument to be passed via memory.
void win64_classify(CompilationContext* c, ref AbiState state)
{
	// used for gpr and sse regs
	// NOTE: a single counter indexes both the GPR and XMM register arrays, so a
	// parameter consumes its positional slot in both classes (win64 convention).
	ubyte regsRemaining = cast(ubyte)state.abiRegs[AMD64_REG_CLASS.GPR].length;
	scope(exit) {
		state.abi.numRegistersUsed = cast(ubyte)(state.abiRegs[AMD64_REG_CLASS.GPR].length - regsRemaining);
	}

	state.abi.returnClass = PassClass.ignore;
	if (state.type.numResults == 1) {
		IrIndex resType = state.type.resultTypes[0];
		if (resType.isTypeFloat) {
			state.abi.returnClass = PassClass.byValueReg;
			state.abi.returnLoc.regs[0] = amd64_reg.xmm0;
		} else if (resType.fitsIntoRegister(c)) {
			state.abi.returnClass = PassClass.byValueReg;
			state.abi.returnLoc.regs[0] = amd64_reg.ax;
		} else {
			// hidden pointer is passed as first parameter
			state.abi.returnClass = PassClass.byPtrReg;
			PhysReg reg = state.abiRegs[AMD64_REG_CLASS.GPR][0];
			state.abi.returnLoc.regs[0] = reg;
			state.abi.returnLoc.regs[1] = amd64_reg.ax; // we store return register in second slot
			--regsRemaining;
		}
	}
	//writefln("result %s %s", state.abi.returnClass, state.abi.returnLoc.regs);

	foreach(uint i; 0..cast(uint)state.type.numParameters) {
		IrIndex paramType = state.type.parameterTypes[i];
		if (paramType.isTypeFloat) {
			if (regsRemaining) {
				state.abi.paramClasses[i] = PassClass.byValueReg;
				PhysReg reg = state.abiRegs[AMD64_REG_CLASS.XMM][$-regsRemaining];
				state.abi.paramData[i].regs[0] = reg;
				--regsRemaining;
			} else {
				state.abi.paramClasses[i] = PassClass.byValueMemory;
			}
		} else if (fitsIntoRegister(paramType, c)) {
			if (regsRemaining) {
				state.abi.paramClasses[i] = PassClass.byValueReg;
				PhysReg reg = state.abiRegs[AMD64_REG_CLASS.GPR][$-regsRemaining];
				state.abi.paramData[i].regs[0] = reg;
				--regsRemaining;
			} else {
				state.abi.paramClasses[i] = PassClass.byValueMemory;
			}
		} else {
			// too big for a register: pass a pointer to a stack copy
			if (regsRemaining) {
				state.abi.paramClasses[i] = PassClass.byPtrReg;
				PhysReg reg = state.abiRegs[AMD64_REG_CLASS.GPR][$-regsRemaining];
				state.abi.paramData[i].regs[0] = reg;
				--regsRemaining;
			} else {
				state.abi.paramClasses[i] = PassClass.byPtrMemory;
			}
		}

		//writefln("param %s %s %s", i, state.abi.paramClasses[i], state.abi.returnLoc.regs);
	}
}

/// Boolean result of the recursive SysV classification: was the value forced into memory?
enum InMemory : bool {
	no = false,
	yes = true,
}

/// Classifies a complete value for SysV passing and converts the per-eightbyte
/// classes into a platform-independent PassClass.
Sysv_AbiParamClass classify_value(CompilationContext* c, IrIndex paramType) {
	AbiClass[2] resultClasses = [AbiClass.no_class, AbiClass.no_class];
	InMemory in_mem = classify_value_impl(c, paramType, resultClasses, 0);
	if (in_mem) return Sysv_AbiParamClass(PassClass.byValueMemory, AbiClass.memory);

	// parse classes to PassClass
	if (resultClasses[1] == AbiClass.no_class) {
		if (resultClasses[0] == AbiClass.no_class)
			return Sysv_AbiParamClass(PassClass.ignore, AbiClass.no_class);
		else
			return Sysv_AbiParamClass(PassClass.byValueReg, resultClasses[0]);
	}
	assert(resultClasses[0] != AbiClass.no_class);
	return Sysv_AbiParamClass(PassClass.byValueRegMulti, resultClasses[0], resultClasses[1]);
}

/// Recursively classifies `paramType` located at `offset` within the outermost value,
/// merging the resulting classes into the two eightbyte slots of `resultClasses`.
/// Returns InMemory.yes when any rule forces the whole value into memory.
InMemory classify_value_impl(CompilationContext* c, IrIndex paramType, ref AbiClass[2] resultClasses, uint offset) {
	assert(paramType.isType);
	// if anything starts after 16 bytes, the whole thing is passed through memory
	if (offset >= 16) return InMemory.yes;

	SizeAndAlignment sizealign = c.types.typeSizeAndAlignment(paramType);
	if (sizealign.size == 0) return InMemory.no;
	// until we have support for m256 and m512, 16 bytes is max size for register
	if (sizealign.size > 16) return InMemory.yes;
	// if it is unaligned, the whole argument is passed in memory
	if (paddingSize(offset, sizealign.alignment) > 0) return InMemory.yes;

	AbiClass basic_class;
	final switch (paramType.typeKind) with(IrTypeKind) {
		case basic:
			final switch(paramType.basicType(c)) with(IrBasicType) {
				case noreturn_t: basic_class = AbiClass.no_class; break;
				case void_t: basic_class = AbiClass.no_class; break;
				case i8: basic_class = AbiClass.integer; break;
				case i16: basic_class = AbiClass.integer; break;
				case i32: basic_class = AbiClass.integer; break;
				case i64: basic_class = AbiClass.integer; break;
				case f32: basic_class = AbiClass.sse; break;
				case f64: basic_class = AbiClass.sse; break;
			}
			break;
		case pointer:
		case func_t:
			basic_class = AbiClass.integer;
			break;
		case array:
			return classify_array(c, paramType, resultClasses, offset);
		case struct_t:
			return classify_struct(c, paramType, resultClasses, offset);
	}

	uint slotIndex = offset / 8; // index of 8-byte
	c.assertf(slotIndex < 2, "Incorrect 8-byte index: %s", slotIndex); // must be 0 or 1
	resultClasses[slotIndex] = merge_classes(resultClasses[slotIndex], basic_class);
	return InMemory.no;
}

/// Merges two classes occupying the same eightbyte (subset of the SysV merge rules):
/// no_class yields the other operand; integer wins over sse.
AbiClass merge_classes(AbiClass a, AbiClass b) {
	if (a == b) return a;
	if (a > b) swap(a, b); // order operands so only the smaller class needs switching
	switch(a) with(AbiClass) {
		case no_class: return b;
		case memory: return memory;
		case integer: return integer;
		case sse: return sse;
		default: assert(false);
	}
}

/// Classifies each array element at its offset; any element forced to memory
/// forces the whole array to memory.
InMemory classify_array(CompilationContext* c, IrIndex paramType, ref AbiClass[2] resultClasses, uint offset) {
	IrTypeArray type = c.types.get!IrTypeArray(paramType);
	SizeAndAlignment element_sizealign = c.types.typeSizeAndAlignment(type.elemType);
	foreach(i; 0..type.numElements) {
		auto memberOffset = offset + element_sizealign.size * i;
		InMemory in_mem = classify_value_impl(c, type.elemType, resultClasses, memberOffset);
		// if any member ends up in memory, the whole thing is in memory
		if (in_mem) return InMemory.yes;
	}
	return InMemory.no;
}

/// Classifies each struct member at its offset; any member forced to memory
/// forces the whole struct to memory.
InMemory classify_struct(CompilationContext* c, IrIndex paramType, ref AbiClass[2] resultClasses, uint offset) {
	IrTypeStructMember[] members = c.types.get!IrTypeStruct(paramType).members;
	foreach(m; members) {
		auto memberOffset = offset + m.offset;
		InMemory in_mem = classify_value_impl(c, m.type, resultClasses, memberOffset);
		// if any member ends up in memory, the whole thing is in memory
		if (in_mem) return InMemory.yes;
	}
	return InMemory.no;
}

/// System V x86-64 classifier: classifies result and parameters, assigning GPR/XMM
/// registers per eightbyte class and falling back to memory when registers run out.
void sysv64_classify(CompilationContext* c, ref AbiState state)
{
	// independent counters for GPR and XMM register pools
	ubyte[2] regsRemaining;

	regsRemaining[AMD64_REG_CLASS.GPR] = cast(ubyte)state.abiRegs[AMD64_REG_CLASS.GPR].length;
	regsRemaining[AMD64_REG_CLASS.XMM] = cast(ubyte)state.abiRegs[AMD64_REG_CLASS.XMM].length;
	scope(exit) {
		size_t gpr = state.abiRegs[AMD64_REG_CLASS.GPR].length - regsRemaining[AMD64_REG_CLASS.GPR];
		size_t xmm = state.abiRegs[AMD64_REG_CLASS.XMM].length - regsRemaining[AMD64_REG_CLASS.XMM];
		state.abi.numRegistersUsed = cast(ubyte)(gpr + xmm);
	}

	state.abi.returnClass = PassClass.ignore;
	if (state.type.numResults == 1) {
		IrIndex resType = state.type.resultTypes[0];
		Sysv_AbiParamClass resClass = classify_value(c, resType);
		state.abi.returnClass = resClass.passClass;
		if (resClass.low == AbiClass.sse) {
			state.abi.returnLoc.regs[0] = amd64_reg.xmm0;
			if (resClass.high == AbiClass.sse)
				state.abi.returnLoc.regs[1] = amd64_reg.xmm1;
			else if (resClass.high == AbiClass.integer)
				state.abi.returnLoc.regs[1] = amd64_reg.ax;
		} else if (resClass.low == AbiClass.integer) {
			state.abi.returnLoc.regs[0] = amd64_reg.ax;
			if (resClass.high == AbiClass.sse)
				state.abi.returnLoc.regs[1] = amd64_reg.xmm0;
			else if (resClass.high == AbiClass.integer)
				state.abi.returnLoc.regs[1] = amd64_reg.dx;
		} else if (resClass.passClass == PassClass.byValueMemory) {
			// hidden pointer is passed as first parameter
			state.abi.returnClass = PassClass.byPtrReg;
			PhysReg reg = state.abiRegs[AMD64_REG_CLASS.GPR][0];
			state.abi.returnLoc.regs[0] = reg;
			state.abi.returnLoc.regs[1] = amd64_reg.ax; // we store return register in second slot
			--regsRemaining[AMD64_REG_CLASS.GPR];
		}
		//writefln("res %s %s", state.abi.returnClass, state.abi.returnLoc.regs);
	}

	// assign register or fallback to memory class
	foreach(uint i; 0..cast(uint)state.type.numParameters) {
		IrIndex paramType = state.type.parameterTypes[i];
		Sysv_AbiParamClass paramClass_sysv = classify_value(c, paramType);
		state.abi.paramClasses[i] = paramClass_sysv.passClass;
		switch(paramClass_sysv.low)
		{
			case AbiClass.integer:
			case AbiClass.sse:
				// 1 or 2 registers of same or different class
				ubyte getRegClass(AbiClass abiClass) {
					switch(abiClass) {
						case AbiClass.integer: return AMD64_REG_CLASS.GPR;
						case AbiClass.sse: return AMD64_REG_CLASS.XMM;
						default: assert(false);
					}
				}

				AbiClass[2] classes = [paramClass_sysv.low, paramClass_sysv.high];
				ubyte[2] regsNeeded;

				foreach(uint j; 0..paramClass_sysv.len) {
					ubyte regClass = getRegClass(classes[j]);
					++regsNeeded[regClass];
				}

				// all-or-nothing: if either register class lacks room, the whole value goes to memory
				foreach(ubyte regClass; 0..2) {
					if (regsRemaining[regClass] < regsNeeded[regClass]) {
						state.abi.paramClasses[i] = PassClass.byValueMemory;
						goto case AbiClass.memory;
					}
				}

				// assign regs
				foreach(uint j; 0..paramClass_sysv.len) {
					ubyte regClass = getRegClass(classes[j]);
					assert(regsRemaining[regClass]);
					PhysReg reg = state.abiRegs[regClass][$-regsRemaining[regClass]];
					state.abi.paramData[i].regs[j] = reg;
					--regsRemaining[regClass];
				}
				break;

			case AbiClass.memory: break;
			case AbiClass.no_class: break; // for empty structs
			default:
				c.internal_error("%s not implemented", paramClass_sysv.low);
		}
	}
}

/// SysV classifier variant for Linux syscalls: classifies like sysv64_classify,
/// then validates the syscall-specific restrictions (<= 6 params, register-only passing).
void sysv64_syscall_classify(CompilationContext* c, ref AbiState state)
{
	state.abi.syscallRegister = amd64_reg.ax;
	state.abi.useSyscall = true;
	if (c.targetOs != TargetOs.linux)
		c.error("Cannot use System V syscall calling convention on %s", c.targetOs);

	sysv64_classify(c, state);

	if (state.type.numParameters > 6) {
		c.error("Cannot have more than 6 parameters in System V syscall calling convention");
	}
	if (state.abi.returnClass != PassClass.byValueReg &&
		state.abi.returnClass != PassClass.byPtrReg &&
		state.abi.returnClass != PassClass.ignore)
		c.error("Cannot have return of class %s in System V syscall calling convention", state.abi.returnClass);
	foreach(PassClass paramClass; state.abi.paramClasses) {
		if (paramClass == PassClass.byValueReg || paramClass == PassClass.byPtrReg || paramClass == PassClass.ignore) continue;
		c.error("Cannot have parameters of class %s in System V syscall calling convention", paramClass);
	}
}

alias ArgClassifier = void function(CompilationContext* c, ref AbiState state);
// Indexed by the function type's callConv (see AbiState.run).
__gshared ArgClassifier[] classify_abis = [
	&win64_classify,
	&sysv64_classify,
	&sysv64_syscall_classify
];

// Handle ABI
/// IR pass: lowers parameter instructions and calls of `ir` to explicit physical
/// register moves, stack slots, pushes and stack adjustment instructions,
/// according to the classification computed by AbiState.
void func_pass_lower_abi(CompilationContext* c, IrFunction* ir, IrIndex funcIndex, ref IrBuilder builder)
{
	//writefln("lower_abi %s %s", builder.context.idString(ir.name), ir.getCallConvEnum(c));
	IrTypeFunction* irFuncType = &c.types.get!IrTypeFunction(ir.type);
	c.assertf(irFuncType.numResults <= 1, "%s results is not implemented", irFuncType.numResults);

	AbiState state;
	state.run(c, irFuncType);
	scope(exit) state.free(c);

	IrIndex hiddenParameter;
	if (state.abi.returnClass == PassClass.byPtrReg) {
		// pointer to return value is passed via hidden parameter, read it into virt reg
		IrIndex retType = c.types.appendPtr(state.type.resultTypes[0]);
		IrIndex paramReg = IrIndex(state.abi.returnLoc.regs[0], typeToRegSize(retType, c));
		ExtraInstrArgs extra = { type : retType };
		auto moveInstr = builder.emitInstr!(IrOpcode.move)(extra, paramReg);
		builder.prependBlockInstr(ir.entryBasicBlock, moveInstr.instruction);
		hiddenParameter = moveInstr.result;
	}

	// Rewrites one `parameter` instruction into the moves/loads dictated by its PassClass.
	void convParam(IrIndex instrIndex, ref IrInstrHeader instrHeader)
	{
		IrInstr_parameter* param = ir.get!IrInstr_parameter(instrIndex);
		uint paramIndex = param.index(ir);

		IrIndex type = ir.getVirtReg(instrHeader.result(ir)).type;

		PassClass paramClass = state.abi.paramClasses[paramIndex];
		final switch(paramClass) {
			case PassClass.byValueReg:
				// value arrives in a register: replace parameter with a single move
				IrIndex paramReg = IrIndex(state.abi.paramData[paramIndex].regs[0], typeToRegSize(type, c));
				ExtraInstrArgs extra = { result : instrHeader.result(ir) };
				auto moveInstr = builder.emitInstr!(IrOpcode.move)(extra, paramReg).instruction;
				replaceInstruction(ir, instrIndex, moveInstr);
				break;

			case PassClass.byValueRegMulti:
				// value arrives split across two registers
				PhysReg[2] paramRegs = state.abi.paramData[paramIndex].regs;
				IrIndex instr = receiveMultiValue(ir.nextInstr(instrIndex), paramRegs, instrHeader.result(ir), builder);
				replaceInstruction(ir, instrIndex, instr);
				break;

			case PassClass.byPtrReg:
				// a pointer to the value arrives in a register: move, then load through it
				type = c.types.appendPtr(type);
				IrIndex paramReg = IrIndex(state.abi.paramData[paramIndex].regs[0], typeToRegSize(type, c));
				ExtraInstrArgs extra1 = { type : type };
				auto moveInstr = builder.emitInstr!(IrOpcode.move)(extra1, paramReg);
				replaceInstruction(ir, instrIndex, moveInstr.instruction);

				ExtraInstrArgs extra2 = { result : instrHeader.result(ir) };
				IrIndex loadInstr = builder.emitInstr!(IrOpcode.load_aggregate)(extra2, moveInstr.result).instruction;
				ir.getInstr(loadInstr).isUniqueLoad = true;
				builder.insertAfterInstr(moveInstr.instruction, loadInstr);
				break;

			case PassClass.byValueMemory:
				IrIndex slot = builder.appendStackSlot(type, c.types.typeSizeAndAlignment(type), StackSlotKind.parameter);
				ir.getStackSlot(slot).displacement = state.abi.paramData[paramIndex].stackOffset;
				ir.getStackSlot(slot).sizealign = state.abi.paramData[paramIndex].stackSizealign;

				// is directly in stack
				ExtraInstrArgs extra = { result : instrHeader.result(ir) };
				IrIndex loadInstr;
				if (type.isTypeAggregate) {
					// happens on sysv64
					loadInstr = builder.emitInstr!(IrOpcode.load_aggregate)(extra, slot).instruction;
				} else {
					extra.argSize = getTypeArgSize(type, c);
					loadInstr = builder.emitInstr!(IrOpcode.load)(extra, slot).instruction;
				}
				replaceInstruction(ir, instrIndex, loadInstr);
				break;

			case PassClass.byPtrMemory:
				// stack contains pointer to data
				type = c.types.appendPtr(type);
				IrIndex slot = builder.appendStackSlot(type, c.types.typeSizeAndAlignment(type), StackSlotKind.parameter);
				ir.getStackSlot(slot).displacement = state.abi.paramData[paramIndex].stackOffset;
				ir.getStackSlot(slot).sizealign = state.abi.paramData[paramIndex].stackSizealign;

				IrArgSize argSize = getTypeArgSize(type, c);
				ExtraInstrArgs extra = { argSize : argSize, type : type };
				InstrWithResult loadInstr = builder.emitInstr!(IrOpcode.load)(extra, slot);
				// remove parameter instruction
				replaceInstruction(ir, instrIndex, loadInstr.instruction);

				// load aggregate
				ExtraInstrArgs extra2 = { result : instrHeader.result(ir) };
				InstrWithResult loadInstr2 = builder.emitInstr!(IrOpcode.load_aggregate)(extra2, loadInstr.result);
				ir.getInstr(loadInstr2.instruction).isUniqueLoad = true;
				builder.insertAfterInstr(loadInstr.instruction, loadInstr2.instruction);
				break;

			case PassClass.ignore:
				// use zeroinited struct
				builder.redirectVregUsersTo(instrHeader.result(ir), c.constants.addZeroConstant(type));
				removeInstruction(ir, instrIndex);
				break;
		}
	}

	// Rewrites one call instruction: classifies the callee's signature, copies
	// arguments to stack/registers, and fixes up the call's register argument list.
	void convCall(IrIndex instrIndex, ref IrInstrHeader instrHeader)
	{
		ir.numCalls += 1;

		IrIndex callee = instrHeader.arg(ir, 0);
		IrIndex calleeTypeIndex = ir.getValueType(c, callee);
		if (calleeTypeIndex.isTypePointer)
			calleeTypeIndex = c.types.getPointerBaseType(calleeTypeIndex);
		IrTypeFunction* calleeType = &c.types.get!IrTypeFunction(calleeTypeIndex);

		AbiState callee_state;
		callee_state.run(c, calleeType);
		scope(exit) callee_state.free(c);

		CallConv* callConv = c.types.getCalleeCallConv(callee, ir, c);
		IrIndex[] args = instrHeader.args(ir)[1..$]; // exclude callee
		IrIndex originalResult;
		IrIndex hiddenPtr;
		bool hasHiddenPtr = false;

		// allocate stack slot for big return value
		if (callee_state.abi.returnClass == PassClass.byPtrReg)
		{
			IrIndex resType = callee_state.type.resultTypes[0];
			originalResult = instrHeader.result(ir); // we reuse result slot

			// reuse result slot of instruction as first argument
			instrHeader._payloadOffset -= 1;
			instrHeader.hasResult = false;
			instrHeader.numArgs += 1;

			args = instrHeader.args(ir)[1..$];
			// move callee in first arg
			instrHeader.arg(ir, 0) = callee;
			// place return arg slot in second arg
			hiddenPtr = builder.appendStackSlot(resType, c.types.typeSizeAndAlignment(resType), StackSlotKind.argument);
			args[0] = hiddenPtr;
			hasHiddenPtr = true;
		}

		enum STACK_ITEM_SIZE = 8;
		size_t numArgs = args.length;
		// how many bytes are allocated on the stack before func call
		size_t stackReserve;
		if (callConv.hasShadowSpace)
		{
			stackReserve = 4 * STACK_ITEM_SIZE;
		}

		// Copy args to stack if necessary (big structs or run out of regs)
		foreach (size_t i; cast(size_t)hasHiddenPtr..args.length)
		{
			IrIndex arg = args[i];
			removeUser(c, ir, instrIndex, arg);

			PassClass paramClass = callee_state.abi.paramClasses[i-cast(size_t)hasHiddenPtr];
			IrIndex type = callee_state.type.parameterTypes[i-cast(size_t)hasHiddenPtr];
			final switch(paramClass) {
				case PassClass.byValueReg:
					args[i] = simplifyConstant(arg, c);
					break;
				case PassClass.byValueRegMulti: break;
				case PassClass.byPtrReg, PassClass.byPtrMemory:
					//allocate stack slot, store value there and use slot pointer as argument
					args[i] = builder.appendStackSlot(type, c.types.typeSizeAndAlignment(type), StackSlotKind.argument);
					IrIndex instr = builder.emitInstr!(IrOpcode.store)(ExtraInstrArgs(), args[i], arg);
					builder.insertBeforeInstr(instrIndex, instr);
					break;
				case PassClass.byValueMemory:
					if (type.fitsIntoRegister(c)) args[i] = simplifyConstant(arg, c);
					break; // handled later
				case PassClass.ignore: break;
			}
		}

		// Stack layouting code makes sure that local data has 16 byte alignment if we have calls in IR.
		// align stack and push args that didn't fit into registers (register size args)
		if (callee_state.abi.stackSizealign.size > 0)
		{
			if (callee_state.abi.stackSizealign.alignment > 16) {
				c.unrecoverable_error(TokenIndex(), "Stack alignment of %s > 16 is not implemented", callee_state.abi.stackSizealign.alignment);
			}

			if (callee_state.abi.stackSizealign.size % callee_state.abi.stackSizealign.alignment != 0)
			{
				uint padding = paddingSize(callee_state.abi.stackSizealign.size, callee_state.abi.stackSizealign.alignment);
				// align stack to 16 bytes
				// TODO: SysV ABI needs 32byte alignment if __m256 is passed
				stackReserve += padding;
				IrIndex paddingSize = c.constants.add(makeIrType(IrBasicType.i32), padding);
				builder.emitInstrBefore!(IrOpcode.grow_stack)(instrIndex, ExtraInstrArgs(), paddingSize);
			}

			// choose stack ordering
			int start = cast(int)callee_state.abi.paramClasses.length-1;
			int end = 0;
			int inc = -1;

			if (callee_state.abi.reverseStackOrder) {
				swap(start, end);
				++end;
				inc = 1;
			}
			uint stackOffset = 0;
			// push args to stack
			for (int i = start; i != end; i += inc) {
				PassClass paramClass = callee_state.abi.paramClasses[i];
				IrIndex arg = args[i + cast(int)hasHiddenPtr];
				if (paramClass == PassClass.byValueMemory || paramClass == PassClass.byPtrMemory) {
					ParamLocation paramData = callee_state.abi.paramData[i];

					IrIndex type = ir.getValueType(c, arg);
					uint size = c.types.typeSize(type);
					//writefln("param %s %s %s %s", i, stackOffset, paramData.stackOffset, size);

					// push cannot be used with xmm registers. Convert those to grow_stack + store
					if (size <= 8 && !type.isTypeFloat) {
						auto pushInstr = builder.emitInstr!(IrOpcode.push)(ExtraInstrArgs(), arg);
						builder.insertBeforeInstr(instrIndex, pushInstr);
						stackOffset += 8;
					} else {
						// this must be multiple of 8
						uint allocSize = paramData.stackSizealign.size;
						if (allocSize > 0) {
							IrIndex paddingSize = c.constants.add(makeIrType(IrBasicType.i32), allocSize);
							builder.emitInstrBefore!(IrOpcode.grow_stack)(instrIndex, ExtraInstrArgs(), paddingSize);
						}

						// store the argument through the (just adjusted) stack pointer
						IrIndex ptrType = c.types.appendPtr(type);
						ExtraInstrArgs extra = { type : ptrType };
						IrIndex ptr = builder.emitInstrBefore!(IrOpcode.move)(instrIndex, extra, IrIndex(amd64_reg.sp, ArgType.QWORD)).result;
						builder.emitInstrBefore!(IrOpcode.store)(instrIndex, ExtraInstrArgs(), ptr, arg);

						stackOffset += allocSize;
					}
				}
			}
			assert(stackOffset == callee_state.abi.stackSizealign.size);
			stackReserve += callee_state.abi.stackSizealign.size;
		}

		if (callee_state.abi.returnClass == PassClass.byPtrReg) {
			// pass the hidden return-slot pointer in its designated register
			IrIndex type = ir.getValueType(c, args[0]);
			IrIndex argRegister = IrIndex(callee_state.abi.returnLoc.regs[0], typeToRegSize(type, c));
			ExtraInstrArgs extra = { result : argRegister };
			builder.emitInstrBefore!(IrOpcode.move)(instrIndex, extra, args[0]);
		}

		// move args to registers
		foreach(i, paramClass; callee_state.abi.paramClasses) {
			IrIndex arg = args[i + cast(int)hasHiddenPtr];
			ParamLocation paramData = callee_state.abi.paramData[i];

			IrIndex type = ir.getValueType(c, arg);
			final switch(paramClass) {
				case PassClass.byValueReg, PassClass.byPtrReg:
					IrIndex argRegister = IrIndex(paramData.regs[0], typeToRegSize(type, c));
					ExtraInstrArgs extra = { result : argRegister };
					builder.emitInstrBefore!(IrOpcode.move)(instrIndex, extra, arg);
					break;
				case PassClass.byValueRegMulti:
					IrIndex[2] vals = simplifyConstant128(instrIndex, arg, builder, c);

					IrIndex reg1 = IrIndex(paramData.regs[0], ArgType.QWORD);
					ExtraInstrArgs extra3 = { result : reg1 };
					builder.emitInstrBefore!(IrOpcode.move)(instrIndex, extra3, vals[0]);

					IrIndex reg2 = IrIndex(paramData.regs[1], ArgType.QWORD);
					ExtraInstrArgs extra4 = { result : reg2 };
					builder.emitInstrBefore!(IrOpcode.move)(instrIndex, extra4, vals[1]);
					break;
				case PassClass.byValueMemory: break; // handled below
				case PassClass.byPtrMemory: break; // handled below
				case PassClass.ignore:
					break;
			}
		}

		if (callConv.hasShadowSpace)
		{	// Allocate shadow space for 4 physical registers
			IrIndex const_32 = c.constants.add(makeIrType(IrBasicType.i32), 32);
			auto growStackInstr = builder.emitInstr!(IrOpcode.grow_stack)(ExtraInstrArgs(), const_32);
			builder.insertBeforeInstr(instrIndex, growStackInstr);
			ir.getInstr(instrIndex).extendFixedArgRange = true;
		}

		// fix arguments
		scope(exit) {
			ubyte regsUsed = callee_state.abi.numRegistersUsed;

			// Writes every register used by this call into the instruction's argument
			// slots, so that liveness analysis sees them at the call site.
			void fillRegs(IrIndex[] instrArgs) {
				assert(instrArgs.length == regsUsed);
				uint nextIndex = 0;
				// handle return by ptr reg
				if (callee_state.abi.returnClass == PassClass.byPtrReg) {
					// size is irrelevant here because register is only mentioned here to aid register allocation
					instrArgs[nextIndex] = IrIndex(callee_state.abi.returnLoc.regs[0], ArgType.QWORD);
					++nextIndex;
				}
				// order must be preserved, because liveness analysis relies on that
				foreach(i, PassClass paramClass; callee_state.abi.paramClasses) {
					final switch(paramClass) with(PassClass) {
						case byValueReg, byPtrReg:
							ParamLocation loc = callee_state.abi.paramData[i];
							// size is irrelevant here because register is only mentioned here to aid register allocation
							instrArgs[nextIndex] = IrIndex(loc.regs[0], ArgType.QWORD);
							++nextIndex;
							break;
						case byValueRegMulti:
							ParamLocation loc = callee_state.abi.paramData[i];
							// size is irrelevant here because register is only mentioned here to aid register allocation
							instrArgs[nextIndex] = IrIndex(loc.regs[0], ArgType.QWORD);
							instrArgs[nextIndex+1] = IrIndex(loc.regs[1], ArgType.QWORD);
							nextIndex += 2;
							break;
						case byValueMemory, byPtrMemory, ignore: break; // skip, non register param
					}
				}
				assert(nextIndex == regsUsed);
			}

			if (regsUsed + 1 <= instrHeader.numArgs) { // +1 includes callee
				if (callee_state.abi.useSyscall) {
					// Put syscall number into the correct register
					IrIndex syscallRegister = IrIndex(callee_state.abi.syscallRegister, ArgType.DWORD);
					ExtraInstrArgs extra = { result : syscallRegister };
					builder.emitInstrBefore!(IrOpcode.move)(instrIndex, extra, c.constants.add(makeIrType(IrBasicType.i32), callee_state.type.syscallNumber));

					// We need bigger instruction for syscall because of extra syscallRegister at the end
					ExtraInstrArgs callExtra = { hasResult : instrHeader.hasResult, result : instrHeader.tryGetResult(ir), extraArgSlots : cast(ubyte)(regsUsed + 1) };

					// We leave callee argument in here, so that liveness analysis can get calling convention info
					IrIndex newCallInstr = builder.emitInstr!(IrOpcode.syscall)(callExtra, instrHeader.arg(ir, 0)).instruction;
					IrInstrHeader* callHeader = ir.getInstr(newCallInstr);
					fillRegs(callHeader.args(ir)[1..$-1]); // fill with regs

					// Since we did the mov to the syscallRegister after all other movs, syscallRegister needs to be passed at the end of the list
					// to preserve the order invariant for the liveness analysis
					callHeader.args(ir)[$-1] = syscallRegister;

					replaceInstruction(ir, instrIndex, newCallInstr);
					instrIndex = newCallInstr;
				} else {
					// Reuse instruction
					instrHeader.numArgs = cast(ubyte)(regsUsed + 1); // include callee

					fillRegs(instrHeader.args(ir)[1..$]); // fill with regs
				}
			} else {
				assert(!callee_state.abi.useSyscall); // Syscalls are handled in the other case

				// Make bigger instruction
				ExtraInstrArgs callExtra = { hasResult : instrHeader.hasResult, result : instrHeader.tryGetResult(ir), extraArgSlots : regsUsed };
				IrIndex newCallInstr = builder.emitInstr!(IrOpcode.call)(callExtra, instrHeader.arg(ir, 0)).instruction;
				IrInstrHeader* callHeader = ir.getInstr(newCallInstr);
				fillRegs(callHeader.args(ir)[1..$]); // fill with regs

				replaceInstruction(ir, instrIndex, newCallInstr);
				instrIndex = newCallInstr;
			}
		}

		{
			// If function is noreturn we don't need to insert cleanup code
			if (calleeType.numResults == 1)
			{
				IrIndex resType = callee_state.type.resultTypes[0];
				if (resType.isTypeNoreturn) return;
			}

			// Instructions will be added after this one
			IrIndex lastInstr = instrIndex;

			// Deallocate stack after call
			if (stackReserve > 0)
			{
				IrIndex conReservedBytes = c.constants.add(makeIrType(IrBasicType.i32), stackReserve);
				auto shrinkStackInstr = builder.emitInstr!(IrOpcode.shrink_stack)(ExtraInstrArgs(), conReservedBytes);
				builder.insertAfterInstr(lastInstr, shrinkStackInstr);
				lastInstr = shrinkStackInstr; // insert next instr after this one
				ir.getInstr(instrIndex).extendFixedResultRange = true;
			}

			// for calls that return in register
			final switch (callee_state.abi.returnClass) {
				case PassClass.byValueReg:
					// mov result to virt reg
					IrIndex returnReg = IrIndex(callee_state.abi.returnLoc.regs[0], typeToIrArgSize(ir.getVirtReg(instrHeader.result(ir)).type, c));
					ExtraInstrArgs extra = { result : instrHeader.result(ir) };
					auto moveInstr = builder.emitInstr!(IrOpcode.move)(extra, returnReg).instruction;
					builder.insertAfterInstr(lastInstr, moveInstr);
					instrHeader.result(ir) = returnReg;
					break;
				case PassClass.byValueRegMulti:
					PhysReg[2] retRegs = callee_state.abi.returnLoc.regs;
878 IrIndex instr = receiveMultiValue(ir.nextInstr(instrIndex), retRegs, instrHeader.result(ir), builder); 879 builder.insertAfterInstr(lastInstr, instr); 880 instrHeader.result(ir) = IrIndex(retRegs[0], ArgType.QWORD); // TODO: need to put both registers as result 881 break; 882 case PassClass.byPtrReg: 883 ExtraInstrArgs extra = { result : originalResult }; 884 IrIndex loadInstr = builder.emitInstr!(IrOpcode.load_aggregate)(extra, hiddenPtr).instruction; 885 ir.getInstr(loadInstr).isUniqueLoad = true; 886 builder.insertAfterInstr(lastInstr, loadInstr); 887 break; 888 case PassClass.byPtrMemory: 889 case PassClass.byValueMemory: 890 c.internal_error("invalid return class", callee_state.abi.returnClass); 891 case PassClass.ignore: break; // no result, or empty struct 892 } 893 } 894 } 895 896 void convReturn(IrIndex instrIndex, ref IrInstrHeader instrHeader) 897 { 898 // rewrite ret_val as ret in-place 899 instrHeader.op = IrOpcode.ret; 900 901 removeUser(c, ir, instrIndex, instrHeader.arg(ir, 0)); 902 PhysReg[2] resRegs = state.abi.returnLoc.regs; 903 904 final switch (state.abi.returnClass) 905 { 906 case PassClass.byPtrReg: 907 // store struct into pointer, then return pointer 908 IrIndex value = instrHeader.arg(ir, 0); 909 IrIndex instr = builder.emitInstr!(IrOpcode.store)(ExtraInstrArgs(), hiddenParameter, value); 910 builder.insertBeforeInstr(instrIndex, instr); 911 IrIndex result = IrIndex(resRegs[1], ArgType.QWORD); // we store return register in second slot 912 ExtraInstrArgs extra = { result : result }; 913 IrIndex copyInstr = builder.emitInstr!(IrOpcode.move)(extra, hiddenParameter).instruction; 914 builder.insertBeforeInstr(instrIndex, copyInstr); 915 break; 916 case PassClass.byValueReg: 917 IrIndex value = simplifyConstant(instrHeader.arg(ir, 0), c); 918 IrIndex type = irFuncType.resultTypes[0]; 919 IrIndex result = IrIndex(resRegs[0], typeToRegSize(type, c)); 920 ExtraInstrArgs extra = { result : result }; 921 IrIndex copyInstr = 
builder.emitInstr!(IrOpcode.move)(extra, value).instruction; 922 builder.insertBeforeInstr(instrIndex, copyInstr); 923 break; 924 case PassClass.byValueRegMulti: 925 IrIndex[2] vals = simplifyConstant128(instrIndex, instrHeader.arg(ir, 0), builder, c); 926 927 IrIndex result1 = IrIndex(resRegs[0], ArgType.QWORD); 928 ExtraInstrArgs extra3 = { result : result1 }; 929 builder.emitInstrBefore!(IrOpcode.move)(instrIndex, extra3, vals[0]); 930 931 IrIndex result2 = IrIndex(resRegs[1], ArgType.QWORD); 932 ExtraInstrArgs extra4 = { result : result2 }; 933 builder.emitInstrBefore!(IrOpcode.move)(instrIndex, extra4, vals[1]); 934 break; 935 case PassClass.byValueMemory, PassClass.byPtrMemory: 936 c.internal_error("Invalid return class %s", state.abi.returnClass); 937 case PassClass.ignore: 938 break; 939 } 940 // rewrite ret_val as ret in-place 941 instrHeader.op = IrOpcode.ret; 942 instrHeader.numArgs = 0; 943 } 944 945 foreach (IrIndex blockIndex, ref IrBasicBlock block; ir.blocks) 946 { 947 foreach(IrIndex instrIndex, ref IrInstrHeader instrHeader; block.instructions(ir)) 948 { 949 switch(instrHeader.op) 950 { 951 case IrOpcode.parameter: convParam(instrIndex, instrHeader); break; 952 case IrOpcode.call: convCall(instrIndex, instrHeader); break; 953 case IrOpcode.ret_val: convReturn(instrIndex, instrHeader); break; 954 default: break; 955 } 956 } 957 } 958 } 959 960 // glue 2 registers into aggregate 961 IrIndex receiveMultiValue(IrIndex beforeInstr, PhysReg[2] regs, IrIndex result, ref IrBuilder builder) { 962 IrIndex type = builder.ir.getVirtReg(result).type; 963 IrIndex reg1 = IrIndex(regs[0], ArgType.QWORD); 964 auto sizealign = builder.context.types.typeSizeAndAlignment(type); 965 IrIndex reg2 = IrIndex(regs[1], ArgType.QWORD); 966 967 ExtraInstrArgs extra1 = { type : makeIrType(IrBasicType.i64) }; 968 auto move1 = builder.emitInstr!(IrOpcode.move)(extra1, reg1); 969 builder.insertBeforeInstr(beforeInstr, move1.instruction); 970 971 ExtraInstrArgs extra2 = { type : 
makeIrType(IrBasicType.i64) };
	auto move2 = builder.emitInstr!(IrOpcode.move)(extra2, reg2);
	builder.insertBeforeInstr(beforeInstr, move2.instruction);

	// store both regs into stack slot, then load aggregate
	IrIndex slot = builder.appendStackSlot(type, SizeAndAlignment(16, sizealign.alignmentPower), StackSlotKind.local);

	IrIndex addr1 = genAddressOffset(slot, 0, builder.context.i64PtrType, beforeInstr, builder);
	IrIndex store1 = builder.emitInstr!(IrOpcode.store)(ExtraInstrArgs(), addr1, move1.result);
	builder.insertBeforeInstr(beforeInstr, store1);

	IrIndex addr2 = genAddressOffset(slot, 8, builder.context.i64PtrType, beforeInstr, builder);
	IrIndex store2 = builder.emitInstr!(IrOpcode.store)(ExtraInstrArgs(), addr2, move2.result);
	builder.insertBeforeInstr(beforeInstr, store2);

	ExtraInstrArgs extra3 = { result : result };
	IrIndex loadInstr = builder.emitInstr!(IrOpcode.load_aggregate)(extra3, slot).instruction;
	builder.ir.getInstr(loadInstr).isUniqueLoad = true;
	return loadInstr;
}

// For sysv64 ABI
// Given aggregate constant of (size > 8 && size <= 16) with all members aligned, produces 2 64bit values
IrIndex[2] simplifyConstant128(IrIndex insertBefore, IrIndex value, ref IrBuilder builder, CompilationContext* c)
{
	IrIndex[2] vals;
	if (value.isConstantZero) {
		// Zero aggregate: both halves are the zero i64 constant.
		vals[] = c.constants.addZeroConstant(makeIrType(IrBasicType.i64));
	} else if (value.isConstantAggregate) {
		// Serialize the constant into a 16-byte buffer and reinterpret it as
		// two ulongs through the union.
		// NOTE(review): removed unused local
		//   `IrAggregateConstant* con = &c.constants.getAggregate(value);`
		// — it was never referenced after its declaration.
		union Repr {
			ubyte[16] buf;
			ulong[2] items;
		}
		Repr repr;
		// Callback invoked by constantToMem for members it cannot serialize to
		// raw bytes (presumably globals/symbolic addresses — the IrIndex stands
		// in for that half); pointer identity tells us which 8-byte half it is.
		void onGlobal(ubyte[] subbuffer, IrIndex index, CompilationContext* c) {
			if (subbuffer.ptr == repr.buf.ptr) {
				vals[0] = index;
			} else {
				assert(subbuffer.ptr == repr.buf.ptr + 8);
				vals[1] = index;
			}
		}
		constantToMem(repr.buf[], value, c, &onGlobal);
		// Halves not claimed by the callback become plain i64 constants.
		if (vals[0].isUndefined) vals[0] = c.constants.add(makeIrType(IrBasicType.i64), repr.items[0]);
		if (vals[1].isUndefined) vals[1] = c.constants.add(makeIrType(IrBasicType.i64), repr.items[1]);
	} else {
		// Not a constant: extract both 8-byte slices with explicit instructions
		// at byte offsets 0 and 8.
		ExtraInstrArgs extra1 = { type : makeIrType(IrBasicType.i64) };
		vals[0] = builder.emitInstrBefore!(IrOpcode.get_aggregate_slice)(insertBefore, extra1, value, c.constants.addZeroConstant(makeIrType(IrBasicType.i32))).result;

		ExtraInstrArgs extra2 = { type : makeIrType(IrBasicType.i64) };
		vals[1] = builder.emitInstrBefore!(IrOpcode.get_aggregate_slice)(insertBefore, extra2, value, c.constants.add(makeIrType(IrBasicType.i32), 8)).result;
	}
	return vals;
}