/**
Copyright: Copyright (c) 2018-2019 Andrey Penechko.
License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
Authors: Andrey Penechko.
*/

module vox.be.emit_mc_amd64;

import std.stdio;

import vox.all;
import vox.be.amd64asm;

/// Emits machine code for amd64 architecture
void pass_emit_mc_amd64(ref CompilationContext context, CompilePassPerModule[] subPasses)
{
	auto emitter = CodeEmitter(&context);

	// emit code
	foreach (ref SourceFileInfo file; context.files.data) {
		emitter.compileModule(file.mod);
	}

	fillStaticDataSections(&context);

	if (context.printStaticData) {
		writefln("// RW data: addr 0x%X, %s bytes",
			context.staticDataBuffer.bufPtr,
			context.staticDataBuffer.length);
		printHex(context.staticDataBuffer.data, 16);
		writefln("// RO data: addr 0x%X, %s bytes",
			context.roStaticDataBuffer.bufPtr,
			context.roStaticDataBuffer.length);
		printHex(context.roStaticDataBuffer.data, 16);
	}
}

// Arranges static data inside static data sections
void fillStaticDataSections(CompilationContext* c)
{
	// copy initialized static data into buffer and set offsets
	foreach(size_t i, ref IrGlobal global; c.globals.buffer.data)
	{
		ObjectSymbol* globalSym = c.objSymTab.getSymbol(global.objectSymIndex);
		if (globalSym.isAllZero) continue;

		ObjectSection* symSection = c.objSymTab.getSection(globalSym.sectionIndex);

		if (symSection.buffer.contains(globalSym.initializer.ptr)) {
			// If data is already in the section buffer we assume that zero termination was handled as needed
			globalSym.sectionOffset = cast(uint)(globalSym.initializer.ptr - symSection.buffer.bufPtr);
			continue;
		}

		// alignment
		uint padding = paddingSize!uint(cast(uint)symSection.buffer.length, globalSym.alignment);
		symSection.buffer.pad(padding);

		// offset
		globalSym.sectionOffset = cast(uint)symSection.buffer.length;

		// copy data
		c.assertf(globalSym.dataPtr !is null, "null initializer");
		symSection.buffer.put(globalSym.initializer);

		// zero termination
		if (globalSym.needsZeroTermination) symSection.buffer.put(0);
		//writefln("Global %s, size %s, zero %s, offset %s, buf size %s",
		//	globalSym.initializer, globalSym.length, globalSym.needsZeroTermination, globalSym.sectionOffset, symSection.buffer.length);
	}

	uint zeroDataOffset = cast(uint)c.staticDataBuffer.length;
	LinkIndex rwSectionIndex = c.builtinSections[ObjectSectionType.rw_data];

	// second pass for zero initialized data
	foreach(size_t i, ref IrGlobal global; c.globals.buffer.data)
	{
		ObjectSymbol* globalSym = c.objSymTab.getSymbol(global.objectSymIndex);
		if (!globalSym.isAllZero) continue;

		c.assertf(globalSym.sectionIndex == rwSectionIndex, "Cannot have zero-initialized data in sections other than RW");

		// alignment
		uint padding = paddingSize!uint(zeroDataOffset, globalSym.alignment);
		zeroDataOffset += padding;

		// offset
		globalSym.sectionOffset = zeroDataOffset;

		// reserve space (no data is copied for zero-initialized globals)
		zeroDataOffset += globalSym.length;

		// zero termination
		if (globalSym.needsZeroTermination) ++zeroDataOffset;
	}

	ObjectSection* rwSection = c.objSymTab.getSection(c.builtinSections[ObjectSectionType.rw_data]);
	c.zeroDataLength = zeroDataOffset - cast(uint)c.staticDataBuffer.length;
	rwSection.zeroDataLength = c.zeroDataLength;
}

//version = emit_mc_print;

struct CodeEmitter
{
	CompilationContext* context;

	FunctionDeclNode* fun;
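	/// Machine-level LIR of the function currently being compiled (set in compileFunction)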
	IrFunction* lir;
	CodeGen_x86_64 gen;
	PC[] blockStarts;
	PC[2][] jumpFixups;
	int stackPointerExtraOffset;
	IrIndex stackPointer;

	void compileModule(ModuleDeclNode* mod)
	{
		ubyte* codeStart = context.codeBuffer.nextPtr;
		gen.encoder.setBuffer(&context.codeBuffer);

		foreach(funcIndex; mod.functions) {
			FunctionDeclNode* f = context.getAst!FunctionDeclNode(funcIndex);

			if (f.isExternal) continue;
			compileFunction(f);
		}

		ubyte[] code = codeStart[0..context.codeBuffer.nextPtr-codeStart];

		if (context.printCodeHex && context.printDumpOfAll) {
			writefln("// Amd64 code: addr 0x%X, %s bytes", code.ptr, code.length);
			printHex(code, 16);
			writeln;
		}
	}

	void compileFunction(FunctionDeclNode* f)
	{
		context.currentFunction = f;
		scope(exit) context.currentFunction = null;

		fun = f;
		lir = context.getAst!IrFunction(fun.backendData.lirData);

		ObjectSymbol* funcSym = context.objSymTab.getSymbol(fun.backendData.objectSymIndex);
		funcSym.dataPtr = gen.pc;
		funcSym.sectionOffset = cast(ulong)(gen.pc - context.codeBuffer.bufPtr);

		if (context.buildType == BuildType.exe && fun.id == CommonIds.id_main)
		{
			if (context.entryPoint !is null)
			{
				context.unrecoverable_error(fun.loc, "Multiple entry points: %s, %s", fun.loc, context.entryPoint.loc);
			}

			context.entryPoint = fun;

			if (context.targetOs == TargetOs.linux) {
				// On Linux the entry point is aligned to 16 bytes, but we assume 16 byte alignment + 8 bytes from the call instruction
				// section 3.4.1 of AMD64 ABI 1.0 says:
				// `rsp`: The stack pointer holds the address of the byte with lowest address which is part of
				// the stack. It is guaranteed to be 16-byte aligned at process entry
				// https://stackoverflow.com/questions/26866723/main-and-stack-alignment
				gen.subq(Register.SP, Imm8(8));
			}
		}

		stackPointer = IrIndex(lir.getCallConv(context).stackPointer, ArgType.QWORD);

		blockStarts = cast(PC[])context.tempBuffer.voidPut(lir.numBasicBlocks * (PC.sizeof / uint.sizeof));

		uint[] buf = context.tempBuffer.voidPut(lir.numBasicBlocks * 2 * (PC.sizeof / uint.sizeof)); // TODO: free mem
		// buf[] = 0; //zeroing is not needed, because both slots are correctly filled by jump instruction emitters
		jumpFixups = cast(PC[2][])buf;

		compileFuncProlog();
		compileBody();
		fixJumps();

		funcSym.length = cast(uint)(gen.pc - funcSym.dataPtr);

		if (context.printCodeHex && context.printDumpOnlyOf(f)) {
			writefln("// Amd64 code: %s addr 0x%X, %s bytes", context.idString(f.id), funcSym.dataPtr, funcSym.length);
			printHex(funcSym.dataPtr[0..funcSym.length], 16);
		}
	}

	void compileFuncProlog()
	{
		uint reservedBytes = lir.stackFrameSize;

		// frame pointer is stored with a push, so don't allocate space for it
		if (context.useFramePointer) {
			context.assertf(reservedBytes >= STACK_ITEM_SIZE, "bug");
			reservedBytes -= STACK_ITEM_SIZE;
		}

		// Establish frame pointer
		if (context.useFramePointer)
		{
			gen.pushq(Register.BP);
			gen.movq(Register.BP, Register.SP);
		}

		if (reservedBytes) // Reserve space for locals
		{
			if (reservedBytes > byte.max) gen.subq(Register.SP, Imm32(reservedBytes));
			else gen.subq(Register.SP, Imm8(cast(byte)reservedBytes));
		}
	}

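	/// Mirrors compileFuncProlog: frees the reserved stack space, restores the frame pointer if used, and returns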
	void compileFuncEpilog()
	{
		uint reservedBytes = lir.stackFrameSize;

		// frame pointer is stored with a push, so don't allocate space for it
		if (context.useFramePointer) {
			context.assertf(reservedBytes >= STACK_ITEM_SIZE, "bug");
			reservedBytes -= STACK_ITEM_SIZE;
		}

		if (reservedBytes)
		{
			if (reservedBytes > byte.max) gen.addq(Register.SP, Imm32(reservedBytes));
			else gen.addq(Register.SP, Imm8(cast(byte)reservedBytes));
		}

		if (context.useFramePointer)
		{
			// Restore frame pointer
			gen.popq(Register.BP);
		}

		gen.ret();
	}

	uint referenceOffset()
	{
		ObjectSymbol* funcSym = context.objSymTab.getSymbol(fun.backendData.objectSymIndex);
		ptrdiff_t diff = gen.pc - funcSym.dataPtr;
		context.assertf(diff >= 0, "Negative buffer position");
		context.assertf(diff <= uint.max, "Function is bigger than uint.max");
		return cast(uint)diff;
	}

	// successorBIndex is 0 or 1
	void genJumpToSuccessors(ref IrBasicBlock fromBlock, ubyte successorBIndex, PC successorA = null)
	{
		if (fromBlock.seqIndex + 1 != lir.getBlock(fromBlock.successors[successorBIndex, lir]).seqIndex) {
			gen.jmp(Imm32(0));
			jumpFixups[fromBlock.seqIndex][successorBIndex] = gen.pc;
		} else {
			// zero out the successor fixup
			jumpFixups[fromBlock.seqIndex][successorBIndex] = null;
		}
		// zero out the other fixup
		jumpFixups[fromBlock.seqIndex][1 - successorBIndex] = successorA;
	}

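	/// Emits machine code for every basic block in sequential order.
	/// Records each block's start address in blockStarts and jump fixup locations
	/// in jumpFixups, which are patched afterwards by fixJumps.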
	void compileBody()
	{
		lir.assignSequentialBlockIndices();

		foreach (IrIndex lirBlockIndex, ref IrBasicBlock lirBlock; lir.blocks)
		{
			blockStarts[lirBlock.seqIndex] = gen.pc;
			stackPointerExtraOffset = 0;
			foreach(IrIndex instrIndex, ref IrInstrHeader instrHeader; lirBlock.instructions(lir))
			{
				switch(cast(Amd64Opcode)instrHeader.op)
				{
					case Amd64Opcode.mov:
						genMove(instrHeader.result(lir), instrHeader.arg(lir, 0));
						break;
					case Amd64Opcode.xchg:
						IrIndex arg0 = instrHeader.arg(lir, 0);
						IrIndex arg1 = instrHeader.arg(lir, 1);
						context.assertf(arg1.isPhysReg, "%s is not phys reg", arg1);
						context.assertf(arg0.isPhysReg, "%s is not phys reg", arg0);
						context.assertf(arg0.physRegSize == arg1.physRegSize,
							"%s:%s reg size mismatch %s != %s", lirBlockIndex, instrIndex, arg0.physRegSize, arg1.physRegSize);
						context.assertf(arg0.physRegClass == arg1.physRegClass && arg0.physRegClass == AMD64_REG_CLASS.GPR, "Only GPR xchg is implemented");
						Register dst = indexToRegister(arg0);
						Register src = indexToRegister(arg1);
						gen.xchg(dst, src, cast(ArgType)arg0.physRegSize);
						break;
					case Amd64Opcode.load:
						genLoad(instrHeader.result(lir), instrHeader.arg(lir, 0));
						break;
					case Amd64Opcode.store:
						genStore(instrHeader.arg(lir, 0), instrHeader.arg(lir, 1), instrHeader.argSize);
						break;
					case Amd64Opcode.add:
						IrIndex arg0 = instrHeader.arg(lir, 0);
						IrIndex arg1 = instrHeader.arg(lir, 1);
						if (arg1.isStackSlot)
						{
							// this was generated from GEP
							// reg += rsp + disp8/32
							// convert it into
							// lea reg, rsp + reg + disp8/32
							Register dst = indexToRegister(arg0);
							MemAddress addr = localVarMemAddress(arg1);
							switch(addr.type) {
								case MemAddrType.baseDisp8:
									MemAddress newAddr = memAddrBaseIndexDisp8(addr.baseReg, dst, SibScale(0), addr.disp8.value);
									gen.lea(dst, newAddr, ArgType.QWORD);
									break;
								case MemAddrType.baseDisp32:
									MemAddress newAddr = memAddrBaseIndexDisp32(addr.baseReg, dst, SibScale(0), addr.disp32.value);
									gen.lea(dst, newAddr, ArgType.QWORD);
									break;
								default:
									context.internal_error("Invalid memory operand %s", addr);
							}
						}
						else
						{
							genRegular(arg0, arg1, AMD64OpRegular.add, cast(IrArgSize)arg0.physRegSize, instrIndex);
						}

						if (arg0 == stackPointer) {
							if (arg1.isSimpleConstant) {
								stackPointerExtraOffset -= context.constants.get(arg1).i64;
							} else {
								dumpFunction(context, lir, "Code gen");
								context.internal_error("Cannot decrement stack pointer by non-constant in %s", instrIndex);
							}
						}
						break;
					case Amd64Opcode.sub:
						IrIndex arg0 = instrHeader.arg(lir, 0);
						IrIndex arg1 = instrHeader.arg(lir, 1);
						genRegular(arg0, arg1, AMD64OpRegular.sub, cast(IrArgSize)arg0.physRegSize, instrIndex);
						if (arg0 == stackPointer) {
							if (arg1.isSimpleConstant) {
								stackPointerExtraOffset += context.constants.get(arg1).i64;
							} else {
								dumpFunction(context, lir, "Code gen");
								context.internal_error("Cannot increment stack pointer by non-constant in %s", instrIndex);
							}
						}
						break;
					case Amd64Opcode.xor:
						genRegular(instrHeader.arg(lir, 0), instrHeader.arg(lir, 1), AMD64OpRegular.xor, cast(IrArgSize)instrHeader.arg(lir, 0).physRegSize, instrIndex);
						break;
					case Amd64Opcode.or:
						genRegular(instrHeader.arg(lir, 0), instrHeader.arg(lir, 1), AMD64OpRegular.or, cast(IrArgSize)instrHeader.arg(lir, 0).physRegSize, instrIndex);
						break;
					case Amd64Opcode.and:
						genRegular(instrHeader.arg(lir, 0), instrHeader.arg(lir, 1), AMD64OpRegular.and, cast(IrArgSize)instrHeader.arg(lir, 0).physRegSize, instrIndex);
						break;
					case Amd64Opcode.imul:
						context.assertf(instrHeader.arg(lir, 0).isPhysReg, "%s is not phys reg", instrHeader.arg(lir, 0));
						Register dst = indexToRegister(instrHeader.arg(lir, 0));
						switch(instrHeader.arg(lir, 1).kind) with(IrValueKind) {
							case constant:
								IrConstant con = context.constants.get(instrHeader.arg(lir, 1));
								gen.imulq(dst, dst, Imm32(con.i32));
								break;
							case physicalRegister:
								Register src = indexToRegister(instrHeader.arg(lir, 1));
								gen.imul(dst, src, cast(ArgType)instrHeader.arg(lir, 0).physRegSize);
								break;
							default:
								context.internal_error("imul %s not implemented", instrHeader.args(lir));
						}
						break;
					case Amd64Opcode.div:
						Register divisor = indexToRegister(instrHeader.arg(lir, 2));
						gen.div(divisor, cast(ArgType)instrHeader.arg(lir, 2).physRegSize);
						break;
					case Amd64Opcode.idiv:
						Register divisor = indexToRegister(instrHeader.arg(lir, 2));
						gen.idiv(divisor, cast(ArgType)instrHeader.arg(lir, 2).physRegSize);
						break;

					case Amd64Opcode.fadd:
						IrIndex arg0 = instrHeader.arg(lir, 0);
						IrIndex arg1 = instrHeader.arg(lir, 1);
						context.assertf(arg0.physRegClass == AMD64_REG_CLASS.XMM, "fadd reg class %s != XMM", arg0.physRegClass);
						context.assertf(arg1.physRegClass == AMD64_REG_CLASS.XMM, "fadd reg class %s != XMM", arg1.physRegClass);
						final switch(instrHeader.argSize) with(IrArgSize) {
							case size32: gen.addss(indexToRegister(arg0), indexToRegister(arg1)); break;
							case size64: gen.addsd(indexToRegister(arg0), indexToRegister(arg1)); break;
							case size8, size16, size128, size256, size512: context.internal_error("fadd %s", instrHeader.argSize);
						}
						break;
					case Amd64Opcode.fsub:
						IrIndex arg0 = instrHeader.arg(lir, 0);
						IrIndex arg1 = instrHeader.arg(lir, 1);
						context.assertf(arg0.physRegClass == AMD64_REG_CLASS.XMM, "fsub reg class %s != XMM", arg0.physRegClass);
						context.assertf(arg1.physRegClass == AMD64_REG_CLASS.XMM, "fsub reg class %s != XMM", arg1.physRegClass);
						final switch(instrHeader.argSize) with(IrArgSize) {
							case size32: gen.subss(indexToRegister(arg0), indexToRegister(arg1)); break;
							case size64: gen.subsd(indexToRegister(arg0), indexToRegister(arg1)); break;
							case size8, size16, size128, size256, size512: context.internal_error("fsub %s", instrHeader.argSize);
						}
						break;
					case Amd64Opcode.fdiv:
						IrIndex arg0 = instrHeader.arg(lir, 0);
						IrIndex arg1 = instrHeader.arg(lir, 1);
						context.assertf(arg0.physRegClass == AMD64_REG_CLASS.XMM, "fdiv reg class %s != XMM", arg0.physRegClass);
						context.assertf(arg1.physRegClass == AMD64_REG_CLASS.XMM, "fdiv reg class %s != XMM", arg1.physRegClass);
						final switch(instrHeader.argSize) with(IrArgSize) {
							case size32: gen.divss(indexToRegister(arg0), indexToRegister(arg1)); break;
							case size64: gen.divsd(indexToRegister(arg0), indexToRegister(arg1)); break;
							case size8, size16, size128, size256, size512: context.internal_error("fdiv %s", instrHeader.argSize);
						}
						break;
					case Amd64Opcode.fmul:
						IrIndex arg0 = instrHeader.arg(lir, 0);
						IrIndex arg1 = instrHeader.arg(lir, 1);
						context.assertf(arg0.physRegClass == AMD64_REG_CLASS.XMM, "fmul reg class %s != XMM", arg0.physRegClass);
						context.assertf(arg1.physRegClass == AMD64_REG_CLASS.XMM, "fmul reg class %s != XMM", arg1.physRegClass);
						final switch(instrHeader.argSize) with(IrArgSize) {
							case size32: gen.mulss(indexToRegister(arg0), indexToRegister(arg1)); break;
							case size64: gen.mulsd(indexToRegister(arg0), indexToRegister(arg1)); break;
							case size8, size16, size128, size256, size512: context.internal_error("fmul %s", instrHeader.argSize);
						}
						break;

					case Amd64Opcode.movzx_btow: gen.movzx_btow(indexToRegister(instrHeader.result(lir)), indexToRegister(instrHeader.arg(lir, 0))); break;
					case Amd64Opcode.movzx_btod: gen.movzx_btod(indexToRegister(instrHeader.result(lir)), indexToRegister(instrHeader.arg(lir, 0))); break;
					case Amd64Opcode.movzx_btoq: gen.movzx_btoq(indexToRegister(instrHeader.result(lir)), indexToRegister(instrHeader.arg(lir, 0))); break;
					case Amd64Opcode.movzx_wtod: gen.movzx_wtod(indexToRegister(instrHeader.result(lir)), indexToRegister(instrHeader.arg(lir, 0))); break;
					case Amd64Opcode.movzx_wtoq: gen.movzx_wtoq(indexToRegister(instrHeader.result(lir)), indexToRegister(instrHeader.arg(lir, 0))); break;
					case Amd64Opcode.movsx_btow: gen.movsx_btow(indexToRegister(instrHeader.result(lir)), indexToRegister(instrHeader.arg(lir, 0))); break;
					case Amd64Opcode.movsx_btod: gen.movsx_btod(indexToRegister(instrHeader.result(lir)), indexToRegister(instrHeader.arg(lir, 0))); break;
					case Amd64Opcode.movsx_btoq: gen.movsx_btoq(indexToRegister(instrHeader.result(lir)), indexToRegister(instrHeader.arg(lir, 0))); break;
					case Amd64Opcode.movsx_wtod: gen.movsx_wtod(indexToRegister(instrHeader.result(lir)), indexToRegister(instrHeader.arg(lir, 0))); break;
					case Amd64Opcode.movsx_wtoq: gen.movsx_wtoq(indexToRegister(instrHeader.result(lir)), indexToRegister(instrHeader.arg(lir, 0))); break;
					case Amd64Opcode.movsx_dtoq: gen.movsx_dtoq(indexToRegister(instrHeader.result(lir)), indexToRegister(instrHeader.arg(lir, 0))); break;
					case Amd64Opcode.f32_to_f64: gen.cvtss2sd(indexToRegister(instrHeader.result(lir)), indexToRegister(instrHeader.arg(lir, 0))); break;
					case Amd64Opcode.f64_to_f32: gen.cvtsd2ss(indexToRegister(instrHeader.result(lir)), indexToRegister(instrHeader.arg(lir, 0))); break;
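
					// int <-> float conversions map to the CVTSI2SS/SD and truncating CVTTSS/SD2SI instruction families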
					case Amd64Opcode.i32_to_f32: gen.cvtsid2ss(indexToRegister(instrHeader.result(lir)), indexToRegister(instrHeader.arg(lir, 0))); break;
					case Amd64Opcode.i64_to_f32: gen.cvtsiq2ss(indexToRegister(instrHeader.result(lir)), indexToRegister(instrHeader.arg(lir, 0))); break;
					case Amd64Opcode.i32_to_f64: gen.cvtsid2sd(indexToRegister(instrHeader.result(lir)), indexToRegister(instrHeader.arg(lir, 0))); break;
					case Amd64Opcode.i64_to_f64: gen.cvtsiq2sd(indexToRegister(instrHeader.result(lir)), indexToRegister(instrHeader.arg(lir, 0))); break;

					case Amd64Opcode.f32_to_i32_trunc: gen.cvttss2sid(indexToRegister(instrHeader.result(lir)), indexToRegister(instrHeader.arg(lir, 0))); break;
					case Amd64Opcode.f32_to_i64_trunc: gen.cvttss2siq(indexToRegister(instrHeader.result(lir)), indexToRegister(instrHeader.arg(lir, 0))); break;
					case Amd64Opcode.f64_to_i32_trunc: gen.cvttsd2sid(indexToRegister(instrHeader.result(lir)), indexToRegister(instrHeader.arg(lir, 0))); break;
					case Amd64Opcode.f64_to_i64_trunc: gen.cvttsd2siq(indexToRegister(instrHeader.result(lir)), indexToRegister(instrHeader.arg(lir, 0))); break;

					case Amd64Opcode.rep_stos: gen.rep_prefix; gen.stos; break;
					case Amd64Opcode.divsx:
						final switch(instrHeader.argSize) {
							case IrArgSize.size8: gen.movsx_btow(Register.AX, Register.AX); break;
							case IrArgSize.size16: gen.cwd; break;
							case IrArgSize.size32: gen.cdq; break;
							case IrArgSize.size64: gen.cqo; break;
							case IrArgSize.size128, IrArgSize.size256, IrArgSize.size512: context.internal_error("divsx %s", instrHeader.argSize);
						}
						break;
					case Amd64Opcode.shl:
						Register dst = indexToRegister(instrHeader.arg(lir, 0));
						IrIndex src = instrHeader.arg(lir, 1);
						if (src.isSimpleConstant) {
							IrConstant con = context.constants.get(instrHeader.arg(lir, 1));
							if (con.i8 == 1)
								gen.shl1(dst, cast(ArgType)instrHeader.argSize);
							else
								gen.shli(dst, Imm8(con.i8), cast(ArgType)instrHeader.argSize);
						}
						else
							gen.shl(dst, cast(ArgType)instrHeader.argSize);
						break;
					case Amd64Opcode.shr:
						Register dst = indexToRegister(instrHeader.arg(lir, 0));
						IrIndex src = instrHeader.arg(lir, 1);
						if (src.isSimpleConstant) {
							IrConstant con = context.constants.get(instrHeader.arg(lir, 1));
							if (con.i8 == 1)
								gen.shr1(dst, cast(ArgType)instrHeader.argSize);
							else
								gen.shri(dst, Imm8(con.i8), cast(ArgType)instrHeader.argSize);
						}
						else
							gen.shr(dst, cast(ArgType)instrHeader.argSize);
						break;
					case Amd64Opcode.sar:
						Register dst = indexToRegister(instrHeader.arg(lir, 0));
						IrIndex src = instrHeader.arg(lir, 1);
						if (src.isSimpleConstant) {
							IrConstant con = context.constants.get(instrHeader.arg(lir, 1));
							if (con.i8 == 1)
								gen.sar1(dst, cast(ArgType)instrHeader.argSize);
							else
								gen.sari(dst, Imm8(con.i8), cast(ArgType)instrHeader.argSize);
						}
						else
							gen.sar(dst, cast(ArgType)instrHeader.argSize);
						break;
					case Amd64Opcode.not:
						Register dst = indexToRegister(instrHeader.arg(lir, 0));
						gen.not(dst, cast(ArgType)instrHeader.arg(lir, 0).physRegSize);
						break;
					case Amd64Opcode.neg:
						Register dst = indexToRegister(instrHeader.arg(lir, 0));
						gen.neg(dst, cast(ArgType)instrHeader.arg(lir, 0).physRegSize);
						break;
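					// fneg has no direct x86 instruction: it XORs the sign bit with a mask constant placed in static data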
					case Amd64Opcode.fneg:
						IrIndex arg0 = instrHeader.arg(lir, 0);
						context.assertf(arg0.physRegClass == AMD64_REG_CLASS.XMM, "incorrect class %s, xmm expected", arg0.physRegClass);
						switch(arg0.physRegSize) {
							case IrArgSize.size32:
								MemAddress addr = memAddrRipDisp32(0);
								gen.xorps(indexToRegister(arg0), addr);
								IrIndex sign_bit_global = context.globals.get_or_add_f32_sign_bit_constant(context);
								addRefTo(sign_bit_global);
								break;
							case IrArgSize.size64:
								MemAddress addr = memAddrRipDisp32(0);
								gen.xorpd(indexToRegister(arg0), addr);
								IrIndex sign_bit_global = context.globals.get_or_add_f64_sign_bit_constant(context);
								addRefTo(sign_bit_global);
								break;
							default: context.internal_error("fneg %s", arg0.physRegSize);
						}
						break;
					case Amd64Opcode.call:
						IrIndex calleeIndex = instrHeader.arg(lir, 0);

						if (calleeIndex.isFunction)
						{
							// direct call by name
							FunctionDeclNode* callee = context.getFunction(calleeIndex);
							ObjectSymbol* sym = context.objSymTab.getSymbol(callee.backendData.objectSymIndex);

							if (sym.isIndirect)
								gen.call(memAddrRipDisp32(0)); // read address from import section
							else
								gen.call(Imm32(0)); // call relative to next instruction

							addRefTo(calleeIndex);
						}
						else
						{
							// call by ptr
							if (calleeIndex.isStackSlot)
							{
								MemAddress addr = localVarMemAddress(calleeIndex);
								gen.call(addr);
							}
							else
							{
								Register calleePtr = indexToRegister(calleeIndex);
								gen.call(calleePtr);
							}
						}
						break;
					case Amd64Opcode.syscall:
						gen.syscall();
						break;
					case Amd64Opcode.jmp:
						genJumpToSuccessors(lirBlock, 0);
						break;
					case Amd64Opcode.bin_branch:
						IrIndex arg0 = instrHeader.arg(lir, 0);
						IrIndex arg1 = instrHeader.arg(lir, 1);
						auto cond = cast(IrBinaryCondition)instrHeader.cond;

						if (arg0.isSimpleConstant)
						{
							if (arg1.isSimpleConstant)
							{
								if (evalBinCondition(*context, cond, arg0, arg1))
									genJumpToSuccessors(lirBlock, 0);
								else
									genJumpToSuccessors(lirBlock, 1);
								break;
							}

							// move const to the right
							// TODO: perform canonicalization in middle-end
							swap(arg0, arg1);
							cond = invertBinaryCond(cond);
						}

						if (arg0.physRegClass == AMD64_REG_CLASS.XMM) {
							assert(arg1.physRegClass == AMD64_REG_CLASS.XMM);
							final switch(instrHeader.argSize) with(IrArgSize) {
								case size32: gen.ucomiss(indexToRegister(arg0), indexToRegister(arg1)); break;
								case size64: gen.ucomisd(indexToRegister(arg0), indexToRegister(arg1)); break;
								case size8, size16, size128, size256, size512: context.internal_error("bin_branch %s", instrHeader.argSize);
							}
						} else {
							genRegular(arg0, arg1, AMD64OpRegular.cmp, cast(IrArgSize)instrHeader.argSize, instrIndex);
						}

						Condition mach_cond = IrBinCondToAmd64Condition[cond];
						gen.jcc(mach_cond, Imm32(0));
						genJumpToSuccessors(lirBlock, 1, gen.pc);
						break;
					case Amd64Opcode.un_branch:
						if (instrHeader.arg(lir, 0).isSimpleConstant)
						{
							IrConstant con = context.constants.get(instrHeader.arg(lir, 0));
							if (con.i64 && instrHeader.cond == IrUnaryCondition.not_zero ||
								(!con.i64) && instrHeader.cond == IrUnaryCondition.zero)
								genJumpToSuccessors(lirBlock, 0);
							else
								genJumpToSuccessors(lirBlock, 1);
							break;
						}
						Register reg = indexToRegister(instrHeader.arg(lir, 0));
						gen.test(reg, reg, cast(ArgType)instrHeader.arg(lir, 0).physRegSize);
						Condition cond = IrUnCondToAmd64Condition[instrHeader.cond];
						gen.jcc(cond, Imm32(0));
						genJumpToSuccessors(lirBlock, 1, gen.pc);
						break;
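					// set_unary_cond / set_binary_cond materialize a comparison result into a register via SETcc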
					case Amd64Opcode.set_unary_cond:
						Register reg = indexToRegister(instrHeader.arg(lir, 0));
						gen.test(reg, reg, cast(ArgType)instrHeader.arg(lir, 0).physRegSize);
						Condition cond = IrUnCondToAmd64Condition[instrHeader.cond];
						Register dst = indexToRegister(instrHeader.result(lir));
						gen.setcc(cond, dst);
						break;
					case Amd64Opcode.set_binary_cond:
						IrIndex arg0 = instrHeader.arg(lir, 0);
						IrIndex arg1 = instrHeader.arg(lir, 1);
						Condition cond = IrBinCondToAmd64Condition[instrHeader.cond];
						if (arg0.physRegClass == AMD64_REG_CLASS.XMM) {
							assert(arg1.physRegClass == AMD64_REG_CLASS.XMM);
							switch(arg0.physRegSize) {
								case IrArgSize.size32: gen.ucomiss(indexToRegister(arg0), indexToRegister(arg1)); break;
								case IrArgSize.size64: gen.ucomisd(indexToRegister(arg0), indexToRegister(arg1)); break;
								default: context.internal_error("set_binary_cond %s", arg0.physRegSize);
							}
						} else {
							genRegular(arg0, arg1, AMD64OpRegular.cmp, cast(IrArgSize)arg0.physRegSize, instrIndex);
						}
						Register dst = indexToRegister(instrHeader.result(lir));
						gen.setcc(cond, dst);
						break;
					case Amd64Opcode.ret:
						jumpFixups[lirBlock.seqIndex][0] = null;
						jumpFixups[lirBlock.seqIndex][1] = null;
						compileFuncEpilog();
						break;
					case Amd64Opcode.ud2:
						jumpFixups[lirBlock.seqIndex][0] = null;
						jumpFixups[lirBlock.seqIndex][1] = null;
						gen.ud2;
						break;
					case Amd64Opcode.push:
						IrIndex src = instrHeader.arg(lir, 0);
						switch (src.kind) with(IrValueKind)
						{
							case constant, constantZero:
								IrConstant con = context.constants.get(src);
								gen.pushd(Imm32(con.i32));
								break;

							case physicalRegister:
								Register reg = indexToRegister(src);
								gen.pushq(reg);
								break;

							// those won't push the address itself, but memory contents
							/*case stackSlot:
								MemAddress addr = localVarMemAddress(src);
								gen.pushq(addr);
								break;

							case global, func:
								MemAddress addr = memAddrRipDisp32(0);
								gen.pushq(addr);
								addRefTo(src);
								break;*/

							default:
								context.internal_error("Cannot encode %s %s in %s %s",
									cast(Amd64Opcode)instrHeader.op, src, context.idString(lir.name), instrIndex);
						}
						stackPointerExtraOffset += STACK_ITEM_SIZE;
						break;
					default:
						context.internal_error("Unimplemented instruction `%s`", cast(Amd64Opcode)instrHeader.op);
				}
			}

			if (stackPointerExtraOffset != 0) {
				// When we call a noreturn function, stack cleanup is omitted
				// After such calls we expect unreachable (ud2)
				if (lir.getInstr(lirBlock.lastInstr).op != Amd64Opcode.ud2)
					context.internal_error("Unmatched stack size modification");
			}
		}
	}

	void addRefTo(IrIndex entity, short offset = 4)
	{
		LinkIndex entityIndex;
		switch (entity.kind) with(IrValueKind)
		{
			case global:
				IrGlobal* global = context.globals.get(entity);
				entityIndex = global.objectSymIndex;
				break;

			case func:
				FunctionDeclNode* func = context.getFunction(entity);
				entityIndex = func.backendData.objectSymIndex;
				break;

			default:
				context.internal_error("addRefTo %s %s", entity, offset);
		}

		addRefTo(entityIndex, offset);
	}

	void addRefTo(LinkIndex entityIndex, short offset = 4)
	{
		ObjectSymbolReference r = {
			fromSymbol : fun.backendData.objectSymIndex,
			referencedSymbol : entityIndex,
			refOffset : referenceOffset() - offset,
			offset,
			ObjectSymbolRefKind.relative32,
		};
		context.objSymTab.addReference(r);
	}

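	/// Patches a previously emitted 32-bit relative jump at `fixup` so it targets the start of `targetBlock`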
	void fixJump(PC fixup, lazy IrIndex targetBlock)
	{
		PC succPC = blockStarts[lir.getBlock(targetBlock).seqIndex];
		fix_PC_REL_32(fixup, succPC);
	}

	void fixJumps()
	{
		foreach (IrIndex lirBlockIndex, ref IrBasicBlock lirBlock; lir.blocks)
		{
			PC[2] fixups = jumpFixups[lirBlock.seqIndex];
			if (fixups[0] !is null) fixJump(fixups[0], lirBlock.successors[0, lir]);
			if (fixups[1] !is null) fixJump(fixups[1], lirBlock.successors[1, lir]);
		}
	}

	MemAddress localVarMemAddress(IrIndex stackSlotIndex) {
		context.assertf(stackSlotIndex.isStackSlot, "Index is not stack slot, but %s", stackSlotIndex.kind);
		auto stackSlot = lir.getStackSlot(stackSlotIndex);
		Register baseReg = indexToRegister(stackSlot.baseReg);
		return minMemAddrBaseDisp(baseReg, stackSlot.displacement + stackPointerExtraOffset);
	}

	Register indexToRegister(IrIndex regIndex) {
		context.assertf(regIndex.isPhysReg, "Index is not register, but %s %s", regIndex.kind, regIndex);
		return cast(Register)regIndex.physRegIndex;
	}

	void genRegular(IrIndex dst, IrIndex src, AMD64OpRegular op, IrArgSize argSize, IrIndex instrIndex)
	{
		AsmArg argDst;
		AsmArg argSrc;
		AsmOpParam param;
		param.op = op;
		param.argType = cast(ArgType)argSize;

		argDst.reg = indexToRegister(dst);

		// HACK, TODO: ESP is generated instead of RSP. Need to store types in instructions / more instruction types
		if (argDst.reg == Register.SP) param.argType = ArgType.QWORD;

		param.dstKind = AsmArgKind.REG;

		//writefln("%s.%s %s %s", op, param.argType, dst.kind, src.kind);

		final switch (src.kind) with(IrValueKind)
		{
			case none, array, instruction, basicBlock, phi, type, virtualRegister, variable, func, constantAggregate: context.internal_error("divsx src %s", src.kind);
			case constantZero:
			case constant:
				IrConstant con = context.constants.get(src);
				if (con.i64.argSizeIntSigned == IrArgSize.size8) {
					param.immType = ArgType.BYTE;
					argSrc.imm8 = Imm8(con.i8);
				}
				else if (argSize == IrArgSize.size16) {
					param.immType = ArgType.WORD;
					argSrc.imm16 = Imm16(con.i16);
				}
				else {
					param.immType = ArgType.DWORD;
					argSrc.imm32 = Imm32(con.i32);
				}
				param.srcKind = AsmArgKind.IMM;
				break;

			case physicalRegister:
				argSrc.reg = indexToRegister(src);
				param.srcKind = AsmArgKind.REG;
				break;

			case global, stackSlot:
				// This should not happen. Stack slot or global must go through mov or load instruction.
				context.internal_error("Cannot encode %s %s %s in %s %s", op, dst, src, context.idString(lir.name), instrIndex);
		}
		gen.encodeRegular(argDst, argSrc, param);
	}

	/// Generate move from src operand to dst operand. Size of destination is used
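	/// Handles constant, global, function, stack-slot and register sources; float constants are materialized in the read-only data section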
	void genMove(IrIndex dst, IrIndex src)
	{
		// i64 <- i32 must be a 32bit move if both sides are registers.
		IrArgSize argSize;
		if (src.isPhysReg)
			argSize = cast(IrArgSize)min(dst.physRegSize, src.physRegSize);
		else
			argSize = cast(IrArgSize)dst.physRegSize;

		version(emit_mc_print) writefln("genMove %s %s %s", dst, src, argSize);
		MoveType moveType = calcMoveType(dst.kind, src.kind);

		if (moveType != MoveType.invalid && dst == src) return;

		Register srcReg = cast(Register)src.physRegIndex;
		Register dstReg = cast(Register)dst.physRegIndex;

		switch(moveType)
		{
			default:
				context.internal_error("Invalid move from %s to %s", IrIndexDump(dst, context, lir), IrIndexDump(src, context, lir));

			case MoveType.const_to_reg:
				IrConstant con = context.constants.get(src);
				version(emit_mc_print) writefln(" move.%s reg:%s, con:%s", argSize, dstReg, con.i64);
				if (con.i64 == 0) // xor
				{
					if (dst.physRegClass == AMD64_REG_CLASS.GPR) {
						AsmArg argDst = {reg : dstReg};
						AsmArg argSrc = {reg : dstReg};
						AsmOpParam param = AsmOpParam(AsmArgKind.REG, AsmArgKind.REG, AMD64OpRegular.xor, cast(ArgType)IrArgSize.size32);
						gen.encodeRegular(argDst, argSrc, param);
					} else if (dst.physRegClass == AMD64_REG_CLASS.XMM) {
						// TODO: replace with pxor
						// See: https://stackoverflow.com/questions/33666617/what-is-the-best-way-to-set-a-register-to-zero-in-x86-assembly-xor-mov-or-and
						gen.xorps(dstReg, dstReg);
					}
				}
				else
				{
					if (dst.physRegClass == AMD64_REG_CLASS.GPR) {
						final switch(argSize) with(IrArgSize) {
							case size8: gen.movb(dstReg, Imm8(con.i8)); break;
							case size16: gen.movw(dstReg, Imm16(con.i16)); break;
							case size32: gen.movd(dstReg, Imm32(con.i32)); break;
							case size64:
								if (!con.intFitsIn32Bits)
									gen.movq(dstReg, Imm64(con.i64));
								else {
									if (con.u32_top == uint.max) {
										gen.movq(dstReg, Imm32(con.i32)); // sign-extend 32bit constant to 64bit register
									} else {
										gen.movd(dstReg, Imm32(con.i32)); // zero-extend 32bit constant to 64bit register
									}
								}
								break;
							case size128, size256, size512:
								context.internal_error("Not implemented: const_to_reg %s %s", dst, src);
						}
					} else if (dst.physRegClass == AMD64_REG_CLASS.XMM) {
						LinkIndex roSectionIndex = context.builtinSections[ObjectSectionType.ro_data];
						ObjectSymbol* funcSym = context.objSymTab.getSymbol(fun.backendData.objectSymIndex);
						ObjectSymbol sym = {
							kind : ObjectSymbolKind.isLocal,
							sectionIndex : roSectionIndex,
							moduleIndex : funcSym.moduleIndex,
							id : context.idMap.getOrReg(context, ":float"),
						};
						LinkIndex symIndex = context.objSymTab.addSymbol(sym);
						ObjectSymbol* globalSym = context.objSymTab.getSymbol(symIndex);
						ObjectSection* roSection = context.objSymTab.getSection(roSectionIndex);
						globalSym.sectionOffset = cast(uint)roSection.buffer.length;

						final switch(argSize) with(IrArgSize) {
							case size32:
								globalSym.setInitializer(context.roStaticDataBuffer.nextPtr[0..4]);
								context.roStaticDataBuffer.put(con.i32);
								gen.movd_xr(dstReg, memAddrRipDisp32(0));
								break;
							case size64:
								globalSym.setInitializer(context.roStaticDataBuffer.nextPtr[0..8]);
								context.roStaticDataBuffer.put(con.i64);
								gen.movq_xr(dstReg, memAddrRipDisp32(0));
								break;
							case size8, size16, size128, size256, size512: context.internal_error("genMove XMM <- const %s", argSize);
						}
						addRefTo(symIndex);
					}
				}
				break;

			// copy address of global into register
			case MoveType.global_to_reg:
				context.assertf(dst.physRegClass == AMD64_REG_CLASS.GPR, "global_to_reg %s", dst);
				// HACK, TODO: 32bit version of reg is incoming, while for ptr 64bits are needed
				MemAddress addr = memAddrRipDisp32(0);
				gen.lea(dstReg, addr, cast(ArgType)IrArgSize.size64);
				addRefTo(src);
				break;

			// copy address of function into register
			case MoveType.func_to_reg:
				context.assertf(dst.physRegClass == AMD64_REG_CLASS.GPR, "func_to_reg %s", dst);
				FunctionDeclNode* func = context.getFunction(src);
				LinkIndex entityIndex = func.backendData.objectSymIndex;
				ObjectSymbol* sym = context.objSymTab.getSymbol(entityIndex);
				if (sym.isIndirect) {
					// read address from the import section
					MemAddress addr = memAddrRipDisp32(0);
					gen.mov(dstReg, addr, cast(ArgType)IrArgSize.size64);
				} else {
					// take address of the symbol
					MemAddress addr = memAddrRipDisp32(0);
					gen.lea(dstReg, addr, cast(ArgType)IrArgSize.size64);
				}
				addRefTo(entityIndex);
				break;

			case MoveType.reg_to_reg:
				version(emit_mc_print) writefln(" move.%s reg:%s, reg:%s", argSize, dstReg, srcReg);
				if (src.physRegClass == AMD64_REG_CLASS.XMM && dst.physRegClass == AMD64_REG_CLASS.XMM) {
					final switch(argSize) with(IrArgSize) {
						case size8, size16:
							context.internal_error("Not implemented: reg_to_reg %s %s", dst, src);
						case size32: gen.movss(dstReg, srcReg); break;
						case size64: gen.movsd(dstReg, srcReg); break;
						case size128: gen.movups(dstReg, srcReg); break;
						case size256, size512:
							context.internal_error("Not implemented: reg_to_reg %s %s", dst, src);
					}
				} else if (src.physRegClass == AMD64_REG_CLASS.XMM) {
					final switch(argSize) with(IrArgSize) {
						case size32: gen.movd_rx(dstReg, srcReg); break;
						case size64: gen.movq_rx(dstReg, srcReg); break;
						case size8, size16, size128, size256, size512:
							context.internal_error("Not implemented: reg_to_reg %s %s", dst, src);
					}
				} else if (dst.physRegClass == AMD64_REG_CLASS.XMM) {
					final switch(argSize) with(IrArgSize) {
						case size32: gen.movd_xr(dstReg, srcReg); break;
						case size64: gen.movq_xr(dstReg, srcReg); break;
						case size8, size16, size128, size256, size512:
							context.internal_error("Not implemented: reg_to_reg %s %s", dst, src);
					}
				} else {
					if (dstReg != srcReg) {
						gen.mov(dstReg, srcReg, cast(ArgType)argSize);
					}
				}
				break;

			// copy address of stack slot into register
			case MoveType.stack_to_reg:
				context.assertf(dst.physRegClass == AMD64_REG_CLASS.GPR, "stack_to_reg %s", dst);
				gen.lea(dstReg, localVarMemAddress(src), cast(ArgType)IrArgSize.size64);
				break;
		}
	}

	void fix_PC_REL_32(PC fixup, PC target)
	{
		*cast(Imm32*)(fixup-4) = jumpOffset(fixup, target);
	}

	// nextInstr is the address of the next instruction
	void fix_PC_REL_CUSTOM(Imm32* offset, PC nextInstr, PC target)
	{
		*offset = jumpOffset(nextInstr, target);
	}

	void doMemToReg(IrIndex dst, MemAddress srcMem, IrArgSize argSize) {
		Register dstReg = indexToRegister(dst);
		if (dst.physRegClass == AMD64_REG_CLASS.XMM) {
			final switch(argSize) with(IrArgSize) {
				case size32: gen.movd_xr(dstReg, srcMem); break;
				case size64: gen.movq_xr(dstReg, srcMem); break;
				case size128: gen.movups(dstReg, srcMem); break;
				case size8, size16, size256, size512: context.internal_error("doMemToReg %s", argSize);
			}
		} else {
			gen.mov(dstReg, srcMem, cast(ArgType)argSize);
		}
	}

	/// Generates a load from the src memory operand into the dst register. Load size is taken from dst.
	// If src is a phys reg then it is used as the address base.
	// dst must be a phys reg
	void genLoad(IrIndex dst, IrIndex src)
	{
		IrArgSize argSize = cast(IrArgSize)dst.physRegSize;
		bool valid = dst.isPhysReg && (src.isPhysReg || src.isStackSlot || src.isGlobal);
		context.assertf(valid, "Invalid load %s -> %s", src.kind, dst.kind);

		switch(src.kind) with(IrValueKind) {
			case physicalRegister: doMemToReg(dst, memAddrBase(indexToRegister(src)), argSize); break;
			case stackSlot: doMemToReg(dst, localVarMemAddress(src), argSize); break;
			case global:
				doMemToReg(dst, memAddrRipDisp32(0), argSize);
				addRefTo(src);
				break;

			default:
				context.internal_error("invalid source of load %s", src.kind);
		}
	}

	// dst must be of pointer type
	// dst is a pointer of unknown type (that's why we need the explicit argSize)
	void genStore(IrIndex dst, IrIndex src, IrArgSize argSize)
	{
		context.assertf(!src.isGlobal,
			"store %s <- %s, must go through intermediate register",
			dst.kind, src.kind);

		void doRegToMem(MemAddress dstMem) {
			if (src.physRegClass == AMD64_REG_CLASS.XMM) {
				Register srcReg = indexToRegister(src);
				final switch(argSize) with(IrArgSize) {
					case size32: gen.movd_rx(dstMem, srcReg); break;
					case size64: gen.movq_rx(dstMem, srcReg); break;
					case size128: gen.movups(dstMem, srcReg); break;
					case size8, size16, size256, size512: context.internal_error("doRegToMem %s", argSize);
				}
			} else {
				Register srcReg = indexToRegister(src);
				gen.mov(dstMem, srcReg, cast(ArgType)argSize);
			}
		}
		void doConToMem(MemAddress dstMem, IrConstant con) {
			final switch(argSize) with(IrArgSize) {
				case size8: gen.movb(dstMem, Imm8(con.i8)); break;
				case size16: gen.movw(dstMem, Imm16(con.i16)); break;
				case size32: gen.movd(dstMem, Imm32(con.i32)); break;
				case size64:
					context.assertf(con.intFitsIn32Bits, "Constant 0x%X is too big", con.i64);
					gen.movq(dstMem, Imm32(con.i32));
					break;
				case size128, size256, size512: context.internal_error("doConToMem %s", argSize);
			}
		}

		MoveType moveType = calcMoveType(dst.kind, src.kind);
		switch (moveType) with(MoveType)
		{
			case const_to_stack:
				IrConstant con = context.constants.get(src);
				MemAddress dstMem = localVarMemAddress(dst);
				doConToMem(dstMem, con);
				break;
			case const_to_reg:
				IrConstant con = context.constants.get(src);
				Register dstReg = indexToRegister(dst);
				MemAddress dstMem = memAddrBase(dstReg);
				doConToMem(dstMem, con);
				break;
			case reg_to_stack:
				MemAddress dstMem = localVarMemAddress(dst);
				doRegToMem(dstMem);
				break;
			case reg_to_reg:
				Register dstReg = indexToRegister(dst);
				MemAddress dstMem = memAddrBase(dstReg);
				doRegToMem(dstMem);
				break;
			case const_to_global:
				IrConstant con = context.constants.get(src);
				MemAddress dstMem = memAddrRipDisp32(0);
				doConToMem(dstMem, con);
				addRefTo(dst, 8);
				break;
			case reg_to_global:
				MemAddress dstMem = memAddrRipDisp32(0);
				doRegToMem(dstMem);
				addRefTo(dst);
				break;
			default:
				context.internal_error("store %s <- %s is not implemented", dst.kind, src.kind);
		}
	}
}

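/// Classifies a move by the value kinds of its destination and source; returns MoveType.invalid for unsupported combinations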
MoveType calcMoveType(IrValueKind dst, IrValueKind src)
{
	switch(dst) with(IrValueKind) {
		case none, array, constant: return MoveType.invalid;
		case virtualRegister: return MoveType.invalid;
		case physicalRegister:
			switch(src) with(IrValueKind) {
				case constant, constantZero: return MoveType.const_to_reg;
				case global: return MoveType.global_to_reg;
				case physicalRegister: return MoveType.reg_to_reg;
				case stackSlot: return MoveType.stack_to_reg;
				case func: return MoveType.func_to_reg;
				default: return MoveType.invalid;
			}
		case stackSlot:
			switch(src) with(IrValueKind) {
				case constant: return MoveType.const_to_stack;
				case physicalRegister: return MoveType.reg_to_stack;
				default: return MoveType.invalid;
			}
		case global:
			switch(src) with(IrValueKind) {
				case constant: return MoveType.const_to_global;
				case physicalRegister: return MoveType.reg_to_global;
				default: return MoveType.invalid;
			}
		default: return MoveType.invalid;
	}
}

enum MoveType
{
	invalid,
	const_to_reg,
	const_to_global,
	global_to_reg,
	const_to_stack,
	reg_to_reg,
	reg_to_stack,
	reg_to_global,
	stack_to_reg,
	const_to_mem,
	reg_to_mem,
	mem_to_reg,
	func_to_reg,
}