1 /**
2 Copyright: Copyright (c) 2017-2019 Andrey Penechko.
3 License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
4 Authors: Andrey Penechko.
5 */
6 module vox.be.amd64asm;
7 
8 import vox.utils : Arena;
9 
/// General-purpose registers, declared in hardware encoding order (AX = 0 ... R15 = 15).
enum Register : ubyte {
	AX, CX, DX, BX, SP, BP, SI, DI,
	R8, R9, R10, R11, R12, R13, R14, R15
}
/// One past the largest valid register number.
enum RegisterMax = cast(Register)(Register.max+1);

/// True when the low three bits of `reg` are 0b100, i.e. for SP and R12.
bool is_SP_or_R12(Register reg) {
	immutable low3 = reg & 0b111;
	return low3 == 0b100;
}
/// True when the low three bits of `reg` are 0b101, i.e. for BP and R13.
bool is_BP_or_R13(Register reg) {
	immutable low3 = reg & 0b111;
	return low3 == 0b101;
}
15 
/// Operand width selector used by the size-generic instruction helpers.
enum ArgType : ubyte { BYTE, WORD, DWORD, QWORD }

import std.string : format;
// Immediate operand wrappers, one per width. `argT` is the matching ArgType.
// toString is `const` so that const/immutable immediates can be formatted too;
// the produced text is unchanged ("0X" followed by at least two upper-case hex digits).
struct Imm8  { ubyte  value; enum argT = ArgType.BYTE;  string toString() const { return format("0X%02X", value); } }
struct Imm16 { ushort value; enum argT = ArgType.WORD;  string toString() const { return format("0X%02X", value); } }
struct Imm32 { uint   value; enum argT = ArgType.DWORD; string toString() const { return format("0X%02X", value); } }
struct Imm64 { ulong  value; enum argT = ArgType.QWORD; string toString() const { return format("0X%02X", value); } }
/// True when `I` is one of the immediate wrapper types above.
enum bool isAnyImm(I) = is(I == Imm64) || is(I == Imm32) || is(I == Imm16) || is(I == Imm8);
24 
25 
// REX prefix byte: fixed high nibble 0100 followed by the four flag bits W, R, X, B.
enum ubyte REX_PREFIX = 0x40;   // 0100_0000
enum ubyte REX_W      = 1 << 3; // 0000_1000
enum ubyte REX_R      = 1 << 2; // 0000_0100
enum ubyte REX_X      = 1 << 1; // 0000_0010
enum ubyte REX_B      = 1 << 0; // 0000_0001
31 
/// Legacy (pre-REX) instruction prefix bytes, grouped as in the comments below.
/// Some members share a value (CS/BNT and DS/BT); declaration order is kept
/// so that the first name of each value stays the same.
enum LegacyPrefix : ubyte {
	// --- Prefix group 1 ---
	/// LOCK prefix
	LOCK = 0xF0,
	/// REPNE/REPNZ prefix
	REPN = 0xF2,
	/// REP or REPE/REPZ prefix
	REP  = 0xF3,

	// --- Prefix group 2 ---
	/// CS segment override
	CS = 0x2E,
	/// SS segment override
	SS = 0x36,
	/// DS segment override
	DS = 0x3E,
	/// ES segment override
	ES = 0x26,
	/// FS segment override
	FS = 0x64,
	/// GS segment override
	GS = 0x65,
	/// Branch not taken (same byte as CS)
	BNT = 0x2E,
	/// Branch taken (same byte as DS)
	BT = 0x3E,

	// --- Prefix group 3 ---
	/// Operand-size override prefix
	OPERAND_SIZE = 0x66,

	// --- Prefix group 4 ---
	/// Address-size override prefix
	ADDRESS_SIZE = 0x67,
}
51 
/// Condition codes as used in the low four bits of Jcc / SETcc opcodes.
/// "less" and "greater" refer to comparisons of signed integers,
/// "above" and "below" to comparisons of unsigned integers.
/// In this table every odd code is the negation of the even code before it.
enum Condition : ubyte {
	O   = 0x0, /// overflow (OF=1).
	NO  = 0x1, /// not overflow (OF=0).

	B   = 0x2, /// below (CF=1).
	C   = 0x2, /// carry (CF=1).
	NAE = 0x2, /// not above or equal (CF=1).
	AE  = 0x3, /// above or equal (CF=0).
	NB  = 0x3, /// not below (CF=0).
	NC  = 0x3, /// not carry (CF=0).

	E   = 0x4, /// equal (ZF=1).
	Z   = 0x4, /// zero (ZF=1).
	NE  = 0x5, /// not equal (ZF=0).
	NZ  = 0x5, /// not zero (ZF=0).

	BE  = 0x6, /// below or equal (CF=1 or ZF=1).
	NA  = 0x6, /// not above (CF=1 or ZF=1).
	A   = 0x7, /// above (CF=0 and ZF=0).
	NBE = 0x7, /// not below or equal (CF=0 and ZF=0).

	S   = 0x8, /// sign (SF=1).
	NS  = 0x9, /// not sign (SF=0).

	P   = 0xA, /// parity (PF=1).
	PE  = 0xA, /// parity even (PF=1).
	NP  = 0xB, /// not parity (PF=0).
	PO  = 0xB, /// parity odd (PF=0).

	L   = 0xC, /// less (SF≠OF).
	NGE = 0xC, /// not greater or equal (SF≠OF).
	GE  = 0xD, /// greater or equal (SF=OF).
	NL  = 0xD, /// not less (SF=OF).

	LE  = 0xE, /// less or equal (ZF=1 or SF≠OF).
	NG  = 0xE, /// not greater (ZF=1 or SF≠OF).
	G   = 0xF, /// greater (ZF=0 and SF=OF).
	NLE = 0xF, /// not less or equal (ZF=0 and SF=OF).
}
86 
// Move bit 3 of the register number (its most significant bit) into the
// position of the named REX flag. Low nibble of REX is laid out as W R X B.
ubyte regTo_Rex_W(Register reg) pure nothrow @nogc { return cast(ubyte)(((reg >> 3) & 1) << 3); } // 1000 WRXB
ubyte regTo_Rex_R(Register reg) pure nothrow @nogc { return cast(ubyte)(((reg >> 3) & 1) << 2); } // 0100 WRXB
ubyte regTo_Rex_X(Register reg) pure nothrow @nogc { return cast(ubyte)(((reg >> 3) & 1) << 1); } // 0010 WRXB
ubyte regTo_Rex_B(Register reg) pure nothrow @nogc { return cast(ubyte)((reg >> 3) & 1); }        // 0001 WRXB
92 
// Put the three low bits of the register number into a ModR/M field:
// `reg` occupies bits 5..3, `r/m` occupies bits 2..0.
ubyte regTo_ModRm_Reg(Register reg) pure nothrow @nogc { return cast(ubyte)((reg << 3) & 0b0011_1000); }
ubyte regTo_ModRm_Rm(Register reg) pure nothrow @nogc { return cast(ubyte)(reg & 0b0000_0111); }
96 
/// SIB scale field. `bits` is the raw two-bit field; `value` is the multiplier 1 << bits.
struct SibScale {
	ubyte bits;

	ubyte value() {
		immutable factor = 1 << bits;
		return cast(ubyte)factor;
	}
}
/// Raw ModR/M `mod` field.
struct ModRmMod {
	ubyte bits;
}
99 
/// Packs a SIB byte: scale in bits 7..6, index in bits 5..3, base in bits 2..0.
/// Only the low three bits of each register are used here.
ubyte encodeSibByte(SibScale ss, Register index, Register base) pure nothrow @nogc {
	immutable ubyte scaleField = cast(ubyte)(ss.bits << 6);
	immutable ubyte indexField = cast(ubyte)((index & 0b0111) << 3);
	immutable ubyte baseField  = cast(ubyte)(base & 0b0111);
	return cast(ubyte)(scaleField | indexField | baseField);
}
103 
/// Packs a ModR/M byte: mod in bits 7..6, reg in bits 5..3, r/m in bits 2..0.
/// Only the low three bits of each register are used here.
ubyte encodeModRegRmByte(ModRmMod mod, Register reg, Register rm) pure nothrow @nogc {
	immutable ubyte modField = cast(ubyte)(mod.bits << 6);
	immutable ubyte regField = cast(ubyte)((reg & 0b0111) << 3);
	immutable ubyte rmField  = cast(ubyte)(rm & 0b0111);
	return cast(ubyte)(modField | regField | rmField);
}
107 
/// Shape of a memory operand. The numeric value indexes the
/// memAddrType_to_mod / memAddrType_to_dispType tables, so order matters.
enum MemAddrType : ubyte {
	disp32,           // 0: [                     disp32]
	indexDisp32,      // 1: [       (index * s) + disp32]
	base,             // 2: [base                       ]
	baseDisp32,       // 3: [base +             + disp32]
	baseIndex,        // 4: [base + (index * s)         ]
	baseIndexDisp32,  // 5: [base + (index * s) + disp32]
	baseDisp8,        // 6: [base +             + disp8 ]
	baseIndexDisp8,   // 7: [base + (index * s) + disp8 ]
	ripDisp32         // 8: [RIP  +             + disp32]
}
/// Tags `type` with bit 4 (0b1_0000), which MemAddress reads as "has SIB byte".
ubyte sibAddrType(MemAddrType type) {
	enum ubyte SIB_FLAG = 0b1_0000;
	return cast(ubyte)(SIB_FLAG | type);
}
120 
// Lookup tables indexed by MemAddrType (9 entries, one per enum member).
// Declared immutable: they are constant data, so they must not be writable
// (a stray write would silently corrupt every later encoding) and they can
// be read from any thread and from pure code.
immutable ubyte[9] memAddrType_to_mod = [0,0,0,2,0,2,1,1,0]; // ModR/M mod field for each address type
immutable ubyte[9] memAddrType_to_dispType = [1,1,0,1,0,1,2,2,1]; // 0 - none, 1 - disp32, 2 - disp8
123 
// Memory operand that can be passed to the instruction emitters.
// Field order is part of the interface: the memAddr* helpers build values positionally.
struct MemAddress {
	ubyte typeStorage; // low 4 bits: MemAddrType; bit 4 (0b1_0000): a SIB byte is emitted
	Register indexReg = Register.SP;
	Register baseReg  = Register.BP;
	SibScale scale;
	uint disp; // disp32, or disp8 in the low byte

	/// Address shape with the SIB flag stripped.
	MemAddrType type() {
		enum ubyte TYPE_MASK = 0b1111;
		return cast(MemAddrType)(typeStorage & TYPE_MASK);
	}

	/// Displacement as a 32-bit immediate.
	Imm32 disp32() @property {
		return Imm32(disp);
	}

	/// Low byte of the displacement as an 8-bit immediate.
	Imm8 disp8() @property {
		immutable ubyte low = cast(ubyte)(disp & 0xFF);
		return Imm8(low);
	}

	/// REX.X and REX.B bits contributed by the index and base registers.
	ubyte rexBits() { return regTo_Rex_X(indexReg) | regTo_Rex_B(baseReg); }

	/// ModR/M byte for this operand; `reg` fills the reg/opcode-extension field.
	/// When a SIB byte follows, r/m holds the SP encoding instead of the base register.
	ubyte modRmByte(ubyte reg = 0) {
		Register rm = baseReg;
		if (hasSibByte) rm = Register.SP;
		return encodeModRegRmByte(mod(), cast(Register)reg, rm);
	}

	/// ModR/M mod field looked up from the address type.
	ModRmMod mod() {
		return ModRmMod(memAddrType_to_mod[type]);
	}

	/// SIB byte built from scale, index and base.
	ubyte sibByte() {
		return encodeSibByte(scale, indexReg, baseReg);
	}

	bool hasDisp32() { return memAddrType_to_dispType[type] == 1; }
	bool hasDisp8 () { return memAddrType_to_dispType[type] == 2; }
	bool hasSibByte() { return (typeStorage & 0b1_0000) != 0; }

	/// Human-readable form such as "[BP + (CX*4) + 0x10]".
	string toString() {
		immutable uint  d32 = disp32.value;
		immutable ubyte d8  = disp8.value;
		immutable ubyte mul = scale.value;
		final switch(type) {
			case MemAddrType.disp32:
				return format("[0x%x]", d32);
			case MemAddrType.indexDisp32:
				return format("[(%s*%s) + 0x%x]", indexReg, mul, d32);
			case MemAddrType.base:
				return format("[%s]", baseReg);
			case MemAddrType.baseDisp32:
				return format("[%s + 0x%x]", baseReg, d32);
			case MemAddrType.baseIndex:
				return format("[%s + (%s*%s)]", baseReg, indexReg, mul);
			case MemAddrType.baseIndexDisp32:
				return format("[%s + (%s*%s) + 0x%x]", baseReg, indexReg, mul, d32);
			case MemAddrType.baseDisp8:
				return format("[%s + 0x%x]", baseReg, d8);
			case MemAddrType.baseIndexDisp8:
				return format("[%s + (%s*%s) + 0x%x]", baseReg, indexReg, mul, d8);
			case MemAddrType.ripDisp32:
				return format("[RIP + 0x%x]", d32);
		}
	}
}
160 
// variant 1  [disp32]
// Built with the SIB flag set; index and base keep the SP / BP placeholder values.
MemAddress memAddrDisp32(int disp32) {
	immutable ubyte kind = sibAddrType(MemAddrType.disp32); // with SIB
	return MemAddress(kind, Register.SP, Register.BP, SibScale(), disp32);
}
// variant 2  [(index * s) + disp32]
// Built with the SIB flag set; base keeps the BP placeholder value.
MemAddress memAddrIndexDisp32(Register indexReg, SibScale scale, int disp32) {
	assert(indexReg != Register.SP, "Cannot encode [RSP * scale + disp32]");
	immutable ubyte kind = sibAddrType(MemAddrType.indexDisp32); // with SIB
	return MemAddress(kind, indexReg, Register.BP, scale, disp32);
}
// variant 3  [base]
MemAddress memAddrBase(Register baseReg) {
	// BP and R13 are redirected to variant 7 as [base + 0x0] (with or without SIB)
	if (is_BP_or_R13(baseReg))
		return memAddrBaseDisp8(baseReg, 0);

	// SP and R12 cannot be encoded without SIB; every other base goes without it
	immutable ubyte kind = is_SP_or_R12(baseReg)
		? sibAddrType(MemAddrType.base)
		: cast(ubyte)MemAddrType.base;
	return MemAddress(kind, Register.SP, baseReg);
}
// variant 4  [base + disp32]
MemAddress memAddrBaseDisp32(Register baseReg, int disp32) {
	// SP and R12 as base take the SIB form; all other bases are encoded without SIB
	immutable ubyte kind = is_SP_or_R12(baseReg)
		? sibAddrType(MemAddrType.baseDisp32)
		: cast(ubyte)MemAddrType.baseDisp32;
	return MemAddress(kind, Register.SP, baseReg, SibScale(), disp32);
}
// variant 5  [base + index * s]
MemAddress memAddrBaseIndex(Register baseReg, Register indexReg, SibScale scale) {
	assert(indexReg != Register.SP, "Cannot encode [base + RSP * scale]");

	// BP and R13 are redirected to variant 8 as [base + (index * s) + 0x0]
	if (is_BP_or_R13(baseReg))
		return memAddrBaseIndexDisp8(baseReg, indexReg, scale, 0); // with SIB

	immutable ubyte kind = sibAddrType(MemAddrType.baseIndex); // with SIB
	return MemAddress(kind, indexReg, baseReg, scale);
}
// variant 6  [base + index * s + disp32]
MemAddress memAddrBaseIndexDisp32(Register baseReg, Register indexReg, SibScale scale, int disp32) {
	assert(indexReg != Register.SP, "Cannot encode [base + RSP * scale + disp32]");
	immutable ubyte kind = sibAddrType(MemAddrType.baseIndexDisp32); // with SIB
	return MemAddress(kind, indexReg, baseReg, scale, disp32);
}
// variant 7  [base + disp8]
MemAddress memAddrBaseDisp8(Register baseReg, byte disp8) {
	// SP and R12 cannot be encoded without SIB; every other base goes without it
	immutable ubyte kind = is_SP_or_R12(baseReg)
		? sibAddrType(MemAddrType.baseDisp8)
		: cast(ubyte)MemAddrType.baseDisp8;
	return MemAddress(kind, Register.SP, baseReg, SibScale(), disp8);
}
// variant 8  [base + (index * s) + disp8]
MemAddress memAddrBaseIndexDisp8(Register baseReg, Register indexReg, SibScale scale, byte disp8) {
	assert(indexReg != Register.SP, "Cannot encode [base + RSP * scale + disp8]");
	immutable ubyte kind = sibAddrType(MemAddrType.baseIndexDisp8); // with SIB
	return MemAddress(kind, indexReg, baseReg, scale, disp8);
}
211 
// variant 9  [RIP + disp32]
// The type is stored without the SIB flag, so no SIB byte is emitted for this form;
// index and base keep the SP / BP placeholder values.
MemAddress memAddrRipDisp32(int disp32) {
	immutable ubyte kind = MemAddrType.ripDisp32; // no SIB
	return MemAddress(kind, Register.SP, Register.BP, SibScale(), disp32);
}
216 
// Picks the shortest [base + disp] form: memAddrBaseDisp8 when the displacement
// fits in a signed byte, memAddrBaseDisp32 otherwise.
MemAddress minMemAddrBaseDisp(Register baseReg, int displacement)
{
	immutable bool fitsInByte = displacement >= byte.min && displacement <= byte.max;
	if (fitsInByte)
		return memAddrBaseDisp8(baseReg, cast(byte)displacement);
	return memAddrBaseDisp32(baseReg, displacement);
}
225 
// Opcode wrappers. `size` is the number of opcode bytes; fields are declared in emission order.
/// One-byte opcode.
struct OP1 {
	enum size = 1;
	ubyte op0;
}
/// Two-byte opcode.
struct OP2 {
	enum size = 2;
	ubyte op0;
	ubyte op1;
}
/// True when `O` is OP1 or OP2.
enum bool isAnyOpcode(O) = is(O == OP2) || is(O == OP1);

/// Pointer into the code buffer.
alias PC = ubyte*;
232 
/// Compile-time flags that select which prefixes an emitter adds.
enum EncFlg : ubyte {
	/// Forces a REX prefix when a register with number >= 4 is encoded (see regNeedsRexPrefix).
	/// For byte-sized operands, encodings 4..7 without REX select AH, CH, DH, BH;
	/// with any REX prefix they select SPL, BPL, SIL, DIL.
	REX_HIGH  = 0b001,
	/// Forces REX.W to be emitted. Used for 64-bit instructions.
	REXW_FORCE = 0b010,
	/// Adds the operand-size override prefix (0x66).
	OP_SIZE   = 0b100,
}
242 
// With EncFlg.REX_HIGH set, reports true for every register numbered 4 or above,
// so that encodings 4..7 are not read as ah ch dh bh. Without the flag it is always false.
bool regNeedsRexPrefix(ubyte flags)(Register reg) {
	static if ((flags & EncFlg.REX_HIGH) != 0) {
		return reg >= Register.SP; // Register.SP == 4
	} else {
		return false;
	}
}
248 
/// Writes encoded instruction bytes into an Arena!ubyte.
/// Every putInstr* method emits, in this order: optional 0x66 prefix, optional REX,
/// opcode, ModR/M, optional SIB, optional displacement, optional immediate.
struct Encoder
{
	private Arena!ubyte* arena;

	private PC pc() {
		return arena.nextPtr;
	}

	/// Current length of the arena, as a 32-bit offset.
	uint pcOffset() {
		return cast(uint)arena.length;
	}

	/// Selects the arena that receives the bytes.
	void setBuffer(Arena!ubyte* arena) {
		this.arena = arena;
	}

	/// Everything written to the arena so far.
	ubyte[] code() {
		return arena.data;
	}

	/// Forwards `value` to the arena.
	void sink_put(T)(T value) { arena.put(value); }

	// Emits the operand-size override prefix when the OP_SIZE flag is set.
	private void putOpSizePrefix(ubyte flags)() {
		static if (flags & EncFlg.OP_SIZE) sink_put(LegacyPrefix.OPERAND_SIZE); // 16 bit operand prefix
	}

	// Emits what follows ModR/M for a memory operand: optional SIB, then disp32 or disp8.
	private void putSibAndDisp(MemAddress mem) {
		if (mem.hasSibByte) sink_put(mem.sibByte); // SIB
		if (mem.hasDisp32)
			sink_put(mem.disp32);                  // disp32
		else if (mem.hasDisp8)
			sink_put(mem.disp8);                   // disp8
	}

	/// Emits REX. With REXW_FORCE the byte is always written and carries REX.W;
	/// otherwise it is written only when `bits` is non-zero or `forceRex` is set.
	void putRexByteChecked(ubyte flags)(ubyte bits, bool forceRex = false) {
		static if (flags & EncFlg.REXW_FORCE) {
			sink_put!ubyte(REX_PREFIX | REX_W | bits);
		} else {
			if (forceRex || bits) sink_put!ubyte(REX_PREFIX | bits);
		}
	}

	/// REX for a reg, reg operand pair.
	void putRexByte_RB(ubyte flags)(Register reg, Register rm) {
		immutable bool force = regNeedsRexPrefix!flags(reg) || regNeedsRexPrefix!flags(rm);
		putRexByteChecked!flags(regTo_Rex_R(reg) | regTo_Rex_B(rm), force);
	}

	/// REX for a single register placed in r/m (or in the opcode byte).
	void putRexByte_regB(ubyte flags)(Register rm) {
		immutable bool force = regNeedsRexPrefix!flags(rm);
		putRexByteChecked!flags(regTo_Rex_B(rm), force);
	}

	/// REX for a base register only.
	void putRexByte_B(ubyte flags)(Register base) {
		putRexByteChecked!flags(regTo_Rex_B(base));
	}

	/// REX for reg + index + base.
	void putRexByte_RXB(ubyte flags)(Register r, Register index, Register base) {
		immutable bool force = regNeedsRexPrefix!flags(r);
		putRexByteChecked!flags(regTo_Rex_R(r) | regTo_Rex_X(index) | regTo_Rex_B(base), force);
	}

	/// REX for index + base.
	void putRexByte_XB(ubyte flags)(Register index, Register base) {
		putRexByteChecked!flags(regTo_Rex_X(index) | regTo_Rex_B(base));
	}

	/// <opcode> ModR/M(mod=11, reg=src_reg, rm=dst_rm)
	void putInstrBinaryRegReg(ubyte flags, O)(O opcode, Register dst_rm, Register src_reg) if (isAnyOpcode!O) {
		putOpSizePrefix!flags();
		putRexByte_RB!flags(src_reg, dst_rm);                          // REX
		sink_put(opcode);                                              // Opcode
		sink_put(encodeModRegRmByte(ModRmMod(0b11), src_reg, dst_rm)); // ModR/M
	}

	/// Same as putInstrBinaryRegReg, followed by an immediate.
	void putInstrBinaryRegRegImm(ubyte flags, O, I)(O opcode, Register dst_rm, Register src_reg, I src_imm) if (isAnyOpcode!O && isAnyImm!I) {
		putOpSizePrefix!flags();
		putRexByte_RB!flags(src_reg, dst_rm);                          // REX
		sink_put(opcode);                                              // Opcode
		sink_put(encodeModRegRmByte(ModRmMod(0b11), src_reg, dst_rm)); // ModR/M
		sink_put(src_imm);                                             // Imm8/16/32/64
	}

	// PUSH, POP, MOV, XCHG, BSWAP
	/// Register number is folded into the low three bits of the opcode byte; immediate follows.
	void putInstrBinaryRegImm1(ubyte flags, I)(OP1 opcode, Register dst_rm, I src_imm) if (isAnyImm!I) {
		putOpSizePrefix!flags();
		putRexByte_regB!flags(dst_rm);                  // REX
		sink_put!ubyte(opcode.op0 | (dst_rm & 0b0111)); // Opcode + reg
		sink_put(src_imm);                              // Imm8/16/32/64
	}

	/// <opcode> /regOpcode with a register in r/m, followed by an immediate.
	void putInstrBinaryRegImm2(ubyte flags, I)(OP1 opcode, ubyte regOpcode, Register dst_rm, I src_imm) if (isAnyImm!I) {
		putOpSizePrefix!flags();
		putRexByte_regB!flags(dst_rm);                                                 // REX
		sink_put(opcode);                                                              // Opcode
		sink_put(encodeModRegRmByte(ModRmMod(0b11), cast(Register)regOpcode, dst_rm)); // ModR/M with opcode extension
		sink_put(src_imm);                                                             // Imm8/16/32/64
	}

	// if isReg == true then reg_or_opcode is a register, otherwise it is an extra opcode
	// (an extra opcode contributes nothing to REX)
	void putInstrBinaryRegMem(ubyte flags, bool isReg = true, O)(O opcode, Register reg_or_opcode, MemAddress src_mem) if (isAnyOpcode!O) {
		putOpSizePrefix!flags();
		static if (isReg) {
			putRexByte_RXB!flags(reg_or_opcode, src_mem.indexReg, src_mem.baseReg); // REX
		} else {
			putRexByte_XB!flags(src_mem.indexReg, src_mem.baseReg);                 // REX
		}
		sink_put(opcode);                           // Opcode
		sink_put(src_mem.modRmByte(reg_or_opcode)); // ModR/M
		putSibAndDisp(src_mem);                     // SIB, disp32/disp8
	}

	/// Register + memory operand, followed by an immediate.
	void putInstrBinaryRegMemImm(ubyte flags, O, I)(O opcode, Register reg, MemAddress src_mem, I src_imm) if (isAnyOpcode!O && isAnyImm!I) {
		putOpSizePrefix!flags();
		putRexByte_RXB!flags(reg, src_mem.indexReg, src_mem.baseReg); // REX
		sink_put(opcode);                                             // Opcode
		sink_put(src_mem.modRmByte(reg));                             // ModR/M
		putSibAndDisp(src_mem);                                       // SIB, disp32/disp8
		sink_put(src_imm);                                            // Imm8/16/32
	}

	/// <opcode> /regOpcode with a memory operand, followed by an immediate.
	void putInstrBinaryMemImm(ubyte flags, O, I)(O opcode, ubyte regOpcode, MemAddress dst_mem, I src_imm) if (isAnyOpcode!O && isAnyImm!I) {
		putInstrBinaryRegMem!(flags, false)(opcode, cast(Register)regOpcode, dst_mem);
		sink_put(src_imm);                                            // Imm8/16/32
	}

	/// Emits a single raw prefix byte.
	void prefix(ubyte val) {
		sink_put(val);
	}

	/// Opcode only.
	void putInstrNullary(O)(O opcode) if(isAnyOpcode!O) {
		sink_put(opcode); // Opcode
	}

	/// Opcode followed by an immediate.
	void putInstrNullaryImm(O, I)(O opcode, I imm) if(isAnyOpcode!O && isAnyImm!I) {
		sink_put(opcode); // Opcode
		sink_put(imm);    // Imm8/16/32/64
	}

	// <opcode> /regOpcode
	void putInstrUnaryReg1(ubyte flags, O)(O opcode, ubyte regOpcode, Register dst_rm) if (isAnyOpcode!O) {
		putOpSizePrefix!flags();
		putRexByte_regB!flags(dst_rm);                                                 // REX
		sink_put(opcode);                                                              // Opcode
		sink_put(encodeModRegRmByte(ModRmMod(0b11), cast(Register)regOpcode, dst_rm)); // ModR/M with opcode extension
	}

	/// Register number is folded into the low three bits of the opcode byte.
	void putInstrUnaryReg2(ubyte flags)(ubyte opcode, Register dst_rm) {
		putOpSizePrefix!flags();
		putRexByte_regB!flags(dst_rm);              // REX
		sink_put!ubyte(opcode | (dst_rm & 0b0111)); // Opcode
	}

	/// <opcode> /regOpcode with a memory operand.
	void putInstrUnaryMem(ubyte flags, O)(O opcode, ubyte regOpcode, MemAddress dst_mem) if (isAnyOpcode!O) {
		putInstrBinaryRegMem!(flags, false)(opcode, cast(Register)regOpcode, dst_mem);
	}

	/// Optional 0x66 prefix, opcode, immediate.
	void putInstrUnaryImm(ubyte flags, O, I)(O opcode, I imm) if (isAnyOpcode!O && isAnyImm!I) {
		putOpSizePrefix!flags();
		sink_put(opcode); // Opcode
		sink_put(imm);    // Imm8/16/32
	}
}
364 
/// Handle that re-issues a CodeGen_x86_64 call at a remembered code position.
/// Any method name used on a Fixup is forwarded through opDispatch to `codeGen`.
struct Fixup
{
	private CodeGen_x86_64* codeGen;
	private PC fixupPC;

	// Generates one forwarding overload per overload of CodeGen_x86_64.<member>.
	template opDispatch(string member)
	{
		import std.traits : Parameters;
		static foreach(Over; __traits(getOverloads, CodeGen_x86_64, member))
		{
			auto opDispatch(Parameters!(Over) args) {
				// Intent: point the encoder at fixupPC, emit, then restore the old position.
				// NOTE(review): Encoder as shown in this file declares `pc` only as a
				// private getter with no setter, so the two assignments below look like
				// they would not compile once this template is instantiated - confirm.
				auto tempPC = codeGen.encoder.pc;
				codeGen.encoder.pc = fixupPC;
				scope(exit)codeGen.encoder.pc = tempPC;
				mixin("return codeGen."~member~"(args);");
			}
		}
	}
}
384 
/// Describes a 32-bit field in the code buffer that is to be patched later.
struct Fixup32 {
	/// Offset of the 4-byte field within the code buffer.
	uint fixupOffset;
	/// Additional offset stored with the fixup (4 from getAddressFixup, 0 from getDataFixup).
	uint extraOffset;
}
390 
/// Distance in bytes from `from` to `to` as a 32-bit immediate.
/// Asserts that the distance fits in a signed 32-bit integer.
Imm32 jumpOffset(PC from, PC to) {
	immutable ptrdiff_t delta = to - from;
	assert(delta == cast(int)delta, format("offset from %s to %s is %X and is not representible as int", from, to, delta));
	return Imm32(cast(int)delta);
}
395 
/// Kind of a single instruction operand.
enum AsmArgKind : ubyte { REG, IMM, MEM }

/// All (left, right) operand kind pairs. Named LEFT_RIGHT; value is left + 3 * right.
enum AsmArgKindProduct : ubyte {
//	left:
//	REG      IMM      MEM         right:
	REG_REG, IMM_REG, MEM_REG, // REG
	REG_IMM, IMM_IMM, MEM_IMM, // IMM
	REG_MEM, IMM_MEM, MEM_MEM, // MEM
}

/// Combines two operand kinds into the matching AsmArgKindProduct member.
AsmArgKindProduct asmArgKindProduct(AsmArgKind left, AsmArgKind right) {
	enum kindCount = 3; // number of AsmArgKind members
	immutable index = right * kindCount + left;
	return cast(AsmArgKindProduct)index;
}
/// Untagged storage for one instruction operand: an immediate of any width,
/// a register, or a memory address. Which member is valid is not recorded here;
/// presumably an AsmArgKind kept alongside tells - confirm against callers.
union AsmArg
{
	Imm8 imm8;
	Imm16 imm16;
	Imm32 imm32;
	Imm64 imm64;
	Register reg;
	MemAddress memAddress;
}
415 
/// Two-operand instructions that share the same operand forms.
/// Values are consecutive indices starting at 0.
enum AMD64OpRegular : ubyte {
	add = 0,
	or  = 1,
	and = 2,
	sub = 3,
	xor = 4,
	cmp = 5,
}
424 
/// Parameters describing one AMD64OpRegular instruction to encode.
struct AsmOpParam
{
	AsmArgKind dstKind; // kind of the destination operand
	AsmArgKind srcKind; // kind of the source operand
	AMD64OpRegular op;  // which instruction
	ArgType argType;    // operand width
	ArgType immType;    // width of the immediate; presumably only meaningful when srcKind is IMM - confirm
}
433 
434 // Sink defines put(T) for ubyte, ubyte[], Imm8, Imm16, Imm32, Imm64
435 struct CodeGen_x86_64
436 {
437 	Encoder encoder;
438 
439 	Fixup fixupAt(PC at) return { return Fixup(&this, at); }
440 	Fixup saveFixup() return { return Fixup(&this, encoder.pc); }
441 	PC pc() { return encoder.pc; }
442 
	/// Used for versions of instructions without argument size suffix.
	/// mov, add, sub, instead of movq, addb, subd.
	/// mov(Register.AX, Register.DI, ArgType.QWORD); instead of movq(Register.AX, Register.DI);
	///
	/// For instruction name `s`, forwards to `s ~ "b"`, `s ~ "w"`, `s ~ "d"` or `s ~ "q"`
	/// according to `argType`. A case exists only when the suffixed call compiles for
	/// these argument types; any other `argType` hits the default branch and asserts.
	void opDispatch(string s, Arg1, Arg2)(Arg1 dst, Arg2 src, ArgType argType) {
		switch(argType) {
			// each `static if` removes its case when no matching suffixed overload exists
			static if (__traits(compiles, mixin(s~"b(dst, src)"))) { case ArgType.BYTE:  mixin(s~"b(dst, src);"); break; }
			static if (__traits(compiles, mixin(s~"w(dst, src)"))) { case ArgType.WORD:  mixin(s~"w(dst, src);"); break; }
			static if (__traits(compiles, mixin(s~"d(dst, src)"))) { case ArgType.DWORD: mixin(s~"d(dst, src);"); break; }
			static if (__traits(compiles, mixin(s~"q(dst, src)"))) { case ArgType.QWORD: mixin(s~"q(dst, src);"); break; }
			default: assert(false, format("Cannot encode %s(%s, %s, ArgType.%s)", s, dst, src, argType));
		}
	}
455 
	/// ditto
	void opDispatch(string s, Arg1)(Arg1 dst, ArgType argType) {
		// Single-operand variant: forwards to s~"b", s~"w", s~"d" or s~"q" by argType.
		switch(argType) {
			// each `static if` removes its case when no matching suffixed overload exists
			static if (__traits(compiles, mixin(s~"b(dst)"))) { case ArgType.BYTE:  mixin(s~"b(dst);"); break; }
			static if (__traits(compiles, mixin(s~"w(dst)"))) { case ArgType.WORD:  mixin(s~"w(dst);"); break; }
			static if (__traits(compiles, mixin(s~"d(dst)"))) { case ArgType.DWORD: mixin(s~"d(dst);"); break; }
			static if (__traits(compiles, mixin(s~"q(dst)"))) { case ArgType.QWORD: mixin(s~"q(dst);"); break; }
			// no suffixed overload for this width
			default: assert(false, format("Cannot encode %s(%s, ArgType.%s)", s, dst, argType));
		}
	}
466 
467 	mixin binaryInstr_RMtoR_RtoRM!("add", [0x00, 0x01], [0x02, 0x03]);
468 	mixin binaryInstr_RM_Imm!("add", 0);
469 
470 	mixin instrMOV!();
471 	mixin binaryInstr_RMtoR_RtoRM!("mov", [0x88, 0x89], [0x8A, 0x8B]);
472 
473 	mixin binaryInstr_RMtoR_RtoRM!("sub", [0x28, 0x29], [0x2A, 0x2B]);
474 	mixin binaryInstr_RM_Imm!("sub", 5);
475 
476 	mixin binaryInstr_RMtoR_RtoRM!("and", [0x20, 0x21], [0x22, 0x23]);
477 	mixin binaryInstr_RM_Imm!("and", 4);
478 
479 	mixin binaryInstr_RMtoR_RtoRM!("or", [0x08, 0x09], [0x0A, 0x0B]);
480 	mixin binaryInstr_RM_Imm!("or", 1);
481 
482 	mixin binaryInstr_RMtoR_RtoRM!("xor", [0x30, 0x31], [0x32, 0x33]);
483 	mixin binaryInstr_RM_Imm!("xor", 6);
484 
485 	mixin binaryInstr_RMtoR_RtoRM!("cmp", [0x38, 0x39], [0x3A, 0x3B]);
486 	mixin binaryInstr_RM_Imm!("cmp", 7);
487 
488 	mixin binaryInstr_RMtoR_RtoRM!("xchg", [0x86, 0x87], [0x86, 0x87]);
489 
490 	void leaw(Register dst, MemAddress src){ encoder.putInstrBinaryRegMem!(EncFlg.OP_SIZE) (OP1(0x8D), dst, src); }
491 	void lead(Register dst, MemAddress src){ encoder.putInstrBinaryRegMem!(0)(OP1(0x8D), dst, src); }
492 	void leaq(Register dst, MemAddress src){ encoder.putInstrBinaryRegMem!(EncFlg.REXW_FORCE)(OP1(0x8D), dst, src); }
493 
494 	mixin unaryInstr_RM!("inc", [0xFE,0xFF], 0);
495 	mixin unaryInstr_RM!("dec", [0xFE,0xFF], 1);
496 	mixin unaryInstr_RM!("neg", [0xF6,0xF7], 3); // Two's Complement Negation
497 	mixin unaryInstr_RM!("mul", [0xF6,0xF7], 4);
498 	mixin unaryInstr_RM!("div", [0xF6,0xF7], 6);
499 	mixin unaryInstr_RM!("idiv", [0xF6,0xF7], 7);
500 	mixin unaryInstr_RM!("not", [0xF6,0xF7], 2); // One's Complement Negation
501 
502 	mixin shift_RM_Imm8!("shli", [0xC0,0xC1], 4); // shl dst, imm8
503 	mixin shift_RM_Imm8!("shri", [0xC0,0xC1], 5); // shr dst, imm8
504 	mixin shift_RM_Imm8!("sari", [0xC0,0xC1], 7); // sar dst, imm8
505 	mixin unaryInstr_RM!("shl1", [0xD0,0xD1], 4); // shl dst, 1
506 	mixin unaryInstr_RM!("shr1", [0xD0,0xD1], 5); // shr dst, 1
507 	mixin unaryInstr_RM!("sar1", [0xD0,0xD1], 7); // sar dst, 1
508 	mixin unaryInstr_RM!("shl", [0xD2,0xD3], 4); // shl dst, cl
509 	mixin unaryInstr_RM!("shr", [0xD2,0xD3], 5); // shr dst, cl
510 	mixin unaryInstr_RM!("sar", [0xD2,0xD3], 7); // sar dst, cl
511 
512 	void nop() { encoder.putInstrNullary(OP1(0x90)); }
513 	void ud2() { encoder.putInstrNullary(OP2(0x0F, 0x0B)); }
514 
515 	/// relative call to target virtual address.
516 	void call(Imm32 targetOffset) { encoder.putInstrNullaryImm(OP1(0xE8), targetOffset); } // relative to next instr
517 	void call(PC target) { encoder.putInstrNullaryImm(OP1(0xE8), jumpOffset(encoder.pc + 5, target)); } // relative to next instr
518 	void call(Register target) { encoder.putInstrUnaryReg1!(EncFlg.REXW_FORCE)(OP1(0xFF), 2, target); } // absolute address
519 	void call(MemAddress target) { encoder.putInstrUnaryMem!(0)(OP1(0xFF), 2, target); } // absolute address, use DWORD to omit REX.W
520 
521 	/// Generate fixup for last 32 bits of last instruction.
522 	Fixup32 getAddressFixup() { return Fixup32(encoder.pcOffset - 4, 4); }
523 	Fixup32 getDataFixup() { return Fixup32(encoder.pcOffset - 4, 0); }
524 
525 	/// jump relative to next instr.
526 	void jmp(Imm8 offset ) { encoder.putInstrNullaryImm(OP1(0xEB), offset); }
527 	void jmp(Imm32 offset) { encoder.putInstrNullaryImm(OP1(0xE9), offset); }
528 	void jmpAbs(PC target) { encoder.putInstrNullaryImm(OP1(0xE9), jumpOffset(encoder.pc + 5, target) ); }
529 
530 	/// jump relative to next instr.
531 	void jcc(Condition condition, Imm8  offset) { encoder.putInstrNullaryImm(OP1(0x70 | condition), offset); }
532 	void jcc(Condition condition, Imm32 offset) { encoder.putInstrNullaryImm(OP2(0x0F, 0x80 | condition), offset); }
533 	void jccAbs(Condition condition, PC target) { encoder.putInstrNullaryImm(OP2(0x0F, 0x80 | condition), jumpOffset(encoder.pc + 6, target) ); }
534 
535 	void setcc(Condition condition, Register dst)   { encoder.putInstrUnaryReg1!(EncFlg.REX_HIGH)(OP2(0x0F, 0x90 | condition), 0, dst); }
536 	void setcc(Condition condition, MemAddress dst) { encoder.putInstrUnaryMem !(EncFlg.REX_HIGH)(OP2(0x0F, 0x90 | condition), 0, dst); }
537 
538 	void test(Register dst, Register src, ArgType argType) {
539 		final switch(argType) {
540 			case ArgType.BYTE:  testb(dst, src); break;
541 			case ArgType.WORD:  testw(dst, src); break;
542 			case ArgType.DWORD: testd(dst, src); break;
543 			case ArgType.QWORD: testq(dst, src); break;
544 		}
545 	}
546 
547 	void testb(Register dst, Register src){ encoder.putInstrBinaryRegReg!(EncFlg.REX_HIGH) (OP1(0x84), dst, src); }
548 	void testw(Register dst, Register src){ encoder.putInstrBinaryRegReg!(EncFlg.OP_SIZE) (OP1(0x85), dst, src); }
549 	void testd(Register dst, Register src){ encoder.putInstrBinaryRegReg!(0)(OP1(0x85), dst, src); }
550 	void testq(Register dst, Register src){ encoder.putInstrBinaryRegReg!(EncFlg.REXW_FORCE)(OP1(0x85), dst, src); }
551 
	// IMUL dst, src (0x0F 0xAF). Register forms pass (src, dst) to the encoder because
	// putInstrBinaryRegReg puts its first register in ModR/M.rm and its second in
	// ModR/M.reg, and this instruction keeps the destination in ModR/M.reg.
	void imulw(Register dst, Register src){ encoder.putInstrBinaryRegReg!(EncFlg.OP_SIZE) (OP2(0x0F, 0xAF), src, dst); }
	void imuld(Register dst, Register src){ encoder.putInstrBinaryRegReg!(0)(OP2(0x0F, 0xAF), src, dst); }
	void imulq(Register dst, Register src){ encoder.putInstrBinaryRegReg!(EncFlg.REXW_FORCE)(OP2(0x0F, 0xAF), src, dst); }
	// Memory-source forms: dst is the register operand, src the memory operand.
	void imulw(Register dst, MemAddress src){ encoder.putInstrBinaryRegMem!(EncFlg.OP_SIZE) (OP2(0x0F, 0xAF), dst, src); }
	void imuld(Register dst, MemAddress src){ encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0xAF), dst, src); }
	void imulq(Register dst, MemAddress src){ encoder.putInstrBinaryRegMem!(EncFlg.REXW_FORCE)(OP2(0x0F, 0xAF), dst, src); }

	// IMUL dst, src1, imm: opcode 0x6B takes an Imm8, opcode 0x69 an Imm16/Imm32.
	// As above, src1 is passed first (ModR/M.rm) and dst second (ModR/M.reg).
	void imulw(Register dst, Register src1, Imm8 src2) { encoder.putInstrBinaryRegRegImm!(EncFlg.OP_SIZE) (OP1(0x6B), src1, dst, src2); }
	void imuld(Register dst, Register src1, Imm8 src2) { encoder.putInstrBinaryRegRegImm!(0)(OP1(0x6B), src1, dst, src2); }
	void imulq(Register dst, Register src1, Imm8 src2) { encoder.putInstrBinaryRegRegImm!(EncFlg.REXW_FORCE)(OP1(0x6B), src1, dst, src2); }
	void imulw(Register dst, Register src1, Imm16 src2){ encoder.putInstrBinaryRegRegImm!(EncFlg.OP_SIZE) (OP1(0x69), src1, dst, src2); }
	void imuld(Register dst, Register src1, Imm32 src2){ encoder.putInstrBinaryRegRegImm!(0)(OP1(0x69), src1, dst, src2); }
	void imulq(Register dst, Register src1, Imm32 src2){ encoder.putInstrBinaryRegRegImm!(EncFlg.REXW_FORCE)(OP1(0x69), src1, dst, src2); }

	// IMUL dst, [mem], imm: same opcodes with a memory first source.
	void imulw(Register dst, MemAddress src1, Imm8 src2) { encoder.putInstrBinaryRegMemImm!(EncFlg.OP_SIZE) (OP1(0x6B), dst, src1, src2); }
	void imuld(Register dst, MemAddress src1, Imm8 src2) { encoder.putInstrBinaryRegMemImm!(0)(OP1(0x6B), dst, src1, src2); }
	void imulq(Register dst, MemAddress src1, Imm8 src2) { encoder.putInstrBinaryRegMemImm!(EncFlg.REXW_FORCE)(OP1(0x6B), dst, src1, src2); }
	void imulw(Register dst, MemAddress src1, Imm16 src2){ encoder.putInstrBinaryRegMemImm!(EncFlg.OP_SIZE) (OP1(0x69), dst, src1, src2); }
	void imuld(Register dst, MemAddress src1, Imm32 src2){ encoder.putInstrBinaryRegMemImm!(0)(OP1(0x69), dst, src1, src2); }
	void imulq(Register dst, MemAddress src1, Imm32 src2){ encoder.putInstrBinaryRegMemImm!(EncFlg.REXW_FORCE)(OP1(0x69), dst, src1, src2); }
572 
573 	void movzx_btow(Register dst, Register src){ encoder.putInstrBinaryRegReg!(EncFlg.REX_HIGH | EncFlg.OP_SIZE) (OP2(0x0F, 0xB6), src, dst); }
574 	void movzx_btod(Register dst, Register src){ encoder.putInstrBinaryRegReg!(EncFlg.REX_HIGH)(OP2(0x0F, 0xB6), src, dst); }
575 	void movzx_btoq(Register dst, Register src){ encoder.putInstrBinaryRegReg!(EncFlg.REX_HIGH | EncFlg.REXW_FORCE)(OP2(0x0F, 0xB6), src, dst); }
576 	void movzx_wtod(Register dst, Register src){ encoder.putInstrBinaryRegReg!(0)(OP2(0x0F, 0xB7), src, dst); }
577 	void movzx_wtoq(Register dst, Register src){ encoder.putInstrBinaryRegReg!(EncFlg.REXW_FORCE)(OP2(0x0F, 0xB7), src, dst); }
578 
579 	void movsx_btow(Register dst, Register src){ encoder.putInstrBinaryRegReg!(EncFlg.REX_HIGH | EncFlg.OP_SIZE) (OP2(0x0F, 0xBE), src, dst); }
580 	void movsx_btod(Register dst, Register src){ encoder.putInstrBinaryRegReg!(EncFlg.REX_HIGH)(OP2(0x0F, 0xBE), src, dst); }
581 	void movsx_btoq(Register dst, Register src){ encoder.putInstrBinaryRegReg!(EncFlg.REX_HIGH | EncFlg.REXW_FORCE)(OP2(0x0F, 0xBE), src, dst); }
582 	void movsx_wtod(Register dst, Register src){ encoder.putInstrBinaryRegReg!(0)(OP2(0x0F, 0xBF), src, dst); }
583 	void movsx_wtoq(Register dst, Register src){ encoder.putInstrBinaryRegReg!(EncFlg.REXW_FORCE)(OP2(0x0F, 0xBF), src, dst); }
584 	void movsx_dtoq(Register dst, Register src){ encoder.putInstrBinaryRegReg!(EncFlg.REXW_FORCE)(OP1(0x63), src, dst); }
585 
	// MOVZX (zero-extending move) with a memory source.
	// Same opcodes and flags as the register-source overloads; the encoder receives (dst, src).
	void movzx_btow(Register dst, MemAddress src){ encoder.putInstrBinaryRegMem!(EncFlg.REX_HIGH | EncFlg.OP_SIZE) (OP2(0x0F, 0xB6), dst, src); }
	void movzx_btod(Register dst, MemAddress src){ encoder.putInstrBinaryRegMem!(EncFlg.REX_HIGH)(OP2(0x0F, 0xB6), dst, src); }
	void movzx_btoq(Register dst, MemAddress src){ encoder.putInstrBinaryRegMem!(EncFlg.REX_HIGH | EncFlg.REXW_FORCE)(OP2(0x0F, 0xB6), dst, src); }
	void movzx_wtod(Register dst, MemAddress src){ encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0xB7), dst, src); }
	void movzx_wtoq(Register dst, MemAddress src){ encoder.putInstrBinaryRegMem!(EncFlg.REXW_FORCE)(OP2(0x0F, 0xB7), dst, src); }
591 
	// MOVSX / MOVSXD (sign-extending move) with a memory source.
	// Same opcodes and flags as the register-source overloads; the encoder receives (dst, src).
	void movsx_btow(Register dst, MemAddress src){ encoder.putInstrBinaryRegMem!(EncFlg.REX_HIGH | EncFlg.OP_SIZE) (OP2(0x0F, 0xBE), dst, src); }
	void movsx_btod(Register dst, MemAddress src){ encoder.putInstrBinaryRegMem!(EncFlg.REX_HIGH)(OP2(0x0F, 0xBE), dst, src); }
	void movsx_btoq(Register dst, MemAddress src){ encoder.putInstrBinaryRegMem!(EncFlg.REX_HIGH | EncFlg.REXW_FORCE)(OP2(0x0F, 0xBE), dst, src); }
	void movsx_wtod(Register dst, MemAddress src){ encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0xBF), dst, src); }
	void movsx_wtoq(Register dst, MemAddress src){ encoder.putInstrBinaryRegMem!(EncFlg.REXW_FORCE)(OP2(0x0F, 0xBF), dst, src); }
	void movsx_dtoq(Register dst, MemAddress src){ encoder.putInstrBinaryRegMem!(EncFlg.REXW_FORCE)(OP1(0x63), dst, src); }
598 
599 	void cwd() { encoder.putInstrNullary(OP2(0x66, 0x99)); }
600 	void cdq() { encoder.putInstrNullary(OP1(0x99)); }
601 	void cqo() { encoder.putInstrNullary(OP2(0x48, 0x99)); }
602 
	// POP into a register (opcode 0x58 passed together with the register) or into memory (opcode 0x8F, extension 0).
	// The `w` overloads pass EncFlg.OP_SIZE; the `q` overloads pass no flags.
	void popw(Register dst)   { encoder.putInstrUnaryReg2!(EncFlg.OP_SIZE)(0x58, dst); }
	void popq(Register dst)   { encoder.putInstrUnaryReg2!(0)(0x58, dst); } // no REXW_FORCE: REX.W is deliberately omitted
	void popw(MemAddress dst) { encoder.putInstrUnaryMem!(EncFlg.OP_SIZE)(OP1(0x8F), 0, dst); }
	void popq(MemAddress dst) { encoder.putInstrUnaryMem!(0)(OP1(0x8F), 0, dst); } // no REXW_FORCE: REX.W is deliberately omitted
607 
	// PUSH from a register (opcode 0x50 passed together with the register) or from memory (opcode 0xFF, extension 6).
	// The `w` overloads pass EncFlg.OP_SIZE; the `q` overloads pass no flags.
	void pushw(Register dst)   { encoder.putInstrUnaryReg2!(EncFlg.OP_SIZE)(0x50, dst); }
	void pushq(Register dst)   { encoder.putInstrUnaryReg2!(0)(0x50, dst); } // no REXW_FORCE: REX.W is deliberately omitted
	void pushw(MemAddress dst) { encoder.putInstrUnaryMem!(EncFlg.OP_SIZE)(OP1(0xFF), 6, dst); }
	void pushq(MemAddress dst) { encoder.putInstrUnaryMem!(0)(OP1(0xFF), 6, dst); } // no REXW_FORCE: REX.W is deliberately omitted
612 
	// PUSH of an immediate: opcode 0x6A carries an 8-bit immediate, opcode 0x68 a 16- or 32-bit one.
	// `pushw` passes EncFlg.OP_SIZE to select the 16-bit form of 0x68.
	void pushb(Imm8  src) { encoder.putInstrUnaryImm!(EncFlg.REX_HIGH )(OP1(0x6A), src); }
	void pushw(Imm16 src) { encoder.putInstrUnaryImm!(EncFlg.OP_SIZE )(OP1(0x68), src); }
	void pushd(Imm32 src) { encoder.putInstrUnaryImm!(0)(OP1(0x68), src); }
616 
617 	void ret() { encoder.putInstrNullary(OP1(0xC3)); }
618 	void ret(Imm16 bytesToPop) { encoder.putInstrNullaryImm(OP1(0xC2), bytesToPop); }
619 
620 	void int3() { encoder.putInstrNullary(OP1(0xCC)); }
621 	void syscall() { encoder.putInstrNullary(OP2(0x0F, 0x05)); }
622 
	// Packed SSE logic and arithmetic: and/or/xor/add/mul/sub/div, each with a register or memory source.
	// The `ps` (packed single) variants pass no flags; the `pd` (packed double) variants use the same
	// two-byte opcode with EncFlg.OP_SIZE.
	// Opcodes: and = 0F 54, or = 0F 56, xor = 0F 57, add = 0F 58, mul = 0F 59, sub = 0F 5C, div = 0F 5E.
	// Register-register overloads hand (src, dst) to the encoder; register-memory overloads hand (dst, src).
	void andps(Register dst, Register src) { encoder.putInstrBinaryRegReg!(0)(OP2(0x0F, 0x54), src, dst); }
	void andps(Register dst, MemAddress src) { encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x54), dst, src); }
	void andpd(Register dst, Register src) { encoder.putInstrBinaryRegReg!(EncFlg.OP_SIZE)(OP2(0x0F, 0x54), src, dst); }
	void andpd(Register dst, MemAddress src) { encoder.putInstrBinaryRegMem!(EncFlg.OP_SIZE)(OP2(0x0F, 0x54), dst, src); }
	void orps(Register dst, Register src) { encoder.putInstrBinaryRegReg!(0)(OP2(0x0F, 0x56), src, dst); }
	void orps(Register dst, MemAddress src) { encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x56), dst, src); }
	void orpd(Register dst, Register src) { encoder.putInstrBinaryRegReg!(EncFlg.OP_SIZE)(OP2(0x0F, 0x56), src, dst); }
	void orpd(Register dst, MemAddress src) { encoder.putInstrBinaryRegMem!(EncFlg.OP_SIZE)(OP2(0x0F, 0x56), dst, src); }
	void xorps(Register dst, Register src) { encoder.putInstrBinaryRegReg!(0)(OP2(0x0F, 0x57), src, dst); }
	void xorps(Register dst, MemAddress src) { encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x57), dst, src); }
	void xorpd(Register dst, Register src) { encoder.putInstrBinaryRegReg!(EncFlg.OP_SIZE)(OP2(0x0F, 0x57), src, dst); }
	void xorpd(Register dst, MemAddress src) { encoder.putInstrBinaryRegMem!(EncFlg.OP_SIZE)(OP2(0x0F, 0x57), dst, src); }
	void addps(Register dst, Register src) { encoder.putInstrBinaryRegReg!(0)(OP2(0x0F, 0x58), src, dst); }
	void addps(Register dst, MemAddress src) { encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x58), dst, src); }
	void addpd(Register dst, Register src) { encoder.putInstrBinaryRegReg!(EncFlg.OP_SIZE)(OP2(0x0F, 0x58), src, dst); }
	void addpd(Register dst, MemAddress src) { encoder.putInstrBinaryRegMem!(EncFlg.OP_SIZE)(OP2(0x0F, 0x58), dst, src); }
	void mulps(Register dst, Register src) { encoder.putInstrBinaryRegReg!(0)(OP2(0x0F, 0x59), src, dst); }
	void mulps(Register dst, MemAddress src) { encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x59), dst, src); }
	void mulpd(Register dst, Register src) { encoder.putInstrBinaryRegReg!(EncFlg.OP_SIZE)(OP2(0x0F, 0x59), src, dst); }
	void mulpd(Register dst, MemAddress src) { encoder.putInstrBinaryRegMem!(EncFlg.OP_SIZE)(OP2(0x0F, 0x59), dst, src); }
	void subps(Register dst, Register src) { encoder.putInstrBinaryRegReg!(0)(OP2(0x0F, 0x5C), src, dst); }
	void subps(Register dst, MemAddress src) { encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x5C), dst, src); }
	void subpd(Register dst, Register src) { encoder.putInstrBinaryRegReg!(EncFlg.OP_SIZE)(OP2(0x0F, 0x5C), src, dst); }
	void subpd(Register dst, MemAddress src) { encoder.putInstrBinaryRegMem!(EncFlg.OP_SIZE)(OP2(0x0F, 0x5C), dst, src); }
	void divps(Register dst, Register src) { encoder.putInstrBinaryRegReg!(0)(OP2(0x0F, 0x5E), src, dst); }
	void divps(Register dst, MemAddress src) { encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x5E), dst, src); }
	void divpd(Register dst, Register src) { encoder.putInstrBinaryRegReg!(EncFlg.OP_SIZE)(OP2(0x0F, 0x5E), src, dst); }
	void divpd(Register dst, MemAddress src) { encoder.putInstrBinaryRegMem!(EncFlg.OP_SIZE)(OP2(0x0F, 0x5E), dst, src); }
651 
	// Scalar floating-point compare with an 8-bit predicate immediate (opcode 0F C2).
	// `cmpss` emits prefix F3 first, `cmpsd` emits prefix F2 first; `pred` is encoded as the trailing immediate.
	void cmpss(Register dst, Register src, Imm8 pred) { encoder.prefix(0xF3); encoder.putInstrBinaryRegRegImm!(0)(OP2(0x0F, 0xC2), src, dst, pred); }
	void cmpss(Register dst, MemAddress src, Imm8 pred) { encoder.prefix(0xF3); encoder.putInstrBinaryRegMemImm!(0)(OP2(0x0F, 0xC2), dst, src, pred); }
	void cmpsd(Register dst, Register src, Imm8 pred) { encoder.prefix(0xF2); encoder.putInstrBinaryRegRegImm!(0)(OP2(0x0F, 0xC2), src, dst, pred); }
	void cmpsd(Register dst, MemAddress src, Imm8 pred) { encoder.prefix(0xF2); encoder.putInstrBinaryRegMemImm!(0)(OP2(0x0F, 0xC2), dst, src, pred); }
656 
	// UCOMISS / UCOMISD (opcode 0F 2E). The `sd` variants add EncFlg.OP_SIZE; the `ss` variants pass no flags.
	void ucomiss(Register dst, Register src) { encoder.putInstrBinaryRegReg!(0)(OP2(0x0F, 0x2E), src, dst); }
	void ucomiss(Register dst, MemAddress src) { encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x2E), dst, src); }
	void ucomisd(Register dst, Register src) { encoder.putInstrBinaryRegReg!(EncFlg.OP_SIZE)(OP2(0x0F, 0x2E), src, dst); }
	void ucomisd(Register dst, MemAddress src) { encoder.putInstrBinaryRegMem!(EncFlg.OP_SIZE)(OP2(0x0F, 0x2E), dst, src); }
661 
	// Scalar SSE arithmetic: add/sub/mul/div on single (`ss`) and double (`sd`) operands.
	// Each method first emits a mandatory prefix (F3 for `ss`, F2 for `sd`) and then the two-byte opcode:
	// add = 0F 58, sub = 0F 5C, mul = 0F 59, div = 0F 5E.
	// Register-register overloads hand (src, dst) to the encoder; register-memory overloads hand (dst, src).
	void addss(Register dst, Register src) { encoder.prefix(0xF3); encoder.putInstrBinaryRegReg!(0)(OP2(0x0F, 0x58), src, dst); }
	void addss(Register dst, MemAddress src) { encoder.prefix(0xF3); encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x58), dst, src); }
	void addsd(Register dst, Register src) { encoder.prefix(0xF2); encoder.putInstrBinaryRegReg!(0)(OP2(0x0F, 0x58), src, dst); }
	void addsd(Register dst, MemAddress src) { encoder.prefix(0xF2); encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x58), dst, src); }
	void subss(Register dst, Register src) { encoder.prefix(0xF3); encoder.putInstrBinaryRegReg!(0)(OP2(0x0F, 0x5C), src, dst); }
	void subss(Register dst, MemAddress src) { encoder.prefix(0xF3); encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x5C), dst, src); }
	void subsd(Register dst, Register src) { encoder.prefix(0xF2); encoder.putInstrBinaryRegReg!(0)(OP2(0x0F, 0x5C), src, dst); }
	void subsd(Register dst, MemAddress src) { encoder.prefix(0xF2); encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x5C), dst, src); }
	void mulss(Register dst, Register src) { encoder.prefix(0xF3); encoder.putInstrBinaryRegReg!(0)(OP2(0x0F, 0x59), src, dst); }
	void mulss(Register dst, MemAddress src) { encoder.prefix(0xF3); encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x59), dst, src); }
	void mulsd(Register dst, Register src) { encoder.prefix(0xF2); encoder.putInstrBinaryRegReg!(0)(OP2(0x0F, 0x59), src, dst); }
	void mulsd(Register dst, MemAddress src) { encoder.prefix(0xF2); encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x59), dst, src); }
	void divss(Register dst, Register src) { encoder.prefix(0xF3); encoder.putInstrBinaryRegReg!(0)(OP2(0x0F, 0x5E), src, dst); }
	void divss(Register dst, MemAddress src) { encoder.prefix(0xF3); encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x5E), dst, src); }
	void divsd(Register dst, Register src) { encoder.prefix(0xF2); encoder.putInstrBinaryRegReg!(0)(OP2(0x0F, 0x5E), src, dst); }
	void divsd(Register dst, MemAddress src) { encoder.prefix(0xF2); encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x5E), dst, src); }
678 
	// MOVSS: prefix F3, opcode 0F 10 when the destination is a register, 0F 11 when storing to memory.
	void movss(Register dst, Register src) { encoder.prefix(0xF3); encoder.putInstrBinaryRegReg!(0)(OP2(0x0F, 0x10), src, dst); }
	void movss(Register dst, MemAddress src) { encoder.prefix(0xF3); encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x10), dst, src); }
	void movss(MemAddress dst, Register src) { encoder.prefix(0xF3); encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x11), src, dst); }
682 
	// MOVSD (scalar double): prefix F2, opcode 0F 10 when the destination is a register, 0F 11 when storing to memory.
	void movsd(Register dst, Register src) { encoder.prefix(0xF2); encoder.putInstrBinaryRegReg!(0)(OP2(0x0F, 0x10), src, dst); }
	void movsd(Register dst, MemAddress src) { encoder.prefix(0xF2); encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x10), dst, src); }
	void movsd(MemAddress dst, Register src) { encoder.prefix(0xF2); encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x11), src, dst); }
686 
	// MOVAPS: opcode 0F 28 when the destination is a register, 0F 29 when storing to memory; no prefix, no flags.
	void movaps(Register dst, Register src) { encoder.putInstrBinaryRegReg!(0)(OP2(0x0F, 0x28), src, dst); }
	void movaps(Register dst, MemAddress src) { encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x28), dst, src); }
	void movaps(MemAddress dst, Register src) { encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x29), src, dst); }
690 
	// MOVUPS: opcode 0F 10 when the destination is a register, 0F 11 when storing to memory; no prefix, no flags.
	void movups(Register dst, Register src) { encoder.putInstrBinaryRegReg!(0)(OP2(0x0F, 0x10), src, dst); }
	void movups(Register dst, MemAddress src) { encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x10), dst, src); }
	void movups(MemAddress dst, Register src) { encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x11), src, dst); }
694 
	// x stands for xmm, r for a general-purpose register.
	// `_xr`: move into an xmm register from a general-purpose register or memory (opcode 0F 6E with EncFlg.OP_SIZE).
	// The `movq` variants additionally pass EncFlg.REXW_FORCE for the 64-bit form.
	void movd_xr(Register dst, Register src) { encoder.putInstrBinaryRegReg!(EncFlg.OP_SIZE)(OP2(0x0F, 0x6E), src, dst); }
	void movd_xr(Register dst, MemAddress src) { encoder.putInstrBinaryRegMem!(EncFlg.OP_SIZE)(OP2(0x0F, 0x6E), dst, src); }
	void movq_xr(Register dst, Register src) { encoder.putInstrBinaryRegReg!(EncFlg.OP_SIZE|EncFlg.REXW_FORCE)(OP2(0x0F, 0x6E), src, dst); }
	void movq_xr(Register dst, MemAddress src) { encoder.putInstrBinaryRegMem!(EncFlg.OP_SIZE|EncFlg.REXW_FORCE)(OP2(0x0F, 0x6E), dst, src); }
700 
	// `_rx`: move from an xmm register into a general-purpose register or memory (opcode 0F 7E with EncFlg.OP_SIZE).
	// The `movq` variants additionally pass EncFlg.REXW_FORCE for the 64-bit form.
	// Unlike the `_xr` overloads, the register-register form hands (dst, src) to the encoder, because
	// for this opcode the destination is the r/m operand.
	void movd_rx(Register dst, Register src) { encoder.putInstrBinaryRegReg!(EncFlg.OP_SIZE)(OP2(0x0F, 0x7E), dst, src); }
	void movd_rx(MemAddress dst, Register src) { encoder.putInstrBinaryRegMem!(EncFlg.OP_SIZE)(OP2(0x0F, 0x7E), src, dst); }
	void movq_rx(Register dst, Register src) { encoder.putInstrBinaryRegReg!(EncFlg.OP_SIZE|EncFlg.REXW_FORCE)(OP2(0x0F, 0x7E), dst, src); }
	void movq_rx(MemAddress dst, Register src) { encoder.putInstrBinaryRegMem!(EncFlg.OP_SIZE|EncFlg.REXW_FORCE)(OP2(0x0F, 0x7E), src, dst); }
705 
	// Float <-> double conversion (opcode 0F 5A): CVTSS2SD uses prefix F3, CVTSD2SS uses prefix F2.
	void cvtss2sd(Register dst, Register src) { encoder.prefix(0xF3); encoder.putInstrBinaryRegReg!(0)(OP2(0x0F, 0x5A), src, dst); }
	void cvtss2sd(Register dst, MemAddress src) { encoder.prefix(0xF3); encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x5A), dst, src); }
	void cvtsd2ss(Register dst, Register src) { encoder.prefix(0xF2); encoder.putInstrBinaryRegReg!(0)(OP2(0x0F, 0x5A), src, dst); }
	void cvtsd2ss(Register dst, MemAddress src) { encoder.prefix(0xF2); encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x5A), dst, src); }
710 
	// CVTSS2SI (prefix F3, opcode 0F 2D): scalar single -> integer.
	// The trailing `d`/`q` selects the integer width; `q` passes EncFlg.REXW_FORCE.
	void cvtss2sid(Register dst, Register src) { encoder.prefix(0xF3); encoder.putInstrBinaryRegReg!(0)(OP2(0x0F, 0x2D), src, dst); }
	void cvtss2sid(Register dst, MemAddress src) { encoder.prefix(0xF3); encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x2D), dst, src); }
	void cvtss2siq(Register dst, Register src) { encoder.prefix(0xF3); encoder.putInstrBinaryRegReg!(EncFlg.REXW_FORCE)(OP2(0x0F, 0x2D), src, dst); }
	void cvtss2siq(Register dst, MemAddress src) { encoder.prefix(0xF3); encoder.putInstrBinaryRegMem!(EncFlg.REXW_FORCE)(OP2(0x0F, 0x2D), dst, src); }
715 
	// CVTSI2SS (prefix F3, opcode 0F 2A): integer -> scalar single.
	// The `d`/`q` after `si` selects the integer width; `q` passes EncFlg.REXW_FORCE.
	void cvtsid2ss(Register dst, Register src) { encoder.prefix(0xF3); encoder.putInstrBinaryRegReg!(0)(OP2(0x0F, 0x2A), src, dst); }
	void cvtsid2ss(Register dst, MemAddress src) { encoder.prefix(0xF3); encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x2A), dst, src); }
	void cvtsiq2ss(Register dst, Register src) { encoder.prefix(0xF3); encoder.putInstrBinaryRegReg!(EncFlg.REXW_FORCE)(OP2(0x0F, 0x2A), src, dst); }
	void cvtsiq2ss(Register dst, MemAddress src) { encoder.prefix(0xF3); encoder.putInstrBinaryRegMem!(EncFlg.REXW_FORCE)(OP2(0x0F, 0x2A), dst, src); }
720 
	// CVTTSS2SI (prefix F3, opcode 0F 2C): scalar single -> integer, truncating variant.
	// The trailing `d`/`q` selects the integer width; `q` passes EncFlg.REXW_FORCE.
	void cvttss2sid(Register dst, Register src) { encoder.prefix(0xF3); encoder.putInstrBinaryRegReg!(0)(OP2(0x0F, 0x2C), src, dst); }
	void cvttss2sid(Register dst, MemAddress src) { encoder.prefix(0xF3); encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x2C), dst, src); }
	void cvttss2siq(Register dst, Register src) { encoder.prefix(0xF3); encoder.putInstrBinaryRegReg!(EncFlg.REXW_FORCE)(OP2(0x0F, 0x2C), src, dst); }
	void cvttss2siq(Register dst, MemAddress src) { encoder.prefix(0xF3); encoder.putInstrBinaryRegMem!(EncFlg.REXW_FORCE)(OP2(0x0F, 0x2C), dst, src); }
725 
	// CVTSD2SI (prefix F2, opcode 0F 2D): scalar double -> integer.
	// The trailing `d`/`q` selects the integer width; `q` passes EncFlg.REXW_FORCE.
	void cvtsd2sid(Register dst, Register src) { encoder.prefix(0xF2); encoder.putInstrBinaryRegReg!(0)(OP2(0x0F, 0x2D), src, dst); }
	void cvtsd2sid(Register dst, MemAddress src) { encoder.prefix(0xF2); encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x2D), dst, src); }
	void cvtsd2siq(Register dst, Register src) { encoder.prefix(0xF2); encoder.putInstrBinaryRegReg!(EncFlg.REXW_FORCE)(OP2(0x0F, 0x2D), src, dst); }
	void cvtsd2siq(Register dst, MemAddress src) { encoder.prefix(0xF2); encoder.putInstrBinaryRegMem!(EncFlg.REXW_FORCE)(OP2(0x0F, 0x2D), dst, src); }
730 
	// CVTSI2SD (prefix F2, opcode 0F 2A): integer -> scalar double.
	// The `d`/`q` after `si` selects the integer width; `q` passes EncFlg.REXW_FORCE.
	void cvtsid2sd(Register dst, Register src) { encoder.prefix(0xF2); encoder.putInstrBinaryRegReg!(0)(OP2(0x0F, 0x2A), src, dst); }
	void cvtsid2sd(Register dst, MemAddress src) { encoder.prefix(0xF2); encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x2A), dst, src); }
	void cvtsiq2sd(Register dst, Register src) { encoder.prefix(0xF2); encoder.putInstrBinaryRegReg!(EncFlg.REXW_FORCE)(OP2(0x0F, 0x2A), src, dst); }
	void cvtsiq2sd(Register dst, MemAddress src) { encoder.prefix(0xF2); encoder.putInstrBinaryRegMem!(EncFlg.REXW_FORCE)(OP2(0x0F, 0x2A), dst, src); }
735 
	// CVTTSD2SI (prefix F2, opcode 0F 2C): scalar double -> integer, truncating variant.
	// The trailing `d`/`q` selects the integer width; `q` passes EncFlg.REXW_FORCE.
	void cvttsd2sid(Register dst, Register src) { encoder.prefix(0xF2); encoder.putInstrBinaryRegReg!(0)(OP2(0x0F, 0x2C), src, dst); }
	void cvttsd2sid(Register dst, MemAddress src) { encoder.prefix(0xF2); encoder.putInstrBinaryRegMem!(0)(OP2(0x0F, 0x2C), dst, src); }
	void cvttsd2siq(Register dst, Register src) { encoder.prefix(0xF2); encoder.putInstrBinaryRegReg!(EncFlg.REXW_FORCE)(OP2(0x0F, 0x2C), src, dst); }
	void cvttsd2siq(Register dst, MemAddress src) { encoder.prefix(0xF2); encoder.putInstrBinaryRegMem!(EncFlg.REXW_FORCE)(OP2(0x0F, 0x2C), dst, src); }
740 
741 	void rep_prefix() { encoder.prefix(0xF3); }
742 	void stos() { encoder.putInstrNullary(OP1(0xAA)); }
743 
	/// Encodes one of the six "regular" ALU instructions (add, or, and, sub, xor, cmp)
	/// for the operand combination described by `param`.
	///
	/// Params:
	///   dst = destination operand; its `reg` or `memAddress` field is read depending on `param.dstKind`
	///   src = source operand; its `reg`, `memAddress` or `imm8`/`imm16`/`imm32` field is read
	///         depending on `param.srcKind` and `param.immType`
	///   param = selects the operation (`op` indexes the opcode tables below), the operand kinds,
	///           the operand size (`argType`) and, for immediates, the immediate size (`immType`)
	///
	/// Combinations with an immediate destination, or with two memory operands, hit `assert(false)`.
	void encodeRegular(AsmArg dst, AsmArg src, AsmOpParam param)
	{
		// Base opcode of each operation for the register/memory forms.
		// +0 / +1 select the byte / full-size "r/m, r" form (REG_REG and MEM_REG below),
		// +2 / +3 select the byte / full-size "r, r/m" form (REG_MEM below).
		static immutable ubyte[] op_tbl_bin = [
			0x00, // add,
			0x08, // or,
			0x20, // and,
			0x28, // sub,
			0x30, // xor,
			0x38, // cmp,
		];

		// Opcode extension of each operation for the immediate forms; it is passed
		// alongside the shared group opcodes 0x80 / 0x81 / 0x83.
		static immutable ubyte[] op_tbl_un = [
			0, // add,
			1, // or,
			4, // and,
			5, // sub,
			6, // xor,
			7, // cmp
		];

		// Collapse (dstKind, srcKind) into a single value so one switch covers every pairing.
		AsmArgKindProduct prod = asmArgKindProduct(param.dstKind, param.srcKind);
		final switch(prod) with(AsmArgKindProduct)
		{
			// op r/m, r with a register as r/m
			case REG_REG:
				final switch(param.argType) {
					case ArgType.BYTE:  encoder.putInstrBinaryRegReg!(EncFlg.REX_HIGH) (OP1(cast(ubyte)(op_tbl_bin[param.op]+0)), dst.reg, src.reg); break;
					case ArgType.WORD:  encoder.putInstrBinaryRegReg!(EncFlg.OP_SIZE) (OP1(cast(ubyte)(op_tbl_bin[param.op]+1)), dst.reg, src.reg); break;
					case ArgType.DWORD: encoder.putInstrBinaryRegReg!(0)(OP1(cast(ubyte)(op_tbl_bin[param.op]+1)), dst.reg, src.reg); break;
					case ArgType.QWORD: encoder.putInstrBinaryRegReg!(EncFlg.REXW_FORCE)(OP1(cast(ubyte)(op_tbl_bin[param.op]+1)), dst.reg, src.reg); break;
				} break;

			// op r/m, r with memory as r/m: the encoder takes the register first, then the address
			case MEM_REG:
				final switch(param.argType) {
					case ArgType.BYTE:  encoder.putInstrBinaryRegMem!(EncFlg.REX_HIGH) (OP1(cast(ubyte)(op_tbl_bin[param.op]+0)), src.reg, dst.memAddress); break;
					case ArgType.WORD:  encoder.putInstrBinaryRegMem!(EncFlg.OP_SIZE) (OP1(cast(ubyte)(op_tbl_bin[param.op]+1)), src.reg, dst.memAddress); break;
					case ArgType.DWORD: encoder.putInstrBinaryRegMem!(0)(OP1(cast(ubyte)(op_tbl_bin[param.op]+1)), src.reg, dst.memAddress); break;
					case ArgType.QWORD: encoder.putInstrBinaryRegMem!(EncFlg.REXW_FORCE)(OP1(cast(ubyte)(op_tbl_bin[param.op]+1)), src.reg, dst.memAddress); break;
				} break;

			// op r, r/m with memory as the source: opcode offsets +2 / +3
			case REG_MEM:
				final switch(param.argType) {
					case ArgType.BYTE:  encoder.putInstrBinaryRegMem!(EncFlg.REX_HIGH) (OP1(cast(ubyte)(op_tbl_bin[param.op]+2)), dst.reg, src.memAddress); break;
					case ArgType.WORD:  encoder.putInstrBinaryRegMem!(EncFlg.OP_SIZE) (OP1(cast(ubyte)(op_tbl_bin[param.op]+3)), dst.reg, src.memAddress); break;
					case ArgType.DWORD: encoder.putInstrBinaryRegMem!(0)(OP1(cast(ubyte)(op_tbl_bin[param.op]+3)), dst.reg, src.memAddress); break;
					case ArgType.QWORD: encoder.putInstrBinaryRegMem!(EncFlg.REXW_FORCE)(OP1(cast(ubyte)(op_tbl_bin[param.op]+3)), dst.reg, src.memAddress); break;
				} break;

			case REG_IMM:
				// Accepted pairings: immediate as wide as the operand, an 8-bit immediate with
				// any operand size, or a 32-bit immediate with a 64-bit operand.
				bool valid1 = param.argType == param.immType;
				bool valid2 = param.immType == ArgType.BYTE;
				bool valid3 = param.argType == ArgType.QWORD && param.immType == ArgType.DWORD;
				assert(valid1 || valid2 || valid3, format("%s %s", param.argType, param.immType));

				final switch(param.immType)
				{
					// 8-bit immediate: opcode 0x80 for byte operands, 0x83 for wider operands
					case ArgType.BYTE:
						final switch(param.argType) {
							case ArgType.BYTE:  encoder.putInstrBinaryRegImm2!(EncFlg.REX_HIGH)  (OP1(0x80), op_tbl_un[param.op], dst.reg, src.imm8); break;
							case ArgType.WORD:  encoder.putInstrBinaryRegImm2!(EncFlg.OP_SIZE)  (OP1(0x83), op_tbl_un[param.op], dst.reg, src.imm8); break;
							case ArgType.DWORD: encoder.putInstrBinaryRegImm2!(0) (OP1(0x83), op_tbl_un[param.op], dst.reg, src.imm8); break;
							case ArgType.QWORD: encoder.putInstrBinaryRegImm2!(EncFlg.REXW_FORCE) (OP1(0x83), op_tbl_un[param.op], dst.reg, src.imm8); break;
						} break;

					case ArgType.WORD:  encoder.putInstrBinaryRegImm2!(EncFlg.OP_SIZE)  (OP1(0x81), op_tbl_un[param.op], dst.reg, src.imm16); break;
					case ArgType.DWORD:
						// A 32-bit immediate applied to a 64-bit register is encoded by the QWORD case.
						if (param.argType == ArgType.QWORD) goto case ArgType.QWORD;
						encoder.putInstrBinaryRegImm2!(0) (OP1(0x81), op_tbl_un[param.op], dst.reg, src.imm32);
						break;
					// The 64-bit form still carries a 32-bit immediate (src.imm32).
					case ArgType.QWORD: encoder.putInstrBinaryRegImm2!(EncFlg.REXW_FORCE) (OP1(0x81), op_tbl_un[param.op], dst.reg, src.imm32); break;
				} break;

			case MEM_IMM:
				// NOTE(review): unlike REG_IMM, this assert does not admit argType QWORD with
				// immType DWORD, and the DWORD case below does not redirect to the QWORD case.
				// A 64-bit memory operation is therefore only reachable with immType QWORD,
				// yet src.imm32 is what gets encoded -- confirm this matches the callers.
				assert(param.argType == param.immType || param.immType == ArgType.BYTE);
				final switch(param.immType)
				{
					// 8-bit immediate: opcode 0x80 for byte operands, 0x83 for wider operands
					case ArgType.BYTE:
						final switch(param.argType) {
							case ArgType.BYTE:  encoder.putInstrBinaryMemImm!(EncFlg.REX_HIGH)  (OP1(0x80), op_tbl_un[param.op], dst.memAddress, src.imm8); break;
							case ArgType.WORD:  encoder.putInstrBinaryMemImm!(EncFlg.OP_SIZE)  (OP1(0x83), op_tbl_un[param.op], dst.memAddress, src.imm8); break;
							case ArgType.DWORD: encoder.putInstrBinaryMemImm!(0) (OP1(0x83), op_tbl_un[param.op], dst.memAddress, src.imm8); break;
							case ArgType.QWORD: encoder.putInstrBinaryMemImm!(EncFlg.REXW_FORCE) (OP1(0x83), op_tbl_un[param.op], dst.memAddress, src.imm8); break;
						} break;

					case ArgType.WORD:  encoder.putInstrBinaryMemImm!(EncFlg.OP_SIZE)  (OP1(0x81), op_tbl_un[param.op], dst.memAddress, src.imm16); break;
					case ArgType.DWORD: encoder.putInstrBinaryMemImm!(0) (OP1(0x81), op_tbl_un[param.op], dst.memAddress, src.imm32); break;
					case ArgType.QWORD: encoder.putInstrBinaryMemImm!(EncFlg.REXW_FORCE) (OP1(0x81), op_tbl_un[param.op], dst.memAddress, src.imm32); break;
				} break;

			// No encoding exists for an immediate destination or for two memory operands.
			case IMM_REG, IMM_IMM, IMM_MEM, MEM_MEM:
				assert(false);
		}
	}
836 }
837 
/// MOV with an immediate source. Mix this into a type that exposes an `encoder` member.
///
/// The method suffix gives the destination width (b = byte, w = word, d = dword, q = qword).
/// Register destinations use the opcodes B0 (byte) and B8 (word/dword/qword with a full-width
/// immediate); the 64-bit move of a 32-bit immediate uses C7 with extension 0.
/// Memory destinations use C6 (byte) and C7 (word/dword/qword), both with extension 0.
mixin template instrMOV() {
	// ---- register destination ----

	void movb(Register dst, Imm8 src)
	{
		encoder.putInstrBinaryRegImm1!(EncFlg.REX_HIGH)(OP1(0xB0), dst, src);
	}

	void movw(Register dst, Imm16 src)
	{
		encoder.putInstrBinaryRegImm1!(EncFlg.OP_SIZE)(OP1(0xB8), dst, src);
	}

	void movd(Register dst, Imm32 src)
	{
		encoder.putInstrBinaryRegImm1!(0)(OP1(0xB8), dst, src);
	}

	/// 64-bit destination, 32-bit immediate: C7 /0 form rather than B8.
	void movq(Register dst, Imm32 src)
	{
		encoder.putInstrBinaryRegImm2!(EncFlg.REXW_FORCE)(OP1(0xC7), 0, dst, src);
	}

	/// 64-bit destination, full 64-bit immediate.
	void movq(Register dst, Imm64 src)
	{
		encoder.putInstrBinaryRegImm1!(EncFlg.REXW_FORCE)(OP1(0xB8), dst, src);
	}

	// ---- memory destination ----

	void movb(MemAddress dst, Imm8 src)
	{
		encoder.putInstrBinaryMemImm!(EncFlg.REX_HIGH)(OP1(0xC6), 0, dst, src);
	}

	void movw(MemAddress dst, Imm16 src)
	{
		encoder.putInstrBinaryMemImm!(EncFlg.OP_SIZE)(OP1(0xC7), 0, dst, src);
	}

	void movd(MemAddress dst, Imm32 src)
	{
		encoder.putInstrBinaryMemImm!(0)(OP1(0xC7), 0, dst, src);
	}

	void movq(MemAddress dst, Imm32 src)
	{
		encoder.putInstrBinaryMemImm!(EncFlg.REXW_FORCE)(OP1(0xC7), 0, dst, src);
	}
}
850 
/// Generates the b/w/d/q overloads of a two-operand instruction for the operand pairings
/// (memory, register), (register, register) and (register, memory).
///
/// Params:
///   name = mnemonic; the generated methods are named `name ~ "b"`, `"w"`, `"d"` and `"q"`
///   rm_r = opcodes of the "r/m, r" form: [0] for the byte variant, [1] for word/dword/qword.
///          Used for memory destinations and for the register-register overloads.
///   r_rm = opcodes of the "r, r/m" form: [0] for the byte variant, [1] for word/dword/qword.
///          Used for memory sources.
///
/// Each size passes a different flag: EncFlg.REX_HIGH (b), EncFlg.OP_SIZE (w), 0 (d), EncFlg.REXW_FORCE (q).
/// The opcode values are spliced into the generated source as decimal text by `format`.
mixin template binaryInstr_RMtoR_RtoRM(string name, ubyte[2] rm_r, ubyte[2] r_rm) {
	// memory destination, register source
	mixin(format("void %sb(MemAddress dst, Register src){ encoder.putInstrBinaryRegMem!(EncFlg.REX_HIGH) (OP1(%s), src, dst); }", name, rm_r[0]));
	mixin(format("void %sw(MemAddress dst, Register src){ encoder.putInstrBinaryRegMem!(EncFlg.OP_SIZE) (OP1(%s), src, dst); }", name, rm_r[1]));
	mixin(format("void %sd(MemAddress dst, Register src){ encoder.putInstrBinaryRegMem!(0)(OP1(%s), src, dst); }", name, rm_r[1]));
	mixin(format("void %sq(MemAddress dst, Register src){ encoder.putInstrBinaryRegMem!(EncFlg.REXW_FORCE)(OP1(%s), src, dst); }", name, rm_r[1]));

	// register destination, register source
	mixin(format("void %sb(Register dst, Register src){ encoder.putInstrBinaryRegReg!(EncFlg.REX_HIGH) (OP1(%s), dst, src); }", name, rm_r[0]));
	mixin(format("void %sw(Register dst, Register src){ encoder.putInstrBinaryRegReg!(EncFlg.OP_SIZE) (OP1(%s), dst, src); }", name, rm_r[1]));
	mixin(format("void %sd(Register dst, Register src){ encoder.putInstrBinaryRegReg!(0)(OP1(%s), dst, src); }", name, rm_r[1]));
	mixin(format("void %sq(Register dst, Register src){ encoder.putInstrBinaryRegReg!(EncFlg.REXW_FORCE)(OP1(%s), dst, src); }", name, rm_r[1]));

	// register destination, memory source
	mixin(format("void %sb(Register dst, MemAddress src){ encoder.putInstrBinaryRegMem!(EncFlg.REX_HIGH) (OP1(%s), dst, src); }", name, r_rm[0]));
	mixin(format("void %sw(Register dst, MemAddress src){ encoder.putInstrBinaryRegMem!(EncFlg.OP_SIZE) (OP1(%s), dst, src); }", name, r_rm[1]));
	mixin(format("void %sd(Register dst, MemAddress src){ encoder.putInstrBinaryRegMem!(0)(OP1(%s), dst, src); }", name, r_rm[1]));
	mixin(format("void %sq(Register dst, MemAddress src){ encoder.putInstrBinaryRegMem!(EncFlg.REXW_FORCE)(OP1(%s), dst, src); }", name, r_rm[1]));
}
867 
/// Generates the immediate-source overloads of a two-operand instruction for register and
/// memory destinations.
///
/// Params:
///   name = mnemonic; the generated methods are named `name ~ "b"`, `"w"`, `"d"` and `"q"`
///   extraOpcode = opcode extension passed to the encoder together with the shared opcodes below
///
/// Opcode 0x80 is used for byte operands with Imm8, 0x81 for word/dword/qword operands with a
/// full-size immediate (the qword form takes Imm32), and 0x83 for word/dword/qword operands
/// with an Imm8. There is no byte-operand 0x83 overload.
mixin template binaryInstr_RM_Imm(string name, ubyte extraOpcode) {
	// register destination, full-size immediate
	mixin(format("void %sb(Register dst,   Imm8  src){ encoder.putInstrBinaryRegImm2!(EncFlg.REX_HIGH) (OP1(0x80), %s, dst, src); }", name, extraOpcode));
	mixin(format("void %sw(Register dst,   Imm16 src){ encoder.putInstrBinaryRegImm2!(EncFlg.OP_SIZE) (OP1(0x81), %s, dst, src); }", name, extraOpcode));
	mixin(format("void %sd(Register dst,   Imm32 src){ encoder.putInstrBinaryRegImm2!(0)(OP1(0x81), %s, dst, src); }", name, extraOpcode));
	mixin(format("void %sq(Register dst,   Imm32 src){ encoder.putInstrBinaryRegImm2!(EncFlg.REXW_FORCE)(OP1(0x81), %s, dst, src); }", name, extraOpcode));

	// register destination, 8-bit immediate
	mixin(format("void %sw(Register dst,   Imm8 src){ encoder.putInstrBinaryRegImm2!(EncFlg.OP_SIZE) (OP1(0x83), %s, dst, src); }", name, extraOpcode));
	mixin(format("void %sd(Register dst,   Imm8 src){ encoder.putInstrBinaryRegImm2!(0)(OP1(0x83), %s, dst, src); }", name, extraOpcode));
	mixin(format("void %sq(Register dst,   Imm8 src){ encoder.putInstrBinaryRegImm2!(EncFlg.REXW_FORCE)(OP1(0x83), %s, dst, src); }", name, extraOpcode));

	// memory destination, full-size immediate
	mixin(format("void %sb(MemAddress dst, Imm8  src){ encoder.putInstrBinaryMemImm!(EncFlg.REX_HIGH) (OP1(0x80), %s, dst, src); }", name, extraOpcode));
	mixin(format("void %sw(MemAddress dst, Imm16 src){ encoder.putInstrBinaryMemImm!(EncFlg.OP_SIZE) (OP1(0x81), %s, dst, src); }", name, extraOpcode));
	mixin(format("void %sd(MemAddress dst, Imm32 src){ encoder.putInstrBinaryMemImm!(0)(OP1(0x81), %s, dst, src); }", name, extraOpcode));
	mixin(format("void %sq(MemAddress dst, Imm32 src){ encoder.putInstrBinaryMemImm!(EncFlg.REXW_FORCE)(OP1(0x81), %s, dst, src); }", name, extraOpcode));

	// memory destination, 8-bit immediate
	mixin(format("void %sw(MemAddress dst, Imm8 src){ encoder.putInstrBinaryMemImm!(EncFlg.OP_SIZE) (OP1(0x83), %s, dst, src); }", name, extraOpcode));
	mixin(format("void %sd(MemAddress dst, Imm8 src){ encoder.putInstrBinaryMemImm!(0)(OP1(0x83), %s, dst, src); }", name, extraOpcode));
	mixin(format("void %sq(MemAddress dst, Imm8 src){ encoder.putInstrBinaryMemImm!(EncFlg.REXW_FORCE)(OP1(0x83), %s, dst, src); }", name, extraOpcode));
}
887 
/// Generates the b/w/d/q overloads of a single-operand instruction for register and memory operands.
///
/// Params:
///   name = mnemonic; the generated methods are named `name ~ "b"`, `"w"`, `"d"` and `"q"`
///   opcodes = [0] is the opcode of the byte variant, [1] the opcode of the word/dword/qword variants
///   extraOpcode = opcode extension passed to the encoder after the opcode
///
/// Each size passes a different flag: EncFlg.REX_HIGH (b), EncFlg.OP_SIZE (w), 0 (d), EncFlg.REXW_FORCE (q).
mixin template unaryInstr_RM(string name, ubyte[2] opcodes, ubyte extraOpcode) {
	// register operand
	mixin(format("void %sb(Register dst) { encoder.putInstrUnaryReg1!(EncFlg.REX_HIGH) (OP1(%s), %s, dst); }", name, opcodes[0], extraOpcode));
	mixin(format("void %sw(Register dst) { encoder.putInstrUnaryReg1!(EncFlg.OP_SIZE) (OP1(%s), %s, dst); }", name, opcodes[1], extraOpcode));
	mixin(format("void %sd(Register dst) { encoder.putInstrUnaryReg1!(0)(OP1(%s), %s, dst); }", name, opcodes[1], extraOpcode));
	mixin(format("void %sq(Register dst) { encoder.putInstrUnaryReg1!(EncFlg.REXW_FORCE)(OP1(%s), %s, dst); }", name, opcodes[1], extraOpcode));

	// memory operand
	mixin(format("void %sb(MemAddress dst) { encoder.putInstrUnaryMem!(EncFlg.REX_HIGH) (OP1(%s), %s, dst); }", name, opcodes[0], extraOpcode));
	mixin(format("void %sw(MemAddress dst) { encoder.putInstrUnaryMem!(EncFlg.OP_SIZE) (OP1(%s), %s, dst); }", name, opcodes[1], extraOpcode));
	mixin(format("void %sd(MemAddress dst) { encoder.putInstrUnaryMem!(0)(OP1(%s), %s, dst); }", name, opcodes[1], extraOpcode));
	mixin(format("void %sq(MemAddress dst) { encoder.putInstrUnaryMem!(EncFlg.REXW_FORCE)(OP1(%s), %s, dst); }", name, opcodes[1], extraOpcode));
}
899 
// TODO: duplicate with binaryInstr_RM_Imm
/// Generates the b/w/d/q overloads of a shift/rotate-style instruction that takes an 8-bit
/// immediate count, for register and memory destinations.
///
/// Params:
///   name = mnemonic; the generated methods are named `name ~ "b"`, `"w"`, `"d"` and `"q"`
///   opcodes = [0] is the opcode of the byte variant, [1] the opcode of the word/dword/qword variants
///   extraOpcode = opcode extension passed to the encoder after the opcode
///
/// Each size passes a different flag: EncFlg.REX_HIGH (b), EncFlg.OP_SIZE (w), 0 (d), EncFlg.REXW_FORCE (q).
mixin template shift_RM_Imm8(string name, ubyte[2] opcodes, ubyte extraOpcode) {
	// register destination
	mixin(format("void %sb(Register dst, Imm8 src) { encoder.putInstrBinaryRegImm2!(EncFlg.REX_HIGH) (OP1(%s), %s, dst, src); }", name, opcodes[0], extraOpcode));
	mixin(format("void %sw(Register dst, Imm8 src) { encoder.putInstrBinaryRegImm2!(EncFlg.OP_SIZE) (OP1(%s), %s, dst, src); }", name, opcodes[1], extraOpcode));
	mixin(format("void %sd(Register dst, Imm8 src) { encoder.putInstrBinaryRegImm2!(0)(OP1(%s), %s, dst, src); }", name, opcodes[1], extraOpcode));
	mixin(format("void %sq(Register dst, Imm8 src) { encoder.putInstrBinaryRegImm2!(EncFlg.REXW_FORCE)(OP1(%s), %s, dst, src); }", name, opcodes[1], extraOpcode));

	// memory destination
	mixin(format("void %sb(MemAddress dst, Imm8 src) { encoder.putInstrBinaryMemImm!(EncFlg.REX_HIGH) (OP1(%s), %s, dst, src); }", name, opcodes[0], extraOpcode));
	mixin(format("void %sw(MemAddress dst, Imm8 src) { encoder.putInstrBinaryMemImm!(EncFlg.OP_SIZE) (OP1(%s), %s, dst, src); }", name, opcodes[1], extraOpcode));
	mixin(format("void %sd(MemAddress dst, Imm8 src) { encoder.putInstrBinaryMemImm!(0)(OP1(%s), %s, dst, src); }", name, opcodes[1], extraOpcode));
	// The 64-bit form must request REX.W like every other `q` overload; REX_HIGH (the
	// byte-variant flag) would leave the full-size opcode operating on a 32-bit operand.
	mixin(format("void %sq(MemAddress dst, Imm8 src) { encoder.putInstrBinaryMemImm!(EncFlg.REXW_FORCE)(OP1(%s), %s, dst, src); }", name, opcodes[1], extraOpcode));
}