1 /**
2 Copyright: Copyright (c) 2017-2018 Andrey Penechko.
3 License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
4 Authors: Andrey Penechko.
5 */
6 module ir_test;
7 
8 import std.bitmanip : bitfields;
9 import std.format : formattedWrite;
10 import std.stdio;
11 
12 // 'Simple and Efficient Construction of Static Single Assignment Form'
13 // When local value numbering for one block is finished, we call this block FILLED
14 // Basic block is SEALED if no further predecessors will be added to the block
15 // As only filled blocks may have successors, predecessors are always filled
16 // Sealed block is not necessarily filled
17 
18 
19 void main()
20 {
21 	//example1;
22 	example2;
23 }
24 
25 void example1()
26 {
27 	IrFunction fun = createFunction();
28 	// 'Simple and Efficient Construction of Static Single Assignment Form' example 1
29 	// 0: a <- 42;
30 	// 1: b <- a;
31 	// 2: c <- a + b;
32 	// 3: a <- c + 23;
33 	// 4: c <- a + d;
34 
35 	// This comes from AST
36 	Var var_a = Var(VarId(0), IrValueType.i32); // a
37 	Var var_b = Var(VarId(1), IrValueType.i32); // b
38 	Var var_c = Var(VarId(2), IrValueType.i32); // c
39 	Var var_d = Var(VarId(3), IrValueType.i32); // d
40 
41 	// This is a pass converting AST to IR
42 	BlockId startBlock = fun.addBlock();
43 
44 	// 0
45 	IrRef op0_42 = fun.put(42);
46 	fun.writeVariable(var_a, startBlock, op0_42);
47 	// 1
48 	IrRef op1_a = fun.readVariable(var_a, startBlock);
49 	fun.writeVariable(var_b, startBlock, op1_a);
50 	// 2
51 	IrRef op2_a = fun.readVariable(var_a, startBlock);
52 	IrRef op2_b = fun.readVariable(var_b, startBlock);
53 	IrRef op2 = fun.put(makeInstr2(IrOpcode.o_add, IrValueType.i32, op2_a, op2_b));
54 	fun.writeVariable(var_c, startBlock, op2);
55 	// 3
56 	IrRef op3_c = fun.readVariable(var_c, startBlock);
57 	IrRef op3_23 = fun.put(23);
58 	IrRef op3 = fun.put(makeInstr2(IrOpcode.o_add, IrValueType.i32, op3_c, op3_23));
59 	fun.writeVariable(var_a, startBlock, op3);
60 	// 4
61 	IrRef op4_a = fun.readVariable(var_a, startBlock);
62 	IrRef op4_d = fun.readVariable(var_d, startBlock);
63 	IrRef op4 = fun.put(makeInstr2(IrOpcode.o_add, IrValueType.i32, op4_a, op4_d));
64 	fun.writeVariable(var_c, startBlock, op4);
65 	fun.print();
66 	// %const_0 = 0
67 	// %const_1 = 1
68 	// %const_2 = 42
69 	// %const_3 = 23
70 	// %0 = i32 o_add    i32 42, i32 42
71 	// %1 = i32 o_add    i32 %0, i32 23
72 	// %2 = i32 o_add    i32 %1, i32 %255 //same as ??
73 }
74 
75 void example2()
76 {
77 	IrFunction fun = createFunction();
78 	// 'Simple and Efficient Construction of Static Single Assignment Form' example 2
79 	// x = 42
80 	// while(3 < 4)
81 	// {
82 	//   if (4 > 3) {
83 	//     x = 84
84 	//   }
85 	//   else
86 	//   {}
87 	// }
88 	// y = x + 1;
89 
90 	Var var_x = Var(VarId(0), IrValueType.i32); // x
91 	Var var_y = Var(VarId(1), IrValueType.i32); // y
92 
93 	// x = 42
94 	BlockId startBlock = fun.addBlock();
95 	IrRef op0_42 = fun.put(42);
96 	fun.writeVariable(var_x, startBlock, op0_42);
97 	fun.sealBlock(startBlock);
98 
99 	BlockId whileHeader = fun.addBlock(startBlock);
100 	// i < 4
101 	IrRef op2_3 = fun.put(3);
102 	IrRef op2_4 = fun.put(4);
103 	IrRef op2 = fun.put(makeInstrCmp(IrCond.l, op2_3, op2_4));
104 	// while()
105 	IrRef op_while_branch = fun.put(makeBranch(op2));
106 
107 	BlockId whileBodyStart = fun.addBlock(whileHeader);
108 	fun.branchArg1(op_while_branch, whileBodyStart);
109 	// if (i > 3)
110 	IrRef op3_4 = fun.put(4);
111 	IrRef op3_3 = fun.put(3);
112 	IrRef op3 = fun.put(makeInstrCmp(IrCond.g, op3_4, op3_3));
113 	IrRef if_branch = fun.put(makeBranch(op3));
114 	fun.sealBlock(whileBodyStart);
115 
116 	BlockId thenBlock = fun.addBlock(whileBodyStart);
117 	IrRef op4_84 = fun.put(84);
118 	fun.writeVariable(var_x, thenBlock, op4_84);
119 	fun.branchArg1(if_branch, thenBlock);
120 	IrRef while_end_jump_1 = fun.put(makeJump());
121 	fun.sealBlock(thenBlock);
122 
123 	BlockId elseBlock = fun.addBlock(whileBodyStart);
124 	fun.branchArg2(if_branch, elseBlock);
125 	IrRef while_end_jump_2 = fun.put(makeJump());
126 	fun.sealBlock(elseBlock);
127 
128 	BlockId whileBodyEnd = fun.addBlock();
129 	fun.addBlockPred(whileBodyEnd, thenBlock);
130 	fun.addBlockPred(whileBodyEnd, elseBlock);
131 	fun.branchArg1(while_end_jump_1, whileBodyEnd);
132 	fun.branchArg1(while_end_jump_2, whileBodyEnd);
133 	IrRef header_jump = fun.put(makeJump());
134 	fun.branchArg1(header_jump, whileHeader);
135 	fun.addBlockPred(whileHeader, whileBodyEnd);
136 	fun.sealBlock(whileBodyEnd);
137 	fun.sealBlock(whileHeader);
138 
139 	BlockId afterWhileBlock = fun.addBlock(whileHeader);
140 	fun.sealBlock(afterWhileBlock);
141 	fun.branchArg2(op_while_branch, afterWhileBlock);
142 	IrRef op5_x = fun.readVariable(var_x, afterWhileBlock);
143 	IrRef op5_1 = fun.put(1);
144 	IrRef op5 = fun.put(makeInstr2(IrOpcode.o_add, IrValueType.i32, op5_x, op5_1));
145 	fun.writeVariable(var_y, afterWhileBlock, op5);
146 
147 	fun.print();
148 }
149 
150 void testSign()
151 {
152 	IrFunction fun = createFunction();
153 	/*
154 	// i32 sign(i32 number)
155 	// {
156 	//   i32 result; // = 0;
157 	//   if (number < 0) result = 0-1;
158 	//   else if (number > 0) result = 1;
159 	//   else result = 0;
160 	//   return result;
161 	// }
162 	// This come from AST
163 	VarId numberVar = VarId(0); // parameter
164 	VarId resultVar = VarId(0); // variable
165 
166 	// This is a pass converting AST to IR
167 	BlockId startBlock = BlockId(0);
168 	IrRef numRef = fun.put(makeInstr0(IrOpcode.o_param, IrValueType.i32));
169 	fun.writeVariable(numberVar, startBlock, numRef);
170 	IrRef resRef = fun.put(makeInstr1(IrOpcode.o_assign, IrValueType.i32, ZERO_I32_REF));
171 	IrRef cmpRef = fun.put(makeInstrCmp(IrCond.ge, numRef, ZERO_I32_REF));
172 	IrRef brnRef = fun.put(makeInstr0(IrOpcode.o_branch, IrValueType.i1));
173 	IrRef tmp1Ref = fun.put(makeInstr2(IrOpcode.o_sub, IrValueType.i32, ZERO_I32_REF, ONE_I32_REF));
174 	fun.instructions[brnRef.index].arg0 = tmp1Ref;
175 	IrRef res2Ref = fun.put(makeInstr2(IrOpcode.o_assign, IrValueType.i32, resRef, tmp1Ref));
176 	*/
177 	fun.print();
178 }
179 
180 IrFunction createFunction() {
181 	IrFunction fun;
182 	//fun.constants ~= [IrConstant(0), IrConstant(1)];
183 	return fun;
184 }
185 
186 enum ZERO_I1_REF  = IrRef(0, IrValueKind.con, IrValueType.i1);
187 enum ZERO_I32_REF = IrRef(0, IrValueKind.con, IrValueType.i32);
188 enum ZERO_I64_REF = IrRef(0, IrValueKind.con, IrValueType.i64);
189 enum ONE_I1_REF   = IrRef(1, IrValueKind.con, IrValueType.i1);
190 enum ONE_I32_REF  = IrRef(1, IrValueKind.con, IrValueType.i32);
191 enum ONE_I64_REF  = IrRef(1, IrValueKind.con, IrValueType.i64);
192 
193 struct IrFunction
194 {
195 	// first constant is always 0/false, second is alway 1/true
196 	IrConstant[] constants;
197 	IrInstruction[] instructions;
198 	IrPhi[] phis;
199 	IrBlock[] blocks;
200 	IrRef[BlockVarPair] blockVarDef;
201 
202 	BlockId addBlock(BlockId predecessor) {
203 		BlockId blockId = addBlock();
204 		blocks[blockId].preds ~= predecessor;
205 		return blockId;
206 	}
207 
208 	void addBlockPred(BlockId blockId, BlockId predecessor) {
209 		blocks[blockId].preds ~= predecessor;
210 	}
211 
212 	BlockId addBlock() {
213 		uint index = cast(uint)blocks.length;
214 		IrInstruction blockInstr;
215 		blockInstr.op = IrOpcode.o_block;
216 		blockInstr.arg0.index = index;
217 		auto irRef = put(blockInstr);
218 		blocks ~= IrBlock(irRef);
219 		return BlockId(index);
220 	}
221 
222 	IrRef put(long value) {
223 		foreach(uint i, con; constants)
224 			if (con.i64 == value)
225 				return IrRef(i, IrValueKind.con, IrValueType.i64);
226 		uint index = cast(uint)constants.length;
227 		constants ~= IrConstant(value);
228 		return IrRef(index, IrValueKind.con, IrValueType.i64);
229 	}
230 
231 	IrRef put(int value) {
232 		foreach(uint i, con; constants)
233 			if (con.i32 == value)
234 				return IrRef(i, IrValueKind.con, IrValueType.i32);
235 		uint index = cast(uint)constants.length;
236 		constants ~= IrConstant(value);
237 		return IrRef(index, IrValueKind.con, IrValueType.i32);
238 	}
239 
240 	IrRef put(IrInstruction instr) {
241 		uint index = cast(uint)instructions.length;
242 		instructions ~= instr;
243 		auto irRef = IrRef(index, IrValueKind.instr, instr.type);
244 		foreach(IrRef argRef; instr.args[0..numInstrArgs[instr.op]])
245 			if (argRef.kind == IrValueKind.phi)
246 				phis[argRef.index].addUser(irRef);
247 		return irRef;
248 	}
249 
250 	void branchArg1(IrRef branchInstrRef, BlockId target1) { instructions[branchInstrRef.index].arg1.index = target1; }
251 	void branchArg2(IrRef branchInstrRef, BlockId target2) { instructions[branchInstrRef.index].arg2.index = target2; }
252 
253 	IrRef addPhi(BlockId blockId, IrValueType type) {
254 		uint index = cast(uint)phis.length;
255 		auto irRef = IrRef(index, IrValueKind.phi, type);
256 		phis ~= IrPhi(blockId, type);
257 		blocks[blockId].phis ~= irRef;
258 		return irRef;
259 	}
260 
261 	void print() {
262 		foreach(int i, con; constants)
263 			writefln("  %%const_%s = %s", i, con.i64);
264 		foreach(int i, instr; instructions) {
265 			switch(instr.op)
266 			{
267 				case IrOpcode.o_branch:
268 					writefln("  branch %s @%s, @%s", RefPr(&this, instr.arg0), instr.arg1.index, instr.arg2.index);
269 					break;
270 
271 				case IrOpcode.o_jump:
272 					writefln("  jump @%s", instr.arg1.index);
273 					break;
274 
275 				case IrOpcode.o_block:
276 					auto block = &blocks[instr.arg0.index];
277 					writef("@%s", instr.arg0.index);
278 					foreach(pred; block.preds)
279 						writef(" <- @%s", pred);
280 					writeln;
281 					foreach(phiRef; block.phis)
282 					{
283 						writef("  %s phi.%s(", phiRef.type, phiRef.index);
284 						foreach(phi_i, arg; phis[phiRef.index].args)
285 						{
286 							if (phi_i > 0) write(", ");
287 							writef("%s", RefPr(&this, arg));
288 						}
289 						writeln(")");
290 					}
291 					break;
292 
293 				case IrOpcode.o_icmp:
294 					writef("  %%%s = %- 3s %- 8s", i, instr.type, instr.op);
295 					writef(" %s %s, %s", instr.cond, RefPr(&this, instr.arg0), RefPr(&this, instr.arg1));
296 					writeln;
297 					break;
298 
299 				default:
300 					if (hasInstrReturn[instr.op])
301 						writef("  %%%s = %- 3s %- 8s", i, instr.type, instr.op);
302 					else  writef("  %%%s %s", i, instr.op);
303 
304 					switch (numInstrArgs[instr.op]) {
305 						case 1: writef(" %s", RefPr(&this, instr.arg0)); break;
306 						case 2: writef(" %s, %s", RefPr(&this, instr.arg0),
307 							RefPr(&this, instr.arg1)); break;
308 						default: break;
309 					}
310 					writeln;
311 					break;
312 			}
313 		}
314 	}
315 
316 	// Algorithm 1: Implementation of local value numbering
317 	void writeVariable(Var variable, BlockId blockId, IrRef value)
318 	{
319 		blockVarDef[BlockVarPair(blockId, variable.id)] = value;
320 	}
321 
322 	// ditto
323 	IrRef readVariable(Var variable, BlockId blockId)
324 	{
325 		if (auto irRef = BlockVarPair(blockId, variable.id) in blockVarDef)
326 			return *irRef;
327 		return readVariableRecursive(variable, blockId);
328 	}
329 
330 	// Algorithm 2: Implementation of global value numbering
331 	IrRef readVariableRecursive(Var variable, BlockId blockId)
332 	{
333 		IrRef value;
334 		auto block = &blocks[blockId];
335 		if (!block.isSealed) {
336 			// Incomplete CFG
337 			value = addPhi(blockId, variable.type);
338 			block.incompletePhis ~= IncompletePhi(variable, value);
339 		}
340 		else if (block.preds.length == 1) {
341 			// Optimize the common case of one predecessor: No phi needed
342 			value = readVariable(variable, block.preds[0]);
343 		}
344 		else
345 		{
346 			// Break potential cycles with operandless phi
347 			value = addPhi(blockId, variable.type);
348 			writeVariable(variable, blockId, value);
349 			value = addPhiOperands(variable, value, blockId);
350 		}
351 		writeVariable(variable, blockId, value);
352 		return value;
353 	}
354 
355 	// ditto
356 	IrRef addPhiOperands(Var variable, IrRef phiRef, BlockId blockId)
357 	{
358 		// Determine operands from predecessors
359 		foreach (BlockId pred; blocks[blockId].preds)
360 		{
361 			auto val = readVariable(variable, pred);
362 			// Phi should not be cached before loop, since readVariable can add phi to phis, reallocating the array
363 			phis[phiRef.index].addArg(val);
364 		}
365 		return tryRemoveTrivialPhi(phiRef);
366 	}
367 
368 	// Algorithm 3: Detect and recursively remove a trivial φ function
369 	IrRef tryRemoveTrivialPhi(IrRef phiRef)
370 	{
371 		IrRef same = IrRef();
372 		foreach (arg; phis[phiRef.index].args) {
373 			if (arg == same || arg == phiRef) {
374 				continue; // Unique value or self−reference
375 			}
376 			if (same != IrRef())
377 			{
378 				return phiRef; // The phi merges at least two values: not trivial
379 			}
380 			same = arg;
381 		}
382 		if (same == IrRef()) {
383 			//same = new Undef(); // The phi is unreachable or in the start block
384 		}
385 		IrRef[] users = phis[phiRef.index].users.dup; // Remember all users except the phi itself
386 		replaceBy(phis[phiRef.index], phiRef, same); // Reroute all uses of phi to same and remove phi
387 
388 		// Try to recursively remove all phi users, which might have become trivial
389 		foreach (use; users)
390 			if (use.kind == IrValueKind.phi)
391 				if (use != phiRef)
392 					tryRemoveTrivialPhi(use);
393 		return same;
394 	}
395 
396 	// ditto
397 	void replaceBy(ref IrPhi phi, IrRef phiRef, IrRef byWhat)
398 	{
399 		foreach (IrRef userRef; phi.users)
400 		{
401 			final switch (userRef.kind) {
402 				case IrValueKind.con: break;
403 				case IrValueKind.label: break;
404 				case IrValueKind.instr:
405 					auto instr = instructions[userRef.index];
406 					foreach (ref IrRef arg; instr.args[0..numInstrArgs[instr.op]])
407 						if (arg == phiRef) arg = byWhat;
408 					break;
409 				case IrValueKind.phi:
410 					auto otherPhi = &phis[userRef.index];
411 					foreach (ref IrRef arg; otherPhi.args)
412 						if (arg == phiRef) arg = byWhat;
413 					break;
414 			}
415 		}
416 		blocks[phi.blockId].phis.removeInPlace(phiRef);
417 		phi = IrPhi();
418 	}
419 
420 	// Algorithm 4: Handling incomplete CFGs
421 	void sealBlock(BlockId blockId)
422 	{
423 		foreach (pair; blocks[blockId].incompletePhis)
424 			addPhiOperands(pair.var, pair.phi, blockId);
425 		blocks[blockId].isSealed = true;
426 	}
427 }
428 
429 T[] removeInPlace(T)(T[] array, T what)
430 {
431 	size_t i = 0;
432 	size_t length = array.length;
433 	while(i < length)
434 	{
435 		if (array[i] == what)
436 		{
437 			array[i] = array[length-1];
438 			--length;
439 		}
440 		++i;
441 	}
442 	return assumeSafeAppend(array[0..length]);
443 }
444 
445 unittest
446 {
447 	assert(removeInPlace([], 1) == []);
448 	assert(removeInPlace([1], 1) == []);
449 	assert(removeInPlace([1], 2) == [1]);
450 	assert(removeInPlace([1, 2], 2) == [1]);
451 	assert(removeInPlace([2, 1], 2) == [1]);
452 }
453 
454 struct IrPhi
455 {
456 	BlockId blockId;
457 	IrValueType type;
458 	IrRef[] args;
459 	IrRef[] users;
460 
461 	void addArg(IrRef arg)
462 	{
463 		args ~= arg;
464 	}
465 
466 	void addUser(IrRef user)
467 	{
468 		users ~= user;
469 	}
470 }
471 
472 struct BlockVarPair
473 {
474 	BlockId blockId;
475 	VarId varId;
476 }
477 
478 struct IncompletePhi
479 {
480 	Var var;
481 	IrRef phi;
482 }
483 
484 struct IrBlock
485 {
486 	IrRef instrRef;
487 	IncompletePhi[] incompletePhis;
488 	BlockId[] preds;
489 	IrRef[] phis;
490 	mixin(bitfields!(
491 		bool, "isSealed",  1,
492 		uint, "",          7
493 	));
494 }
495 
496 // print helper for refs
497 struct RefPr
498 {
499 	IrFunction* fun;
500 	IrRef r;
501 	void toString(scope void delegate(const(char)[]) sink) {
502 		final switch(r.kind) {
503 			case IrValueKind.con:
504 				final switch(r.type) {
505 					case IrValueType.i1:  sink.formattedWrite("i1  %s",  fun.constants[r.index].i1);  break;
506 					case IrValueType.i32: sink.formattedWrite("i32 %s", fun.constants[r.index].i32); break;
507 					case IrValueType.i64: sink.formattedWrite("i64 %s", fun.constants[r.index].i64); break;
508 				} break;
509 			case IrValueKind.label: sink.formattedWrite("@%s", r.index); break;
510 			case IrValueKind.instr: sink.formattedWrite("%s %%%s", r.type, r.index); break;
511 			case IrValueKind.phi:  sink.formattedWrite("%- 3s phi.%s", r.type, r.index); break;
512 		}
513 	}
514 }
515 
516 struct IrRef
517 {
518 	//enum Null = IrRef.init;
519 	this(uint idx, IrValueKind k, IrValueType t) {
520 		index = idx; isDefined = true; kind = k; type = t;
521 	}
522 	mixin(bitfields!(
523 		// instruction/constant index
524 		uint,        "index",     27,
525 		bool,        "isDefined",  1,
526 		IrValueKind, "kind",       2,
527 		IrValueType, "type",       2
528 	));
529 }
530 
531 struct Var { VarId id; IrValueType type; }
532 struct VarId { uint id; alias id this; }
533 struct BlockId { uint id; alias id this; }
534 
535 union IrConstant
536 {
537 	this(int value)  { this.i32 = value; }
538 	this(long value) { this.i64 = value; }
539 	bool i1;
540 	int i32;
541 	long i64;
542 }
543 
544 IrInstruction makeInstr0(IrOpcode op, IrValueType type) {
545 	return IrInstruction(op, type); }
546 IrInstruction makeInstr1(IrOpcode op, IrValueType type, IrRef arg0) {
547 	auto i = IrInstruction(op, type); i.arg0 = arg0; return i; }
548 IrInstruction makeInstr2(IrOpcode op, IrValueType type, IrRef arg0, IrRef arg1) {
549 	auto i = IrInstruction(op, type); i.arg0 = arg0; i.arg1 = arg1; return i; }
550 IrInstruction makeInstrCmp(IrCond cond, IrRef arg0, IrRef arg1) {
551 	auto i = IrInstruction(IrOpcode.o_icmp, IrValueType.i1); i.cond = cond; i.arg0 = arg0; i.arg1 = arg1; return i; }
552 IrInstruction makeBranch(IrRef arg0) {
553 	auto i = IrInstruction(IrOpcode.o_branch); i.arg0 = arg0; return i; }
554 IrInstruction makeJump() {
555 	auto i = IrInstruction(IrOpcode.o_jump); return i; }
556 
557 struct IrInstruction
558 {
559 	IrOpcode op;
560 	IrValueType type;
561 	union
562 	{
563 		struct
564 		{
565 			IrCond cond; // condition of icmp instruction
566 			ushort numUses;
567 		}
568 		IrRef arg2;
569 	}
570 	union {
571 		struct {
572 			IrRef arg0, arg1;
573 		}
574 		IrRef[2] args;
575 	}
576 }
577 
578 enum IrOpcode : ubyte
579 {
580 	// zero args
581 	o_nop,
582 	o_branch,
583 	o_jump,
584 	o_block,
585 
586 	//one args
587 	o_not,
588 	o_assign,
589 
590 	// two args
591 	o_icmp,
592 	o_add,
593 	o_sub,
594 	o_mul,
595 	o_div,
596 
597 	// multi args
598 }
599 
600 enum IrCond : ubyte {
601 	eq,
602 	ne,
603 	l,
604 	le,
605 	g,
606 	ge
607 }
608 
609 ubyte[IrOpcode.max+1] numInstrArgs = [0,0,0,0,1,1,2,2,2,2,2];
610 bool[IrOpcode.max+1] hasInstrReturn = [0,0,0,0,1,1,1,1,1,1,1];
611 
612 // Type of constant or return type of instruction
613 enum IrValueType : ubyte
614 {
615 	i1,
616 	i32,
617 	i64
618 }
619 
620 // To mark IR reference
621 enum IrValueKind : ubyte
622 {
623 	con,
624 	label,
625 	instr,
626 	phi
627 }