1 /* file: "tinyc.d" */
2 
3 /* Copyright (C) 2001 by Marc Feeley, All Rights Reserved. */
4 // D port by MrSmith33 (C) 2014
5 module tinyc;
6 
7 
8 import std.stdio;
9 import std.algorithm : equal;
10 
11 /*
12  * This is a compiler for the Tiny-C language.  Tiny-C is a
13  * considerably stripped down version of C and it is meant as a
14  * pedagogical tool for learning about compilers.  The integer global
15  * variables "a" to "z" are predefined and initialized to zero, and it
16  * is not possible to declare new variables.  The compiler reads the
17  * program from standard input and prints out the value of the
18  * variables that are not zero.  The grammar of Tiny-C in EBNF is:
19  *
20  *  <program> ::= <statement>
21  *  <statement> ::= "if" <paren_expr> <statement> |
22  *                  "if" <paren_expr> <statement> "else" <statement> |
23  *                  "while" <paren_expr> <statement> |
24  *                  "do" <statement> "while" <paren_expr> ";" |
25  *                  "{" { <statement> } "}" |
26  *                  <expr> ";" |
27  *                  ";"
28  *  <paren_expr> ::= "(" <expr> ")"
29  *  <expr> ::= <test> | <id> "=" <expr>
30  *  <test> ::= <sum> | <sum> "<" <sum>
31  *  <sum> ::= <term> | <sum> "+" <term> | <sum> "-" <term>
32  *  <term> ::= <id> | <int> | <paren_expr>
33  *  <id> ::= "a" | "b" | "c" | "d" | ... | "z"
34  *  <int> ::= <an_unsigned_decimal_integer>
35  *
36  * Here are a few invocations of the compiler:
37  *
38  * % echo "a=b=c=2<3;" | ./a.out
39  * a = 1
40  * b = 1
41  * c = 1
42  * % echo "{ i=1; while (i<100) i=i+i; }" | ./a.out
43  * i = 128
44  * % echo "{ i=125; j=100; while (i-j) if (i<j) j=j-i; else i=i-j; }" | ./a.out
45  * i = 25
46  * j = 25
47  * % echo "{ i=1; do i=i+10; while (i<50); }" | ./a.out
48  * i = 51
49  * % echo "{ i=1; while ((i=i+10)<50) ; }" | ./a.out
50  * i = 51
51  * % echo "{ i=7; if (i<5) n=1; if (i<10) y=2; }" | ./a.out
52  * i = 7
53  * y = 2
54  *
55  * The compiler does a minimal amount of error checking to help
56  * highlight the structure of the compiler.
57  */
58 
59 
60 /*---------------------------------------------------------------------------*/
61 
62 /* Lexer. */
63 
64 enum { DO_SYM, ELSE_SYM, IF_SYM, WHILE_SYM, LBRA, RBRA, LPAR, RPAR,
65 			 PLUS, MINUS, LESS, SEMI, EQUAL, INT, ID, EOI };
66 string[] words = [ "do", "else", "if", "while", null ];
67 
68 void syntax_error() { assert(false, "syntax error"); }
69 
70 struct Lexer
71 {
72 	char delegate() charSource;
73 
74 	char ch = ' ';
75 	int sym;
76 	int int_val;
77 	char[100] id_name;
78 
79 	void next_ch() { ch = charSource(); }
80 
81 	void next_sym()
82 	{
83 		while(ch == ' ' || ch == '\n') next_ch();
84 
85 		switch (ch)
86 		{
87 			case 255: sym = EOI; break; //EOF
88 			case '{': next_ch(); sym = LBRA; break;
89 			case '}': next_ch(); sym = RBRA; break;
90 			case '(': next_ch(); sym = LPAR; break;
91 			case ')': next_ch(); sym = RPAR; break;
92 			case '+': next_ch(); sym = PLUS; break;
93 			case '-': next_ch(); sym = MINUS; break;
94 			case '<': next_ch(); sym = LESS; break;
95 			case ';': next_ch(); sym = SEMI; break;
96 			case '=': next_ch(); sym = EQUAL; break;
97 			default:
98 			{
99 				if (ch >= '0' && ch <= '9')
100 				{
101 					int_val = 0; /* missing overflow check */
102 					while (ch >= '0' && ch <= '9')
103 					{
104 						int_val = int_val*10 + (ch - '0');
105 						next_ch();
106 					}
107 					sym = INT;
108 				}
109 				else if (ch >= 'a' && ch <= 'z')
110 				{
111 					size_t i = 0; /* missing overflow check */
112 					while ((ch >= 'a' && ch <= 'z') || ch == '_')
113 					{
114 						id_name[i++] = ch;
115 						next_ch();
116 					}
117 					id_name[i] = '\0';
118 					sym = 0;
119 					while (words[sym] !is null && !equal(words[sym], id_name[0..i]))
120 					{
121 						sym = sym + 1;
122 					}
123 					if (words[sym] is null)
124 					{
125 						if (id_name[1] == '\0')
126 							sym = ID;
127 						else
128 							syntax_error();
129 					}
130 				}
131 				else
132 					syntax_error();
133 			}
134 		}
135 	}
136 }
137 
138 /*---------------------------------------------------------------------------*/
139 
140 /* Parser. */
141 
142 enum { VAR, CST, ADD, SUB, LT, SET,
143 			 IF1, IF2, WHILE, DO, EMPTY, SEQ, EXPR, PROG }
144 struct Node
145 {
146 	int kind;
147 	Node* o1, o2, o3;
148 	int val;
149 }
150 
151 struct Parser
152 {
153 	Lexer* lexer;
154 
155 	Node* new_Node(int k)
156 	{
157 		Node* n = new Node;
158 		n.kind = k;
159 		return n;
160 	}
161 
162 	Node* term()  /* <term> ::= <id> | <int> | <paren_expr> */
163 	{
164 		Node* n;
165 		if (lexer.sym == ID) { n=new_Node(VAR); n.val=lexer.id_name[0]-'a'; lexer.next_sym(); }
166 		else if (lexer.sym == INT) { n=new_Node(CST); n.val=lexer.int_val; lexer.next_sym(); }
167 		else n = paren_expr();
168 		return n;
169 	}
170 
171 	Node* sum()  /* <sum> ::= <term> | <sum> "+" <term> | <sum> "-" <term> */
172 	{
173 		Node* t, n = term();
174 		while (lexer.sym == PLUS || lexer.sym == MINUS)
175 		{
176 			t=n; n=new_Node(lexer.sym==PLUS?ADD:SUB); lexer.next_sym(); n.o1=t; n.o2=term();
177 		}
178 		return n;
179 	}
180 
181 	Node* test()  /* <test> ::= <sum> | <sum> "<" <sum> */
182 	{
183 		Node* t, n = sum();
184 		if (lexer.sym == LESS)
185 		{
186 			t=n; n=new_Node(LT); lexer.next_sym(); n.o1=t; n.o2=sum();
187 		}
188 		return n;
189 	}
190 
191 	Node* expr()  /* <expr> ::= <test> | <id> "=" <expr> */
192 	{
193 		Node* t, n;
194 		if (lexer.sym != ID) return test();
195 		n = test();
196 		if (n.kind == VAR && lexer.sym == EQUAL)
197 		{
198 			t=n; n=new_Node(SET); lexer.next_sym(); n.o1=t; n.o2=expr();
199 		}
200 		return n;
201 	}
202 
203 	Node* paren_expr()  /* <paren_expr> ::= "(" <expr> ")" */
204 	{
205 		Node* n;
206 		if (lexer.sym == LPAR) lexer.next_sym(); else syntax_error();
207 		n = expr();
208 		if (lexer.sym == RPAR) lexer.next_sym(); else syntax_error();
209 		return n;
210 	}
211 
212 	Node* statement()
213 	{
214 		Node* n;
215 
216 		if (lexer.sym == IF_SYM)  /* "if" <paren_expr> <statement> */
217 		{
218 			n = new_Node(IF1);
219 			lexer.next_sym();
220 			n.o1 = paren_expr();
221 			n.o2 = statement();
222 			if (lexer.sym == ELSE_SYM)  /* ... "else" <statement> */
223 			{
224 				n.kind = IF2;
225 				lexer.next_sym();
226 				n.o3 = statement();
227 			}
228 		}
229 		else if (lexer.sym == WHILE_SYM)  /* "while" <paren_expr> <statement> */
230 		{
231 			n = new_Node(WHILE);
232 			lexer.next_sym();
233 			n.o1 = paren_expr();
234 			n.o2 = statement();
235 		}
236 		else if (lexer.sym == DO_SYM)  /* "do" <statement> "while" <paren_expr> ";" */
237 		{
238 			n = new_Node(DO);
239 			lexer.next_sym();
240 			n.o1 = statement();
241 			if (lexer.sym == WHILE_SYM)
242 				lexer.next_sym();
243 			else
244 				syntax_error();
245 			n.o2 = paren_expr();
246 			if (lexer.sym == SEMI)
247 				lexer.next_sym();
248 			else
249 				syntax_error();
250 		}
251 		else if (lexer.sym == SEMI)  /* ";" */
252 		{
253 			n = new_Node(EMPTY);
254 			lexer.next_sym();
255 		}
256 		else if (lexer.sym == LBRA)  /* "{" { <statement> } "}" */
257 		{
258 			n = new_Node(EMPTY);
259 			lexer.next_sym();
260 			while (lexer.sym != RBRA)
261 			{
262 				Node* temp = n;
263 				n = new_Node(SEQ);
264 				n.o1 = temp;
265 				n.o2 = statement();
266 			}
267 			lexer.next_sym();
268 		}
269 		else  /* <expr> ";" */
270 		{
271 			n = new_Node(EXPR);
272 			n.o1 = expr();
273 			if (lexer.sym == SEMI)
274 				lexer.next_sym();
275 			else
276 				syntax_error();
277 		}
278 		return n;
279 	}
280 
281 	Node* program()  /* <program> ::= <statement> */
282 	{
283 		Node* n = new_Node(PROG);
284 		lexer.next_sym();
285 		n.o1 = statement();
286 		if (lexer.sym != EOI)
287 			syntax_error();
288 		return n;
289 	}
290 }
291 
292 
293 /*---------------------------------------------------------------------------*/
294 
295 /* Code generator. */
296 
297 enum { IFETCH, ISTORE, IPUSH, IPOP, IADD, ISUB, ILT, JZ, JNZ, JMP, HALT };
298 string[] opCodes = ["IFETCH", "ISTORE", "IPUSH", "IPOP", "IADD", "ISUB", "ILT", "JZ", "JNZ", "JMP", "HALT"];
299 
300 string opCodeToStr(byte op) {
301 	if (op < opCodes.length)
302 		return opCodes[op];
303 	return "INVALID";
304 }
305 
306 alias code = byte;
307 
308 struct CodeGenerator
309 {
310 	byte* pc;
311 
312 	void put(byte c) { *pc++ = c; } /* missing overflow check */
313 	byte* hole() { return pc++; }
314 	void fix(byte* src, byte* dst) { *src = cast(byte)(dst - src); } /* missing overflow check */
315 
316 	void compile(Node* n)
317 	{
318 		byte* p1, p2;
319 		switch (n.kind)
320 		{
321 			case VAR  : put(IFETCH); put(cast(byte)n.val); break;
322 			case CST  : put(IPUSH); put(cast(byte)n.val); break;
323 			case ADD  : compile(n.o1); compile(n.o2); put(IADD); break;
324 			case SUB  : compile(n.o1); compile(n.o2); put(ISUB); break;
325 			case LT   : compile(n.o1); compile(n.o2); put(ILT); break;
326 			case SET  : compile(n.o2); put(ISTORE); put(cast(byte)n.o1.val); break;
327 			case IF1  : compile(n.o1); put(JZ); p1=hole(); compile(n.o2); fix(p1,pc); break;
328 			case IF2  : compile(n.o1); put(JZ); p1=hole(); compile(n.o2); put(JMP); p2=hole();
329 									fix(p1,pc); compile(n.o3); fix(p2,pc); break;
330 			case WHILE: p1=pc; compile(n.o1); put(JZ); p2=hole(); compile(n.o2);
331 									put(JMP); fix(hole(),p1); fix(p2,pc); break;
332 			case DO   : p1=pc; compile(n.o1); compile(n.o2); put(JNZ); fix(hole(),p1); break;
333 			case EMPTY: break;
334 			case SEQ  : compile(n.o1); compile(n.o2); break;
335 			case EXPR : compile(n.o1); put(IPOP); break;
336 			case PROG : compile(n.o1); put(HALT); break;
337 			default: break;
338 		}
339 	}
340 }
341 
342 void printAST(Node* n, string indent = "")
343 {
344 	switch (n.kind)
345 	{
346 		case VAR  : writeln(indent, "VAR ", cast(char)(n.val+'a'));   break;
347 		case CST  : writeln(indent, "CST");   break;
348 		case ADD  : writeln(indent, "ADD");   printAST(n.o1, indent~"  "); printAST(n.o2, indent~"  "); break;
349 		case SUB  : writeln(indent, "SUB");   printAST(n.o1, indent~"  "); printAST(n.o2, indent~"  "); break;
350 		case LT   : writeln(indent, "LT");    printAST(n.o1, indent~"  "); printAST(n.o2, indent~"  "); break;
351 		case SET  : writeln(indent, "SET");   printAST(n.o2, indent~"  "); break;
352 		case IF1  : writeln(indent, "IF1");   printAST(n.o1, indent~"  "); printAST(n.o2, indent~"  "); break;
353 		case IF2  : writeln(indent, "IF2");   printAST(n.o1, indent~"  "); printAST(n.o2, indent~"  "); printAST(n.o3, indent~"  "); break;
354 		case WHILE: writeln(indent, "WHILE"); printAST(n.o1, indent~"  "); printAST(n.o2, indent~"  "); break;
355 		case DO   : writeln(indent, "DO");    printAST(n.o1, indent~"  "); printAST(n.o2, indent~"  "); break;
356 		case EMPTY: writeln(indent, "EMPTY"); break;
357 		case SEQ  : writeln(indent, "SEQ");   printAST(n.o1, indent~"  "); printAST(n.o2, indent~"  "); break;
358 		case EXPR : writeln(indent, "EXPR");  printAST(n.o1, indent~"  "); break;
359 		case PROG : writeln(indent, "PROG");  printAST(n.o1, indent~"  "); break;
360 		default: break;
361 	}
362 }
363 
364 /*---------------------------------------------------------------------------*/
365 
366 /* Jit compiler */
367 
368 struct JitVM
369 {
370 	import amd64asm;
371 	import utils;
372 
373 	ubyte[] mem;
374 	ubyte[] code;
375 
376 	int[26] globals;
377 
378 	void reset()
379 	{
380 		globals[] = 0;
381 	}
382 
383 	void free()
384 	{
385 		free_executable_memory(mem);
386 		mem = null;
387 	}
388 
389 	void run()
390 	{
391 		alias JittedFunc = void function(void*);
392 		JittedFunc func = cast(JittedFunc)mem.ptr;
393 		//assert(false);
394 		func(&globals[0]);
395 	}
396 
397 	/// RAX stores ptr to variables int[26]
398 	enum VARS_REG = Register.AX;
399 	/// RCX temp
400 	enum TEMP_REG_1 = Register.CX;
401 	enum TEMP_REG_2 = Register.DX;
402 	CodeGen_x86_64 gen;
403 
404 	void compile(Node* n)
405 	{
406 		if (mem.length == 0) mem = alloc_executable_memory(4096 * 64);
407 		gen.encoder.setBuffer(mem);
408 
409 		gen.movq(VARS_REG, Register.CX); // Mov address into VARS_REG
410 		compileNode(n);
411 		code = gen.encoder.code;
412 
413 		gen.encoder.resetPC();
414 	}
415 
416 	void compileNode(Node* n)
417 	{
418 		switch (n.kind)
419 		{
420 			case VAR  :
421 				gen.movd(TEMP_REG_1, memAddrBaseDisp8(VARS_REG, cast(ubyte)(n.val * 4))); // IFETCH
422 				break;
423 			case CST  : // constant
424 				gen.movd(TEMP_REG_1, Imm32(n.val)); // n.val == literal value
425 				break;
426 			case ADD  :
427 				compileNode(n.o1);
428 					gen.pushq(TEMP_REG_1);
429 				compileNode(n.o2);
430 					gen.popq(TEMP_REG_2); // o2
431 					gen.addd(TEMP_REG_1, TEMP_REG_2); // o1 += o2
432 				break;
433 			case SUB  : // o1 - o2
434 				compileNode(n.o2);
435 					gen.pushq(TEMP_REG_1);
436 				compileNode(n.o1);
437 					gen.popq(TEMP_REG_2); // o2
438 					gen.subd(TEMP_REG_1, TEMP_REG_2); // o1 = o1 - o2
439 				break;
440 			case LT   : // o1 < o2
441 				compileNode(n.o1);
442 					gen.pushq(TEMP_REG_1);
443 				compileNode(n.o2);
444 					gen.popq(TEMP_REG_2); // o1
445 					gen.cmpd(TEMP_REG_2, TEMP_REG_1); // cmp(o1, o2) // memAddrBase(Register.SP)
446 					gen.setcc(Condition.L, TEMP_REG_1);
447 					gen.movzx_btod(TEMP_REG_1, TEMP_REG_1);
448 				break;
449 			case SET  :
450 				compileNode(n.o2); // result in TEMP_REG_1
451 					gen.movd(memAddrBaseDisp8(VARS_REG, cast(ubyte)(n.o1.val * 4)), TEMP_REG_1); // ISTORE;
452 				break;
453 			case IF1  :
454 				compileNode(n.o1); // paren_expr
455 					gen.testd(TEMP_REG_1, TEMP_REG_1);
456 					auto false_jump = gen.saveFixup();
457 					gen.jcc(Condition.Z, Imm32(0));
458 					auto nextInstrOff = gen.pc;
459 				compileNode(n.o2);
460 					false_jump.jcc(Condition.Z, jumpOffset(nextInstrOff, gen.pc));
461 				break;
462 			case IF2  :
463 				compileNode(n.o1); // paren_expr
464 					gen.testd(TEMP_REG_1, TEMP_REG_1);
465 					auto else_jump_fix = gen.saveFixup();
466 					gen.jcc(Condition.Z, Imm32(0));
467 					auto else_pc = gen.pc;
468 				compileNode(n.o2);
469 					auto end_jump = gen.saveFixup();
470 					gen.jmp(Imm32(0));
471 					auto then_pc = gen.pc;
472 					else_jump_fix.jcc(Condition.Z, jumpOffset(else_pc, gen.pc)); // fix cond -> else
473 				compileNode(n.o3);
474 					end_jump.jmp(jumpOffset(then_pc, gen.pc)); // fix then -> end
475 				break;
476 			case WHILE:
477 					auto condition_pc = gen.pc;
478 				compileNode(n.o1); // paren_expr
479 					gen.testd(TEMP_REG_1, TEMP_REG_1);
480 					auto break_fix = gen.saveFixup();
481 					gen.jcc(Condition.Z, Imm32(0)); // break
482 					auto break_pc = gen.pc;
483 				compileNode(n.o2); // statement
484 					auto continue_fix = gen.saveFixup();
485 					gen.jmp(Imm32(0));
486 					continue_fix.jmp(jumpOffset(gen.pc, condition_pc)); // continue
487 					break_fix.jcc(Condition.Z, jumpOffset(break_pc, gen.pc)); // fix cond -> end
488 				break;
489 			case DO   :
490 					auto do_pc = gen.pc;
491 				compileNode(n.o1); // do <statement>
492 				compileNode(n.o2); // while <paren_expr>
493 					gen.testd(TEMP_REG_1, TEMP_REG_1);
494 					auto continue_fix = gen.saveFixup();
495 					gen.jcc(Condition.NZ, Imm32(0));
496 					continue_fix.jcc(Condition.NZ, jumpOffset(gen.pc, do_pc)); // continue
497 				break;
498 			case EMPTY: break;
499 			case SEQ  : compileNode(n.o1); compileNode(n.o2); break;
500 			case EXPR : compileNode(n.o1); break;
501 			case PROG : compileNode(n.o1); gen.ret(); break;
502 			default: break;
503 		}
504 	}
505 }
506 
507 /*---------------------------------------------------------------------------*/
508 char[1000] buf;
509 size_t buf_index;
510 struct Putter {
511 	static void reset() {
512 		buf_index = 0;
513 	}
514 	static void put(const char chr) {
515 		buf[buf_index++] = chr;
516 	}
517 }
518 char[] print_stack(int[] stack, int* stack_ptr)
519 {
520 	ptrdiff_t size = stack_ptr - stack.ptr;
521 	if (size > 0)
522 	{
523 		import std.format : formattedWrite;
524 		const(char)[] temp = buf;
525 		int[] stack_vals = stack[0..size];
526 		Putter.reset();
527 		foreach(int val; stack_vals)
528 		{
529 			formattedWrite(Putter(), "%s ", val);
530 		}
531 		return buf[0..buf_index];
532 	}
533 	return null;
534 }
535 
536 /* Virtual machine. */
537 struct VM
538 {
539 	int[26] globals;
540 
541 	void run(code[] _object)
542 	{
543 		int[1000] stack = void;
544 		int* sp = stack.ptr;
545 		code* pc = _object.ptr;
546 
547 		loop: while(true)
548 		{
549 			//writefln("exec\t%s\t%s\t% 6s\t%s\t%s", pc - _object.ptr, *pc, opCodeToStr(*pc), sp - stack.ptr, print_stack(stack, sp));
550 			final switch (*pc++)
551 			{
552 				case IFETCH: *sp++ = globals[*pc++];               break;
553 				case ISTORE: globals[*pc++] = sp[-1];              break;
554 				case IPUSH : *sp++ = *pc++;                        break;
555 				case IPOP  : --sp;                                 break;
556 				case IADD  : sp[-2] = sp[-2] + sp[-1]; --sp;       break;
557 				case ISUB  : sp[-2] = sp[-2] - sp[-1]; --sp;       break;
558 				case ILT   : sp[-2] = sp[-2] < sp[-1]; --sp;       break;
559 				case JMP   : pc += *pc;                            break;
560 				case JZ    : if (*--sp == 0) pc += *pc; else pc++; break;
561 				case JNZ   : if (*--sp != 0) pc += *pc; else pc++; break;
562 				case HALT: break loop;
563 			}
564 		}
565 	}
566 }