1 /* file: "tinyc.d" */ 2 3 /* Copyright (C) 2001 by Marc Feeley, All Rights Reserved. */ 4 // D port by MrSmith33 (C) 2014 5 module tinyc; 6 7 8 import std.stdio; 9 import std.algorithm : equal; 10 11 /* 12 * This is a compiler for the Tiny-C language. Tiny-C is a 13 * considerably stripped down version of C and it is meant as a 14 * pedagogical tool for learning about compilers. The integer global 15 * variables "a" to "z" are predefined and initialized to zero, and it 16 * is not possible to declare new variables. The compiler reads the 17 * program from standard input and prints out the value of the 18 * variables that are not zero. The grammar of Tiny-C in EBNF is: 19 * 20 * <program> ::= <statement> 21 * <statement> ::= "if" <paren_expr> <statement> | 22 * "if" <paren_expr> <statement> "else" <statement> | 23 * "while" <paren_expr> <statement> | 24 * "do" <statement> "while" <paren_expr> ";" | 25 * "{" { <statement> } "}" | 26 * <expr> ";" | 27 * ";" 28 * <paren_expr> ::= "(" <expr> ")" 29 * <expr> ::= <test> | <id> "=" <expr> 30 * <test> ::= <sum> | <sum> "<" <sum> 31 * <sum> ::= <term> | <sum> "+" <term> | <sum> "-" <term> 32 * <term> ::= <id> | <int> | <paren_expr> 33 * <id> ::= "a" | "b" | "c" | "d" | ... | "z" 34 * <int> ::= <an_unsigned_decimal_integer> 35 * 36 * Here are a few invocations of the compiler: 37 * 38 * % echo "a=b=c=2<3;" | ./a.out 39 * a = 1 40 * b = 1 41 * c = 1 42 * % echo "{ i=1; while (i<100) i=i+i; }" | ./a.out 43 * i = 128 44 * % echo "{ i=125; j=100; while (i-j) if (i<j) j=j-i; else i=i-j; }" | ./a.out 45 * i = 25 46 * j = 25 47 * % echo "{ i=1; do i=i+10; while (i<50); }" | ./a.out 48 * i = 51 49 * % echo "{ i=1; while ((i=i+10)<50) ; }" | ./a.out 50 * i = 51 51 * % echo "{ i=7; if (i<5) n=1; if (i<10) y=2; }" | ./a.out 52 * i = 7 53 * y = 2 54 * 55 * The compiler does a minimal amount of error checking to help 56 * highlight the structure of the compiler. 57 */ 58 59 60 /*---------------------------------------------------------------------------*/ 61 62 /* Lexer. */ 63 64 enum { DO_SYM, ELSE_SYM, IF_SYM, WHILE_SYM, LBRA, RBRA, LPAR, RPAR, 65 PLUS, MINUS, LESS, SEMI, EQUAL, INT, ID, EOI }; 66 string[] words = [ "do", "else", "if", "while", null ]; 67 68 void syntax_error() { assert(false, "syntax error"); } 69 70 struct Lexer 71 { 72 char delegate() charSource; 73 74 char ch = ' '; 75 int sym; 76 int int_val; 77 char[100] id_name; 78 79 void next_ch() { ch = charSource(); } 80 81 void next_sym() 82 { 83 while(ch == ' ' || ch == '\n') next_ch(); 84 85 switch (ch) 86 { 87 case 255: sym = EOI; break; //EOF 88 case '{': next_ch(); sym = LBRA; break; 89 case '}': next_ch(); sym = RBRA; break; 90 case '(': next_ch(); sym = LPAR; break; 91 case ')': next_ch(); sym = RPAR; break; 92 case '+': next_ch(); sym = PLUS; break; 93 case '-': next_ch(); sym = MINUS; break; 94 case '<': next_ch(); sym = LESS; break; 95 case ';': next_ch(); sym = SEMI; break; 96 case '=': next_ch(); sym = EQUAL; break; 97 default: 98 { 99 if (ch >= '0' && ch <= '9') 100 { 101 int_val = 0; /* missing overflow check */ 102 while (ch >= '0' && ch <= '9') 103 { 104 int_val = int_val*10 + (ch - '0'); 105 next_ch(); 106 } 107 sym = INT; 108 } 109 else if (ch >= 'a' && ch <= 'z') 110 { 111 size_t i = 0; /* missing overflow check */ 112 while ((ch >= 'a' && ch <= 'z') || ch == '_') 113 { 114 id_name[i++] = ch; 115 next_ch(); 116 } 117 id_name[i] = '\0'; 118 sym = 0; 119 while (words[sym] !is null && !equal(words[sym], id_name[0..i])) 120 { 121 sym = sym + 1; 122 } 123 if (words[sym] is null) 124 { 125 if (id_name[1] == '\0') 126 sym = ID; 127 else 128 syntax_error(); 129 } 130 } 131 else 132 syntax_error(); 133 } 134 } 135 } 136 } 137 138 /*---------------------------------------------------------------------------*/ 139 140 /* Parser. */ 141 142 enum { VAR, CST, ADD, SUB, LT, SET, 143 IF1, IF2, WHILE, DO, EMPTY, SEQ, EXPR, PROG } 144 struct Node 145 { 146 int kind; 147 Node* o1, o2, o3; 148 int val; 149 } 150 151 struct Parser 152 { 153 Lexer* lexer; 154 155 Node* new_Node(int k) 156 { 157 Node* n = new Node; 158 n.kind = k; 159 return n; 160 } 161 162 Node* term() /* <term> ::= <id> | <int> | <paren_expr> */ 163 { 164 Node* n; 165 if (lexer.sym == ID) { n=new_Node(VAR); n.val=lexer.id_name[0]-'a'; lexer.next_sym(); } 166 else if (lexer.sym == INT) { n=new_Node(CST); n.val=lexer.int_val; lexer.next_sym(); } 167 else n = paren_expr(); 168 return n; 169 } 170 171 Node* sum() /* <sum> ::= <term> | <sum> "+" <term> | <sum> "-" <term> */ 172 { 173 Node* t, n = term(); 174 while (lexer.sym == PLUS || lexer.sym == MINUS) 175 { 176 t=n; n=new_Node(lexer.sym==PLUS?ADD:SUB); lexer.next_sym(); n.o1=t; n.o2=term(); 177 } 178 return n; 179 } 180 181 Node* test() /* <test> ::= <sum> | <sum> "<" <sum> */ 182 { 183 Node* t, n = sum(); 184 if (lexer.sym == LESS) 185 { 186 t=n; n=new_Node(LT); lexer.next_sym(); n.o1=t; n.o2=sum(); 187 } 188 return n; 189 } 190 191 Node* expr() /* <expr> ::= <test> | <id> "=" <expr> */ 192 { 193 Node* t, n; 194 if (lexer.sym != ID) return test(); 195 n = test(); 196 if (n.kind == VAR && lexer.sym == EQUAL) 197 { 198 t=n; n=new_Node(SET); lexer.next_sym(); n.o1=t; n.o2=expr(); 199 } 200 return n; 201 } 202 203 Node* paren_expr() /* <paren_expr> ::= "(" <expr> ")" */ 204 { 205 Node* n; 206 if (lexer.sym == LPAR) lexer.next_sym(); else syntax_error(); 207 n = expr(); 208 if (lexer.sym == RPAR) lexer.next_sym(); else syntax_error(); 209 return n; 210 } 211 212 Node* statement() 213 { 214 Node* n; 215 216 if (lexer.sym == IF_SYM) /* "if" <paren_expr> <statement> */ 217 { 218 n = new_Node(IF1); 219 lexer.next_sym(); 220 n.o1 = paren_expr(); 221 n.o2 = statement(); 222 if (lexer.sym == ELSE_SYM) /* ... "else" <statement> */ 223 { 224 n.kind = IF2; 225 lexer.next_sym(); 226 n.o3 = statement(); 227 } 228 } 229 else if (lexer.sym == WHILE_SYM) /* "while" <paren_expr> <statement> */ 230 { 231 n = new_Node(WHILE); 232 lexer.next_sym(); 233 n.o1 = paren_expr(); 234 n.o2 = statement(); 235 } 236 else if (lexer.sym == DO_SYM) /* "do" <statement> "while" <paren_expr> ";" */ 237 { 238 n = new_Node(DO); 239 lexer.next_sym(); 240 n.o1 = statement(); 241 if (lexer.sym == WHILE_SYM) 242 lexer.next_sym(); 243 else 244 syntax_error(); 245 n.o2 = paren_expr(); 246 if (lexer.sym == SEMI) 247 lexer.next_sym(); 248 else 249 syntax_error(); 250 } 251 else if (lexer.sym == SEMI) /* ";" */ 252 { 253 n = new_Node(EMPTY); 254 lexer.next_sym(); 255 } 256 else if (lexer.sym == LBRA) /* "{" { <statement> } "}" */ 257 { 258 n = new_Node(EMPTY); 259 lexer.next_sym(); 260 while (lexer.sym != RBRA) 261 { 262 Node* temp = n; 263 n = new_Node(SEQ); 264 n.o1 = temp; 265 n.o2 = statement(); 266 } 267 lexer.next_sym(); 268 } 269 else /* <expr> ";" */ 270 { 271 n = new_Node(EXPR); 272 n.o1 = expr(); 273 if (lexer.sym == SEMI) 274 lexer.next_sym(); 275 else 276 syntax_error(); 277 } 278 return n; 279 } 280 281 Node* program() /* <program> ::= <statement> */ 282 { 283 Node* n = new_Node(PROG); 284 lexer.next_sym(); 285 n.o1 = statement(); 286 if (lexer.sym != EOI) 287 syntax_error(); 288 return n; 289 } 290 } 291 292 293 /*---------------------------------------------------------------------------*/ 294 295 /* Code generator. */ 296 297 enum { IFETCH, ISTORE, IPUSH, IPOP, IADD, ISUB, ILT, JZ, JNZ, JMP, HALT }; 298 string[] opCodes = ["IFETCH", "ISTORE", "IPUSH", "IPOP", "IADD", "ISUB", "ILT", "JZ", "JNZ", "JMP", "HALT"]; 299 300 string opCodeToStr(byte op) { 301 if (op < opCodes.length) 302 return opCodes[op]; 303 return "INVALID"; 304 } 305 306 alias code = byte; 307 308 struct CodeGenerator 309 { 310 byte* pc; 311 312 void put(byte c) { *pc++ = c; } /* missing overflow check */ 313 byte* hole() { return pc++; } 314 void fix(byte* src, byte* dst) { *src = cast(byte)(dst - src); } /* missing overflow check */ 315 316 void compile(Node* n) 317 { 318 byte* p1, p2; 319 switch (n.kind) 320 { 321 case VAR : put(IFETCH); put(cast(byte)n.val); break; 322 case CST : put(IPUSH); put(cast(byte)n.val); break; 323 case ADD : compile(n.o1); compile(n.o2); put(IADD); break; 324 case SUB : compile(n.o1); compile(n.o2); put(ISUB); break; 325 case LT : compile(n.o1); compile(n.o2); put(ILT); break; 326 case SET : compile(n.o2); put(ISTORE); put(cast(byte)n.o1.val); break; 327 case IF1 : compile(n.o1); put(JZ); p1=hole(); compile(n.o2); fix(p1,pc); break; 328 case IF2 : compile(n.o1); put(JZ); p1=hole(); compile(n.o2); put(JMP); p2=hole(); 329 fix(p1,pc); compile(n.o3); fix(p2,pc); break; 330 case WHILE: p1=pc; compile(n.o1); put(JZ); p2=hole(); compile(n.o2); 331 put(JMP); fix(hole(),p1); fix(p2,pc); break; 332 case DO : p1=pc; compile(n.o1); compile(n.o2); put(JNZ); fix(hole(),p1); break; 333 case EMPTY: break; 334 case SEQ : compile(n.o1); compile(n.o2); break; 335 case EXPR : compile(n.o1); put(IPOP); break; 336 case PROG : compile(n.o1); put(HALT); break; 337 default: break; 338 } 339 } 340 } 341 342 void printAST(Node* n, string indent = "") 343 { 344 switch (n.kind) 345 { 346 case VAR : writeln(indent, "VAR ", cast(char)(n.val+'a')); break; 347 case CST : writeln(indent, "CST"); break; 348 case ADD : writeln(indent, "ADD"); printAST(n.o1, indent~" "); printAST(n.o2, indent~" "); break; 349 case SUB : writeln(indent, "SUB"); printAST(n.o1, indent~" "); printAST(n.o2, indent~" "); break; 350 case LT : writeln(indent, "LT"); printAST(n.o1, indent~" "); printAST(n.o2, indent~" "); break; 351 case SET : writeln(indent, "SET"); printAST(n.o2, indent~" "); break; 352 case IF1 : writeln(indent, "IF1"); printAST(n.o1, indent~" "); printAST(n.o2, indent~" "); break; 353 case IF2 : writeln(indent, "IF2"); printAST(n.o1, indent~" "); printAST(n.o2, indent~" "); printAST(n.o3, indent~" "); break; 354 case WHILE: writeln(indent, "WHILE"); printAST(n.o1, indent~" "); printAST(n.o2, indent~" "); break; 355 case DO : writeln(indent, "DO"); printAST(n.o1, indent~" "); printAST(n.o2, indent~" "); break; 356 case EMPTY: writeln(indent, "EMPTY"); break; 357 case SEQ : writeln(indent, "SEQ"); printAST(n.o1, indent~" "); printAST(n.o2, indent~" "); break; 358 case EXPR : writeln(indent, "EXPR"); printAST(n.o1, indent~" "); break; 359 case PROG : writeln(indent, "PROG"); printAST(n.o1, indent~" "); break; 360 default: break; 361 } 362 } 363 364 /*---------------------------------------------------------------------------*/ 365 366 /* Jit compiler */ 367 368 struct JitVM 369 { 370 import amd64asm; 371 import utils; 372 373 ubyte[] mem; 374 ubyte[] code; 375 376 int[26] globals; 377 378 void reset() 379 { 380 globals[] = 0; 381 } 382 383 void free() 384 { 385 free_executable_memory(mem); 386 mem = null; 387 } 388 389 void run() 390 { 391 alias JittedFunc = void function(void*); 392 JittedFunc func = cast(JittedFunc)mem.ptr; 393 //assert(false); 394 func(&globals[0]); 395 } 396 397 /// RAX stores ptr to variables int[26] 398 enum VARS_REG = Register.AX; 399 /// RCX temp 400 enum TEMP_REG_1 = Register.CX; 401 enum TEMP_REG_2 = Register.DX; 402 CodeGen_x86_64 gen; 403 404 void compile(Node* n) 405 { 406 if (mem.length == 0) mem = alloc_executable_memory(4096 * 64); 407 gen.encoder.setBuffer(mem); 408 409 gen.movq(VARS_REG, Register.CX); // Mov address into VARS_REG 410 compileNode(n); 411 code = gen.encoder.code; 412 413 gen.encoder.resetPC(); 414 } 415 416 void compileNode(Node* n) 417 { 418 switch (n.kind) 419 { 420 case VAR : 421 gen.movd(TEMP_REG_1, memAddrBaseDisp8(VARS_REG, cast(ubyte)(n.val * 4))); // IFETCH 422 break; 423 case CST : // constant 424 gen.movd(TEMP_REG_1, Imm32(n.val)); // n.val == literal value 425 break; 426 case ADD : 427 compileNode(n.o1); 428 gen.pushq(TEMP_REG_1); 429 compileNode(n.o2); 430 gen.popq(TEMP_REG_2); // o2 431 gen.addd(TEMP_REG_1, TEMP_REG_2); // o1 += o2 432 break; 433 case SUB : // o1 - o2 434 compileNode(n.o2); 435 gen.pushq(TEMP_REG_1); 436 compileNode(n.o1); 437 gen.popq(TEMP_REG_2); // o2 438 gen.subd(TEMP_REG_1, TEMP_REG_2); // o1 = o1 - o2 439 break; 440 case LT : // o1 < o2 441 compileNode(n.o1); 442 gen.pushq(TEMP_REG_1); 443 compileNode(n.o2); 444 gen.popq(TEMP_REG_2); // o1 445 gen.cmpd(TEMP_REG_2, TEMP_REG_1); // cmp(o1, o2) // memAddrBase(Register.SP) 446 gen.setcc(Condition.L, TEMP_REG_1); 447 gen.movzx_btod(TEMP_REG_1, TEMP_REG_1); 448 break; 449 case SET : 450 compileNode(n.o2); // result in TEMP_REG_1 451 gen.movd(memAddrBaseDisp8(VARS_REG, cast(ubyte)(n.o1.val * 4)), TEMP_REG_1); // ISTORE; 452 break; 453 case IF1 : 454 compileNode(n.o1); // paren_expr 455 gen.testd(TEMP_REG_1, TEMP_REG_1); 456 auto false_jump = gen.saveFixup(); 457 gen.jcc(Condition.Z, Imm32(0)); 458 auto nextInstrOff = gen.pc; 459 compileNode(n.o2); 460 false_jump.jcc(Condition.Z, jumpOffset(nextInstrOff, gen.pc)); 461 break; 462 case IF2 : 463 compileNode(n.o1); // paren_expr 464 gen.testd(TEMP_REG_1, TEMP_REG_1); 465 auto else_jump_fix = gen.saveFixup(); 466 gen.jcc(Condition.Z, Imm32(0)); 467 auto else_pc = gen.pc; 468 compileNode(n.o2); 469 auto end_jump = gen.saveFixup(); 470 gen.jmp(Imm32(0)); 471 auto then_pc = gen.pc; 472 else_jump_fix.jcc(Condition.Z, jumpOffset(else_pc, gen.pc)); // fix cond -> else 473 compileNode(n.o3); 474 end_jump.jmp(jumpOffset(then_pc, gen.pc)); // fix then -> end 475 break; 476 case WHILE: 477 auto condition_pc = gen.pc; 478 compileNode(n.o1); // paren_expr 479 gen.testd(TEMP_REG_1, TEMP_REG_1); 480 auto break_fix = gen.saveFixup(); 481 gen.jcc(Condition.Z, Imm32(0)); // break 482 auto break_pc = gen.pc; 483 compileNode(n.o2); // statement 484 auto continue_fix = gen.saveFixup(); 485 gen.jmp(Imm32(0)); 486 continue_fix.jmp(jumpOffset(gen.pc, condition_pc)); // continue 487 break_fix.jcc(Condition.Z, jumpOffset(break_pc, gen.pc)); // fix cond -> end 488 break; 489 case DO : 490 auto do_pc = gen.pc; 491 compileNode(n.o1); // do <statement> 492 compileNode(n.o2); // while <paren_expr> 493 gen.testd(TEMP_REG_1, TEMP_REG_1); 494 auto continue_fix = gen.saveFixup(); 495 gen.jcc(Condition.NZ, Imm32(0)); 496 continue_fix.jcc(Condition.NZ, jumpOffset(gen.pc, do_pc)); // continue 497 break; 498 case EMPTY: break; 499 case SEQ : compileNode(n.o1); compileNode(n.o2); break; 500 case EXPR : compileNode(n.o1); break; 501 case PROG : compileNode(n.o1); gen.ret(); break; 502 default: break; 503 } 504 } 505 } 506 507 /*---------------------------------------------------------------------------*/ 508 char[1000] buf; 509 size_t buf_index; 510 struct Putter { 511 static void reset() { 512 buf_index = 0; 513 } 514 static void put(const char chr) { 515 buf[buf_index++] = chr; 516 } 517 } 518 char[] print_stack(int[] stack, int* stack_ptr) 519 { 520 ptrdiff_t size = stack_ptr - stack.ptr; 521 if (size > 0) 522 { 523 import std.format : formattedWrite; 524 const(char)[] temp = buf; 525 int[] stack_vals = stack[0..size]; 526 Putter.reset(); 527 foreach(int val; stack_vals) 528 { 529 formattedWrite(Putter(), "%s ", val); 530 } 531 return buf[0..buf_index]; 532 } 533 return null; 534 } 535 536 /* Virtual machine. */ 537 struct VM 538 { 539 int[26] globals; 540 541 void run(code[] _object) 542 { 543 int[1000] stack = void; 544 int* sp = stack.ptr; 545 code* pc = _object.ptr; 546 547 loop: while(true) 548 { 549 //writefln("exec\t%s\t%s\t% 6s\t%s\t%s", pc - _object.ptr, *pc, opCodeToStr(*pc), sp - stack.ptr, print_stack(stack, sp)); 550 final switch (*pc++) 551 { 552 case IFETCH: *sp++ = globals[*pc++]; break; 553 case ISTORE: globals[*pc++] = sp[-1]; break; 554 case IPUSH : *sp++ = *pc++; break; 555 case IPOP : --sp; break; 556 case IADD : sp[-2] = sp[-2] + sp[-1]; --sp; break; 557 case ISUB : sp[-2] = sp[-2] - sp[-1]; --sp; break; 558 case ILT : sp[-2] = sp[-2] < sp[-1]; --sp; break; 559 case JMP : pc += *pc; break; 560 case JZ : if (*--sp == 0) pc += *pc; else pc++; break; 561 case JNZ : if (*--sp != 0) pc += *pc; else pc++; break; 562 case HALT: break loop; 563 } 564 } 565 } 566 }