/**
Copyright: Copyright (c) 2019 Andrey Penechko.
License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
Authors: Andrey Penechko.
*/
module vox.be.obj;

import std.bitmanip : bitfields;
import std.format;
import std.stdio;
import std.traits : getUDAs;

import vox.all;

/// Describes a symbol supplied by the host: its name, the name of the module
/// it is exposed under (defaults to "host") and a raw pointer to it.
struct HostSymbol
{
	this(string symName, void* ptr, string modName = "host") {
		this.symName = symName;
		this.modName = modName;
		this.ptr = ptr;
	}
	string symName;
	string modName;
	void* ptr;
}

/// Pair of (module identifier, symbol identifier).
struct ExternalSymbolId
{
	Identifier modId;
	Identifier symId;
}

/// Yields the first `LinkIndexKind` UDA attached to `T`.
/// The object structs below are tagged with `@(LinkIndexKind.xxx)` for this purpose.
enum getLinkIndexKind(T) = getUDAs!(T, LinkIndexKind)[0];

/// Tag stored in the 4-bit `kind` field of `LinkIndex`.
enum LinkIndexKind : ubyte {
	none,
	symbol,
	section,
	module_,
	reference,
}

/// 32-bit handle to an item stored in `ObjectSymbolTable.buffer`.
/// Packs a 28-bit `bufferIndex` and a 4-bit `kind`; the same 32 bits are
/// readable through `asUint`, where 0 means "undefined index".
struct LinkIndex
{
	///
	this(uint _index, LinkIndexKind _kind)
	{
		bufferIndex = _index;
		kind = _kind;
	}

	/// Writes `<null>` for the undefined index, `<none>` for kind `none`,
	/// otherwise a `sym.`/`sec.`/`mod.`/`ref.` prefix followed by `bufferIndex`.
	void toString(scope void delegate(const(char)[]) sink) const {
		if (asUint == 0) {
			sink("<null>");
			return;
		}

		final switch(kind) with(LinkIndexKind) {
			case none: sink("<none>"); break;
			case symbol: sink.formattedWrite("sym.%s", bufferIndex); break;
			case section: sink.formattedWrite("sec.%s", bufferIndex); break;
			case module_: sink.formattedWrite("mod.%s", bufferIndex); break;
			case reference: sink.formattedWrite("ref.%s", bufferIndex); break;
		}
	}

	union
	{
		mixin(bitfields!(
			uint, "bufferIndex", 28,
			LinkIndexKind, "kind", 4,
		));

		// is 0 for undefined index
		uint asUint;
	}

	/// True when any bit of the handle is set.
	bool isDefined() { return asUint != 0; }

	bool isSymbol() { return kind == LinkIndexKind.symbol; }
	bool isSection() { return kind == LinkIndexKind.section; }
	bool isModule() { return kind == LinkIndexKind.module_; }
	bool isReference() { return kind == LinkIndexKind.reference; }
}

/// Bit flags stored in `ObjectSymbol.flags`.
enum ObjectSymbolFlags : ushort {
	isMutable = 1 << 0,
	isAllZero = 1 << 1,
	needsZeroTermination = 1 << 2,
	/// If set calls use indirect call form
	/// symbol represents not a data but pointer to data
	isIndirect = 1 << 3,
	/// If true, data can be printed for debug as a string
	isString = 1 << 4,
	/// If true, data can be printed for debug as an address/pointer
	isPointer = 1 << 5,
	/// If true, data can be printed for debug as a float
	isFloat = 1 << 6,
	/// Marked if transitively used from any root symbol (only used for imported symbols atm)
	isReferenced = 1 << 7,
}

enum ObjectSymbolKind : ushort {
	/// We have its contents
	isLocal,
	/// Symbol comes from dll
	isImported,
	/// Symbol comes from host
	isHost,
}

/// Final data is located at ObjectSection.sectionData + ObjectSymbol.sectionOffset
@(LinkIndexKind.symbol)
struct ObjectSymbol
{
	///
	ObjectSymbolKind kind;
	/// Set of ObjectSymbolFlags
	ushort flags;
	/// How symbol must be aligned
	ubyte alignmentPower = 0;
	///
	Identifier id;
	/// Points to initializer if it is provided. (Can be null)
	ubyte* dataPtr;
	/// Offset from the start of section. Can be equal to dataPtr if host symbol
	ulong sectionOffset;
	/// Length in bytes. Doesn't include padding and zero termination
	/// Is set in setInitializer (when has initializer), or manually (when zero inited, or is external host symbol)
	uint length;
	/// Symbol is inside this module
	LinkIndex moduleIndex;
	/// Symbol is inside this section
	LinkIndex sectionIndex;
	/// List of references coming from this symbol
	LinkIndex firstRef;
	/// List of module symbols
	LinkIndex nextSymbol;

	/// Sets the `isReferenced` flag.
	void markReferenced() { flags |= ObjectSymbolFlags.isReferenced; }

	/// Alignment in bytes: 2 to the power of `alignmentPower`.
	uint alignment() { return 1 << cast(uint)alignmentPower; }

	bool isMutable() { return cast(bool)(flags & ObjectSymbolFlags.isMutable); }
	bool isAllZero() { return cast(bool)(flags & ObjectSymbolFlags.isAllZero); }
	bool needsZeroTermination() { return cast(bool)(flags & ObjectSymbolFlags.needsZeroTermination); }
	bool isIndirect() { return cast(bool)(flags & ObjectSymbolFlags.isIndirect); }
	bool isString() { return cast(bool)(flags & ObjectSymbolFlags.isString); }
	bool isPointer() { return cast(bool)(flags & ObjectSymbolFlags.isPointer); }
	bool isReferenced() { return cast(bool)(flags & ObjectSymbolFlags.isReferenced); }

	/// Points the symbol at `data` (no copy is made) and records its length.
	/// Asserts that `data` is not bigger than 1GB.
	void setInitializer(ubyte[] data) {
		dataPtr = data.ptr;
		assert(data.length <= 1024UL*1024*1024*1, "initializer is bigger than 1GB");
		length = cast(uint)data.length;
	}

	/// Returns `dataPtr[0..length]`, or null when `dataPtr` is null.
	ubyte[] initializer() {
		if (dataPtr is null) return null;
		return dataPtr[0..length];
	}
}

enum ObjectModuleKind : ubyte {
	isLocal,
	isImported,
	isHost
}

@(LinkIndexKind.module_)
struct ObjectModule
{
	///
	ObjectModuleKind kind;
	/// Set of ObjectModuleFlags
	ushort flags;
	/// Used for referencing dll modules in import table
	Identifier id;
	/// Linked list of modules
	LinkIndex nextModule;
	/// Linked list of symbols
	LinkIndex firstSymbol;

	/// Sets the `isReferenced` flag.
	void markReferenced() { flags |= ObjectModuleFlags.isReferenced; }

	bool isLocal() { return kind == ObjectModuleKind.isLocal; }
	bool isImported() { return kind == ObjectModuleKind.isImported; }
	// NOTE(review): as written this is true for local and imported modules and
	// false for host modules. The name suggests `isImported || isHost` may have
	// been intended - confirm against callers before changing.
	bool isExternal() { return isLocal || isImported; }

	bool isReferenced() { return cast(bool)(flags & ObjectModuleFlags.isReferenced); }
	bool isVerbose() { return cast(bool)(flags & ObjectModuleFlags.isVerbose); }
}

enum ObjectModuleFlags : ushort {
	/// Marked if transitively used from any root symbol (only used for imported symbols atm)
	isReferenced = 1 << 0,
	/// Only printed in dump when verbose printing is enabled
	isVerbose = 1 << 1,
}

@(LinkIndexKind.section)
struct ObjectSection
{
	/// In JIT mode: absolute address
	/// In exe mode: offset from executable start to the section start after loading (in memory)
	ulong sectionAddress;
	/// Can be null
	/// Storage for appending data to this section
	/// In JIT mode `buffer.bufPtr` is equal to sectionAddress
	/// Points to the data of this section. Used to perform fixups
	/// Length of initialized data is in `buffer.length`
	Arena!ubyte* buffer;
	/// Length of zero-initialized data (not included into `initDataLength`, but included into `totalLength`)
	uint zeroDataLength;
	/// Length of initialized data (0 when there is no buffer)
	ulong initDataLength() {
		if (!buffer) return 0;
		return buffer.length;
	}
	/// Length of initialized data plus `zeroDataLength`
	ulong totalLength() {
		if (!buffer) return zeroDataLength;
		return buffer.length + zeroDataLength;
	}
	///
	ubyte alignmentPower;
	///
	ObjectSectionType type;
	/// set of ObjectSectionFlags
	ushort flags;
	///
	Identifier id;

	/// Alignment in bytes: 2 to the power of `alignmentPower`.
	uint alignment() { return 1 << cast(uint)alignmentPower; }
	bool flag_read() { return (flags & ObjectSectionFlags.read) != 0;}
	bool flag_write() { return (flags & ObjectSectionFlags.write) != 0;}
	bool flag_execute() { return (flags & ObjectSectionFlags.execute) != 0;}
}

enum ObjectSectionType : ubyte {
	host,    // section for host symbols
	code,    // executable code
	imports, // import section
	rw_data, // rw data section
	ro_data, // r data section
}
enum NUM_BUILTIN_SECTIONS = ObjectSectionType.max+1;

enum ObjectSectionFlags : ushort {
	none = 0,
	read = 1,
	write = 2,
	execute = 4,
}

enum ObjectSymbolRefKind : ubyte {
	/// 64 bits
	absolute64,
	/// 32 bit relative offset
	relative32,
}

/// Represents numeric reference contained inside 'fromSymbol'
@(LinkIndexKind.reference)
struct ObjectSymbolReference
{
	/// TODO: not needed. We get to references through `fromSymbol` already
	LinkIndex fromSymbol;
	///
	LinkIndex referencedSymbol;
	/// link to next reference coming from 'fromSymbol'
	LinkIndex nextReference;
	/// Offset from start of 'fromSymbol' to the reference to 'referencedSymbol'
	uint refOffset;
	/// Extra offset added to the reference inside 'fromSymbol'
	/// For example on x86_64 direct RIP-relative call instruction is 0xE8 0xNN 0xNN 0xNN 0xNN
	/// where 0xNN 0xNN 0xNN 0xNN is offset between address of next instruction and callee.
	/// Fixup address is (fromSymbol_address + refOffset)
	/// Fixup offset is calculated as (referencedSymbol_address - (fromSymbol_address + refOffset + extraOffset))
	/// In call example extraOffset = 4

	// TODO: store extra offset inside memory being fixed
	short extraOffset;
	/// Describes type of reference and its size in bytes
	ObjectSymbolRefKind refKind;
}

/// Stores modules, symbols, sections and references in a single `Arena!uint`
/// and links them together into intrusive singly-linked lists via `LinkIndex`.
struct ObjectSymbolTable
{
	/// Backing storage for all items; `LinkIndex.bufferIndex` indexes into it
	Arena!uint buffer;
	/// Head of the list of modules (most recently added first)
	LinkIndex firstModule;

	alias addSymbol = append!ObjectSymbol;
	alias addSection = append!ObjectSection;
	alias addModule = append!ObjectModule;
	alias addReference = append!ObjectSymbolReference;

	/// Copies `value` into `buffer` and returns a `LinkIndex` that refers to the copy.
	/// Additionally, depending on `T`:
	/// - `ObjectSymbolReference`: the copy is prepended to the reference list of
	///   `fromSymbol` and `referencedSymbol` is marked as referenced;
	/// - `ObjectModule`: the copy is prepended to the module list (`firstModule`);
	/// - `ObjectSymbol`: the copy is prepended to the symbol list of its module.
	LinkIndex append(T)(ref T value)
	{
		LinkIndex result;
		result.bufferIndex = cast(uint)buffer.length;
		result.kind = getLinkIndexKind!T;

		// Storage is handed out in uint-sized slots, so round the item size up
		enum numAllocatedSlots = divCeil(T.sizeof, uint.sizeof);
		T* item = cast(T*)buffer.voidPut(numAllocatedSlots).ptr;
		*item = value;

		static if (is(T == ObjectSymbolReference))
		{
			ObjectSymbol* sym = getSymbol(item.fromSymbol);
			item.nextReference = sym.firstRef;
			sym.firstRef = result;
			getSymbol(item.referencedSymbol).markReferenced;
		}
		else static if (is(T == ObjectModule))
		{
			item.nextModule = firstModule;
			firstModule = result;
		}
		else static if (is(T == ObjectSymbol))
		{
			ObjectModule* mod = getModule(item.moduleIndex);
			item.nextSymbol = mod.firstSymbol;
			mod.firstSymbol = result;
		}
		//writefln("add %s %s", result.kind, result.bufferIndex);

		return result;
	}

	alias getSymbol = get!ObjectSymbol;
	alias getSection = get!ObjectSection;
	alias getModule = get!ObjectModule;
	alias getReference = get!ObjectSymbolReference;

	/// Returns a pointer to the item that `index` refers to.
	/// Asserts that `index` is defined and that its kind matches the kind `T` is tagged with.
	T* get(T)(LinkIndex index)
	{
		assert(index.isDefined, "null index");
		assert(index.kind == getLinkIndexKind!T, format("%s != %s", index.kind, getLinkIndexKind!T));
		return cast(T*)(&buffer.bufPtr[index.bufferIndex]);
	}

	/// Prints every module, and for non-verbose modules every symbol together
	/// with its address, section, data and outgoing references, to stdout.
	void dump(CompilationContext* c)
	{
		for (LinkIndex modIndex = firstModule; modIndex.isDefined; modIndex = getModule(modIndex).nextModule)
		{
			ObjectModule* mod = getModule(modIndex);
			writefln("%s %s", modIndex, c.idString(mod.id));

			if (mod.isVerbose) {
				writeln(` (hidden as isVerbose)`);
				continue;
			}

			LinkIndex symIndex = mod.firstSymbol;
			while (symIndex.isDefined)
			{
				ObjectSymbol* sym = getSymbol(symIndex);
				ObjectSection* section = getSection(sym.sectionIndex);

				// `dataPtr` can be null (symbol without initializer). `initializer`
				// yields an empty slice in that case, so the string prints below
				// never slice a null pointer.
				char[] symText = cast(char[])sym.initializer;

				writef(" %s %s %s bytes", symIndex, c.idString(sym.id), sym.length);
				if (sym.isAllZero) write(" zeroinit");
				if (sym.needsZeroTermination) write(" zeroterm");
				if (sym.isString)
					writefln(` "%s"`, symText);
				else writeln;

				writefln(" address: 0x%08X", section.sectionAddress + sym.sectionOffset);
				writefln(" section: 0x%08X %s", section.sectionAddress, c.idString(section.id));
				writefln(" data: %s bytes", sym.length);
				if (sym.isString) {
					writefln(` as string: "%s"`, symText);
				}
				// Only read the pointer value when there is data to read from
				if (sym.isPointer && sym.dataPtr) {
					switch(sym.length) {
						case 4: writefln(` as ptr: 0x%X`, *cast(uint*)sym.dataPtr); break;
						case 8: writefln(` as ptr: 0x%X`, *cast(ulong*)sym.dataPtr); break;
						default: break;
					}
				}
				if (sym.dataPtr) {
					printHex(sym.dataPtr[0..sym.length], 16, PrintAscii.no, " ");
				}

				LinkIndex symRefIndex = sym.firstRef;
				while (symRefIndex.isDefined)
				{
					ObjectSymbolReference* symRef = getReference(symRefIndex);
					writefln(" %s -> %s: off 0x%08X extraOff %s %s",
						symRefIndex, symRef.referencedSymbol, symRef.refOffset,
						symRef.extraOffset, symRef.refKind);
					symRefIndex = symRef.nextReference;
				}
				symIndex = sym.nextSymbol;
			}
		}
	}

	// prints function label JSON for .dd64 database file of x64dbg debugger
	// Only symbols of local modules that live in the builtin code section are printed
	void print_dd64_debug_info(CompilationContext* context)
	{
		LinkIndex modIndex = firstModule;
		while (modIndex.isDefined)
		{
			ObjectModule* mod = getModule(modIndex);
			if (mod.isLocal)
			{
				LinkIndex symIndex = mod.firstSymbol;
				while (symIndex.isDefined)
				{
					ObjectSymbol* sym = getSymbol(symIndex);
					ObjectSection* section = getSection(sym.sectionIndex);

					// it is a function
					if (sym.sectionIndex == context.builtinSections[ObjectSectionType.code])
					{
						writefln(" {");
						writefln(" \"module\": \"%s\",", context.outputFilename);
						writefln(" \"address\": \"0x%X\",", section.sectionAddress + sym.sectionOffset);
						writefln(" \"manual\": true,");
						//writefln(" \"text\": \"%s.%s\"", context.idString(mod.id), context.idString(sym.id));
						writefln(" \"text\": \"%s\"", context.idString(sym.id));
						writefln(" },");
						//writefln(" 0x%X %s.%s", section.sectionAddress + sym.sectionOffset, context.idString(mod.id), context.idString(sym.id));
					}

					symIndex = sym.nextSymbol;
				}
			}

			modIndex = mod.nextModule;
		}
	}
}