1 /**
2 Copyright: Copyright (c) 2019 Andrey Penechko.
3 License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
4 Authors: Andrey Penechko.
5 */
6 module vox.be.obj;
7 
8 import std.bitmanip : bitfields;
9 import std.format;
10 import std.stdio;
11 import std.traits : getUDAs;
12 
13 import vox.all;
14 
/// Describes a symbol provided by the host: its name, the name of the module
/// it is registered under, and the address of the symbol.
struct HostSymbol
{
	/// Name of the symbol
	string symName;
	/// Name of the module the symbol belongs to ("host" unless overridden)
	string modName;
	/// Address of the symbol
	void* ptr;

	/// Params:
	///   symName = name of the symbol
	///   ptr     = address of the symbol
	///   modName = name of the owning module, "host" by default
	this(string symName, void* ptr, string modName = "host")
	{
		this.ptr = ptr;
		this.modName = modName;
		this.symName = symName;
	}
}
26 
/// Pair of identifiers naming an external symbol: the module and the symbol itself.
struct ExternalSymbolId
{
	/// Identifier of the module
	Identifier modId;
	/// Identifier of the symbol
	Identifier symId;
}
32 
/// Yields the first `LinkIndexKind` value attached to `T` as a user-defined attribute.
template getLinkIndexKind(T)
{
	enum getLinkIndexKind = getUDAs!(T, LinkIndexKind)[0];
}
34 
/// Tag describing what a `LinkIndex` refers to.
/// Also attached as a UDA to the struct that each kind corresponds to
/// (`ObjectSymbol`, `ObjectSection`, `ObjectModule`, `ObjectSymbolReference`).
enum LinkIndexKind : ubyte {
	/// No kind; printed as `<none>` by `LinkIndex.toString`
	none,
	/// Refers to an `ObjectSymbol`
	symbol,
	/// Refers to an `ObjectSection`
	section,
	/// Refers to an `ObjectModule`
	module_,
	/// Refers to an `ObjectSymbolReference`
	reference,
}
42 
/// Tagged index of an object stored in `ObjectSymbolTable.buffer`.
/// Packs a 28-bit slot index and a 4-bit `LinkIndexKind` into a single `uint`.
/// A value whose raw representation is 0 denotes an undefined (null) index.
struct LinkIndex
{
	/// Creates an index that refers to slot `_index` and is tagged with `_kind`
	this(uint _index, LinkIndexKind _kind)
	{
		bufferIndex = _index;
		kind = _kind;
	}

	/// Writes a short textual form into `sink`:
	/// `<null>` for an undefined index, otherwise a kind prefix followed by the slot index
	void toString(scope void delegate(const(char)[]) sink) const {
		if (asUint == 0) {
			sink("<null>");
			return;
		}

		final switch(kind) with(LinkIndexKind) {
			case none: sink("<none>"); break;
			case symbol: sink.formattedWrite("sym.%s", bufferIndex); break;
			case section: sink.formattedWrite("sec.%s", bufferIndex); break;
			case module_: sink.formattedWrite("mod.%s", bufferIndex); break;
			case reference: sink.formattedWrite("ref.%s", bufferIndex); break;
		}
	}

	union
	{
		mixin(bitfields!(
			uint,         "bufferIndex",  28,
			LinkIndexKind,       "kind",   4,
		));

		// is 0 for undefined index
		uint asUint;
	}

	// All queries below only read the value, so they are `const`
	// and can be used on const/immutable indices (same as `toString`).

	/// True if this index is not the null index
	bool isDefined() const { return asUint != 0; }

	/// True if the index is tagged as `LinkIndexKind.symbol`
	bool isSymbol() const { return kind == LinkIndexKind.symbol; }
	/// True if the index is tagged as `LinkIndexKind.section`
	bool isSection() const { return kind == LinkIndexKind.section; }
	/// True if the index is tagged as `LinkIndexKind.module_`
	bool isModule() const { return kind == LinkIndexKind.module_; }
	/// True if the index is tagged as `LinkIndexKind.reference`
	bool isReference() const { return kind == LinkIndexKind.reference; }
}
85 
/// Bit flags that are combined in `ObjectSymbol.flags`
enum ObjectSymbolFlags : ushort {
	///
	isMutable            = 1 << 0,
	/// Reported as "zeroinit" in the symbol table dump
	isAllZero            = 1 << 1,
	/// Reported as "zeroterm" in the symbol table dump
	needsZeroTermination = 1 << 2,
	/// If set calls use indirect call form
	/// symbol represents not a data but pointer to data
	isIndirect           = 1 << 3,
	/// If true, data can be printed for debug as a string
	isString             = 1 << 4,
	/// If true, data can be printed for debug as an address/pointer
	isPointer            = 1 << 5,
	/// If true, data can be printed for debug as a float
	isFloat              = 1 << 6,
	/// Marked if transitively used from any root symbol (only used for imported symbols atm)
	isReferenced         = 1 << 7,
}
102 
/// Origin of a symbol; stored in `ObjectSymbol.kind`
enum ObjectSymbolKind : ushort {
	/// We have its contents
	isLocal,
	/// Symbol comes from dll
	isImported,
	/// Symbol comes from host
	isHost,
}
111 
/// Describes a single symbol of the object symbol table.
/// Final address of the symbol is ObjectSection.sectionAddress + ObjectSymbol.sectionOffset
@(LinkIndexKind.symbol)
struct ObjectSymbol
{
	///
	ObjectSymbolKind kind;
	/// Set of ObjectSymbolFlags
	ushort flags;
	/// How symbol must be aligned (alignment in bytes is `1 << alignmentPower`)
	ubyte alignmentPower = 0;
	///
	Identifier id;
	/// Points to initializer if it is provided. (Can be null)
	ubyte* dataPtr;
	/// Offset from the start of section. Can be equal to dataPtr if host symbol
	ulong sectionOffset;
	/// Length in bytes. Doesn't include padding and zero termination
	/// Is set in setInitializer (when has initializer), or manually (when zero inited, or is external host symbol)
	uint length;
	/// Symbol is inside this module
	LinkIndex moduleIndex;
	/// Symbol is inside this section
	LinkIndex sectionIndex;
	/// List of references coming from this symbol
	LinkIndex firstRef;
	/// List of module symbols
	LinkIndex nextSymbol;

	/// Sets the `isReferenced` flag
	void markReferenced() { flags |= ObjectSymbolFlags.isReferenced; }

	/// Alignment computed from `alignmentPower`
	uint alignment() { return 1 << cast(uint)alignmentPower; }

	// One accessor per member of ObjectSymbolFlags
	bool isMutable() { return cast(bool)(flags & ObjectSymbolFlags.isMutable); }
	bool isAllZero() { return cast(bool)(flags & ObjectSymbolFlags.isAllZero); }
	bool needsZeroTermination() { return cast(bool)(flags & ObjectSymbolFlags.needsZeroTermination); }
	bool isIndirect() { return cast(bool)(flags & ObjectSymbolFlags.isIndirect); }
	bool isString() { return cast(bool)(flags & ObjectSymbolFlags.isString); }
	bool isPointer() { return cast(bool)(flags & ObjectSymbolFlags.isPointer); }
	bool isFloat() { return cast(bool)(flags & ObjectSymbolFlags.isFloat); }
	bool isReferenced() { return cast(bool)(flags & ObjectSymbolFlags.isReferenced); }

	/// Makes `data` the initializer of this symbol: stores its pointer and length.
	/// The slice is not copied. `data` must not be bigger than 1GB.
	void setInitializer(ubyte[] data) {
		// Validate before touching any field, so that `length` cannot be silently truncated
		assert(data.length <= 1024UL*1024*1024*1, "initializer is bigger than 1GB");
		dataPtr = data.ptr;
		length = cast(uint)data.length;
	}

	/// Returns the initializer as a slice of `length` bytes, or null when `dataPtr` is null
	ubyte[] initializer() {
		if (dataPtr is null) return null;
		return dataPtr[0..length];
	}
}
162 
/// Kind of a module; stored in `ObjectModule.kind`.
/// Member names parallel those of `ObjectSymbolKind`.
enum ObjectModuleKind : ubyte {
	///
	isLocal,
	///
	isImported,
	///
	isHost
}
168 
/// Describes a module of the object symbol table.
/// Modules form a linked list (`nextModule`), and each module owns a linked list of symbols (`firstSymbol`).
@(LinkIndexKind.module_)
struct ObjectModule
{
	///
	ObjectModuleKind kind;
	/// Set of ObjectModuleFlags
	ushort flags;
	/// Used for referencing dll modules in import table
	Identifier id;
	/// Linked list of modules
	LinkIndex nextModule;
	/// Linked list of symbols
	LinkIndex firstSymbol;

	/// Sets the `isReferenced` flag
	void markReferenced() { flags |= ObjectModuleFlags.isReferenced; }

	/// True if the module kind is `ObjectModuleKind.isLocal`
	bool isLocal() { return kind == ObjectModuleKind.isLocal; }
	/// True if the module kind is `ObjectModuleKind.isImported`
	bool isImported() { return kind == ObjectModuleKind.isImported; }
	/// True for every module that is not local (imported or host modules).
	/// Previously this returned `isLocal || isImported`, which reported local
	/// modules as external and host modules as not external.
	bool isExternal() { return !isLocal; }

	/// True if the `isReferenced` flag is set
	bool isReferenced() { return cast(bool)(flags & ObjectModuleFlags.isReferenced); }
	/// True if the `isVerbose` flag is set
	bool isVerbose() { return cast(bool)(flags & ObjectModuleFlags.isVerbose); }
}
192 
/// Bit flags that are combined in `ObjectModule.flags`
enum ObjectModuleFlags : ushort {
	/// Marked if transitively used from any root symbol (only used for imported symbols atm)
	isReferenced = 1 << 0,
	/// Only printed in dump when verbose printing is enabled
	isVerbose    = 1 << 1,
}
199 
/// Describes a section of the object symbol table
@(LinkIndexKind.section)
struct ObjectSection
{
	/// JIT mode: absolute address of the section
	/// Exe mode: offset from executable start to the section start after loading (in memory)
	ulong sectionAddress;
	/// Storage used for appending data to this section (can be null)
	/// Points to the data of this section. Used to perform fixups
	/// In JIT mode `buffer.bufPtr` is equal to sectionAddress
	/// `buffer.length` is the length of initialized data
	Arena!ubyte* buffer;
	/// Length of zero-initialized data (counted by `totalLength`, but not by `initDataLength`)
	uint zeroDataLength;

	/// Length of initialized data; 0 when there is no buffer
	ulong initDataLength()
	{
		return buffer is null ? 0 : buffer.length;
	}

	/// Length of initialized data plus length of zero-initialized data
	ulong totalLength()
	{
		return buffer is null ? zeroDataLength : buffer.length + zeroDataLength;
	}

	/// Alignment in bytes is `1 << alignmentPower`
	ubyte alignmentPower;
	///
	ObjectSectionType type;
	/// set of ObjectSectionFlags
	ushort flags;
	///
	Identifier id;

	/// Alignment computed from `alignmentPower`
	uint alignment()
	{
		return 1 << cast(uint)alignmentPower;
	}

	/// True if `ObjectSectionFlags.read` is set
	bool flag_read()
	{
		return cast(bool)(flags & ObjectSectionFlags.read);
	}

	/// True if `ObjectSectionFlags.write` is set
	bool flag_write()
	{
		return cast(bool)(flags & ObjectSectionFlags.write);
	}

	/// True if `ObjectSectionFlags.execute` is set
	bool flag_execute()
	{
		return cast(bool)(flags & ObjectSectionFlags.execute);
	}
}
238 
/// Type of a section; stored in `ObjectSection.type`.
/// `NUM_BUILTIN_SECTIONS` is derived from the last member of this enum.
enum ObjectSectionType : ubyte {
	host,    // section for host symbols
	code,    // executable code
	imports, // import section
	rw_data, // read-write data section
	ro_data, // read-only data section
}
/// Number of members of `ObjectSectionType` (largest member value plus one)
enum NUM_BUILTIN_SECTIONS = ObjectSectionType.max+1;
247 
/// Bit flags that are combined in `ObjectSection.flags`.
/// Queried through `ObjectSection.flag_read`, `flag_write` and `flag_execute`.
enum ObjectSectionFlags : ushort {
	/// No flags set
	none = 0,
	///
	read = 1,
	///
	write = 2,
	///
	execute = 4,
}
254 
/// Kind of a reference; stored in `ObjectSymbolReference.refKind`
enum ObjectSymbolRefKind : ubyte {
	/// 64 bits
	absolute64,
	/// 32 bit relative offset
	relative32,
}
261 
/// Represents numeric reference contained inside 'fromSymbol'
/// References of one symbol form a linked list that starts at `ObjectSymbol.firstRef`
/// and continues through `nextReference`.
@(LinkIndexKind.reference)
struct ObjectSymbolReference
{
	/// Symbol that contains the reference
	/// TODO: not needed. We get to references through `fromSymbol` already
	LinkIndex fromSymbol;
	/// Symbol that the reference points to
	LinkIndex referencedSymbol;
	/// link to next reference coming from 'fromSymbol'
	LinkIndex nextReference;
	/// Offset from start of 'fromSymbol' to the reference to 'referencedSymbol'
	uint refOffset;
	/// Extra offset added to the reference inside 'fromSymbol'
	/// For example on x86_64 direct RIP-relative call instruction is 0xE8 0xNN 0xNN 0xNN 0xNN
	/// where 0xNN 0xNN 0xNN 0xNN is offset between address of next instruction and callee.
	/// Fixup address is (fromSymbol_address + refOffset)
	/// Fixup offset is calculated as (referencedSymbol_address - (fromSymbol_address + refOffset + extraOffset))
	/// In call example extraOffset = 4

	// TODO: store extra offset inside memory being fixed
	short extraOffset;
	/// Describes type of reference and its size in bytes
	ObjectSymbolRefKind refKind;
}
286 
/// Stores modules, sections, symbols and references in a single arena of `uint` slots.
/// Objects are addressed by `LinkIndex`, whose `bufferIndex` is the slot where the object starts.
struct ObjectSymbolTable
{
	/// Backing storage. Each object occupies a whole number of `uint` slots
	Arena!uint buffer;
	/// Head of the linked list of modules (most recently added module comes first)
	LinkIndex firstModule;

	alias addSymbol = append!ObjectSymbol;
	alias addSection = append!ObjectSection;
	alias addModule = append!ObjectModule;
	alias addReference = append!ObjectSymbolReference;

	/// Copies `value` into the buffer and returns its index.
	/// Additionally links the new object into the list it belongs to:
	/// - reference: prepended to the reference list of `fromSymbol`; `referencedSymbol` gets marked as referenced
	/// - module: prepended to the list of modules
	/// - symbol: prepended to the symbol list of the module at `moduleIndex`
	/// Sections are not linked anywhere.
	LinkIndex append(T)(ref T value)
	{
		LinkIndex result;
		result.bufferIndex = cast(uint)buffer.length;
		result.kind = getLinkIndexKind!T;

		// Round the object size up to a whole number of uint slots
		enum numAllocatedSlots = divCeil(T.sizeof, uint.sizeof);
		T* item = cast(T*)buffer.voidPut(numAllocatedSlots).ptr;
		*item = value;

		static if (is(T == ObjectSymbolReference))
		{
			ObjectSymbol* sym = getSymbol(item.fromSymbol);
			item.nextReference = sym.firstRef;
			sym.firstRef = result;
			getSymbol(item.referencedSymbol).markReferenced;
		}
		else static if (is(T == ObjectModule))
		{
			item.nextModule = firstModule;
			firstModule = result;
		}
		else static if (is(T == ObjectSymbol))
		{
			ObjectModule* mod = getModule(item.moduleIndex);
			item.nextSymbol = mod.firstSymbol;
			mod.firstSymbol = result;
		}
		//writefln("add %s %s", result.kind, result.bufferIndex);

		return result;
	}

	alias getSymbol = get!ObjectSymbol;
	alias getSection = get!ObjectSection;
	alias getModule = get!ObjectModule;
	alias getReference = get!ObjectSymbolReference;

	/// Returns pointer to the object that `index` refers to.
	/// Asserts that `index` is defined and that its kind matches `T`.
	T* get(T)(LinkIndex index)
	{
		assert(index.isDefined, "null index");
		assert(index.kind == getLinkIndexKind!T, format("%s != %s", index.kind, getLinkIndexKind!T));
		return cast(T*)(&buffer.bufPtr[index.bufferIndex]);
	}

	/// Prints all modules, their symbols and the references of each symbol to stdout.
	/// Symbols of modules that are flagged as `isVerbose` are skipped.
	void dump(CompilationContext* c)
	{
		for (LinkIndex modIndex = firstModule; modIndex.isDefined; modIndex = getModule(modIndex).nextModule)
		{
			ObjectModule* mod = getModule(modIndex);
			writefln("%s %s", modIndex, c.idString(mod.id));

			if (mod.isVerbose) {
				writeln(`  (hidden as isVerbose)`);
				continue;
			}

			LinkIndex symIndex = mod.firstSymbol;
			while (symIndex.isDefined)
			{
				ObjectSymbol* sym = getSymbol(symIndex);
				ObjectSection* section = getSection(sym.sectionIndex);

				// `dataPtr` can be null (symbol without initializer), in which case slicing
				// or dereferencing it would crash the dump. View such data as an empty string.
				const(char)[] text = sym.dataPtr ? (cast(char*)(sym.dataPtr))[0..sym.length] : null;

				writef("  %s %s %s bytes", symIndex, c.idString(sym.id), sym.length);
				if (sym.isAllZero) write(" zeroinit");
				if (sym.needsZeroTermination) write(" zeroterm");
				if (sym.isString)
					writefln(` "%s"`, text);
				else writeln;

				writefln("    address: 0x%08X", section.sectionAddress + sym.sectionOffset);
				writefln("    section: 0x%08X %s", section.sectionAddress, c.idString(section.id));
				writefln("    data: %s bytes", sym.length);
				if (sym.isString) {
					writefln(`      as string: "%s"`, text);
				}
				// Pointer value can only be read when there is an initializer
				if (sym.isPointer && sym.dataPtr) {
					switch(sym.length) {
						case 4:  writefln(`      as ptr: 0x%X`, *cast(uint*)sym.dataPtr); break;
						case 8:  writefln(`      as ptr: 0x%X`, *cast(ulong*)sym.dataPtr); break;
						default: break;
					}
				}
				if (sym.dataPtr) {
					printHex(sym.dataPtr[0..sym.length], 16, PrintAscii.no, "      ");
				}

				LinkIndex symRefIndex = sym.firstRef;
				while (symRefIndex.isDefined)
				{
					ObjectSymbolReference* symRef = getReference(symRefIndex);
					writefln("    %s -> %s: off 0x%08X extraOff %s %s",
						symRefIndex, symRef.referencedSymbol, symRef.refOffset,
						symRef.extraOffset, symRef.refKind);
					symRefIndex = symRef.nextReference;
				}
				symIndex = sym.nextSymbol;
			}
		}
	}

	// prints function label JSON for .dd64 database file of x64dbg debugger
	// Only symbols of local modules that are located in the builtin code section are printed
	void print_dd64_debug_info(CompilationContext* context)
	{
		LinkIndex modIndex = firstModule;
		while (modIndex.isDefined)
		{
			ObjectModule* mod = getModule(modIndex);
			if (mod.isLocal)
			{
				LinkIndex symIndex = mod.firstSymbol;
				while (symIndex.isDefined)
				{
					ObjectSymbol* sym = getSymbol(symIndex);
					ObjectSection* section = getSection(sym.sectionIndex);

					// it is a function
					if (sym.sectionIndex == context.builtinSections[ObjectSectionType.code])
					{
						writefln("  {");
						writefln("   \"module\": \"%s\",", context.outputFilename);
						writefln("   \"address\": \"0x%X\",", section.sectionAddress + sym.sectionOffset);
						writefln("   \"manual\": true,");
						//writefln("   \"text\": \"%s.%s\"", context.idString(mod.id), context.idString(sym.id));
						writefln("   \"text\": \"%s\"", context.idString(sym.id));
						writefln("  },");
						//writefln("  0x%X %s.%s", section.sectionAddress + sym.sectionOffset, context.idString(mod.id), context.idString(sym.id));
					}

					symIndex = sym.nextSymbol;
				}
			}

			modIndex = mod.nextModule;
		}
	}
}