1 /// Copyright: Copyright (c) 2017-2019 Andrey Penechko.
2 /// License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
3 /// Authors: Andrey Penechko.
4 module vox.fe.identifier;
5 
6 import std.stdio;
7 import vox.utils : Arena, HashMap, TextSink;
8 import vox.context;
9 
10 // Fully qualified identifier of the form `package.module.entity_name`
11 // Must contain at least `entity_name`
12 struct Identifier {
13 	uint index = uint.max;
14 	enum uint HIGH_BIT = 0x8000_0000;
15 
16 	uint strIndex() {
17 		assert(!hasParent);
18 		return index;
19 	}
20 	uint fqnIndex() {
21 		assert(hasParent);
22 		return index & ~HIGH_BIT;
23 	}
24 
25 	bool isDefined() { return index != uint.max; }
26 	bool isUndefined() { return index == uint.max; }
27 	bool hasParent() { return cast(bool)(index & HIGH_BIT); }
28 
29 	Identifier getParent(CompilationContext* c) {
30 		assert(isDefined);
31 		assert(hasParent);
32 		return c.idMap.entries[fqnIndex].fqn.parentId;
33 	}
34 
35 	Identifier getSelf(CompilationContext* c) {
36 		assert(isDefined);
37 		if (hasParent) return c.idMap.entries[fqnIndex].fqn.id;
38 		return this;
39 	}
40 
41 	auto pr(CompilationContext* c) {
42 		return IdentifierPrinter(this, c);
43 	}
44 
45 	void visitPrint(CompilationContext* c, scope void delegate(const(char)[]) sink) {
46 		if (!hasParent) {
47 			sink(c.idMap.get(this));
48 			return;
49 		}
50 		if (isUndefined) {
51 			sink("<undefined id>");
52 			return;
53 		}
54 		auto entry = c.idMap.entries[fqnIndex].fqn;
55 		entry.parentId.visitPrint(c, sink);
56 		sink(".");
57 		sink(c.idMap.get(entry.id));
58 	}
59 }
60 
61 struct IdentifierPrinter {
62 	Identifier id;
63 	CompilationContext* c;
64 	void toString(scope void delegate(const(char)[]) sink) { id.visitPrint(c, sink); }
65 }
66 
67 struct IdentifierMap {
68 	Arena!char stringDataBuffer;
69 	Arena!IdMapEntry entries;
70 	HashMap!(StringKey, uint, StringKey.init) map;
71 	HashMap!(FullyQualifiedName, uint, FullyQualifiedName.init) fqnMap;
72 
73 	string get(Identifier id) {
74 		if (id.isDefined) {
75 			if (id.hasParent) {
76 				id = entries[id.fqnIndex].fqn.id;
77 			}
78 			auto str = entries[id.strIndex].str;
79 			return cast(string)stringDataBuffer.bufPtr[str.offset..str.offset+str.length];
80 		}
81 		return "<undefined id>";
82 	}
83 
84 	Identifier find(const(char)[] str) {
85 		assert(str.length > 0);
86 		auto key = StringKey(str);
87 		return Identifier(map.get(key, uint.max));
88 	}
89 
90 	Identifier getOrRegFormatted(Args...)(CompilationContext* c, const(char)[] fmt, Args args) {
91 		assert(fmt.length > 0);
92 		import std.format : formattedWrite;
93 		auto start = stringDataBuffer.length;
94 		formattedWrite(stringDataBuffer, fmt, args);
95 		auto end = stringDataBuffer.length;
96 
97 		assert(end < uint.max);
98 		assert(end-start < uint.max);
99 		auto len = cast(uint)(end-start);
100 		assert(len > 0);
101 		assert(len <= uint.max);
102 
103 		const(char)[] str = stringDataBuffer.bufPtr[start..end];
104 		auto key = StringKey(str);
105 		uint id = map.get(key, uint.max);
106 
107 		if (id == uint.max) {
108 			// can use lower 31 bits
109 			assert(entries.length < Identifier.HIGH_BIT, "Id map overflow");
110 			id = cast(uint)entries.length;
111 			map.put(c.arrayArena, key, id);
112 
113 			entries.put(IdMapEntry(IdMapString(cast(uint)start, len)));
114 		} else {
115 			// this is old id, remove data from buffer
116 			stringDataBuffer.length = start;
117 		}
118 
119 		return Identifier(id);
120 	}
121 
122 	Identifier getOrReg(CompilationContext* c, const(char)[] str) {
123 		assert(str.length > 0);
124 		assert(str.length <= uint.max);
125 
126 		auto key = StringKey(str);
127 		uint id = map.get(key, uint.max);
128 
129 		if (id == uint.max) {
130 			auto start = stringDataBuffer.length;
131 			char[] buf = stringDataBuffer.put(str);
132 			auto end = stringDataBuffer.length;
133 
134 			auto len = cast(uint)(end-start);
135 			assert(len > 0);
136 			assert(len <= uint.max);
137 
138 			key.ptr = buf.ptr; // set new ptr so that buf data is always used for compare
139 
140 			// can use lower 31 bits
141 			assert(entries.length < Identifier.HIGH_BIT, "Id map overflow");
142 			id = cast(uint)entries.length;
143 			map.put(c.arrayArena, key, id);
144 			entries.put(IdMapEntry(IdMapString(cast(uint)start, len)));
145 		}
146 
147 		return Identifier(id);
148 	}
149 
150 	Identifier getOrRegFqn(CompilationContext* c, FullyQualifiedName key) {
151 		assert(key.id.isDefined);
152 		if (key.parentId.isUndefined) {
153 			return key.id;
154 		}
155 
156 		uint id = fqnMap.get(key, uint.max);
157 
158 		if (id == uint.max) {
159 			assert(entries.length < Identifier.HIGH_BIT, "Id map overflow");
160 			id = cast(uint)entries.length;
161 			fqnMap.put(c.arrayArena, key, id);
162 			entries.put(IdMapEntry(key));
163 		}
164 
165 		return Identifier(id | Identifier.HIGH_BIT);
166 	}
167 
168 	// registers dot-separated identifier
169 	Identifier getOrRegFqn(CompilationContext* c, const(char)[] str) {
170 		import std.algorithm : splitter;
171 
172 		assert(str.length > 0);
173 
174 		Identifier parentId;
175 
176 		foreach(const(char)[] part; str.splitter('.'))
177 		{
178 			Identifier partId = getOrReg(c, part);
179 			parentId = getOrRegFqn(c, FullyQualifiedName(parentId, partId));
180 		}
181 
182 		return parentId;
183 	}
184 
185 	// internal. Called in CompilationContext.initialize()
186 	void regCommonIds(CompilationContext* c)
187 	{
188 		assert(entries.length == 0);
189 		assert(map.length == 0);
190 		assert(stringDataBuffer.length == 0);
191 		foreach (size_t i, string memberName; __traits(allMembers, CommonIds))
192 		{
193 			string name = __traits(getAttributes, __traits(getMember, CommonIds, memberName))[0];
194 			Identifier id = getOrReg(c, name);
195 			assert(id == __traits(getMember, CommonIds, memberName));
196 		}
197 	}
198 }
199 
200 enum CommonIds : Identifier
201 {
202 	@("ptr")      id_ptr      = Identifier(0),
203 	@("length")   id_length   = Identifier(1),
204 	@("min")      id_min      = Identifier(2),
205 	@("max")      id_max      = Identifier(3),
206 	@("sizeof")   id_sizeof   = Identifier(4),
207 	@("offsetof") id_offsetof = Identifier(5),
208 	@("this")     id_this     = Identifier(6),
209 	@("message")  id_message  = Identifier(7),
210 	@("type")     id_type     = Identifier(8),
211 	@("extern")   id_extern   = Identifier(9),
212 	@("static")   id_static   = Identifier(10),
213 	@("syscall")  id_syscall  = Identifier(11),
214 	@("module")   id_module   = Identifier(12),
215 	@("host")     id_host     = Identifier(13),
216 	@("main")     id_main     = Identifier(14),
217 
218 	// Built-in function identifiers
219 	@("$compileError") cash_compile_error = Identifier(id_main.index + 1),
220 	@("$isSlice") cash_is_slice = Identifier(id_main.index + 2),
221 	@("$isInteger") cash_is_integer = Identifier(id_main.index + 3),
222 	@("$isPointer") cash_is_pointer = Identifier(id_main.index + 4),
223 	@("$baseOf") cash_base_of = Identifier(id_main.index + 5),
224 
225 	// Special keywords
226 	@("__FILE__")          kw_file          = Identifier(cash_base_of.index + 1),
227 	@("__LINE__")          kw_line          = Identifier(cash_base_of.index + 2),
228 	@("__FUNCTION_NAME__") kw_function_name = Identifier(cash_base_of.index + 3),
229 	@("__MODULE_NAME__")   kw_module_name   = Identifier(cash_base_of.index + 4),
230 
231 	// Conditional compilation identifiers
232 	@("windows")  id_windows = Identifier(kw_module_name.index + 1),
233 	@("linux")    id_linux   = Identifier(kw_module_name.index + 2),
234 	@("macos")    id_macos   = Identifier(kw_module_name.index + 3),
235 }
236 
237 enum uint commonId_builtin_func_first = CommonIds.cash_compile_error.index;
238 enum uint commonId_builtin_func_last  = CommonIds.cash_base_of.index;
239 enum uint commonId_special_keyword_first = CommonIds.kw_file.index;
240 enum uint commonId_special_keyword_last  = CommonIds.kw_module_name.index;
241 enum uint commonId_version_id_first = CommonIds.id_windows.index;
242 enum uint commonId_version_id_last  = CommonIds.id_macos.index;
243 
244 
245 enum SpecialKeyword : ubyte {
246 	file,          // __FILE__
247 	line,          // __LINE__
248 	function_name, // __FUNCTION_NAME__
249 	module_name,   // __MODULE_NAME__
250 }
251 
252 enum VersionId : ubyte {
253 	id_windows,
254 	id_linux,
255 	id_macos,
256 }
257 
258 
259 private struct StringKey
260 {
261 	const(char)* ptr;
262 	uint length;
263 	uint hash;
264 
265 	this(const(char)[] str)
266 	{
267 		ptr = str.ptr;
268 		length = cast(uint)str.length;
269 		hash = fnv1a_32(cast(const(ubyte)[])str);
270 	}
271 
272 	string data() const
273 	{
274 		return cast(string)ptr[0..length];
275 	}
276 
277 	bool opEquals(StringKey other) const
278 	{
279 		if (hash != other.hash) return false;
280 		return this.data == other.data;
281 	}
282 
283 	size_t toHash()
284 	{
285 		return hash;
286 	}
287 }
288 
289 private struct IdMapString {
290 	uint offset;
291 	uint length;
292 }
293 struct FullyQualifiedName {
294 	Identifier parentId;
295 	Identifier id;
296 }
297 private union IdMapEntry {
298 	this(IdMapString str) { this.str = str; }
299 	this(FullyQualifiedName fqn) { this.fqn = fqn; }
300 	IdMapString str;
301 	FullyQualifiedName fqn;
302 }
303 
304 // https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function
305 private uint fnv1a_32(const(ubyte)[] data)
306 {
307 	enum uint fnvPrime       = 0x01000193;
308 	enum uint fnvOffsetBasis = 0x811c9dc5;
309 
310 	uint _hash = fnvOffsetBasis;
311 
312 	foreach (immutable ubyte i; data)
313 	{
314 		_hash ^= i;
315 		_hash *= fnvPrime;
316 	}
317 
318 	return _hash;
319 }