// TPDE: AssemblerElf.hpp
1// SPDX-FileCopyrightText: 2025 Contributors to TPDE <https://tpde.org>
2//
3// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4#pragma once
5
6#include <cassert>
7#include <elf.h>
8#include <span>
9#include <string_view>
10#include <type_traits>
11#include <vector>
12
13#include "base.hpp"
14#include "tpde/Assembler.hpp"
15#include "tpde/StringTable.hpp"
16#include "tpde/util/VectorWriter.hpp"
17#include "util/SmallVector.hpp"
18#include "util/misc.hpp"
19
20namespace tpde {
21
22namespace dwarf {
23// DWARF constants
24constexpr u8 DW_CFA_nop = 0;
25constexpr u8 DW_EH_PE_uleb128 = 0x01;
26constexpr u8 DW_EH_PE_pcrel = 0x10;
27constexpr u8 DW_EH_PE_indirect = 0x80;
28constexpr u8 DW_EH_PE_sdata4 = 0x0b;
29constexpr u8 DW_EH_PE_omit = 0xff;
30
31constexpr u8 DW_CFA_offset_extended = 0x05;
32constexpr u8 DW_CFA_def_cfa = 0x0c;
33constexpr u8 DW_CFA_def_cfa_register = 0x0d;
34constexpr u8 DW_CFA_def_cfa_offset = 0x0e;
35constexpr u8 DW_CFA_offset = 0x80;
36constexpr u8 DW_CFA_advance_loc = 0x40;
37constexpr u8 DW_CFA_advance_loc4 = 0x04;
38
39constexpr u8 DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0;
40
41constexpr u32 EH_FDE_FUNC_START_OFF = 0x8;
42
43namespace x64 {
44constexpr u8 DW_reg_rax = 0;
45constexpr u8 DW_reg_rdx = 1;
46constexpr u8 DW_reg_rcx = 2;
47constexpr u8 DW_reg_rbx = 3;
48constexpr u8 DW_reg_rsi = 4;
49constexpr u8 DW_reg_rdi = 5;
50constexpr u8 DW_reg_rbp = 6;
51constexpr u8 DW_reg_rsp = 7;
52constexpr u8 DW_reg_r8 = 8;
53constexpr u8 DW_reg_r9 = 9;
54constexpr u8 DW_reg_r10 = 10;
55constexpr u8 DW_reg_r11 = 11;
56constexpr u8 DW_reg_r12 = 12;
57constexpr u8 DW_reg_r13 = 13;
58constexpr u8 DW_reg_r14 = 14;
59constexpr u8 DW_reg_r15 = 15;
60constexpr u8 DW_reg_ra = 16;
61} // namespace x64
62
63namespace a64 {
64constexpr u8 DW_reg_x0 = 0;
65constexpr u8 DW_reg_x1 = 1;
66constexpr u8 DW_reg_x2 = 2;
67constexpr u8 DW_reg_x3 = 3;
68constexpr u8 DW_reg_x4 = 4;
69constexpr u8 DW_reg_x5 = 5;
70constexpr u8 DW_reg_x6 = 6;
71constexpr u8 DW_reg_x7 = 7;
72constexpr u8 DW_reg_x8 = 8;
73constexpr u8 DW_reg_x9 = 9;
74constexpr u8 DW_reg_x10 = 10;
75constexpr u8 DW_reg_x11 = 11;
76constexpr u8 DW_reg_x12 = 12;
77constexpr u8 DW_reg_x13 = 13;
78constexpr u8 DW_reg_x14 = 14;
79constexpr u8 DW_reg_x15 = 15;
80constexpr u8 DW_reg_x16 = 16;
81constexpr u8 DW_reg_x17 = 17;
82constexpr u8 DW_reg_x18 = 18;
83constexpr u8 DW_reg_x19 = 19;
84constexpr u8 DW_reg_x20 = 20;
85constexpr u8 DW_reg_x21 = 21;
86constexpr u8 DW_reg_x22 = 22;
87constexpr u8 DW_reg_x23 = 23;
88constexpr u8 DW_reg_x24 = 24;
89constexpr u8 DW_reg_x25 = 25;
90constexpr u8 DW_reg_x26 = 26;
91constexpr u8 DW_reg_x27 = 27;
92constexpr u8 DW_reg_x28 = 28;
93constexpr u8 DW_reg_x29 = 29;
94constexpr u8 DW_reg_x30 = 30;
95
96constexpr u8 DW_reg_fp = 29;
97constexpr u8 DW_reg_lr = 30;
98
99constexpr u8 DW_reg_v0 = 64;
100constexpr u8 DW_reg_v1 = 65;
101constexpr u8 DW_reg_v2 = 66;
102constexpr u8 DW_reg_v3 = 67;
103constexpr u8 DW_reg_v4 = 68;
104constexpr u8 DW_reg_v5 = 69;
105constexpr u8 DW_reg_v6 = 70;
106constexpr u8 DW_reg_v7 = 71;
107constexpr u8 DW_reg_v8 = 72;
108constexpr u8 DW_reg_v9 = 73;
109constexpr u8 DW_reg_v10 = 74;
110constexpr u8 DW_reg_v11 = 75;
111constexpr u8 DW_reg_v12 = 76;
112constexpr u8 DW_reg_v13 = 77;
113constexpr u8 DW_reg_v14 = 78;
114constexpr u8 DW_reg_v15 = 79;
115constexpr u8 DW_reg_v16 = 80;
116constexpr u8 DW_reg_v17 = 81;
117constexpr u8 DW_reg_v18 = 82;
118constexpr u8 DW_reg_v19 = 83;
119constexpr u8 DW_reg_v20 = 84;
120constexpr u8 DW_reg_v21 = 85;
121constexpr u8 DW_reg_v22 = 86;
122constexpr u8 DW_reg_v23 = 87;
123constexpr u8 DW_reg_v24 = 88;
124constexpr u8 DW_reg_v25 = 89;
125constexpr u8 DW_reg_v26 = 90;
126constexpr u8 DW_reg_v27 = 91;
127constexpr u8 DW_reg_v28 = 92;
128constexpr u8 DW_reg_v29 = 93;
129constexpr u8 DW_reg_v30 = 94;
130
131constexpr u8 DW_reg_sp = 31;
132constexpr u8 DW_reg_pc = 32;
133} // namespace a64
134
135} // namespace dwarf
136
137class AssemblerElf : public Assembler {
138 friend class ElfMapper;
139
140protected:
141 struct TargetInfoElf : Assembler::TargetInfo {
142 /// The OS ABI for the ELF header.
143 u8 elf_osabi;
144 /// The machine for the ELF header.
145 u16 elf_machine;
146 };
147
148public:
149 enum class SymBinding : u8 {
150 /// Symbol with local linkage, must be defined
151 LOCAL,
152 /// Weak linkage
153 WEAK,
154 /// Global linkage
155 GLOBAL,
156 };
157
158 enum class SymVisibility : u8 {
159 DEFAULT = STV_DEFAULT,
160 INTERNAL = STV_INTERNAL,
161 HIDDEN = STV_HIDDEN,
162 PROTECTED = STV_PROTECTED,
163 };
164
165private:
166 std::vector<Elf64_Sym> global_symbols, local_symbols;
167 /// Section indices for large section numbers
168 util::SmallVector<u32, 0> global_shndx, local_shndx;
169
170 StringTable strtab;
171 /// Storage for extra user-provided section names.
172 StringTable shstrtab_extra;
173
174 SecRef secref_text = SecRef();
175 SecRef secref_rodata = SecRef();
176 SecRef secref_relro = SecRef();
177 SecRef secref_data = SecRef();
178 SecRef secref_bss = SecRef();
179 SecRef secref_tdata = SecRef();
180 SecRef secref_tbss = SecRef();
181
182 /// Unwind Info
183 SecRef secref_eh_frame = SecRef();
184 SecRef secref_except_table = SecRef();
185
186public:
187 util::VectorWriter eh_writer;
188
189private:
190 struct ExceptCallSiteInfo {
191 /// Start offset *in section* (not inside function)
192 u64 start;
193 u64 len;
194 u32 landing_pad_label;
195 u32 action_entry;
196 };
197
198 /// Exception Handling temporary storage
199 /// Call Sites for current function
200 std::vector<ExceptCallSiteInfo> except_call_site_table;
201
202 /// Temporary storage for encoding call sites
203 util::SmallVector<u8> except_encoded_call_sites;
204 /// Action Table for current function
205 util::SmallVector<u8> except_action_table;
206 /// The type_info table (contains the symbols which contain the pointers to
207 /// the type_info)
208 std::vector<SymRef> except_type_info_table;
209 /// Table for exception specs
210 std::vector<u8> except_spec_table;
211 /// The current personality function (if any)
212 SymRef cur_personality_func_addr;
213 u32 eh_cur_cie_off = 0u;
214 u32 eh_first_fde_off = 0;
215
216 /// The current function
217 SymRef cur_func;
218
219public:
220 explicit AssemblerElf(const TargetInfoElf &target_info)
221 : Assembler(target_info) {
222 local_symbols.resize(1); // First symbol must be null.
223 init_sections();
224 eh_init_cie();
225 }
226
227 void reset() noexcept override;
228
229private:
230 void init_sections() noexcept;
231
232 std::span<Relocation> get_relocs(SecRef ref) {
233 return get_section(ref).relocs;
234 }
235
236 /// Allocate a new section.
237 [[nodiscard]] SecRef
238 create_section(unsigned type, unsigned flags, unsigned name) noexcept;
239
240 /// Allocate a new section for relocations.
241 [[nodiscard]] SecRef create_rela_section(SecRef ref,
242 unsigned flags,
243 unsigned rela_name) noexcept;
244
245 [[nodiscard]] SymRef create_section_symbol(SecRef ref,
246 std::string_view name) noexcept;
247
248 DataSection &get_or_create_section(SecRef &ref,
249 unsigned rela_name,
250 unsigned type,
251 unsigned flags,
252 unsigned align,
253 bool with_rela = true) noexcept;
254
255public:
256 SecRef get_text_section() noexcept { return secref_text; }
257 SecRef get_data_section(bool rodata, bool relro = false) noexcept;
258 SecRef get_bss_section() noexcept;
259 SecRef get_tdata_section() noexcept;
260 SecRef get_tbss_section() noexcept;
261 SecRef create_structor_section(bool init, SecRef group = SecRef()) noexcept;
262
263 /// Create a new section with the given name, ELF section type, and flags.
264 /// Optionally, a corresponding relocation (.rela) section is also created,
265 /// otherwise, the section must not have relocations.
266 [[nodiscard]] SecRef create_section(std::string_view name,
267 unsigned type,
268 unsigned flags,
269 bool with_rela,
270 SecRef group = SecRef()) noexcept;
271
272 /// Create a new group section.
273 [[nodiscard]] SecRef create_group_section(SymRef signature_sym,
274 bool is_comdat) noexcept;
275
276 const char *sec_name(SecRef ref) const noexcept;
277
278private:
279 bool sec_is_xindex(SecRef ref) const noexcept {
280 return ref.id() >= SHN_LORESERVE;
281 }
282
283public:
284 // Symbols
285
286 void sym_copy(SymRef dst, SymRef src) noexcept;
287
288private:
289 [[nodiscard]] SymRef
290 sym_add(std::string_view name, SymBinding binding, u32 type) noexcept;
291
292public:
293 [[nodiscard]] SymRef sym_add_undef(std::string_view name,
294 SymBinding binding) noexcept {
295 return sym_add(name, binding, STT_NOTYPE);
296 }
297
298 [[nodiscard]] SymRef sym_predef_func(std::string_view name,
299 SymBinding binding) noexcept {
300 return sym_add(name, binding, STT_FUNC);
301 }
302
303 [[nodiscard]] SymRef sym_predef_data(std::string_view name,
304 SymBinding binding) noexcept {
305 return sym_add(name, binding, STT_OBJECT);
306 }
307
308 [[nodiscard]] SymRef sym_predef_tls(std::string_view name,
309 SymBinding binding) noexcept {
310 return sym_add(name, binding, STT_TLS);
311 }
312
313 void sym_def_predef_data(SecRef sec,
314 SymRef sym,
315 std::span<const u8> data,
316 u32 align,
317 u32 *off) noexcept;
318
319 [[nodiscard]] SymRef sym_def_data(SecRef sec,
320 std::string_view name,
321 std::span<const u8> data,
322 u32 align,
323 SymBinding binding,
324 u32 *off = nullptr) {
325 SymRef sym = sym_predef_data(name, binding);
326 sym_def_predef_data(sec, sym, data, align, off);
327 return sym;
328 }
329
330 void sym_def_predef_zero(SecRef sec_ref,
331 SymRef sym_ref,
332 u32 size,
333 u32 align,
334 u32 *off = nullptr) noexcept;
335
336private:
337 /// Set symbol sections for SHN_XINDEX.
338 void sym_def_xindex(SymRef sym_ref, SecRef sec_ref) noexcept;
339
340public:
341 void sym_def(SymRef sym_ref, SecRef sec_ref, u64 pos, u64 size) noexcept {
342 Elf64_Sym *sym = sym_ptr(sym_ref);
343 assert(sym->st_shndx == SHN_UNDEF && "cannot redefined symbol");
344 sym->st_value = pos;
345 sym->st_size = size;
346 if (!sec_is_xindex(sec_ref)) [[likely]] {
347 sym->st_shndx = sec_ref.id();
348 } else {
349 sym->st_shndx = SHN_XINDEX;
350 sym_def_xindex(sym_ref, sec_ref);
351 }
352 // TODO: handle fixups?
353 }
354
355 void sym_set_visibility(SymRef sym, SymVisibility visibility) noexcept {
356 sym_ptr(sym)->st_other = static_cast<u8>(visibility);
357 }
358
359 const char *sym_name(SymRef sym) const noexcept {
360 return strtab.data() + sym_ptr(sym)->st_name;
361 }
362
363 SecRef sym_section(SymRef sym) const noexcept {
364 Elf64_Section shndx = sym_ptr(sym)->st_shndx;
365 if (shndx < SHN_LORESERVE && shndx != SHN_UNDEF) [[likely]] {
366 return SecRef(shndx);
367 }
368 assert(shndx == SHN_XINDEX);
369 const auto &shndx_tab = sym_is_local(sym) ? local_shndx : global_shndx;
370 return SecRef(shndx_tab[sym_idx(sym)]);
371 }
372
373private:
374 [[nodiscard]] static bool sym_is_local(const SymRef sym) noexcept {
375 return (sym.id() & 0x8000'0000) == 0;
376 }
377
378 [[nodiscard]] static u32 sym_idx(const SymRef sym) noexcept {
379 return sym.id() & ~0x8000'0000;
380 }
381
382 [[nodiscard]] Elf64_Sym *sym_ptr(const SymRef sym) noexcept {
383 if (sym_is_local(sym)) {
384 return &local_symbols[sym_idx(sym)];
385 } else {
386 return &global_symbols[sym_idx(sym)];
387 }
388 }
389
390 [[nodiscard]] const Elf64_Sym *sym_ptr(const SymRef sym) const noexcept {
391 if (sym_is_local(sym)) {
392 return &local_symbols[sym_idx(sym)];
393 } else {
394 return &global_symbols[sym_idx(sym)];
395 }
396 }
397
398 // Unwind and exception info
399
400public:
401 static constexpr u32 write_eh_inst(u8 *dst, u8 opcode, u64 arg) noexcept {
402 if (opcode & dwarf::DWARF_CFI_PRIMARY_OPCODE_MASK) {
403 assert((arg & dwarf::DWARF_CFI_PRIMARY_OPCODE_MASK) == 0);
404 *dst = opcode | arg;
405 return 1;
406 }
407 *dst++ = opcode;
408 return 1 + util::uleb_write(dst, arg);
409 }
410
411 static constexpr u32
412 write_eh_inst(u8 *dst, u8 opcode, u64 arg1, u64 arg2) noexcept {
413 u8 *base = dst;
414 dst += write_eh_inst(dst, opcode, arg1);
415 dst += util::uleb_write(dst, arg2);
416 return dst - base;
417 }
418
419 void eh_align_frame() noexcept;
420 void eh_write_inst(u8 opcode, u64 arg) noexcept;
421 void eh_write_inst(u8 opcode, u64 first_arg, u64 second_arg) noexcept;
422
423private:
424 void eh_init_cie(SymRef personality_func_addr = SymRef()) noexcept;
425
426public:
427 u32 eh_begin_fde(SymRef personality_func_addr = SymRef()) noexcept;
428 void eh_end_fde(u32 fde_start, SymRef func) noexcept;
429
430 void except_begin_func() noexcept;
431
432 void except_encode_func(SymRef func_sym, const u32 *label_offsets) noexcept;
433
434 /// add an entry to the call-site table
435 /// must be called in strictly increasing order wrt text_off
436 void except_add_call_site(u32 text_off,
437 u32 len,
438 u32 landing_pad_label,
439 bool is_cleanup) noexcept;
440
441 /// Add a cleanup action to the action table
442 /// *MUST* be the last one
443 void except_add_cleanup_action() noexcept;
444
445 /// add an action to the action table
446 /// An invalid SymRef signals a catch(...)
447 void except_add_action(bool first_action, SymRef type_sym) noexcept;
448
449 void except_add_empty_spec_action(bool first_action) noexcept;
450
451 u32 except_type_idx_for_sym(SymRef sym) noexcept;
452
453 void finalize() noexcept override;
454
455 // Output file generation
456
457 std::vector<u8> build_object_file() noexcept override;
458};
459
460// TODO: Remove these types, instead find a good way to specify architecture as
461// enum parameter (probably contained in Assembler?) to constructor.
462
463class AssemblerElfA64 final : public AssemblerElf {
464 static const TargetInfoElf TARGET_INFO;
465
466public:
467 explicit AssemblerElfA64() noexcept : AssemblerElf(TARGET_INFO) {}
468};
469
470class AssemblerElfX64 final : public AssemblerElf {
471 static const TargetInfoElf TARGET_INFO;
472
473public:
474 explicit AssemblerElfX64() noexcept : AssemblerElf(TARGET_INFO) {}
475};
476
477} // namespace tpde
Assembler base class.