TPDE
Loading...
Searching...
No Matches
Assembler.hpp
1// SPDX-FileCopyrightText: 2025 Contributors to TPDE <https://tpde.org>
2//
3// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4#pragma once
5
6#include "tpde/base.hpp"
7#include "tpde/util/BumpAllocator.hpp"
8#include "tpde/util/SmallVector.hpp"
9#include <cstring>
10#include <span>
11#include <vector>
12
13namespace tpde {
14
15struct SymRef {
16private:
17 u32 val;
18
19public:
20 /// Invalid symbol reference
21 constexpr SymRef() : val(0) {}
22
23 explicit constexpr SymRef(u32 id) : val(id) {}
24
25 u32 id() const { return val; }
26 bool valid() const { return val != 0; }
27
28 bool operator==(const SymRef &other) const { return other.val == val; }
29};
30
31struct SecRef {
32private:
33 u32 val;
34
35public:
36 /// Invalid symbol reference
37 constexpr SecRef() : val(0) {}
38
39 explicit constexpr SecRef(u32 id) : val(id) {}
40
41 u32 id() const { return val; }
42 bool valid() const { return val != 0; }
43
44 bool operator==(const SecRef &other) const { return other.val == val; }
45};
46
47struct Relocation {
48 u32 offset; ///< Offset inside section.
49 SymRef symbol; ///< References symbol.
50 u32 type; ///< Relocation type. File-format-specifc.
51 i32 addend; ///< Addend.
52};
53
54/// Section kinds, lowered to file-format specific flags.
55enum class SectionKind : u8 {
56 Text, ///< Text section, executable code (ELF .text)
57 ReadOnly, ///< Read-only data section (ELF .rodata)
58 EHFrame, ///< EH Frame section (ELF .eh_frame)
59 LSDA, ///< LSDA section (ELF .gcc_except_table)
60 Data, ///< Writable data section (ELF .data)
61 DataRelRO, ///< Read-only data section with relocations (ELF .data.rel.ro)
62 BSS, ///< Zero-initialized data section (ELF .bss)
63 ThreadData, ///< Initialized thread-local data section (ELF .tdata)
64 ThreadBSS, ///< Zero-initialized thread-local data section (ELF .tbss)
65
66 Max
67};
68
69namespace elf {
70class AssemblerElf;
71} // namespace elf
72
73struct DataSection {
74 friend class Assembler;
75 friend class elf::AssemblerElf;
76
77 /// 256 bytes inline storage is enough for 10 relocations, which is a typical
78 /// number for a single function (relevant for COMDAT sections with one
79 /// section per function).
80 using StorageTy = util::SmallVector<u8, 256>;
81
82 /// Section data.
83 StorageTy data;
84
85 u64 addr = 0; ///< Address (file-format-specific).
86 u64 vsize = 0; ///< Size of virtual section, otherwise data.size() is valid.
87 u32 type = 0; ///< Type (file-format-specific).
88 u32 flags = 0; ///< Flags (file-format-specific).
89 u32 name = 0; ///< Name (file-format-specific, can also be index, etc.).
90 u32 align = 1; ///< Alignment (bytes).
91
92private:
93 /// Section symbol, or signature symbol for SHT_GROUP sections.
94 SymRef sym = {};
95
96 SecRef sec_ref;
97
98 util::SmallVector<Relocation, 4> relocs;
99
100public:
101 /// Generic field for target-specific data.
102 void *target_info = nullptr;
103
104 /// Whether the section is virtual, i.e. has no content.
105 bool is_virtual;
106
107private:
108 /// Whether the section can have relocations. For ELF, this implies that the
109 /// immediately following section ID is reserved as relocation section and
110 /// that name-5..name is ".rela".
111 bool has_relocs;
112
113public:
114#ifndef NDEBUG
115 /// Whether the section is currently in use by a SectionWriter.
116 bool locked = false;
117#endif
118
119 DataSection(SecRef ref) noexcept : sec_ref(ref) {}
120
121 SecRef get_ref() const noexcept { return sec_ref; }
122
123 size_t size() const { return is_virtual ? vsize : data.size(); }
124
125 template <typename T>
126 void write(const T &t) noexcept {
127 assert(!locked);
128 assert(!is_virtual);
129 size_t off = data.size();
130 data.resize_uninitialized(data.size() + sizeof(T));
131 std::memcpy(data.data() + off, &t, sizeof(T));
132 }
133
134 size_t reloc_count() const {
135 assert(!is_virtual);
136 assert(has_relocs);
137 return relocs.size();
138 }
139
140 /// Moves all offsets of relocations backwards by the specified offset.
141 void adjust_relocation_offsets(const size_t reloc_start_off,
142 const u32 offset) {
143 for (size_t i = reloc_start_off; i < relocs.size(); i++) {
144 relocs[i].offset -= offset;
145 }
146 }
147};
148
149/// Assembler base class.
150class Assembler {
151public:
152 enum class SymBinding : u8 {
153 /// Symbol with local linkage, must be defined
155 /// Weak linkage
157 /// Global linkage
159 };
160
161 struct TargetInfo {
162 struct SectionFlags {
163 u32 type;
164 u32 flags;
165 u32 name;
166 u8 align = 1;
167 bool has_relocs = true;
168 bool is_bss = false;
169 };
170
171 /// The relocation type for 32-bit pc-relative offsets.
172 u32 reloc_pc32;
173 /// The relocation type for 64-bit absolute addresses.
174 u32 reloc_abs64;
175
176 /// Section flags for the different section kinds.
177 std::array<SectionFlags, unsigned(SectionKind::Max)> section_flags;
178 };
179
180protected:
181 const TargetInfo &target_info;
182
183 util::BumpAllocator<> section_allocator;
184 util::SmallVector<util::BumpAllocUniquePtr<DataSection>, 16> sections;
185
186 std::array<SecRef, unsigned(SectionKind::Max)> default_sections;
187
188 Assembler(const TargetInfo &target_info) noexcept
189 : target_info(target_info) {}
190 virtual ~Assembler();
191
192public:
193 virtual void reset() noexcept;
194
195 /// \name Sections
196 /// @{
197
198 DataSection &get_section(SecRef ref) noexcept {
199 assert(ref.valid());
200 return *sections[ref.id()];
201 }
202
203 const DataSection &get_section(SecRef ref) const noexcept {
204 assert(ref.valid());
205 return *sections[ref.id()];
206 }
207
208 SecRef create_section(const TargetInfo::SectionFlags &flags) noexcept;
209
210 SecRef create_section(SectionKind kind) noexcept {
211 return create_section(target_info.section_flags[unsigned(kind)]);
212 }
213
214 SecRef get_default_section(SectionKind kind) noexcept {
215 SecRef &res = default_sections[unsigned(kind)];
216 if (!res.valid()) {
217 res = create_section(kind);
218 }
219 return res;
220 }
221
222 SecRef get_text_section() noexcept {
223 return get_default_section(SectionKind::Text);
224 }
225 SecRef get_data_section(bool rodata, bool relro = false) noexcept {
226 return get_default_section(!rodata ? SectionKind::Data
227 : relro ? SectionKind::DataRelRO
228 : SectionKind::ReadOnly);
229 }
230 SecRef get_bss_section() noexcept {
231 return get_default_section(SectionKind::BSS);
232 }
233 SecRef get_tdata_section() noexcept {
234 return get_default_section(SectionKind::ThreadData);
235 }
236 SecRef get_tbss_section() noexcept {
237 return get_default_section(SectionKind::ThreadBSS);
238 }
239
240 virtual void rename_section(SecRef, std::string_view name) noexcept = 0;
241
242 virtual SymRef section_symbol(SecRef) noexcept = 0;
243
244 /// @}
245
246 virtual SymRef sym_add_undef(std::string_view, SymBinding) noexcept = 0;
247 virtual SymRef sym_predef_func(std::string_view, SymBinding) noexcept = 0;
248 virtual SymRef sym_predef_data(std::string_view, SymBinding) noexcept = 0;
249 virtual SymRef sym_predef_tls(std::string_view, SymBinding) noexcept = 0;
250 /// Define a symbol at the specified location.
251 virtual void sym_def(SymRef, SecRef, u64 pos, u64 size) noexcept = 0;
252
253 /// Define predefined symbol with the specified data.
254 void sym_def_predef_data(SecRef sec,
255 SymRef sym,
256 std::span<const u8> data,
257 u32 align,
258 u32 *off) noexcept;
259
260 [[nodiscard]] SymRef sym_def_data(SecRef sec,
261 std::string_view name,
262 std::span<const u8> data,
263 u32 align,
264 SymBinding binding,
265 u32 *off = nullptr) {
266 SymRef sym = sym_predef_data(name, binding);
267 sym_def_predef_data(sec, sym, data, align, off);
268 return sym;
269 }
270
271 /// Define predefined symbol with zero; also supported for BSS sections.
272 void sym_def_predef_zero(SecRef sec_ref,
273 SymRef sym_ref,
274 u32 size,
275 u32 align,
276 u32 *off = nullptr) noexcept;
277
278
279 /// \name Relocations
280 /// @{
281
282 /// Add relocation. Type is file-format and target-specific.
284 SecRef sec, SymRef sym, u32 type, u32 offset, i64 addend) noexcept {
285 assert(i32(addend) == addend && "non-32-bit addends are unsupported");
286 get_section(sec).relocs.emplace_back(offset, sym, type, addend);
287 }
288
289 void reloc_pc32(SecRef sec, SymRef sym, u32 offset, i64 addend) noexcept {
290 reloc_sec(sec, sym, target_info.reloc_pc32, offset, addend);
291 }
292
293 void reloc_abs(SecRef sec, SymRef sym, u32 offset, i64 addend) noexcept {
294 reloc_sec(sec, sym, target_info.reloc_abs64, offset, addend);
295 }
296
297 /// @}
298
299 virtual void finalize() noexcept {}
300
301 virtual std::vector<u8> build_object_file() noexcept = 0;
302};
303
304} // namespace tpde
305
306#undef ARG
void sym_def_predef_zero(SecRef sec_ref, SymRef sym_ref, u32 size, u32 align, u32 *off=nullptr) noexcept
Define predefined symbol with zero; also supported for BSS sections.
virtual void sym_def(SymRef, SecRef, u64 pos, u64 size) noexcept=0
Define a symbol at the specified location.
void reloc_sec(SecRef sec, SymRef sym, u32 type, u32 offset, i64 addend) noexcept
Add relocation. Type is file-format and target-specific.
void sym_def_predef_data(SecRef sec, SymRef sym, std::span< const u8 > data, u32 align, u32 *off) noexcept
Define predefined symbol with the specified data.
@ LOCAL
Symbol with local linkage, must be defined.