TPDE
Loading...
Searching...
No Matches
Assembler.hpp
1// SPDX-FileCopyrightText: 2025 Contributors to TPDE <https://tpde.org>
2//
3// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4#pragma once
5
6#include "tpde/base.hpp"
7#include "tpde/util/BumpAllocator.hpp"
8#include "tpde/util/SmallVector.hpp"
9#include <cstring>
10#include <span>
11#include <vector>
12
13namespace tpde {
14
15struct SymRef {
16private:
17 u32 val;
18
19public:
20 /// Invalid symbol reference
21 constexpr SymRef() : val(0) {}
22
23 explicit constexpr SymRef(u32 id) : val(id) {}
24
25 u32 id() const { return val; }
26 bool valid() const { return val != 0; }
27
28 bool operator==(const SymRef &other) const { return other.val == val; }
29};
30
31struct SecRef {
32private:
33 u32 val;
34
35public:
36 /// Invalid symbol reference
37 constexpr SecRef() : val(0) {}
38
39 explicit constexpr SecRef(u32 id) : val(id) {}
40
41 u32 id() const { return val; }
42 bool valid() const { return val != 0; }
43
44 bool operator==(const SecRef &other) const { return other.val == val; }
45};
46
47struct Relocation {
48 u32 offset; ///< Offset inside section.
49 SymRef symbol; ///< References symbol.
50 u32 type; ///< Relocation type. File-format-specifc.
51 i32 addend; ///< Addend.
52};
53
54/// Section kinds, lowered to file-format specific flags.
55enum class SectionKind : u8 {
56 Text, ///< Text section, executable code (ELF .text)
57 ReadOnly, ///< Read-only data section (ELF .rodata)
58 EHFrame, ///< EH Frame section (ELF .eh_frame)
59 LSDA, ///< LSDA section (ELF .gcc_except_table)
60 Data, ///< Writable data section (ELF .data)
61 DataRelRO, ///< Read-only data section with relocations (ELF .data.rel.ro)
62 BSS, ///< Zero-initialized data section (ELF .bss)
63 ThreadData, ///< Initialized thread-local data section (ELF .tdata)
64 ThreadBSS, ///< Zero-initialized thread-local data section (ELF .tbss)
65
66 Max
67};
68
// Forward declaration only: DataSection names the ELF assembler as a friend.
namespace elf {
class AssemblerElf;
} // namespace elf
72
73struct DataSection {
74 friend class Assembler;
75 friend class elf::AssemblerElf;
76
77 /// 256 bytes inline storage is enough for 10 relocations, which is a typical
78 /// number for a single function (relevant for COMDAT sections with one
79 /// section per function).
80 using StorageTy = util::SmallVector<u8, 256>;
81
82 /// Section data.
83 StorageTy data;
84
85 u64 addr = 0; ///< Address (file-format-specific).
86 u64 vsize = 0; ///< Size of virtual section, otherwise data.size() is valid.
87 u32 type = 0; ///< Type (file-format-specific).
88 u32 flags = 0; ///< Flags (file-format-specific).
89 u32 name = 0; ///< Name (file-format-specific, can also be index, etc.).
90 u32 align = 1; ///< Alignment (bytes).
91
92private:
93 /// Section symbol, or signature symbol for SHT_GROUP sections.
94 SymRef sym = {};
95
96 SecRef sec_ref;
97
98 util::SmallVector<Relocation, 4> relocs;
99
100public:
101 /// Generic field for target-specific data.
102 void *target_info = nullptr;
103
104 /// Whether the section is virtual, i.e. has no content.
105 bool is_virtual;
106
107private:
108 /// Whether the section can have relocations. For ELF, this implies that the
109 /// immediately following section ID is reserved as relocation section and
110 /// that name-5..name is ".rela".
111 bool has_relocs;
112
113public:
114#ifndef NDEBUG
115 /// Whether the section is currently in use by a SectionWriter.
116 bool locked = false;
117#endif
118
119 DataSection(SecRef ref) : sec_ref(ref) {}
120
121 SecRef get_ref() const { return sec_ref; }
122
123 size_t size() const { return is_virtual ? vsize : data.size(); }
124
125 template <typename T>
126 void write(const T &t) {
127 assert(!locked);
128 assert(!is_virtual);
129 size_t off = data.size();
130 data.resize_uninitialized(data.size() + sizeof(T));
131 std::memcpy(data.data() + off, &t, sizeof(T));
132 }
133
134 size_t reloc_count() const {
135 assert(!is_virtual);
136 assert(has_relocs);
137 return relocs.size();
138 }
139
140 /// Moves all offsets of relocations backwards by the specified offset.
141 void adjust_relocation_offsets(const size_t reloc_start_off,
142 const u32 offset) {
143 for (size_t i = reloc_start_off; i < relocs.size(); i++) {
144 relocs[i].offset -= offset;
145 }
146 }
147};
148
149/// Assembler base class.
150class Assembler {
151public:
152 enum class SymBinding : u8 {
153 /// Symbol with local linkage, must be defined
155 /// Weak linkage
157 /// Global linkage
159 };
160
161 struct TargetInfo {
162 struct SectionFlags {
163 u32 type;
164 u32 flags;
165 u32 name;
166 u8 align = 1;
167 bool has_relocs = true;
168 bool is_bss = false;
169 };
170
171 /// The relocation type for 32-bit pc-relative offsets.
172 u32 reloc_pc32;
173 /// The relocation type for 64-bit absolute addresses.
174 u32 reloc_abs64;
175
176 /// Section flags for the different section kinds.
177 std::array<SectionFlags, unsigned(SectionKind::Max)> section_flags;
178 };
179
180protected:
181 const TargetInfo &target_info;
182
183 util::BumpAllocator<> section_allocator;
184 util::SmallVector<util::BumpAllocUniquePtr<DataSection>, 16> sections;
185
186 std::array<SecRef, unsigned(SectionKind::Max)> default_sections;
187
188 Assembler(const TargetInfo &target_info) : target_info(target_info) {}
189 virtual ~Assembler();
190
191public:
192 virtual void reset();
193
194 /// \name Sections
195 /// @{
196
197 DataSection &get_section(SecRef ref) {
198 assert(ref.valid());
199 return *sections[ref.id()];
200 }
201
202 const DataSection &get_section(SecRef ref) const {
203 assert(ref.valid());
204 return *sections[ref.id()];
205 }
206
207 SecRef create_section(const TargetInfo::SectionFlags &flags);
208
209 SecRef create_section(SectionKind kind) {
210 return create_section(target_info.section_flags[unsigned(kind)]);
211 }
212
213 SecRef get_default_section(SectionKind kind) {
214 SecRef &res = default_sections[unsigned(kind)];
215 if (!res.valid()) {
216 res = create_section(kind);
217 }
218 return res;
219 }
220
221 virtual void rename_section(SecRef, std::string_view name) = 0;
222
223 virtual SymRef section_symbol(SecRef) = 0;
224
225 /// @}
226
227 virtual SymRef sym_add_undef(std::string_view, SymBinding) = 0;
228 virtual SymRef sym_predef_func(std::string_view, SymBinding) = 0;
229 virtual SymRef sym_predef_data(std::string_view, SymBinding) = 0;
230 virtual SymRef sym_predef_tls(std::string_view, SymBinding) = 0;
231 /// Define a symbol at the specified location.
232 virtual void sym_def(SymRef, SecRef, u64 pos, u64 size) = 0;
233
234 /// Define symbol and allocate space for data; returns offset into section.
235 u32 sym_def_predef_data(SecRef sec, SymRef sym, u64 size, u32 align);
236
237 /// Define predefined symbol with the specified data.
239 SecRef sec, SymRef sym, std::span<const u8> data, u32 align, u32 *off);
240
241 [[nodiscard]] SymRef sym_def_data(SecRef sec,
242 std::string_view name,
243 std::span<const u8> data,
244 u32 align,
245 SymBinding binding,
246 u32 *off = nullptr) {
247 SymRef sym = sym_predef_data(name, binding);
248 sym_def_predef_data(sec, sym, data, align, off);
249 return sym;
250 }
251
252 /// Define predefined symbol with zero; also supported for BSS sections.
254 SecRef sec_ref, SymRef sym_ref, u32 size, u32 align, u32 *off = nullptr);
255
256
257 /// \name Relocations
258 /// @{
259
260 /// Add relocation. Type is file-format and target-specific.
261 void reloc_sec(SecRef sec, SymRef sym, u32 type, u32 offset, i64 addend) {
262 assert(i32(addend) == addend && "non-32-bit addends are unsupported");
263 get_section(sec).relocs.emplace_back(offset, sym, type, addend);
264 }
265
266 void reloc_pc32(SecRef sec, SymRef sym, u32 offset, i64 addend) {
267 reloc_sec(sec, sym, target_info.reloc_pc32, offset, addend);
268 }
269
270 void reloc_abs(SecRef sec, SymRef sym, u32 offset, i64 addend) {
271 reloc_sec(sec, sym, target_info.reloc_abs64, offset, addend);
272 }
273
274 /// @}
275
276 virtual void finalize() {}
277
278 virtual std::vector<u8> build_object_file() = 0;
279};
280
281} // namespace tpde
282
283#undef ARG
u32 sym_def_predef_data(SecRef sec, SymRef sym, u64 size, u32 align)
Define symbol and allocate space for data; returns offset into section.
void sym_def_predef_data(SecRef sec, SymRef sym, std::span< const u8 > data, u32 align, u32 *off)
Define predefined symbol with the specified data.
void sym_def_predef_zero(SecRef sec_ref, SymRef sym_ref, u32 size, u32 align, u32 *off=nullptr)
Define predefined symbol with zero; also supported for BSS sections.
@ LOCAL
Symbol with local linkage, must be defined.
void reloc_sec(SecRef sec, SymRef sym, u32 type, u32 offset, i64 addend)
Add relocation. Type is file-format and target-specific.
virtual void sym_def(SymRef, SecRef, u64 pos, u64 size)=0
Define a symbol at the specified location.