TPDE
CompilerBase.hpp
// SPDX-FileCopyrightText: 2025 Contributors to TPDE <https://tpde.org>
//
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#pragma once

#include <algorithm>
#include <array>
#include <functional>
#include <optional>
#include <unordered_map>
#include <utility>
#include <variant>
#include <vector>

#include "Analyzer.hpp"
#include "Compiler.hpp"
#include "CompilerConfig.hpp"
#include "IRAdaptor.hpp"
#include "tpde/AssignmentPartRef.hpp"
#include "tpde/FunctionWriter.hpp"
#include "tpde/RegisterFile.hpp"
#include "tpde/ValLocalIdx.hpp"
#include "tpde/ValueAssignment.hpp"
#include "tpde/base.hpp"
#include "tpde/util/function_ref.hpp"
#include "tpde/util/misc.hpp"

namespace tpde {
// TODO(ts): formulate a concept for the full compiler so that there is *some*
// check whether all the required derived methods are implemented?

/// Thread-local storage access mode
enum class TLSModel {
  GlobalDynamic,
  LocalDynamic,
  InitialExec,
  LocalExec,
};

struct CCAssignment {
  Reg reg = Reg::make_invalid(); ///< Assigned register, invalid implies stack.

  /// If non-zero, indicates that this and the next N values must be
  /// assigned to consecutive registers, or to the stack. The following
  /// values must be in the same register bank as this value. u8 is
  /// sufficient; no architecture has more than 255 parameter registers in a
  /// single bank.
  u8 consecutive = 0;
  bool sret : 1 = false; ///< Argument is return value pointer.

  /// The argument is passed by value on the stack. The provided argument is a
  /// pointer; for the call, size bytes will be copied into the corresponding
  /// stack slot. Behaves like LLVM's byval.
  ///
  /// Note: on x86-64 SysV, this is used to pass larger structs in memory.
  /// AArch64 AAPCS doesn't use byval for structs; instead, the pointer is
  /// passed without byval and it is the responsibility of the caller to
  /// explicitly copy the value.
  bool byval : 1 = false;

  /// Extend integer argument. The highest bit indicates signed-ness, the
  /// lower bits indicate the source width from which the argument should be
  /// extended.
  u8 int_ext = 0;

  u8 align = 0;             ///< Argument alignment
  RegBank bank = RegBank{}; ///< Register bank to assign the value to.
  u32 size = 0;             ///< Argument size, for byval the stack slot size.
  u32 stack_off = 0;        ///< Assigned stack slot, only valid if reg is invalid.
};
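
// Example int_ext encodings (matching how CallBuilderBase::add_arg builds and
// decodes the field below):
//   sign-extend from 8 bits  -> 0x80 | 8 = 0x88
//   zero-extend from 32 bits ->       32 = 0x20
//   no extension             ->        0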

struct CCInfo {
  // TODO: use RegBitSet
  const u64 allocatable_regs;
  const u64 callee_saved_regs;
  /// Possible argument registers; these registers will not be allocated until
  /// all arguments have been assigned.
  const u64 arg_regs;
};

class CCAssigner {
public:
  const CCInfo *ccinfo;

  CCAssigner(const CCInfo &ccinfo) noexcept : ccinfo(&ccinfo) {}
  virtual ~CCAssigner() noexcept {}

  virtual void reset() noexcept = 0;

  const CCInfo &get_ccinfo() const noexcept { return *ccinfo; }

  virtual void assign_arg(CCAssignment &cca) noexcept = 0;
  virtual u32 get_stack_size() noexcept = 0;
  /// Some calling conventions need different call behavior when calling a
  /// vararg function.
  virtual bool is_vararg() const noexcept { return false; }
  virtual void assign_ret(CCAssignment &cca) noexcept = 0;
};
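
// Usage sketch (illustrative): the caller fills in bank/size (for non-byval
// arguments) and calls assign_arg; on return, either cca.reg is a valid
// register or the argument was assigned the stack slot at cca.stack_off.
// See CallBuilderBase::add_arg below for the canonical call site.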

/// The base class for the compiler.
/// It implements the main platform-independent compilation logic and houses
/// the analyzer.
template <IRAdaptor Adaptor,
          typename Derived,
          CompilerConfig Config = CompilerConfigDefault>
struct CompilerBase {
  // some forwards for the IR type defs
  using IRValueRef = typename Adaptor::IRValueRef;
  using IRInstRef = typename Adaptor::IRInstRef;
  using IRBlockRef = typename Adaptor::IRBlockRef;
  using IRFuncRef = typename Adaptor::IRFuncRef;

  using BlockIndex = typename Analyzer<Adaptor>::BlockIndex;

  using Assembler = typename Config::Assembler;
  using AsmReg = typename Config::AsmReg;

  using RegisterFile = tpde::RegisterFile<Config::NUM_BANKS, 32>;

  /// A default implementation for ValRefSpecial.
  // Note: subclasses can override this; always use Derived::ValRefSpecial.
  struct ValRefSpecial {
    uint8_t mode = 4;
    u64 const_data;
  };

#pragma region CompilerData
  Adaptor *adaptor;
  Analyzer<Adaptor> analyzer;

  // data for frame management

  struct {
    /// The current size of the stack frame
    u32 frame_size = 0;
    /// Free-lists for 1/2/4/8/16-sized allocations
    // TODO(ts): make the allocations for 4/8 different from the others
    // since they are probably the ones most used?
    util::SmallVector<i32, 16> fixed_free_lists[5] = {};
    /// Free-lists for all other sizes
    // TODO(ts): think about which data structure we want here
    std::unordered_map<u32, std::vector<i32>> dynamic_free_lists{};
  } stack = {};
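
  // Free-list mapping (see allocate_stack_slot below): sizes are rounded up
  // to the next power of two, so fixed_free_lists[0] holds 1-byte slots,
  // [1] 2-byte, [2] 4-byte, [3] 8-byte and [4] 16-byte slots; larger sizes
  // are aligned to 16 bytes and go through dynamic_free_lists.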

  typename Analyzer<Adaptor>::BlockIndex cur_block_idx;

  // Assignments

  static constexpr ValLocalIdx INVALID_VAL_LOCAL_IDX =
      static_cast<ValLocalIdx>(~0u);

  // TODO(ts): think about different ways to store this that are maybe more
  // compact?
  struct {
    AssignmentAllocator allocator;

    std::array<u32, Config::NUM_BANKS> cur_fixed_assignment_count = {};
    util::SmallVector<ValueAssignment *, Analyzer<Adaptor>::SMALL_VALUE_NUM>
        value_ptrs;

    ValLocalIdx variable_ref_list;
    util::SmallVector<ValLocalIdx, Analyzer<Adaptor>::SMALL_BLOCK_NUM>
        delayed_free_lists;
  } assignments = {};

  RegisterFile register_file;
#ifndef NDEBUG
  /// Whether we are currently in the middle of generating branch-related code
  /// and therefore must not change any value-related state.
  bool generating_branch = false;
#endif

private:
  /// Default CCAssigner if the implementation doesn't override
  /// cur_cc_assigner.
  typename Config::DefaultCCAssigner default_cc_assigner;

public:
  Assembler assembler;
  Config::FunctionWriter text_writer;
  // TODO(ts): smallvector?
  std::vector<SymRef> func_syms;
  // TODO(ts): combine this with the block vectors in the analyzer to save on
  // allocations
  util::SmallVector<Label> block_labels;

  util::SmallVector<std::pair<SymRef, SymRef>, 4> personality_syms = {};

  struct ScratchReg;
  class ValuePart;
  struct ValuePartRef;
  struct ValueRef;
  struct GenericValuePart;
#pragma endregion

  struct InstRange {
    using Range = decltype(std::declval<Adaptor>().block_insts(
        std::declval<IRBlockRef>()));
    using Iter = decltype(std::declval<Range>().begin());
    using EndIter = decltype(std::declval<Range>().end());
    Iter from;
    EndIter to;
  };

  struct CallArg {
    enum class Flag : u8 {
      none,
      zext,
      sext,
      sret,
      byval
    };

    explicit CallArg(IRValueRef value,
                     Flag flags = Flag::none,
                     u8 byval_align = 0,
                     u32 byval_size = 0)
        : value(value),
          flag(flags),
          byval_align(byval_align),
          byval_size(byval_size) {}

    IRValueRef value;
    Flag flag;
    u8 byval_align;
    u8 ext_bits = 0;
    u32 byval_size;
  };
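
  // Example (illustrative): passing a 24-byte struct by value on the stack
  // would use CallArg{v, CallArg::Flag::byval, /*byval_align=*/8,
  // /*byval_size=*/24}. For zext/sext arguments, ext_bits must additionally
  // be set to the source bit width (see add_arg below).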

  template <typename CBDerived>
  class CallBuilderBase {
  protected:
    Derived &compiler;
    CCAssigner &assigner;

    RegisterFile::RegBitSet arg_regs{};

  public:
    CallBuilderBase(Derived &compiler, CCAssigner &assigner) noexcept
        : compiler(compiler), assigner(assigner) {}

    // CBDerived needs:
    // void add_arg_byval(ValuePart &vp, CCAssignment &cca) noexcept;
    // void add_arg_stack(ValuePart &vp, CCAssignment &cca) noexcept;
    // void call_impl(std::variant<SymRef, ValuePart> &&) noexcept;
    CBDerived *derived() noexcept { return static_cast<CBDerived *>(this); }

    void add_arg(ValuePart &&vp, CCAssignment cca) noexcept;
    void add_arg(const CallArg &arg, u32 part_count) noexcept;
    void add_arg(const CallArg &arg) noexcept {
      add_arg(arg, compiler.val_parts(arg.value).count());
    }

    // evict registers, do call, reset stack frame
    void call(std::variant<SymRef, ValuePart>) noexcept;

    void add_ret(ValuePart &vp, CCAssignment cca) noexcept;
    void add_ret(ValuePart &&vp, CCAssignment cca) noexcept {
      add_ret(vp, cca);
    }
    void add_ret(ValueRef &vr) noexcept;
  };
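
  // Illustrative call sequence from a target backend, assuming the target
  // provides a CallBuilder deriving from CallBuilderBase (name illustrative):
  //   typename Derived::CallBuilder cb{*derived(), *cur_cc_assigner()};
  //   cb.add_arg(CallArg{arg_val});  // assign each argument
  //   cb.call(func_sym);             // evict clobbered regs, emit the call
  //   cb.add_ret(result_vr);         // bind return-value registers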

  class RetBuilder {
    Derived &compiler;
    CCAssigner &assigner;

    RegisterFile::RegBitSet ret_regs{};

  public:
    RetBuilder(Derived &compiler, CCAssigner &assigner) noexcept
        : compiler(compiler), assigner(assigner) {
      assigner.reset();
    }

    void add(ValuePart &&vp, CCAssignment cca) noexcept;
    void add(IRValueRef val) noexcept;

    void ret() noexcept;
  };
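
  // Illustrative use when compiling a return instruction:
  //   RetBuilder rb{*derived(), *cur_cc_assigner()};
  //   rb.add(ret_val);  // once per returned IR value
  //   rb.ret();         // emits the epilog and releases registers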

  /// Initialize a CompilerBase, should be called by the derived classes
  explicit CompilerBase(Adaptor *adaptor)
      : adaptor(adaptor), analyzer(adaptor), assembler() {
    static_assert(std::is_base_of_v<CompilerBase, Derived>);
    static_assert(Compiler<Derived, Config>);
  }

  /// Shortcut for casting to the Derived class so that overloading works
  Derived *derived() { return static_cast<Derived *>(this); }

  const Derived *derived() const { return static_cast<const Derived *>(this); }

  [[nodiscard]] ValLocalIdx val_idx(const IRValueRef value) const noexcept {
    return analyzer.adaptor->val_local_idx(value);
  }

  [[nodiscard]] ValueAssignment *
      val_assignment(const ValLocalIdx idx) noexcept {
    return assignments.value_ptrs[static_cast<u32>(idx)];
  }

  /// Compile the functions returned by Adaptor::funcs
  ///
  /// \warning If you intend to call this multiple times, you must call reset
  /// in-between the calls.
  ///
  /// \returns Whether the compilation was successful
  bool compile();

  /// Reset any leftover data from the previous compilation such that it will
  /// not affect the next compilation
  void reset();

  /// Get CCAssigner for current function.
  CCAssigner *cur_cc_assigner() noexcept { return &default_cc_assigner; }

  void init_assignment(IRValueRef value, ValLocalIdx local_idx) noexcept;

private:
  /// Frees an assignment, its stack slot and registers
  void free_assignment(ValLocalIdx local_idx, ValueAssignment *) noexcept;

public:
  /// Release an assignment when the reference count drops to zero; either
  /// frees the assignment immediately or delays the free to the end of the
  /// live range.
  void release_assignment(ValLocalIdx local_idx, ValueAssignment *) noexcept;

  /// Init a variable-ref assignment
  void init_variable_ref(ValLocalIdx local_idx, u32 var_ref_data) noexcept;
  /// Init a variable-ref assignment
  void init_variable_ref(IRValueRef value, u32 var_ref_data) noexcept {
    init_variable_ref(adaptor->val_local_idx(value), var_ref_data);
  }

  i32 allocate_stack_slot(u32 size) noexcept;
  void free_stack_slot(u32 slot, u32 size) noexcept;

  template <typename Fn>
  void handle_func_arg(u32 arg_idx, IRValueRef arg, Fn add_arg) noexcept;

  ValueRef val_ref(IRValueRef value) noexcept;

  std::pair<ValueRef, ValuePartRef> val_ref_single(IRValueRef value) noexcept;

  /// Get a defining reference to a value
  ValueRef result_ref(IRValueRef value) noexcept;

  std::pair<ValueRef, ValuePartRef>
      result_ref_single(IRValueRef value) noexcept;

  [[deprecated("Use ValuePartRef::set_value")]]
  void set_value(ValuePartRef &val_ref, ScratchReg &scratch) noexcept;
  [[deprecated("Use ValuePartRef::set_value")]]
  void set_value(ValuePartRef &&val_ref, ScratchReg &scratch) noexcept {
    set_value(val_ref, scratch);
  }

  /// Get a generic value part into a single register, evaluating expressions
  /// and materializing immediates as required.
  AsmReg gval_as_reg(GenericValuePart &gv) noexcept;

  /// Like gval_as_reg; if the GenericValuePart owns a reusable register
  /// (either a ScratchReg, possibly due to materialization, or a reusable
  /// ValuePartRef), store it in dst.
  AsmReg gval_as_reg_reuse(GenericValuePart &gv, ScratchReg &dst) noexcept;

private:
  Reg select_reg_evict(RegBank bank, u64 exclusion_mask) noexcept;

public:
  /// Select an available register, evicting loaded values if needed.
  Reg select_reg(RegBank bank, u64 exclusion_mask) noexcept {
    Reg res = register_file.find_first_free_excluding(bank, exclusion_mask);
    if (res.valid()) [[likely]] {
      return res;
    }
    return select_reg_evict(bank, exclusion_mask);
  }

  /// Reload a value part from memory or recompute variable address.
  void reload_to_reg(AsmReg dst, AssignmentPartRef ap) noexcept;

  void allocate_spill_slot(AssignmentPartRef ap) noexcept;

  /// Ensure the value is spilled in its stack slot (except variable refs).
  void spill(AssignmentPartRef ap) noexcept;

  /// Evict the value from its register, spilling if needed, and free the
  /// register.
  void evict(AssignmentPartRef ap) noexcept;

  /// Evict the value currently held in reg, spilling if needed, and free the
  /// register.
  void evict_reg(Reg reg) noexcept;

  /// Free the register. Requires that the contained value is already spilled.
  void free_reg(Reg reg) noexcept;

  // TODO(ts): switch to a branch_spill_before naming style?
  typename RegisterFile::RegBitSet
      spill_before_branch(bool force_spill = false) noexcept;
  void release_spilled_regs(typename RegisterFile::RegBitSet) noexcept;

  /// When reaching a point in the function where no other blocks will be
  /// reached anymore, use this function to release the register assignments
  /// after the end of that block so the compiler does not accidentally use
  /// registers which don't contain any values.
  void release_regs_after_return() noexcept;

  /// Indicate beginning of region where value-state must not change.
  void begin_branch_region() noexcept {
#ifndef NDEBUG
    assert(!generating_branch);
    generating_branch = true;
#endif
  }

  /// Indicate end of region where value-state must not change.
  void end_branch_region() noexcept {
#ifndef NDEBUG
    assert(generating_branch);
    generating_branch = false;
#endif
  }
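
  // Illustrative branch-emission sequence (simplified; the exact driver code
  // lives in the target backends). The region brackets the code that moves
  // PHI values and emits the branch, during which value state stays frozen:
  //   auto spilled = spill_before_branch();
  //   begin_branch_region();
  //   move_to_phi_nodes(target);
  //   /* emit conditional/unconditional jump */
  //   end_branch_region();
  //   release_spilled_regs(spilled);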

#ifndef NDEBUG
  bool may_change_value_state() const noexcept { return !generating_branch; }
#endif

  void move_to_phi_nodes(BlockIndex target) noexcept {
    if (analyzer.block_has_phis(target)) {
      move_to_phi_nodes_impl(target);
    }
  }

  void move_to_phi_nodes_impl(BlockIndex target) noexcept;

  bool branch_needs_split(IRBlockRef target) noexcept {
    // for now, if the target has PHI-nodes, we split
    return analyzer.block_has_phis(target);
  }

  BlockIndex next_block() const noexcept;

  bool try_force_fixed_assignment(IRValueRef) const noexcept { return false; }

  bool hook_post_func_sym_init() noexcept { return true; }

  void analysis_start() noexcept {}

  void analysis_end() noexcept {}

  void reloc_text(SymRef sym, u32 type, u64 offset, i64 addend = 0) noexcept {
    this->assembler.reloc_sec(
        text_writer.get_sec_ref(), sym, type, offset, addend);
  }

  void label_place(Label label) noexcept {
    this->text_writer.label_place(label, text_writer.offset());
  }

protected:
  SymRef get_personality_sym() noexcept;

  bool compile_func(IRFuncRef func, u32 func_idx) noexcept;

  bool compile_block(IRBlockRef block, u32 block_idx) noexcept;
};
} // namespace tpde

#include "GenericValuePart.hpp"
#include "ScratchReg.hpp"
#include "ValuePartRef.hpp"
#include "ValueRef.hpp"

namespace tpde {

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
template <typename CBDerived>
void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
    CBDerived>::add_arg(ValuePart &&vp, CCAssignment cca) noexcept {
  if (!cca.byval) {
    cca.bank = vp.bank();
    cca.size = vp.part_size();
  }

  assigner.assign_arg(cca);
  bool needs_ext = cca.int_ext != 0;
  bool ext_sign = cca.int_ext >> 7;
  unsigned ext_bits = cca.int_ext & 0x3f;

  if (cca.byval) {
    derived()->add_arg_byval(vp, cca);
    vp.reset(&compiler);
  } else if (!cca.reg.valid()) {
    if (needs_ext) {
      auto ext = std::move(vp).into_extended(&compiler, ext_sign, ext_bits, 64);
      derived()->add_arg_stack(ext, cca);
      ext.reset(&compiler);
    } else {
      derived()->add_arg_stack(vp, cca);
    }
    vp.reset(&compiler);
  } else {
    u32 size = vp.part_size();
    if (vp.is_in_reg(cca.reg)) {
      if (!vp.can_salvage()) {
        compiler.evict_reg(cca.reg);
      } else {
        vp.salvage(&compiler);
      }
      if (needs_ext) {
        compiler.generate_raw_intext(cca.reg, cca.reg, ext_sign, ext_bits, 64);
      }
    } else {
      if (compiler.register_file.is_used(cca.reg)) {
        compiler.evict_reg(cca.reg);
      }
      if (vp.can_salvage()) {
        AsmReg vp_reg = vp.salvage(&compiler);
        if (needs_ext) {
          compiler.generate_raw_intext(cca.reg, vp_reg, ext_sign, ext_bits, 64);
        } else {
          compiler.mov(cca.reg, vp_reg, size);
        }
      } else {
        vp.reload_into_specific_fixed(&compiler, cca.reg);
        if (needs_ext) {
          compiler.generate_raw_intext(
              cca.reg, cca.reg, ext_sign, ext_bits, 64);
        }
      }
    }
    vp.reset(&compiler);
    assert(!compiler.register_file.is_used(cca.reg));
    compiler.register_file.mark_clobbered(cca.reg);
    compiler.register_file.allocatable &= ~(u64{1} << cca.reg.id());
    arg_regs |= (1ull << cca.reg.id());
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
template <typename CBDerived>
void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
    CBDerived>::add_arg(const CallArg &arg, u32 part_count) noexcept {
  ValueRef vr = compiler.val_ref(arg.value);

  if (arg.flag == CallArg::Flag::byval) {
    assert(part_count == 1);
    add_arg(vr.part(0),
            CCAssignment{
                .byval = true,
                .align = arg.byval_align,
                .size = arg.byval_size,
            });
    return;
  }

  u32 align = 1;
  bool consecutive = false;
  u32 consec_def = 0;
  if (compiler.arg_is_int128(arg.value)) {
    // TODO: this also applies to composites with 16-byte alignment
    align = 16;
    consecutive = true;
  } else if (part_count > 1 &&
             !compiler.arg_allow_split_reg_stack_passing(arg.value)) {
    consecutive = true;
    if (part_count > UINT8_MAX) {
      // Must be completely passed on the stack.
      consecutive = false;
      consec_def = -1;
    }
  }

  for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
    u8 int_ext = 0;
    if (arg.flag == CallArg::Flag::sext || arg.flag == CallArg::Flag::zext) {
      assert(arg.ext_bits != 0 && "cannot extend zero-bit integer");
      int_ext = arg.ext_bits | (arg.flag == CallArg::Flag::sext ? 0x80 : 0);
    }
    derived()->add_arg(
        vr.part(part_idx),
        CCAssignment{
            .consecutive =
                u8(consecutive ? part_count - part_idx - 1 : consec_def),
            .sret = arg.flag == CallArg::Flag::sret,
            .int_ext = int_ext,
            .align = u8(part_idx == 0 ? align : 1),
        });
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
template <typename CBDerived>
void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<CBDerived>::call(
    std::variant<SymRef, ValuePart> target) noexcept {
  typename RegisterFile::RegBitSet skip_evict = arg_regs;
  if (auto *vp = std::get_if<ValuePart>(&target); vp && vp->can_salvage()) {
    // call_impl will reset vp, thereby unlock+free the register.
    assert(vp->cur_reg_unlocked().valid() && "can_salvage implies register");
    skip_evict |= (1ull << vp->cur_reg_unlocked().id());
  }

  auto clobbered = ~assigner.get_ccinfo().callee_saved_regs;
  for (auto reg_id : util::BitSetIterator<>{compiler.register_file.used &
                                            clobbered & ~skip_evict}) {
    compiler.evict_reg(AsmReg{reg_id});
    compiler.register_file.mark_clobbered(Reg{reg_id});
  }

  derived()->call_impl(std::move(target));

  assert((compiler.register_file.allocatable & arg_regs) == 0);
  compiler.register_file.allocatable |= arg_regs;
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
template <typename CBDerived>
void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
    CBDerived>::add_ret(ValuePart &vp, CCAssignment cca) noexcept {
  cca.bank = vp.bank();
  cca.size = vp.part_size();
  assigner.assign_ret(cca);
  assert(cca.reg.valid() && "return value must be in register");
  vp.set_value_reg(&compiler, cca.reg);
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
template <typename CBDerived>
void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
    CBDerived>::add_ret(ValueRef &vr) noexcept {
  assert(vr.has_assignment());
  u32 part_count = vr.assignment()->part_count;
  for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
    CCAssignment cca;
    add_ret(vr.part(part_idx), cca);
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::RetBuilder::add(
    ValuePart &&vp, CCAssignment cca) noexcept {
  cca.bank = vp.bank();
  u32 size = cca.size = vp.part_size();
  assigner.assign_ret(cca);
  assert(cca.reg.valid() && "indirect return value must use sret argument");

  bool needs_ext = cca.int_ext != 0;
  bool ext_sign = cca.int_ext >> 7;
  unsigned ext_bits = cca.int_ext & 0x3f;

  if (vp.is_in_reg(cca.reg)) {
    if (!vp.can_salvage()) {
      compiler.evict_reg(cca.reg);
    } else {
      vp.salvage(&compiler);
    }
    if (needs_ext) {
      compiler.generate_raw_intext(cca.reg, cca.reg, ext_sign, ext_bits, 64);
    }
  } else {
    if (compiler.register_file.is_used(cca.reg)) {
      compiler.evict_reg(cca.reg);
    }
    if (vp.can_salvage()) {
      AsmReg vp_reg = vp.salvage(&compiler);
      if (needs_ext) {
        compiler.generate_raw_intext(cca.reg, vp_reg, ext_sign, ext_bits, 64);
      } else {
        compiler.mov(cca.reg, vp_reg, size);
      }
    } else {
      vp.reload_into_specific_fixed(&compiler, cca.reg);
      if (needs_ext) {
        compiler.generate_raw_intext(cca.reg, cca.reg, ext_sign, ext_bits, 64);
      }
    }
  }
  vp.reset(&compiler);
  assert(!compiler.register_file.is_used(cca.reg));
  compiler.register_file.mark_clobbered(cca.reg);
  compiler.register_file.allocatable &= ~(u64{1} << cca.reg.id());
  ret_regs |= (1ull << cca.reg.id());
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::RetBuilder::add(
    IRValueRef val) noexcept {
  u32 part_count = compiler.val_parts(val).count();
  ValueRef vr = compiler.val_ref(val);
  for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
    add(vr.part(part_idx), CCAssignment{});
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::RetBuilder::ret() noexcept {
  assert((compiler.register_file.allocatable & ret_regs) == 0);
  compiler.register_file.allocatable |= ret_regs;

  compiler.gen_func_epilog();
  compiler.release_regs_after_return();
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
bool CompilerBase<Adaptor, Derived, Config>::compile() {
  // create function symbols
  text_writer.switch_section(
      assembler.get_section(assembler.get_text_section()));

  assert(func_syms.empty());
  for (const IRFuncRef func : adaptor->funcs()) {
    auto binding = Assembler::SymBinding::GLOBAL;
    if (adaptor->func_has_weak_linkage(func)) {
      binding = Assembler::SymBinding::WEAK;
    } else if (adaptor->func_only_local(func)) {
      binding = Assembler::SymBinding::LOCAL;
    }
    if (adaptor->func_extern(func)) {
      func_syms.push_back(derived()->assembler.sym_add_undef(
          adaptor->func_link_name(func), binding));
    } else {
      func_syms.push_back(derived()->assembler.sym_predef_func(
          adaptor->func_link_name(func), binding));
    }
    derived()->define_func_idx(func, func_syms.size() - 1);
  }

  if (!derived()->hook_post_func_sym_init()) {
    TPDE_LOG_ERR("hook_post_func_sym_init failed");
    return false;
  }

  // TODO(ts): create function labels?

  bool success = true;

  u32 func_idx = 0;
  for (const IRFuncRef func : adaptor->funcs()) {
    if (adaptor->func_extern(func)) {
      TPDE_LOG_TRACE("Skipping compilation of func {}",
                     adaptor->func_link_name(func));
      ++func_idx;
      continue;
    }

    TPDE_LOG_TRACE("Compiling func {}", adaptor->func_link_name(func));
    if (!derived()->compile_func(func, func_idx)) {
      TPDE_LOG_ERR("Failed to compile function {}",
                   adaptor->func_link_name(func));
      success = false;
    }
    ++func_idx;
  }

  text_writer.flush();
  assembler.finalize();

  // TODO(ts): generate object/map?

  return success;
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::reset() {
  adaptor->reset();

  for (auto &e : stack.fixed_free_lists) {
    e.clear();
  }
  stack.dynamic_free_lists.clear();

  assembler.reset();
  func_syms.clear();
  block_labels.clear();
  personality_syms.clear();
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::init_assignment(
    IRValueRef value, ValLocalIdx local_idx) noexcept {
  assert(val_assignment(local_idx) == nullptr);
  TPDE_LOG_TRACE("Initializing assignment for value {}",
                 static_cast<u32>(local_idx));

  const auto parts = derived()->val_parts(value);
  const u32 part_count = parts.count();
  assert(part_count > 0);
  auto *assignment = assignments.allocator.allocate(part_count);
  assignments.value_ptrs[static_cast<u32>(local_idx)] = assignment;

  u32 max_part_size = 0;
  for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
    auto ap = AssignmentPartRef{assignment, part_idx};
    ap.reset();
    ap.set_bank(parts.reg_bank(part_idx));
    const u32 size = parts.size_bytes(part_idx);
    assert(size > 0);
    max_part_size = std::max(max_part_size, size);
    ap.set_part_size(size);
  }

  const auto &liveness = analyzer.liveness_info(local_idx);

  // If there is only one part, try to hand out a fixed assignment, provided
  // the value is used for longer than one block and there aren't too many
  // definitions in child loops that this could interfere with.
  // TODO(ts): try out only fixed assignments if the value is live for more
  // than two blocks?
  // TODO(ts): move this to ValuePartRef::alloc_reg to be able to defer this
  // for results?
  if (part_count == 1) {
    const auto &cur_loop =
        analyzer.loop_from_idx(analyzer.block_loop_idx(cur_block_idx));
    auto ap = AssignmentPartRef{assignment, 0};

    auto try_fixed =
        liveness.last > cur_block_idx &&
        cur_loop.definitions_in_childs +
                assignments.cur_fixed_assignment_count[ap.bank().id()] <
            Derived::NUM_FIXED_ASSIGNMENTS[ap.bank().id()];
    if (derived()->try_force_fixed_assignment(value)) {
      try_fixed = assignments.cur_fixed_assignment_count[ap.bank().id()] <
                  Derived::NUM_FIXED_ASSIGNMENTS[ap.bank().id()];
    }

    if (try_fixed) {
      // check if there is a fixed register available
      AsmReg reg = derived()->select_fixed_assignment_reg(ap, value);
      TPDE_LOG_TRACE("Trying to assign fixed reg to value {}",
                     static_cast<u32>(local_idx));

      // TODO: if the register is used, we can free it most of the time, but
      // not always, e.g. for PHI nodes. Detect this case and free_reg
      // otherwise.
      if (!reg.invalid() && !register_file.is_used(reg)) {
        TPDE_LOG_TRACE("Assigning fixed assignment to reg {} for value {}",
                       reg.id(),
                       static_cast<u32>(local_idx));
        ap.set_reg(reg);
        ap.set_register_valid(true);
        ap.set_fixed_assignment(true);
        register_file.mark_used(reg, local_idx, 0);
        register_file.inc_lock_count(reg); // fixed assignments always locked
        register_file.mark_clobbered(reg);
        ++assignments.cur_fixed_assignment_count[ap.bank().id()];
      }
    }
  }

  const auto last_full = liveness.last_full;
  const auto ref_count = liveness.ref_count;

  assert(max_part_size <= 256);
  assignment->max_part_size = max_part_size;
  assignment->pending_free = false;
  assignment->variable_ref = false;
  assignment->stack_variable = false;
  assignment->delay_free = last_full;
  assignment->part_count = part_count;
  assignment->frame_off = 0;
  assignment->references_left = ref_count;
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::free_assignment(
    ValLocalIdx local_idx, ValueAssignment *assignment) noexcept {
  TPDE_LOG_TRACE("Freeing assignment for value {}",
                 static_cast<u32>(local_idx));

  assert(assignments.value_ptrs[static_cast<u32>(local_idx)] == assignment);
  assignments.value_ptrs[static_cast<u32>(local_idx)] = nullptr;
  const auto is_var_ref = assignment->variable_ref;
  const u32 part_count = assignment->part_count;

  // free registers
  for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
    auto ap = AssignmentPartRef{assignment, part_idx};
    if (ap.fixed_assignment()) [[unlikely]] {
      const auto reg = ap.get_reg();
      assert(register_file.is_fixed(reg));
      assert(register_file.reg_local_idx(reg) == local_idx);
      assert(register_file.reg_part(reg) == part_idx);
      --assignments.cur_fixed_assignment_count[ap.bank().id()];
      register_file.dec_lock_count_must_zero(reg); // release lock for fixed reg
      register_file.unmark_used(reg);
    } else if (ap.register_valid()) {
      const auto reg = ap.get_reg();
      assert(!register_file.is_fixed(reg));
      register_file.unmark_used(reg);
    }
  }

#ifdef TPDE_ASSERTS
  for (auto reg_id : register_file.used_regs()) {
    assert(register_file.reg_local_idx(AsmReg{reg_id}) != local_idx &&
           "freeing assignment that is still referenced by a register");
  }
#endif

  // variable references do not have a stack slot
  if (!is_var_ref && assignment->frame_off != 0) {
    free_stack_slot(assignment->frame_off, assignment->size());
  }

  assignments.allocator.deallocate(assignment);
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
[[gnu::noinline]] void
    CompilerBase<Adaptor, Derived, Config>::release_assignment(
        ValLocalIdx local_idx, ValueAssignment *assignment) noexcept {
  if (!assignment->delay_free) {
    free_assignment(local_idx, assignment);
    return;
  }

  // need to wait until release
  TPDE_LOG_TRACE("Delay freeing assignment for value {}",
                 static_cast<u32>(local_idx));
  const auto &liveness = analyzer.liveness_info(local_idx);
  auto &free_list_head = assignments.delayed_free_lists[u32(liveness.last)];
  assignment->next_delayed_free_entry = free_list_head;
  assignment->pending_free = true;
  free_list_head = local_idx;
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::init_variable_ref(
    ValLocalIdx local_idx, u32 var_ref_data) noexcept {
  TPDE_LOG_TRACE("Initializing variable-ref assignment for value {}",
                 static_cast<u32>(local_idx));

  assert(val_assignment(local_idx) == nullptr);
  auto *assignment = assignments.allocator.allocate_slow(1, true);
  assignments.value_ptrs[static_cast<u32>(local_idx)] = assignment;

  assignment->max_part_size = Config::PLATFORM_POINTER_SIZE;
  assignment->variable_ref = true;
  assignment->stack_variable = false;
  assignment->part_count = 1;
  assignment->var_ref_custom_idx = var_ref_data;
  assignment->next_delayed_free_entry = assignments.variable_ref_list;

  assignments.variable_ref_list = local_idx;

  AssignmentPartRef ap{assignment, 0};
  ap.reset();
  ap.set_bank(Config::GP_BANK);
  ap.set_part_size(Config::PLATFORM_POINTER_SIZE);
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
i32 CompilerBase<Adaptor, Derived, Config>::allocate_stack_slot(
    u32 size) noexcept {
  unsigned align_bits = 4;
  if (size == 0) {
    return 0; // 0 is the "invalid" stack slot
  } else if (size <= 16) {
    // Align up to next power of two.
    u32 free_list_idx = size == 1 ? 0 : 32 - util::cnt_lz<u32>(size - 1);
    assert(size <= 1u << free_list_idx);
    size = 1 << free_list_idx;
    align_bits = free_list_idx;

    if (!stack.fixed_free_lists[free_list_idx].empty()) {
      auto slot = stack.fixed_free_lists[free_list_idx].back();
      stack.fixed_free_lists[free_list_idx].pop_back();
      return slot;
    }
  } else {
    size = util::align_up(size, 16);
    auto it = stack.dynamic_free_lists.find(size);
    if (it != stack.dynamic_free_lists.end() && !it->second.empty()) {
      const auto slot = it->second.back();
      it->second.pop_back();
      return slot;
    }
  }

  assert(stack.frame_size != ~0u &&
         "cannot allocate stack slot before stack frame is initialized");

  // Align frame_size to align_bits
  for (u32 list_idx = util::cnt_tz(stack.frame_size); list_idx < align_bits;
       list_idx = util::cnt_tz(stack.frame_size)) {
    i32 slot = stack.frame_size;
    if constexpr (Config::FRAME_INDEXING_NEGATIVE) {
      slot = -(slot + (1ull << list_idx));
    }
    stack.fixed_free_lists[list_idx].push_back(slot);
    stack.frame_size += 1ull << list_idx;
  }

  auto slot = stack.frame_size;
  assert(slot != 0 && "stack slot 0 is reserved");
  stack.frame_size += size;

  if constexpr (Config::FRAME_INDEXING_NEGATIVE) {
    slot = -(slot + size);
  }
  return slot;
}
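
// Worked example (illustrative): allocate_stack_slot(6) rounds the request up
// to 8 bytes (free-list index 3 = 32 - cnt_lz(5)), reuses a free 8-byte slot
// if one exists, and otherwise pads frame_size up to 8-byte alignment (adding
// the padding holes to the smaller free lists) before carving out a new slot.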

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::free_stack_slot(
    u32 slot, u32 size) noexcept {
  if (size == 0) [[unlikely]] {
    assert(slot == 0 && "unexpected slot for zero-sized stack-slot?");
    // Do nothing.
  } else if (size <= 16) [[likely]] {
    u32 free_list_idx = size == 1 ? 0 : 32 - util::cnt_lz<u32>(size - 1);
    stack.fixed_free_lists[free_list_idx].push_back(slot);
  } else {
    size = util::align_up(size, 16);
    stack.dynamic_free_lists[size].push_back(slot);
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
template <typename Fn>
void CompilerBase<Adaptor, Derived, Config>::handle_func_arg(
    u32 arg_idx, IRValueRef arg, Fn add_arg) noexcept {
  ValueRef vr = derived()->result_ref(arg);
  if (adaptor->cur_arg_is_byval(arg_idx)) {
    std::optional<i32> byval_frame_off =
        add_arg(vr.part(0),
                CCAssignment{
                    .byval = true,
                    .align = u8(adaptor->cur_arg_byval_align(arg_idx)),
                    .size = adaptor->cur_arg_byval_size(arg_idx),
                });

    if (byval_frame_off) {
      // We need to convert the assignment into a stack variable ref.
      ValLocalIdx local_idx = val_idx(arg);
      // TODO: we shouldn't create the result_ref for such cases in the first
      // place. However, this is not easy to detect up front; it depends on the
      // target and the calling convention whether this is possible.
      vr.reset();
      // Value assignment might have been free'd by ValueRef reset.
      if (ValueAssignment *assignment = val_assignment(local_idx)) {
        free_assignment(local_idx, assignment);
      }
      init_variable_ref(local_idx, 0);
      ValueAssignment *assignment = this->val_assignment(local_idx);
      assignment->stack_variable = true;
      assignment->frame_off = *byval_frame_off;
    }
    return;
  }

  if (adaptor->cur_arg_is_sret(arg_idx)) {
    add_arg(vr.part(0), CCAssignment{.sret = true});
    return;
  }

  const u32 part_count = vr.assignment()->part_count;

  u32 align = 1;
  u32 consecutive = 0;
  u32 consec_def = 0;
  if (derived()->arg_is_int128(arg)) {
    // TODO: this also applies to composites with 16-byte alignment
    align = 16;
    consecutive = 1;
  } else if (part_count > 1 &&
             !derived()->arg_allow_split_reg_stack_passing(arg)) {
    consecutive = 1;
    if (part_count > UINT8_MAX) {
      // Must be completely passed on the stack.
      consecutive = 0;
      consec_def = -1;
    }
  }

  for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
    add_arg(vr.part(part_idx),
            CCAssignment{
                .consecutive =
                    u8(consecutive ? part_count - part_idx - 1 : consec_def),
                .align = u8(part_idx == 0 ? align : 1),
            });
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::ValueRef
    CompilerBase<Adaptor, Derived, Config>::val_ref(IRValueRef value) noexcept {
  if (auto special = derived()->val_ref_special(value); special) {
    return ValueRef{this, std::move(*special)};
  }

  const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(value);
  assert(val_assignment(local_idx) != nullptr && "value use before def");
  return ValueRef{this, local_idx};
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
std::pair<typename CompilerBase<Adaptor, Derived, Config>::ValueRef,
          typename CompilerBase<Adaptor, Derived, Config>::ValuePartRef>
    CompilerBase<Adaptor, Derived, Config>::val_ref_single(
        IRValueRef value) noexcept {
  std::pair<ValueRef, ValuePartRef> res{val_ref(value), this};
  res.second = res.first.part(0);
  return res;
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::ValueRef
    CompilerBase<Adaptor, Derived, Config>::result_ref(
        IRValueRef value) noexcept {
  const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(value);
  if (val_assignment(local_idx) == nullptr) {
    init_assignment(value, local_idx);
  }
  return ValueRef{this, local_idx};
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
std::pair<typename CompilerBase<Adaptor, Derived, Config>::ValueRef,
          typename CompilerBase<Adaptor, Derived, Config>::ValuePartRef>
    CompilerBase<Adaptor, Derived, Config>::result_ref_single(
        IRValueRef value) noexcept {
  std::pair<ValueRef, ValuePartRef> res{result_ref(value), this};
  res.second = res.first.part(0);
  return res;
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::set_value(
    ValuePartRef &val_ref, ScratchReg &scratch) noexcept {
  val_ref.set_value(std::move(scratch));
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::AsmReg
    CompilerBase<Adaptor, Derived, Config>::gval_as_reg(
        GenericValuePart &gv) noexcept {
  if (std::holds_alternative<ScratchReg>(gv.state)) {
    return std::get<ScratchReg>(gv.state).cur_reg();
  }
  if (std::holds_alternative<ValuePartRef>(gv.state)) {
    auto &vpr = std::get<ValuePartRef>(gv.state);
    if (vpr.has_reg()) {
      return vpr.cur_reg();
    }
    return vpr.load_to_reg();
  }
  if (auto *expr = std::get_if<typename GenericValuePart::Expr>(&gv.state)) {
    if (expr->has_base() && !expr->has_index() && expr->disp == 0) {
      return expr->base_reg();
    }
    return derived()->gval_expr_as_reg(gv);
  }
  TPDE_UNREACHABLE("gval_as_reg on empty GenericValuePart");
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::AsmReg
    CompilerBase<Adaptor, Derived, Config>::gval_as_reg_reuse(
        GenericValuePart &gv, ScratchReg &dst) noexcept {
  AsmReg reg = gval_as_reg(gv);
  if (!dst.has_reg()) {
    if (auto *scratch = std::get_if<ScratchReg>(&gv.state)) {
      dst = std::move(*scratch);
    } else if (auto *val_ref = std::get_if<ValuePartRef>(&gv.state)) {
      if (val_ref->can_salvage()) {
        dst.alloc_specific(val_ref->salvage());
        assert(dst.cur_reg() == reg && "salvaging unsuccessful");
      }
    }
  }
  return reg;
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
Reg CompilerBase<Adaptor, Derived, Config>::select_reg_evict(
    RegBank bank, u64 exclusion_mask) noexcept {
  TPDE_LOG_DBG("select_reg_evict for bank {}", bank.id());
  auto candidates =
      register_file.used & register_file.bank_regs(bank) & ~exclusion_mask;

  Reg candidate = Reg::make_invalid();
  u32 max_score = 0;
  for (auto reg_id : util::BitSetIterator<>(candidates)) {
    Reg reg{reg_id};
    if (register_file.is_fixed(reg)) {
      continue;
    }

    // Must be an evictable value, not a temporary.
    auto local_idx = register_file.reg_local_idx(reg);
    u32 part = register_file.reg_part(Reg{reg});
    assert(local_idx != INVALID_VAL_LOCAL_IDX);
    ValueAssignment *va = val_assignment(local_idx);
    AssignmentPartRef ap{va, part};

    // We want to sort registers by the following (ordered by priority):
    // - stack variable ref (~1 add/sub to reconstruct)
    // - other variable ref (1-2 instrs to reconstruct)
    // - already spilled (no store needed)
    // - last use farthest away (most likely to get spilled anyhow, so there's
    //   not much harm in spilling earlier)
    // - lowest ref-count (least used)
    //
    // TODO: evaluate and refine this heuristic

    // TODO: evict stack variable refs before others
    if (ap.variable_ref()) {
      TPDE_LOG_DBG("  r{} ({}) is variable-ref", reg_id, u32(local_idx));
      candidate = reg;
      break;
    }

    u32 score = 0;
    if (ap.stack_valid()) {
      score |= u32{1} << 31;
    }

    const auto &liveness = analyzer.liveness_info(local_idx);
    u32 last_use_dist = u32(liveness.last) - u32(cur_block_idx);
    score |= (last_use_dist < 0x8000 ? 0x8000 - last_use_dist : 0) << 16;

    u32 refs_left = va->pending_free ? 0 : va->references_left;
    score |= (refs_left < 0xffff ? 0x10000 - refs_left : 1);

    TPDE_LOG_DBG("  r{} ({}:{}) rc={}/{} live={}-{}{} spilled={} score={:#x}",
                 reg_id,
                 u32(local_idx),
                 part,
                 refs_left,
                 liveness.ref_count,
                 u32(liveness.first),
                 u32(liveness.last),
                 &"*"[!liveness.last_full],
                 ap.stack_valid(),
                 score);

    assert(score != 0);
    if (score > max_score) {
      candidate = reg;
      max_score = score;
    }
  }
  if (candidate.invalid()) [[unlikely]] {
    TPDE_FATAL("ran out of registers for scratch registers");
  }
  TPDE_LOG_DBG("  selected r{}", candidate.id());
  evict_reg(candidate);
  return candidate;
}
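
// Score composition example (illustrating the arithmetic above): bit 31 flags
// "already spilled", the field starting at bit 16 holds
// 0x8000 - distance-to-last-use (0 once the distance reaches 0x8000), and the
// low bits hold 0x10000 - remaining-refs. An already-spilled value last used
// 2 blocks ahead with 3 references left thus scores
//   (1u << 31) | ((0x8000 - 2) << 16) | (0x10000 - 3) = 0xfffefffd.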

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::reload_to_reg(
    AsmReg dst, AssignmentPartRef ap) noexcept {
  if (!ap.variable_ref()) {
    assert(ap.stack_valid());
    derived()->load_from_stack(dst, ap.frame_off(), ap.part_size());
  } else if (ap.is_stack_variable()) {
    derived()->load_address_of_stack_var(dst, ap);
  } else if constexpr (!Config::DEFAULT_VAR_REF_HANDLING) {
    derived()->load_address_of_var_reference(dst, ap);
  } else {
    TPDE_UNREACHABLE("non-stack-variable needs custom var-ref handling");
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::allocate_spill_slot(
    AssignmentPartRef ap) noexcept {
  assert(!ap.variable_ref() && "cannot allocate spill slot for variable ref");
  if (ap.assignment()->frame_off == 0) {
    assert(!ap.stack_valid() && "stack-valid set without spill slot");
    ap.assignment()->frame_off = allocate_stack_slot(ap.assignment()->size());
    assert(ap.assignment()->frame_off != 0);
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::spill(
    AssignmentPartRef ap) noexcept {
  assert(may_change_value_state());
  if (!ap.stack_valid() && !ap.variable_ref()) {
    assert(ap.register_valid() && "cannot spill uninitialized assignment part");
    allocate_spill_slot(ap);
    derived()->spill_reg(ap.get_reg(), ap.frame_off(), ap.part_size());
    ap.set_stack_valid();
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::evict(
    AssignmentPartRef ap) noexcept {
  assert(may_change_value_state());
  assert(ap.register_valid());
  derived()->spill(ap);
  ap.set_register_valid(false);
  register_file.unmark_used(ap.get_reg());
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::evict_reg(Reg reg) noexcept {
  assert(may_change_value_state());
  assert(!register_file.is_fixed(reg));
  assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);

  ValLocalIdx local_idx = register_file.reg_local_idx(reg);
  auto part = register_file.reg_part(reg);
  AssignmentPartRef evict_part{val_assignment(local_idx), part};
  assert(evict_part.register_valid());
  assert(evict_part.get_reg() == reg);
  derived()->spill(evict_part);
  evict_part.set_register_valid(false);
  register_file.unmark_used(reg);
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::free_reg(Reg reg) noexcept {
  assert(may_change_value_state());
  assert(!register_file.is_fixed(reg));
  assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);

  ValLocalIdx local_idx = register_file.reg_local_idx(reg);
  auto part = register_file.reg_part(reg);
  AssignmentPartRef ap{val_assignment(local_idx), part};
  assert(ap.register_valid());
  assert(ap.get_reg() == reg);
  assert(!ap.modified() || ap.variable_ref());
  ap.set_register_valid(false);
  register_file.unmark_used(reg);
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::RegisterFile::RegBitSet
    CompilerBase<Adaptor, Derived, Config>::spill_before_branch(
        bool force_spill) noexcept {
  // Since we do not explicitly keep track of register assignments per block,
  // whenever we might branch off to a block that we do not directly compile
  // afterwards (i.e. the register assignments might change in between), we
  // need to spill all registers which are not fixed and remove them from the
  // register state.
  //
  // This leads to worse codegen but saves the significant overhead of
  // storing/managing the register assignment for each block (256 bytes/block
  // for x64), and possibly compile time, as there might be additional logic
  // to move values around.

  // First, we consider the case that the current block only has one successor
  // which is compiled directly after the current one, in which case we do not
  // have to spill anything.
  //
  // Secondly, if the next block has multiple incoming edges, we always have
  // to spill and remove from the register assignment. Otherwise, we
  // only need to spill values if they are alive in any successor which is not
  // the next block.
  //
  // Values which are only read from PHI-nodes and have no extended lifetimes
  // do not need to be spilled, as they die at the edge.

  using RegBitSet = typename RegisterFile::RegBitSet;

  assert(may_change_value_state());

  const IRBlockRef cur_block_ref = analyzer.block_ref(cur_block_idx);
  // Earliest succeeding block after the current block that is not the
  // immediately succeeding block. Used to determine whether a value needs to
  // be spilled.
  BlockIndex earliest_next_succ = Analyzer<Adaptor>::INVALID_BLOCK_IDX;

  bool must_spill = force_spill;
  if (!must_spill) {
    // We must always spill if no block is immediately succeeding or that block
    // has multiple incoming edges.
    auto next_block_is_succ = false;
    auto next_block_has_multiple_incoming = false;
    u32 succ_count = 0;
    for (const IRBlockRef succ : adaptor->block_succs(cur_block_ref)) {
      ++succ_count;
      BlockIndex succ_idx = analyzer.block_idx(succ);
      if (u32(succ_idx) == u32(cur_block_idx) + 1) {
        next_block_is_succ = true;
        if (analyzer.block_has_multiple_incoming(succ)) {
          next_block_has_multiple_incoming = true;
        }
      } else if (succ_idx > cur_block_idx && succ_idx < earliest_next_succ) {
        earliest_next_succ = succ_idx;
      }
    }

    must_spill = !next_block_is_succ || next_block_has_multiple_incoming;

    if (succ_count == 1 && !must_spill) {
      return RegBitSet{};
    }
  }

  auto release_regs = RegBitSet{};
  // TODO(ts): just use register_file.used_nonfixed_regs()?
  for (auto reg : register_file.used_regs()) {
    auto local_idx = register_file.reg_local_idx(Reg{reg});
    auto part = register_file.reg_part(Reg{reg});
    if (local_idx == INVALID_VAL_LOCAL_IDX) {
      // scratch regs can never be held across blocks
      continue;
    }
    AssignmentPartRef ap{val_assignment(local_idx), part};
    if (ap.fixed_assignment()) {
      // fixed registers do not need to be spilled
      continue;
    }

    // Remove from register assignment if the next block cannot rely on the
    // value being in the specific register.
    if (must_spill) {
      release_regs |= RegBitSet{1ull} << reg;
    }

    if (!ap.modified() || ap.variable_ref()) {
      // No need to spill values that were already spilled or are variable
      // refs.
      continue;
    }

    const auto &liveness = analyzer.liveness_info(local_idx);
    if (liveness.last <= cur_block_idx) {
      // No need to spill value if it dies immediately after the block.
      continue;
    }

    // If the value is live in a different block, we might need to spill it.
    // Cases:
    // - Live in successor that precedes current block => ignore, value must
    //   have been spilled already due to RPO.
    // - Live only in successor that immediately follows this block => no need
    //   to spill, value can be kept in register, register state for next block
    //   is still valid.
    // - Live in successor in some distance after current block => spill
    if (must_spill || earliest_next_succ <= liveness.last) {
      spill(ap);
    }
  }

  return release_regs;
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::release_spilled_regs(
    typename RegisterFile::RegBitSet regs) noexcept {
  assert(may_change_value_state());

  // TODO(ts): needs changes for other RegisterFile impls
  for (auto reg_id : util::BitSetIterator<>{regs & register_file.used}) {
    if (!register_file.is_fixed(Reg{reg_id})) {
      free_reg(Reg{reg_id});
    }
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::
    release_regs_after_return() noexcept {
  // we essentially have to free all non-fixed registers
  for (auto reg_id : register_file.used_regs()) {
    if (!register_file.is_fixed(Reg{reg_id})) {
      free_reg(Reg{reg_id});
    }
  }
}
1462
1463template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1464void CompilerBase<Adaptor, Derived, Config>::move_to_phi_nodes_impl(
1465 BlockIndex target) noexcept {
1466 // PHI-nodes are always moved to their stack-slot (unless they are fixed)
1467 //
1468 // However, we need to take care of PHI-dependencies (cycles and chains)
1469 // as to not overwrite values which might be needed.
1470 //
1471 // In most cases, we expect the number of PHIs to be small but we want to
1472 // stay reasonably efficient even with larger numbers of PHIs
1473
1474 struct ScratchWrapper {
1475 Derived *self;
1476 AsmReg cur_reg = AsmReg::make_invalid();
1477 bool backed_up = false;
1478 bool was_modified = false;
1479 u8 part = 0;
1480 ValLocalIdx local_idx = INVALID_VAL_LOCAL_IDX;
1481
1482 ScratchWrapper(Derived *self) : self{self} {}
1483
1484 ~ScratchWrapper() { reset(); }
1485
1486 void reset() {
1487 if (cur_reg.invalid()) {
1488 return;
1489 }
1490
1491 self->register_file.unmark_fixed(cur_reg);
1492 self->register_file.unmark_used(cur_reg);
1493
1494 if (backed_up) {
1495 // restore the register state
1496 // TODO(ts): do we actually need the reload?
1497 auto *assignment = self->val_assignment(local_idx);
1498 // check if the value was free'd, then we dont need to restore
1499 // it
1500 if (assignment) {
1501 auto ap = AssignmentPartRef{assignment, part};
1502 if (!ap.variable_ref()) {
1503 // TODO(ts): assert that this always happens?
1504 assert(ap.stack_valid());
1505 self->load_from_stack(cur_reg, ap.frame_off(), ap.part_size());
1506 }
1507 ap.set_reg(cur_reg);
1508 ap.set_register_valid(true);
1509 ap.set_modified(was_modified);
1510 self->register_file.mark_used(cur_reg, local_idx, part);
1511 }
1512 backed_up = false;
1513 }
1514 cur_reg = AsmReg::make_invalid();
1515 }
1516
1517 AsmReg alloc_from_bank(RegBank bank) {
1518 if (cur_reg.valid() && self->register_file.reg_bank(cur_reg) == bank) {
1519 return cur_reg;
1520 }
1521 if (cur_reg.valid()) {
1522 reset();
1523 }
1524
1525 // TODO(ts): try to first find a non callee-saved/clobbered
1526 // register...
1527 auto &reg_file = self->register_file;
1528 auto reg = reg_file.find_first_free_excluding(bank, 0);
1529 if (reg.invalid()) {
1530 // TODO(ts): use clock here?
1531 reg = reg_file.find_first_nonfixed_excluding(bank, 0);
1532 if (reg.invalid()) {
1533 TPDE_FATAL("ran out of registers for scratch registers");
1534 }
1535
1536 backed_up = true;
1537 local_idx = reg_file.reg_local_idx(reg);
1538 part = reg_file.reg_part(reg);
1539 AssignmentPartRef ap{self->val_assignment(local_idx), part};
1540 was_modified = ap.modified();
1541 // TODO(ts): this does not spill for variable refs
1542 // We don't use evict_reg here, as we know that we can't change the
1543 // value state.
1544 assert(ap.register_valid() && ap.get_reg() == reg);
1545 if (!ap.stack_valid() && !ap.variable_ref()) {
1546 self->allocate_spill_slot(ap);
1547 self->spill_reg(ap.get_reg(), ap.frame_off(), ap.part_size());
1548 ap.set_stack_valid();
1549 }
1550 ap.set_register_valid(false);
1551 reg_file.unmark_used(reg);
1552 }
1553
1554 reg_file.mark_used(reg, INVALID_VAL_LOCAL_IDX, 0);
1555 reg_file.mark_clobbered(reg);
1556 reg_file.mark_fixed(reg);
1557 cur_reg = reg;
1558 return reg;
1559 }
1560
1561 ScratchWrapper &operator=(const ScratchWrapper &) = delete;
1562 ScratchWrapper &operator=(ScratchWrapper &&) = delete;
1563 };
1564
1565 IRBlockRef target_ref = analyzer.block_ref(target);
1566 IRBlockRef cur_ref = analyzer.block_ref(cur_block_idx);
1567
1568 // collect all the nodes
1569 struct NodeEntry {
1570 IRValueRef phi;
1571 IRValueRef incoming_val;
1572 ValLocalIdx phi_local_idx;
1573 // local idx of same-block phi node that needs special handling
1574 ValLocalIdx incoming_phi_local_idx = INVALID_VAL_LOCAL_IDX;
1575 // bool incoming_is_phi;
1576 u32 ref_count;
1577
1578 bool operator<(const NodeEntry &other) const noexcept {
1579 return phi_local_idx < other.phi_local_idx;
1580 }
1581
1582 bool operator<(ValLocalIdx other) const noexcept {
1583 return phi_local_idx < other;
1584 }
1585 };
1586
1587 util::SmallVector<NodeEntry, 16> nodes;
1588 for (IRValueRef phi : adaptor->block_phis(target_ref)) {
1589 ValLocalIdx phi_local_idx = adaptor->val_local_idx(phi);
1590 auto incoming = adaptor->val_as_phi(phi).incoming_val_for_block(cur_ref);
1591 nodes.emplace_back(NodeEntry{
1592 .phi = phi, .incoming_val = incoming, .phi_local_idx = phi_local_idx});
1593 }
1594
1595 // We check that the block has phi nodes before getting here.
1596 assert(!nodes.empty() && "block marked has having phi nodes has none");
1597
1598 ScratchWrapper scratch{derived()};
1599 const auto move_to_phi = [this, &scratch](IRValueRef phi,
1600 IRValueRef incoming_val) {
1601 // TODO(ts): if phi==incoming_val, we should be able to elide the move
1602 // even if the phi is in a fixed register, no?
1603
1604 auto phi_vr = derived()->result_ref(phi);
1605 auto val_vr = derived()->val_ref(incoming_val);
1606 if (phi == incoming_val) {
1607 return;
1608 }
1609
1610 u32 part_count = phi_vr.assignment()->part_count;
1611 for (u32 i = 0; i < part_count; ++i) {
1612 AssignmentPartRef phi_ap{phi_vr.assignment(), i};
1613
1614 AsmReg reg{};
1615 ValuePartRef val_vpr = val_vr.part(i);
1616 if (val_vpr.is_const()) {
1617 reg = scratch.alloc_from_bank(val_vpr.bank());
1618 val_vpr.reload_into_specific_fixed(reg);
1619 } else if (val_vpr.assignment().register_valid() ||
1620 val_vpr.assignment().fixed_assignment()) {
1621 reg = val_vpr.assignment().get_reg();
1622 } else {
1623 reg = val_vpr.reload_into_specific_fixed(
1624 this, scratch.alloc_from_bank(phi_ap.bank()));
1625 }
1626
1627 if (phi_ap.fixed_assignment()) {
1628 derived()->mov(phi_ap.get_reg(), reg, phi_ap.part_size());
1629 } else {
1630 allocate_spill_slot(phi_ap);
1631 derived()->spill_reg(reg, phi_ap.frame_off(), phi_ap.part_size());
1632 phi_ap.set_stack_valid();
1633 }
1634 }
1635 };
1636
1637 if (nodes.size() == 1) {
1638 move_to_phi(nodes[0].phi, nodes[0].incoming_val);
1639 return;
1640 }
1641
1642 // sort so we can binary search later
1643 std::sort(nodes.begin(), nodes.end());
1644
1645 // fill in the refcount
1646 auto all_zero_ref = true;
1647 for (auto &node : nodes) {
1648    // Nothing needs to be done for PHIs whose incoming value is not another
1649    // PHI, or for self-referencing PHIs.
1650 bool incoming_is_phi = adaptor->val_is_phi(node.incoming_val);
1651 if (!incoming_is_phi || node.incoming_val == node.phi) {
1652 continue;
1653 }
1654
1655 ValLocalIdx inc_local_idx = adaptor->val_local_idx(node.incoming_val);
1656 auto it = std::lower_bound(nodes.begin(), nodes.end(), inc_local_idx);
1657 if (it == nodes.end() || it->phi != node.incoming_val) {
1658 // Incoming value is a PHI node, but it's not from our block, so we don't
1659 // need to be particularly careful when assigning values.
1660 continue;
1661 }
1662 node.incoming_phi_local_idx = inc_local_idx;
1663 ++it->ref_count;
1664 all_zero_ref = false;
1665 }
1666
1667 if (all_zero_ref) {
1668 // no cycles/chain that we need to take care of
1669 for (auto &node : nodes) {
1670 move_to_phi(node.phi, node.incoming_val);
1671 }
1672 return;
1673 }
1674
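  // Topological worklist: ready_indices holds PHIs that may be written now
  // (no other PHI still reads their current value); waiting_nodes tracks the
  // rest. Once no node is ready, the remaining nodes form cycles.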
1675 // TODO(ts): this is rather inefficient...
1676 util::SmallVector<u32, 32> ready_indices;
1677 ready_indices.reserve(nodes.size());
1678 util::SmallBitSet<256> waiting_nodes;
1679 waiting_nodes.resize(nodes.size());
1680 for (u32 i = 0; i < nodes.size(); ++i) {
1681 if (nodes[i].ref_count) {
1682 waiting_nodes.mark_set(i);
1683 } else {
1684 ready_indices.push_back(i);
1685 }
1686 }
1687
1688 u32 handled_count = 0;
1689 u32 cur_tmp_part_count = 0;
1690 i32 cur_tmp_slot = 0;
1691 u32 cur_tmp_slot_size = 0;
1692 IRValueRef cur_tmp_val = Adaptor::INVALID_VALUE_REF;
1693 ScratchWrapper tmp_reg1{derived()}, tmp_reg2{derived()};
1694
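  // To break a cycle, the current value of one PHI is stashed in temporaries:
  // values with at most two parts are kept in the scratch registers
  // tmp_reg1/tmp_reg2, larger values are copied into a dedicated stack slot.
  // move_from_tmp_phi then moves the stashed value into the PHI consuming it.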
1695 const auto move_from_tmp_phi = [&](IRValueRef target_phi) {
1696 auto phi_vr = val_ref(target_phi);
1697 if (cur_tmp_part_count <= 2) {
1698 AssignmentPartRef ap{phi_vr.assignment(), 0};
1699 assert(!tmp_reg1.cur_reg.invalid());
1700 if (ap.fixed_assignment()) {
1701 derived()->mov(ap.get_reg(), tmp_reg1.cur_reg, ap.part_size());
1702 } else {
1703 derived()->spill_reg(tmp_reg1.cur_reg, ap.frame_off(), ap.part_size());
1704 }
1705
1706 if (cur_tmp_part_count == 2) {
1707 AssignmentPartRef ap_high{phi_vr.assignment(), 1};
1708 assert(!ap_high.fixed_assignment());
1709 assert(!tmp_reg2.cur_reg.invalid());
1710 derived()->spill_reg(
1711 tmp_reg2.cur_reg, ap_high.frame_off(), ap_high.part_size());
1712 }
1713 return;
1714 }
1715
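    // More than two parts: the value was stashed in a stack slot, so copy it
    // part by part through a scratch register into the PHI's storage.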
1716 for (u32 i = 0; i < cur_tmp_part_count; ++i) {
1717 AssignmentPartRef phi_ap{phi_vr.assignment(), i};
1718 assert(!phi_ap.fixed_assignment());
1719
1720 auto slot_off = cur_tmp_slot + phi_ap.part_off();
1721 auto reg = tmp_reg1.alloc_from_bank(phi_ap.bank());
1722 derived()->load_from_stack(reg, slot_off, phi_ap.part_size());
1723 derived()->spill_reg(reg, phi_ap.frame_off(), phi_ap.part_size());
1724 }
1725 };
1726
1727 while (handled_count != nodes.size()) {
1728 if (ready_indices.empty()) {
1729 // need to break a cycle
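      // Each remaining PHI's current value is read by exactly one other PHI
      // (see the assert below), so the residue consists of disjoint cycles.
      // E.g., for the swap a <- b, b <- a neither move can go first; stashing
      // a's current value in a temporary breaks the cycle.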
1730 auto cur_idx_opt = waiting_nodes.first_set();
1731 assert(cur_idx_opt);
1732 auto cur_idx = *cur_idx_opt;
1733 assert(nodes[cur_idx].ref_count == 1);
1734 assert(cur_tmp_val == Adaptor::INVALID_VALUE_REF);
1735
1736 auto phi_val = nodes[cur_idx].phi;
1737 auto phi_vr = this->val_ref(phi_val);
1738 auto *assignment = phi_vr.assignment();
1739 cur_tmp_part_count = assignment->part_count;
1740 cur_tmp_val = phi_val;
1741
1742 if (cur_tmp_part_count > 2) {
1743 // use a stack slot to store the temporaries
1744 cur_tmp_slot_size = assignment->size();
1745 cur_tmp_slot = allocate_stack_slot(cur_tmp_slot_size);
1746
1747 for (u32 i = 0; i < cur_tmp_part_count; ++i) {
1748 auto ap = AssignmentPartRef{assignment, i};
1749 assert(!ap.fixed_assignment());
1750 auto slot_off = cur_tmp_slot + ap.part_off();
1751
1752 if (ap.register_valid()) {
1753 auto reg = ap.get_reg();
1754 derived()->spill_reg(reg, slot_off, ap.part_size());
1755 } else {
1756 auto reg = tmp_reg1.alloc_from_bank(ap.bank());
1757 assert(ap.stack_valid());
1758 derived()->load_from_stack(reg, ap.frame_off(), ap.part_size());
1759 derived()->spill_reg(reg, slot_off, ap.part_size());
1760 }
1761 }
1762 } else {
1763 // TODO(ts): if the PHI is not fixed, then we can just reuse its
1764 // register if it has one
1765 auto phi_vpr = phi_vr.part(0);
1766 auto reg = tmp_reg1.alloc_from_bank(phi_vpr.bank());
1767 phi_vpr.reload_into_specific_fixed(this, reg);
1768
1769 if (cur_tmp_part_count == 2) {
1770 // TODO(ts): just change the part ref on the lower ref?
1771 auto phi_vpr_high = phi_vr.part(1);
1772 auto reg_high = tmp_reg2.alloc_from_bank(phi_vpr_high.bank());
1773 phi_vpr_high.reload_into_specific_fixed(this, reg_high);
1774 }
1775 }
1776
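      // The stashed PHI no longer needs its old value; mark it ready so it
      // can now be overwritten with its own incoming value.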
1777 nodes[cur_idx].ref_count = 0;
1778 ready_indices.push_back(cur_idx);
1779 waiting_nodes.mark_unset(cur_idx);
1780 }
1781
1782 for (u32 i = 0; i < ready_indices.size(); ++i) {
1783 ++handled_count;
1784 auto cur_idx = ready_indices[i];
1785 auto phi_val = nodes[cur_idx].phi;
1786 IRValueRef incoming_val = nodes[cur_idx].incoming_val;
1787 if (incoming_val == phi_val) {
1788 // no need to do anything, except ref-counting
1789 (void)val_ref(incoming_val);
1790 (void)val_ref(phi_val);
1791 continue;
1792 }
1793
1794 if (incoming_val == cur_tmp_val) {
1795 move_from_tmp_phi(phi_val);
1796
1797 if (cur_tmp_part_count > 2) {
1798 free_stack_slot(cur_tmp_slot, cur_tmp_slot_size);
1799 cur_tmp_slot = 0xFFFF'FFFF;
1800 cur_tmp_slot_size = 0;
1801 }
1802 cur_tmp_val = Adaptor::INVALID_VALUE_REF;
1803 // skip the code below as the ref count of the temp val is
1804 // already 0 and we don't want to reinsert it into the ready
1805 // list
1806 continue;
1807 }
1808
1809 move_to_phi(phi_val, incoming_val);
1810
1811 if (nodes[cur_idx].incoming_phi_local_idx == INVALID_VAL_LOCAL_IDX) {
1812 continue;
1813 }
1814
1815 auto it = std::lower_bound(
1816 nodes.begin(), nodes.end(), nodes[cur_idx].incoming_phi_local_idx);
1817 assert(it != nodes.end() && it->phi == incoming_val &&
1818 "incoming_phi_local_idx set incorrectly");
1819
1820 assert(it->ref_count > 0);
1821 if (--it->ref_count == 0) {
1822 auto node_idx = static_cast<u32>(it - nodes.begin());
1823 ready_indices.push_back(node_idx);
1824 waiting_nodes.mark_unset(node_idx);
1825 }
1826 }
1827 ready_indices.clear();
1828 }
1829}
1830
1831template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1832typename CompilerBase<Adaptor, Derived, Config>::BlockIndex
1833    CompilerBase<Adaptor, Derived, Config>::next_block() const noexcept {
1834  return static_cast<BlockIndex>(static_cast<u32>(cur_block_idx) + 1);
1835}
1836
1837template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1838SymRef CompilerBase<Adaptor, Derived, Config>::get_personality_sym() noexcept {
1839  SymRef personality_sym;
1840 if (this->adaptor->cur_needs_unwind_info()) {
1841 SymRef personality_func = derived()->cur_personality_func();
1842 if (personality_func.valid()) {
1843 for (const auto &[fn_sym, ptr_sym] : personality_syms) {
1844 if (fn_sym == personality_func) {
1845 personality_sym = ptr_sym;
1846 break;
1847 }
1848 }
1849
1850 if (!personality_sym.valid()) {
1851 // create symbol that contains the address of the personality
1852 // function
1853 u32 off;
1854 static constexpr std::array<u8, 8> zero{};
1855
1856 auto rodata = this->assembler.get_data_section(true, true);
1857 personality_sym = this->assembler.sym_def_data(
1858 rodata, "", zero, 8, Assembler::SymBinding::LOCAL, &off);
1859 this->assembler.reloc_abs(rodata, personality_func, off, 0);
1860
1861 personality_syms.emplace_back(personality_func, personality_sym);
1862 }
1863 }
1864 }
1865 return personality_sym;
1866}
1867
1868template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1869bool CompilerBase<Adaptor, Derived, Config>::compile_func(
1870    const IRFuncRef func, const u32 func_idx) noexcept {
1871 if (!adaptor->switch_func(func)) {
1872 return false;
1873 }
1874 derived()->analysis_start();
1875 analyzer.switch_func(func);
1876 derived()->analysis_end();
1877
1878#ifndef NDEBUG
1879 stack.frame_size = ~0u;
1880#endif
1881 for (auto &e : stack.fixed_free_lists) {
1882 e.clear();
1883 }
1884 stack.dynamic_free_lists.clear();
1885
1886 assignments.cur_fixed_assignment_count = {};
1887 assert(std::ranges::none_of(assignments.value_ptrs, std::identity{}));
1888 if (assignments.value_ptrs.size() < analyzer.liveness.size()) {
1889 assignments.value_ptrs.resize(analyzer.liveness.size());
1890 }
1891
1892 assignments.allocator.reset();
1893 assignments.variable_ref_list = INVALID_VAL_LOCAL_IDX;
1894 assignments.delayed_free_lists.clear();
1895 assignments.delayed_free_lists.resize(analyzer.block_layout.size(),
1896 INVALID_VAL_LOCAL_IDX);
1897
1898 cur_block_idx =
1899 static_cast<BlockIndex>(analyzer.block_idx(adaptor->cur_entry_block()));
1900
1901 register_file.reset();
1902#ifndef NDEBUG
1903 generating_branch = false;
1904#endif
1905
1906  // Simple heuristic for the initial code buffer size (8 bytes per
1907  // instruction plus some slack for the prologue/epilogue).
1907 u32 expected_code_size = 0x8 * analyzer.num_insts + 0x40;
1908 this->text_writer.begin_func(expected_code_size);
1909
1910 derived()->start_func(func_idx);
1911
1912 block_labels.clear();
1913 block_labels.resize_uninitialized(analyzer.block_layout.size());
1914 for (u32 i = 0; i < analyzer.block_layout.size(); ++i) {
1915 block_labels[i] = text_writer.label_create();
1916 }
1917
1918 // TODO(ts): place function label
1919 // TODO(ts): make function labels optional?
1920
1921 CCAssigner *cc_assigner = derived()->cur_cc_assigner();
1922 assert(cc_assigner != nullptr);
1923
1924 register_file.allocatable = cc_assigner->get_ccinfo().allocatable_regs;
1925
1926 // This initializes the stack frame, which must reserve space for
1927 // callee-saved registers, vararg save area, etc.
1928 cc_assigner->reset();
1929 derived()->gen_func_prolog_and_args(cc_assigner);
1930
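  // Static allocas become variable-ref assignments backed by a fixed stack
  // slot; their address is computed from the frame offset on demand.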
1931 for (const IRValueRef alloca : adaptor->cur_static_allocas()) {
1932 auto size = adaptor->val_alloca_size(alloca);
1933 size = util::align_up(size, adaptor->val_alloca_align(alloca));
1934
1935 ValLocalIdx local_idx = adaptor->val_local_idx(alloca);
1936 init_variable_ref(local_idx, 0);
1937 ValueAssignment *assignment = val_assignment(local_idx);
1938 assignment->stack_variable = true;
1939 assignment->frame_off = allocate_stack_slot(size);
1940 }
1941
1942 if constexpr (!Config::DEFAULT_VAR_REF_HANDLING) {
1943 derived()->setup_var_ref_assignments();
1944 }
1945
1946 for (u32 i = 0; i < analyzer.block_layout.size(); ++i) {
1947 const auto block_ref = analyzer.block_layout[i];
1948 TPDE_LOG_TRACE(
1949 "Compiling block {} ({})", i, adaptor->block_fmt_ref(block_ref));
1950 if (!derived()->compile_block(block_ref, i)) [[unlikely]] {
1951 TPDE_LOG_ERR("Failed to compile block {} ({})",
1952 i,
1953 adaptor->block_fmt_ref(block_ref));
1954 // Ensure invariant that value_ptrs only contains nullptr at the end.
1955 assignments.value_ptrs.clear();
1956 return false;
1957 }
1958 }
1959
1960 // Reset all variable-ref assignment pointers to nullptr.
1961 ValLocalIdx variable_ref_list = assignments.variable_ref_list;
1962 while (variable_ref_list != INVALID_VAL_LOCAL_IDX) {
1963 u32 idx = u32(variable_ref_list);
1964 ValLocalIdx next = assignments.value_ptrs[idx]->next_delayed_free_entry;
1965 assignments.value_ptrs[idx] = nullptr;
1966 variable_ref_list = next;
1967 }
1968
1969 assert(std::ranges::none_of(assignments.value_ptrs, std::identity{}) &&
1970 "found non-freed ValueAssignment, maybe missing ref-count?");
1971
1972 derived()->finish_func(func_idx);
1973 this->text_writer.finish_func();
1974
1975 return true;
1976}
1977
1978template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1979bool CompilerBase<Adaptor, Derived, Config>::compile_block(
1980    const IRBlockRef block, const u32 block_idx) noexcept {
1981 cur_block_idx =
1982 static_cast<typename Analyzer<Adaptor>::BlockIndex>(block_idx);
1983
1984 label_place(block_labels[block_idx]);
1985 auto &&val_range = adaptor->block_insts(block);
1986 auto end = val_range.end();
1987 for (auto it = val_range.begin(); it != end; ++it) {
1988 const IRInstRef inst = *it;
1989 if (this->adaptor->inst_fused(inst)) {
1990 continue;
1991 }
1992
1993 auto it_cpy = it;
1994 ++it_cpy;
1995 if (!derived()->compile_inst(inst, InstRange{.from = it_cpy, .to = end}))
1996 [[unlikely]] {
1997 TPDE_LOG_ERR("Failed to compile instruction {}",
1998 this->adaptor->inst_fmt_ref(inst));
1999 return false;
2000 }
2001 }
2002
2003#ifndef NDEBUG
2004 // Some consistency checks. Register assignment information must match, all
2005 // used registers must have an assignment (no temporaries across blocks), and
2006 // fixed registers must be fixed assignments.
2007 for (auto reg_id : register_file.used_regs()) {
2008 Reg reg{reg_id};
2009 assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);
2010 AssignmentPartRef ap{val_assignment(register_file.reg_local_idx(reg)),
2011 register_file.reg_part(reg)};
2012 assert(ap.register_valid());
2013 assert(ap.get_reg() == reg);
2014 assert(!register_file.is_fixed(reg) || ap.fixed_assignment());
2015 }
2016#endif
2017
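  // Release assignments whose liveness ends at this block. They are chained
  // on a per-block delayed free list via next_delayed_free_entry and freed
  // only after the whole block has been compiled.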
2018 if (static_cast<u32>(assignments.delayed_free_lists[block_idx]) != ~0u) {
2019 auto list_entry = assignments.delayed_free_lists[block_idx];
2020 while (static_cast<u32>(list_entry) != ~0u) {
2021 auto *assignment = assignments.value_ptrs[static_cast<u32>(list_entry)];
2022 auto next_entry = assignment->next_delayed_free_entry;
2023 derived()->free_assignment(list_entry, assignment);
2024 list_entry = next_entry;
2025 }
2026 }
2027 return true;
2028}
2029
2030} // namespace tpde