CompilerBase.hpp
// SPDX-FileCopyrightText: 2025 Contributors to TPDE <https://tpde.org>
//
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#pragma once

#include <algorithm>
#include <functional>
#include <unordered_map>
#include <variant>

#include "Analyzer.hpp"
#include "Compiler.hpp"
#include "CompilerConfig.hpp"
#include "IRAdaptor.hpp"
#include "tpde/AssignmentPartRef.hpp"
#include "tpde/RegisterFile.hpp"
#include "tpde/ValLocalIdx.hpp"
#include "tpde/ValueAssignment.hpp"
#include "tpde/base.hpp"
#include "tpde/util/function_ref.hpp"
#include "tpde/util/misc.hpp"

namespace tpde {
// TODO(ts): formulate a concept for the full compiler so that there is *some*
// check whether all the required derived methods are implemented?

/// Thread-local storage access mode
enum class TLSModel {
  GlobalDynamic,
  LocalDynamic,
  InitialExec,
  LocalExec,
};

struct CCAssignment {
  Reg reg = Reg::make_invalid();

  /// If non-zero, indicates that this and the next N values must be assigned
  /// to consecutive registers, or all to the stack. The following values must
  /// be in the same register bank as this value. u8 is sufficient; no
  /// architecture has more than 255 parameter registers in a single bank.
  u8 consecutive = 0;
  bool sret : 1 = false;  ///< Argument is return value pointer.
  bool sext : 1 = false;  ///< Sign-extend single integer.
  bool zext : 1 = false;  ///< Zero-extend single integer.
  bool byval : 1 = false;
  u8 align = 0;
  u8 size = 0;
  RegBank bank = RegBank{};
  u32 byval_align = 0;
  u32 byval_size = 0;  ///< Only used if byval is set.
  u32 stack_off = 0;   ///< Only used if reg is invalid.
};
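
// Illustrative sketch (not part of the interface): an i128 split into two
// 8-byte GP parts might be described to a CCAssigner (declared below) like
// this, assuming a target where both halves must live in adjacent registers:
//
//   CCAssignment lo{.consecutive = 1, .align = 16, .size = 8};
//   CCAssignment hi{.size = 8};
//   assigner.assign_arg(lo); // assigner fills lo.reg or lo.stack_off
//   assigner.assign_arg(hi); // adjacent to lo.reg, or also on the stack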

struct CCInfo {
  // TODO: use RegBitSet
  const u64 allocatable_regs;
  const u64 callee_saved_regs;
  /// Possible argument registers; these registers will not be allocated until
  /// all arguments have been assigned.
  const u64 arg_regs;
};

class CCAssigner {
public:
  const CCInfo *ccinfo;

  CCAssigner(const CCInfo &ccinfo) noexcept : ccinfo(&ccinfo) {}
  virtual ~CCAssigner() noexcept {}

  virtual void reset() noexcept = 0;

  const CCInfo &get_ccinfo() const noexcept { return *ccinfo; }

  virtual void assign_arg(CCAssignment &cca) noexcept = 0;
  virtual u32 get_stack_size() noexcept = 0;
  /// Some calling conventions need different call behavior when calling a
  /// vararg function.
  virtual bool is_vararg() const noexcept { return false; }
  virtual void assign_ret(CCAssignment &cca) noexcept = 0;
};
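
// A minimal sketch of a target-specific assigner, with invented register ids
// and an assumed 8-byte stack slot size; real implementations also handle
// `consecutive`, register banks, and alignment:
//
//   class ExampleCCAssigner : public CCAssigner {
//     u32 next_gp = 0, stack_size = 0;
//   public:
//     using CCAssigner::CCAssigner;
//     void reset() noexcept override { next_gp = stack_size = 0; }
//     void assign_arg(CCAssignment &cca) noexcept override {
//       static constexpr u8 GP_ARGS[] = {7, 6, 2, 1, 8, 9}; // e.g. SysV ids
//       if (next_gp < 6) {
//         cca.reg = Reg{GP_ARGS[next_gp++]};
//       } else {
//         stack_size = util::align_up(stack_size, 8);
//         cca.stack_off = stack_size;
//         stack_size += 8;
//       }
//     }
//     u32 get_stack_size() noexcept override { return stack_size; }
//     void assign_ret(CCAssignment &cca) noexcept override { /* ... */ }
//   };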

/// The base class for the compiler.
/// It implements the main platform-independent compilation logic and houses
/// the analyzer.
template <IRAdaptor Adaptor,
          typename Derived,
          CompilerConfig Config = CompilerConfigDefault>
struct CompilerBase {
  // some forwards for the IR type defs
  using IRValueRef = typename Adaptor::IRValueRef;
  using IRInstRef = typename Adaptor::IRInstRef;
  using IRBlockRef = typename Adaptor::IRBlockRef;
  using IRFuncRef = typename Adaptor::IRFuncRef;

  using BlockIndex = typename Analyzer<Adaptor>::BlockIndex;

  using Assembler = typename Config::Assembler;
  using AsmReg = typename Config::AsmReg;

  using RegisterFile = tpde::RegisterFile<Config::NUM_BANKS, 32>;

  /// A default implementation for ValRefSpecial.
  // Note: subclasses can override this; always use Derived::ValRefSpecial.
  struct ValRefSpecial {
    uint8_t mode = 4;
    u64 const_data;
  };

#pragma region CompilerData
  Adaptor *adaptor;
  Analyzer<Adaptor> analyzer;

  // data for frame management

  struct {
    /// The current size of the stack frame
    u32 frame_size = 0;
    /// Free-lists for 1/2/4/8/16-byte allocations
    // TODO(ts): make the allocations for 4/8 different from the others
    // since they are probably the ones most used?
    util::SmallVector<i32, 16> fixed_free_lists[5] = {};
    /// Free-lists for all other sizes
    // TODO(ts): think about which data structure we want here
    std::unordered_map<u32, std::vector<i32>> dynamic_free_lists{};
  } stack = {};
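
  // Worked example of the bucketing above: a 5-byte request is served from
  // the 8-byte bucket (fixed_free_lists[3]), a 16-byte request from
  // fixed_free_lists[4], and a 24-byte request is aligned up to 32 bytes and
  // served from dynamic_free_lists[32]. See allocate_stack_slot below.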

  typename Analyzer<Adaptor>::BlockIndex cur_block_idx;

  // Assignments

  static constexpr ValLocalIdx INVALID_VAL_LOCAL_IDX =
      static_cast<ValLocalIdx>(~0u);

  // TODO(ts): think about different ways to store this that are maybe more
  // compact?
  struct {
    AssignmentAllocator allocator;

    std::array<u32, Config::NUM_BANKS> cur_fixed_assignment_count = {};
    util::SmallVector<ValueAssignment *, Analyzer<Adaptor>::SMALL_VALUE_NUM>
        value_ptrs;

    ValLocalIdx variable_ref_list;
    util::SmallVector<ValLocalIdx, Analyzer<Adaptor>::SMALL_BLOCK_NUM>
        delayed_free_lists;
  } assignments = {};

  RegisterFile register_file;
#ifndef NDEBUG
  /// Whether we are currently in the middle of generating branch-related code
  /// and therefore must not change any value-related state.
  bool generating_branch = false;
#endif

private:
  /// Default CCAssigner if the implementation doesn't override
  /// cur_cc_assigner.
  typename Config::DefaultCCAssigner default_cc_assigner;

public:
  Assembler assembler;
  Assembler::SectionWriter text_writer;
  // TODO(ts): smallvector?
  std::vector<typename Assembler::SymRef> func_syms;
  // TODO(ts): combine this with the block vectors in the analyzer to save on
  // allocations
  util::SmallVector<typename Assembler::Label> block_labels;

  util::SmallVector<
      std::pair<typename Assembler::SymRef, typename Assembler::SymRef>,
      4>
      personality_syms = {};

  struct ScratchReg;
  class ValuePart;
  struct ValuePartRef;
  struct ValueRef;
  struct GenericValuePart;
#pragma endregion

  struct InstRange {
    using Range = decltype(std::declval<Adaptor>().block_insts(
        std::declval<IRBlockRef>()));
    using Iter = decltype(std::declval<Range>().begin());
    using EndIter = decltype(std::declval<Range>().end());
    Iter from;
    EndIter to;
  };

  struct CallArg {
    enum class Flag : u8 {
      none,
      zext,
      sext,
      sret,
      byval
    };

    explicit CallArg(IRValueRef value,
                     Flag flags = Flag::none,
                     u32 byval_align = 0,
                     u32 byval_size = 0)
        : value(value),
          flag(flags),
          byval_align(byval_align),
          byval_size(byval_size) {}

    IRValueRef value;
    Flag flag;
    u32 byval_align;
    u32 byval_size;
  };

  template <typename CBDerived>
  class CallBuilderBase {
  protected:
    Derived &compiler;
    CCAssigner &assigner;

    RegisterFile::RegBitSet arg_regs{};

  public:
    CallBuilderBase(Derived &compiler, CCAssigner &assigner) noexcept
        : compiler(compiler), assigner(assigner) {}

    // CBDerived needs:
    // void add_arg_byval(ValuePart &vp, CCAssignment &cca) noexcept;
    // void add_arg_stack(ValuePart &vp, CCAssignment &cca) noexcept;
    // void call_impl(std::variant<Assembler::SymRef, ValuePart> &&) noexcept;
    CBDerived *derived() noexcept { return static_cast<CBDerived *>(this); }

    void add_arg(ValuePart &&vp, CCAssignment cca) noexcept;
    void add_arg(CallArg &&arg) noexcept;

    // evict registers, do call, reset stack frame
    void call(std::variant<typename Assembler::SymRef, ValuePart>) noexcept;

    void add_ret(ValuePart &vp, CCAssignment cca) noexcept;
    void add_ret(ValuePart &&vp, CCAssignment cca) noexcept {
      add_ret(vp, cca);
    }
    void add_ret(ValueRef &vr) noexcept;
  };

  class RetBuilder {
    Derived &compiler;
    CCAssigner &assigner;

    RegisterFile::RegBitSet ret_regs{};

  public:
    RetBuilder(Derived &compiler, CCAssigner &assigner) noexcept
        : compiler(compiler), assigner(assigner) {
      assigner.reset();
    }

    void add(ValuePart &&vp, CCAssignment cca) noexcept;
    void add(IRValueRef val, CallArg::Flag flag = CallArg::Flag::none) noexcept;

    void ret() noexcept;
  };
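
  // A minimal usage sketch (hypothetical derived-class code; CallBuilder is
  // assumed to be the target's CallBuilderBase subclass and cb_assigner a
  // suitable CCAssigner for the callee):
  //
  //   typename Derived::CallBuilder cb{*derived(), cb_assigner};
  //   cb.add_arg(CallArg{arg_val});   // may go to a register or the stack
  //   cb.call(callee_sym);            // evicts clobbered regs, emits call
  //   cb.add_ret(result_vr);          // binds result to return registers
  //
  //   RetBuilder rb{*derived(), *cur_cc_assigner()};
  //   rb.add(ret_val);                // moves value into return registers
  //   rb.ret();                       // epilogue + register state cleanup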

  /// Initialize a CompilerBase; should be called by the derived classes
  explicit CompilerBase(Adaptor *adaptor)
      : adaptor(adaptor), analyzer(adaptor), assembler() {
    static_assert(std::is_base_of_v<CompilerBase, Derived>);
    static_assert(Compiler<Derived, Config>);
  }

  /// Shortcut for casting to the Derived class so that overloading works
  Derived *derived() { return static_cast<Derived *>(this); }

  const Derived *derived() const { return static_cast<const Derived *>(this); }

  [[nodiscard]] ValLocalIdx val_idx(const IRValueRef value) const noexcept {
    return analyzer.adaptor->val_local_idx(value);
  }

  [[nodiscard]] ValueAssignment *
      val_assignment(const ValLocalIdx idx) noexcept {
    return assignments.value_ptrs[static_cast<u32>(idx)];
  }

  /// Compile the functions returned by Adaptor::funcs
  ///
  /// \warning If you intend to call this multiple times, you must call reset
  /// in-between the calls.
  ///
  /// \returns Whether the compilation was successful
  bool compile();

  /// Reset any leftover data from the previous compilation such that it will
  /// not affect the next compilation
  void reset();

  /// Get the CCAssigner for the current function.
  CCAssigner *cur_cc_assigner() noexcept { return &default_cc_assigner; }

  void init_assignment(IRValueRef value, ValLocalIdx local_idx) noexcept;

  /// Frees an assignment, its stack slot and registers
  void free_assignment(ValLocalIdx local_idx) noexcept;

  /// Init a variable-ref assignment
  void init_variable_ref(ValLocalIdx local_idx, u32 var_ref_data) noexcept;
  /// Init a variable-ref assignment
  void init_variable_ref(IRValueRef value, u32 var_ref_data) noexcept {
    init_variable_ref(adaptor->val_local_idx(value), var_ref_data);
  }

  i32 allocate_stack_slot(u32 size) noexcept;
  void free_stack_slot(u32 slot, u32 size) noexcept;

  template <typename Fn>
  void handle_func_arg(u32 arg_idx, IRValueRef arg, Fn add_arg) noexcept;

  ValueRef val_ref(IRValueRef value) noexcept;

  std::pair<ValueRef, ValuePartRef> val_ref_single(IRValueRef value) noexcept;

  /// Get a defining reference to a value
  ValueRef result_ref(IRValueRef value) noexcept;

  std::pair<ValueRef, ValuePartRef>
      result_ref_single(IRValueRef value) noexcept;

  void set_value(ValuePartRef &val_ref, ScratchReg &scratch) noexcept;
  void set_value(ValuePartRef &&val_ref, ScratchReg &scratch) noexcept {
    set_value(val_ref, scratch);
  }

  /// Get a generic value part into a single register, evaluating expressions
  /// and materializing immediates as required.
  AsmReg gval_as_reg(GenericValuePart &gv) noexcept;

  /// Like gval_as_reg; if the GenericValuePart owns a reusable register
  /// (either a ScratchReg, possibly due to materialization, or a reusable
  /// ValuePartRef), store it in dst.
  AsmReg gval_as_reg_reuse(GenericValuePart &gv, ScratchReg &dst) noexcept;

private:
  Reg select_reg_evict(RegBank bank, u64 exclusion_mask) noexcept;

public:
  /// Select an available register, evicting loaded values if needed.
  Reg select_reg(RegBank bank, u64 exclusion_mask) noexcept {
    Reg res = register_file.find_first_free_excluding(bank, exclusion_mask);
    if (res.valid()) [[likely]] {
      return res;
    }
    return select_reg_evict(bank, exclusion_mask);
  }

  /// Reload a value part from memory or recompute a variable address.
  void reload_to_reg(AsmReg dst, AssignmentPartRef ap) noexcept;

  void allocate_spill_slot(AssignmentPartRef ap) noexcept;

  /// Ensure the value is spilled in its stack slot (except variable refs).
  void spill(AssignmentPartRef ap) noexcept;

  /// Evict the value from its register, spilling if needed, and free the
  /// register.
  void evict(AssignmentPartRef ap) noexcept;

  /// Evict the value from the register, spilling if needed, and free the
  /// register.
  void evict_reg(Reg reg) noexcept;

  /// Free the register. Requires that the contained value is already spilled.
  void free_reg(Reg reg) noexcept;

  // TODO(ts): switch to a branch_spill_before naming style?
  typename RegisterFile::RegBitSet
      spill_before_branch(bool force_spill = false) noexcept;
  void release_spilled_regs(typename RegisterFile::RegBitSet) noexcept;

  /// When reaching a point in the function where no other blocks will be
  /// reached anymore, use this function to release register assignments after
  /// the end of that block so the compiler does not accidentally use
  /// registers which don't contain any values.
  void release_regs_after_return() noexcept;

  /// Indicate the beginning of a region where the value state must not change.
  void begin_branch_region() noexcept {
#ifndef NDEBUG
    assert(!generating_branch);
    generating_branch = true;
#endif
  }

  /// Indicate the end of a region where the value state must not change.
  void end_branch_region() noexcept {
#ifndef NDEBUG
    assert(generating_branch);
    generating_branch = false;
#endif
  }

#ifndef NDEBUG
  bool may_change_value_state() const noexcept { return !generating_branch; }
#endif
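
  // Sketch of the intended emission pattern around a conditional branch;
  // the exact sequence is up to the derived compiler:
  //
  //   auto spilled = spill_before_branch();
  //   begin_branch_region();
  //   move_to_phi_nodes(target);   // value state must not change in here
  //   // ... emit the branch instruction(s) ...
  //   end_branch_region();
  //   release_spilled_regs(spilled);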

  void move_to_phi_nodes(BlockIndex target) noexcept {
    if (analyzer.block_has_phis(target)) {
      move_to_phi_nodes_impl(target);
    }
  }

  void move_to_phi_nodes_impl(BlockIndex target) noexcept;

  bool branch_needs_split(IRBlockRef target) noexcept {
    // for now, if the target has PHI-nodes, we split
    return analyzer.block_has_phis(target);
  }

  BlockIndex next_block() const noexcept;

  bool try_force_fixed_assignment(IRValueRef) const noexcept { return false; }

  bool hook_post_func_sym_init() noexcept { return true; }

  void analysis_start() noexcept {}

  void analysis_end() noexcept {}

  void reloc_text(Assembler::SymRef sym,
                  u32 type,
                  u64 offset,
                  i64 addend = 0) noexcept {
    this->assembler.reloc_sec(
        text_writer.get_sec_ref(), sym, type, offset, addend);
  }

  void label_place(Assembler::Label label) noexcept {
    this->assembler.label_place(
        label, text_writer.get_sec_ref(), text_writer.offset());
  }

protected:
  Assembler::SymRef get_personality_sym() noexcept;

  bool compile_func(IRFuncRef func, u32 func_idx) noexcept;

  bool compile_block(IRBlockRef block, u32 block_idx) noexcept;
};
} // namespace tpde

#include "GenericValuePart.hpp"
#include "ScratchReg.hpp"
#include "ValuePartRef.hpp"
#include "ValueRef.hpp"

namespace tpde {

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
template <typename CBDerived>
void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
    CBDerived>::add_arg(ValuePart &&vp, CCAssignment cca) noexcept {
  cca.bank = vp.bank();
  cca.size = vp.part_size();

  assigner.assign_arg(cca);

  if (cca.byval) {
    derived()->add_arg_byval(vp, cca);
    vp.reset(&compiler);
  } else if (!cca.reg.valid()) {
    derived()->add_arg_stack(vp, cca);
    vp.reset(&compiler);
  } else {
    u32 size = vp.part_size();
    if (vp.is_in_reg(cca.reg)) {
      if (!vp.can_salvage()) {
        compiler.evict_reg(cca.reg);
      } else {
        vp.salvage(&compiler);
      }
      if (cca.sext || cca.zext) {
        compiler.generate_raw_intext(cca.reg, cca.reg, cca.sext, 8 * size, 64);
      }
    } else {
      if (compiler.register_file.is_used(cca.reg)) {
        compiler.evict_reg(cca.reg);
      }
      if (vp.can_salvage()) {
        AsmReg vp_reg = vp.salvage(&compiler);
        if (cca.sext || cca.zext) {
          compiler.generate_raw_intext(cca.reg, vp_reg, cca.sext, 8 * size, 64);
        } else {
          compiler.mov(cca.reg, vp_reg, size);
        }
      } else {
        vp.reload_into_specific_fixed(&compiler, cca.reg);
        if (cca.sext || cca.zext) {
          compiler.generate_raw_intext(
              cca.reg, cca.reg, cca.sext, 8 * size, 64);
        }
      }
    }
    vp.reset(&compiler);
    assert(!compiler.register_file.is_used(cca.reg));
    compiler.register_file.mark_used(
        cca.reg, CompilerBase::INVALID_VAL_LOCAL_IDX, 0);
    compiler.register_file.mark_fixed(cca.reg);
    compiler.register_file.mark_clobbered(cca.reg);
    arg_regs |= (1ull << cca.reg.id());
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
template <typename CBDerived>
void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
    CBDerived>::add_arg(CallArg &&arg) noexcept {
  ValueRef vr = compiler.val_ref(arg.value);
  const u32 part_count = compiler.val_parts(arg.value).count();

  if (arg.flag == CallArg::Flag::byval) {
    assert(part_count == 1);
    add_arg(vr.part(0),
            CCAssignment{
                .byval = true,
                .byval_align = arg.byval_align,
                .byval_size = arg.byval_size,
            });
    return;
  }

  u32 align = 1;
  bool consecutive = false;
  u32 consec_def = 0;
  if (compiler.arg_is_int128(arg.value)) {
    // TODO: this also applies to composites with 16-byte alignment
    align = 16;
    consecutive = true;
  } else if (part_count > 1 &&
             !compiler.arg_allow_split_reg_stack_passing(arg.value)) {
    consecutive = true;
    if (part_count > UINT8_MAX) {
      // Must be completely passed on the stack.
      consecutive = false;
      consec_def = -1;
    }
  }

  for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
    derived()->add_arg(
        vr.part(part_idx),
        CCAssignment{
            .consecutive =
                u8(consecutive ? part_count - part_idx - 1 : consec_def),
            .sret = arg.flag == CallArg::Flag::sret,
            .sext = arg.flag == CallArg::Flag::sext,
            .zext = arg.flag == CallArg::Flag::zext,
            .align = u8(part_idx == 0 ? align : 1),
        });
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
template <typename CBDerived>
void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<CBDerived>::call(
    std::variant<typename Assembler::SymRef, ValuePart> target) noexcept {
  typename RegisterFile::RegBitSet skip_evict = arg_regs;
  if (auto *vp = std::get_if<ValuePart>(&target); vp && vp->can_salvage()) {
    // call_impl will reset vp, thereby unlocking and freeing the register.
    assert(vp->cur_reg_unlocked().valid() && "can_salvage implies register");
    skip_evict |= (1ull << vp->cur_reg_unlocked().id());
  }

  auto clobbered = ~assigner.get_ccinfo().callee_saved_regs;
  for (auto reg_id : util::BitSetIterator<>{compiler.register_file.used &
                                            clobbered & ~skip_evict}) {
    compiler.evict_reg(AsmReg{reg_id});
    compiler.register_file.mark_clobbered(Reg{reg_id});
  }

  derived()->call_impl(std::move(target));

  assert((compiler.register_file.fixed & arg_regs) == arg_regs);
  assert((compiler.register_file.used & arg_regs) == arg_regs);
  compiler.register_file.fixed &= ~arg_regs;
  compiler.register_file.used &= ~arg_regs;
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
template <typename CBDerived>
void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
    CBDerived>::add_ret(ValuePart &vp, CCAssignment cca) noexcept {
  cca.bank = vp.bank();
  cca.size = vp.part_size();
  assigner.assign_ret(cca);
  assert(cca.reg.valid() && "return value must be in register");
  vp.set_value_reg(&compiler, cca.reg);
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
template <typename CBDerived>
void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
    CBDerived>::add_ret(ValueRef &vr) noexcept {
  assert(vr.has_assignment());
  u32 part_count = vr.assignment()->part_count;
  for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
    CCAssignment cca;
    add_ret(vr.part(part_idx), cca);
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::RetBuilder::add(
    ValuePart &&vp, CCAssignment cca) noexcept {
  cca.bank = vp.bank();
  u32 size = cca.size = vp.part_size();
  assigner.assign_ret(cca);
  assert(cca.reg.valid() && "indirect return value must use sret argument");

  if (vp.is_in_reg(cca.reg)) {
    if (!vp.can_salvage()) {
      compiler.evict_reg(cca.reg);
    } else {
      vp.salvage(&compiler);
    }
    if (cca.sext || cca.zext) {
      compiler.generate_raw_intext(cca.reg, cca.reg, cca.sext, 8 * size, 64);
    }
  } else {
    if (compiler.register_file.is_used(cca.reg)) {
      compiler.evict_reg(cca.reg);
    }
    if (vp.can_salvage()) {
      AsmReg vp_reg = vp.salvage(&compiler);
      if (cca.sext || cca.zext) {
        compiler.generate_raw_intext(cca.reg, vp_reg, cca.sext, 8 * size, 64);
      } else {
        compiler.mov(cca.reg, vp_reg, size);
      }
    } else {
      vp.reload_into_specific_fixed(&compiler, cca.reg);
      if (cca.sext || cca.zext) {
        compiler.generate_raw_intext(cca.reg, cca.reg, cca.sext, 8 * size, 64);
      }
    }
  }
  vp.reset(&compiler);
  assert(!compiler.register_file.is_used(cca.reg));
  compiler.register_file.mark_used(
      cca.reg, CompilerBase::INVALID_VAL_LOCAL_IDX, 0);
  compiler.register_file.mark_fixed(cca.reg);
  compiler.register_file.mark_clobbered(cca.reg);
  ret_regs |= (1ull << cca.reg.id());
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::RetBuilder::add(
    IRValueRef val, CallArg::Flag flag) noexcept {
  u32 part_count = compiler.val_parts(val).count();
  ValueRef vr = compiler.val_ref(val);
  for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
    add(vr.part(part_idx),
        CCAssignment{
            .sext = flag == CallArg::Flag::sext,
            .zext = flag == CallArg::Flag::zext,
        });
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::RetBuilder::ret() noexcept {
  assert((compiler.register_file.fixed & ret_regs) == ret_regs);
  assert((compiler.register_file.used & ret_regs) == ret_regs);
  compiler.register_file.fixed &= ~ret_regs;
  compiler.register_file.used &= ~ret_regs;

  compiler.gen_func_epilog();
  compiler.release_regs_after_return();
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
bool CompilerBase<Adaptor, Derived, Config>::compile() {
  // create function symbols
  text_writer.switch_section(
      assembler.get_section(assembler.get_text_section()));

  assert(func_syms.empty());
  for (const IRFuncRef func : adaptor->funcs()) {
    auto binding = Assembler::SymBinding::GLOBAL;
    if (adaptor->func_has_weak_linkage(func)) {
      binding = Assembler::SymBinding::WEAK;
    } else if (adaptor->func_only_local(func)) {
      binding = Assembler::SymBinding::LOCAL;
    }
    if (adaptor->func_extern(func)) {
      func_syms.push_back(derived()->assembler.sym_add_undef(
          adaptor->func_link_name(func), binding));
    } else {
      func_syms.push_back(derived()->assembler.sym_predef_func(
          adaptor->func_link_name(func), binding));
    }
    derived()->define_func_idx(func, func_syms.size() - 1);
  }

  if (!derived()->hook_post_func_sym_init()) {
    TPDE_LOG_ERR("hook_post_func_sym_init failed");
    return false;
  }

  // TODO(ts): create function labels?

  bool success = true;

  u32 func_idx = 0;
  for (const IRFuncRef func : adaptor->funcs()) {
    if (adaptor->func_extern(func)) {
      TPDE_LOG_TRACE("Skipping compilation of func {}",
                     adaptor->func_link_name(func));
      ++func_idx;
      continue;
    }

    TPDE_LOG_TRACE("Compiling func {}", adaptor->func_link_name(func));
    if (!derived()->compile_func(func, func_idx)) {
      TPDE_LOG_ERR("Failed to compile function {}",
                   adaptor->func_link_name(func));
      success = false;
    }
    ++func_idx;
  }

  text_writer.flush();
  assembler.finalize();

  // TODO(ts): generate object/map?

  return success;
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::reset() {
  adaptor->reset();

  for (auto &e : stack.fixed_free_lists) {
    e.clear();
  }
  stack.dynamic_free_lists.clear();

  assembler.reset();
  func_syms.clear();
  block_labels.clear();
  personality_syms.clear();
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::init_assignment(
    IRValueRef value, ValLocalIdx local_idx) noexcept {
  assert(val_assignment(local_idx) == nullptr);
  TPDE_LOG_TRACE("Initializing assignment for value {}",
                 static_cast<u32>(local_idx));

  const auto parts = derived()->val_parts(value);
  const u32 part_count = parts.count();
  assert(part_count > 0);
  auto *assignment = assignments.allocator.allocate(part_count);
  assignments.value_ptrs[static_cast<u32>(local_idx)] = assignment;

  u32 max_part_size = 0;
  for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
    auto ap = AssignmentPartRef{assignment, part_idx};
    ap.reset();
    ap.set_bank(parts.reg_bank(part_idx));
    const u32 size = parts.size_bytes(part_idx);
    assert(size > 0);
    max_part_size = std::max(max_part_size, size);
    ap.set_part_size(size);
  }

  const auto &liveness = analyzer.liveness_info(local_idx);

  // If there is only one part, try to hand out a fixed assignment if the
  // value is used for longer than one block and there aren't too many
  // definitions in child loops that this could interfere with.
  // TODO(ts): try out only fixed assignments if the value is live for more
  // than two blocks?
  // TODO(ts): move this to ValuePartRef::alloc_reg to be able to defer this
  // for results?
  if (part_count == 1) {
    const auto &cur_loop =
        analyzer.loop_from_idx(analyzer.block_loop_idx(cur_block_idx));
    auto ap = AssignmentPartRef{assignment, 0};

    auto try_fixed =
        liveness.last > cur_block_idx &&
        cur_loop.definitions_in_childs +
                assignments.cur_fixed_assignment_count[ap.bank().id()] <
            Derived::NUM_FIXED_ASSIGNMENTS[ap.bank().id()];
    if (derived()->try_force_fixed_assignment(value)) {
      try_fixed = assignments.cur_fixed_assignment_count[ap.bank().id()] <
                  Derived::NUM_FIXED_ASSIGNMENTS[ap.bank().id()];
    }

    if (try_fixed) {
      // check if there is a fixed register available
      AsmReg reg = derived()->select_fixed_assignment_reg(ap.bank(), value);
      TPDE_LOG_TRACE("Trying to assign fixed reg to value {}",
                     static_cast<u32>(local_idx));

      // TODO: if the register is used, we can free it most of the time, but
      // not always, e.g. for PHI nodes. Detect this case and free_reg
      // otherwise.
      if (!reg.invalid() && !register_file.is_used(reg)) {
        TPDE_LOG_TRACE("Assigning fixed assignment to reg {} for value {}",
                       reg.id(),
                       static_cast<u32>(local_idx));
        ap.set_reg(reg);
        ap.set_register_valid(true);
        ap.set_fixed_assignment(true);
        register_file.mark_used(reg, local_idx, 0);
        register_file.inc_lock_count(reg); // fixed assignments always locked
        register_file.mark_clobbered(reg);
        ++assignments.cur_fixed_assignment_count[ap.bank().id()];
      }
    }
  }

  const auto last_full = liveness.last_full;
  const auto ref_count = liveness.ref_count;

  assert(max_part_size <= 256);
  assignment->max_part_size = max_part_size;
  assignment->pending_free = false;
  assignment->variable_ref = false;
  assignment->stack_variable = false;
  assignment->delay_free = last_full;
  assignment->part_count = part_count;
  assignment->frame_off = 0;
  assignment->references_left = ref_count;
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::free_assignment(
    const ValLocalIdx local_idx) noexcept {
  TPDE_LOG_TRACE("Freeing assignment for value {}",
                 static_cast<u32>(local_idx));

  ValueAssignment *assignment =
      assignments.value_ptrs[static_cast<u32>(local_idx)];
  assignments.value_ptrs[static_cast<u32>(local_idx)] = nullptr;
  const auto is_var_ref = assignment->variable_ref;
  const u32 part_count = assignment->part_count;

  // free registers
  for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
    auto ap = AssignmentPartRef{assignment, part_idx};
    if (ap.fixed_assignment()) [[unlikely]] {
      const auto reg = ap.get_reg();
      assert(register_file.is_fixed(reg));
      assert(register_file.reg_local_idx(reg) == local_idx);
      assert(register_file.reg_part(reg) == part_idx);
      --assignments.cur_fixed_assignment_count[ap.bank().id()];
      register_file.dec_lock_count_must_zero(reg); // release lock for fixed reg
      register_file.unmark_used(reg);
    } else if (ap.register_valid()) {
      const auto reg = ap.get_reg();
      assert(!register_file.is_fixed(reg));
      register_file.unmark_used(reg);
    }
  }

#ifdef TPDE_ASSERTS
  for (auto reg_id : register_file.used_regs()) {
    assert(register_file.reg_local_idx(AsmReg{reg_id}) != local_idx &&
           "freeing assignment that is still referenced by a register");
  }
#endif

  // variable references do not have a stack slot
  if (!is_var_ref && assignment->frame_off != 0) {
    free_stack_slot(assignment->frame_off, assignment->size());
  }

  assignments.allocator.deallocate(assignment);
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::init_variable_ref(
    ValLocalIdx local_idx, u32 var_ref_data) noexcept {
  TPDE_LOG_TRACE("Initializing variable-ref assignment for value {}",
                 static_cast<u32>(local_idx));

  assert(val_assignment(local_idx) == nullptr);
  auto *assignment = assignments.allocator.allocate_slow(1, true);
  assignments.value_ptrs[static_cast<u32>(local_idx)] = assignment;

  assignment->max_part_size = Config::PLATFORM_POINTER_SIZE;
  assignment->variable_ref = true;
  assignment->stack_variable = false;
  assignment->part_count = 1;
  assignment->var_ref_custom_idx = var_ref_data;
  assignment->next_delayed_free_entry = assignments.variable_ref_list;

  assignments.variable_ref_list = local_idx;

  AssignmentPartRef ap{assignment, 0};
  ap.reset();
  ap.set_bank(Config::GP_BANK);
  ap.set_part_size(Config::PLATFORM_POINTER_SIZE);
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
i32 CompilerBase<Adaptor, Derived, Config>::allocate_stack_slot(
    u32 size) noexcept {
  unsigned align_bits = 4;
  if (size == 0) {
    return 0; // 0 is the "invalid" stack slot
  } else if (size <= 16) {
    // Align up to the next power of two.
    u32 free_list_idx = size == 1 ? 0 : 32 - util::cnt_lz<u32>(size - 1);
    assert(size <= 1u << free_list_idx);
    size = 1 << free_list_idx;
    align_bits = free_list_idx;

    if (!stack.fixed_free_lists[free_list_idx].empty()) {
      auto slot = stack.fixed_free_lists[free_list_idx].back();
      stack.fixed_free_lists[free_list_idx].pop_back();
      return slot;
    }
  } else {
    size = util::align_up(size, 16);
    auto it = stack.dynamic_free_lists.find(size);
    if (it != stack.dynamic_free_lists.end() && !it->second.empty()) {
      const auto slot = it->second.back();
      it->second.pop_back();
      return slot;
    }
  }

  assert(stack.frame_size != ~0u &&
         "cannot allocate stack slot before stack frame is initialized");

  // Align frame_size to align_bits
  for (u32 list_idx = util::cnt_tz(stack.frame_size); list_idx < align_bits;
       list_idx = util::cnt_tz(stack.frame_size)) {
    i32 slot = stack.frame_size;
    if constexpr (Config::FRAME_INDEXING_NEGATIVE) {
      slot = -(slot + (1ull << list_idx));
    }
    stack.fixed_free_lists[list_idx].push_back(slot);
    stack.frame_size += 1ull << list_idx;
  }

  auto slot = stack.frame_size;
  assert(slot != 0 && "stack slot 0 is reserved");
  stack.frame_size += size;

  if constexpr (Config::FRAME_INDEXING_NEGATIVE) {
    slot = -(slot + size);
  }
  return slot;
}
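
// Worked example of the scheme above: a 12-byte request rounds up to the
// 16-byte bucket (align_bits = 4). If frame_size is currently 8, the
// alignment loop first donates an 8-byte slot to fixed_free_lists[3], making
// frame_size 16; the new slot then occupies frame offsets [16, 32). With
// Config::FRAME_INDEXING_NEGATIVE, the returned offset is -(16 + 16) = -32.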

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::free_stack_slot(
    u32 slot, u32 size) noexcept {
  if (size == 0) [[unlikely]] {
    assert(slot == 0 && "unexpected slot for zero-sized stack slot?");
    // Do nothing.
  } else if (size <= 16) [[likely]] {
    u32 free_list_idx = size == 1 ? 0 : 32 - util::cnt_lz<u32>(size - 1);
    stack.fixed_free_lists[free_list_idx].push_back(slot);
  } else {
    size = util::align_up(size, 16);
    stack.dynamic_free_lists[size].push_back(slot);
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
template <typename Fn>
void CompilerBase<Adaptor, Derived, Config>::handle_func_arg(
    u32 arg_idx, IRValueRef arg, Fn add_arg) noexcept {
  ValueRef vr = derived()->result_ref(arg);
  if (adaptor->cur_arg_is_byval(arg_idx)) {
    std::optional<i32> byval_frame_off =
        add_arg(vr.part(0),
                CCAssignment{
                    .byval = true,
                    .byval_align = adaptor->cur_arg_byval_align(arg_idx),
                    .byval_size = adaptor->cur_arg_byval_size(arg_idx),
                });

    if (byval_frame_off) {
      // We need to convert the assignment into a stack variable ref.
      ValLocalIdx local_idx = val_idx(arg);
      // TODO: we shouldn't create the result_ref for such cases in the first
      // place. However, this is not easy to detect up front; it depends on
      // the target and the calling convention whether this is possible.
      vr.reset();
      // The value assignment might have been freed by the ValueRef reset.
      if (val_assignment(local_idx)) {
        free_assignment(local_idx);
      }
      init_variable_ref(local_idx, 0);
      ValueAssignment *assignment = this->val_assignment(local_idx);
      assignment->stack_variable = true;
      assignment->frame_off = *byval_frame_off;
    }
    return;
  }

  if (adaptor->cur_arg_is_sret(arg_idx)) {
    add_arg(vr.part(0), CCAssignment{.sret = true});
    return;
  }

  const u32 part_count = vr.assignment()->part_count;

  u32 align = 1;
  u32 consecutive = 0;
  u32 consec_def = 0;
  if (derived()->arg_is_int128(arg)) {
    // TODO: this also applies to composites with 16-byte alignment
    align = 16;
    consecutive = 1;
  } else if (part_count > 1 &&
             !derived()->arg_allow_split_reg_stack_passing(arg)) {
    consecutive = 1;
    if (part_count > UINT8_MAX) {
      // Must be completely passed on the stack.
      consecutive = 0;
      consec_def = -1;
    }
  }

  for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
    add_arg(vr.part(part_idx),
            CCAssignment{
                .consecutive =
                    u8(consecutive ? part_count - part_idx - 1 : consec_def),
                .align = u8(part_idx == 0 ? align : 1),
            });
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::ValueRef
    CompilerBase<Adaptor, Derived, Config>::val_ref(IRValueRef value) noexcept {
  if (auto special = derived()->val_ref_special(value); special) {
    return ValueRef{this, std::move(*special)};
  }

  const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(value);
  assert(val_assignment(local_idx) != nullptr && "value use before def");
  return ValueRef{this, local_idx};
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
std::pair<typename CompilerBase<Adaptor, Derived, Config>::ValueRef,
          typename CompilerBase<Adaptor, Derived, Config>::ValuePartRef>
    CompilerBase<Adaptor, Derived, Config>::val_ref_single(
        IRValueRef value) noexcept {
  std::pair<ValueRef, ValuePartRef> res{val_ref(value), this};
  res.second = res.first.part(0);
  return res;
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::ValueRef
    CompilerBase<Adaptor, Derived, Config>::result_ref(
        IRValueRef value) noexcept {
  const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(value);
  if (val_assignment(local_idx) == nullptr) {
    init_assignment(value, local_idx);
  }
  return ValueRef{this, local_idx};
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
std::pair<typename CompilerBase<Adaptor, Derived, Config>::ValueRef,
          typename CompilerBase<Adaptor, Derived, Config>::ValuePartRef>
    CompilerBase<Adaptor, Derived, Config>::result_ref_single(
        IRValueRef value) noexcept {
  std::pair<ValueRef, ValuePartRef> res{result_ref(value), this};
  res.second = res.first.part(0);
  return res;
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::set_value(
    ValuePartRef &val_ref, ScratchReg &scratch) noexcept {
  auto ap = val_ref.assignment();
  assert(scratch.has_reg());
  auto reg = scratch.cur_reg();

  if (ap.fixed_assignment()) {
    auto cur_reg = ap.get_reg();
    assert(register_file.is_used(cur_reg));
    assert(register_file.is_fixed(cur_reg));
    assert(register_file.reg_local_idx(cur_reg) == val_ref.local_idx());

    if (cur_reg.id() != reg.id()) {
      derived()->mov(cur_reg, reg, ap.part_size());
    }

    ap.set_register_valid(true);
    ap.set_modified(true);
    return;
  }

  if (ap.register_valid()) {
    auto cur_reg = ap.get_reg();
    if (cur_reg.id() == reg.id()) {
      ap.set_modified(true);
      return;
    }
    val_ref.unlock();
    assert(!register_file.is_fixed(cur_reg));
    register_file.unmark_used(cur_reg);
  }

  // The ScratchReg's register is fixed and used => unfix it, keep it used,
  // and update the assignment.
  assert(register_file.is_used(reg));
  assert(register_file.is_fixed(reg));
  assert(register_file.is_clobbered(reg));
  scratch.force_set_reg(AsmReg::make_invalid());
  register_file.unmark_fixed(reg);
  register_file.update_reg_assignment(reg, val_ref.local_idx(), val_ref.part());
  ap.set_reg(reg);
  ap.set_register_valid(true);
  ap.set_modified(true);
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::AsmReg
    CompilerBase<Adaptor, Derived, Config>::gval_as_reg(
        GenericValuePart &gv) noexcept {
  if (std::holds_alternative<ScratchReg>(gv.state)) {
    return std::get<ScratchReg>(gv.state).cur_reg();
  }
  if (std::holds_alternative<ValuePartRef>(gv.state)) {
    auto &vpr = std::get<ValuePartRef>(gv.state);
    if (vpr.has_reg()) {
      return vpr.cur_reg();
    }
    return vpr.load_to_reg();
  }
  if (auto *expr = std::get_if<typename GenericValuePart::Expr>(&gv.state)) {
    if (expr->has_base() && !expr->has_index() && expr->disp == 0) {
      return expr->base_reg();
    }
    return derived()->gval_expr_as_reg(gv);
  }
  TPDE_UNREACHABLE("gval_as_reg on empty GenericValuePart");
}
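
// Sketch of the three GenericValuePart states handled above: a ScratchReg
// already owns a register; a ValuePartRef may need a reload from its stack
// slot; an address expression only collapses to a plain register when it has
// a base, no index, and no displacement. Otherwise the derived compiler must
// materialize it (gval_expr_as_reg), e.g. into a lea on x86-64.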

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::AsmReg
    CompilerBase<Adaptor, Derived, Config>::gval_as_reg_reuse(
        GenericValuePart &gv, ScratchReg &dst) noexcept {
  AsmReg reg = gval_as_reg(gv);
  if (!dst.has_reg()) {
    if (auto *scratch = std::get_if<ScratchReg>(&gv.state)) {
      dst = std::move(*scratch);
    } else if (auto *val_ref = std::get_if<ValuePartRef>(&gv.state)) {
      if (val_ref->can_salvage()) {
        dst.alloc_specific(val_ref->salvage());
        assert(dst.cur_reg() == reg && "salvaging unsuccessful");
      }
    }
  }
  return reg;
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
Reg CompilerBase<Adaptor, Derived, Config>::select_reg_evict(
    RegBank bank, u64 exclusion_mask) noexcept {
  TPDE_LOG_DBG("select_reg_evict for bank {}", bank.id());
  auto candidates = register_file.allocatable & ~register_file.fixed &
                    register_file.bank_regs(bank) & ~exclusion_mask;

  Reg candidate = Reg::make_invalid();
  u32 max_score = 0;
  for (auto reg_id : util::BitSetIterator<>(candidates)) {
    Reg reg{reg_id};

    // Must be an evictable value, not a temporary.
    auto local_idx = register_file.reg_local_idx(reg);
    u32 part = register_file.reg_part(Reg{reg});
    assert(local_idx != INVALID_VAL_LOCAL_IDX);
    ValueAssignment *va = val_assignment(local_idx);
    AssignmentPartRef ap{va, part};

    // We want to sort registers by the following (ordered by priority):
    // - stack variable ref (~1 add/sub to reconstruct)
    // - other variable ref (1-2 instrs to reconstruct)
    // - already spilled (no store needed)
    // - last use farthest away (most likely to get spilled anyhow, so there's
    //   not much harm in spilling earlier)
    // - lowest ref-count (least used)
    //
    // TODO: evaluate and refine this heuristic

    // TODO: evict stack variable refs before others
    if (ap.variable_ref()) {
      TPDE_LOG_DBG("  r{} ({}) is variable-ref", reg_id, u32(local_idx));
      candidate = reg;
      break;
    }

    u32 score = 0;
    if (ap.stack_valid()) {
      score |= u32{1} << 31;
    }

    const auto &liveness = analyzer.liveness_info(local_idx);
    u32 last_use_dist = u32(liveness.last) - u32(cur_block_idx);
    score |= (last_use_dist < 0x8000 ? 0x8000 - last_use_dist : 0) << 16;

    u32 refs_left = va->pending_free ? 0 : va->references_left;
    score |= (refs_left < 0xffff ? 0x10000 - refs_left : 1);

    TPDE_LOG_DBG("  r{} ({}:{}) rc={}/{} live={}-{}{} spilled={} score={:#x}",
                 reg_id,
                 u32(local_idx),
                 part,
                 refs_left,
                 liveness.ref_count,
                 u32(liveness.first),
                 u32(liveness.last),
                 &"*"[!liveness.last_full],
                 ap.stack_valid(),
                 score);

    assert(score != 0);
    if (score > max_score) {
      candidate = reg;
      max_score = score;
    }
  }
  if (candidate.invalid()) [[unlikely]] {
    TPDE_FATAL("ran out of registers for scratch registers");
  }
  TPDE_LOG_DBG("  selected r{}", candidate.id());
  evict_reg(candidate);
  return candidate;
}
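
// Worked example of the scoring above (illustrative numbers): a value that
// is already spilled (bit 31 set), last used 3 blocks ahead
// ((0x8000 - 3) << 16) and with 2 references left (0x10000 - 2) scores
// higher than a dirty value whose last use is nearby, so the already-spilled
// register is evicted first and no store needs to be emitted.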

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::reload_to_reg(
    AsmReg dst, AssignmentPartRef ap) noexcept {
  if (!ap.variable_ref()) {
    assert(ap.stack_valid());
    derived()->load_from_stack(dst, ap.frame_off(), ap.part_size());
  } else if (ap.is_stack_variable()) {
    derived()->load_address_of_stack_var(dst, ap);
  } else if constexpr (!Config::DEFAULT_VAR_REF_HANDLING) {
    derived()->load_address_of_var_reference(dst, ap);
  } else {
    TPDE_UNREACHABLE("non-stack-variable needs custom var-ref handling");
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::allocate_spill_slot(
    AssignmentPartRef ap) noexcept {
  assert(!ap.variable_ref() && "cannot allocate spill slot for variable ref");
  if (ap.assignment()->frame_off == 0) {
    assert(!ap.stack_valid() && "stack-valid set without spill slot");
    ap.assignment()->frame_off = allocate_stack_slot(ap.assignment()->size());
    assert(ap.assignment()->frame_off != 0);
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::spill(
    AssignmentPartRef ap) noexcept {
  assert(may_change_value_state());
  if (!ap.stack_valid() && !ap.variable_ref()) {
    assert(ap.register_valid() && "cannot spill uninitialized assignment part");
    allocate_spill_slot(ap);
    derived()->spill_reg(ap.get_reg(), ap.frame_off(), ap.part_size());
    ap.set_stack_valid();
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::evict(
    AssignmentPartRef ap) noexcept {
  assert(may_change_value_state());
  assert(ap.register_valid());
  derived()->spill(ap);
  ap.set_register_valid(false);
  register_file.unmark_used(ap.get_reg());
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::evict_reg(Reg reg) noexcept {
  assert(may_change_value_state());
  assert(!register_file.is_fixed(reg));
  assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);

  ValLocalIdx local_idx = register_file.reg_local_idx(reg);
  auto part = register_file.reg_part(reg);
  AssignmentPartRef evict_part{val_assignment(local_idx), part};
  assert(evict_part.register_valid());
  assert(evict_part.get_reg() == reg);
  derived()->spill(evict_part);
  evict_part.set_register_valid(false);
  register_file.unmark_used(reg);
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::free_reg(Reg reg) noexcept {
  assert(may_change_value_state());
  assert(!register_file.is_fixed(reg));
  assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);

  ValLocalIdx local_idx = register_file.reg_local_idx(reg);
  auto part = register_file.reg_part(reg);
  AssignmentPartRef ap{val_assignment(local_idx), part};
  assert(ap.register_valid());
  assert(ap.get_reg() == reg);
  assert(!ap.modified() || ap.variable_ref());
  ap.set_register_valid(false);
  register_file.unmark_used(reg);
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::RegisterFile::RegBitSet
    CompilerBase<Adaptor, Derived, Config>::spill_before_branch(
        bool force_spill) noexcept {
  // Since we do not explicitly keep track of register assignments per block,
  // whenever we might branch off to a block that we do not directly compile
  // afterwards (i.e. the register assignments might change in between), we
  // need to spill all registers which are not fixed and remove them from the
  // register state.
  //
  // This leads to worse codegen but saves the significant overhead of
  // storing and managing the register assignment for each block (256
  // bytes/block for x64) and possibly compile time, as there might be
  // additional logic to move values around.

  // First, we consider the case that the current block only has one successor
  // which is compiled directly after the current one, in which case we do not
  // have to spill anything.
  //
  // Secondly, if the next block has multiple incoming edges, we always have
  // to spill and remove from the register assignment. Otherwise, we
  // only need to spill values if they are alive in any successor which is not
  // the next block.
  //
  // Values which are only read from PHI-nodes and have no extended lifetimes
  // do not need to be spilled, as they die at the edge.

  using RegBitSet = typename RegisterFile::RegBitSet;

  assert(may_change_value_state());

  const IRBlockRef cur_block_ref = analyzer.block_ref(cur_block_idx);

  bool must_spill = force_spill;
  if (!must_spill) {
    // We must always spill if no block is immediately succeeding or that block
    // has multiple incoming edges.
    auto next_block_is_succ = false;
    auto next_block_has_multiple_incoming = false;
    u32 succ_count = 0;
    for (const IRBlockRef succ : adaptor->block_succs(cur_block_ref)) {
      ++succ_count;
      if (static_cast<u32>(analyzer.block_idx(succ)) ==
          static_cast<u32>(cur_block_idx) + 1) {
        next_block_is_succ = true;
        if (analyzer.block_has_multiple_incoming(succ)) {
          next_block_has_multiple_incoming = true;
        }
      }
    }

    must_spill = !next_block_is_succ || next_block_has_multiple_incoming;

    if (succ_count == 1 && !must_spill) {
      return RegBitSet{};
    }
  }

  /*if (!next_block_is_succ) {
    // spill and remove from reg assignment
    auto spilled = RegisterFile::RegBitSet{};
    for (auto reg : register_file.used_regs()) {
      auto local_idx = register_file.reg_local_idx(AsmReg{reg});
      auto part = register_file.reg_part(AsmReg{reg});
      if (local_idx == INVALID_VAL_LOCAL_IDX) {
        // TODO(ts): can this actually happen?
        continue;
      }
      auto ap = AssignmentPartRef{val_assignment(local_idx), part};
      ap.spill_if_needed(this);
      spilled |= RegisterFile::RegBitSet{1ull} << reg;
    }
    return spilled;
  }*/

  const auto spill_reg_if_needed = [&](const auto reg) {
    auto local_idx = register_file.reg_local_idx(AsmReg{reg});
    auto part = register_file.reg_part(AsmReg{reg});
    if (local_idx == INVALID_VAL_LOCAL_IDX) {
      // scratch regs can never be held across blocks
      return true;
    }
    auto *assignment = val_assignment(local_idx);
    auto ap = AssignmentPartRef{assignment, part};
    if (ap.fixed_assignment()) {
      // fixed registers do not need to be spilled
      return true;
    }

    if (!ap.modified()) {
      // no need to spill if the value was not modified
      return false;
    }

    if (ap.variable_ref()) {
      return false;
    }

    const auto &liveness = analyzer.liveness_info(local_idx);
    if (liveness.last <= cur_block_idx) {
      // no need to spill the value if it dies immediately after the block
      return false;
    }

    if (must_spill) {
      spill(ap);
      return false;
    }

    // spill if the value is alive in any successor that is not the next
    // block
    for (const IRBlockRef succ : adaptor->block_succs(cur_block_ref)) {
      const auto block_idx = analyzer.block_idx(succ);
      if (static_cast<u32>(block_idx) == static_cast<u32>(cur_block_idx) + 1) {
        continue;
      }
      if (block_idx >= liveness.first && block_idx <= liveness.last) {
        spill(ap);
        return false;
      }
    }

    return false;
  };

  auto spilled = RegBitSet{};
  // TODO(ts): just use register_file.used_nonfixed_regs()?
  for (auto reg : register_file.used_regs()) {
    const auto reg_fixed = spill_reg_if_needed(reg);
    // remove from register assignment if the next block cannot rely on the
    // value being in the specific register
    if (!reg_fixed && must_spill) {
      // TODO(ts): this needs to be changed if this is supposed to work
      // with other RegisterFile implementations
      spilled |= RegBitSet{1ull} << reg;
    }
  }

  return spilled;
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::release_spilled_regs(
    typename RegisterFile::RegBitSet regs) noexcept {
  assert(may_change_value_state());

  // TODO(ts): needs changes for other RegisterFile impls
  auto used_non_fixed_regs = register_file.used & ~register_file.fixed;
  for (auto reg_id : util::BitSetIterator<>{regs & used_non_fixed_regs}) {
    free_reg(Reg{reg_id});
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::
    release_regs_after_return() noexcept {
  // we essentially have to free all non-fixed registers
  for (auto reg_id : register_file.used_nonfixed_regs()) {
    free_reg(Reg{reg_id});
  }
}

1485template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1486void CompilerBase<Adaptor, Derived, Config>::move_to_phi_nodes_impl(
1487 BlockIndex target) noexcept {
1488 // PHI-nodes are always moved to their stack-slot (unless they are fixed)
1489 //
1490 // However, we need to take care of PHI-dependencies (cycles and chains)
1491 // as to not overwrite values which might be needed.
1492 //
1493 // In most cases, we expect the number of PHIs to be small but we want to
1494 // stay reasonably efficient even with larger numbers of PHIs
1495
1496 struct ScratchWrapper {
1497 Derived *self;
1498 AsmReg cur_reg = AsmReg::make_invalid();
1499 bool backed_up = false;
1500 bool was_modified = false;
1501 u8 part = 0;
1502 ValLocalIdx local_idx = INVALID_VAL_LOCAL_IDX;
1503
1504 ScratchWrapper(Derived *self) : self{self} {}
1505
1506 ~ScratchWrapper() { reset(); }
1507
1508 void reset() {
1509 if (cur_reg.invalid()) {
1510 return;
1511 }
1512
1513 self->register_file.unmark_fixed(cur_reg);
1514 self->register_file.unmark_used(cur_reg);
1515
1516 if (backed_up) {
1517 // restore the register state
1518 // TODO(ts): do we actually need the reload?
1519 auto *assignment = self->val_assignment(local_idx);
1520 // check if the value was free'd, then we dont need to restore
1521 // it
1522 if (assignment) {
1523 auto ap = AssignmentPartRef{assignment, part};
1524 if (!ap.variable_ref()) {
1525 // TODO(ts): assert that this always happens?
1526 assert(ap.stack_valid());
1527 self->load_from_stack(cur_reg, ap.frame_off(), ap.part_size());
1528 }
1529 ap.set_reg(cur_reg);
1530 ap.set_register_valid(true);
1531 ap.set_modified(was_modified);
1532 self->register_file.mark_used(cur_reg, local_idx, part);
1533 }
1534 backed_up = false;
1535 }
1536 cur_reg = AsmReg::make_invalid();
1537 }
1538
1539 AsmReg alloc_from_bank(RegBank bank) {
1540 if (cur_reg.valid() && self->register_file.reg_bank(cur_reg) == bank) {
1541 return cur_reg;
1542 }
1543 if (cur_reg.valid()) {
1544 reset();
1545 }
1546
1547 // TODO(ts): try to first find a non callee-saved/clobbered
1548 // register...
1549 auto &reg_file = self->register_file;
1550 auto reg = reg_file.find_first_free_excluding(bank, 0);
1551 if (reg.invalid()) {
1552 // TODO(ts): use clock here?
1553 reg = reg_file.find_first_nonfixed_excluding(bank, 0);
1554 if (reg.invalid()) {
1555 TPDE_FATAL("ran out of registers for scratch registers");
1556 }
1557
1558 backed_up = true;
1559 local_idx = reg_file.reg_local_idx(reg);
1560 part = reg_file.reg_part(reg);
1561 AssignmentPartRef ap{self->val_assignment(local_idx), part};
1562 was_modified = ap.modified();
1563 // TODO(ts): this does not spill for variable refs
1564 // We don't use evict_reg here, as we know that we can't change the
1565 // value state.
1566 assert(ap.register_valid() && ap.get_reg() == reg);
1567 if (!ap.stack_valid() && !ap.variable_ref()) {
1568 self->allocate_spill_slot(ap);
1569 self->spill_reg(ap.get_reg(), ap.frame_off(), ap.part_size());
1570 ap.set_stack_valid();
1571 }
1572 ap.set_register_valid(false);
1573 reg_file.unmark_used(reg);
1574 }
1575
1576 reg_file.mark_used(reg, INVALID_VAL_LOCAL_IDX, 0);
1577 reg_file.mark_clobbered(reg);
1578 reg_file.mark_fixed(reg);
1579 cur_reg = reg;
1580 return reg;
1581 }
1582
1583 ScratchWrapper &operator=(const ScratchWrapper &) = delete;
1584 ScratchWrapper &operator=(ScratchWrapper &&) = delete;
1585 };
1586
1587 IRBlockRef target_ref = analyzer.block_ref(target);
1588 IRBlockRef cur_ref = analyzer.block_ref(cur_block_idx);
1589
1590 // collect all the nodes
1591 struct NodeEntry {
1592 IRValueRef phi;
1593 IRValueRef incoming_val;
1594 ValLocalIdx phi_local_idx;
1595 // local idx of same-block phi node that needs special handling
1596 ValLocalIdx incoming_phi_local_idx = INVALID_VAL_LOCAL_IDX;
1597 // bool incoming_is_phi;
1598 u32 ref_count;
1599
1600 bool operator<(const NodeEntry &other) const noexcept {
1601 return phi_local_idx < other.phi_local_idx;
1602 }
1603
1604 bool operator<(ValLocalIdx other) const noexcept {
1605 return phi_local_idx < other;
1606 }
1607 };
1608
1609 util::SmallVector<NodeEntry, 16> nodes;
1610 for (IRValueRef phi : adaptor->block_phis(target_ref)) {
1611 ValLocalIdx phi_local_idx = adaptor->val_local_idx(phi);
1612 auto incoming = adaptor->val_as_phi(phi).incoming_val_for_block(cur_ref);
1613 nodes.emplace_back(NodeEntry{
1614 .phi = phi, .incoming_val = incoming, .phi_local_idx = phi_local_idx});
1615 }
1616
1617 // We check that the block has phi nodes before getting here.
1618 assert(!nodes.empty() && "block marked has having phi nodes has none");
1619
1620 ScratchWrapper scratch{derived()};
1621 const auto move_to_phi = [this, &scratch](IRValueRef phi,
1622 IRValueRef incoming_val) {
1623 // TODO(ts): if phi==incoming_val, we should be able to elide the move
1624 // even if the phi is in a fixed register, no?
1625
1626 auto phi_vr = derived()->result_ref(phi);
1627 auto val_vr = derived()->val_ref(incoming_val);
1628 if (phi == incoming_val) {
1629 return;
1630 }
1631
1632 u32 part_count = phi_vr.assignment()->part_count;
1633 for (u32 i = 0; i < part_count; ++i) {
1634 AssignmentPartRef phi_ap{phi_vr.assignment(), i};
1635
1636 AsmReg reg{};
1637 ValuePartRef val_vpr = val_vr.part(i);
1638 if (val_vpr.is_const()) {
1639 reg = scratch.alloc_from_bank(val_vpr.bank());
1640 val_vpr.reload_into_specific_fixed(reg);
1641 } else if (val_vpr.assignment().register_valid() ||
1642 val_vpr.assignment().fixed_assignment()) {
1643 reg = val_vpr.assignment().get_reg();
1644 } else {
1645 reg = val_vpr.reload_into_specific_fixed(
1646 this, scratch.alloc_from_bank(phi_ap.bank()));
1647 }
1648
1649 if (phi_ap.fixed_assignment()) {
1650 derived()->mov(phi_ap.get_reg(), reg, phi_ap.part_size());
1651 } else {
1652 allocate_spill_slot(phi_ap);
1653 derived()->spill_reg(reg, phi_ap.frame_off(), phi_ap.part_size());
1654 phi_ap.set_stack_valid();
1655 }
1656 }
1657 };
1658
1659 if (nodes.size() == 1) {
1660 move_to_phi(nodes[0].phi, nodes[0].incoming_val);
1661 return;
1662 }
1663
1664 // sort so we can binary search later
1665 std::sort(nodes.begin(), nodes.end());
1666
1667 // fill in the refcount
1668 auto all_zero_ref = true;
1669 for (auto &node : nodes) {
 1670    // Nothing needs to be done for PHIs whose incoming value is not
 1671    // another PHI, or for PHIs that reference themselves.
1672 bool incoming_is_phi = adaptor->val_is_phi(node.incoming_val);
1673 if (!incoming_is_phi || node.incoming_val == node.phi) {
1674 continue;
1675 }
1676
1677 ValLocalIdx inc_local_idx = adaptor->val_local_idx(node.incoming_val);
1678 auto it = std::lower_bound(nodes.begin(), nodes.end(), inc_local_idx);
1679 if (it == nodes.end() || it->phi != node.incoming_val) {
1680 // Incoming value is a PHI node, but it's not from our block, so we don't
1681 // need to be particularly careful when assigning values.
1682 continue;
1683 }
1684 node.incoming_phi_local_idx = inc_local_idx;
1685 ++it->ref_count;
1686 all_zero_ref = false;
1687 }
1688
1689 if (all_zero_ref) {
1690 // no cycles/chain that we need to take care of
1691 for (auto &node : nodes) {
1692 move_to_phi(node.phi, node.incoming_val);
1693 }
1694 return;
1695 }
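  // Example of what the ref-counts guard against (sketch): with
  //
  //   a' = phi(..., b)    and    b' = phi(..., a)
  //
  // both incoming values are PHIs of this same block, forming a swap cycle:
  // neither move may go first without clobbering the other's input, so a
  // temporary is required (handled below). A mere chain such as
  //
  //   a' = phi(..., b),   b' = phi(..., x)
  //
  // only needs ordering: write a' before overwriting b.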
1696
1697 // TODO(ts): this is rather inefficient...
1698 util::SmallVector<u32, 32> ready_indices;
1699 ready_indices.reserve(nodes.size());
1700 util::SmallBitSet<256> waiting_nodes;
1701 waiting_nodes.resize(nodes.size());
1702 for (u32 i = 0; i < nodes.size(); ++i) {
1703 if (nodes[i].ref_count) {
1704 waiting_nodes.mark_set(i);
1705 } else {
1706 ready_indices.push_back(i);
1707 }
1708 }
1709
1710 u32 handled_count = 0;
1711 u32 cur_tmp_part_count = 0;
1712 i32 cur_tmp_slot = 0;
1713 u32 cur_tmp_slot_size = 0;
1714 IRValueRef cur_tmp_val = Adaptor::INVALID_VALUE_REF;
1715 ScratchWrapper tmp_reg1{derived()}, tmp_reg2{derived()};
1716
1717 const auto move_from_tmp_phi = [&](IRValueRef target_phi) {
1718 auto phi_vr = val_ref(target_phi);
1719 if (cur_tmp_part_count <= 2) {
1720 AssignmentPartRef ap{phi_vr.assignment(), 0};
1721 assert(!tmp_reg1.cur_reg.invalid());
1722 if (ap.fixed_assignment()) {
1723 derived()->mov(ap.get_reg(), tmp_reg1.cur_reg, ap.part_size());
1724 } else {
1725 derived()->spill_reg(tmp_reg1.cur_reg, ap.frame_off(), ap.part_size());
1726 }
1727
1728 if (cur_tmp_part_count == 2) {
1729 AssignmentPartRef ap_high{phi_vr.assignment(), 1};
1730 assert(!ap_high.fixed_assignment());
1731 assert(!tmp_reg2.cur_reg.invalid());
1732 derived()->spill_reg(
1733 tmp_reg2.cur_reg, ap_high.frame_off(), ap_high.part_size());
1734 }
1735 return;
1736 }
1737
1738 for (u32 i = 0; i < cur_tmp_part_count; ++i) {
1739 AssignmentPartRef phi_ap{phi_vr.assignment(), i};
1740 assert(!phi_ap.fixed_assignment());
1741
1742 auto slot_off = cur_tmp_slot + phi_ap.part_off();
1743 auto reg = tmp_reg1.alloc_from_bank(phi_ap.bank());
1744 derived()->load_from_stack(reg, slot_off, phi_ap.part_size());
1745 derived()->spill_reg(reg, phi_ap.frame_off(), phi_ap.part_size());
1746 }
1747 };
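  // Convention for the temporary introduced below (sketch): values with at
  // most two parts are parked in tmp_reg1/tmp_reg2 and written out with plain
  // moves or spills; values with more parts are copied into a dedicated stack
  // slot of assignment->size() bytes, from which move_from_tmp_phi replays
  // each part at cur_tmp_slot + part_off() into the destination PHI's storage.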
1748
1749 while (handled_count != nodes.size()) {
1750 if (ready_indices.empty()) {
1751 // need to break a cycle
1752 auto cur_idx_opt = waiting_nodes.first_set();
1753 assert(cur_idx_opt);
1754 auto cur_idx = *cur_idx_opt;
1755 assert(nodes[cur_idx].ref_count == 1);
1756 assert(cur_tmp_val == Adaptor::INVALID_VALUE_REF);
1757
1758 auto phi_val = nodes[cur_idx].phi;
1759 auto phi_vr = this->val_ref(phi_val);
1760 auto *assignment = phi_vr.assignment();
1761 cur_tmp_part_count = assignment->part_count;
1762 cur_tmp_val = phi_val;
1763
1764 if (cur_tmp_part_count > 2) {
1765 // use a stack slot to store the temporaries
1766 cur_tmp_slot_size = assignment->size();
1767 cur_tmp_slot = allocate_stack_slot(cur_tmp_slot_size);
1768
1769 for (u32 i = 0; i < cur_tmp_part_count; ++i) {
1770 auto ap = AssignmentPartRef{assignment, i};
1771 assert(!ap.fixed_assignment());
1772 auto slot_off = cur_tmp_slot + ap.part_off();
1773
1774 if (ap.register_valid()) {
1775 auto reg = ap.get_reg();
1776 derived()->spill_reg(reg, slot_off, ap.part_size());
1777 } else {
1778 auto reg = tmp_reg1.alloc_from_bank(ap.bank());
1779 assert(ap.stack_valid());
1780 derived()->load_from_stack(reg, ap.frame_off(), ap.part_size());
1781 derived()->spill_reg(reg, slot_off, ap.part_size());
1782 }
1783 }
1784 } else {
1785 // TODO(ts): if the PHI is not fixed, then we can just reuse its
1786 // register if it has one
1787 auto phi_vpr = phi_vr.part(0);
1788 auto reg = tmp_reg1.alloc_from_bank(phi_vpr.bank());
1789 phi_vpr.reload_into_specific_fixed(this, reg);
1790
1791 if (cur_tmp_part_count == 2) {
1792 // TODO(ts): just change the part ref on the lower ref?
1793 auto phi_vpr_high = phi_vr.part(1);
1794 auto reg_high = tmp_reg2.alloc_from_bank(phi_vpr_high.bank());
1795 phi_vpr_high.reload_into_specific_fixed(this, reg_high);
1796 }
1797 }
1798
1799 nodes[cur_idx].ref_count = 0;
1800 ready_indices.push_back(cur_idx);
1801 waiting_nodes.mark_unset(cur_idx);
1802 }
1803
1804 for (u32 i = 0; i < ready_indices.size(); ++i) {
1805 ++handled_count;
1806 auto cur_idx = ready_indices[i];
1807 auto phi_val = nodes[cur_idx].phi;
1808 IRValueRef incoming_val = nodes[cur_idx].incoming_val;
1809 if (incoming_val == phi_val) {
1810 // no need to do anything, except ref-counting
1811 (void)val_ref(incoming_val);
1812 (void)val_ref(phi_val);
1813 continue;
1814 }
1815
1816 if (incoming_val == cur_tmp_val) {
1817 move_from_tmp_phi(phi_val);
1818
1819 if (cur_tmp_part_count > 2) {
1820 free_stack_slot(cur_tmp_slot, cur_tmp_slot_size);
1821 cur_tmp_slot = 0xFFFF'FFFF;
1822 cur_tmp_slot_size = 0;
1823 }
1824 cur_tmp_val = Adaptor::INVALID_VALUE_REF;
1825 // skip the code below as the ref count of the temp val is
1826 // already 0 and we don't want to reinsert it into the ready
1827 // list
1828 continue;
1829 }
1830
1831 move_to_phi(phi_val, incoming_val);
1832
1833 if (nodes[cur_idx].incoming_phi_local_idx == INVALID_VAL_LOCAL_IDX) {
1834 continue;
1835 }
1836
1837 auto it = std::lower_bound(
1838 nodes.begin(), nodes.end(), nodes[cur_idx].incoming_phi_local_idx);
1839 assert(it != nodes.end() && it->phi == incoming_val &&
1840 "incoming_phi_local_idx set incorrectly");
1841
1842 assert(it->ref_count > 0);
1843 if (--it->ref_count == 0) {
1844 auto node_idx = static_cast<u32>(it - nodes.begin());
1845 ready_indices.push_back(node_idx);
1846 waiting_nodes.mark_unset(node_idx);
1847 }
1848 }
1849 ready_indices.clear();
1850 }
1851}
1852
1853template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1854typename CompilerBase<Adaptor, Derived, Config>::BlockIndex
1855    CompilerBase<Adaptor, Derived, Config>::next_block() const noexcept {
1856  return static_cast<BlockIndex>(static_cast<u32>(cur_block_idx) + 1);
1857}
1858
1859template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1860typename CompilerBase<Adaptor, Derived, Config>::Assembler::SymRef
1861    CompilerBase<Adaptor, Derived, Config>::get_personality_sym() noexcept {
1862  using SymRef = typename Assembler::SymRef;
1863 SymRef personality_sym;
1864 if (this->adaptor->cur_needs_unwind_info()) {
1865 SymRef personality_func = derived()->cur_personality_func();
1866 if (personality_func.valid()) {
1867 for (const auto &[fn_sym, ptr_sym] : personality_syms) {
1868 if (fn_sym == personality_func) {
1869 personality_sym = ptr_sym;
1870 break;
1871 }
1872 }
1873
1874 if (!personality_sym.valid()) {
1875 // create symbol that contains the address of the personality
1876 // function
1877 u32 off;
1878 static constexpr std::array<u8, 8> zero{};
1879
1880 auto rodata = this->assembler.get_data_section(true, true);
1881 personality_sym = this->assembler.sym_def_data(
1882 rodata, "", zero, 8, Assembler::SymBinding::LOCAL, &off);
1883 this->assembler.reloc_abs(rodata, personality_func, off, 0);
1884
1885 personality_syms.emplace_back(personality_func, personality_sym);
1886 }
1887 }
1888 }
1889 return personality_sym;
1890}
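// The symbol emitted above is morally equivalent to the following assembly
// (a sketch; the section choice and personality name are illustrative):
//
//   .section .data.rel.ro
//   .p2align 3
// personality_ptr:
//   .quad __gxx_personality_v0   // filled in via the absolute relocation
//
// Unwind info can then reference the personality function indirectly through
// this pointer-sized data slot.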
1891
1892template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1893bool CompilerBase<Adaptor, Derived, Config>::compile_func(
1894    const IRFuncRef func, const u32 func_idx) noexcept {
1895 if (!adaptor->switch_func(func)) {
1896 return false;
1897 }
1898 derived()->analysis_start();
1899 analyzer.switch_func(func);
1900 derived()->analysis_end();
1901
1902#ifndef NDEBUG
1903 stack.frame_size = ~0u;
1904#endif
1905 for (auto &e : stack.fixed_free_lists) {
1906 e.clear();
1907 }
1908 stack.dynamic_free_lists.clear();
1909
1910 assignments.cur_fixed_assignment_count = {};
1911 assert(std::ranges::none_of(assignments.value_ptrs, std::identity{}));
1912 if (assignments.value_ptrs.size() < analyzer.liveness.size()) {
1913 assignments.value_ptrs.resize(analyzer.liveness.size());
1914 }
1915
1916 assignments.allocator.reset();
1917 assignments.variable_ref_list = INVALID_VAL_LOCAL_IDX;
1918 assignments.delayed_free_lists.clear();
1919 assignments.delayed_free_lists.resize(analyzer.block_layout.size(),
1920 INVALID_VAL_LOCAL_IDX);
1921
1922 cur_block_idx =
1923 static_cast<BlockIndex>(analyzer.block_idx(adaptor->cur_entry_block()));
1924
1925 register_file.reset();
1926#ifndef NDEBUG
1927 generating_branch = false;
1928#endif
1929
1930 // Simple heuristic for initial allocation size
1931 u32 expected_code_size = 0x8 * analyzer.num_insts + 0x40;
1932 this->text_writer.growth_size = expected_code_size;
1933 this->text_writer.ensure_space(expected_code_size);
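  // E.g., a function with 100 IR instructions pre-reserves
  // 0x8 * 100 + 0x40 = 864 bytes of code buffer before emission starts.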
1934
1935 derived()->start_func(func_idx);
1936
1937 block_labels.clear();
1938 block_labels.resize_uninitialized(analyzer.block_layout.size());
1939 for (u32 i = 0; i < analyzer.block_layout.size(); ++i) {
1940 block_labels[i] = assembler.label_create();
1941 }
1942
1943 // TODO(ts): place function label
1944 // TODO(ts): make function labels optional?
1945
1946 CCAssigner *cc_assigner = derived()->cur_cc_assigner();
1947 assert(cc_assigner != nullptr);
1948
1949 register_file.allocatable = cc_assigner->get_ccinfo().allocatable_regs;
1950
1951 // This initializes the stack frame, which must reserve space for
1952 // callee-saved registers, vararg save area, etc.
1953 cc_assigner->reset();
1954 derived()->gen_func_prolog_and_args(cc_assigner);
1955
1956 for (const IRValueRef alloca : adaptor->cur_static_allocas()) {
1957 auto size = adaptor->val_alloca_size(alloca);
1958 size = util::align_up(size, adaptor->val_alloca_align(alloca));
1959
1960 ValLocalIdx local_idx = adaptor->val_local_idx(alloca);
1961 init_variable_ref(local_idx, 0);
1962 ValueAssignment *assignment = val_assignment(local_idx);
1963 assignment->stack_variable = true;
1964 assignment->frame_off = allocate_stack_slot(size);
1965 }
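  // Example (sketch): an 8-byte alloca with 16-byte alignment is padded to
  // util::align_up(8, 16) == 16 bytes before its frame slot is allocated,
  // keeping each slot's size a multiple of its alignment, which the
  // fixed-size free lists (1/2/4/8/16 bytes) presumably rely on.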
1966
1967 if constexpr (!Config::DEFAULT_VAR_REF_HANDLING) {
1968 derived()->setup_var_ref_assignments();
1969 }
1970
1971 for (u32 i = 0; i < analyzer.block_layout.size(); ++i) {
1972 const auto block_ref = analyzer.block_layout[i];
1973 TPDE_LOG_TRACE(
1974 "Compiling block {} ({})", i, adaptor->block_fmt_ref(block_ref));
1975 if (!derived()->compile_block(block_ref, i)) [[unlikely]] {
1976 TPDE_LOG_ERR("Failed to compile block {} ({})",
1977 i,
1978 adaptor->block_fmt_ref(block_ref));
1979 // Ensure invariant that value_ptrs only contains nullptr at the end.
1980 assignments.value_ptrs.clear();
1981 return false;
1982 }
1983 }
1984
1985 // Reset all variable-ref assignment pointers to nullptr.
1986 ValLocalIdx variable_ref_list = assignments.variable_ref_list;
1987 while (variable_ref_list != INVALID_VAL_LOCAL_IDX) {
1988 u32 idx = u32(variable_ref_list);
1989 ValLocalIdx next = assignments.value_ptrs[idx]->next_delayed_free_entry;
1990 assignments.value_ptrs[idx] = nullptr;
1991 variable_ref_list = next;
1992 }
1993
1994 assert(std::ranges::none_of(assignments.value_ptrs, std::identity{}) &&
1995 "found non-freed ValueAssignment, maybe missing ref-count?");
1996
1997 derived()->finish_func(func_idx);
1998
1999 return true;
2000}
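// Illustrative driver loop around compile_func (a sketch; `compiler` and the
// adaptor's `funcs()` range are assumed names, not guaranteed by this file):
//
//   u32 func_idx = 0;
//   for (IRFuncRef func : compiler.adaptor->funcs()) {
//     if (!compiler.compile_func(func, func_idx++)) {
//       return false; // bail out on the first function that fails
//     }
//   }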
2001
2002template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
2003bool CompilerBase<Adaptor, Derived, Config>::compile_block(
2004    const IRBlockRef block, const u32 block_idx) noexcept {
2005 cur_block_idx =
2006 static_cast<typename Analyzer<Adaptor>::BlockIndex>(block_idx);
2007
2008 label_place(block_labels[block_idx]);
2009 auto &&val_range = adaptor->block_insts(block);
2010 auto end = val_range.end();
2011 for (auto it = val_range.begin(); it != end; ++it) {
2012 const IRInstRef inst = *it;
2013 if (this->adaptor->inst_fused(inst)) {
2014 continue;
2015 }
2016
2017 auto it_cpy = it;
2018 ++it_cpy;
2019 if (!derived()->compile_inst(inst, InstRange{.from = it_cpy, .to = end}))
2020 [[unlikely]] {
2021 TPDE_LOG_ERR("Failed to compile instruction {}",
2022 this->adaptor->inst_fmt_ref(inst));
2023 return false;
2024 }
2025 }
2026
2027#ifndef NDEBUG
2028 // Some consistency checks. Register assignment information must match, all
2029 // used registers must have an assignment (no temporaries across blocks), and
2030 // fixed registers must be fixed assignments.
2031 for (auto reg_id : register_file.used_regs()) {
2032 Reg reg{reg_id};
2033 assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);
2034 AssignmentPartRef ap{val_assignment(register_file.reg_local_idx(reg)),
2035 register_file.reg_part(reg)};
2036 assert(ap.register_valid());
2037 assert(ap.get_reg() == reg);
2038 assert(!register_file.is_fixed(reg) || ap.fixed_assignment());
2039 }
2040#endif
2041
2042 if (static_cast<u32>(assignments.delayed_free_lists[block_idx]) != ~0u) {
2043 auto list_entry = assignments.delayed_free_lists[block_idx];
2044 while (static_cast<u32>(list_entry) != ~0u) {
2045 auto next_entry = assignments.value_ptrs[static_cast<u32>(list_entry)]
2046 ->next_delayed_free_entry;
2047 derived()->free_assignment(list_entry);
2048 list_entry = next_entry;
2049 }
2050 }
2051 return true;
2052}
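// Sketch of the delayed-free walk above: an assignment whose last use lies
// before the end of its live range is queued on the delayed_free_lists entry
// of the block where the range ends and is only released once that block has
// been fully compiled; freeing earlier could hand its stack slot or register
// to another value while control flow can still reach a use.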
2053
2054} // namespace tpde