TPDE
CompilerBase.hpp
1// SPDX-FileCopyrightText: 2025 Contributors to TPDE <https://tpde.org>
2//
3// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4#pragma once
5
6#include <algorithm>
7#include <functional>
8#include <unordered_map>
9#include <variant>
10
11#include "Analyzer.hpp"
12#include "Compiler.hpp"
13#include "CompilerConfig.hpp"
14#include "IRAdaptor.hpp"
15#include "tpde/AssignmentPartRef.hpp"
16#include "tpde/FunctionWriter.hpp"
17#include "tpde/RegisterFile.hpp"
18#include "tpde/ValLocalIdx.hpp"
19#include "tpde/ValueAssignment.hpp"
20#include "tpde/base.hpp"
21#include "tpde/util/function_ref.hpp"
22#include "tpde/util/misc.hpp"
23
24namespace tpde {
25// TODO(ts): formulate concept for full compiler so that there is *some* check
26// whether all the required derived methods are implemented?
27
28/// Thread-local storage access mode
29enum class TLSModel {
30 GlobalDynamic,
31 LocalDynamic,
32 InitialExec,
33 LocalExec,
34};
35
36struct CCAssignment {
37 Reg reg = Reg::make_invalid(); ///< Assigned register, invalid implies stack.
38
39 /// If non-zero, indicates that this and the next N values must be
40 /// assigned to consecutive registers, or to the stack. The following
41 /// values must be in the same register bank as this value. u8 is sufficient;
42 /// no architecture has more than 255 parameter registers in a single bank.
43 u8 consecutive = 0;
44 bool sret : 1 = false; ///< Argument is return value pointer.
45
46 /// The argument is passed by value on the stack. The provided argument is a
47 /// pointer; for the call, size bytes will be copied into the corresponding
48 /// stack slot. Behaves like LLVM's byval.
49 ///
50 /// Note: On x86-64 SysV, this is used to pass larger structs in memory. Note
51 /// that AArch64 AAPCS doesn't use byval for structs; instead, the pointer is
52 /// passed without byval and it is the responsibility of the caller to
53 /// explicitly copy the value.
54 bool byval : 1 = false;
55
56 /// Extend integer argument. The highest bit indicates signedness; the lower
57 /// bits indicate the source width from which the argument should be extended.
58 u8 int_ext = 0;
59
60 u8 align = 0; ///< Argument alignment
61 RegBank bank = RegBank{}; ///< Register bank to assign the value to.
62 u32 size = 0; ///< Argument size, for byval the stack slot size.
63 u32 stack_off = 0; ///< Assigned stack slot, only valid if reg is invalid.
64};
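// Illustrative sketch (not part of this header): how int_ext is packed and
// unpacked. The call/return lowering further down in this file decodes it as
//   bool ext_sign  = cca.int_ext >> 7;    // sign- vs. zero-extension
//   unsigned width = cca.int_ext & 0x3f;  // source width in bits
// so, for example, an i8 argument that must be sign-extended is encoded as
// int_ext = 8 | 0x80, and an i16 argument that must be zero-extended as
// int_ext = 16.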
65
66struct CCInfo {
67 // TODO: use RegBitSet
68 const u64 allocatable_regs;
69 const u64 callee_saved_regs;
70 /// Possible argument registers; these registers will not be allocated until
71 /// all arguments have been assigned.
72 const u64 arg_regs;
73 /// Size of red zone below stack pointer.
74 const u8 red_zone_size = 0;
75};
76
77class CCAssigner {
78public:
79 const CCInfo *ccinfo;
80
81 CCAssigner(const CCInfo &ccinfo) noexcept : ccinfo(&ccinfo) {}
82 virtual ~CCAssigner() noexcept {}
83
84 virtual void reset() noexcept = 0;
85
86 const CCInfo &get_ccinfo() const noexcept { return *ccinfo; }
87
88 virtual void assign_arg(CCAssignment &cca) noexcept = 0;
89 virtual u32 get_stack_size() noexcept = 0;
90 /// Some calling conventions need different call behavior when calling a
91 /// vararg function.
92 virtual bool is_vararg() const noexcept { return false; }
93 virtual void assign_ret(CCAssignment &cca) noexcept = 0;
94};
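// Illustrative sketch (not part of this header), assuming a hypothetical
// target: a minimal CCAssigner that passes every argument on the stack. Real
// assigners (e.g. for x86-64 SysV) consult CCInfo::arg_regs and honor the
// consecutive/sret/byval/int_ext fields; a derived compiler would typically
// return such an assigner from cur_cc_assigner().
class StackOnlyCCAssigner final : public CCAssigner {
  u32 stack_size = 0;

public:
  explicit StackOnlyCCAssigner(const CCInfo &info) noexcept : CCAssigner(info) {}

  void reset() noexcept override { stack_size = 0; }

  void assign_arg(CCAssignment &cca) noexcept override {
    // Hand out no registers: an invalid reg means the value is passed on the
    // stack at the assigned offset.
    stack_size = util::align_up(stack_size, cca.align ? cca.align : 1);
    cca.reg = Reg::make_invalid();
    cca.stack_off = stack_size;
    stack_size += cca.size;
  }

  u32 get_stack_size() noexcept override { return stack_size; }

  void assign_ret(CCAssignment &cca) noexcept override {
    // A real assigner hands out the ABI's return registers here; add_ret()
    // below requires cca.reg to be valid.
    (void)cca;
  }
};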
95
96/// The base class for the compiler.
97/// It implements the main platform-independent compilation logic and houses
98/// the analyzer.
99template <IRAdaptor Adaptor,
100 typename Derived,
101 CompilerConfig Config = CompilerConfigDefault>
102struct CompilerBase {
103 // some forwards for the IR type defs
104 using IRValueRef = typename Adaptor::IRValueRef;
105 using IRInstRef = typename Adaptor::IRInstRef;
106 using IRBlockRef = typename Adaptor::IRBlockRef;
107 using IRFuncRef = typename Adaptor::IRFuncRef;
108
109 using BlockIndex = typename Analyzer<Adaptor>::BlockIndex;
110
111 using Assembler = typename Config::Assembler;
112 using AsmReg = typename Config::AsmReg;
113
114 using RegisterFile = tpde::RegisterFile<Config::NUM_BANKS, 32>;
115
116 /// A default implementation for ValRefSpecial.
117 // Note: Subclasses can override this; always use Derived::ValRefSpecial.
118 struct ValRefSpecial {
119 uint8_t mode = 4;
120 u64 const_data;
121 };
122
123#pragma region CompilerData
124 Adaptor *adaptor;
125 Analyzer<Adaptor> analyzer;
126
127 // data for frame management
128
129 struct {
130 /// The current size of the stack frame
131 u32 frame_size = 0;
132 /// Whether the stack frame might have dynamic alloca. Dynamic allocas may
133 /// require a different and less efficient frame setup.
135 /// Whether the function is guaranteed to be a leaf function. Throughout the
136 /// entire function, the compiler may assume the absence of function calls.
137 bool is_leaf_function = false;
138 /// Whether the function actually includes a call. There are cases where it
139 /// is not clear from the beginning whether a function has function calls.
140 /// If a function has no calls, this allows using the red zone
141 /// guaranteed by some ABIs.
142 bool generated_call = false;
143 /// Free-lists for allocations of size 1/2/4/8/16
144 // TODO(ts): make the allocations for 4/8 different from the others
145 // since they are probably the ones most used?
146 util::SmallVector<i32, 16> fixed_free_lists[5] = {};
147 /// Free-Lists for all other sizes
148 // TODO(ts): think about which data structure we want here
149 std::unordered_map<u32, std::vector<i32>> dynamic_free_lists{};
150 } stack = {};
151
152 typename Analyzer<Adaptor>::BlockIndex cur_block_idx;
153
154 // Assignments
155
156 static constexpr ValLocalIdx INVALID_VAL_LOCAL_IDX =
157 static_cast<ValLocalIdx>(~0u);
158
159 // TODO(ts): think about different ways to store this that are maybe more
160 // compact?
161 struct {
162 AssignmentAllocator allocator;
163
164 std::array<u32, Config::NUM_BANKS> cur_fixed_assignment_count = {};
165 util::SmallVector<ValueAssignment *, Analyzer<Adaptor>::SMALL_VALUE_NUM>
166 value_ptrs;
167
168 ValLocalIdx variable_ref_list;
169 util::SmallVector<ValLocalIdx, Analyzer<Adaptor>::SMALL_BLOCK_NUM>
170 delayed_free_lists;
171 } assignments = {};
172
173 RegisterFile register_file;
174#ifndef NDEBUG
175 /// Whether we are currently in the middle of generating branch-related code
176 /// and therefore must not change any value-related state.
177 bool generating_branch = false;
178#endif
179
180private:
181 /// Default CCAssigner if the implementation doesn't override cur_cc_assigner.
182 typename Config::DefaultCCAssigner default_cc_assigner;
183
184public:
185 Assembler assembler;
186 Config::FunctionWriter text_writer;
187 // TODO(ts): smallvector?
188 std::vector<SymRef> func_syms;
189 // TODO(ts): combine this with the block vectors in the analyzer to save on
190 // allocations
191 util::SmallVector<Label> block_labels;
192
193 util::SmallVector<std::pair<SymRef, SymRef>, 4> personality_syms = {};
194
195 struct ScratchReg;
196 class ValuePart;
197 struct ValuePartRef;
198 struct ValueRef;
199 struct GenericValuePart;
200#pragma endregion
201
202 struct InstRange {
203 using Range = decltype(std::declval<Adaptor>().block_insts(
204 std::declval<IRBlockRef>()));
205 using Iter = decltype(std::declval<Range>().begin());
206 using EndIter = decltype(std::declval<Range>().end());
207 Iter from;
208 EndIter to;
209 };
210
211 struct CallArg {
212 enum class Flag : u8 {
213 none,
214 zext,
215 sext,
216 sret,
217 byval
218 };
219
220 explicit CallArg(IRValueRef value,
221 Flag flags = Flag::none,
222 u8 byval_align = 0,
223 u32 byval_size = 0)
224 : value(value),
225 flag(flags),
226 byval_align(byval_align),
227 byval_size(byval_size) {}
228
229 IRValueRef value;
230 Flag flag;
231 u8 byval_align;
232 u8 ext_bits = 0;
233 u32 byval_size;
234 };
235
236 template <typename CBDerived>
237 class CallBuilderBase {
238 protected:
239 Derived &compiler;
240 CCAssigner &assigner;
241
242 RegisterFile::RegBitSet arg_regs{};
243
244 public:
245 CallBuilderBase(Derived &compiler, CCAssigner &assigner) noexcept
246 : compiler(compiler), assigner(assigner) {}
247
248 // CBDerived needs:
249 // void add_arg_byval(ValuePart &vp, CCAssignment &cca) noexcept;
250 // void add_arg_stack(ValuePart &vp, CCAssignment &cca) noexcept;
251 // void call_impl(std::variant<SymRef, ValuePart> &&) noexcept;
252 CBDerived *derived() noexcept { return static_cast<CBDerived *>(this); }
253
254 void add_arg(ValuePart &&vp, CCAssignment cca) noexcept;
255 void add_arg(const CallArg &arg, u32 part_count) noexcept;
256 void add_arg(const CallArg &arg) noexcept {
257 add_arg(std::move(arg), compiler.val_parts(arg.value).count());
258 }
259
260 // evict registers, do call, reset stack frame
261 void call(std::variant<SymRef, ValuePart>) noexcept;
262
263 void add_ret(ValuePart &vp, CCAssignment cca) noexcept;
264 void add_ret(ValuePart &&vp, CCAssignment cca) noexcept {
265 add_ret(vp, cca);
266 }
267 void add_ret(ValueRef &vr) noexcept;
268 };
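  // Illustrative sketch (not part of this header): a target's call builder is
  // expected to derive from CallBuilderBase via CRTP and provide the three
  // hooks listed above, roughly:
  //
  //   struct CallBuilder : CallBuilderBase<CallBuilder> {
  //     using CallBuilderBase<CallBuilder>::CallBuilderBase;
  //     // copy the pointed-to value into the outgoing byval stack slot
  //     void add_arg_byval(ValuePart &vp, CCAssignment &cca) noexcept;
  //     // store vp into the outgoing argument area at cca.stack_off
  //     void add_arg_stack(ValuePart &vp, CCAssignment &cca) noexcept;
  //     // emit the actual call to a symbol or an indirect target
  //     void call_impl(std::variant<SymRef, ValuePart> &&target) noexcept;
  //   };
  //
  // Usage then follows this pattern (all names illustrative):
  //
  //   CallBuilder cb{*derived(), *cur_cc_assigner()};
  //   cb.add_arg(CallArg{arg_val, CallArg::Flag::zext});
  //   cb.call(callee_sym);
  //   cb.add_ret(result_vr);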
269
270 class RetBuilder {
271 Derived &compiler;
272 CCAssigner &assigner;
273
274 RegisterFile::RegBitSet ret_regs{};
275
276 public:
277 RetBuilder(Derived &compiler, CCAssigner &assigner) noexcept
278 : compiler(compiler), assigner(assigner) {
279 assigner.reset();
280 }
281
282 void add(ValuePart &&vp, CCAssignment cca) noexcept;
283 void add(IRValueRef val) noexcept;
284
285 void ret() noexcept;
286 };
287
288 /// Initialize a CompilerBase; should be called by the derived classes
289 explicit CompilerBase(Adaptor *adaptor)
290 : adaptor(adaptor), analyzer(adaptor), assembler() {
291 static_assert(std::is_base_of_v<CompilerBase, Derived>);
292 static_assert(Compiler<Derived, Config>);
293 }
294
295 /// shortcut for casting to the Derived class so that overloading
296 /// works
297 Derived *derived() { return static_cast<Derived *>(this); }
298
299 const Derived *derived() const { return static_cast<const Derived *>(this); }
300
301 [[nodiscard]] ValLocalIdx val_idx(const IRValueRef value) const noexcept {
302 return analyzer.adaptor->val_local_idx(value);
303 }
304
305 [[nodiscard]] ValueAssignment *
306 val_assignment(const ValLocalIdx idx) noexcept {
307 return assignments.value_ptrs[static_cast<u32>(idx)];
308 }
309
310 /// Compile the functions returned by Adaptor::funcs
311 ///
312 /// \warning If you intend to call this multiple times, you must call reset
313 /// in-between the calls.
314 ///
315 /// \returns Whether the compilation was successful
316 bool compile();
317
318 /// Reset any leftover data from the previous compilation such that it will
319 /// not affect the next compilation
320 void reset();
321
322 /// Get CCAssigner for current function.
323 CCAssigner *cur_cc_assigner() noexcept { return &default_cc_assigner; }
324
325 void init_assignment(IRValueRef value, ValLocalIdx local_idx) noexcept;
326
327private:
328 /// Frees an assignment, its stack slot and registers
329 void free_assignment(ValLocalIdx local_idx, ValueAssignment *) noexcept;
330
331public:
332 /// Release an assignment when reference count drops to zero, either frees
333 /// the assignment immediately or delays free to the end of the live range.
334 void release_assignment(ValLocalIdx local_idx, ValueAssignment *) noexcept;
335
336 /// Init a variable-ref assignment
337 void init_variable_ref(ValLocalIdx local_idx, u32 var_ref_data) noexcept;
338 /// Init a variable-ref assignment
339 void init_variable_ref(IRValueRef value, u32 var_ref_data) noexcept {
340 init_variable_ref(adaptor->val_local_idx(value), var_ref_data);
341 }
342
343 i32 allocate_stack_slot(u32 size) noexcept;
344 void free_stack_slot(u32 slot, u32 size) noexcept;
345
346 template <typename Fn>
347 void handle_func_arg(u32 arg_idx, IRValueRef arg, Fn add_arg) noexcept;
348
349 ValueRef val_ref(IRValueRef value) noexcept;
350
351 std::pair<ValueRef, ValuePartRef> val_ref_single(IRValueRef value) noexcept;
352
353 /// Get a defining reference to a value
354 ValueRef result_ref(IRValueRef value) noexcept;
355
356 std::pair<ValueRef, ValuePartRef>
357 result_ref_single(IRValueRef value) noexcept;
358
359 /// Make dst an alias for src, which must be a non-constant value with an
360 /// identical part configuration. src must be in its last use (is_owned()),
361 /// and the assignment will be repurposed for dst, keeping all assigned
362 /// registers and stack slots.
363 ValueRef result_ref_alias(IRValueRef dst, ValueRef &&src) noexcept;
364
365 /// Initialize value as a pointer into a stack variable (i.e., a value
366 /// allocated from cur_static_allocas() or similar) with an offset. The
367 /// result value will be a stack variable itself.
368 ValueRef result_ref_stack_slot(IRValueRef value,
369 AssignmentPartRef base,
370 i32 off) noexcept;
371
372 [[deprecated("Use ValuePartRef::set_value")]]
373 void set_value(ValuePartRef &val_ref, ScratchReg &scratch) noexcept;
374 [[deprecated("Use ValuePartRef::set_value")]]
375 void set_value(ValuePartRef &&val_ref, ScratchReg &scratch) noexcept {
376 set_value(val_ref, scratch);
377 }
378
379 /// Get generic value part into a single register, evaluating expressions
380 /// and materializing immediates as required.
381 AsmReg gval_as_reg(GenericValuePart &gv) noexcept;
382
383 /// Like gval_as_reg; if the GenericValuePart owns a reusable register
384 /// (either a ScratchReg, possibly due to materialization, or a reusable
385 /// ValuePartRef), store it in dst.
386 AsmReg gval_as_reg_reuse(GenericValuePart &gv, ScratchReg &dst) noexcept;
387
388private:
389 Reg select_reg_evict(RegBank bank, u64 exclusion_mask) noexcept;
390
391public:
392 /// Select an available register, evicting loaded values if needed.
393 Reg select_reg(RegBank bank, u64 exclusion_mask) noexcept {
394 Reg res = register_file.find_first_free_excluding(bank, exclusion_mask);
395 if (res.valid()) [[likely]] {
396 return res;
397 }
398 return select_reg_evict(bank, exclusion_mask);
399 }
400
401 /// Reload a value part from memory or recompute variable address.
402 void reload_to_reg(AsmReg dst, AssignmentPartRef ap) noexcept;
403
404 void allocate_spill_slot(AssignmentPartRef ap) noexcept;
405
406 /// Ensure the value is spilled in its stack slot (except variable refs).
407 void spill(AssignmentPartRef ap) noexcept;
408
409 /// Evict the value from its register, spilling if needed, and free register.
410 void evict(AssignmentPartRef ap) noexcept;
411
412 /// Evict the value from the register, spilling if needed, and free register.
413 void evict_reg(Reg reg) noexcept;
414
415 /// Free the register. Requires that the contained value is already spilled.
416 void free_reg(Reg reg) noexcept;
417
418 // TODO(ts): switch to a branch_spill_before naming style?
419 typename RegisterFile::RegBitSet
420 spill_before_branch(bool force_spill = false) noexcept;
421 void release_spilled_regs(typename RegisterFile::RegBitSet) noexcept;
422
423 /// When reaching a point in the function where no other blocks will be
424 /// reached anymore, use this function to release register assignments after
425 /// the end of that block so the compiler does not accidentally use
426 /// registers which don't contain any values.
427 void release_regs_after_return() noexcept;
428
429 /// Generate a switch at the end of a basic block. Only the lowest bits of the
430 /// condition are considered. The condition must be a general-purpose
431 /// register. The cases must be sorted and every case value must appear at
432 /// most once.
434 ScratchReg &&cond,
435 u32 width,
436 IRBlockRef default_block,
437 std::span<const std::pair<u64, IRBlockRef>> cases) noexcept;
438
439 /// Indicate beginning of region where value-state must not change.
440 void begin_branch_region() noexcept {
441#ifndef NDEBUG
442 assert(!generating_branch);
443 generating_branch = true;
444#endif
445 }
446
447 /// Indicate end of region where value-state must not change.
448 void end_branch_region() noexcept {
449#ifndef NDEBUG
450 assert(generating_branch);
451 generating_branch = false;
452#endif
453 }
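  // Illustrative sketch (not part of this header): the typical bracketing a
  // derived compiler uses around branch generation, mirroring the switch
  // lowering further down in this file:
  //
  //   auto spilled = spill_before_branch();
  //   begin_branch_region();
  //   // ... move PHI operands and emit jumps; value-state must not change
  //   end_branch_region();
  //   release_spilled_regs(spilled);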
454
455#ifndef NDEBUG
456 bool may_change_value_state() const noexcept { return !generating_branch; }
457#endif
458
459 void move_to_phi_nodes(BlockIndex target) noexcept {
460 if (analyzer.block_has_phis(target)) {
461 move_to_phi_nodes_impl(target);
462 }
463 }
464
465 void move_to_phi_nodes_impl(BlockIndex target) noexcept;
466
467 bool branch_needs_split(IRBlockRef target) noexcept {
468 // for now, if the target has PHI-nodes, we split
469 return analyzer.block_has_phis(target);
470 }
471
472 BlockIndex next_block() const noexcept;
473
474 bool try_force_fixed_assignment(IRValueRef) const noexcept { return false; }
475
476 bool hook_post_func_sym_init() noexcept { return true; }
477
478 void analysis_start() noexcept {}
479
480 void analysis_end() noexcept {}
481
482 void reloc_text(SymRef sym, u32 type, u64 offset, i64 addend = 0) noexcept {
483 this->assembler.reloc_sec(
484 text_writer.get_sec_ref(), sym, type, offset, addend);
485 }
486
487 void label_place(Label label) noexcept {
488 this->text_writer.label_place(label, text_writer.offset());
489 }
490
491protected:
492 SymRef get_personality_sym() noexcept;
493
494 bool compile_func(IRFuncRef func, u32 func_idx) noexcept;
495
496 bool compile_block(IRBlockRef block, u32 block_idx) noexcept;
497};
498} // namespace tpde
499
500#include "GenericValuePart.hpp"
501#include "ScratchReg.hpp"
502#include "ValuePartRef.hpp"
503#include "ValueRef.hpp"
504
505namespace tpde {
506
507template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
508template <typename CBDerived>
509void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
510 CBDerived>::add_arg(ValuePart &&vp, CCAssignment cca) noexcept {
511 if (!cca.byval) {
512 cca.bank = vp.bank();
513 cca.size = vp.part_size();
514 }
515
516 assigner.assign_arg(cca);
517 bool needs_ext = cca.int_ext != 0;
518 bool ext_sign = cca.int_ext >> 7;
519 unsigned ext_bits = cca.int_ext & 0x3f;
520
521 if (cca.byval) {
522 derived()->add_arg_byval(vp, cca);
523 vp.reset(&compiler);
524 } else if (!cca.reg.valid()) {
525 if (needs_ext) {
526 auto ext = std::move(vp).into_extended(&compiler, ext_sign, ext_bits, 64);
527 derived()->add_arg_stack(ext, cca);
528 ext.reset(&compiler);
529 } else {
530 derived()->add_arg_stack(vp, cca);
531 }
532 vp.reset(&compiler);
533 } else {
534 u32 size = vp.part_size();
535 if (vp.is_in_reg(cca.reg)) {
536 if (!vp.can_salvage()) {
537 compiler.evict_reg(cca.reg);
538 } else {
539 vp.salvage(&compiler);
540 }
541 if (needs_ext) {
542 compiler.generate_raw_intext(cca.reg, cca.reg, ext_sign, ext_bits, 64);
543 }
544 } else {
545 if (compiler.register_file.is_used(cca.reg)) {
546 compiler.evict_reg(cca.reg);
547 }
548 if (vp.can_salvage()) {
549 AsmReg vp_reg = vp.salvage(&compiler);
550 if (needs_ext) {
551 compiler.generate_raw_intext(cca.reg, vp_reg, ext_sign, ext_bits, 64);
552 } else {
553 compiler.mov(cca.reg, vp_reg, size);
554 }
555 } else {
556 vp.reload_into_specific_fixed(&compiler, cca.reg);
557 if (needs_ext) {
558 compiler.generate_raw_intext(
559 cca.reg, cca.reg, ext_sign, ext_bits, 64);
560 }
561 }
562 }
563 vp.reset(&compiler);
564 assert(!compiler.register_file.is_used(cca.reg));
565 compiler.register_file.mark_clobbered(cca.reg);
566 compiler.register_file.allocatable &= ~(u64{1} << cca.reg.id());
567 arg_regs |= (1ull << cca.reg.id());
568 }
569}
570
571template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
572template <typename CBDerived>
573void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
574 CBDerived>::add_arg(const CallArg &arg, u32 part_count) noexcept {
575 ValueRef vr = compiler.val_ref(arg.value);
576
577 if (arg.flag == CallArg::Flag::byval) {
578 assert(part_count == 1);
579 add_arg(vr.part(0),
580 CCAssignment{
581 .byval = true,
582 .align = arg.byval_align,
583 .size = arg.byval_size,
584 });
585 return;
586 }
587
588 u32 align = 1;
589 bool consecutive = false;
590 u32 consec_def = 0;
591 if (compiler.arg_is_int128(arg.value)) {
592 // TODO: this also applies to composites with 16-byte alignment
593 align = 16;
594 consecutive = true;
595 } else if (part_count > 1 &&
596 !compiler.arg_allow_split_reg_stack_passing(arg.value)) {
597 consecutive = true;
598 if (part_count > UINT8_MAX) {
599 // Must be completely passed on the stack.
600 consecutive = false;
601 consec_def = -1;
602 }
603 }
604
605 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
606 u8 int_ext = 0;
607 if (arg.flag == CallArg::Flag::sext || arg.flag == CallArg::Flag::zext) {
608 assert(arg.ext_bits != 0 && "cannot extend zero-bit integer");
609 int_ext = arg.ext_bits | (arg.flag == CallArg::Flag::sext ? 0x80 : 0);
610 }
611 derived()->add_arg(
612 vr.part(part_idx),
613 CCAssignment{
614 .consecutive =
615 u8(consecutive ? part_count - part_idx - 1 : consec_def),
616 .sret = arg.flag == CallArg::Flag::sret,
617 .int_ext = int_ext,
618 .align = u8(part_idx == 0 ? align : 1),
619 });
620 }
621}
622
623template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
624template <typename CBDerived>
625void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<CBDerived>::call(
626 std::variant<SymRef, ValuePart> target) noexcept {
627 assert(!compiler.stack.is_leaf_function && "leaf func must not have calls");
628 compiler.stack.generated_call = true;
629 typename RegisterFile::RegBitSet skip_evict = arg_regs;
630 if (auto *vp = std::get_if<ValuePart>(&target); vp && vp->can_salvage()) {
631 // call_impl will reset vp, thereby unlock+free the register.
632 assert(vp->cur_reg_unlocked().valid() && "can_salvage implies register");
633 skip_evict |= (1ull << vp->cur_reg_unlocked().id());
634 }
635
636 auto clobbered = ~assigner.get_ccinfo().callee_saved_regs;
637 for (auto reg_id : util::BitSetIterator<>{compiler.register_file.used &
638 clobbered & ~skip_evict}) {
639 compiler.evict_reg(AsmReg{reg_id});
640 compiler.register_file.mark_clobbered(Reg{reg_id});
641 }
642
643 derived()->call_impl(std::move(target));
644
645 assert((compiler.register_file.allocatable & arg_regs) == 0);
646 compiler.register_file.allocatable |= arg_regs;
647}
648
649template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
650template <typename CBDerived>
651void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
652 CBDerived>::add_ret(ValuePart &vp, CCAssignment cca) noexcept {
653 cca.bank = vp.bank();
654 cca.size = vp.part_size();
655 assigner.assign_ret(cca);
656 assert(cca.reg.valid() && "return value must be in register");
657 vp.set_value_reg(&compiler, cca.reg);
658}
659
660template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
661template <typename CBDerived>
662void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
663 CBDerived>::add_ret(ValueRef &vr) noexcept {
664 assert(vr.has_assignment());
665 u32 part_count = vr.assignment()->part_count;
666 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
667 CCAssignment cca;
668 add_ret(vr.part(part_idx), cca);
669 }
670}
671
672template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
673void CompilerBase<Adaptor, Derived, Config>::RetBuilder::add(
674 ValuePart &&vp, CCAssignment cca) noexcept {
675 cca.bank = vp.bank();
676 u32 size = cca.size = vp.part_size();
677 assigner.assign_ret(cca);
678 assert(cca.reg.valid() && "indirect return value must use sret argument");
679
680 bool needs_ext = cca.int_ext != 0;
681 bool ext_sign = cca.int_ext >> 7;
682 unsigned ext_bits = cca.int_ext & 0x3f;
683
684 if (vp.is_in_reg(cca.reg)) {
685 if (!vp.can_salvage()) {
686 compiler.evict_reg(cca.reg);
687 } else {
688 vp.salvage(&compiler);
689 }
690 if (needs_ext) {
691 compiler.generate_raw_intext(cca.reg, cca.reg, ext_sign, ext_bits, 64);
692 }
693 } else {
694 if (compiler.register_file.is_used(cca.reg)) {
695 compiler.evict_reg(cca.reg);
696 }
697 if (vp.can_salvage()) {
698 AsmReg vp_reg = vp.salvage(&compiler);
699 if (needs_ext) {
700 compiler.generate_raw_intext(cca.reg, vp_reg, ext_sign, ext_bits, 64);
701 } else {
702 compiler.mov(cca.reg, vp_reg, size);
703 }
704 } else {
705 vp.reload_into_specific_fixed(&compiler, cca.reg);
706 if (needs_ext) {
707 compiler.generate_raw_intext(cca.reg, cca.reg, ext_sign, ext_bits, 64);
708 }
709 }
710 }
711 vp.reset(&compiler);
712 assert(!compiler.register_file.is_used(cca.reg));
713 compiler.register_file.mark_clobbered(cca.reg);
714 compiler.register_file.allocatable &= ~(u64{1} << cca.reg.id());
715 ret_regs |= (1ull << cca.reg.id());
716}
717
718template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
719void CompilerBase<Adaptor, Derived, Config>::RetBuilder::add(
720 IRValueRef val) noexcept {
721 u32 part_count = compiler.val_parts(val).count();
722 ValueRef vr = compiler.val_ref(val);
723 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
724 add(vr.part(part_idx), CCAssignment{});
725 }
726}
727
728template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
729void CompilerBase<Adaptor, Derived, Config>::RetBuilder::ret() noexcept {
730 assert((compiler.register_file.allocatable & ret_regs) == 0);
731 compiler.register_file.allocatable |= ret_regs;
732
733 compiler.gen_func_epilog();
734 compiler.release_regs_after_return();
735}
736
737template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
738bool CompilerBase<Adaptor, Derived, Config>::compile() {
739 // create function symbols
740 text_writer.switch_section(
741 assembler.get_section(assembler.get_text_section()));
742
743 assert(func_syms.empty());
744 for (const IRFuncRef func : adaptor->funcs()) {
745 auto binding = Assembler::SymBinding::GLOBAL;
746 if (adaptor->func_has_weak_linkage(func)) {
747 binding = Assembler::SymBinding::WEAK;
748 } else if (adaptor->func_only_local(func)) {
749 binding = Assembler::SymBinding::LOCAL;
750 }
751 if (adaptor->func_extern(func)) {
752 func_syms.push_back(derived()->assembler.sym_add_undef(
753 adaptor->func_link_name(func), binding));
754 } else {
755 func_syms.push_back(derived()->assembler.sym_predef_func(
756 adaptor->func_link_name(func), binding));
757 }
758 derived()->define_func_idx(func, func_syms.size() - 1);
759 }
760
761 if (!derived()->hook_post_func_sym_init()) {
762 TPDE_LOG_ERR("hook_post_func_sym_init failed");
763 return false;
764 }
765
766 // TODO(ts): create function labels?
767
768 bool success = true;
769
770 u32 func_idx = 0;
771 for (const IRFuncRef func : adaptor->funcs()) {
772 if (adaptor->func_extern(func)) {
773 TPDE_LOG_TRACE("Skipping compilation of func {}",
774 adaptor->func_link_name(func));
775 ++func_idx;
776 continue;
777 }
778
779 TPDE_LOG_TRACE("Compiling func {}", adaptor->func_link_name(func));
780 if (!derived()->compile_func(func, func_idx)) {
781 TPDE_LOG_ERR("Failed to compile function {}",
782 adaptor->func_link_name(func));
783 success = false;
784 }
785 ++func_idx;
786 }
787
788 text_writer.flush();
789 assembler.finalize();
790
791 // TODO(ts): generate object/map?
792
793 return success;
794}
795
796template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
797void CompilerBase<Adaptor, Derived, Config>::reset() {
798 adaptor->reset();
799
800 for (auto &e : stack.fixed_free_lists) {
801 e.clear();
802 }
803 stack.dynamic_free_lists.clear();
804
805 assembler.reset();
806 func_syms.clear();
807 block_labels.clear();
808 personality_syms.clear();
809}
810
811template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
812void CompilerBase<Adaptor, Derived, Config>::init_assignment(
813 IRValueRef value, ValLocalIdx local_idx) noexcept {
814 assert(val_assignment(local_idx) == nullptr);
815 TPDE_LOG_TRACE("Initializing assignment for value {}",
816 static_cast<u32>(local_idx));
817
818 const auto parts = derived()->val_parts(value);
819 const u32 part_count = parts.count();
820 assert(part_count > 0);
821 auto *assignment = assignments.allocator.allocate(part_count);
822 assignments.value_ptrs[static_cast<u32>(local_idx)] = assignment;
823
824 u32 max_part_size = 0;
825 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
826 auto ap = AssignmentPartRef{assignment, part_idx};
827 ap.reset();
828 ap.set_bank(parts.reg_bank(part_idx));
829 const u32 size = parts.size_bytes(part_idx);
830 assert(size > 0);
831 max_part_size = std::max(max_part_size, size);
832 ap.set_part_size(size);
833 }
834
835 const auto &liveness = analyzer.liveness_info(local_idx);
836
837 // if there is only one part, try to hand out a fixed assignment
838 // if the value is used for longer than one block and there aren't too many
839 // definitions in child loops that this could interfere with
840 // TODO(ts): try out only fixed assignments if the value is live for more
841 // than two blocks?
842 // TODO(ts): move this to ValuePartRef::alloc_reg to be able to defer this
843 // for results?
844 if (part_count == 1) {
845 const auto &cur_loop =
846 analyzer.loop_from_idx(analyzer.block_loop_idx(cur_block_idx));
847 auto ap = AssignmentPartRef{assignment, 0};
848
849 auto try_fixed =
850 liveness.last > cur_block_idx &&
851 cur_loop.definitions_in_childs +
852 assignments.cur_fixed_assignment_count[ap.bank().id()] <
853 Derived::NUM_FIXED_ASSIGNMENTS[ap.bank().id()];
854 if (derived()->try_force_fixed_assignment(value)) {
855 try_fixed = assignments.cur_fixed_assignment_count[ap.bank().id()] <
856 Derived::NUM_FIXED_ASSIGNMENTS[ap.bank().id()];
857 }
858
859 if (try_fixed) {
860 // check if there is a fixed register available
861 AsmReg reg = derived()->select_fixed_assignment_reg(ap, value);
862 TPDE_LOG_TRACE("Trying to assign fixed reg to value {}",
863 static_cast<u32>(local_idx));
864
865 // TODO: if the register is used, we can free it most of the time, but not
866 // always, e.g. for PHI nodes. Detect this case and free_reg otherwise.
867 if (!reg.invalid() && !register_file.is_used(reg)) {
868 TPDE_LOG_TRACE("Assigning fixed assignment to reg {} for value {}",
869 reg.id(),
870 static_cast<u32>(local_idx));
871 ap.set_reg(reg);
872 ap.set_register_valid(true);
873 ap.set_fixed_assignment(true);
874 register_file.mark_used(reg, local_idx, 0);
875 register_file.inc_lock_count(reg); // fixed assignments always locked
876 register_file.mark_clobbered(reg);
877 ++assignments.cur_fixed_assignment_count[ap.bank().id()];
878 }
879 }
880 }
881
882 const auto last_full = liveness.last_full;
883 const auto ref_count = liveness.ref_count;
884
885 assert(max_part_size <= 256);
886 assignment->max_part_size = max_part_size;
887 assignment->pending_free = false;
888 assignment->variable_ref = false;
889 assignment->stack_variable = false;
890 assignment->delay_free = last_full;
891 assignment->part_count = part_count;
892 assignment->frame_off = 0;
893 assignment->references_left = ref_count;
894}
895
896template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
897void CompilerBase<Adaptor, Derived, Config>::free_assignment(
898 ValLocalIdx local_idx, ValueAssignment *assignment) noexcept {
899 TPDE_LOG_TRACE("Freeing assignment for value {}",
900 static_cast<u32>(local_idx));
901
902 assert(assignments.value_ptrs[static_cast<u32>(local_idx)] == assignment);
903 assignments.value_ptrs[static_cast<u32>(local_idx)] = nullptr;
904 const auto is_var_ref = assignment->variable_ref;
905 const u32 part_count = assignment->part_count;
906
907 // free registers
908 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
909 auto ap = AssignmentPartRef{assignment, part_idx};
910 if (ap.fixed_assignment()) [[unlikely]] {
911 const auto reg = ap.get_reg();
912 assert(register_file.is_fixed(reg));
913 assert(register_file.reg_local_idx(reg) == local_idx);
914 assert(register_file.reg_part(reg) == part_idx);
915 --assignments.cur_fixed_assignment_count[ap.bank().id()];
916 register_file.dec_lock_count_must_zero(reg); // release lock for fixed reg
917 register_file.unmark_used(reg);
918 } else if (ap.register_valid()) {
919 const auto reg = ap.get_reg();
920 assert(!register_file.is_fixed(reg));
921 register_file.unmark_used(reg);
922 }
923 }
924
925#ifdef TPDE_ASSERTS
926 for (auto reg_id : register_file.used_regs()) {
927 assert(register_file.reg_local_idx(AsmReg{reg_id}) != local_idx &&
928 "freeing assignment that is still referenced by a register");
929 }
930#endif
931
932 // variable references do not have a stack slot
933 if (!is_var_ref && assignment->frame_off != 0) {
934 free_stack_slot(assignment->frame_off, assignment->size());
935 }
936
937 assignments.allocator.deallocate(assignment);
938}
939
940template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
941[[gnu::noinline]] void
942CompilerBase<Adaptor, Derived, Config>::release_assignment(
943 ValLocalIdx local_idx, ValueAssignment *assignment) noexcept {
944 if (!assignment->delay_free) {
945 free_assignment(local_idx, assignment);
946 return;
947 }
948
949 // need to wait until release
950 TPDE_LOG_TRACE("Delay freeing assignment for value {}",
951 static_cast<u32>(local_idx));
952 const auto &liveness = analyzer.liveness_info(local_idx);
953 auto &free_list_head = assignments.delayed_free_lists[u32(liveness.last)];
954 assignment->next_delayed_free_entry = free_list_head;
955 assignment->pending_free = true;
956 free_list_head = local_idx;
957}
958
959template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
960void CompilerBase<Adaptor, Derived, Config>::init_variable_ref(
961 ValLocalIdx local_idx, u32 var_ref_data) noexcept {
962 TPDE_LOG_TRACE("Initializing variable-ref assignment for value {}",
963 static_cast<u32>(local_idx));
964
965 assert(val_assignment(local_idx) == nullptr);
966 auto *assignment = assignments.allocator.allocate_slow(1, true);
967 assignments.value_ptrs[static_cast<u32>(local_idx)] = assignment;
968
969 assignment->max_part_size = Config::PLATFORM_POINTER_SIZE;
970 assignment->variable_ref = true;
971 assignment->stack_variable = false;
972 assignment->part_count = 1;
973 assignment->var_ref_custom_idx = var_ref_data;
974 assignment->next_delayed_free_entry = assignments.variable_ref_list;
975
976 assignments.variable_ref_list = local_idx;
977
978 AssignmentPartRef ap{assignment, 0};
979 ap.reset();
980 ap.set_bank(Config::GP_BANK);
981 ap.set_part_size(Config::PLATFORM_POINTER_SIZE);
982}
983
984template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
985i32 CompilerBase<Adaptor, Derived, Config>::allocate_stack_slot(
986 u32 size) noexcept {
987 unsigned align_bits = 4;
988 if (size == 0) {
989 return 0; // 0 is the "invalid" stack slot
990 } else if (size <= 16) {
991 // Align up to next power of two.
992 u32 free_list_idx = size == 1 ? 0 : 32 - util::cnt_lz<u32>(size - 1);
993 assert(size <= 1u << free_list_idx);
994 size = 1 << free_list_idx;
995 align_bits = free_list_idx;
996
997 if (!stack.fixed_free_lists[free_list_idx].empty()) {
998 auto slot = stack.fixed_free_lists[free_list_idx].back();
999 stack.fixed_free_lists[free_list_idx].pop_back();
1000 return slot;
1001 }
1002 } else {
1003 size = util::align_up(size, 16);
1004 auto it = stack.dynamic_free_lists.find(size);
1005 if (it != stack.dynamic_free_lists.end() && !it->second.empty()) {
1006 const auto slot = it->second.back();
1007 it->second.pop_back();
1008 return slot;
1009 }
1010 }
1011
1012 assert(stack.frame_size != ~0u &&
1013 "cannot allocate stack slot before stack frame is initialized");
1014
1015 // Align frame_size to align_bits
1016 for (u32 list_idx = util::cnt_tz(stack.frame_size); list_idx < align_bits;
1017 list_idx = util::cnt_tz(stack.frame_size)) {
1018 i32 slot = stack.frame_size;
1019 if constexpr (Config::FRAME_INDEXING_NEGATIVE) {
1020 slot = -(slot + (1ull << list_idx));
1021 }
1022 stack.fixed_free_lists[list_idx].push_back(slot);
1023 stack.frame_size += 1ull << list_idx;
1024 }
1025
1026 auto slot = stack.frame_size;
1027 assert(slot != 0 && "stack slot 0 is reserved");
1028 stack.frame_size += size;
1029
1030 if constexpr (Config::FRAME_INDEXING_NEGATIVE) {
1031 slot = -(slot + size);
1032 }
1033 return slot;
1034}
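// Illustrative examples (not part of this header) of the bucketing
// implemented above, assuming an empty, 16-byte-aligned frame:
//   allocate_stack_slot(3)  -> request rounded up to 4 bytes, served from
//                              fixed_free_lists[2] (buckets of 1/2/4/8/16);
//   allocate_stack_slot(24) -> rounded up to 32 bytes, served from the
//                              dynamic_free_lists entry keyed by 32.
// free_stack_slot() pushes an offset back onto the matching list so that a
// later allocation of the same bucket size can reuse it without growing
// frame_size.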
1035
1036template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1037void CompilerBase<Adaptor, Derived, Config>::free_stack_slot(
1038 u32 slot, u32 size) noexcept {
1039 if (size == 0) [[unlikely]] {
1040 assert(slot == 0 && "unexpected slot for zero-sized stack-slot?");
1041 // Do nothing.
1042 } else if (size <= 16) [[likely]] {
1043 u32 free_list_idx = size == 1 ? 0 : 32 - util::cnt_lz<u32>(size - 1);
1044 stack.fixed_free_lists[free_list_idx].push_back(slot);
1045 } else {
1046 size = util::align_up(size, 16);
1047 stack.dynamic_free_lists[size].push_back(slot);
1048 }
1049}
1050
1051template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1052template <typename Fn>
1053void CompilerBase<Adaptor, Derived, Config>::handle_func_arg(
1054 u32 arg_idx, IRValueRef arg, Fn add_arg) noexcept {
1055 ValueRef vr = derived()->result_ref(arg);
1056 if (adaptor->cur_arg_is_byval(arg_idx)) {
1057 std::optional<i32> byval_frame_off =
1058 add_arg(vr.part(0),
1059 CCAssignment{
1060 .byval = true,
1061 .align = u8(adaptor->cur_arg_byval_align(arg_idx)),
1062 .size = adaptor->cur_arg_byval_size(arg_idx),
1063 });
1064
1065 if (byval_frame_off) {
1066 // We need to convert the assignment into a stack variable ref.
1067 ValLocalIdx local_idx = val_idx(arg);
1068 // TODO: we shouldn't create the result_ref for such cases in the first
1069 // place. However, this is not easy to detect up front, it depends on the
1070 // target and the calling convention whether this is possible.
1071 vr.reset();
1072 // Value assignment might have been free'd by ValueRef reset.
1073 if (ValueAssignment *assignment = val_assignment(local_idx)) {
1074 free_assignment(local_idx, assignment);
1075 }
1076 init_variable_ref(local_idx, 0);
1077 ValueAssignment *assignment = this->val_assignment(local_idx);
1078 assignment->stack_variable = true;
1079 assignment->frame_off = *byval_frame_off;
1080 }
1081 return;
1082 }
1083
1084 if (adaptor->cur_arg_is_sret(arg_idx)) {
1085 add_arg(vr.part(0), CCAssignment{.sret = true});
1086 return;
1087 }
1088
1089 const u32 part_count = vr.assignment()->part_count;
1090
1091 u32 align = 1;
1092 u32 consecutive = 0;
1093 u32 consec_def = 0;
1094 if (derived()->arg_is_int128(arg)) {
1095 // TODO: this also applies to composites with 16-byte alignment
1096 align = 16;
1097 consecutive = 1;
1098 } else if (part_count > 1 &&
1099 !derived()->arg_allow_split_reg_stack_passing(arg)) {
1100 consecutive = 1;
1101 if (part_count > UINT8_MAX) {
1102 // Must be completely passed on the stack.
1103 consecutive = 0;
1104 consec_def = -1;
1105 }
1106 }
1107
1108 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
1109 add_arg(vr.part(part_idx),
1110 CCAssignment{
1111 .consecutive =
1112 u8(consecutive ? part_count - part_idx - 1 : consec_def),
1113 .align = u8(part_idx == 0 ? align : 1),
1114 });
1115 }
1116}
1117
1118template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1119typename CompilerBase<Adaptor, Derived, Config>::ValueRef
1120 CompilerBase<Adaptor, Derived, Config>::val_ref(IRValueRef value) noexcept {
1121 if (auto special = derived()->val_ref_special(value); special) {
1122 return ValueRef{this, std::move(*special)};
1123 }
1124
1125 const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(value);
1126 assert(val_assignment(local_idx) != nullptr && "value use before def");
1127 return ValueRef{this, local_idx};
1128}
1129
1130template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1131std::pair<typename CompilerBase<Adaptor, Derived, Config>::ValueRef,
1132 typename CompilerBase<Adaptor, Derived, Config>::ValuePartRef>
1133 CompilerBase<Adaptor, Derived, Config>::val_ref_single(
1134 IRValueRef value) noexcept {
1135 std::pair<ValueRef, ValuePartRef> res{val_ref(value), this};
1136 res.second = res.first.part(0);
1137 return res;
1138}
1139
1140template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1141typename CompilerBase<Adaptor, Derived, Config>::ValueRef
1142 CompilerBase<Adaptor, Derived, Config>::result_ref(
1143 IRValueRef value) noexcept {
1144 const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(value);
1145 if (val_assignment(local_idx) == nullptr) {
1146 init_assignment(value, local_idx);
1147 }
1148 return ValueRef{this, local_idx};
1149}
1150
1151template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1152std::pair<typename CompilerBase<Adaptor, Derived, Config>::ValueRef,
1153 typename CompilerBase<Adaptor, Derived, Config>::ValuePartRef>
1154 CompilerBase<Adaptor, Derived, Config>::result_ref_single(
1155 IRValueRef value) noexcept {
1156 std::pair<ValueRef, ValuePartRef> res{result_ref(value), this};
1157 res.second = res.first.part(0);
1158 return res;
1159}
1160
1161template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1162typename CompilerBase<Adaptor, Derived, Config>::ValueRef
1163 CompilerBase<Adaptor, Derived, Config>::result_ref_alias(
1164 IRValueRef dst, ValueRef &&src) noexcept {
1165 const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(dst);
1166 assert(!val_assignment(local_idx) && "alias target already defined");
1167 assert(src.has_assignment() && "alias src must have an assignment");
1168 // Consider implementing aliases where multiple values share the same
1169 // assignment, e.g. for implementing trunc as a no-op sharing registers. Live
1170 // ranges can be merged and reference counts added. However, this needs
1171 // additional infrastructure to clear one of the value_ptrs.
1172 assert(src.is_owned() && "alias src must be owned");
1173
1174 ValueAssignment *assignment = src.assignment();
1175 u32 part_count = assignment->part_count;
1176 assert(!assignment->pending_free);
1177 assert(!assignment->variable_ref);
1178 assert(!assignment->pending_free);
1179#ifndef NDEBUG
1180 {
1181 const auto &src_liveness = analyzer.liveness_info(src.local_idx());
1182 assert(!src_liveness.last_full); // implied by is_owned()
1183 assert(assignment->references_left == 1); // implied by is_owned()
1184
1185 // Validate that part configuration is identical.
1186 const auto parts = derived()->val_parts(dst);
1187 assert(parts.count() == part_count);
1188 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
1189 AssignmentPartRef ap{assignment, part_idx};
1190 assert(parts.reg_bank(part_idx) == ap.bank());
1191 assert(parts.size_bytes(part_idx) == ap.part_size());
1192 }
1193 }
1194#endif
1195
1196 // Update local_idx of registers.
1197 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
1198 AssignmentPartRef ap{assignment, part_idx};
1199 if (ap.register_valid()) {
1200 register_file.update_reg_assignment(ap.get_reg(), local_idx, part_idx);
1201 }
1202 }
1203
1204 const auto &liveness = analyzer.liveness_info(local_idx);
1205 assignment->delay_free = liveness.last_full;
1206 assignment->references_left = liveness.ref_count;
1207 assignments.value_ptrs[static_cast<u32>(src.local_idx())] = nullptr;
1208 assignments.value_ptrs[static_cast<u32>(local_idx)] = assignment;
1209 src.disown();
1210 src.reset();
1211
1212 return ValueRef{this, local_idx};
1213}
1214
1215template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1216typename CompilerBase<Adaptor, Derived, Config>::ValueRef
1217 CompilerBase<Adaptor, Derived, Config>::result_ref_stack_slot(
1218 IRValueRef dst, AssignmentPartRef base, i32 off) noexcept {
1219 const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(dst);
1220 assert(!val_assignment(local_idx) && "new value already defined");
1221 init_variable_ref(local_idx, 0);
1222 ValueAssignment *assignment = this->val_assignment(local_idx);
1223 assignment->stack_variable = true;
1224 assignment->frame_off = base.variable_stack_off() + off;
1225 return ValueRef{this, local_idx};
1226}
1227
1228template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1229void CompilerBase<Adaptor, Derived, Config>::set_value(
1230 ValuePartRef &val_ref, ScratchReg &scratch) noexcept {
1231 val_ref.set_value(std::move(scratch));
1232}
1233
1234template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1235typename CompilerBase<Adaptor, Derived, Config>::AsmReg
1236 CompilerBase<Adaptor, Derived, Config>::gval_as_reg(
1237 GenericValuePart &gv) noexcept {
1238 if (std::holds_alternative<ScratchReg>(gv.state)) {
1239 return std::get<ScratchReg>(gv.state).cur_reg();
1240 }
1241 if (std::holds_alternative<ValuePartRef>(gv.state)) {
1242 auto &vpr = std::get<ValuePartRef>(gv.state);
1243 if (vpr.has_reg()) {
1244 return vpr.cur_reg();
1245 }
1246 return vpr.load_to_reg();
1247 }
1248 if (auto *expr = std::get_if<typename GenericValuePart::Expr>(&gv.state)) {
1249 if (expr->has_base() && !expr->has_index() && expr->disp == 0) {
1250 return expr->base_reg();
1251 }
1252 return derived()->gval_expr_as_reg(gv);
1253 }
1254 TPDE_UNREACHABLE("gval_as_reg on empty GenericValuePart");
1255}
1256
1257template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1258typename CompilerBase<Adaptor, Derived, Config>::AsmReg
1259 CompilerBase<Adaptor, Derived, Config>::gval_as_reg_reuse(
1260 GenericValuePart &gv, ScratchReg &dst) noexcept {
1261 AsmReg reg = gval_as_reg(gv);
1262 if (!dst.has_reg()) {
1263 if (auto *scratch = std::get_if<ScratchReg>(&gv.state)) {
1264 dst = std::move(*scratch);
1265 } else if (auto *val_ref = std::get_if<ValuePartRef>(&gv.state)) {
1266 if (val_ref->can_salvage()) {
1267 dst.alloc_specific(val_ref->salvage());
1268 assert(dst.cur_reg() == reg && "salvaging unsuccessful");
1269 }
1270 }
1271 }
1272 return reg;
1273}
1274
1275template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1276Reg CompilerBase<Adaptor, Derived, Config>::select_reg_evict(
1277 RegBank bank, u64 exclusion_mask) noexcept {
1278 TPDE_LOG_DBG("select_reg_evict for bank {}", bank.id());
1279 auto candidates =
1280 register_file.used & register_file.bank_regs(bank) & ~exclusion_mask;
1281
1282 Reg candidate = Reg::make_invalid();
1283 u32 max_score = 0;
1284 for (auto reg_id : util::BitSetIterator<>(candidates)) {
1285 Reg reg{reg_id};
1286 if (register_file.is_fixed(reg)) {
1287 continue;
1288 }
1289
1290 // Must be an evictable value, not a temporary.
1291 auto local_idx = register_file.reg_local_idx(reg);
1292 u32 part = register_file.reg_part(Reg{reg});
1293 assert(local_idx != INVALID_VAL_LOCAL_IDX);
1294 ValueAssignment *va = val_assignment(local_idx);
1295 AssignmentPartRef ap{va, part};
1296
1297 // We want to sort registers by the following (ordered by priority):
1298 // - stack variable ref (~1 add/sub to reconstruct)
1299 // - other variable ref (1-2 instrs to reconstruct)
1300 // - already spilled (no store needed)
1301 // - last use farthest away (most likely to get spilled anyhow, so there's
1302 // not much harm in spilling earlier)
1303 // - lowest ref-count (least used)
1304 //
1305 // TODO: evaluate and refine this heuristic
1306
1307 // TODO: evict stack variable refs before others
1308 if (ap.variable_ref()) {
1309 TPDE_LOG_DBG(" r{} ({}) is variable-ref", reg_id, u32(local_idx));
1310 candidate = reg;
1311 break;
1312 }
1313
1314 u32 score = 0;
1315 if (ap.stack_valid()) {
1316 score |= u32{1} << 31;
1317 }
1318
1319 const auto &liveness = analyzer.liveness_info(local_idx);
1320 u32 last_use_dist = u32(liveness.last) - u32(cur_block_idx);
1321 score |= (last_use_dist < 0x8000 ? 0x8000 - last_use_dist : 0) << 16;
1322
1323 u32 refs_left = va->pending_free ? 0 : va->references_left;
1324 score |= (refs_left < 0xffff ? 0x10000 - refs_left : 1);
1325
1326 TPDE_LOG_DBG(" r{} ({}:{}) rc={}/{} live={}-{}{} spilled={} score={:#x}",
1327 reg_id,
1328 u32(local_idx),
1329 part,
1330 refs_left,
1331 liveness.ref_count,
1332 u32(liveness.first),
1333 u32(liveness.last),
1334 &"*"[!liveness.last_full],
1335 ap.stack_valid(),
1336 score);
1337
1338 assert(score != 0);
1339 if (score > max_score) {
1340 candidate = reg;
1341 max_score = score;
1342 }
1343 }
1344 if (candidate.invalid()) [[unlikely]] {
1345 TPDE_FATAL("ran out of registers for scratch registers");
1346 }
1347 TPDE_LOG_DBG(" selected r{}", candidate.id());
1348 evict_reg(candidate);
1349 return candidate;
1350}
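// Illustrative example (not part of this header) of the score packing used
// above: bit 31 flags "already spilled" (no store needed), bits 16-30 hold
// 0x8000 - distance_to_last_use (clamped to 0), and the low 16 bits hold
// 0x10000 - references_left (clamped to 1); the used, non-fixed register with
// the highest score is evicted. For instance, an already-spilled value whose
// last use lies 3 blocks ahead and which has 2 references left scores
//   (1u << 31) | ((0x8000 - 3) << 16) | (0x10000 - 2) = 0xfffd'fffe.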
1351
1352template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1353void CompilerBase<Adaptor, Derived, Config>::reload_to_reg(
1354 AsmReg dst, AssignmentPartRef ap) noexcept {
1355 if (!ap.variable_ref()) {
1356 assert(ap.stack_valid());
1357 derived()->load_from_stack(dst, ap.frame_off(), ap.part_size());
1358 } else if (ap.is_stack_variable()) {
1359 derived()->load_address_of_stack_var(dst, ap);
1360 } else if constexpr (!Config::DEFAULT_VAR_REF_HANDLING) {
1361 derived()->load_address_of_var_reference(dst, ap);
1362 } else {
1363 TPDE_UNREACHABLE("non-stack-variable needs custom var-ref handling");
1364 }
1365}
1366
1367template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1368void CompilerBase<Adaptor, Derived, Config>::allocate_spill_slot(
1369 AssignmentPartRef ap) noexcept {
1370 assert(!ap.variable_ref() && "cannot allocate spill slot for variable ref");
1371 if (ap.assignment()->frame_off == 0) {
1372 assert(!ap.stack_valid() && "stack-valid set without spill slot");
1373 ap.assignment()->frame_off = allocate_stack_slot(ap.assignment()->size());
1374 assert(ap.assignment()->frame_off != 0);
1375 }
1376}
1377
1378template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1379void CompilerBase<Adaptor, Derived, Config>::spill(
1380 AssignmentPartRef ap) noexcept {
1381 assert(may_change_value_state());
1382 if (!ap.stack_valid() && !ap.variable_ref()) {
1383 assert(ap.register_valid() && "cannot spill uninitialized assignment part");
1384 allocate_spill_slot(ap);
1385 derived()->spill_reg(ap.get_reg(), ap.frame_off(), ap.part_size());
1386 ap.set_stack_valid();
1387 }
1388}
1389
1390template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1391void CompilerBase<Adaptor, Derived, Config>::evict(
1392 AssignmentPartRef ap) noexcept {
1393 assert(may_change_value_state());
1394 assert(ap.register_valid());
1395 derived()->spill(ap);
1396 ap.set_register_valid(false);
1397 register_file.unmark_used(ap.get_reg());
1398}
1399
1400template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1401void CompilerBase<Adaptor, Derived, Config>::evict_reg(Reg reg) noexcept {
1402 assert(may_change_value_state());
1403 assert(!register_file.is_fixed(reg));
1404 assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);
1405
1406 ValLocalIdx local_idx = register_file.reg_local_idx(reg);
1407 auto part = register_file.reg_part(reg);
1408 AssignmentPartRef evict_part{val_assignment(local_idx), part};
1409 assert(evict_part.register_valid());
1410 assert(evict_part.get_reg() == reg);
1411 derived()->spill(evict_part);
1412 evict_part.set_register_valid(false);
1413 register_file.unmark_used(reg);
1414}
1415
1416template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1417void CompilerBase<Adaptor, Derived, Config>::free_reg(Reg reg) noexcept {
1418 assert(may_change_value_state());
1419 assert(!register_file.is_fixed(reg));
1420 assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);
1421
1422 ValLocalIdx local_idx = register_file.reg_local_idx(reg);
1423 auto part = register_file.reg_part(reg);
1424 AssignmentPartRef ap{val_assignment(local_idx), part};
1425 assert(ap.register_valid());
1426 assert(ap.get_reg() == reg);
1427 assert(!ap.modified() || ap.variable_ref());
1428 ap.set_register_valid(false);
1429 register_file.unmark_used(reg);
1430}
1431
1432template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1433typename CompilerBase<Adaptor, Derived, Config>::RegisterFile::RegBitSet
1434 CompilerBase<Adaptor, Derived, Config>::spill_before_branch(
1435 bool force_spill) noexcept {
1436 // since we do not explicitly keep track of register assignments per block,
1437 // whenever we might branch off to a block that we do not directly compile
1438 // afterwards (i.e. the register assignments might change in between), we
1439 // need to spill all registers which are not fixed and remove them from the
1440 // register state.
1441 //
1442 // This leads to worse codegen but saves a significant overhead to
1443 // store/manage the register assignment for each block (256 bytes/block for
1444 // x64) and possibly compile time, as there might be additional logic to move
1445 // values around
1446
1447 // First, we consider the case that the current block only has one successor
1448 // which is compiled directly after the current one, in which case we do not
1449 // have to spill anything.
1450 //
1451 // Secondly, if the next block has multiple incoming edges, we always have
1452 // to spill and remove from the register assignment. Otherwise, we
1453 // only need to spill values if they are alive in any successor which is not
1454 // the next block.
1455 //
1456 // Values which are only read from PHI-Nodes and have no extended lifetimes
1457 // do not need to be spilled, as they die at the edge.
1458
1459 using RegBitSet = typename RegisterFile::RegBitSet;
1460
1461 assert(may_change_value_state());
1462
1463 const IRBlockRef cur_block_ref = analyzer.block_ref(cur_block_idx);
1464 // Earliest succeeding block after the current block that is not the
1465 // immediately succeeding block. Used to determine whether a value needs to
1466 // be spilled.
1467 BlockIndex earliest_next_succ = Analyzer<Adaptor>::INVALID_BLOCK_IDX;
1468
1469 bool must_spill = force_spill;
1470 if (!must_spill) {
1471 // We must always spill if no block is immediately succeeding or that block
1472 // has multiple incoming edges.
1473 auto next_block_is_succ = false;
1474 auto next_block_has_multiple_incoming = false;
1475 u32 succ_count = 0;
1476 for (const IRBlockRef succ : adaptor->block_succs(cur_block_ref)) {
1477 ++succ_count;
1478 BlockIndex succ_idx = analyzer.block_idx(succ);
1479 if (u32(succ_idx) == u32(cur_block_idx) + 1) {
1480 next_block_is_succ = true;
1481 if (analyzer.block_has_multiple_incoming(succ)) {
1482 next_block_has_multiple_incoming = true;
1483 }
1484 } else if (succ_idx > cur_block_idx && succ_idx < earliest_next_succ) {
1485 earliest_next_succ = succ_idx;
1486 }
1487 }
1488
1489 must_spill = !next_block_is_succ || next_block_has_multiple_incoming;
1490
1491 if (succ_count == 1 && !must_spill) {
1492 return RegBitSet{};
1493 }
1494 }
1495
1496 auto release_regs = RegBitSet{};
1497 // TODO(ts): just use register_file.used_nonfixed_regs()?
1498 for (auto reg : register_file.used_regs()) {
1499 auto local_idx = register_file.reg_local_idx(Reg{reg});
1500 auto part = register_file.reg_part(Reg{reg});
1501 if (local_idx == INVALID_VAL_LOCAL_IDX) {
1502 // scratch regs can never be held across blocks
1503 continue;
1504 }
1505 AssignmentPartRef ap{val_assignment(local_idx), part};
1506 if (ap.fixed_assignment()) {
1507 // fixed registers do not need to be spilled
1508 continue;
1509 }
1510
1511 // Remove from register assignment if the next block cannot rely on the
1512 // value being in the specific register.
1513 if (must_spill) {
1514 release_regs |= RegBitSet{1ull} << reg;
1515 }
1516
1517 if (!ap.modified() || ap.variable_ref()) {
1518 // No need to spill values that were already spilled or are variable refs.
1519 continue;
1520 }
1521
1522 const auto &liveness = analyzer.liveness_info(local_idx);
1523 if (liveness.last <= cur_block_idx) {
1524 // No need to spill value if it dies immediately after the block.
1525 continue;
1526 }
1527
1528 // If the value is live in a different block, we might need to spill it.
1529 // Cases:
1530 // - Live in successor that precedes current block => ignore, value must
1531 // have been spilled already due to RPO.
1532 // - Live only in successor that immediately follows this block => no need
1533 // to spill, value can be kept in register, register state for next block
1534 // is still valid.
1535 // - Live in successor in some distance after current block => spill
1536 if (must_spill || earliest_next_succ <= liveness.last) {
1537 spill(ap);
1538 }
1539 }
1540
1541 return release_regs;
1542}
1543
1544template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1545void CompilerBase<Adaptor, Derived, Config>::release_spilled_regs(
1546 typename RegisterFile::RegBitSet regs) noexcept {
1547 assert(may_change_value_state());
1548
1549 // TODO(ts): needs changes for other RegisterFile impls
1550 for (auto reg_id : util::BitSetIterator<>{regs & register_file.used}) {
1551 if (!register_file.is_fixed(Reg{reg_id})) {
1552 free_reg(Reg{reg_id});
1553 }
1554 }
1555}
1556
1557template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1558void CompilerBase<Adaptor, Derived, Config>::
1559 release_regs_after_return() noexcept {
1560 // we essentially have to free all non-fixed registers
1561 for (auto reg_id : register_file.used_regs()) {
1562 if (!register_file.is_fixed(Reg{reg_id})) {
1563 free_reg(Reg{reg_id});
1564 }
1565 }
1566}
1567
1568template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1570 ScratchReg &&cond,
1571 u32 width,
1572 IRBlockRef default_block,
1573 std::span<const std::pair<u64, IRBlockRef>> cases) noexcept {
1574 // This function takes cond as a ScratchReg as opposed to a ValuePart, because
1575 // the ValueRef for the condition must be ref-counted before we enter the
1576 // branch region.
1577
1578 assert(width <= 64);
1579 // We don't support sections with more than 4 GiB, so switches with more than
1580 // 4G cases are impossible to support.
1581 assert(cases.size() < UINT32_MAX && "large switches are unsupported");
1582
1583 AsmReg cmp_reg = cond.cur_reg();
1584 bool width_is_32 = width <= 32;
1585 if (u32 dst_width = util::align_up(width, 32); width != dst_width) {
1586 derived()->generate_raw_intext(cmp_reg, cmp_reg, false, width, dst_width);
1587 }
1588
1589 // We must not evict any registers in the branching code, as we don't track
1590 // the individual value states per block. Hence, we must not allocate any
1591 // registers (e.g., for constants, jump table address) below.
1592 ScratchReg tmp_scratch{this};
1593 AsmReg tmp_reg = tmp_scratch.alloc_gp();
1594
1595 const auto spilled = this->spill_before_branch();
1596 this->begin_branch_region();
1597
1598 // Because some blocks might have PHI values, we first jump to a per-case
1599 // label that fixes up the registers and then jumps to the actual block.
1600 // TODO(ts): check which blocks need PHIs and jump directly to the others?
1601 // probably better for the branch predictor
1602
1603 tpde::util::SmallVector<tpde::Label, 64> case_labels;
1604 for (auto i = 0u; i < cases.size(); ++i) {
1605 case_labels.push_back(this->text_writer.label_create());
1606 }
1607
1608 const auto default_label = this->text_writer.label_create();
1609
1610 const auto build_range = [&,
1611 this](size_t begin, size_t end, const auto &self) {
1612 assert(begin <= end);
1613 const auto num_cases = end - begin;
1614 if (num_cases <= 4) {
1615 // if there are four or fewer cases we just compare the value
1616 // against each of them
1617 for (auto i = 0u; i < num_cases; ++i) {
1618 derived()->switch_emit_cmpeq(case_labels[begin + i],
1619 cmp_reg,
1620 tmp_reg,
1621 cases[begin + i].first,
1622 width_is_32);
1623 }
1624
1625 derived()->generate_raw_jump(Derived::Jump::jmp, default_label);
1626 return;
1627 }
1628
1629 // check if the density of the values is high enough to warrant building
1630 // a jump table
1631 auto range = cases[end - 1].first - cases[begin].first;
1632 // range + 1 below would overflow to 0 if range is UINT64_MAX (i.e. -1),
1633 // so skip the jump table in that case
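  // For example (hypothetical case values): cases {0, 2, 3, 7, 9} give
  // range = 9 and num_cases = 5, so range / num_cases = 1 < 8 and a 10-entry
  // table is built with the missing keys pointing at default_label; for
  // {0, 100, 200, 300, 400, 1000} the ratio is 166, so we fall through to the
  // binary-search lowering below.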
1634 if (range != 0xFFFF'FFFF'FFFF'FFFF && (range / num_cases) < 8) {
1635 // GCC appears to build a jump table when there are fewer than 8 values
1636 // per case, so we do the same
1637
1638 // the number of table entries is one more than the difference of the
1639 // extreme case values, so adjust for that
1640 range += 1;
1641
1642 tpde::util::SmallVector<tpde::Label, 32> label_vec;
1643 std::span<tpde::Label> labels;
1644 if (range == num_cases) {
1645 labels = std::span{case_labels.begin() + begin, num_cases};
1646 } else {
1647 label_vec.resize(range, default_label);
1648 for (auto i = 0u; i < num_cases; ++i) {
1649 label_vec[cases[begin + i].first - cases[begin].first] =
1650 case_labels[begin + i];
1651 }
1652 labels = std::span{label_vec.begin(), range};
1653 }
1654
1655 // Give target the option to emit a jump table.
1656 if (derived()->switch_emit_jump_table(default_label,
1657 labels,
1658 cmp_reg,
1659 tmp_reg,
1660 cases[begin].first,
1661 cases[end - 1].first,
1662 width_is_32)) {
1663 return;
1664 }
1665 }
1666
1667 // do a binary search step
1668 const auto half_len = num_cases / 2;
1669 const auto half_value = cases[begin + half_len].first;
1670 const auto gt_label = this->text_writer.label_create();
1671
1672 // This compares the input value against half_value, jumps to the case
1673 // label if they are equal, to gt_label if the value is greater, and
1674 // falls through otherwise.
1675 derived()->switch_emit_binary_step(case_labels[begin + half_len],
1676 gt_label,
1677 cmp_reg,
1678 tmp_reg,
1679 half_value,
1680 width_is_32);
1681 // search the lower half
1682 self(begin, begin + half_len, self);
1683
1684 // and the upper half
1685 this->label_place(gt_label);
1686 self(begin + half_len + 1, end, self);
1687 };
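  // As a sketch of the resulting shape (hypothetical case set {1..12, 1000}):
  // the top-level call is too sparse for a table (999 / 13 >= 8), so it emits
  // a binary-search step on case 7; the lower half {1..6} is dense and becomes
  // a jump table, while the upper half {8..12, 1000} is split again.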
1688
1689 build_range(0, case_labels.size(), build_range);
1690
1691 // write out the labels
1692 this->label_place(default_label);
1693 derived()->generate_branch_to_block(
1694 Derived::Jump::jmp, default_block, false, false);
1695
1696 for (auto i = 0u; i < cases.size(); ++i) {
1697 this->label_place(case_labels[i]);
1698 derived()->generate_branch_to_block(
1699 Derived::Jump::jmp, cases[i].second, false, false);
1700 }
1701
1702 this->end_branch_region();
1703 this->release_spilled_regs(spilled);
1704}
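// A rough usage sketch (block names and the value are hypothetical): to lower
// "switch (x) { case 1: ...; case 5: ...; default: ... }" a derived compiler
// could do something like
//   ScratchReg cond{this};
//   /* ...materialize x into cond... */
//   std::pair<u64, IRBlockRef> cases[] = {{1, bb_one}, {5, bb_five}};
//   generate_switch(std::move(cond), /*width=*/32, bb_default, cases);
// Cases are expected to be sorted by value; the switch_emit_* hooks used above
// are implemented by the architecture-specific derived class.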
1705
1706template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1707void CompilerBase<Adaptor, Derived, Config>::move_to_phi_nodes_impl(
1708 BlockIndex target) noexcept {
1709 // PHI-nodes are always moved to their stack-slot (unless they are fixed)
1710 //
1711 // However, we need to take care of PHI dependencies (cycles and chains)
1712 // so as not to overwrite values that are still needed.
1713 //
1714 // In most cases, we expect the number of PHIs to be small, but we want to
1715 // stay reasonably efficient even with larger numbers of PHIs.
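  // A classic example (LLVM-like notation) is a loop that swaps two values on
  // its back edge:
  //   a = phi [ ..., b ]
  //   b = phi [ ..., a ]
  // Neither PHI may be overwritten before its old value has been read, which
  // is why the code below breaks such cycles via a temporary.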
1716
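  // Small helper that temporarily claims a register for the PHI moves. If no
  // register is free, it picks a non-fixed one, spills the evicted value if
  // necessary and restores that value's register assignment in reset(), so
  // the register/value state observed by other blocks stays unchanged.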
1717 struct ScratchWrapper {
1718 Derived *self;
1719 AsmReg cur_reg = AsmReg::make_invalid();
1720 bool backed_up = false;
1721 bool was_modified = false;
1722 u8 part = 0;
1723 ValLocalIdx local_idx = INVALID_VAL_LOCAL_IDX;
1724
1725 ScratchWrapper(Derived *self) : self{self} {}
1726
1727 ~ScratchWrapper() { reset(); }
1728
1729 void reset() {
1730 if (cur_reg.invalid()) {
1731 return;
1732 }
1733
1734 self->register_file.unmark_fixed(cur_reg);
1735 self->register_file.unmark_used(cur_reg);
1736
1737 if (backed_up) {
1738 // restore the register state
1739 // TODO(ts): do we actually need the reload?
1740 auto *assignment = self->val_assignment(local_idx);
1741 // if the value was freed in the meantime, we don't need to
1742 // restore it
1743 if (assignment) {
1744 auto ap = AssignmentPartRef{assignment, part};
1745 if (!ap.variable_ref()) {
1746 // TODO(ts): assert that this always happens?
1747 assert(ap.stack_valid());
1748 self->load_from_stack(cur_reg, ap.frame_off(), ap.part_size());
1749 }
1750 ap.set_reg(cur_reg);
1751 ap.set_register_valid(true);
1752 ap.set_modified(was_modified);
1753 self->register_file.mark_used(cur_reg, local_idx, part);
1754 }
1755 backed_up = false;
1756 }
1757 cur_reg = AsmReg::make_invalid();
1758 }
1759
1760 AsmReg alloc_from_bank(RegBank bank) {
1761 if (cur_reg.valid() && self->register_file.reg_bank(cur_reg) == bank) {
1762 return cur_reg;
1763 }
1764 if (cur_reg.valid()) {
1765 reset();
1766 }
1767
1768 // TODO(ts): try to first find a non callee-saved/clobbered
1769 // register...
1770 auto &reg_file = self->register_file;
1771 auto reg = reg_file.find_first_free_excluding(bank, 0);
1772 if (reg.invalid()) {
1773 // TODO(ts): use clock here?
1774 reg = reg_file.find_first_nonfixed_excluding(bank, 0);
1775 if (reg.invalid()) {
1776 TPDE_FATAL("ran out of registers for scratch registers");
1777 }
1778
1779 backed_up = true;
1780 local_idx = reg_file.reg_local_idx(reg);
1781 part = reg_file.reg_part(reg);
1782 AssignmentPartRef ap{self->val_assignment(local_idx), part};
1783 was_modified = ap.modified();
1784 // TODO(ts): this does not spill for variable refs
1785 // We don't use evict_reg here, as we know that we can't change the
1786 // value state.
1787 assert(ap.register_valid() && ap.get_reg() == reg);
1788 if (!ap.stack_valid() && !ap.variable_ref()) {
1789 self->allocate_spill_slot(ap);
1790 self->spill_reg(ap.get_reg(), ap.frame_off(), ap.part_size());
1791 ap.set_stack_valid();
1792 }
1793 ap.set_register_valid(false);
1794 reg_file.unmark_used(reg);
1795 }
1796
1797 reg_file.mark_used(reg, INVALID_VAL_LOCAL_IDX, 0);
1798 reg_file.mark_clobbered(reg);
1799 reg_file.mark_fixed(reg);
1800 cur_reg = reg;
1801 return reg;
1802 }
1803
1804 ScratchWrapper &operator=(const ScratchWrapper &) = delete;
1805 ScratchWrapper &operator=(ScratchWrapper &&) = delete;
1806 };
1807
1808 IRBlockRef target_ref = analyzer.block_ref(target);
1809 IRBlockRef cur_ref = analyzer.block_ref(cur_block_idx);
1810
1811 // collect all the nodes
1812 struct NodeEntry {
1813 IRValueRef phi;
1814 IRValueRef incoming_val;
1815 ValLocalIdx phi_local_idx;
1816 // local idx of same-block phi node that needs special handling
1817 ValLocalIdx incoming_phi_local_idx = INVALID_VAL_LOCAL_IDX;
1818 // bool incoming_is_phi;
1819 u32 ref_count;
1820
1821 bool operator<(const NodeEntry &other) const noexcept {
1822 return phi_local_idx < other.phi_local_idx;
1823 }
1824
1825 bool operator<(ValLocalIdx other) const noexcept {
1826 return phi_local_idx < other;
1827 }
1828 };
1829
1830 util::SmallVector<NodeEntry, 16> nodes;
1831 for (IRValueRef phi : adaptor->block_phis(target_ref)) {
1832 ValLocalIdx phi_local_idx = adaptor->val_local_idx(phi);
1833 auto incoming = adaptor->val_as_phi(phi).incoming_val_for_block(cur_ref);
1834 nodes.emplace_back(NodeEntry{
1835 .phi = phi, .incoming_val = incoming, .phi_local_idx = phi_local_idx});
1836 }
1837
1838 // We check that the block has phi nodes before getting here.
1839 assert(!nodes.empty() && "block marked as having phi nodes has none");
1840
1841 ScratchWrapper scratch{derived()};
1842 const auto move_to_phi = [this, &scratch](IRValueRef phi,
1843 IRValueRef incoming_val) {
1844 auto phi_vr = derived()->result_ref(phi);
1845 auto val_vr = derived()->val_ref(incoming_val);
1846 if (phi == incoming_val) {
1847 return;
1848 }
1849
1850 u32 part_count = phi_vr.assignment()->part_count;
1851 for (u32 i = 0; i < part_count; ++i) {
1852 AssignmentPartRef phi_ap{phi_vr.assignment(), i};
1853 ValuePartRef val_vpr = val_vr.part(i);
1854
1855 if (phi_ap.fixed_assignment()) {
1856 if (AsmReg reg = val_vpr.cur_reg_unlocked(); reg.valid()) {
1857 derived()->mov(phi_ap.get_reg(), reg, phi_ap.part_size());
1858 } else {
1859 val_vpr.reload_into_specific_fixed(phi_ap.get_reg());
1860 }
1861 } else {
1862 AsmReg reg = val_vpr.cur_reg_unlocked();
1863 if (!reg.valid()) {
1864 reg = scratch.alloc_from_bank(val_vpr.bank());
1865 val_vpr.reload_into_specific_fixed(reg);
1866 }
1867 allocate_spill_slot(phi_ap);
1868 derived()->spill_reg(reg, phi_ap.frame_off(), phi_ap.part_size());
1869 phi_ap.set_stack_valid();
1870 }
1871 }
1872 };
1873
1874 if (nodes.size() == 1) {
1875 move_to_phi(nodes[0].phi, nodes[0].incoming_val);
1876 return;
1877 }
1878
1879 // sort so we can binary search later
1880 std::sort(nodes.begin(), nodes.end());
1881
1882 // fill in the refcount
1883 auto all_zero_ref = true;
1884 for (auto &node : nodes) {
1885 // Nothing needs to be done for PHIs whose incoming value is not another
1886 // PHI, or for self-referencing PHIs.
1887 bool incoming_is_phi = adaptor->val_is_phi(node.incoming_val);
1888 if (!incoming_is_phi || node.incoming_val == node.phi) {
1889 continue;
1890 }
1891
1892 ValLocalIdx inc_local_idx = adaptor->val_local_idx(node.incoming_val);
1893 auto it = std::lower_bound(nodes.begin(), nodes.end(), inc_local_idx);
1894 if (it == nodes.end() || it->phi != node.incoming_val) {
1895 // Incoming value is a PHI node, but it's not from our block, so we don't
1896 // need to be particularly careful when assigning values.
1897 continue;
1898 }
1899 node.incoming_phi_local_idx = inc_local_idx;
1900 ++it->ref_count;
1901 all_zero_ref = false;
1902 }
1903
1904 if (all_zero_ref) {
1905 // no cycles/chain that we need to take care of
1906 for (auto &node : nodes) {
1907 move_to_phi(node.phi, node.incoming_val);
1908 }
1909 return;
1910 }
1911
1912 // TODO(ts): this is rather inefficient...
1913 util::SmallVector<u32, 32> ready_indices;
1914 ready_indices.reserve(nodes.size());
1915 util::SmallBitSet<256> waiting_nodes;
1916 waiting_nodes.resize(nodes.size());
1917 for (u32 i = 0; i < nodes.size(); ++i) {
1918 if (nodes[i].ref_count) {
1919 waiting_nodes.mark_set(i);
1920 } else {
1921 ready_indices.push_back(i);
1922 }
1923 }
1924
1925 u32 handled_count = 0;
1926 u32 cur_tmp_part_count = 0;
1927 i32 cur_tmp_slot = 0;
1928 u32 cur_tmp_slot_size = 0;
1929 IRValueRef cur_tmp_val = Adaptor::INVALID_VALUE_REF;
1930 ScratchWrapper tmp_reg1{derived()}, tmp_reg2{derived()};
1931
1932 const auto move_from_tmp_phi = [&](IRValueRef target_phi) {
1933 auto phi_vr = val_ref(target_phi);
1934 if (cur_tmp_part_count <= 2) {
1935 AssignmentPartRef ap{phi_vr.assignment(), 0};
1936 assert(!tmp_reg1.cur_reg.invalid());
1937 if (ap.fixed_assignment()) {
1938 derived()->mov(ap.get_reg(), tmp_reg1.cur_reg, ap.part_size());
1939 } else {
1940 derived()->spill_reg(tmp_reg1.cur_reg, ap.frame_off(), ap.part_size());
1941 }
1942
1943 if (cur_tmp_part_count == 2) {
1944 AssignmentPartRef ap_high{phi_vr.assignment(), 1};
1945 assert(!ap_high.fixed_assignment());
1946 assert(!tmp_reg2.cur_reg.invalid());
1947 derived()->spill_reg(
1948 tmp_reg2.cur_reg, ap_high.frame_off(), ap_high.part_size());
1949 }
1950 return;
1951 }
1952
1953 for (u32 i = 0; i < cur_tmp_part_count; ++i) {
1954 AssignmentPartRef phi_ap{phi_vr.assignment(), i};
1955 assert(!phi_ap.fixed_assignment());
1956
1957 auto slot_off = cur_tmp_slot + phi_ap.part_off();
1958 auto reg = tmp_reg1.alloc_from_bank(phi_ap.bank());
1959 derived()->load_from_stack(reg, slot_off, phi_ap.part_size());
1960 derived()->spill_reg(reg, phi_ap.frame_off(), phi_ap.part_size());
1961 }
1962 };
1963
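  // Worklist loop: PHIs whose current value is not needed by another PHI
  // (ref_count == 0) are written first; finishing one may make the PHI it
  // reads from ready. When only cycles remain (ready list empty but nodes
  // still waiting), one PHI of a cycle is saved into temporary registers or a
  // stack slot, which breaks the cycle; its dependent is later served from
  // the temporary via move_from_tmp_phi.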
1964 while (handled_count != nodes.size()) {
1965 if (ready_indices.empty()) {
1966 // need to break a cycle
1967 auto cur_idx_opt = waiting_nodes.first_set();
1968 assert(cur_idx_opt);
1969 auto cur_idx = *cur_idx_opt;
1970 assert(nodes[cur_idx].ref_count == 1);
1971 assert(cur_tmp_val == Adaptor::INVALID_VALUE_REF);
1972
1973 auto phi_val = nodes[cur_idx].phi;
1974 auto phi_vr = this->val_ref(phi_val);
1975 auto *assignment = phi_vr.assignment();
1976 cur_tmp_part_count = assignment->part_count;
1977 cur_tmp_val = phi_val;
1978
1979 if (cur_tmp_part_count > 2) {
1980 // use a stack slot to store the temporaries
1981 cur_tmp_slot_size = assignment->size();
1982 cur_tmp_slot = allocate_stack_slot(cur_tmp_slot_size);
1983
1984 for (u32 i = 0; i < cur_tmp_part_count; ++i) {
1985 auto ap = AssignmentPartRef{assignment, i};
1986 assert(!ap.fixed_assignment());
1987 auto slot_off = cur_tmp_slot + ap.part_off();
1988
1989 if (ap.register_valid()) {
1990 auto reg = ap.get_reg();
1991 derived()->spill_reg(reg, slot_off, ap.part_size());
1992 } else {
1993 auto reg = tmp_reg1.alloc_from_bank(ap.bank());
1994 assert(ap.stack_valid());
1995 derived()->load_from_stack(reg, ap.frame_off(), ap.part_size());
1996 derived()->spill_reg(reg, slot_off, ap.part_size());
1997 }
1998 }
1999 } else {
2000 // TODO(ts): if the PHI is not fixed, then we can just reuse its
2001 // register if it has one
2002 auto phi_vpr = phi_vr.part(0);
2003 auto reg = tmp_reg1.alloc_from_bank(phi_vpr.bank());
2004 phi_vpr.reload_into_specific_fixed(this, reg);
2005
2006 if (cur_tmp_part_count == 2) {
2007 // TODO(ts): just change the part ref on the lower ref?
2008 auto phi_vpr_high = phi_vr.part(1);
2009 auto reg_high = tmp_reg2.alloc_from_bank(phi_vpr_high.bank());
2010 phi_vpr_high.reload_into_specific_fixed(this, reg_high);
2011 }
2012 }
2013
2014 nodes[cur_idx].ref_count = 0;
2015 ready_indices.push_back(cur_idx);
2016 waiting_nodes.mark_unset(cur_idx);
2017 }
2018
2019 for (u32 i = 0; i < ready_indices.size(); ++i) {
2020 ++handled_count;
2021 auto cur_idx = ready_indices[i];
2022 auto phi_val = nodes[cur_idx].phi;
2023 IRValueRef incoming_val = nodes[cur_idx].incoming_val;
2024 if (incoming_val == phi_val) {
2025 // no need to do anything, except ref-counting
2026 (void)val_ref(incoming_val);
2027 (void)val_ref(phi_val);
2028 continue;
2029 }
2030
2031 if (incoming_val == cur_tmp_val) {
2032 move_from_tmp_phi(phi_val);
2033
2034 if (cur_tmp_part_count > 2) {
2035 free_stack_slot(cur_tmp_slot, cur_tmp_slot_size);
2036 cur_tmp_slot = 0xFFFF'FFFF;
2037 cur_tmp_slot_size = 0;
2038 }
2039 cur_tmp_val = Adaptor::INVALID_VALUE_REF;
2040 // skip the code below as the ref count of the temp val is
2041 // already 0 and we don't want to reinsert it into the ready
2042 // list
2043 continue;
2044 }
2045
2046 move_to_phi(phi_val, incoming_val);
2047
2048 if (nodes[cur_idx].incoming_phi_local_idx == INVALID_VAL_LOCAL_IDX) {
2049 continue;
2050 }
2051
2052 auto it = std::lower_bound(
2053 nodes.begin(), nodes.end(), nodes[cur_idx].incoming_phi_local_idx);
2054 assert(it != nodes.end() && it->phi == incoming_val &&
2055 "incoming_phi_local_idx set incorrectly");
2056
2057 assert(it->ref_count > 0);
2058 if (--it->ref_count == 0) {
2059 auto node_idx = static_cast<u32>(it - nodes.begin());
2060 ready_indices.push_back(node_idx);
2061 waiting_nodes.mark_unset(node_idx);
2062 }
2063 }
2064 ready_indices.clear();
2065 }
2066}
2067
2068template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
2071 return static_cast<BlockIndex>(static_cast<u32>(cur_block_idx) + 1);
2072}
2073
2074template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
2076 SymRef personality_sym;
2077 if (this->adaptor->cur_needs_unwind_info()) {
2078 SymRef personality_func = derived()->cur_personality_func();
2079 if (personality_func.valid()) {
2080 for (const auto &[fn_sym, ptr_sym] : personality_syms) {
2081 if (fn_sym == personality_func) {
2082 personality_sym = ptr_sym;
2083 break;
2084 }
2085 }
2086
2087 if (!personality_sym.valid()) {
2088 // create symbol that contains the address of the personality
2089 // function
2090 u32 off;
2091 static constexpr std::array<u8, 8> zero{};
2092
2093 auto rodata = this->assembler.get_data_section(true, true);
2094 personality_sym = this->assembler.sym_def_data(
2095 rodata, "", zero, 8, Assembler::SymBinding::LOCAL, &off);
2096 this->assembler.reloc_abs(rodata, personality_func, off, 0);
2097
2098 personality_syms.emplace_back(personality_func, personality_sym);
2099 }
2100 }
2101 }
2102 return personality_sym;
2103}
2104
2105template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
2107 const IRFuncRef func, const u32 func_idx) noexcept {
2108 if (!adaptor->switch_func(func)) {
2109 return false;
2110 }
2111 derived()->analysis_start();
2112 analyzer.switch_func(func);
2113 derived()->analysis_end();
2114
2115#ifndef NDEBUG
2116 stack.frame_size = ~0u;
2117#endif
2118 for (auto &e : stack.fixed_free_lists) {
2119 e.clear();
2120 }
2121 stack.dynamic_free_lists.clear();
2122 // TODO: sort out the inconsistency about adaptor vs. compiler methods.
2123 stack.has_dynamic_alloca = this->adaptor->cur_has_dynamic_alloca();
2124 stack.is_leaf_function = !derived()->cur_func_may_emit_calls();
2125 stack.generated_call = false;
2126
2127 assignments.cur_fixed_assignment_count = {};
2128 assert(std::ranges::none_of(assignments.value_ptrs, std::identity{}));
2129 if (assignments.value_ptrs.size() < analyzer.liveness.size()) {
2130 assignments.value_ptrs.resize(analyzer.liveness.size());
2131 }
2132
2133 assignments.allocator.reset();
2134 assignments.variable_ref_list = INVALID_VAL_LOCAL_IDX;
2135 assignments.delayed_free_lists.clear();
2136 assignments.delayed_free_lists.resize(analyzer.block_layout.size(),
2137 INVALID_VAL_LOCAL_IDX);
2138
2139 cur_block_idx =
2140 static_cast<BlockIndex>(analyzer.block_idx(adaptor->cur_entry_block()));
2141
2142 register_file.reset();
2143#ifndef NDEBUG
2144 generating_branch = false;
2145#endif
2146
2147 // Simple heuristic for initial allocation size
2148 u32 expected_code_size = 0x8 * analyzer.num_insts + 0x40;
2149 this->text_writer.begin_func(expected_code_size);
2150
2151 derived()->start_func(func_idx);
2152
2153 block_labels.clear();
2154 block_labels.resize_uninitialized(analyzer.block_layout.size());
2155 for (u32 i = 0; i < analyzer.block_layout.size(); ++i) {
2156 block_labels[i] = text_writer.label_create();
2157 }
2158
2159 // TODO(ts): place function label
2160 // TODO(ts): make function labels optional?
2161
2162 CCAssigner *cc_assigner = derived()->cur_cc_assigner();
2163 assert(cc_assigner != nullptr);
2164
2165 register_file.allocatable = cc_assigner->get_ccinfo().allocatable_regs;
2166
2167 // This initializes the stack frame, which must reserve space for
2168 // callee-saved registers, vararg save area, etc.
2169 cc_assigner->reset();
2170 derived()->gen_func_prolog_and_args(cc_assigner);
2171
2172 for (const IRValueRef alloca : adaptor->cur_static_allocas()) {
2173 auto size = adaptor->val_alloca_size(alloca);
2174 size = util::align_up(size, adaptor->val_alloca_align(alloca));
2175
2176 ValLocalIdx local_idx = adaptor->val_local_idx(alloca);
2177 init_variable_ref(local_idx, 0);
2178 ValueAssignment *assignment = val_assignment(local_idx);
2179 assignment->stack_variable = true;
2180 assignment->frame_off = allocate_stack_slot(size);
2181 }
2182
2183 if constexpr (!Config::DEFAULT_VAR_REF_HANDLING) {
2184 derived()->setup_var_ref_assignments();
2185 }
2186
2187 for (u32 i = 0; i < analyzer.block_layout.size(); ++i) {
2188 const auto block_ref = analyzer.block_layout[i];
2189 TPDE_LOG_TRACE(
2190 "Compiling block {} ({})", i, adaptor->block_fmt_ref(block_ref));
2191 if (!derived()->compile_block(block_ref, i)) [[unlikely]] {
2192 TPDE_LOG_ERR("Failed to compile block {} ({})",
2193 i,
2194 adaptor->block_fmt_ref(block_ref));
2195 // Ensure invariant that value_ptrs only contains nullptr at the end.
2196 assignments.value_ptrs.clear();
2197 return false;
2198 }
2199 }
2200
2201 // Reset all variable-ref assignment pointers to nullptr.
2202 ValLocalIdx variable_ref_list = assignments.variable_ref_list;
2203 while (variable_ref_list != INVALID_VAL_LOCAL_IDX) {
2204 u32 idx = u32(variable_ref_list);
2205 ValLocalIdx next = assignments.value_ptrs[idx]->next_delayed_free_entry;
2206 assignments.value_ptrs[idx] = nullptr;
2207 variable_ref_list = next;
2208 }
2209
2210 assert(std::ranges::none_of(assignments.value_ptrs, std::identity{}) &&
2211 "found non-freed ValueAssignment, maybe missing ref-count?");
2212
2213 derived()->finish_func(func_idx);
2214 this->text_writer.finish_func();
2215
2216 return true;
2217}
2218
2219template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
2220bool CompilerBase<Adaptor, Derived, Config>::compile_block(
2221 const IRBlockRef block, const u32 block_idx) noexcept {
2222 cur_block_idx =
2223 static_cast<typename Analyzer<Adaptor>::BlockIndex>(block_idx);
2224
2225 label_place(block_labels[block_idx]);
2226 auto &&val_range = adaptor->block_insts(block);
2227 auto end = val_range.end();
2228 for (auto it = val_range.begin(); it != end; ++it) {
2229 const IRInstRef inst = *it;
2230 if (this->adaptor->inst_fused(inst)) {
2231 continue;
2232 }
2233
2234 auto it_cpy = it;
2235 ++it_cpy;
2236 if (!derived()->compile_inst(inst, InstRange{.from = it_cpy, .to = end}))
2237 [[unlikely]] {
2238 TPDE_LOG_ERR("Failed to compile instruction {}",
2239 this->adaptor->inst_fmt_ref(inst));
2240 return false;
2241 }
2242 }
2243
2244#ifndef NDEBUG
2245 // Some consistency checks. Register assignment information must match, all
2246 // used registers must have an assignment (no temporaries across blocks), and
2247 // fixed registers must be fixed assignments.
2248 for (auto reg_id : register_file.used_regs()) {
2249 Reg reg{reg_id};
2250 assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);
2251 AssignmentPartRef ap{val_assignment(register_file.reg_local_idx(reg)),
2252 register_file.reg_part(reg)};
2253 assert(ap.register_valid());
2254 assert(ap.get_reg() == reg);
2255 assert(!register_file.is_fixed(reg) || ap.fixed_assignment());
2256 }
2257#endif
2258
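  // Free assignments whose release was deferred to the end of this block;
  // they are chained through next_delayed_free_entry.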
2259 if (static_cast<u32>(assignments.delayed_free_lists[block_idx]) != ~0u) {
2260 auto list_entry = assignments.delayed_free_lists[block_idx];
2261 while (static_cast<u32>(list_entry) != ~0u) {
2262 auto *assignment = assignments.value_ptrs[static_cast<u32>(list_entry)];
2263 auto next_entry = assignment->next_delayed_free_entry;
2264 derived()->free_assignment(list_entry, assignment);
2265 list_entry = next_entry;
2266 }
2267 }
2268 return true;
2269}
2270
2271} // namespace tpde