TPDE
CompilerBase.hpp
1// SPDX-FileCopyrightText: 2025 Contributors to TPDE <https://tpde.org>
2//
3// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4#pragma once
5
6#include <algorithm>
7#include <functional>
8#include <unordered_map>
9#include <variant>
10
11#include "Analyzer.hpp"
12#include "Compiler.hpp"
13#include "CompilerConfig.hpp"
14#include "IRAdaptor.hpp"
15#include "tpde/AssignmentPartRef.hpp"
16#include "tpde/FunctionWriter.hpp"
17#include "tpde/RegisterFile.hpp"
18#include "tpde/ValLocalIdx.hpp"
19#include "tpde/ValueAssignment.hpp"
20#include "tpde/base.hpp"
21#include "tpde/util/function_ref.hpp"
22#include "tpde/util/misc.hpp"
23
24namespace tpde {
25// TODO(ts): formulate concept for full compiler so that there is *some* check
26// whether all the required derived methods are implemented?
27
28/// Thread-local storage access mode
29enum class TLSModel {
30 GlobalDynamic,
31 LocalDynamic,
32 InitialExec,
33 LocalExec,
34};
35
36struct CCAssignment {
37 Reg reg = Reg::make_invalid(); ///< Assigned register, invalid implies stack.
38
39 /// If non-zero, indicates that this and the next N values must be
40 /// assigned to consecutive registers, or to the stack. The following
41 /// values must be in the same register bank as this value. u8 is sufficient,
42 /// as no architecture has more than 255 parameter registers in a single bank.
43 u8 consecutive = 0;
44 bool sret : 1 = false; ///< Argument is return value pointer.
45
46 /// The argument is passed by value on the stack. The provided argument is a
47 /// pointer; for the call, size bytes will be copied into the corresponding
48 /// stack slot. Behaves like LLVM's byval.
49 ///
50 /// Note: On x86-64 SysV, this is used to pass larger structs in memory. Note
51 /// that AArch64 AAPCS doesn't use byval for structs; instead, the pointer is
52 /// passed without byval and it is the responsibility of the caller to
53 /// explicitly copy the value.
54 bool byval : 1 = false;
55
56 /// Extend integer argument. The highest bit indicates signedness, the lower bits
57 /// indicate the source width from which the argument should be extended.
58 u8 int_ext = 0;
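 // Example (informal): sign-extend from 8 bits => int_ext = 0x80 | 8 = 0x88;
 // zero-extend from 32 bits => int_ext = 32. This matches the decoding in
 // CallBuilderBase::add_arg below (sign = int_ext >> 7, width = int_ext & 0x3f).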
59
60 u8 align = 0; ///< Argument alignment
61 RegBank bank = RegBank{}; ///< Register bank to assign the value to.
62 u32 size = 0; ///< Argument size, for byval the stack slot size.
63 u32 stack_off = 0; ///< Assigned stack slot, only valid if reg is invalid.
64};
65
66struct CCInfo {
67 // TODO: use RegBitSet
68 const u64 allocatable_regs;
69 const u64 callee_saved_regs;
70 /// Possible argument registers; these registers will not be allocated until
71 /// all arguments have been assigned.
72 const u64 arg_regs;
73 /// Size of red zone below stack pointer.
74 const u8 red_zone_size = 0;
75};
76
77class CCAssigner {
78public:
79 const CCInfo *ccinfo;
80
81 CCAssigner(const CCInfo &ccinfo) noexcept : ccinfo(&ccinfo) {}
82 virtual ~CCAssigner() noexcept {}
83
84 virtual void reset() noexcept = 0;
85
86 const CCInfo &get_ccinfo() const noexcept { return *ccinfo; }
87
88 virtual void assign_arg(CCAssignment &cca) noexcept = 0;
89 virtual u32 get_stack_size() noexcept = 0;
90 /// Some calling conventions need different call behavior when calling a
91 /// vararg function.
92 virtual bool is_vararg() const noexcept { return false; }
93 virtual void assign_ret(CCAssignment &cca) noexcept = 0;
94};
95
96/// The base class for the compiler.
97 /// It implements the main platform-independent compilation logic and houses the
98 /// analyzer.
99template <IRAdaptor Adaptor,
100 typename Derived,
101 CompilerConfig Config = CompilerConfigDefault>
102struct CompilerBase {
103 // some forwards for the IR type defs
104 using IRValueRef = typename Adaptor::IRValueRef;
105 using IRInstRef = typename Adaptor::IRInstRef;
106 using IRBlockRef = typename Adaptor::IRBlockRef;
107 using IRFuncRef = typename Adaptor::IRFuncRef;
108
109 using BlockIndex = typename Analyzer<Adaptor>::BlockIndex;
110
111 using AsmReg = typename Config::AsmReg;
112
113 using RegisterFile = tpde::RegisterFile<Config::NUM_BANKS, 32>;
114
115 /// A default implementation for ValRefSpecial.
116 // Note: Subclasses can override this; always use Derived::ValRefSpecial.
117 struct ValRefSpecial {
118 uint8_t mode = 4;
119 u64 const_data;
120 };
121
122#pragma region CompilerData
123 Adaptor *adaptor;
124 Analyzer<Adaptor> analyzer;
125
126 // data for frame management
127
128 struct {
129 /// The current size of the stack frame
130 u32 frame_size = 0;
131 /// Whether the stack frame might have dynamic alloca. Dynamic allocas may
132 /// require a different and less efficient frame setup.
134 /// Whether the function is guaranteed to be a leaf function. Throughout the
135 /// entire function, the compiler may assume the absence of function calls.
136 bool is_leaf_function = false;
137 /// Whether the function actually includes a call. There are cases, where it
138 /// is not clear from the beginning whether a function has function calls.
139 /// If a function has no calls, this will allow using the red zone
140 /// guaranteed by some ABIs.
141 bool generated_call = false;
142 /// Whether the stack frame is used. If the stack frame is never used, the
143 /// frame setup can in some cases be omitted entirely.
144 bool frame_used = false;
145 /// Free-Lists for 1/2/4/8/16 sized allocations
146 // TODO(ts): make the allocations for 4/8 different from the others
147 // since they are probably the ones most used?
148 util::SmallVector<i32, 16> fixed_free_lists[5] = {};
149 /// Free-Lists for all other sizes
150 // TODO(ts): think about which data structure we want here
151 std::unordered_map<u32, std::vector<i32>> dynamic_free_lists{};
152 } stack = {};
153
154 typename Analyzer<Adaptor>::BlockIndex cur_block_idx;
155
156 // Assignments
157
158 static constexpr ValLocalIdx INVALID_VAL_LOCAL_IDX =
159 static_cast<ValLocalIdx>(~0u);
160
161 // TODO(ts): think about different ways to store this that are maybe more
162 // compact?
163 struct {
164 AssignmentAllocator allocator;
165
166 std::array<u32, Config::NUM_BANKS> cur_fixed_assignment_count = {};
167 util::SmallVector<ValueAssignment *, Analyzer<Adaptor>::SMALL_VALUE_NUM>
168 value_ptrs;
169
170 ValLocalIdx variable_ref_list;
171 util::SmallVector<ValLocalIdx, Analyzer<Adaptor>::SMALL_BLOCK_NUM>
172 delayed_free_lists;
173 } assignments = {};
174
175 RegisterFile register_file;
176#ifndef NDEBUG
177 /// Whether we are currently in the middle of generating branch-related code
178 /// and therefore must not change any value-related state.
179 bool generating_branch = false;
180#endif
181
182private:
183 /// Default CCAssigner if the implementation doesn't override cur_cc_assigner.
184 typename Config::DefaultCCAssigner default_cc_assigner;
185
186public:
187 typename Config::Assembler assembler;
188 Config::FunctionWriter text_writer;
189 // TODO(ts): smallvector?
190 std::vector<SymRef> func_syms;
191 // TODO(ts): combine this with the block vectors in the analyzer to save on
192 // allocations
193 util::SmallVector<Label> block_labels;
194
195 util::SmallVector<std::pair<SymRef, SymRef>, 4> personality_syms = {};
196
197 struct ScratchReg;
198 class ValuePart;
199 struct ValuePartRef;
200 struct ValueRef;
201 struct GenericValuePart;
202#pragma endregion
203
204 struct InstRange {
205 using Range = decltype(std::declval<Adaptor>().block_insts(
206 std::declval<IRBlockRef>()));
207 using Iter = decltype(std::declval<Range>().begin());
208 using EndIter = decltype(std::declval<Range>().end());
209 Iter from;
210 EndIter to;
211 };
212
213 /// Call argument, enhancing an IRValueRef with information on how to pass it.
214 struct CallArg {
215 enum class Flag : u8 {
216 none, ///< No extra handling.
217 zext, ///< Scalar integer, zero-extend to target-specific size.
218 sext, ///< Scalar integer, sign-extend to target-specific size.
219 sret, ///< Struct return pointer.
220 byval, ///< Value is copied into corresponding stack slot.
221 allow_split, ///< Value parts can be split across stack/registers.
222 };
223
224 explicit CallArg(IRValueRef value,
225 Flag flags = Flag::none,
226 u8 byval_align = 1,
227 u32 byval_size = 0)
228 : value(value),
229 flag(flags),
230 byval_align(byval_align),
231 byval_size(byval_size) {}
232
233 IRValueRef value; ///< Argument IR value.
234 Flag flag; ///< Value handling flag.
235 u8 byval_align; ///< For Flag::byval, the stack alignment.
236 u8 ext_bits = 0; ///< For Flag::zext and Flag::sext, the source bit width.
237 u32 byval_size; ///< For Flag::byval, the argument size.
238 };
239
240 /// Base class for target-specific CallBuilder implementations.
241 template <typename CBDerived>
242 class CallBuilderBase {
243 protected:
244 Derived &compiler;
245 CCAssigner &assigner;
246
247 RegisterFile::RegBitSet arg_regs{};
248
249 CallBuilderBase(Derived &compiler, CCAssigner &assigner) noexcept
250 : compiler(compiler), assigner(assigner) {}
251
252 // CBDerived needs:
253 // void add_arg_byval(ValuePart &vp, CCAssignment &cca) noexcept;
254 // void add_arg_stack(ValuePart &vp, CCAssignment &cca) noexcept;
255 // void call_impl(std::variant<SymRef, ValuePart> &&) noexcept;
256 CBDerived *derived() noexcept { return static_cast<CBDerived *>(this); }
257
258 public:
259 /// Add a value part as argument. cca must be populated with information
260 /// about the argument, except for the reg/stack_off, which are set by the
261 /// CCAssigner. If no register bank is assigned, the register bank and size
262 /// are retrieved from the value part; otherwise, the size must be set, too.
263 void add_arg(ValuePart &&vp, CCAssignment cca) noexcept;
264 /// Add a full IR value as argument, with an explicit number of parts.
265 /// Values are decomposed into their parts and are typically either fully
266 /// in registers or fully on the stack (except CallArg::Flag::allow_split).
267 void add_arg(const CallArg &arg, u32 part_count) noexcept;
268 /// Add a full IR value as argument. The number of value parts must be
269 /// exposed via val_parts. Values are decomposed into their parts and are
270 /// typically either fully in registers or fully on the stack (except
271 /// CallArg::Flag::allow_split).
272 void add_arg(const CallArg &arg) noexcept {
273 add_arg(std::move(arg), compiler.val_parts(arg.value).count());
274 }
275
276 /// Generate the function call (evict registers, call, reset stack frame).
277 void call(std::variant<SymRef, ValuePart>) noexcept;
278
279 /// Assign next return value part to vp.
280 void add_ret(ValuePart &vp, CCAssignment cca) noexcept;
281 /// Assign next return value part to vp.
282 void add_ret(ValuePart &&vp, CCAssignment cca) noexcept {
283 add_ret(vp, cca);
284 }
285 /// Assign return values to the IR value.
286 void add_ret(ValueRef &vr) noexcept;
287 };
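// Usage sketch (illustrative only; `TargetCallBuilder`, `operands`, `func_sym`
// and `result_value` are hypothetical names, the builder type is provided by
// the target backend):
//
//   TargetCallBuilder cb{*derived(), *cur_cc_assigner()};
//   for (IRValueRef op : operands)
//     cb.add_arg(CallArg{op});
//   cb.call(func_sym);                      // SymRef or ValuePart callee
//   ValueRef res = derived()->result_ref(result_value);
//   cb.add_ret(res);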
288
289 class RetBuilder {
290 Derived &compiler;
291 CCAssigner &assigner;
292
293 RegisterFile::RegBitSet ret_regs{};
294
295 public:
296 RetBuilder(Derived &compiler, CCAssigner &assigner) noexcept
297 : compiler(compiler), assigner(assigner) {
298 assigner.reset();
299 }
300
301 void add(ValuePart &&vp, CCAssignment cca) noexcept;
302 void add(IRValueRef val) noexcept;
303
304 void ret() noexcept;
305 };
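// Usage sketch (illustrative; `ret_val` is a hypothetical returned IR value):
//
//   RetBuilder rb{*derived(), *cur_cc_assigner()};
//   if (has_return_value)
//     rb.add(ret_val);  // moves all value parts into the return registers
//   rb.ret();           // emits the epilogue and releases the registers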
306
307 /// Initialize a CompilerBase; should be called by the derived classes.
308 explicit CompilerBase(Adaptor *adaptor)
309 : adaptor(adaptor), analyzer(adaptor), assembler() {
310 static_assert(std::is_base_of_v<CompilerBase, Derived>);
311 static_assert(Compiler<Derived, Config>);
312 }
313
314 /// shortcut for casting to the Derived class so that overloading
315 /// works
316 Derived *derived() { return static_cast<Derived *>(this); }
317
318 const Derived *derived() const { return static_cast<const Derived *>(this); }
319
320 [[nodiscard]] ValLocalIdx val_idx(const IRValueRef value) const noexcept {
321 return analyzer.adaptor->val_local_idx(value);
322 }
323
324 [[nodiscard]] ValueAssignment *
325 val_assignment(const ValLocalIdx idx) noexcept {
326 return assignments.value_ptrs[static_cast<u32>(idx)];
327 }
328
329 /// Compile the functions returned by Adaptor::funcs
330 ///
331 /// \warning If you intend to call this multiple times, you must call reset
332 /// in-between the calls.
333 ///
334 /// \returns Whether the compilation was successful
335 bool compile();
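// Driver sketch (illustrative; `MyAdaptor` and `MyCompiler` are hypothetical
// IRAdaptor/derived compiler types):
//
//   MyAdaptor adaptor{module};
//   MyCompiler compiler{&adaptor};
//   bool ok = compiler.compile();
//   compiler.reset();               // required before compiling again
//   ok = ok && compiler.compile();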
336
337 /// Reset any leftover data from the previous compilation such that it will
338 /// not affect the next compilation
339 void reset();
340
341 /// Get CCAssigner for current function.
342 CCAssigner *cur_cc_assigner() noexcept { return &default_cc_assigner; }
343
344 void init_assignment(IRValueRef value, ValLocalIdx local_idx) noexcept;
345
346private:
347 /// Frees an assignment, its stack slot and registers
348 void free_assignment(ValLocalIdx local_idx, ValueAssignment *) noexcept;
349
350public:
351 /// Release an assignment when reference count drops to zero, either frees
352 /// the assignment immediately or delays free to the end of the live range.
353 void release_assignment(ValLocalIdx local_idx, ValueAssignment *) noexcept;
354
355 /// Init a variable-ref assignment
356 void init_variable_ref(ValLocalIdx local_idx, u32 var_ref_data) noexcept;
357 /// Init a variable-ref assignment
358 void init_variable_ref(IRValueRef value, u32 var_ref_data) noexcept {
359 init_variable_ref(adaptor->val_local_idx(value), var_ref_data);
360 }
361
362 /// \name Stack Slots
363 /// @{
364
365 /// Allocate a static stack slot.
366 i32 allocate_stack_slot(u32 size) noexcept;
367 /// Free a static stack slot.
368 void free_stack_slot(u32 slot, u32 size) noexcept;
369
370 /// @}
371
372 /// Assign function argument in prologue. \ref align can be used to increase
373 /// the minimal stack alignment of the first part of the argument. If \ref
374 /// allow_split is set, the argument can be passed partially in registers,
375 /// otherwise (default) it must be either passed completely in registers or
376 /// completely on the stack.
377 void prologue_assign_arg(CCAssigner *cc_assigner,
378 u32 arg_idx,
379 IRValueRef arg,
380 u32 align = 1,
381 bool allow_split = false) noexcept;
382
383 /// \name Value References
384 /// @{
385
386 /// Get a using reference to a value.
387 ValueRef val_ref(IRValueRef value) noexcept;
388
389 /// Get a using reference to a single-part value and provide direct access to
390 /// the only part. This is a convenience function; note that the ValueRef must
391 /// outlive the ValuePartRef (i.e. auto p = val_ref().part(0); won't work, as
392 /// the value will possibly be deallocated when the ValueRef is destroyed).
393 std::pair<ValueRef, ValuePartRef> val_ref_single(IRValueRef value) noexcept;
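// Example (sketch): keep the ValueRef alive while the part is in use.
//
//   auto [op_ref, op_part] = derived()->val_ref_single(operand);
//   AsmReg reg = op_part.load_to_reg();  // op_ref must still be alive here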
394
395 /// Get a defining reference to a value.
396 ValueRef result_ref(IRValueRef value) noexcept;
397
398 /// Get a defining reference to a single-part value and provide direct access
399 /// to the only part. Similar to val_ref_single().
400 std::pair<ValueRef, ValuePartRef>
401 result_ref_single(IRValueRef value) noexcept;
402
403 /// Make dst an alias for src, which must be a non-constant value with an
404 /// identical part configuration. src must be in its last use (is_owned()),
405 /// and the assignment will be repurposed for dst, keeping all assigned
406 /// registers and stack slots.
407 ValueRef result_ref_alias(IRValueRef dst, ValueRef &&src) noexcept;
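// Example (sketch): reuse the operand's assignment for a value-preserving
// operation such as a no-op cast, assuming `op`/`cast_val` are the IR operand
// and result:
//
//   ValueRef src = derived()->val_ref(op);
//   if (src.has_assignment() && src.is_owned()) {
//     ValueRef dst = derived()->result_ref_alias(cast_val, std::move(src));
//     // ... use dst ...
//   }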
408
409 /// Initialize value as a pointer into a stack variable (i.e., a value
410 /// allocated from cur_static_allocas() or similar) with an offset. The
411 /// result value will be a stack variable itself.
412 ValueRef result_ref_stack_slot(IRValueRef value,
413 AssignmentPartRef base,
414 i32 off) noexcept;
415
416 /// @}
417
418 [[deprecated("Use ValuePartRef::set_value")]]
419 void set_value(ValuePartRef &val_ref, ScratchReg &scratch) noexcept;
420 [[deprecated("Use ValuePartRef::set_value")]]
421 void set_value(ValuePartRef &&val_ref, ScratchReg &scratch) noexcept {
422 set_value(val_ref, scratch);
423 }
424
425 /// Get generic value part into a single register, evaluating expressions
426 /// and materializing immediates as required.
427 AsmReg gval_as_reg(GenericValuePart &gv) noexcept;
428
429 /// Like gval_as_reg; if the GenericValuePart owns a reusable register
430 /// (either a ScratchReg, possibly due to materialization, or a reusable
431 /// ValuePartRef), store it in dst.
432 AsmReg gval_as_reg_reuse(GenericValuePart &gv, ScratchReg &dst) noexcept;
433
434private:
435 /// @internal Select register when a value needs to be evicted.
436 Reg select_reg_evict(RegBank bank, u64 exclusion_mask) noexcept;
437
438public:
439 /// \name Low-Level Assignment Register Handling
440 /// @{
441
442 /// Select an available register, evicting loaded values if needed.
443 Reg select_reg(RegBank bank, u64 exclusion_mask) noexcept {
444 Reg res = register_file.find_first_free_excluding(bank, exclusion_mask);
445 if (res.valid()) [[likely]] {
446 return res;
447 }
448 return select_reg_evict(bank, exclusion_mask);
449 }
450
451 /// Reload a value part from memory or recompute variable address.
452 void reload_to_reg(AsmReg dst, AssignmentPartRef ap) noexcept;
453
454 /// Allocate a stack slot for an assignment.
455 void allocate_spill_slot(AssignmentPartRef ap) noexcept;
456
457 /// Ensure the value is spilled in its stack slot (except variable refs).
458 void spill(AssignmentPartRef ap) noexcept;
459
460 /// Evict the value from its register, spilling if needed, and free register.
461 void evict(AssignmentPartRef ap) noexcept;
462
463 /// Evict the value from the register, spilling if needed, and free register.
464 void evict_reg(Reg reg) noexcept;
465
466 /// Free the register. Requires that the contained value is already spilled.
467 void free_reg(Reg reg) noexcept;
468
469 /// @}
470
471 /// \name High-Level Branch Generation
472 /// @{
473
474 /// Generate an unconditional branch at the end of a basic block. No further
475 /// instructions must follow. If target is the next block in the block order,
476 /// the branch is omitted.
477 void generate_uncond_branch(IRBlockRef target) noexcept;
478
479 /// Generate a conditional branch at the end of a basic block.
480 template <typename Jump>
481 void generate_cond_branch(Jump jmp,
482 IRBlockRef true_target,
483 IRBlockRef false_target) noexcept;
484
485 /// Generate a switch at the end of a basic block. Only the lowest bits of the
486 /// condition are considered. The condition must be a general-purpose
487 /// register. The cases must be sorted and every case value must appear at
488 /// most once.
490 ScratchReg &&cond,
491 u32 width,
492 IRBlockRef default_block,
493 std::span<const std::pair<u64, IRBlockRef>> cases) noexcept;
494
495 /// @}
496
497 /// \name Low-Level Branch Primitives
498 /// The general flow of using these low-level primitives (see the sketch below) is:
499 /// 1. spill_before_branch()
500 /// 2. begin_branch_region()
501 /// 3. One or more calls to generate_branch_to_block()
502 /// 4. end_branch_region()
503 /// 5. release_spilled_regs()
504 /// @{
505
506 // TODO(ts): switch to a branch_spill_before naming style?
507 /// Spill values that need to be spilled for later blocks. Returns the set
508 /// of registers that will be free'd at the end of the block; pass this to
509 /// release_spilled_regs().
510 typename RegisterFile::RegBitSet
511 spill_before_branch(bool force_spill = false) noexcept;
512 /// Free registers marked by spill_before_branch().
513 void release_spilled_regs(typename RegisterFile::RegBitSet) noexcept;
514
515 /// When reaching a point in the function where no other blocks will be
516 /// reached anymore, use this function to release register assignments after
517 /// the end of that block so the compiler does not accidentally use
518 /// registers which don't contain any values.
519 void release_regs_after_return() noexcept;
520
521 /// Indicate beginning of region where value-state must not change.
522 void begin_branch_region() noexcept {
523#ifndef NDEBUG
524 assert(!generating_branch);
525 generating_branch = true;
526#endif
527 }
528
529 /// Indicate end of region where value-state must not change.
530 void end_branch_region() noexcept {
531#ifndef NDEBUG
532 assert(generating_branch);
533 generating_branch = false;
534#endif
535 }
536
537 /// Generate a branch to a basic block; execution continues afterwards.
538 /// Multiple calls to this function can be used to build conditional branches.
539 /// @tparam Jump Target-defined Jump type (e.g., CompilerX64::Jump).
540 /// @param needs_split Result of branch_needs_split(); pass false for an
541 /// unconditional branch.
542 /// @param last_inst Whether fall-through to target is possible.
543 template <typename Jump>
544 void generate_branch_to_block(Jump jmp,
545 IRBlockRef target,
546 bool needs_split,
547 bool last_inst) noexcept;
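// Sketch of the low-level flow listed above (illustrative; `jmp_cc`/`jmp_always`
// are target-specific Jump values, `true_block`/`false_block` hypothetical):
//
//   auto spilled = spill_before_branch();
//   begin_branch_region();
//   generate_branch_to_block(jmp_cc, true_block,
//                            branch_needs_split(true_block), false);
//   generate_branch_to_block(jmp_always, false_block, /*needs_split=*/false,
//                            /*last_inst=*/true);
//   end_branch_region();
//   release_spilled_regs(spilled);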
548
549#ifndef NDEBUG
550 bool may_change_value_state() const noexcept { return !generating_branch; }
551#endif
552
553 void move_to_phi_nodes(BlockIndex target) noexcept {
554 if (analyzer.block_has_phis(target)) {
555 move_to_phi_nodes_impl(target);
556 }
557 }
558
559 void move_to_phi_nodes_impl(BlockIndex target) noexcept;
560
561 /// Whether a branch to a block requires additional instructions and therefore
562 /// a direct jump to the block is not possible.
563 bool branch_needs_split(IRBlockRef target) noexcept {
564 // for now, if the target has PHI-nodes, we split
565 return analyzer.block_has_phis(target);
566 }
567
568 /// @}
569
570 BlockIndex next_block() const noexcept;
571
572 bool try_force_fixed_assignment(IRValueRef) const noexcept { return false; }
573
574 bool hook_post_func_sym_init() noexcept { return true; }
575
576 void analysis_start() noexcept {}
577
578 void analysis_end() noexcept {}
579
580 void reloc_text(SymRef sym, u32 type, u64 offset, i64 addend = 0) noexcept {
581 this->assembler.reloc_sec(
582 text_writer.get_sec_ref(), sym, type, offset, addend);
583 }
584
585 /// Convenience function to place a label at the current position.
586 void label_place(Label label) noexcept {
587 this->text_writer.label_place(label, text_writer.offset());
588 }
589
590protected:
591 SymRef get_personality_sym() noexcept;
592
593 bool compile_func(IRFuncRef func, u32 func_idx) noexcept;
594
595 bool compile_block(IRBlockRef block, u32 block_idx) noexcept;
596};
597} // namespace tpde
598
599#include "GenericValuePart.hpp"
600#include "ScratchReg.hpp"
601#include "ValuePartRef.hpp"
602#include "ValueRef.hpp"
603
604namespace tpde {
605
606template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
607template <typename CBDerived>
608void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
609 CBDerived>::add_arg(ValuePart &&vp, CCAssignment cca) noexcept {
610 if (!cca.byval && cca.bank == RegBank{}) {
611 cca.bank = vp.bank();
612 cca.size = vp.part_size();
613 }
614
615 assigner.assign_arg(cca);
616 bool needs_ext = cca.int_ext != 0;
617 bool ext_sign = cca.int_ext >> 7;
618 unsigned ext_bits = cca.int_ext & 0x3f;
619
620 if (cca.byval) {
621 derived()->add_arg_byval(vp, cca);
622 vp.reset(&compiler);
623 } else if (!cca.reg.valid()) {
624 if (needs_ext) {
625 auto ext = std::move(vp).into_extended(&compiler, ext_sign, ext_bits, 64);
626 derived()->add_arg_stack(ext, cca);
627 ext.reset(&compiler);
628 } else {
629 derived()->add_arg_stack(vp, cca);
630 }
631 vp.reset(&compiler);
632 } else {
633 u32 size = vp.part_size();
634 if (vp.is_in_reg(cca.reg)) {
635 if (!vp.can_salvage()) {
636 compiler.evict_reg(cca.reg);
637 } else {
638 vp.salvage(&compiler);
639 }
640 if (needs_ext) {
641 compiler.generate_raw_intext(cca.reg, cca.reg, ext_sign, ext_bits, 64);
642 }
643 } else {
644 if (compiler.register_file.is_used(cca.reg)) {
645 compiler.evict_reg(cca.reg);
646 }
647 if (vp.can_salvage()) {
648 AsmReg vp_reg = vp.salvage(&compiler);
649 if (needs_ext) {
650 compiler.generate_raw_intext(cca.reg, vp_reg, ext_sign, ext_bits, 64);
651 } else {
652 compiler.mov(cca.reg, vp_reg, size);
653 }
654 } else if (needs_ext && vp.is_const()) {
655 u64 val = vp.const_data()[0];
656 u64 extended =
657 ext_sign ? util::sext(val, ext_bits) : util::zext(val, ext_bits);
658 compiler.materialize_constant(&extended, cca.bank, 8, cca.reg);
659 } else {
660 vp.reload_into_specific_fixed(&compiler, cca.reg);
661 if (needs_ext) {
662 compiler.generate_raw_intext(
663 cca.reg, cca.reg, ext_sign, ext_bits, 64);
664 }
665 }
666 }
667 vp.reset(&compiler);
668 assert(!compiler.register_file.is_used(cca.reg));
669 compiler.register_file.mark_clobbered(cca.reg);
670 compiler.register_file.allocatable &= ~(u64{1} << cca.reg.id());
671 arg_regs |= (1ull << cca.reg.id());
672 }
673}
674
675template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
676template <typename CBDerived>
677void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
678 CBDerived>::add_arg(const CallArg &arg, u32 part_count) noexcept {
679 ValueRef vr = compiler.val_ref(arg.value);
680
681 if (arg.flag == CallArg::Flag::byval) {
682 assert(part_count == 1);
683 add_arg(vr.part(0),
684 CCAssignment{
685 .byval = true,
686 .align = arg.byval_align,
687 .size = arg.byval_size,
688 });
689 return;
690 }
691
692 u32 align = arg.byval_align;
693 bool allow_split = arg.flag == CallArg::Flag::allow_split;
694
695 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
696 u8 int_ext = 0;
697 if (arg.flag == CallArg::Flag::sext || arg.flag == CallArg::Flag::zext) {
698 assert(arg.ext_bits != 0 && "cannot extend zero-bit integer");
699 int_ext = arg.ext_bits | (arg.flag == CallArg::Flag::sext ? 0x80 : 0);
700 }
701 u32 remaining = part_count < 256 ? part_count - part_idx - 1 : 255;
702 derived()->add_arg(vr.part(part_idx),
703 CCAssignment{
704 .consecutive = u8(allow_split ? 0 : remaining),
705 .sret = arg.flag == CallArg::Flag::sret,
706 .int_ext = int_ext,
707 .align = u8(part_idx == 0 ? align : 1),
708 });
709 }
710}
711
712template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
713template <typename CBDerived>
714void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<CBDerived>::call(
715 std::variant<SymRef, ValuePart> target) noexcept {
716 assert(!compiler.stack.is_leaf_function && "leaf func must not have calls");
717 compiler.stack.generated_call = true;
718 typename RegisterFile::RegBitSet skip_evict = arg_regs;
719 if (auto *vp = std::get_if<ValuePart>(&target); vp && vp->can_salvage()) {
720 // call_impl will reset vp, thereby unlock+free the register.
721 assert(vp->cur_reg_unlocked().valid() && "can_salvage implies register");
722 skip_evict |= (1ull << vp->cur_reg_unlocked().id());
723 }
724
725 auto clobbered = ~assigner.get_ccinfo().callee_saved_regs;
726 for (auto reg_id : util::BitSetIterator<>{compiler.register_file.used &
727 clobbered & ~skip_evict}) {
728 compiler.evict_reg(AsmReg{reg_id});
729 compiler.register_file.mark_clobbered(Reg{reg_id});
730 }
731
732 derived()->call_impl(std::move(target));
733
734 assert((compiler.register_file.allocatable & arg_regs) == 0);
735 compiler.register_file.allocatable |= arg_regs;
736}
737
738template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
739template <typename CBDerived>
740void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
741 CBDerived>::add_ret(ValuePart &vp, CCAssignment cca) noexcept {
742 cca.bank = vp.bank();
743 cca.size = vp.part_size();
744 assigner.assign_ret(cca);
745 assert(cca.reg.valid() && "return value must be in register");
746 vp.set_value_reg(&compiler, cca.reg);
747}
748
749template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
750template <typename CBDerived>
751void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
752 CBDerived>::add_ret(ValueRef &vr) noexcept {
753 assert(vr.has_assignment());
754 u32 part_count = vr.assignment()->part_count;
755 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
756 CCAssignment cca;
757 add_ret(vr.part(part_idx), cca);
758 }
759}
760
761template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
762void CompilerBase<Adaptor, Derived, Config>::RetBuilder::add(
763 ValuePart &&vp, CCAssignment cca) noexcept {
764 cca.bank = vp.bank();
765 u32 size = cca.size = vp.part_size();
766 assigner.assign_ret(cca);
767 assert(cca.reg.valid() && "indirect return value must use sret argument");
768
769 bool needs_ext = cca.int_ext != 0;
770 bool ext_sign = cca.int_ext >> 7;
771 unsigned ext_bits = cca.int_ext & 0x3f;
772
773 if (vp.is_in_reg(cca.reg)) {
774 if (!vp.can_salvage()) {
775 compiler.evict_reg(cca.reg);
776 } else {
777 vp.salvage(&compiler);
778 }
779 if (needs_ext) {
780 compiler.generate_raw_intext(cca.reg, cca.reg, ext_sign, ext_bits, 64);
781 }
782 } else {
783 if (compiler.register_file.is_used(cca.reg)) {
784 compiler.evict_reg(cca.reg);
785 }
786 if (vp.can_salvage()) {
787 AsmReg vp_reg = vp.salvage(&compiler);
788 if (needs_ext) {
789 compiler.generate_raw_intext(cca.reg, vp_reg, ext_sign, ext_bits, 64);
790 } else {
791 compiler.mov(cca.reg, vp_reg, size);
792 }
793 } else {
794 vp.reload_into_specific_fixed(&compiler, cca.reg);
795 if (needs_ext) {
796 compiler.generate_raw_intext(cca.reg, cca.reg, ext_sign, ext_bits, 64);
797 }
798 }
799 }
800 vp.reset(&compiler);
801 assert(!compiler.register_file.is_used(cca.reg));
802 compiler.register_file.mark_clobbered(cca.reg);
803 compiler.register_file.allocatable &= ~(u64{1} << cca.reg.id());
804 ret_regs |= (1ull << cca.reg.id());
805}
806
807template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
808void CompilerBase<Adaptor, Derived, Config>::RetBuilder::add(
809 IRValueRef val) noexcept {
810 u32 part_count = compiler.val_parts(val).count();
811 ValueRef vr = compiler.val_ref(val);
812 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
813 add(vr.part(part_idx), CCAssignment{});
814 }
815}
816
817template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
818void CompilerBase<Adaptor, Derived, Config>::RetBuilder::ret() noexcept {
819 assert((compiler.register_file.allocatable & ret_regs) == 0);
820 compiler.register_file.allocatable |= ret_regs;
821
822 compiler.gen_func_epilog();
823 compiler.release_regs_after_return();
824}
825
826template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
827bool CompilerBase<Adaptor, Derived, Config>::compile() {
828 // create function symbols
829 text_writer.begin_module(assembler);
830 text_writer.switch_section(
831 assembler.get_section(assembler.get_text_section()));
832
833 assert(func_syms.empty());
834 for (const IRFuncRef func : adaptor->funcs()) {
835 auto binding = Assembler::SymBinding::GLOBAL;
836 if (adaptor->func_has_weak_linkage(func)) {
837 binding = Assembler::SymBinding::WEAK;
838 } else if (adaptor->func_only_local(func)) {
839 binding = Assembler::SymBinding::LOCAL;
840 }
841 if (adaptor->func_extern(func)) {
842 func_syms.push_back(derived()->assembler.sym_add_undef(
843 adaptor->func_link_name(func), binding));
844 } else {
845 func_syms.push_back(derived()->assembler.sym_predef_func(
846 adaptor->func_link_name(func), binding));
847 }
848 derived()->define_func_idx(func, func_syms.size() - 1);
849 }
850
851 if (!derived()->hook_post_func_sym_init()) {
852 TPDE_LOG_ERR("hook_post_func_sym_init failed");
853 return false;
854 }
855
856 // TODO(ts): create function labels?
857
858 bool success = true;
859
860 u32 func_idx = 0;
861 for (const IRFuncRef func : adaptor->funcs()) {
862 if (adaptor->func_extern(func)) {
863 TPDE_LOG_TRACE("Skipping compilation of func {}",
864 adaptor->func_link_name(func));
865 ++func_idx;
866 continue;
867 }
868
869 TPDE_LOG_TRACE("Compiling func {}", adaptor->func_link_name(func));
870 if (!derived()->compile_func(func, func_idx)) {
871 TPDE_LOG_ERR("Failed to compile function {}",
872 adaptor->func_link_name(func));
873 success = false;
874 }
875 ++func_idx;
876 }
877
878 text_writer.flush();
879 text_writer.end_module();
880 assembler.finalize();
881
882 // TODO(ts): generate object/map?
883
884 return success;
885}
886
887template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
888void CompilerBase<Adaptor, Derived, Config>::reset() {
889 adaptor->reset();
890
891 for (auto &e : stack.fixed_free_lists) {
892 e.clear();
893 }
894 stack.dynamic_free_lists.clear();
895
896 assembler.reset();
897 func_syms.clear();
898 block_labels.clear();
899 personality_syms.clear();
900}
901
902template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
903void CompilerBase<Adaptor, Derived, Config>::init_assignment(
904 IRValueRef value, ValLocalIdx local_idx) noexcept {
905 assert(val_assignment(local_idx) == nullptr);
906 TPDE_LOG_TRACE("Initializing assignment for value {}",
907 static_cast<u32>(local_idx));
908
909 const auto parts = derived()->val_parts(value);
910 const u32 part_count = parts.count();
911 assert(part_count > 0);
912 auto *assignment = assignments.allocator.allocate(part_count);
913 assignments.value_ptrs[static_cast<u32>(local_idx)] = assignment;
914
915 u32 max_part_size = 0;
916 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
917 auto ap = AssignmentPartRef{assignment, part_idx};
918 ap.reset();
919 ap.set_bank(parts.reg_bank(part_idx));
920 const u32 size = parts.size_bytes(part_idx);
921 assert(size > 0);
922 max_part_size = std::max(max_part_size, size);
923 ap.set_part_size(size);
924 }
925
926 const auto &liveness = analyzer.liveness_info(local_idx);
927
928 // if there is only one part, try to hand out a fixed assignment
929 // if the value is used for longer than one block and there aren't too many
930 // definitions in child loops that it could interfere with
931 // TODO(ts): try out only fixed assignments if the value is live for more
932 // than two blocks?
933 // TODO(ts): move this to ValuePartRef::alloc_reg to be able to defer this
934 // for results?
935 if (part_count == 1) {
936 const auto &cur_loop =
937 analyzer.loop_from_idx(analyzer.block_loop_idx(cur_block_idx));
938 auto ap = AssignmentPartRef{assignment, 0};
939
940 auto try_fixed =
941 liveness.last > cur_block_idx &&
942 cur_loop.definitions_in_childs +
943 assignments.cur_fixed_assignment_count[ap.bank().id()] <
944 Derived::NUM_FIXED_ASSIGNMENTS[ap.bank().id()];
945 if (derived()->try_force_fixed_assignment(value)) {
946 try_fixed = assignments.cur_fixed_assignment_count[ap.bank().id()] <
947 Derived::NUM_FIXED_ASSIGNMENTS[ap.bank().id()];
948 }
949
950 if (try_fixed) {
951 // check if there is a fixed register available
952 AsmReg reg = derived()->select_fixed_assignment_reg(ap, value);
953 TPDE_LOG_TRACE("Trying to assign fixed reg to value {}",
954 static_cast<u32>(local_idx));
955
956 // TODO: if the register is used, we can free it most of the time, but not
957 // always, e.g. for PHI nodes. Detect this case and free_reg otherwise.
958 if (!reg.invalid() && !register_file.is_used(reg)) {
959 TPDE_LOG_TRACE("Assigning fixed assignment to reg {} for value {}",
960 reg.id(),
961 static_cast<u32>(local_idx));
962 ap.set_reg(reg);
963 ap.set_register_valid(true);
964 ap.set_fixed_assignment(true);
965 register_file.mark_used(reg, local_idx, 0);
966 register_file.inc_lock_count(reg); // fixed assignments always locked
967 register_file.mark_clobbered(reg);
968 ++assignments.cur_fixed_assignment_count[ap.bank().id()];
969 }
970 }
971 }
972
973 const auto last_full = liveness.last_full;
974 const auto ref_count = liveness.ref_count;
975
976 assert(max_part_size <= 256);
977 assignment->max_part_size = max_part_size;
978 assignment->pending_free = false;
979 assignment->variable_ref = false;
980 assignment->stack_variable = false;
981 assignment->delay_free = last_full;
982 assignment->part_count = part_count;
983 assignment->frame_off = 0;
984 assignment->references_left = ref_count;
985}
986
987template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
988void CompilerBase<Adaptor, Derived, Config>::free_assignment(
989 ValLocalIdx local_idx, ValueAssignment *assignment) noexcept {
990 TPDE_LOG_TRACE("Freeing assignment for value {}",
991 static_cast<u32>(local_idx));
992
993 assert(assignments.value_ptrs[static_cast<u32>(local_idx)] == assignment);
994 assignments.value_ptrs[static_cast<u32>(local_idx)] = nullptr;
995 const auto is_var_ref = assignment->variable_ref;
996 const u32 part_count = assignment->part_count;
997
998 // free registers
999 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
1000 auto ap = AssignmentPartRef{assignment, part_idx};
1001 if (ap.fixed_assignment()) [[unlikely]] {
1002 const auto reg = ap.get_reg();
1003 assert(register_file.is_fixed(reg));
1004 assert(register_file.reg_local_idx(reg) == local_idx);
1005 assert(register_file.reg_part(reg) == part_idx);
1006 --assignments.cur_fixed_assignment_count[ap.bank().id()];
1007 register_file.dec_lock_count_must_zero(reg); // release lock for fixed reg
1008 register_file.unmark_used(reg);
1009 } else if (ap.register_valid()) {
1010 const auto reg = ap.get_reg();
1011 assert(!register_file.is_fixed(reg));
1012 register_file.unmark_used(reg);
1013 }
1014 }
1015
1016#ifdef TPDE_ASSERTS
1017 for (auto reg_id : register_file.used_regs()) {
1018 assert(register_file.reg_local_idx(AsmReg{reg_id}) != local_idx &&
1019 "freeing assignment that is still referenced by a register");
1020 }
1021#endif
1022
1023 // variable references do not have a stack slot
1024 bool has_stack = Config::FRAME_INDEXING_NEGATIVE ? assignment->frame_off < 0
1025 : assignment->frame_off != 0;
1026 if (!is_var_ref && has_stack) {
1027 free_stack_slot(assignment->frame_off, assignment->size());
1028 }
1029
1030 assignments.allocator.deallocate(assignment);
1031}
1032
1033template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1034[[gnu::noinline]] void
1035CompilerBase<Adaptor, Derived, Config>::release_assignment(
1036 ValLocalIdx local_idx, ValueAssignment *assignment) noexcept {
1037 if (!assignment->delay_free) {
1038 free_assignment(local_idx, assignment);
1039 return;
1040 }
1041
1042 // need to wait until release
1043 TPDE_LOG_TRACE("Delay freeing assignment for value {}",
1044 static_cast<u32>(local_idx));
1045 const auto &liveness = analyzer.liveness_info(local_idx);
1046 auto &free_list_head = assignments.delayed_free_lists[u32(liveness.last)];
1047 assignment->next_delayed_free_entry = free_list_head;
1048 assignment->pending_free = true;
1049 free_list_head = local_idx;
1050}
1051
1052template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1053void CompilerBase<Adaptor, Derived, Config>::init_variable_ref(
1054 ValLocalIdx local_idx, u32 var_ref_data) noexcept {
1055 TPDE_LOG_TRACE("Initializing variable-ref assignment for value {}",
1056 static_cast<u32>(local_idx));
1057
1058 assert(val_assignment(local_idx) == nullptr);
1059 auto *assignment = assignments.allocator.allocate_slow(1, true);
1060 assignments.value_ptrs[static_cast<u32>(local_idx)] = assignment;
1061
1062 assignment->max_part_size = Config::PLATFORM_POINTER_SIZE;
1063 assignment->variable_ref = true;
1064 assignment->stack_variable = false;
1065 assignment->part_count = 1;
1066 assignment->var_ref_custom_idx = var_ref_data;
1067 assignment->next_delayed_free_entry = assignments.variable_ref_list;
1068
1069 assignments.variable_ref_list = local_idx;
1070
1071 AssignmentPartRef ap{assignment, 0};
1072 ap.reset();
1073 ap.set_bank(Config::GP_BANK);
1074 ap.set_part_size(Config::PLATFORM_POINTER_SIZE);
1075}
1076
1077template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1078i32 CompilerBase<Adaptor, Derived, Config>::allocate_stack_slot(
1079 u32 size) noexcept {
1080 this->stack.frame_used = true;
1081 unsigned align_bits = 4;
1082 if (size == 0) {
1083 return 0; // 0 is the "invalid" stack slot
1084 } else if (size <= 16) {
1085 // Align up to next power of two.
1086 u32 free_list_idx = size == 1 ? 0 : 32 - util::cnt_lz<u32>(size - 1);
1087 assert(size <= 1u << free_list_idx);
1088 size = 1 << free_list_idx;
1089 align_bits = free_list_idx;
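    // E.g. size = 5: free_list_idx = 32 - cnt_lz(4) = 3, so the request is
    // rounded up to an 8-byte slot with 8-byte alignment.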
1090
1091 if (!stack.fixed_free_lists[free_list_idx].empty()) {
1092 auto slot = stack.fixed_free_lists[free_list_idx].back();
1093 stack.fixed_free_lists[free_list_idx].pop_back();
1094 return slot;
1095 }
1096 } else {
1097 size = util::align_up(size, 16);
1098 auto it = stack.dynamic_free_lists.find(size);
1099 if (it != stack.dynamic_free_lists.end() && !it->second.empty()) {
1100 const auto slot = it->second.back();
1101 it->second.pop_back();
1102 return slot;
1103 }
1104 }
1105
1106 assert(stack.frame_size != ~0u &&
1107 "cannot allocate stack slot before stack frame is initialized");
1108
1109 // Align frame_size to align_bits
1110 for (u32 list_idx = util::cnt_tz(stack.frame_size); list_idx < align_bits;
1111 list_idx = util::cnt_tz(stack.frame_size)) {
1112 i32 slot = stack.frame_size;
1113 if constexpr (Config::FRAME_INDEXING_NEGATIVE) {
1114 slot = -(slot + (1ull << list_idx));
1115 }
1116 stack.fixed_free_lists[list_idx].push_back(slot);
1117 stack.frame_size += 1ull << list_idx;
1118 }
1119
1120 auto slot = stack.frame_size;
1121 assert(slot != 0 && "stack slot 0 is reserved");
1122 stack.frame_size += size;
1123
1124 if constexpr (Config::FRAME_INDEXING_NEGATIVE) {
1125 slot = -(slot + size);
1126 }
1127 return slot;
1128}
1129
1130template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1131void CompilerBase<Adaptor, Derived, Config>::free_stack_slot(
1132 u32 slot, u32 size) noexcept {
1133 if (size == 0) [[unlikely]] {
1134 assert(slot == 0 && "unexpected slot for zero-sized stack-slot?");
1135 // Do nothing.
1136 } else if (size <= 16) [[likely]] {
1137 u32 free_list_idx = size == 1 ? 0 : 32 - util::cnt_lz<u32>(size - 1);
1138 stack.fixed_free_lists[free_list_idx].push_back(slot);
1139 } else {
1140 size = util::align_up(size, 16);
1141 stack.dynamic_free_lists[size].push_back(slot);
1142 }
1143}
1144
1145template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1146void CompilerBase<Adaptor, Derived, Config>::prologue_assign_arg(
1147 CCAssigner *cc_assigner,
1148 u32 arg_idx,
1149 IRValueRef arg,
1150 u32 align,
1151 bool allow_split) noexcept {
1152 ValueRef vr = derived()->result_ref(arg);
1153 if (adaptor->cur_arg_is_byval(arg_idx)) {
1154 CCAssignment cca{
1155 .byval = true,
1156 .align = u8(adaptor->cur_arg_byval_align(arg_idx)),
1157 .size = adaptor->cur_arg_byval_size(arg_idx),
1158 };
1159 cc_assigner->assign_arg(cca);
1160 std::optional<i32> byval_frame_off =
1161 derived()->prologue_assign_arg_part(vr.part(0), cca);
1162
1163 if (byval_frame_off) {
1164 // We need to convert the assignment into a stack variable ref.
1165 ValLocalIdx local_idx = val_idx(arg);
1166 // TODO: we shouldn't create the result_ref for such cases in the first
1167 // place. However, this is not easy to detect up front; it depends on the
1168 // target and the calling convention whether this is possible.
1169 vr.reset();
1170 // Value assignment might have been free'd by ValueRef reset.
1171 if (ValueAssignment *assignment = val_assignment(local_idx)) {
1172 free_assignment(local_idx, assignment);
1173 }
1174 init_variable_ref(local_idx, 0);
1175 ValueAssignment *assignment = this->val_assignment(local_idx);
1176 assignment->stack_variable = true;
1177 assignment->frame_off = *byval_frame_off;
1178 }
1179 return;
1180 }
1181
1182 if (adaptor->cur_arg_is_sret(arg_idx)) {
1183 assert(vr.assignment()->part_count == 1 && "sret must be single-part");
1184 ValuePartRef vp = vr.part(0);
1185 CCAssignment cca{
1186 .sret = true, .bank = vp.bank(), .size = Config::PLATFORM_POINTER_SIZE};
1187 cc_assigner->assign_arg(cca);
1188 derived()->prologue_assign_arg_part(std::move(vp), cca);
1189 return;
1190 }
1191
1192 const u32 part_count = vr.assignment()->part_count;
1193 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
1194 ValuePartRef vp = vr.part(part_idx);
1195 u32 remaining = part_count < 256 ? part_count - part_idx - 1 : 255;
1196 CCAssignment cca{
1197 .consecutive = u8(allow_split ? 0 : remaining),
1198 .align = u8(part_idx == 0 ? align : 1),
1199 .bank = vp.bank(),
1200 .size = vp.part_size(),
1201 };
1202 cc_assigner->assign_arg(cca);
1203 derived()->prologue_assign_arg_part(std::move(vp), cca);
1204 }
1205}
1206
1207template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1208typename CompilerBase<Adaptor, Derived, Config>::ValueRef
1209CompilerBase<Adaptor, Derived, Config>::val_ref(IRValueRef value) noexcept {
1210 if (auto special = derived()->val_ref_special(value); special) {
1211 return ValueRef{this, std::move(*special)};
1212 }
1213
1214 const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(value);
1215 assert(val_assignment(local_idx) != nullptr && "value use before def");
1216 return ValueRef{this, local_idx};
1217}
1218
1219template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1220std::pair<typename CompilerBase<Adaptor, Derived, Config>::ValueRef,
1221 typename CompilerBase<Adaptor, Derived, Config>::ValuePartRef>
1222CompilerBase<Adaptor, Derived, Config>::val_ref_single(
1223 IRValueRef value) noexcept {
1224 std::pair<ValueRef, ValuePartRef> res{val_ref(value), this};
1225 res.second = res.first.part(0);
1226 return res;
1227}
1228
1229template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1230typename CompilerBase<Adaptor, Derived, Config>::ValueRef
1231CompilerBase<Adaptor, Derived, Config>::result_ref(
1232 IRValueRef value) noexcept {
1233 const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(value);
1234 if (val_assignment(local_idx) == nullptr) {
1235 init_assignment(value, local_idx);
1236 }
1237 return ValueRef{this, local_idx};
1238}
1239
1240template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1241std::pair<typename CompilerBase<Adaptor, Derived, Config>::ValueRef,
1242 typename CompilerBase<Adaptor, Derived, Config>::ValuePartRef>
1243CompilerBase<Adaptor, Derived, Config>::result_ref_single(
1244 IRValueRef value) noexcept {
1245 std::pair<ValueRef, ValuePartRef> res{result_ref(value), this};
1246 res.second = res.first.part(0);
1247 return res;
1248}
1249
1250template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1251typename CompilerBase<Adaptor, Derived, Config>::ValueRef
1252CompilerBase<Adaptor, Derived, Config>::result_ref_alias(
1253 IRValueRef dst, ValueRef &&src) noexcept {
1254 const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(dst);
1255 assert(!val_assignment(local_idx) && "alias target already defined");
1256 assert(src.has_assignment() && "alias src must have an assignment");
1257 // Consider implementing aliases where multiple values share the same
1258 // assignment, e.g. for implementing trunc as a no-op sharing registers. Live
1259 // ranges can be merged and reference counts added. However, this needs
1260 // additional infrastructure to clear one of the value_ptrs.
1261 assert(src.is_owned() && "alias src must be owned");
1262
1263 ValueAssignment *assignment = src.assignment();
1264 u32 part_count = assignment->part_count;
1265 assert(!assignment->pending_free);
1266 assert(!assignment->variable_ref);
1267 assert(!assignment->pending_free);
1268#ifndef NDEBUG
1269 {
1270 const auto &src_liveness = analyzer.liveness_info(src.local_idx());
1271 assert(!src_liveness.last_full); // implied by is_owned()
1272 assert(assignment->references_left == 1); // implied by is_owned()
1273
1274 // Validate that part configuration is identical.
1275 const auto parts = derived()->val_parts(dst);
1276 assert(parts.count() == part_count);
1277 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
1278 AssignmentPartRef ap{assignment, part_idx};
1279 assert(parts.reg_bank(part_idx) == ap.bank());
1280 assert(parts.size_bytes(part_idx) == ap.part_size());
1281 }
1282 }
1283#endif
1284
1285 // Update local_idx of registers.
1286 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
1287 AssignmentPartRef ap{assignment, part_idx};
1288 if (ap.register_valid()) {
1289 register_file.update_reg_assignment(ap.get_reg(), local_idx, part_idx);
1290 }
1291 }
1292
1293 const auto &liveness = analyzer.liveness_info(local_idx);
1294 assignment->delay_free = liveness.last_full;
1295 assignment->references_left = liveness.ref_count;
1296 assignments.value_ptrs[static_cast<u32>(src.local_idx())] = nullptr;
1297 assignments.value_ptrs[static_cast<u32>(local_idx)] = assignment;
1298 src.disown();
1299 src.reset();
1300
1301 return ValueRef{this, local_idx};
1302}
1303
1304template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1305typename CompilerBase<Adaptor, Derived, Config>::ValueRef
1306CompilerBase<Adaptor, Derived, Config>::result_ref_stack_slot(
1307 IRValueRef dst, AssignmentPartRef base, i32 off) noexcept {
1308 const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(dst);
1309 assert(!val_assignment(local_idx) && "new value already defined");
1310 init_variable_ref(local_idx, 0);
1311 ValueAssignment *assignment = this->val_assignment(local_idx);
1312 assignment->stack_variable = true;
1313 assignment->frame_off = base.variable_stack_off() + off;
1314 return ValueRef{this, local_idx};
1315}
1316
1317template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1318void CompilerBase<Adaptor, Derived, Config>::set_value(
1319 ValuePartRef &val_ref, ScratchReg &scratch) noexcept {
1320 val_ref.set_value(std::move(scratch));
1321}
1322
1323template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1324typename CompilerBase<Adaptor, Derived, Config>::AsmReg
1325CompilerBase<Adaptor, Derived, Config>::gval_as_reg(
1326 GenericValuePart &gv) noexcept {
1327 if (std::holds_alternative<ScratchReg>(gv.state)) {
1328 return std::get<ScratchReg>(gv.state).cur_reg();
1329 }
1330 if (std::holds_alternative<ValuePartRef>(gv.state)) {
1331 auto &vpr = std::get<ValuePartRef>(gv.state);
1332 if (vpr.has_reg()) {
1333 return vpr.cur_reg();
1334 }
1335 return vpr.load_to_reg();
1336 }
1337 if (auto *expr = std::get_if<typename GenericValuePart::Expr>(&gv.state)) {
1338 if (expr->has_base() && !expr->has_index() && expr->disp == 0) {
1339 return expr->base_reg();
1340 }
1341 return derived()->gval_expr_as_reg(gv);
1342 }
1343 TPDE_UNREACHABLE("gval_as_reg on empty GenericValuePart");
1344}
1345
1346template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1347typename CompilerBase<Adaptor, Derived, Config>::AsmReg
1348CompilerBase<Adaptor, Derived, Config>::gval_as_reg_reuse(
1349 GenericValuePart &gv, ScratchReg &dst) noexcept {
1350 AsmReg reg = gval_as_reg(gv);
1351 if (!dst.has_reg()) {
1352 if (auto *scratch = std::get_if<ScratchReg>(&gv.state)) {
1353 dst = std::move(*scratch);
1354 } else if (auto *val_ref = std::get_if<ValuePartRef>(&gv.state)) {
1355 if (val_ref->can_salvage()) {
1356 dst.alloc_specific(val_ref->salvage());
1357 assert(dst.cur_reg() == reg && "salvaging unsuccessful");
1358 }
1359 }
1360 }
1361 return reg;
1362}
1363
1364template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1365Reg CompilerBase<Adaptor, Derived, Config>::select_reg_evict(
1366 RegBank bank, u64 exclusion_mask) noexcept {
1367 TPDE_LOG_DBG("select_reg_evict for bank {}", bank.id());
1368 auto candidates =
1369 register_file.used & register_file.bank_regs(bank) & ~exclusion_mask;
1370
1371 Reg candidate = Reg::make_invalid();
1372 u32 max_score = 0;
1373 for (auto reg_id : util::BitSetIterator<>(candidates)) {
1374 Reg reg{reg_id};
1375 if (register_file.is_fixed(reg)) {
1376 continue;
1377 }
1378
1379 // Must be an evictable value, not a temporary.
1380 auto local_idx = register_file.reg_local_idx(reg);
1381 u32 part = register_file.reg_part(Reg{reg});
1382 assert(local_idx != INVALID_VAL_LOCAL_IDX);
1383 ValueAssignment *va = val_assignment(local_idx);
1384 AssignmentPartRef ap{va, part};
1385
1386 // We want to sort registers by the following (ordered by priority):
1387 // - stack variable ref (~1 add/sub to reconstruct)
1388 // - other variable ref (1-2 instrs to reconstruct)
1389 // - already spilled (no store needed)
1390 // - last use farthest away (most likely to get spilled anyhow, so there's
1391 // not much harm in spilling earlier)
1392 // - lowest ref-count (least used)
1393 //
1394 // TODO: evaluate and refine this heuristic
1395
1396 // TODO: evict stack variable refs before others
1397 if (ap.variable_ref()) {
1398 TPDE_LOG_DBG(" r{} ({}) is variable-ref", reg_id, u32(local_idx));
1399 candidate = reg;
1400 break;
1401 }
1402
1403 u32 score = 0;
1404 if (ap.stack_valid()) {
1405 score |= u32{1} << 31;
1406 }
1407
1408 const auto &liveness = analyzer.liveness_info(local_idx);
1409 u32 last_use_dist = u32(liveness.last) - u32(cur_block_idx);
1410 score |= (last_use_dist < 0x8000 ? 0x8000 - last_use_dist : 0) << 16;
1411
1412 u32 refs_left = va->pending_free ? 0 : va->references_left;
1413 score |= (refs_left < 0xffff ? 0x10000 - refs_left : 1);
1414
1415 TPDE_LOG_DBG(" r{} ({}:{}) rc={}/{} live={}-{}{} spilled={} score={:#x}",
1416 reg_id,
1417 u32(local_idx),
1418 part,
1419 refs_left,
1420 liveness.ref_count,
1421 u32(liveness.first),
1422 u32(liveness.last),
1423 &"*"[!liveness.last_full],
1424 ap.stack_valid(),
1425 score);
1426
1427 assert(score != 0);
1428 if (score > max_score) {
1429 candidate = reg;
1430 max_score = score;
1431 }
1432 }
1433 if (candidate.invalid()) [[unlikely]] {
1434 TPDE_FATAL("ran out of registers for scratch registers");
1435 }
1436 TPDE_LOG_DBG(" selected r{}", candidate.id());
1437 evict_reg(candidate);
1438 return candidate;
1439}
1440
1441template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1442void CompilerBase<Adaptor, Derived, Config>::reload_to_reg(
1443 AsmReg dst, AssignmentPartRef ap) noexcept {
1444 if (!ap.variable_ref()) {
1445 assert(ap.stack_valid());
1446 derived()->load_from_stack(dst, ap.frame_off(), ap.part_size());
1447 } else if (ap.is_stack_variable()) {
1448 derived()->load_address_of_stack_var(dst, ap);
1449 } else if constexpr (!Config::DEFAULT_VAR_REF_HANDLING) {
1450 derived()->load_address_of_var_reference(dst, ap);
1451 } else {
1452 TPDE_UNREACHABLE("non-stack-variable needs custom var-ref handling");
1453 }
1454}
1455
1456template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1457void CompilerBase<Adaptor, Derived, Config>::allocate_spill_slot(
1458 AssignmentPartRef ap) noexcept {
1459 assert(!ap.variable_ref() && "cannot allocate spill slot for variable ref");
1460 if (ap.assignment()->frame_off == 0) {
1461 assert(!ap.stack_valid() && "stack-valid set without spill slot");
1462 ap.assignment()->frame_off = allocate_stack_slot(ap.assignment()->size());
1463 assert(ap.assignment()->frame_off != 0);
1464 }
1465}
1466
1467template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1468void CompilerBase<Adaptor, Derived, Config>::spill(
1469 AssignmentPartRef ap) noexcept {
1470 assert(may_change_value_state());
1471 if (!ap.stack_valid() && !ap.variable_ref()) {
1472 assert(ap.register_valid() && "cannot spill uninitialized assignment part");
1473 allocate_spill_slot(ap);
1474 derived()->spill_reg(ap.get_reg(), ap.frame_off(), ap.part_size());
1475 ap.set_stack_valid();
1476 }
1477}
1478
1479template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1480void CompilerBase<Adaptor, Derived, Config>::evict(
1481 AssignmentPartRef ap) noexcept {
1482 assert(may_change_value_state());
1483 assert(ap.register_valid());
1484 derived()->spill(ap);
1485 ap.set_register_valid(false);
1486 register_file.unmark_used(ap.get_reg());
1487}
1488
1489template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1490void CompilerBase<Adaptor, Derived, Config>::evict_reg(Reg reg) noexcept {
1491 assert(may_change_value_state());
1492 assert(!register_file.is_fixed(reg));
1493 assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);
1494
1495 ValLocalIdx local_idx = register_file.reg_local_idx(reg);
1496 auto part = register_file.reg_part(reg);
1497 AssignmentPartRef evict_part{val_assignment(local_idx), part};
1498 assert(evict_part.register_valid());
1499 assert(evict_part.get_reg() == reg);
1500 derived()->spill(evict_part);
1501 evict_part.set_register_valid(false);
1502 register_file.unmark_used(reg);
1503}
1504
1505template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1506void CompilerBase<Adaptor, Derived, Config>::free_reg(Reg reg) noexcept {
1507 assert(may_change_value_state());
1508 assert(!register_file.is_fixed(reg));
1509 assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);
1510
1511 ValLocalIdx local_idx = register_file.reg_local_idx(reg);
1512 auto part = register_file.reg_part(reg);
1513 AssignmentPartRef ap{val_assignment(local_idx), part};
1514 assert(ap.register_valid());
1515 assert(ap.get_reg() == reg);
1516 assert(!ap.modified() || ap.variable_ref());
1517 ap.set_register_valid(false);
1518 register_file.unmark_used(reg);
1519}
1520
1521template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1522typename CompilerBase<Adaptor, Derived, Config>::RegisterFile::RegBitSet
1523CompilerBase<Adaptor, Derived, Config>::spill_before_branch(
1524 bool force_spill) noexcept {
1525 // since we do not explicitly keep track of register assignments per block,
1526 // whenever we might branch off to a block that we do not directly compile
1527 // afterwards (i.e. the register assignments might change in between), we
1528 // need to spill all registers which are not fixed and remove them from the
1529 // register state.
1530 //
1531 // This leads to worse codegen but saves a significant overhead to
1532 // store/manage the register assignment for each block (256 bytes/block for
1533 // x64) and possibly compile time, as there might be additional logic to move
1534 // values around.
1535
1536 // First, we consider the case that the current block only has one successor
1537 // which is compiled directly after the current one, in which case we do not
1538 // have to spill anything.
1539 //
1540 // Secondly, if the next block has multiple incoming edges, we always have
1541 // to spill and remove from the register assignment. Otherwise, we
1542 // only need to spill values if they are alive in any successor which is not
1543 // the next block.
1544 //
1545 // Values which are only read from PHI-Nodes and have no extended lifetimes,
1546 // do not need to be spilled as they die at the edge.
1547
1548 using RegBitSet = typename RegisterFile::RegBitSet;
1549
1550 assert(may_change_value_state());
1551
1552 const IRBlockRef cur_block_ref = analyzer.block_ref(cur_block_idx);
1553 // Earliest succeeding block after the current block that is not the
1554 // immediately succeeding block. Used to determine whether a value needs to
1555 // be spilled.
1556 BlockIndex earliest_next_succ = Analyzer<Adaptor>::INVALID_BLOCK_IDX;
1557
1558 bool must_spill = force_spill;
1559 if (!must_spill) {
1560 // We must always spill if no block is immediately succeeding or that block
1561 // has multiple incoming edges.
1562 auto next_block_is_succ = false;
1563 auto next_block_has_multiple_incoming = false;
1564 u32 succ_count = 0;
1565 for (const IRBlockRef succ : adaptor->block_succs(cur_block_ref)) {
1566 ++succ_count;
1567 BlockIndex succ_idx = analyzer.block_idx(succ);
1568 if (u32(succ_idx) == u32(cur_block_idx) + 1) {
1569 next_block_is_succ = true;
1570 if (analyzer.block_has_multiple_incoming(succ)) {
1571 next_block_has_multiple_incoming = true;
1572 }
1573 } else if (succ_idx > cur_block_idx && succ_idx < earliest_next_succ) {
1574 earliest_next_succ = succ_idx;
1575 }
1576 }
1577
1578 must_spill = !next_block_is_succ || next_block_has_multiple_incoming;
1579
1580 if (succ_count == 1 && !must_spill) {
1581 return RegBitSet{};
1582 }
1583 }
1584
1585 auto release_regs = RegBitSet{};
1586 // TODO(ts): just use register_file.used_nonfixed_regs()?
1587 for (auto reg : register_file.used_regs()) {
1588 auto local_idx = register_file.reg_local_idx(Reg{reg});
1589 auto part = register_file.reg_part(Reg{reg});
1590 if (local_idx == INVALID_VAL_LOCAL_IDX) {
1591 // scratch regs can never be held across blocks
1592 continue;
1593 }
1594 AssignmentPartRef ap{val_assignment(local_idx), part};
1595 if (ap.fixed_assignment()) {
1596 // fixed registers do not need to be spilled
1597 continue;
1598 }
1599
1600 // Remove from register assignment if the next block cannot rely on the
1601 // value being in the specific register.
1602 if (must_spill) {
1603 release_regs |= RegBitSet{1ull} << reg;
1604 }
1605
1606 if (!ap.modified() || ap.variable_ref()) {
1607 // No need to spill values that were already spilled or are variable refs.
1608 continue;
1609 }
1610
1611 const auto &liveness = analyzer.liveness_info(local_idx);
1612 if (liveness.last <= cur_block_idx) {
1613 // No need to spill value if it dies immediately after the block.
1614 continue;
1615 }
1616
1617 // If the value is live in a different block, we might need to spill it.
1618 // Cases:
1619 // - Live in successor that precedes current block => ignore, value must
1620 // have been spilled already due to RPO.
1621 // - Live only in successor that immediately follows this block => no need
1622 // to spill, value can be kept in register, register state for next block
1623 // is still valid.
1624 // - Live in successor in some distance after current block => spill
1625 if (must_spill || earliest_next_succ <= liveness.last) {
1626 spill(ap);
1627 }
1628 }
1629
1630 return release_regs;
1631}
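// Worked example (annotation, not part of the original source): assume a block
// layout B0, B1, B2, B3 where B0 ends with a conditional branch to B1 and B3,
// B1 directly follows B0 and has only this single incoming edge. Then
// must_spill stays false and earliest_next_succ = B3, so:
//   - a value whose liveness ends in B0 or B1 stays in its register unspilled,
//   - a modified value live into B3 is spilled but keeps its register, and
//   - the returned RegBitSet is empty, because the fall-through block B1 may
//     still rely on the current register assignment.
// If B1 instead had a second predecessor, must_spill would be true: every
// non-fixed register would be spilled (if modified) and returned so that the
// caller can release it after emitting the branches.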
1632
1633template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1634void CompilerBase<Adaptor, Derived, Config>::release_spilled_regs(
1635 typename RegisterFile::RegBitSet regs) noexcept {
1636 assert(may_change_value_state());
1637
1638 // TODO(ts): needs changes for other RegisterFile impls
1639 for (auto reg_id : util::BitSetIterator<>{regs & register_file.used}) {
1640 if (!register_file.is_fixed(Reg{reg_id})) {
1641 free_reg(Reg{reg_id});
1642 }
1643 }
1644}
1645
1646template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1647void CompilerBase<Adaptor, Derived, Config>::
1648 release_regs_after_return() noexcept {
1649 // we essentially have to free all non-fixed registers
1650 for (auto reg_id : register_file.used_regs()) {
1651 if (!register_file.is_fixed(Reg{reg_id})) {
1652 free_reg(Reg{reg_id});
1653 }
1654 }
1655}
1656
1657template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1658template <typename Jump>
1659void CompilerBase<Adaptor, Derived, Config>::generate_branch_to_block(
1660 Jump jmp, IRBlockRef target, bool needs_split, bool last_inst) noexcept {
1661 BlockIndex target_idx = this->analyzer.block_idx(target);
1662 Label target_label = this->block_labels[u32(target_idx)];
1663 if (!needs_split) {
1664 move_to_phi_nodes(target_idx);
1665 if (!last_inst || target_idx != this->next_block()) {
1666 derived()->generate_raw_jump(jmp, target_label);
1667 }
1668 } else {
1669 Label tmp_label = this->text_writer.label_create();
1670 derived()->generate_raw_jump(derived()->invert_jump(jmp), tmp_label);
1671 move_to_phi_nodes(target_idx);
1672 derived()->generate_raw_jump(Jump::jmp, target_label);
1673 this->label_place(tmp_label);
1674 }
1675}
1676
1677template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1678void CompilerBase<Adaptor, Derived, Config>::generate_uncond_branch(
1679 IRBlockRef target) noexcept {
1680 auto spilled = spill_before_branch();
1681 begin_branch_region();
1682
1683 generate_branch_to_block(Derived::Jump::jmp, target, false, true);
1684
1685 end_branch_region();
1686 release_spilled_regs(spilled);
1687}
1688
1689template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1690template <typename Jump>
1691void CompilerBase<Adaptor, Derived, Config>::generate_cond_branch(
1692 Jump jmp, IRBlockRef true_target, IRBlockRef false_target) noexcept {
1693 IRBlockRef next = analyzer.block_ref(next_block());
1694
1695 bool true_needs_split = branch_needs_split(true_target);
1696 bool false_needs_split = branch_needs_split(false_target);
1697
1698 auto spilled = spill_before_branch();
1699 begin_branch_region();
1700
1701 if (next == true_target || (next != false_target && true_needs_split)) {
1702 generate_branch_to_block(
1703 derived()->invert_jump(jmp), false_target, false_needs_split, false);
1704 generate_branch_to_block(Derived::Jump::jmp, true_target, false, true);
1705 } else if (next == false_target) {
1706 generate_branch_to_block(jmp, true_target, true_needs_split, false);
1707 generate_branch_to_block(Derived::Jump::jmp, false_target, false, true);
1708 } else {
1709 assert(!true_needs_split);
1710 generate_branch_to_block(jmp, true_target, false, false);
1711 generate_branch_to_block(Derived::Jump::jmp, false_target, false, true);
1712 }
1713
1714 end_branch_region();
1715 release_spilled_regs(spilled);
1716}
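// Example (illustrative sketch, not part of the original header): a derived,
// target-specific compiler typically lowers an IR conditional branch by
// materializing the condition and then delegating to generate_cond_branch,
// which takes care of spilling, branch ordering and fall-through. All names
// not defined in this file (MyCompiler, Jump::jne, the adaptor accessors and
// the compare emission) are hypothetical placeholders:
//
//   bool MyCompiler::compile_cond_br(IRInstRef inst) noexcept {
//     {
//       auto [vr, cond] = this->val_ref_single(adaptor->br_cond(inst));
//       // ... emit a target-specific test/compare of `cond` here ...
//     } // release the condition's ValueRef before generating branch code
//     generate_cond_branch(Jump::jne,
//                          adaptor->br_true_block(inst),
//                          adaptor->br_false_block(inst));
//     return true;
//   }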
1717
1718template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1719void CompilerBase<Adaptor, Derived, Config>::generate_switch(
1720 ScratchReg &&cond,
1721 u32 width,
1722 IRBlockRef default_block,
1723 std::span<const std::pair<u64, IRBlockRef>> cases) noexcept {
1724 // This function takes cond as a ScratchReg as opposed to a ValuePart, because
1725 // the ValueRef for the condition must be ref-counted before we enter the
1726 // branch region.
1727
1728 assert(width <= 64);
1729 // We don't support sections larger than 4 GiB, so switches with more than
1730 // 4 billion cases cannot occur.
1731 assert(cases.size() < UINT32_MAX && "large switches are unsupported");
1732
1733 AsmReg cmp_reg = cond.cur_reg();
1734 bool width_is_32 = width <= 32;
1735 if (u32 dst_width = util::align_up(width, 32); width != dst_width) {
1736 derived()->generate_raw_intext(cmp_reg, cmp_reg, false, width, dst_width);
1737 }
1738
1739 // We must not evict any registers in the branching code, as we don't track
1740 // the individual value states per block. Hence, we must not allocate any
1741 // registers (e.g., for constants, jump table address) below.
1742 ScratchReg tmp_scratch{this};
1743 AsmReg tmp_reg = tmp_scratch.alloc_gp();
1744
1745 const auto spilled = this->spill_before_branch();
1746 this->begin_branch_region();
1747
1748 tpde::util::SmallVector<tpde::Label, 64> case_labels;
1749 // Labels that need an intermediate block to set up registers. This is kept
1750 // separate because most switch targets don't need this.
1751 tpde::util::SmallVector<std::pair<tpde::Label, IRBlockRef>, 64> case_blocks;
1752 for (auto i = 0u; i < cases.size(); ++i) {
1753 // If the target might need additional register moves, we can't branch there
1754 // immediately.
1755 // TODO: more precise condition?
1756 BlockIndex target = this->analyzer.block_idx(cases[i].second);
1757 if (analyzer.block_has_phis(target)) {
1758 case_labels.push_back(this->text_writer.label_create());
1759 case_blocks.emplace_back(case_labels.back(), cases[i].second);
1760 } else {
1761 case_labels.push_back(this->block_labels[u32(target)]);
1762 }
1763 }
1764
1765 const auto default_label = this->text_writer.label_create();
1766
1767 const auto build_range = [&,
1768 this](size_t begin, size_t end, const auto &self) {
1769 assert(begin <= end);
1770 const auto num_cases = end - begin;
1771 if (num_cases <= 4) {
1772 // if there are four or fewer cases, we just compare the value
1773 // against each of them
1774 for (auto i = 0u; i < num_cases; ++i) {
1775 derived()->switch_emit_cmpeq(case_labels[begin + i],
1776 cmp_reg,
1777 tmp_reg,
1778 cases[begin + i].first,
1779 width_is_32);
1780 }
1781
1782 derived()->generate_raw_jump(Derived::Jump::jmp, default_label);
1783 return;
1784 }
1785
1786 // check if the density of the values is high enough to warrant building
1787 // a jump table
1788 auto range = cases[end - 1].first - cases[begin].first;
1789 // the computation below gives wrong results if range is -1 (all ones),
1790 // so skip the jump table in that case
1791 if (range != 0xFFFF'FFFF'FFFF'FFFF && (range / num_cases) < 8) {
1792 // GCC seems to build a jump table if there are fewer than 8 values per
1793 // case, so we do that, too
1794
1795 // the actual range is one greater than the result of the
1796 // subtraction, so adjust for that
1797 range += 1;
1798
1799 tpde::util::SmallVector<tpde::Label, 32> label_vec;
1800 std::span<tpde::Label> labels;
1801 if (range == num_cases) {
1802 labels = std::span{case_labels.begin() + begin, num_cases};
1803 } else {
1804 label_vec.resize(range, default_label);
1805 for (auto i = 0u; i < num_cases; ++i) {
1806 label_vec[cases[begin + i].first - cases[begin].first] =
1807 case_labels[begin + i];
1808 }
1809 labels = std::span{label_vec.begin(), range};
1810 }
1811
1812 // Give target the option to emit a jump table.
1813 if (derived()->switch_emit_jump_table(default_label,
1814 labels,
1815 cmp_reg,
1816 tmp_reg,
1817 cases[begin].first,
1818 cases[end - 1].first,
1819 width_is_32)) {
1820 return;
1821 }
1822 }
1823
1824 // do a binary search step
1825 const auto half_len = num_cases / 2;
1826 const auto half_value = cases[begin + half_len].first;
1827 const auto gt_label = this->text_writer.label_create();
1828
1829 // this compares against the input value and jumps to the case label if
1830 // it is equal, or to gt_label if the value is greater. Otherwise it
1831 // falls through
1832 derived()->switch_emit_binary_step(case_labels[begin + half_len],
1833 gt_label,
1834 cmp_reg,
1835 tmp_reg,
1836 half_value,
1837 width_is_32);
1838 // search the lower half
1839 self(begin, begin + half_len, self);
1840
1841 // and the upper half
1842 this->label_place(gt_label);
1843 self(begin + half_len + 1, end, self);
1844 };
1845
1846 build_range(0, case_labels.size(), build_range);
1847
1848 // write out the labels
1849 this->label_place(default_label);
1850 derived()->generate_branch_to_block(
1851 Derived::Jump::jmp, default_block, false, false);
1852
1853 for (const auto &[label, target] : case_blocks) {
1854 // Branch predictors typically have problems if too many branches follow
1855 // each other too closely. Ensure a minimum alignment.
1856 this->text_writer.align(8);
1857 this->label_place(label);
1858 derived()->generate_branch_to_block(
1859 Derived::Jump::jmp, target, false, false);
1860 }
1861
1862 this->end_branch_region();
1863 this->release_spilled_regs(spilled);
1864}
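// Example (illustrative sketch, not part of the original header): a derived
// compiler lowering an IR switch would collect the (case value, block) pairs,
// sort them by case value in ascending order (the compare/jump-table logic
// above relies on that), materialize the condition into a ScratchReg and then
// call generate_switch. The adaptor accessors used here are hypothetical:
//
//   util::SmallVector<std::pair<u64, IRBlockRef>, 16> cases;
//   for (const auto &[val, block] : adaptor->switch_cases(inst)) {
//     cases.emplace_back(val, block);
//   }
//   std::sort(cases.begin(), cases.end(),
//             [](const auto &a, const auto &b) { return a.first < b.first; });
//   ScratchReg cond{this};
//   // ... load the switch condition into cond (releasing its ValueRef) ...
//   generate_switch(std::move(cond), /*width=*/32,
//                   adaptor->switch_default_block(inst), cases);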
1865
1866template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1867void CompilerBase<Adaptor, Derived, Config>::move_to_phi_nodes_impl(
1868 BlockIndex target) noexcept {
1869 // PHI-nodes are always moved to their stack-slot (unless they are fixed)
1870 //
1871 // However, we need to take care of PHI-dependencies (cycles and chains)
1872 // so as not to overwrite values which might still be needed.
1873 //
1874 // In most cases, we expect the number of PHIs to be small, but we want to
1875 // stay reasonably efficient even with larger numbers of PHIs.
1876
1877 struct ScratchWrapper {
1878 Derived *self;
1879 AsmReg cur_reg = AsmReg::make_invalid();
1880 bool backed_up = false;
1881 bool was_modified = false;
1882 u8 part = 0;
1883 ValLocalIdx local_idx = INVALID_VAL_LOCAL_IDX;
1884
1885 ScratchWrapper(Derived *self) : self{self} {}
1886
1887 ~ScratchWrapper() { reset(); }
1888
1889 void reset() {
1890 if (cur_reg.invalid()) {
1891 return;
1892 }
1893
1894 self->register_file.unmark_fixed(cur_reg);
1895 self->register_file.unmark_used(cur_reg);
1896
1897 if (backed_up) {
1898 // restore the register state
1899 // TODO(ts): do we actually need the reload?
1900 auto *assignment = self->val_assignment(local_idx);
1901 // check if the value was freed; in that case we don't need to restore
1902 // it
1903 if (assignment) {
1904 auto ap = AssignmentPartRef{assignment, part};
1905 if (!ap.variable_ref()) {
1906 // TODO(ts): assert that this always happens?
1907 assert(ap.stack_valid());
1908 self->load_from_stack(cur_reg, ap.frame_off(), ap.part_size());
1909 }
1910 ap.set_reg(cur_reg);
1911 ap.set_register_valid(true);
1912 ap.set_modified(was_modified);
1913 self->register_file.mark_used(cur_reg, local_idx, part);
1914 }
1915 backed_up = false;
1916 }
1917 cur_reg = AsmReg::make_invalid();
1918 }
1919
1920 AsmReg alloc_from_bank(RegBank bank) {
1921 if (cur_reg.valid() && self->register_file.reg_bank(cur_reg) == bank) {
1922 return cur_reg;
1923 }
1924 if (cur_reg.valid()) {
1925 reset();
1926 }
1927
1928 // TODO(ts): try to first find a non callee-saved/clobbered
1929 // register...
1930 auto &reg_file = self->register_file;
1931 auto reg = reg_file.find_first_free_excluding(bank, 0);
1932 if (reg.invalid()) {
1933 // TODO(ts): use clock here?
1934 reg = reg_file.find_first_nonfixed_excluding(bank, 0);
1935 if (reg.invalid()) {
1936 TPDE_FATAL("ran out of registers for scratch registers");
1937 }
1938
1939 backed_up = true;
1940 local_idx = reg_file.reg_local_idx(reg);
1941 part = reg_file.reg_part(reg);
1942 AssignmentPartRef ap{self->val_assignment(local_idx), part};
1943 was_modified = ap.modified();
1944 // TODO(ts): this does not spill for variable refs
1945 // We don't use evict_reg here, as we know that we can't change the
1946 // value state.
1947 assert(ap.register_valid() && ap.get_reg() == reg);
1948 if (!ap.stack_valid() && !ap.variable_ref()) {
1949 self->allocate_spill_slot(ap);
1950 self->spill_reg(ap.get_reg(), ap.frame_off(), ap.part_size());
1951 ap.set_stack_valid();
1952 }
1953 ap.set_register_valid(false);
1954 reg_file.unmark_used(reg);
1955 }
1956
1957 reg_file.mark_used(reg, INVALID_VAL_LOCAL_IDX, 0);
1958 reg_file.mark_clobbered(reg);
1959 reg_file.mark_fixed(reg);
1960 cur_reg = reg;
1961 return reg;
1962 }
1963
1964 ScratchWrapper &operator=(const ScratchWrapper &) = delete;
1965 ScratchWrapper &operator=(ScratchWrapper &&) = delete;
1966 };
1967
1968 IRBlockRef target_ref = analyzer.block_ref(target);
1969 IRBlockRef cur_ref = analyzer.block_ref(cur_block_idx);
1970
1971 // collect all the nodes
1972 struct NodeEntry {
1973 IRValueRef phi;
1974 IRValueRef incoming_val;
1975 ValLocalIdx phi_local_idx;
1976 // local idx of same-block phi node that needs special handling
1977 ValLocalIdx incoming_phi_local_idx = INVALID_VAL_LOCAL_IDX;
1978 // bool incoming_is_phi;
1979 u32 ref_count;
1980
1981 bool operator<(const NodeEntry &other) const noexcept {
1982 return phi_local_idx < other.phi_local_idx;
1983 }
1984
1985 bool operator<(ValLocalIdx other) const noexcept {
1986 return phi_local_idx < other;
1987 }
1988 };
1989
1990 util::SmallVector<NodeEntry, 16> nodes;
1991 for (IRValueRef phi : adaptor->block_phis(target_ref)) {
1992 ValLocalIdx phi_local_idx = adaptor->val_local_idx(phi);
1993 auto incoming = adaptor->val_as_phi(phi).incoming_val_for_block(cur_ref);
1994 nodes.emplace_back(NodeEntry{
1995 .phi = phi, .incoming_val = incoming, .phi_local_idx = phi_local_idx});
1996 }
1997
1998 // We check that the block has phi nodes before getting here.
1999 assert(!nodes.empty() && "block marked as having phi nodes has none");
2000
2001 ScratchWrapper scratch{derived()};
2002 const auto move_to_phi = [this, &scratch](IRValueRef phi,
2003 IRValueRef incoming_val) {
2004 auto phi_vr = derived()->result_ref(phi);
2005 auto val_vr = derived()->val_ref(incoming_val);
2006 if (phi == incoming_val) {
2007 return;
2008 }
2009
2010 u32 part_count = phi_vr.assignment()->part_count;
2011 for (u32 i = 0; i < part_count; ++i) {
2012 AssignmentPartRef phi_ap{phi_vr.assignment(), i};
2013 ValuePartRef val_vpr = val_vr.part(i);
2014
2015 if (phi_ap.fixed_assignment()) {
2016 if (AsmReg reg = val_vpr.cur_reg_unlocked(); reg.valid()) {
2017 derived()->mov(phi_ap.get_reg(), reg, phi_ap.part_size());
2018 } else {
2019 val_vpr.reload_into_specific_fixed(phi_ap.get_reg());
2020 }
2021 } else {
2022 AsmReg reg = val_vpr.cur_reg_unlocked();
2023 if (!reg.valid()) {
2024 reg = scratch.alloc_from_bank(val_vpr.bank());
2025 val_vpr.reload_into_specific_fixed(reg);
2026 }
2027 allocate_spill_slot(phi_ap);
2028 derived()->spill_reg(reg, phi_ap.frame_off(), phi_ap.part_size());
2029 phi_ap.set_stack_valid();
2030 }
2031 }
2032 };
2033
2034 if (nodes.size() == 1) {
2035 move_to_phi(nodes[0].phi, nodes[0].incoming_val);
2036 return;
2037 }
2038
2039 // sort so we can binary search later
2040 std::sort(nodes.begin(), nodes.end());
2041
2042 // fill in the refcount
2043 auto all_zero_ref = true;
2044 for (auto &node : nodes) {
2045 // We don't need to do anything for PHIs that don't reference other PHIs,
2046 // or for self-referencing PHIs.
2047 bool incoming_is_phi = adaptor->val_is_phi(node.incoming_val);
2048 if (!incoming_is_phi || node.incoming_val == node.phi) {
2049 continue;
2050 }
2051
2052 ValLocalIdx inc_local_idx = adaptor->val_local_idx(node.incoming_val);
2053 auto it = std::lower_bound(nodes.begin(), nodes.end(), inc_local_idx);
2054 if (it == nodes.end() || it->phi != node.incoming_val) {
2055 // Incoming value is a PHI node, but it's not from our block, so we don't
2056 // need to be particularly careful when assigning values.
2057 continue;
2058 }
2059 node.incoming_phi_local_idx = inc_local_idx;
2060 ++it->ref_count;
2061 all_zero_ref = false;
2062 }
2063
2064 if (all_zero_ref) {
2065 // no cycles/chain that we need to take care of
2066 for (auto &node : nodes) {
2067 move_to_phi(node.phi, node.incoming_val);
2068 }
2069 return;
2070 }
2071
2072 // TODO(ts): this is rather inefficient...
2073 util::SmallVector<u32, 32> ready_indices;
2074 ready_indices.reserve(nodes.size());
2075 util::SmallBitSet<256> waiting_nodes;
2076 waiting_nodes.resize(nodes.size());
2077 for (u32 i = 0; i < nodes.size(); ++i) {
2078 if (nodes[i].ref_count) {
2079 waiting_nodes.mark_set(i);
2080 } else {
2081 ready_indices.push_back(i);
2082 }
2083 }
2084
2085 u32 handled_count = 0;
2086 u32 cur_tmp_part_count = 0;
2087 i32 cur_tmp_slot = 0;
2088 u32 cur_tmp_slot_size = 0;
2089 IRValueRef cur_tmp_val = Adaptor::INVALID_VALUE_REF;
2090 ScratchWrapper tmp_reg1{derived()}, tmp_reg2{derived()};
2091
2092 const auto move_from_tmp_phi = [&](IRValueRef target_phi) {
2093 auto phi_vr = val_ref(target_phi);
2094 if (cur_tmp_part_count <= 2) {
2095 AssignmentPartRef ap{phi_vr.assignment(), 0};
2096 assert(!tmp_reg1.cur_reg.invalid());
2097 if (ap.fixed_assignment()) {
2098 derived()->mov(ap.get_reg(), tmp_reg1.cur_reg, ap.part_size());
2099 } else {
2100 derived()->spill_reg(tmp_reg1.cur_reg, ap.frame_off(), ap.part_size());
2101 }
2102
2103 if (cur_tmp_part_count == 2) {
2104 AssignmentPartRef ap_high{phi_vr.assignment(), 1};
2105 assert(!ap_high.fixed_assignment());
2106 assert(!tmp_reg2.cur_reg.invalid());
2107 derived()->spill_reg(
2108 tmp_reg2.cur_reg, ap_high.frame_off(), ap_high.part_size());
2109 }
2110 return;
2111 }
2112
2113 for (u32 i = 0; i < cur_tmp_part_count; ++i) {
2114 AssignmentPartRef phi_ap{phi_vr.assignment(), i};
2115 assert(!phi_ap.fixed_assignment());
2116
2117 auto slot_off = cur_tmp_slot + phi_ap.part_off();
2118 auto reg = tmp_reg1.alloc_from_bank(phi_ap.bank());
2119 derived()->load_from_stack(reg, slot_off, phi_ap.part_size());
2120 derived()->spill_reg(reg, phi_ap.frame_off(), phi_ap.part_size());
2121 }
2122 };
2123
2124 while (handled_count != nodes.size()) {
2125 if (ready_indices.empty()) {
2126 // need to break a cycle
2127 auto cur_idx_opt = waiting_nodes.first_set();
2128 assert(cur_idx_opt);
2129 auto cur_idx = *cur_idx_opt;
2130 assert(nodes[cur_idx].ref_count == 1);
2131 assert(cur_tmp_val == Adaptor::INVALID_VALUE_REF);
2132
2133 auto phi_val = nodes[cur_idx].phi;
2134 auto phi_vr = this->val_ref(phi_val);
2135 auto *assignment = phi_vr.assignment();
2136 cur_tmp_part_count = assignment->part_count;
2137 cur_tmp_val = phi_val;
2138
2139 if (cur_tmp_part_count > 2) {
2140 // use a stack slot to store the temporaries
2141 cur_tmp_slot_size = assignment->size();
2142 cur_tmp_slot = allocate_stack_slot(cur_tmp_slot_size);
2143
2144 for (u32 i = 0; i < cur_tmp_part_count; ++i) {
2145 auto ap = AssignmentPartRef{assignment, i};
2146 assert(!ap.fixed_assignment());
2147 auto slot_off = cur_tmp_slot + ap.part_off();
2148
2149 if (ap.register_valid()) {
2150 auto reg = ap.get_reg();
2151 derived()->spill_reg(reg, slot_off, ap.part_size());
2152 } else {
2153 auto reg = tmp_reg1.alloc_from_bank(ap.bank());
2154 assert(ap.stack_valid());
2155 derived()->load_from_stack(reg, ap.frame_off(), ap.part_size());
2156 derived()->spill_reg(reg, slot_off, ap.part_size());
2157 }
2158 }
2159 } else {
2160 // TODO(ts): if the PHI is not fixed, then we can just reuse its
2161 // register if it has one
2162 auto phi_vpr = phi_vr.part(0);
2163 auto reg = tmp_reg1.alloc_from_bank(phi_vpr.bank());
2164 phi_vpr.reload_into_specific_fixed(this, reg);
2165
2166 if (cur_tmp_part_count == 2) {
2167 // TODO(ts): just change the part ref on the lower ref?
2168 auto phi_vpr_high = phi_vr.part(1);
2169 auto reg_high = tmp_reg2.alloc_from_bank(phi_vpr_high.bank());
2170 phi_vpr_high.reload_into_specific_fixed(this, reg_high);
2171 }
2172 }
2173
2174 nodes[cur_idx].ref_count = 0;
2175 ready_indices.push_back(cur_idx);
2176 waiting_nodes.mark_unset(cur_idx);
2177 }
2178
2179 for (u32 i = 0; i < ready_indices.size(); ++i) {
2180 ++handled_count;
2181 auto cur_idx = ready_indices[i];
2182 auto phi_val = nodes[cur_idx].phi;
2183 IRValueRef incoming_val = nodes[cur_idx].incoming_val;
2184 if (incoming_val == phi_val) {
2185 // no need to do anything, except ref-counting
2186 (void)val_ref(incoming_val);
2187 (void)val_ref(phi_val);
2188 continue;
2189 }
2190
2191 if (incoming_val == cur_tmp_val) {
2192 move_from_tmp_phi(phi_val);
2193
2194 if (cur_tmp_part_count > 2) {
2195 free_stack_slot(cur_tmp_slot, cur_tmp_slot_size);
2196 cur_tmp_slot = 0xFFFF'FFFF;
2197 cur_tmp_slot_size = 0;
2198 }
2199 cur_tmp_val = Adaptor::INVALID_VALUE_REF;
2200 // skip the code below as the ref count of the temp val is
2201 // already 0 and we don't want to reinsert it into the ready
2202 // list
2203 continue;
2204 }
2205
2206 move_to_phi(phi_val, incoming_val);
2207
2208 if (nodes[cur_idx].incoming_phi_local_idx == INVALID_VAL_LOCAL_IDX) {
2209 continue;
2210 }
2211
2212 auto it = std::lower_bound(
2213 nodes.begin(), nodes.end(), nodes[cur_idx].incoming_phi_local_idx);
2214 assert(it != nodes.end() && it->phi == incoming_val &&
2215 "incoming_phi_local_idx set incorrectly");
2216
2217 assert(it->ref_count > 0);
2218 if (--it->ref_count == 0) {
2219 auto node_idx = static_cast<u32>(it - nodes.begin());
2220 ready_indices.push_back(node_idx);
2221 waiting_nodes.mark_unset(node_idx);
2222 }
2223 }
2224 ready_indices.clear();
2225 }
2226}
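// Worked example (annotation, not part of the original source): consider two
// PHIs in the target block that swap their values along this edge,
//   a = phi [..., (cur_block, b)]
//   b = phi [..., (cur_block, a)]
// Both nodes start with ref_count == 1, so ready_indices is initially empty
// and the cycle must be broken: the current value of the first waiting PHI
// (say a) is saved into a temporary register or stack slot (cur_tmp_val = a)
// and a becomes ready. Processing a assigns a = b and drops b's ref_count to
// zero; processing b then sees incoming_val == cur_tmp_val and assigns b from
// the saved temporary instead of the already overwritten a. A mere chain
// (a = phi [..., (cur_block, b)] where b's incoming value is not a PHI of the
// target block) needs no temporary: a has ref_count zero and is handled first,
// reading b before b is overwritten.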
2227
2228template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
2229typename CompilerBase<Adaptor, Derived, Config>::BlockIndex
2230 CompilerBase<Adaptor, Derived, Config>::next_block() const noexcept {
2231 return static_cast<BlockIndex>(static_cast<u32>(cur_block_idx) + 1);
2232}
2233
2234template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
2236 SymRef personality_sym;
2237 if (this->adaptor->cur_needs_unwind_info()) {
2238 SymRef personality_func = derived()->cur_personality_func();
2239 if (personality_func.valid()) {
2240 for (const auto &[fn_sym, ptr_sym] : personality_syms) {
2241 if (fn_sym == personality_func) {
2242 personality_sym = ptr_sym;
2243 break;
2244 }
2245 }
2246
2247 if (!personality_sym.valid()) {
2248 // create symbol that contains the address of the personality
2249 // function
2250 u32 off;
2251 static constexpr std::array<u8, 8> zero{};
2252
2253 auto rodata = this->assembler.get_data_section(true, true);
2254 personality_sym = this->assembler.sym_def_data(
2255 rodata, "", zero, 8, Assembler::SymBinding::LOCAL, &off);
2256 this->assembler.reloc_abs(rodata, personality_func, off, 0);
2257
2258 personality_syms.emplace_back(personality_func, personality_sym);
2259 }
2260 }
2261 }
2262 return personality_sym;
2263}
2264
2265template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
2266bool CompilerBase<Adaptor, Derived, Config>::compile_func(
2267 const IRFuncRef func, const u32 func_idx) noexcept {
2268 if (!adaptor->switch_func(func)) {
2269 return false;
2270 }
2271 derived()->analysis_start();
2272 analyzer.switch_func(func);
2273 derived()->analysis_end();
2274
2275#ifndef NDEBUG
2276 stack.frame_size = ~0u;
2277#endif
2278 for (auto &e : stack.fixed_free_lists) {
2279 e.clear();
2280 }
2281 stack.dynamic_free_lists.clear();
2282 // TODO: sort out the inconsistency about adaptor vs. compiler methods.
2283 stack.has_dynamic_alloca = this->adaptor->cur_has_dynamic_alloca();
2284 stack.is_leaf_function = !derived()->cur_func_may_emit_calls();
2285 stack.generated_call = false;
2286 stack.frame_used = false;
2287
2288 assignments.cur_fixed_assignment_count = {};
2289 assert(std::ranges::none_of(assignments.value_ptrs, std::identity{}));
2290 if (assignments.value_ptrs.size() < analyzer.liveness.size()) {
2291 assignments.value_ptrs.resize(analyzer.liveness.size());
2292 }
2293
2294 assignments.allocator.reset();
2295 assignments.variable_ref_list = INVALID_VAL_LOCAL_IDX;
2296 assignments.delayed_free_lists.clear();
2297 assignments.delayed_free_lists.resize(analyzer.block_layout.size(),
2298 INVALID_VAL_LOCAL_IDX);
2299
2300 cur_block_idx =
2301 static_cast<BlockIndex>(analyzer.block_idx(adaptor->cur_entry_block()));
2302
2303 register_file.reset();
2304#ifndef NDEBUG
2305 generating_branch = false;
2306#endif
2307
2308 // Simple heuristic for initial allocation size
2309 u32 expected_code_size = 0x8 * analyzer.num_insts + 0x40;
2310 this->text_writer.begin_func(/*alignment=*/16, expected_code_size);
2311
2312 derived()->start_func(func_idx);
2313
2314 block_labels.clear();
2315 block_labels.resize_uninitialized(analyzer.block_layout.size());
2316 for (u32 i = 0; i < analyzer.block_layout.size(); ++i) {
2317 block_labels[i] = text_writer.label_create();
2318 }
2319
2320 // TODO(ts): place function label
2321 // TODO(ts): make function labels optional?
2322
2323 CCAssigner *cc_assigner = derived()->cur_cc_assigner();
2324 assert(cc_assigner != nullptr);
2325
2326 register_file.allocatable = cc_assigner->get_ccinfo().allocatable_regs;
2327
2328 cc_assigner->reset();
2329 // Temporarily prevent argument registers from being assigned.
2330 const CCInfo &cc_info = cc_assigner->get_ccinfo();
2331 assert((cc_info.allocatable_regs & cc_info.arg_regs) == cc_info.arg_regs &&
2332 "argument registers must also be allocatable");
2333 this->register_file.allocatable &= ~cc_info.arg_regs;
2334
2335 // Begin prologue, prepare for handling arguments.
2336 derived()->prologue_begin(cc_assigner);
2337 u32 arg_idx = 0;
2338 for (const IRValueRef arg : this->adaptor->cur_args()) {
2339 // Init assignment for all arguments. This can be substituted for more
2340 // complex mappings of arguments to value parts.
2341 derived()->prologue_assign_arg(cc_assigner, arg_idx++, arg);
2342 }
2343 // Finish prologue, storing relevant data from the argument cc_assigner.
2344 derived()->prologue_end(cc_assigner);
2345
2346 this->register_file.allocatable |= cc_info.arg_regs;
2347
2348 for (const IRValueRef alloca : adaptor->cur_static_allocas()) {
2349 auto size = adaptor->val_alloca_size(alloca);
2350 size = util::align_up(size, adaptor->val_alloca_align(alloca));
2351
2352 ValLocalIdx local_idx = adaptor->val_local_idx(alloca);
2353 init_variable_ref(local_idx, 0);
2354 ValueAssignment *assignment = val_assignment(local_idx);
2355 assignment->stack_variable = true;
2356 assignment->frame_off = allocate_stack_slot(size);
2357 }
2358
2359 if constexpr (!Config::DEFAULT_VAR_REF_HANDLING) {
2360 derived()->setup_var_ref_assignments();
2361 }
2362
2363 for (u32 i = 0; i < analyzer.block_layout.size(); ++i) {
2364 const auto block_ref = analyzer.block_layout[i];
2365 TPDE_LOG_TRACE(
2366 "Compiling block {} ({})", i, adaptor->block_fmt_ref(block_ref));
2367 if (!derived()->compile_block(block_ref, i)) [[unlikely]] {
2368 TPDE_LOG_ERR("Failed to compile block {} ({})",
2369 i,
2370 adaptor->block_fmt_ref(block_ref));
2371 // Ensure invariant that value_ptrs only contains nullptr at the end.
2372 assignments.value_ptrs.clear();
2373 return false;
2374 }
2375 }
2376
2377 // Reset all variable-ref assignment pointers to nullptr.
2378 ValLocalIdx variable_ref_list = assignments.variable_ref_list;
2379 while (variable_ref_list != INVALID_VAL_LOCAL_IDX) {
2380 u32 idx = u32(variable_ref_list);
2381 ValLocalIdx next = assignments.value_ptrs[idx]->next_delayed_free_entry;
2382 assignments.value_ptrs[idx] = nullptr;
2383 variable_ref_list = next;
2384 }
2385
2386 assert(std::ranges::none_of(assignments.value_ptrs, std::identity{}) &&
2387 "found non-freed ValueAssignment, maybe missing ref-count?");
2388
2389 derived()->finish_func(func_idx);
2390 this->text_writer.finish_func();
2391
2392 return true;
2393}
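// Annotation (not part of the original source): compile_func is driven by
// CompilerBase::compile(), which iterates over the functions provided by the
// adaptor and compiles each body in turn. A simplified sketch of that loop,
// with symbol definition and error handling elided:
//
//   u32 func_idx = 0;
//   for (const IRFuncRef func : adaptor->funcs()) {
//     if (!derived()->compile_func(func, func_idx)) {
//       return false;
//     }
//     ++func_idx;
//   }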
2394
2395template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
2396bool CompilerBase<Adaptor, Derived, Config>::compile_block(
2397 const IRBlockRef block, const u32 block_idx) noexcept {
2398 cur_block_idx =
2399 static_cast<typename Analyzer<Adaptor>::BlockIndex>(block_idx);
2400
2401 label_place(block_labels[block_idx]);
2402 auto &&val_range = adaptor->block_insts(block);
2403 auto end = val_range.end();
2404 for (auto it = val_range.begin(); it != end; ++it) {
2405 const IRInstRef inst = *it;
2406 if (this->adaptor->inst_fused(inst)) {
2407 continue;
2408 }
2409
2410 auto it_cpy = it;
2411 ++it_cpy;
2412 if (!derived()->compile_inst(inst, InstRange{.from = it_cpy, .to = end}))
2413 [[unlikely]] {
2414 TPDE_LOG_ERR("Failed to compile instruction {}",
2415 this->adaptor->inst_fmt_ref(inst));
2416 return false;
2417 }
2418 }
2419
2420#ifndef NDEBUG
2421 // Some consistency checks. Register assignment information must match, all
2422 // used registers must have an assignment (no temporaries across blocks), and
2423 // fixed registers must be fixed assignments.
2424 for (auto reg_id : register_file.used_regs()) {
2425 Reg reg{reg_id};
2426 assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);
2427 AssignmentPartRef ap{val_assignment(register_file.reg_local_idx(reg)),
2428 register_file.reg_part(reg)};
2429 assert(ap.register_valid());
2430 assert(ap.get_reg() == reg);
2431 assert(!register_file.is_fixed(reg) || ap.fixed_assignment());
2432 }
2433#endif
2434
2435 if (static_cast<u32>(assignments.delayed_free_lists[block_idx]) != ~0u) {
2436 auto list_entry = assignments.delayed_free_lists[block_idx];
2437 while (static_cast<u32>(list_entry) != ~0u) {
2438 auto *assignment = assignments.value_ptrs[static_cast<u32>(list_entry)];
2439 auto next_entry = assignment->next_delayed_free_entry;
2440 derived()->free_assignment(list_entry, assignment);
2441 list_entry = next_entry;
2442 }
2443 }
2444 return true;
2445}
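// Example (illustrative sketch, not part of the original header): the
// InstRange passed to compile_inst exposes the not-yet-compiled remainder of
// the block. A derived compiler can use it to fuse patterns such as a compare
// feeding the immediately following conditional branch; the consumed
// instruction is then skipped by the loop above because adaptor->inst_fused()
// reports it as fused (how that is recorded is adaptor-specific). The names
// MyCompiler, Opcode and the helper functions below are hypothetical:
//
//   bool MyCompiler::compile_inst(IRInstRef inst, InstRange remaining) noexcept {
//     switch (adaptor->inst_opcode(inst)) {
//     case Opcode::cmp:
//       if (remaining.from != remaining.to &&
//           is_cond_br_using_only(*remaining.from, inst)) {
//         return compile_fused_cmp_br(inst, *remaining.from);
//       }
//       return compile_cmp(inst);
//     case Opcode::add: return compile_add(inst);
//     default: return false; // unsupported instruction
//     }
//   }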
2446
2447} // namespace tpde
@ LOCAL
Symbol with local linkage, must be defined.
Base class for target-specific CallBuilder implementations.
void call(std::variant< SymRef, ValuePart >) noexcept
Generate the function call (evict registers, call, reset stack frame).
void add_arg(const CallArg &arg) noexcept
Add a full IR value as argument.
void add_ret(ValuePart &&vp, CCAssignment cca) noexcept
Assign next return value part to vp.
void add_ret(ValuePart &vp, CCAssignment cca) noexcept
Assign next return value part to vp.
void add_arg(ValuePart &&vp, CCAssignment cca) noexcept
Add a value part as argument.
The IRAdaptor specifies the interface with which the IR-independent parts of the compiler interact wi...
Definition IRAdaptor.hpp:91
Call argument, enhancing an IRValueRef with information on how to pass it.
u8 ext_bits
For Flag::zext and Flag::sext, the source bit width.
@ byval
Value is copied into corresponding stack slot.
@ zext
Scalar integer, zero-extend to target-specific size.
@ sext
Scalar integer, sign-extend to target-specific size.
@ allow_split
Value parts can be split across stack/registers.
Flag flag
Value handling flag.
u8 byval_align
For Flag::byval, the stack alignment.
u32 byval_size
For Flag::byval, the argument size.
IRValueRef value
Argument IR value.
Owned unspillable and unevictable temporary register with RAII semantics.
AsmReg alloc_gp() noexcept
Allocate a general-purpose register.
A default implementation for ValRefSpecial.
bool has_dynamic_alloca
Whether the stack frame might have dynamic alloca.
AsmReg gval_as_reg_reuse(GenericValuePart &gv, ScratchReg &dst) noexcept
Like gval_as_reg; if the GenericValuePart owns a reusable register (either a ScratchReg,...
void reset()
Reset any leftover data from the previous compilation such that it will not affect the next compilati...
ValueRef result_ref_stack_slot(IRValueRef value, AssignmentPartRef base, i32 off) noexcept
bool compile()
Compile the functions returned by Adaptor::funcs.
void label_place(Label label) noexcept
Convenience function to place a label at the current position.
bool generated_call
Whether the function actually includes a call.
void spill(AssignmentPartRef ap) noexcept
Ensure the value is spilled in its stack slot (except variable refs).
void evict(AssignmentPartRef ap) noexcept
Evict the value from its register, spilling if needed, and free register.
void prologue_assign_arg(CCAssigner *cc_assigner, u32 arg_idx, IRValueRef arg, u32 align=1, bool allow_split=false) noexcept
Assign function argument in prologue.
RegisterFile::RegBitSet spill_before_branch(bool force_spill=false) noexcept
Spill values that need to be spilled for later blocks.
void init_variable_ref(ValLocalIdx local_idx, u32 var_ref_data) noexcept
Init a variable-ref assignment.
std::unordered_map< u32, std::vector< i32 > > dynamic_free_lists
Free-Lists for all other sizes.
AsmReg gval_as_reg(GenericValuePart &gv) noexcept
Get generic value part into a single register, evaluating expressions and materializing immediates as...
u32 frame_size
The current size of the stack frame.
void generate_switch(ScratchReg &&cond, u32 width, IRBlockRef default_block, std::span< const std::pair< u64, IRBlockRef > > cases) noexcept
Generate a switch at the end of a basic block.
void begin_branch_region() noexcept
Indicate beginning of region where value-state must not change.
ValueRef result_ref_alias(IRValueRef dst, ValueRef &&src) noexcept
std::pair< ValueRef, ValuePartRef > val_ref_single(IRValueRef value) noexcept
void init_variable_ref(IRValueRef value, u32 var_ref_data) noexcept
Init a variable-ref assignment.
void end_branch_region() noexcept
Indicate end of region where value-state must not change.
void reload_to_reg(AsmReg dst, AssignmentPartRef ap) noexcept
Reload a value part from memory or recompute variable address.
void free_stack_slot(u32 slot, u32 size) noexcept
Free a static stack slot.
bool frame_used
Whether the stack frame is used.
bool generating_branch
Whether we are currently in the middle of generating branch-related code and therefore must not chang...
CCAssigner * cur_cc_assigner() noexcept
Get CCAssigner for current function.
Derived * derived()
shortcut for casting to the Derived class so that overloading works
void free_reg(Reg reg) noexcept
Free the register. Requires that the contained value is already spilled.
void generate_cond_branch(Jump jmp, IRBlockRef true_target, IRBlockRef false_target) noexcept
Generate a conditional branch at the end of a basic block.
bool branch_needs_split(IRBlockRef target) noexcept
Whether branch to a block requires additional instructions and therefore a direct jump to the block i...
util::SmallVector< i32, 16 > fixed_free_lists[5]
Free-Lists for 1/2/4/8/16 sized allocations.
void generate_uncond_branch(IRBlockRef target) noexcept
Generate an unconditional branch at the end of a basic block.
Reg select_reg(RegBank bank, u64 exclusion_mask) noexcept
Select an available register, evicting loaded values if needed.
std::pair< ValueRef, ValuePartRef > result_ref_single(IRValueRef value) noexcept
CompilerBase(Adaptor *adaptor)
Initialize a CompilerBase, should be called by the derived classes.
void evict_reg(Reg reg) noexcept
Evict the value from the register, spilling if needed, and free register.
bool is_leaf_function
Whether the function is guaranteed to be a leaf function.
void release_assignment(ValLocalIdx local_idx, ValueAssignment *) noexcept
Release an assignment when reference count drops to zero, either frees the assignment immediately or ...
void allocate_spill_slot(AssignmentPartRef ap) noexcept
Allocate a stack slot for an assignment.
i32 allocate_stack_slot(u32 size) noexcept
Allocate a static stack slot.
void generate_branch_to_block(Jump jmp, IRBlockRef target, bool needs_split, bool last_inst) noexcept
Generate a branch to a basic block; execution continues afterwards.
void release_spilled_regs(typename RegisterFile::RegBitSet) noexcept