CompilerBase.hpp
1// SPDX-FileCopyrightText: 2025 Contributors to TPDE <https://tpde.org>
2//
3// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4#pragma once
5
6#include <algorithm>
7#include <functional>
8#include <unordered_map>
9#include <variant>
10
11#include "Analyzer.hpp"
12#include "Compiler.hpp"
13#include "CompilerConfig.hpp"
14#include "IRAdaptor.hpp"
15#include "tpde/AssignmentPartRef.hpp"
16#include "tpde/FunctionWriter.hpp"
17#include "tpde/RegisterFile.hpp"
18#include "tpde/ValLocalIdx.hpp"
19#include "tpde/ValueAssignment.hpp"
20#include "tpde/base.hpp"
21#include "tpde/util/function_ref.hpp"
22#include "tpde/util/misc.hpp"
23
24namespace tpde {
25// TODO(ts): formulate concept for full compiler so that there is *some* check
26// whether all the required derived methods are implemented?
27
28/// Thread-local storage access mode
29enum class TLSModel {
30 GlobalDynamic,
31 LocalDynamic,
32 InitialExec,
33 LocalExec,
34};
35
36struct CCAssignment {
37 Reg reg = Reg::make_invalid(); ///< Assigned register, invalid implies stack.
38
39 /// If non-zero, indicates that this and the next N values must be
40 /// assigned to consecutive registers, or to the stack. The following
41 /// values must be in the same register bank as this value. u8 is sufficient;
42 /// no architecture has more than 255 parameter registers in a single bank.
43 u8 consecutive = 0;
44 bool sret : 1 = false; ///< Argument is return value pointer.
45
46 /// The argument is passed by value on the stack. The provided argument is a
47 /// pointer; for the call, size bytes will be copied into the corresponding
48 /// stack slot. Behaves like LLVM's byval.
49 ///
50 /// Note: On x86-64 SysV, this is used to pass larger structs in memory. Note
51 /// that AArch64 AAPCS doesn't use byval for structs; instead, the pointer is
52 /// passed without byval and it is the responsibility of the caller to
53 /// explicitly copy the value.
54 bool byval : 1 = false;
55
56 /// Extend integer argument. The highest bit indicates signedness, lower bits
57 /// indicate the source width from which the argument should be extended.
58 u8 int_ext = 0;
59
60 u8 align = 0; ///< Argument alignment
61 RegBank bank = RegBank{}; ///< Register bank to assign the value to.
62 u32 size = 0; ///< Argument size, for byval the stack slot size.
63 u32 stack_off = 0; ///< Assigned stack slot, only valid if reg is invalid.
64};
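// Illustrative sketch (not part of the interface): filling a CCAssignment for a
// small integer argument that must be sign-extended. The int_ext encoding
// follows the field comment above (bit 7 = signedness, low bits = source width);
// the concrete values are assumptions for illustration only.
//
//   CCAssignment cca{};
//   cca.int_ext = 0x80 | 8; // sign-extend from 8 bits
//   cca.align = 1;
//   // bank/size are usually filled in from the ValuePart by add_arg() below.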
65
66struct CCInfo {
67 // TODO: use RegBitSet
68 const u64 allocatable_regs;
69 const u64 callee_saved_regs;
70 /// Possible argument registers; these registers will not be allocated until
71 /// all arguments have been assigned.
72 const u64 arg_regs;
73 /// Size of red zone below stack pointer.
74 const u8 red_zone_size = 0;
75};
76
77class CCAssigner {
78public:
79 const CCInfo *ccinfo;
80
81 CCAssigner(const CCInfo &ccinfo) : ccinfo(&ccinfo) {}
82 virtual ~CCAssigner() {}
83
84 virtual void reset() = 0;
85
86 const CCInfo &get_ccinfo() const { return *ccinfo; }
87
88 virtual void assign_arg(CCAssignment &cca) = 0;
89 virtual u32 get_stack_size() = 0;
90 /// Some calling conventions need different call behavior when calling a
91 /// vararg function.
92 virtual bool is_vararg() const { return false; }
93 virtual void assign_ret(CCAssignment &cca) = 0;
94};
95
96/// The base class for the compiler.
97 /// It implements the main platform-independent compilation logic and houses the
98 /// analyzer.
99template <IRAdaptor Adaptor,
100 typename Derived,
101 CompilerConfig Config = CompilerConfigDefault>
102struct CompilerBase {
103 // some forwards for the IR type defs
104 using IRValueRef = typename Adaptor::IRValueRef;
105 using IRInstRef = typename Adaptor::IRInstRef;
106 using IRBlockRef = typename Adaptor::IRBlockRef;
107 using IRFuncRef = typename Adaptor::IRFuncRef;
108
109 using BlockIndex = typename Analyzer<Adaptor>::BlockIndex;
110
111 using AsmReg = typename Config::AsmReg;
112
113 using RegisterFile = tpde::RegisterFile<Config::NUM_BANKS, 32>;
114
115 /// A default implementation for ValRefSpecial.
116 // Note: Subclasses can override this; always use Derived::ValRefSpecial.
117 struct ValRefSpecial {
118 uint8_t mode = 4;
119 u64 const_data;
120 };
121
122#pragma region CompilerData
123 Adaptor *adaptor;
124 Analyzer<Adaptor> analyzer;
125
126 // data for frame management
127
128 struct {
129 /// The current size of the stack frame
130 u32 frame_size = 0;
131 /// Whether the stack frame might have dynamic alloca. Dynamic allocas may
132 /// require a different and less efficient frame setup. As static allocas
133 /// can be converted into dynamic allocas, this is only valid after static
134 /// allocas were processed.
135 bool has_dynamic_alloca = false;
136 /// Whether the function is guaranteed to be a leaf function. Throughout the
137 /// entire function, the compiler may assume the absence of function calls.
138 bool is_leaf_function = false;
139 /// Whether the function actually includes a call. There are cases where it
140 /// is not clear from the beginning whether a function has function calls.
141 /// If a function has no calls, this will allow using the red zone
142 /// guaranteed by some ABIs.
143 bool generated_call = false;
144 /// Whether the stack frame is used. If the stack frame is never used, the
145 /// frame setup can in some cases be omitted entirely.
146 bool frame_used = false;
147 /// Free-Lists for 1/2/4/8/16 sized allocations
148 // TODO(ts): make the allocations for 4/8 different from the others
149 // since they are probably the ones most used?
150 util::SmallVector<i32, 16> fixed_free_lists[5] = {};
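    // Size-class sketch for the fixed free lists (mirrors allocate_stack_slot
    // below): sizes 1, 2, 3-4, 5-8 and 9-16 bytes map to lists 0-4 after
    // rounding up to the next power of two; larger sizes are aligned up to 16
    // and handled via dynamic_free_lists.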
151 /// Free-Lists for all other sizes
152 // TODO(ts): think about which data structure we want here
153 std::unordered_map<u32, std::vector<i32>> dynamic_free_lists{};
154 } stack = {};
155
156 typename Analyzer<Adaptor>::BlockIndex cur_block_idx;
157
158 // Assignments
159
160 static constexpr ValLocalIdx INVALID_VAL_LOCAL_IDX =
161 static_cast<ValLocalIdx>(~0u);
162
163 // TODO(ts): think about different ways to store this that are maybe more
164 // compact?
165 struct {
166 AssignmentAllocator allocator;
167
168 std::array<u32, Config::NUM_BANKS> cur_fixed_assignment_count = {};
169 util::SmallVector<ValueAssignment *, Analyzer<Adaptor>::SMALL_VALUE_NUM>
170 value_ptrs;
171
172 ValLocalIdx variable_ref_list;
173 util::SmallVector<ValLocalIdx, Analyzer<Adaptor>::SMALL_BLOCK_NUM>
174 delayed_free_lists;
175 } assignments = {};
176
177 RegisterFile register_file;
178#ifndef NDEBUG
179 /// Whether we are currently in the middle of generating branch-related code
180 /// and therefore must not change any value-related state.
181 bool generating_branch = false;
182#endif
183
184private:
185 /// Default CCAssigner if the implementation doesn't override cur_cc_assigner.
186 typename Config::DefaultCCAssigner default_cc_assigner;
187
188public:
189 typename Config::Assembler assembler;
190 Config::FunctionWriter text_writer;
191 // TODO(ts): smallvector?
192 std::vector<SymRef> func_syms;
193 // TODO(ts): combine this with the block vectors in the analyzer to save on
194 // allocations
195 util::SmallVector<Label> block_labels;
196
197 util::SmallVector<std::pair<SymRef, SymRef>, 4> personality_syms = {};
198
199 struct ScratchReg;
200 class ValuePart;
201 struct ValuePartRef;
202 struct ValueRef;
203 struct GenericValuePart;
204#pragma endregion
205
206 struct InstRange {
207 using Range = decltype(std::declval<Adaptor>().block_insts(
208 std::declval<IRBlockRef>()));
209 using Iter = decltype(std::declval<Range>().begin());
210 using EndIter = decltype(std::declval<Range>().end());
211 Iter from;
212 EndIter to;
213 };
214
215 /// Call argument, enhancing an IRValueRef with information on how to pass it.
216 struct CallArg {
217 enum class Flag : u8 {
218 none, ///< No extra handling.
219 zext, ///< Scalar integer, zero-extend to target-specific size.
220 sext, ///< Scalar integer, sign-extend to target-specific size.
221 sret, ///< Struct return pointer.
222 byval, ///< Value is copied into corresponding stack slot.
223 allow_split, ///< Value parts can be split across stack/registers.
224 };
225
226 explicit CallArg(IRValueRef value,
227 Flag flags = Flag::none,
228 u8 byval_align = 1,
229 u32 byval_size = 0)
230 : value(value),
231 flag(flags),
232 byval_align(byval_align),
233 byval_size(byval_size) {}
234
235 IRValueRef value; ///< Argument IR value.
236 Flag flag; ///< Value handling flag.
237 u8 byval_align; ///< For Flag::byval, the stack alignment.
238 u8 ext_bits = 0; ///< For Flag::zext and Flag::sext, the source bit width.
239 u32 byval_size; ///< For Flag::byval, the argument size.
240 };
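  // Construction sketch; the value names are hypothetical and only illustrate
  // the fields above:
  //
  //   CallArg plain{val};                                        // no extra handling
  //   CallArg mem{ptr, CallArg::Flag::byval, /*byval_align=*/8, /*byval_size=*/32};
  //   CallArg ext{i8val, CallArg::Flag::sext};
  //   ext.ext_bits = 8;                                          // source width in bits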
241
242 /// Base class for target-specific CallBuilder implementations.
243 template <typename CBDerived>
244 class CallBuilderBase {
245 protected:
246 Derived &compiler;
247 CCAssigner &assigner;
248
249 RegisterFile::RegBitSet arg_regs{};
250
251 CallBuilderBase(Derived &compiler, CCAssigner &assigner)
252 : compiler(compiler), assigner(assigner) {}
253
254 // CBDerived needs:
255 // void add_arg_byval(ValuePart &vp, CCAssignment &cca);
256 // void add_arg_stack(ValuePart &vp, CCAssignment &cca);
257 // void call_impl(std::variant<SymRef, ValuePart> &&);
258 CBDerived *derived() { return static_cast<CBDerived *>(this); }
259
260 public:
261 /// Add a value part as argument. cca must be populated with information
262 /// about the argument, except for the reg/stack_off, which are set by the
263 /// CCAssigner. If no register bank is assigned, the register bank and size
264 /// are retrieved from the value part; otherwise, the size must be set, too.
265 void add_arg(ValuePart &&vp, CCAssignment cca);
266 /// Add a full IR value as argument, with an explicit number of parts.
267 /// Values are decomposed into their parts and are typically either fully
268 /// in registers or fully on the stack (except CallArg::Flag::allow_split).
269 void add_arg(const CallArg &arg, u32 part_count);
270 /// Add a full IR value as argument. The number of value parts must be
271 /// exposed via val_parts. Values are decomposed into their parts and are
272 /// typically either fully in registers or fully on the stack (except
273 /// CallArg::Flag::allow_split).
274 void add_arg(const CallArg &arg) {
275 add_arg(std::move(arg), compiler.val_parts(arg.value).count());
276 }
277
278 /// Generate the function call (evict registers, call, reset stack frame).
279 void call(std::variant<SymRef, ValuePart>);
280
281 /// Assign next return value part to vp.
282 void add_ret(ValuePart &vp, CCAssignment cca);
283 /// Assign next return value part to vp.
284 void add_ret(ValuePart &&vp, CCAssignment cca) { add_ret(vp, cca); }
285 /// Assign return values to the IR value.
286 void add_ret(ValueRef &vr);
287 };
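  // Usage sketch: targets typically expose a concrete builder derived from
  // CallBuilderBase (the name "CallBuilder" below is illustrative, not mandated
  // by this header):
  //
  //   typename Derived::CallBuilder cb{*derived(), *cur_cc_assigner()};
  //   cb.add_arg(CallArg{arg_val});   // decomposed into parts, assigned to regs/stack
  //   cb.call(callee_sym);            // evicts clobbered regs and emits the call
  //   cb.add_ret(result_vr);          // binds the return registers to the result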
288
289 class RetBuilder {
290 Derived &compiler;
291 CCAssigner &assigner;
292
293 RegisterFile::RegBitSet ret_regs{};
294
295 public:
296 RetBuilder(Derived &compiler, CCAssigner &assigner)
297 : compiler(compiler), assigner(assigner) {
298 assigner.reset();
299 }
300
301 void add(ValuePart &&vp, CCAssignment cca);
302 void add(IRValueRef val);
303
304 void ret();
305 };
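  // Usage sketch for returning an IR value from the current function:
  //
  //   RetBuilder rb{*derived(), *cur_cc_assigner()};
  //   rb.add(ret_val); // moves/extends each part into its return register
  //   rb.ret();        // emits the epilogue and releases the registers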
306
307 /// Initialize a CompilerBase; should be called by the derived classes.
308 explicit CompilerBase(Adaptor *adaptor)
309 : adaptor(adaptor), analyzer(adaptor), assembler() {
310 static_assert(std::is_base_of_v<CompilerBase, Derived>);
311 static_assert(Compiler<Derived, Config>);
312 }
313
314 /// shortcut for casting to the Derived class so that overloading
315 /// works
316 Derived *derived() { return static_cast<Derived *>(this); }
317
318 const Derived *derived() const { return static_cast<const Derived *>(this); }
319
320 [[nodiscard]] ValLocalIdx val_idx(const IRValueRef value) const {
321 return analyzer.adaptor->val_local_idx(value);
322 }
323
324 [[nodiscard]] ValueAssignment *val_assignment(const ValLocalIdx idx) {
325 return assignments.value_ptrs[static_cast<u32>(idx)];
326 }
327
328 /// Compile the functions returned by Adaptor::funcs
329 ///
330 /// \warning If you intend to call this multiple times, you must call reset
331 /// in-between the calls.
332 ///
333 /// \returns Whether the compilation was successful
334 bool compile();
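  // Driving sketch (from user code, not this class): the concrete adaptor and
  // derived compiler types below are assumptions for illustration.
  //
  //   MyAdaptor adaptor{/* IR module */};
  //   MyCompiler compiler{&adaptor}; // MyCompiler derives from CompilerBase
  //   bool ok = compiler.compile();
  //   compiler.reset();              // required before compiling again
  //   ok &= compiler.compile();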
335
336 /// Reset any leftover data from the previous compilation such that it will
337 /// not affect the next compilation
338 void reset();
339
340 /// Get CCAssigner for current function.
341 CCAssigner *cur_cc_assigner() { return &default_cc_assigner; }
342
343 void init_assignment(IRValueRef value, ValLocalIdx local_idx);
344
345private:
346 /// Frees an assignment, its stack slot and registers
347 void free_assignment(ValLocalIdx local_idx, ValueAssignment *);
348
349public:
350 /// Release an assignment when reference count drops to zero, either frees
351 /// the assignment immediately or delays free to the end of the live range.
352 void release_assignment(ValLocalIdx local_idx, ValueAssignment *);
353
354 /// Init a variable-ref assignment
355 void init_variable_ref(ValLocalIdx local_idx, u32 var_ref_data);
356 /// Init a variable-ref assignment
357 void init_variable_ref(IRValueRef value, u32 var_ref_data) {
358 init_variable_ref(adaptor->val_local_idx(value), var_ref_data);
359 }
360
361 /// \name Stack Slots
362 /// @{
363
364 /// Allocate a static stack slot.
365 i32 allocate_stack_slot(u32 size);
366 /// Free a static stack slot.
367 void free_stack_slot(u32 slot, u32 size);
368
369 /// @}
370
371 /// Assign function argument in prologue. \ref align can be used to increase
372 /// the minimal stack alignment of the first part of the argument. If \ref
373 /// allow_split is set, the argument can be passed partially in registers,
374 /// otherwise (default) it must be either passed completely in registers or
375 /// completely on the stack.
376 void prologue_assign_arg(CCAssigner *cc_assigner,
377 u32 arg_idx,
378 IRValueRef arg,
379 u32 align = 1,
380 bool allow_split = false);
381
382 /// \name Value References
383 /// @{
384
385 /// Get a using reference to a value.
386 ValueRef val_ref(IRValueRef value);
387
388 /// Get a using reference to a single-part value and provide direct access to
389 /// the only part. This is a convenience function; note that the ValueRef must
390 /// outlive the ValuePartRef (i.e. auto p = val_ref().part(0); won't work, as
391 /// the value will possibly be deallocated when the ValueRef is destroyed).
392 std::pair<ValueRef, ValuePartRef> val_ref_single(IRValueRef value);
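  // Lifetime sketch for the note above:
  //
  //   auto [vr, vp] = val_ref_single(value); // OK: vr outlives vp
  //   auto bad = val_ref(value).part(0);     // wrong: the temporary ValueRef is
  //                                          // destroyed and may free the value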
393
394 /// Get a defining reference to a value.
395 ValueRef result_ref(IRValueRef value);
396
397 /// Get a defining reference to a single-part value and provide direct access
398 /// to the only part. Similar to val_ref_single().
399 std::pair<ValueRef, ValuePartRef> result_ref_single(IRValueRef value);
400
401 /// Make dst an alias for src, which must be a non-constant value with an
402 /// identical part configuration. src must be in its last use (is_owned()),
403 /// and the assignment will be repurposed for dst, keeping all assigned
404 /// registers and stack slots.
405 ValueRef result_ref_alias(IRValueRef dst, ValueRef &&src);
406
407 /// Initialize value as a pointer into a stack variable (i.e., a value
408 /// allocated from cur_static_allocas() or similar) with an offset. The
409 /// result value will be a stack variable itself.
410 ValueRef
411 result_ref_stack_slot(IRValueRef value, AssignmentPartRef base, i32 off);
412
413 /// @}
414
415 [[deprecated("Use ValuePartRef::set_value")]]
416 void set_value(ValuePartRef &val_ref, ScratchReg &scratch);
417 [[deprecated("Use ValuePartRef::set_value")]]
418 void set_value(ValuePartRef &&val_ref, ScratchReg &scratch) {
419 set_value(val_ref, scratch);
420 }
421
422 /// Get generic value part into a single register, evaluating expressions
423 /// and materializing immediates as required.
424 AsmReg gval_as_reg(GenericValuePart &gv);
425
426 /// Like gval_as_reg; if the GenericValuePart owns a reusable register
427 /// (either a ScratchReg, possibly due to materialization, or a reusable
428 /// ValuePartRef), store it in dst.
429 AsmReg gval_as_reg_reuse(GenericValuePart &gv, ScratchReg &dst);
430
431 /// Like gval_as_reg; if the GenericValuePart owns a reusable register
432 /// (either a ScratchReg, possibly due to materialization, or a reusable
433 /// ValuePartRef), store it in dst.
434 AsmReg gval_as_reg_reuse(GenericValuePart &gv, ValuePart &dst);
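  // Sketch: materialize a GenericValuePart and keep its register when it is
  // reusable. The ScratchReg constructor taking the compiler is an assumption
  // here; see ScratchReg.hpp for the actual interface.
  //
  //   ScratchReg tmp{derived()};
  //   AsmReg r = gval_as_reg_reuse(gv, tmp); // tmp owns r only if gv was reusable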
435
436private:
437 /// @internal Select register when a value needs to be evicted.
438 Reg select_reg_evict(RegBank bank);
439
440public:
441 /// \name Low-Level Assignment Register Handling
442 /// @{
443
444 /// Select an available register, evicting loaded values if needed.
445 Reg select_reg(RegBank bank) {
446 Reg res = register_file.find_first_free_excluding(bank, 0);
447 if (res.valid()) [[likely]] {
448 return res;
449 }
450 return select_reg_evict(bank);
451 }
452
453 /// Reload a value part from memory or recompute variable address.
454 void reload_to_reg(AsmReg dst, AssignmentPartRef ap);
455
456 /// Allocate a stack slot for an assignment.
457 void allocate_spill_slot(AssignmentPartRef ap);
458
459 /// Ensure the value is spilled in its stack slot (except variable refs).
460 void spill(AssignmentPartRef ap);
461
462 /// Evict the value from its register, spilling if needed, and free register.
463 void evict(AssignmentPartRef ap);
464
465 /// Evict the value from the register, spilling if needed, and free register.
466 void evict_reg(Reg reg);
467
468 /// Free the register. Requires that the contained value is already spilled.
469 void free_reg(Reg reg);
470
471 /// @}
472
473 /// \name High-Level Branch Generation
474 /// @{
475
476 /// Generate an unconditional branch at the end of a basic block. No further
477 /// instructions must follow. If target is the next block in the block order,
478 /// the branch is omitted.
479 void generate_uncond_branch(IRBlockRef target);
480
481 /// Generate a conditional branch at the end of a basic block.
482 template <typename Jump>
483 void generate_cond_branch(Jump jmp,
484 IRBlockRef true_target,
485 IRBlockRef false_target);
486
487 /// Generate a switch at the end of a basic block. Only the lowest \c width
488 /// bits of the condition are considered. The condition must be a general-purpose
489 /// register. The cases must be sorted and every case value must appear at
490 /// most once.
491 void generate_switch(ScratchReg &&cond,
492 u32 width,
493 IRBlockRef default_block,
494 std::span<const std::pair<u64, IRBlockRef>> cases);
495
496 /// @}
497
498 /// \name Low-Level Branch Primitives
499 /// The general flow of using these low-level primitives is:
500 /// 1. spill_before_branch()
501 /// 2. begin_branch_region()
502 /// 3. One or more calls to generate_branch_to_block()
503 /// 4. end_branch_region()
504 /// 5. release_spilled_regs()
505 /// @{
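  // A sketch of the sequence for a two-way conditional branch (Jump and the
  // condition value cond are target-specific; this only illustrates the steps
  // listed above):
  //
  //   auto spilled = spill_before_branch();
  //   begin_branch_region();
  //   generate_branch_to_block(cond, true_block,
  //                            branch_needs_split(true_block), false);
  //   generate_branch_to_block(Jump::jmp, false_block, false, true);
  //   end_branch_region();
  //   release_spilled_regs(spilled);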
506
507 // TODO(ts): switch to a branch_spill_before naming style?
508 /// Spill values that need to be spilled for later blocks. Returns the set
509 /// of registers that will be free'd at the end of the block; pass this to
510 /// release_spilled_regs().
511 typename RegisterFile::RegBitSet
512 spill_before_branch(bool force_spill = false);
513 /// Free registers marked by spill_before_branch().
514 void release_spilled_regs(typename RegisterFile::RegBitSet);
515
516 /// When reaching a point in the function where no other blocks will be
517 /// reached anymore, use this function to release register assignments after
518 /// the end of that block so the compiler does not accidentally use
519 /// registers which don't contain any values.
520 void release_regs_after_return();
521
522 /// Indicate beginning of region where value-state must not change.
523 void begin_branch_region() {
524#ifndef NDEBUG
525 assert(!generating_branch);
526 generating_branch = true;
527#endif
528 }
529
530 /// Indicate end of region where value-state must not change.
531 void end_branch_region() {
532#ifndef NDEBUG
533 assert(generating_branch);
534 generating_branch = false;
535#endif
536 }
537
538 /// Generate a branch to a basic block; execution continues afterwards.
539 /// Multiple calls to this function can be used to build conditional branches.
540 /// @tparam Jump Target-defined Jump type (e.g., CompilerX64::Jump).
541 /// @param needs_split Result of branch_needs_split(); pass false for an
542 /// unconditional branch.
543 /// @param last_inst Whether fall-through to target is possible.
544 template <typename Jump>
545 void generate_branch_to_block(Jump jmp,
546 IRBlockRef target,
547 bool needs_split,
548 bool last_inst);
549
550#ifndef NDEBUG
551 bool may_change_value_state() const { return !generating_branch; }
552#endif
553
554 void move_to_phi_nodes(BlockIndex target) {
555 if (analyzer.block_has_phis(target)) {
556 move_to_phi_nodes_impl(target);
557 }
558 }
559
560 void move_to_phi_nodes_impl(BlockIndex target);
561
562 /// Whether branch to a block requires additional instructions and therefore
563 /// a direct jump to the block is not possible.
564 bool branch_needs_split(IRBlockRef target) {
565 // for now, if the target has PHI-nodes, we split
566 return analyzer.block_has_phis(target);
567 }
568
569 /// @}
570
571 BlockIndex next_block() const;
572
573 bool try_force_fixed_assignment(IRValueRef) const { return false; }
574
575 bool hook_post_func_sym_init() { return true; }
576
577 void analysis_start() {}
578
579 void analysis_end() {}
580
581 void reloc_text(SymRef sym, u32 type, u64 offset, i64 addend = 0) {
582 this->assembler.reloc_sec(
583 text_writer.get_sec_ref(), sym, type, offset, addend);
584 }
585
586 /// Convenience function to place a label at the current position.
587 void label_place(Label label) {
588 this->text_writer.label_place(label, text_writer.offset());
589 }
590
591protected:
592 SymRef get_personality_sym();
593
594 bool compile_func(IRFuncRef func, u32 func_idx);
595
596 bool compile_block(IRBlockRef block, u32 block_idx);
597};
598} // namespace tpde
599
600#include "GenericValuePart.hpp"
601#include "ScratchReg.hpp"
602#include "ValuePartRef.hpp"
603#include "ValueRef.hpp"
604
605namespace tpde {
606
607template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
608template <typename CBDerived>
609void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
610 CBDerived>::add_arg(ValuePart &&vp, CCAssignment cca) {
611 if (!cca.byval && cca.bank == RegBank{}) {
612 cca.bank = vp.bank();
613 cca.size = vp.part_size();
614 }
615
616 assigner.assign_arg(cca);
617 bool needs_ext = cca.int_ext != 0;
618 bool ext_sign = cca.int_ext >> 7;
619 unsigned ext_bits = cca.int_ext & 0x3f;
620
621 if (cca.byval) {
622 derived()->add_arg_byval(vp, cca);
623 vp.reset(&compiler);
624 } else if (!cca.reg.valid()) {
625 if (needs_ext) {
626 auto ext = std::move(vp).into_extended(&compiler, ext_sign, ext_bits, 64);
627 derived()->add_arg_stack(ext, cca);
628 ext.reset(&compiler);
629 } else {
630 derived()->add_arg_stack(vp, cca);
631 }
632 vp.reset(&compiler);
633 } else {
634 u32 size = vp.part_size();
635 if (vp.is_in_reg(cca.reg)) {
636 if (!vp.can_salvage()) {
637 compiler.evict_reg(cca.reg);
638 } else {
639 vp.salvage(&compiler);
640 }
641 if (needs_ext) {
642 compiler.generate_raw_intext(cca.reg, cca.reg, ext_sign, ext_bits, 64);
643 }
644 } else {
645 if (compiler.register_file.is_used(cca.reg)) {
646 compiler.evict_reg(cca.reg);
647 }
648 if (vp.can_salvage()) {
649 AsmReg vp_reg = vp.salvage(&compiler);
650 if (needs_ext) {
651 compiler.generate_raw_intext(cca.reg, vp_reg, ext_sign, ext_bits, 64);
652 } else {
653 compiler.mov(cca.reg, vp_reg, size);
654 }
655 } else if (needs_ext && vp.is_const()) {
656 u64 val = vp.const_data()[0];
657 u64 extended =
658 ext_sign ? util::sext(val, ext_bits) : util::zext(val, ext_bits);
659 compiler.materialize_constant(&extended, cca.bank, 8, cca.reg);
660 } else {
661 vp.reload_into_specific_fixed(&compiler, cca.reg);
662 if (needs_ext) {
663 compiler.generate_raw_intext(
664 cca.reg, cca.reg, ext_sign, ext_bits, 64);
665 }
666 }
667 }
668 vp.reset(&compiler);
669 assert(!compiler.register_file.is_used(cca.reg));
670 compiler.register_file.mark_clobbered(cca.reg);
671 compiler.register_file.allocatable &= ~(u64{1} << cca.reg.id());
672 arg_regs |= (1ull << cca.reg.id());
673 }
674}
675
676template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
677template <typename CBDerived>
678void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
679 CBDerived>::add_arg(const CallArg &arg, u32 part_count) {
680 ValueRef vr = compiler.val_ref(arg.value);
681
682 if (arg.flag == CallArg::Flag::byval) {
683 assert(part_count == 1);
684 add_arg(vr.part(0),
685 CCAssignment{
686 .byval = true,
687 .align = arg.byval_align,
688 .size = arg.byval_size,
689 });
690 return;
691 }
692
693 u32 align = arg.byval_align;
694 bool allow_split = arg.flag == CallArg::Flag::allow_split;
695
696 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
697 u8 int_ext = 0;
698 if (arg.flag == CallArg::Flag::sext || arg.flag == CallArg::Flag::zext) {
699 assert(arg.ext_bits != 0 && "cannot extend zero-bit integer");
700 int_ext = arg.ext_bits | (arg.flag == CallArg::Flag::sext ? 0x80 : 0);
701 }
702 u32 remaining = part_count < 256 ? part_count - part_idx - 1 : 255;
703 derived()->add_arg(vr.part(part_idx),
704 CCAssignment{
705 .consecutive = u8(allow_split ? 0 : remaining),
706 .sret = arg.flag == CallArg::Flag::sret,
707 .int_ext = int_ext,
708 .align = u8(part_idx == 0 ? align : 1),
709 });
710 }
711}
712
713template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
714template <typename CBDerived>
715void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<CBDerived>::call(
716 std::variant<SymRef, ValuePart> target) {
717 assert(!compiler.stack.is_leaf_function && "leaf func must not have calls");
718 compiler.stack.generated_call = true;
719 typename RegisterFile::RegBitSet skip_evict = arg_regs;
720 if (auto *vp = std::get_if<ValuePart>(&target); vp && vp->can_salvage()) {
721 // call_impl will reset vp, thereby unlock+free the register.
722 assert(vp->cur_reg_unlocked().valid() && "can_salvage implies register");
723 skip_evict |= (1ull << vp->cur_reg_unlocked().id());
724 }
725
726 auto clobbered = ~assigner.get_ccinfo().callee_saved_regs;
727 for (auto reg_id : util::BitSetIterator<>{compiler.register_file.used &
728 clobbered & ~skip_evict}) {
729 compiler.evict_reg(AsmReg{reg_id});
730 compiler.register_file.mark_clobbered(Reg{reg_id});
731 }
732
733 derived()->call_impl(std::move(target));
734
735 assert((compiler.register_file.allocatable & arg_regs) == 0);
736 compiler.register_file.allocatable |= arg_regs;
737}
738
739template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
740template <typename CBDerived>
741void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
742 CBDerived>::add_ret(ValuePart &vp, CCAssignment cca) {
743 cca.bank = vp.bank();
744 cca.size = vp.part_size();
745 assigner.assign_ret(cca);
746 assert(cca.reg.valid() && "return value must be in register");
747 vp.set_value_reg(&compiler, cca.reg);
748}
749
750template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
751template <typename CBDerived>
752void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
753 CBDerived>::add_ret(ValueRef &vr) {
754 assert(vr.has_assignment());
755 u32 part_count = vr.assignment()->part_count;
756 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
757 CCAssignment cca;
758 add_ret(vr.part(part_idx), cca);
759 }
760}
761
762template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
763void CompilerBase<Adaptor, Derived, Config>::RetBuilder::add(ValuePart &&vp,
764 CCAssignment cca) {
765 cca.bank = vp.bank();
766 u32 size = cca.size = vp.part_size();
767 assigner.assign_ret(cca);
768 assert(cca.reg.valid() && "indirect return value must use sret argument");
769
770 bool needs_ext = cca.int_ext != 0;
771 bool ext_sign = cca.int_ext >> 7;
772 unsigned ext_bits = cca.int_ext & 0x3f;
773
774 if (vp.is_in_reg(cca.reg)) {
775 if (!vp.can_salvage()) {
776 compiler.evict_reg(cca.reg);
777 } else {
778 vp.salvage(&compiler);
779 }
780 if (needs_ext) {
781 compiler.generate_raw_intext(cca.reg, cca.reg, ext_sign, ext_bits, 64);
782 }
783 } else {
784 if (compiler.register_file.is_used(cca.reg)) {
785 compiler.evict_reg(cca.reg);
786 }
787 if (vp.can_salvage()) {
788 AsmReg vp_reg = vp.salvage(&compiler);
789 if (needs_ext) {
790 compiler.generate_raw_intext(cca.reg, vp_reg, ext_sign, ext_bits, 64);
791 } else {
792 compiler.mov(cca.reg, vp_reg, size);
793 }
794 } else {
795 vp.reload_into_specific_fixed(&compiler, cca.reg);
796 if (needs_ext) {
797 compiler.generate_raw_intext(cca.reg, cca.reg, ext_sign, ext_bits, 64);
798 }
799 }
800 }
801 vp.reset(&compiler);
802 assert(!compiler.register_file.is_used(cca.reg));
803 compiler.register_file.mark_clobbered(cca.reg);
804 compiler.register_file.allocatable &= ~(u64{1} << cca.reg.id());
805 ret_regs |= (1ull << cca.reg.id());
806}
807
808template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
809void CompilerBase<Adaptor, Derived, Config>::RetBuilder::add(IRValueRef val) {
810 u32 part_count = compiler.val_parts(val).count();
811 ValueRef vr = compiler.val_ref(val);
812 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
813 add(vr.part(part_idx), CCAssignment{});
814 }
815}
816
817template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
818void CompilerBase<Adaptor, Derived, Config>::RetBuilder::ret() {
819 assert((compiler.register_file.allocatable & ret_regs) == 0);
820 compiler.register_file.allocatable |= ret_regs;
821
822 compiler.gen_func_epilog();
823 compiler.release_regs_after_return();
824}
825
826template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
827bool CompilerBase<Adaptor, Derived, Config>::compile() {
828 // create function symbols
829 text_writer.begin_module(assembler);
830 text_writer.switch_section(
831 assembler.get_section(assembler.get_default_section(SectionKind::Text)));
832
833 assert(func_syms.empty());
834 for (const IRFuncRef func : adaptor->funcs()) {
835 auto binding = Assembler::SymBinding::GLOBAL;
836 if (adaptor->func_has_weak_linkage(func)) {
837 binding = Assembler::SymBinding::WEAK;
838 } else if (adaptor->func_only_local(func)) {
839 binding = Assembler::SymBinding::LOCAL;
840 }
841 if (adaptor->func_extern(func)) {
842 func_syms.push_back(derived()->assembler.sym_add_undef(
843 adaptor->func_link_name(func), binding));
844 } else {
845 func_syms.push_back(derived()->assembler.sym_predef_func(
846 adaptor->func_link_name(func), binding));
847 }
848 derived()->define_func_idx(func, func_syms.size() - 1);
849 }
850
851 if (!derived()->hook_post_func_sym_init()) {
852 TPDE_LOG_ERR("hook_post_func_sym_init failed");
853 return false;
854 }
855
856 // TODO(ts): create function labels?
857
858 bool success = true;
859
860 u32 func_idx = 0;
861 for (const IRFuncRef func : adaptor->funcs()) {
862 if (adaptor->func_extern(func)) {
863 TPDE_LOG_TRACE("Skipping compilation of func {}",
864 adaptor->func_link_name(func));
865 ++func_idx;
866 continue;
867 }
868
869 TPDE_LOG_TRACE("Compiling func {}", adaptor->func_link_name(func));
870 if (!derived()->compile_func(func, func_idx)) {
871 TPDE_LOG_ERR("Failed to compile function {}",
872 adaptor->func_link_name(func));
873 success = false;
874 }
875 ++func_idx;
876 }
877
878 text_writer.flush();
879 text_writer.end_module();
880 assembler.finalize();
881
882 // TODO(ts): generate object/map?
883
884 return success;
885}
886
887template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
888void CompilerBase<Adaptor, Derived, Config>::reset() {
889 adaptor->reset();
890
891 for (auto &e : stack.fixed_free_lists) {
892 e.clear();
893 }
894 stack.dynamic_free_lists.clear();
895
896 assembler.reset();
897 func_syms.clear();
898 block_labels.clear();
899 personality_syms.clear();
900}
901
902template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
903void CompilerBase<Adaptor, Derived, Config>::init_assignment(
904 IRValueRef value, ValLocalIdx local_idx) {
905 assert(val_assignment(local_idx) == nullptr);
906 TPDE_LOG_TRACE("Initializing assignment for value {}",
907 static_cast<u32>(local_idx));
908
909 const auto parts = derived()->val_parts(value);
910 const u32 part_count = parts.count();
911 assert(part_count > 0);
912 auto *assignment = assignments.allocator.allocate(part_count);
913 assignments.value_ptrs[static_cast<u32>(local_idx)] = assignment;
914
915 u32 max_part_size = 0;
916 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
917 auto ap = AssignmentPartRef{assignment, part_idx};
918 ap.reset();
919 ap.set_bank(parts.reg_bank(part_idx));
920 const u32 size = parts.size_bytes(part_idx);
921 assert(size > 0);
922 max_part_size = std::max(max_part_size, size);
923 ap.set_part_size(size);
924 }
925
926 const auto &liveness = analyzer.liveness_info(local_idx);
927
928 // if there is only one part, try to hand out a fixed assignment
929 // if the value is used for longer than one block and there aren't too many
930 // definitions in child loops that this could interfere with
931 // TODO(ts): try out only fixed assignments if the value is live for more
932 // than two blocks?
933 // TODO(ts): move this to ValuePartRef::alloc_reg to be able to defer this
934 // for results?
935 if (part_count == 1) {
936 const auto &cur_loop =
937 analyzer.loop_from_idx(analyzer.block_loop_idx(cur_block_idx));
938 auto ap = AssignmentPartRef{assignment, 0};
939
940 auto try_fixed =
941 liveness.last > cur_block_idx &&
942 cur_loop.definitions_in_childs +
943 assignments.cur_fixed_assignment_count[ap.bank().id()] <
944 Derived::NUM_FIXED_ASSIGNMENTS[ap.bank().id()];
945 if (derived()->try_force_fixed_assignment(value)) {
946 try_fixed = assignments.cur_fixed_assignment_count[ap.bank().id()] <
947 Derived::NUM_FIXED_ASSIGNMENTS[ap.bank().id()];
948 }
949
950 if (try_fixed) {
951 // check if there is a fixed register available
952 AsmReg reg = derived()->select_fixed_assignment_reg(ap, value);
953 TPDE_LOG_TRACE("Trying to assign fixed reg to value {}",
954 static_cast<u32>(local_idx));
955
956 // TODO: if the register is used, we can free it most of the time, but not
957 // always, e.g. for PHI nodes. Detect this case and free_reg otherwise.
958 if (!reg.invalid() && !register_file.is_used(reg)) {
959 TPDE_LOG_TRACE("Assigning fixed assignment to reg {} for value {}",
960 reg.id(),
961 static_cast<u32>(local_idx));
962 ap.set_reg(reg);
963 ap.set_register_valid(true);
964 ap.set_fixed_assignment(true);
965 register_file.mark_used(reg, local_idx, 0);
966 register_file.inc_lock_count(reg); // fixed assignments always locked
967 register_file.mark_clobbered(reg);
968 ++assignments.cur_fixed_assignment_count[ap.bank().id()];
969 }
970 }
971 }
972
973 const auto last_full = liveness.last_full;
974 const auto ref_count = liveness.ref_count;
975
976 assert(max_part_size <= 256);
977 assignment->max_part_size = max_part_size;
978 assignment->pending_free = false;
979 assignment->variable_ref = false;
980 assignment->stack_variable = false;
981 assignment->delay_free = last_full;
982 assignment->part_count = part_count;
983 assignment->frame_off = 0;
984 assignment->references_left = ref_count;
985}
986
987template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
988void CompilerBase<Adaptor, Derived, Config>::free_assignment(
989 ValLocalIdx local_idx, ValueAssignment *assignment) {
990 TPDE_LOG_TRACE("Freeing assignment for value {}",
991 static_cast<u32>(local_idx));
992
993 assert(assignments.value_ptrs[static_cast<u32>(local_idx)] == assignment);
994 assignments.value_ptrs[static_cast<u32>(local_idx)] = nullptr;
995 const auto is_var_ref = assignment->variable_ref;
996 const u32 part_count = assignment->part_count;
997
998 // free registers
999 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
1000 auto ap = AssignmentPartRef{assignment, part_idx};
1001 if (ap.fixed_assignment()) [[unlikely]] {
1002 const auto reg = ap.get_reg();
1003 assert(register_file.is_fixed(reg));
1004 assert(register_file.reg_local_idx(reg) == local_idx);
1005 assert(register_file.reg_part(reg) == part_idx);
1006 --assignments.cur_fixed_assignment_count[ap.bank().id()];
1007 register_file.dec_lock_count_must_zero(reg); // release lock for fixed reg
1008 register_file.unmark_used(reg);
1009 } else if (ap.register_valid()) {
1010 const auto reg = ap.get_reg();
1011 assert(!register_file.is_fixed(reg));
1012 register_file.unmark_used(reg);
1013 }
1014 }
1015
1016 if constexpr (WithAsserts) {
1017 for (auto reg_id : register_file.used_regs()) {
1018 assert(register_file.reg_local_idx(AsmReg{reg_id}) != local_idx &&
1019 "freeing assignment that is still referenced by a register");
1020 }
1021 }
1022
1023 // variable references do not have a stack slot
1024 bool has_stack = Config::FRAME_INDEXING_NEGATIVE ? assignment->frame_off < 0
1025 : assignment->frame_off != 0;
1026 if (!is_var_ref && has_stack) {
1027 free_stack_slot(assignment->frame_off, assignment->size());
1028 }
1029
1030 assignments.allocator.deallocate(assignment);
1031}
1032
1033template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1034[[gnu::noinline]] void
1035CompilerBase<Adaptor, Derived, Config>::release_assignment(
1036 ValLocalIdx local_idx, ValueAssignment *assignment) {
1037 if (!assignment->delay_free) {
1038 free_assignment(local_idx, assignment);
1039 return;
1040 }
1041
1042 // need to wait until release
1043 TPDE_LOG_TRACE("Delay freeing assignment for value {}",
1044 static_cast<u32>(local_idx));
1045 const auto &liveness = analyzer.liveness_info(local_idx);
1046 auto &free_list_head = assignments.delayed_free_lists[u32(liveness.last)];
1047 assignment->next_delayed_free_entry = free_list_head;
1048 assignment->pending_free = true;
1049 free_list_head = local_idx;
1050}
1051
1052template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1053void CompilerBase<Adaptor, Derived, Config>::init_variable_ref(
1054 ValLocalIdx local_idx, u32 var_ref_data) {
1055 TPDE_LOG_TRACE("Initializing variable-ref assignment for value {}",
1056 static_cast<u32>(local_idx));
1057
1058 assert(val_assignment(local_idx) == nullptr);
1059 auto *assignment = assignments.allocator.allocate_slow(1, true);
1060 assignments.value_ptrs[static_cast<u32>(local_idx)] = assignment;
1061
1062 assignment->max_part_size = Config::PLATFORM_POINTER_SIZE;
1063 assignment->variable_ref = true;
1064 assignment->stack_variable = false;
1065 assignment->part_count = 1;
1066 assignment->var_ref_custom_idx = var_ref_data;
1067 assignment->next_delayed_free_entry = assignments.variable_ref_list;
1068
1069 assignments.variable_ref_list = local_idx;
1070
1071 AssignmentPartRef ap{assignment, 0};
1072 ap.reset();
1073 ap.set_bank(Config::GP_BANK);
1074 ap.set_part_size(Config::PLATFORM_POINTER_SIZE);
1075}
1076
1077template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1078i32 CompilerBase<Adaptor, Derived, Config>::allocate_stack_slot(u32 size) {
1079 this->stack.frame_used = true;
1080 unsigned align_bits = 4;
1081 if (size == 0) {
1082 return 0; // 0 is the "invalid" stack slot
1083 } else if (size <= 16) {
1084 // Align up to next power of two.
1085 u32 free_list_idx = size == 1 ? 0 : 32 - util::cnt_lz<u32>(size - 1);
1086 assert(size <= 1u << free_list_idx);
1087 size = 1 << free_list_idx;
1088 align_bits = free_list_idx;
1089
1090 if (!stack.fixed_free_lists[free_list_idx].empty()) {
1091 auto slot = stack.fixed_free_lists[free_list_idx].back();
1092 stack.fixed_free_lists[free_list_idx].pop_back();
1093 return slot;
1094 }
1095 } else {
1096 size = util::align_up(size, 16);
1097 auto it = stack.dynamic_free_lists.find(size);
1098 if (it != stack.dynamic_free_lists.end() && !it->second.empty()) {
1099 const auto slot = it->second.back();
1100 it->second.pop_back();
1101 return slot;
1102 }
1103 }
1104
1105 assert(stack.frame_size != ~0u &&
1106 "cannot allocate stack slot before stack frame is initialized");
1107
1108 // Align frame_size to align_bits
1109 for (u32 list_idx = util::cnt_tz(stack.frame_size); list_idx < align_bits;
1110 list_idx = util::cnt_tz(stack.frame_size)) {
1111 i32 slot = stack.frame_size;
1112 if constexpr (Config::FRAME_INDEXING_NEGATIVE) {
1113 slot = -(slot + (1ull << list_idx));
1114 }
1115 stack.fixed_free_lists[list_idx].push_back(slot);
1116 stack.frame_size += 1ull << list_idx;
1117 }
1118
1119 auto slot = stack.frame_size;
1120 assert(slot != 0 && "stack slot 0 is reserved");
1121 stack.frame_size += size;
1122
1123 if constexpr (Config::FRAME_INDEXING_NEGATIVE) {
1124 slot = -(slot + size);
1125 }
1126 return slot;
1127}
1128
1129template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1130void CompilerBase<Adaptor, Derived, Config>::free_stack_slot(u32 slot,
1131 u32 size) {
1132 if (size == 0) [[unlikely]] {
1133 assert(slot == 0 && "unexpected slot for zero-sized stack-slot?");
1134 // Do nothing.
1135 } else if (size <= 16) [[likely]] {
1136 u32 free_list_idx = size == 1 ? 0 : 32 - util::cnt_lz<u32>(size - 1);
1137 stack.fixed_free_lists[free_list_idx].push_back(slot);
1138 } else {
1139 size = util::align_up(size, 16);
1140 stack.dynamic_free_lists[size].push_back(slot);
1141 }
1142}
1143
1144template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1145void CompilerBase<Adaptor, Derived, Config>::prologue_assign_arg(
1146 CCAssigner *cc_assigner,
1147 u32 arg_idx,
1148 IRValueRef arg,
1149 u32 align,
1150 bool allow_split) {
1151 ValueRef vr = derived()->result_ref(arg);
1152 if (adaptor->cur_arg_is_byval(arg_idx)) {
1153 CCAssignment cca{
1154 .byval = true,
1155 .align = u8(adaptor->cur_arg_byval_align(arg_idx)),
1156 .size = adaptor->cur_arg_byval_size(arg_idx),
1157 };
1158 cc_assigner->assign_arg(cca);
1159 std::optional<i32> byval_frame_off =
1160 derived()->prologue_assign_arg_part(vr.part(0), cca);
1161
1162 if (byval_frame_off) {
1163 // We need to convert the assignment into a stack variable ref.
1164 ValLocalIdx local_idx = val_idx(arg);
1165 // TODO: we shouldn't create the result_ref for such cases in the first
1166 // place. However, this is not easy to detect up front, it depends on the
1167 // target and the calling convention whether this is possible.
1168 vr.reset();
1169 // Value assignment might have been free'd by ValueRef reset.
1170 if (ValueAssignment *assignment = val_assignment(local_idx)) {
1171 free_assignment(local_idx, assignment);
1172 }
1173 init_variable_ref(local_idx, 0);
1174 ValueAssignment *assignment = this->val_assignment(local_idx);
1175 assignment->stack_variable = true;
1176 assignment->frame_off = *byval_frame_off;
1177 }
1178 return;
1179 }
1180
1181 if (adaptor->cur_arg_is_sret(arg_idx)) {
1182 assert(vr.assignment()->part_count == 1 && "sret must be single-part");
1183 ValuePartRef vp = vr.part(0);
1184 CCAssignment cca{
1185 .sret = true, .bank = vp.bank(), .size = Config::PLATFORM_POINTER_SIZE};
1186 cc_assigner->assign_arg(cca);
1187 derived()->prologue_assign_arg_part(std::move(vp), cca);
1188 return;
1189 }
1190
1191 const u32 part_count = vr.assignment()->part_count;
1192 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
1193 ValuePartRef vp = vr.part(part_idx);
1194 u32 remaining = part_count < 256 ? part_count - part_idx - 1 : 255;
1195 CCAssignment cca{
1196 .consecutive = u8(allow_split ? 0 : remaining),
1197 .align = u8(part_idx == 0 ? align : 1),
1198 .bank = vp.bank(),
1199 .size = vp.part_size(),
1200 };
1201 cc_assigner->assign_arg(cca);
1202 derived()->prologue_assign_arg_part(std::move(vp), cca);
1203 }
1204}
1205
1206template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1207typename CompilerBase<Adaptor, Derived, Config>::ValueRef
1208CompilerBase<Adaptor, Derived, Config>::val_ref(IRValueRef value) {
1209 if (auto special = derived()->val_ref_special(value); special) {
1210 return ValueRef{this, std::move(*special)};
1211 }
1212
1213 const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(value);
1214 assert(val_assignment(local_idx) != nullptr && "value use before def");
1215 return ValueRef{this, local_idx};
1216}
1217
1218template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1219std::pair<typename CompilerBase<Adaptor, Derived, Config>::ValueRef,
1220 typename CompilerBase<Adaptor, Derived, Config>::ValuePartRef>
1221CompilerBase<Adaptor, Derived, Config>::val_ref_single(IRValueRef value) {
1222 std::pair<ValueRef, ValuePartRef> res{val_ref(value), this};
1223 res.second = res.first.part(0);
1224 return res;
1225}
1226
1227template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1228typename CompilerBase<Adaptor, Derived, Config>::ValueRef
1229CompilerBase<Adaptor, Derived, Config>::result_ref(IRValueRef value) {
1230 const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(value);
1231 if (val_assignment(local_idx) == nullptr) {
1232 init_assignment(value, local_idx);
1233 }
1234 return ValueRef{this, local_idx};
1235}
1236
1237template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1238std::pair<typename CompilerBase<Adaptor, Derived, Config>::ValueRef,
1239 typename CompilerBase<Adaptor, Derived, Config>::ValuePartRef>
1240CompilerBase<Adaptor, Derived, Config>::result_ref_single(
1241 IRValueRef value) {
1242 std::pair<ValueRef, ValuePartRef> res{result_ref(value), this};
1243 res.second = res.first.part(0);
1244 return res;
1245}
1246
1247template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1248typename CompilerBase<Adaptor, Derived, Config>::ValueRef
1249CompilerBase<Adaptor, Derived, Config>::result_ref_alias(IRValueRef dst,
1250 ValueRef &&src) {
1251 const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(dst);
1252 assert(!val_assignment(local_idx) && "alias target already defined");
1253 assert(src.has_assignment() && "alias src must have an assignment");
1254 // Consider implementing aliases where multiple values share the same
1255 // assignment, e.g. for implementing trunc as a no-op sharing registers. Live
1256 // ranges can be merged and reference counts added. However, this needs
1257 // additional infrastructure to clear one of the value_ptrs.
1258 assert(src.is_owned() && "alias src must be owned");
1259
1260 ValueAssignment *assignment = src.assignment();
1261 u32 part_count = assignment->part_count;
1262 assert(!assignment->pending_free);
1263 assert(!assignment->variable_ref);
1264 assert(!assignment->pending_free);
1265 if constexpr (WithAsserts) {
1266 const auto &src_liveness = analyzer.liveness_info(src.local_idx());
1267 assert(!src_liveness.last_full); // implied by is_owned()
1268 assert(assignment->references_left == 1); // implied by is_owned()
1269
1270 // Validate that part configuration is identical.
1271 const auto parts = derived()->val_parts(dst);
1272 assert(parts.count() == part_count);
1273 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
1274 AssignmentPartRef ap{assignment, part_idx};
1275 assert(parts.reg_bank(part_idx) == ap.bank());
1276 assert(parts.size_bytes(part_idx) == ap.part_size());
1277 }
1278 }
1279
1280 // Update local_idx of registers.
1281 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
1282 AssignmentPartRef ap{assignment, part_idx};
1283 if (ap.register_valid()) {
1284 register_file.update_reg_assignment(ap.get_reg(), local_idx, part_idx);
1285 }
1286 }
1287
1288 const auto &liveness = analyzer.liveness_info(local_idx);
1289 assignment->delay_free = liveness.last_full;
1290 assignment->references_left = liveness.ref_count;
1291 assignments.value_ptrs[static_cast<u32>(src.local_idx())] = nullptr;
1292 assignments.value_ptrs[static_cast<u32>(local_idx)] = assignment;
1293 src.disown();
1294 src.reset();
1295
1296 return ValueRef{this, local_idx};
1297}
1298
1299template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1300typename CompilerBase<Adaptor, Derived, Config>::ValueRef
1301CompilerBase<Adaptor, Derived, Config>::result_ref_stack_slot(
1302 IRValueRef dst, AssignmentPartRef base, i32 off) {
1303 const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(dst);
1304 assert(!val_assignment(local_idx) && "new value already defined");
1305 init_variable_ref(local_idx, 0);
1306 ValueAssignment *assignment = this->val_assignment(local_idx);
1307 assignment->stack_variable = true;
1308 assignment->frame_off = base.variable_stack_off() + off;
1309 return ValueRef{this, local_idx};
1310}
1311
1312template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1313void CompilerBase<Adaptor, Derived, Config>::set_value(ValuePartRef &val_ref,
1314 ScratchReg &scratch) {
1315 val_ref.set_value(std::move(scratch));
1316}
1317
1318template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1319typename CompilerBase<Adaptor, Derived, Config>::AsmReg
1320CompilerBase<Adaptor, Derived, Config>::gval_as_reg(GenericValuePart &gv) {
1321 if (std::holds_alternative<ScratchReg>(gv.state)) {
1322 return std::get<ScratchReg>(gv.state).cur_reg();
1323 }
1324 if (std::holds_alternative<ValuePartRef>(gv.state)) {
1325 auto &vpr = std::get<ValuePartRef>(gv.state);
1326 if (vpr.has_reg()) {
1327 return vpr.cur_reg();
1328 }
1329 return vpr.load_to_reg();
1330 }
1331 if (auto *expr = std::get_if<typename GenericValuePart::Expr>(&gv.state)) {
1332 if (expr->has_base() && !expr->has_index() && expr->disp == 0) {
1333 return expr->base_reg();
1334 }
1335 return derived()->gval_expr_as_reg(gv);
1336 }
1337 TPDE_UNREACHABLE("gval_as_reg on empty GenericValuePart");
1338}
1339
1340template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1341typename CompilerBase<Adaptor, Derived, Config>::AsmReg
1342CompilerBase<Adaptor, Derived, Config>::gval_as_reg_reuse(
1343 GenericValuePart &gv, ScratchReg &dst) {
1344 AsmReg reg = gval_as_reg(gv);
1345 if (!dst.has_reg()) {
1346 if (auto *scratch = std::get_if<ScratchReg>(&gv.state)) {
1347 dst = std::move(*scratch);
1348 } else if (auto *val_ref = std::get_if<ValuePartRef>(&gv.state)) {
1349 if (val_ref->can_salvage()) {
1350 dst.alloc_specific(val_ref->salvage());
1351 assert(dst.cur_reg() == reg && "salvaging unsuccessful");
1352 }
1353 }
1354 }
1355 return reg;
1356}
1357
1358template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1359typename CompilerBase<Adaptor, Derived, Config>::AsmReg
1360CompilerBase<Adaptor, Derived, Config>::gval_as_reg_reuse(
1361 GenericValuePart &gv, ValuePart &dst) {
1362 AsmReg reg = gval_as_reg(gv);
1363 if (!dst.has_reg() &&
1364 (!dst.has_assignment() || !dst.assignment().fixed_assignment())) {
1365 // TODO: make this less expensive
1366 if (auto *scratch = std::get_if<ScratchReg>(&gv.state)) {
1367 dst.set_value(this, std::move(*scratch));
1368 if (dst.has_assignment()) {
1369 dst.lock(this);
1370 }
1371 } else if (auto *val_ref = std::get_if<ValuePartRef>(&gv.state)) {
1372 if (val_ref->can_salvage()) {
1373 dst.set_value(this, std::move(*val_ref));
1374 if (dst.has_assignment()) {
1375 dst.lock(this);
1376 }
1377 }
1378 }
1379 }
1380 return reg;
1381}
1382
1383template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1384Reg CompilerBase<Adaptor, Derived, Config>::select_reg_evict(RegBank bank) {
1385 TPDE_LOG_DBG("select_reg_evict for bank {}", bank.id());
1386 auto candidates = register_file.used & register_file.bank_regs(bank);
1387
1388 Reg candidate = Reg::make_invalid();
1389 u32 max_score = 0;
1390 for (auto reg_id : util::BitSetIterator<>(candidates)) {
1391 Reg reg{reg_id};
1392 if (register_file.is_fixed(reg)) {
1393 continue;
1394 }
1395
1396 // Must be an evictable value, not a temporary.
1397 auto local_idx = register_file.reg_local_idx(reg);
1398 u32 part = register_file.reg_part(Reg{reg});
1399 assert(local_idx != INVALID_VAL_LOCAL_IDX);
1400 ValueAssignment *va = val_assignment(local_idx);
1401 AssignmentPartRef ap{va, part};
1402
1403 // We want to sort registers by the following (ordered by priority):
1404 // - stack variable ref (~1 add/sub to reconstruct)
1405 // - other variable ref (1-2 instrs to reconstruct)
1406 // - already spilled (no store needed)
1407 // - last use farthest away (most likely to get spilled anyhow, so there's
1408 // not much harm in spilling earlier)
1409 // - lowest ref-count (least used)
1410 //
1411 // TODO: evaluate and refine this heuristic
1412
1413 // TODO: evict stack variable refs before others
1414 if (ap.variable_ref()) {
1415 TPDE_LOG_DBG(" r{} ({}) is variable-ref", reg_id, u32(local_idx));
1416 candidate = reg;
1417 break;
1418 }
1419
1420 u32 score = 0;
1421 if (ap.stack_valid()) {
1422 score |= u32{1} << 31;
1423 }
1424
1425 const auto &liveness = analyzer.liveness_info(local_idx);
1426 u32 last_use_dist = u32(liveness.last) - u32(cur_block_idx);
1427 score |= (last_use_dist < 0x8000 ? 0x8000 - last_use_dist : 0) << 16;
1428
1429 u32 refs_left = va->pending_free ? 0 : va->references_left;
1430 score |= (refs_left < 0xffff ? 0x10000 - refs_left : 1);
1431
1432 TPDE_LOG_DBG(" r{} ({}:{}) rc={}/{} live={}-{}{} spilled={} score={:#x}",
1433 reg_id,
1434 u32(local_idx),
1435 part,
1436 refs_left,
1437 liveness.ref_count,
1438 u32(liveness.first),
1439 u32(liveness.last),
1440 &"*"[!liveness.last_full],
1441 ap.stack_valid(),
1442 score);
1443
1444 assert(score != 0);
1445 if (score > max_score) {
1446 candidate = reg;
1447 max_score = score;
1448 }
1449 }
1450 if (candidate.invalid()) [[unlikely]] {
1451 TPDE_FATAL("ran out of registers for scratch registers");
1452 }
1453 TPDE_LOG_DBG(" selected r{}", candidate.id());
1454 evict_reg(candidate);
1455 return candidate;
1456}
1457
1458template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1459void CompilerBase<Adaptor, Derived, Config>::reload_to_reg(
1460 AsmReg dst, AssignmentPartRef ap) {
1461 if (!ap.variable_ref()) {
1462 assert(ap.stack_valid());
1463 derived()->load_from_stack(dst, ap.frame_off(), ap.part_size());
1464 } else if (ap.is_stack_variable()) {
1465 derived()->load_address_of_stack_var(dst, ap);
1466 } else if constexpr (!Config::DEFAULT_VAR_REF_HANDLING) {
1467 derived()->load_address_of_var_reference(dst, ap);
1468 } else {
1469 TPDE_UNREACHABLE("non-stack-variable needs custom var-ref handling");
1470 }
1471}
1472
1473template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1474void CompilerBase<Adaptor, Derived, Config>::allocate_spill_slot(
1475 AssignmentPartRef ap) {
1476 assert(!ap.variable_ref() && "cannot allocate spill slot for variable ref");
1477 if (ap.assignment()->frame_off == 0) {
1478 assert(!ap.stack_valid() && "stack-valid set without spill slot");
1479 ap.assignment()->frame_off = allocate_stack_slot(ap.assignment()->size());
1480 assert(ap.assignment()->frame_off != 0);
1481 }
1482}
1483
1484template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1485void CompilerBase<Adaptor, Derived, Config>::spill(AssignmentPartRef ap) {
1486 assert(may_change_value_state());
1487 if (!ap.stack_valid() && !ap.variable_ref()) {
1488 assert(ap.register_valid() && "cannot spill uninitialized assignment part");
1489 allocate_spill_slot(ap);
1490 derived()->spill_reg(ap.get_reg(), ap.frame_off(), ap.part_size());
1491 ap.set_stack_valid();
1492 }
1493}
1494
1495template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1496void CompilerBase<Adaptor, Derived, Config>::evict(AssignmentPartRef ap) {
1497 assert(may_change_value_state());
1498 assert(ap.register_valid());
1499 derived()->spill(ap);
1500 ap.set_register_valid(false);
1501 register_file.unmark_used(ap.get_reg());
1502}
1503
1504template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1505void CompilerBase<Adaptor, Derived, Config>::evict_reg(Reg reg) {
1506 assert(may_change_value_state());
1507 assert(!register_file.is_fixed(reg));
1508 assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);
1509
1510 ValLocalIdx local_idx = register_file.reg_local_idx(reg);
1511 auto part = register_file.reg_part(reg);
1512 AssignmentPartRef evict_part{val_assignment(local_idx), part};
1513 assert(evict_part.register_valid());
1514 assert(evict_part.get_reg() == reg);
1515 derived()->spill(evict_part);
1516 evict_part.set_register_valid(false);
1517 register_file.unmark_used(reg);
1518}
1519
1520template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1521void CompilerBase<Adaptor, Derived, Config>::free_reg(Reg reg) {
1522 assert(may_change_value_state());
1523 assert(!register_file.is_fixed(reg));
1524 assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);
1525
1526 ValLocalIdx local_idx = register_file.reg_local_idx(reg);
1527 auto part = register_file.reg_part(reg);
1528 AssignmentPartRef ap{val_assignment(local_idx), part};
1529 assert(ap.register_valid());
1530 assert(ap.get_reg() == reg);
1531 assert(!ap.modified() || ap.variable_ref());
1532 ap.set_register_valid(false);
1533 register_file.unmark_used(reg);
1534}
1535
1536template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1537typename CompilerBase<Adaptor, Derived, Config>::RegisterFile::RegBitSet
1538CompilerBase<Adaptor, Derived, Config>::spill_before_branch(
1539 bool force_spill) {
1540 // since we do not explicitly keep track of register assignments per block,
1541 // whenever we might branch off to a block that we do not directly compile
1542 // afterwards (i.e. the register assignments might change in between), we
1543 // need to spill all registers which are not fixed and remove them from the
1544 // register state.
1545 //
1546 // This leads to worse codegen but saves a significant overhead to
1547 // store/manage the register assignment for each block (256 bytes/block for
1548 // x64) and possibly compile time, as there might be additional logic to move
1549 // values around
1550
1551 // First, we consider the case that the current block only has one successor
1552 // which is compiled directly after the current one, in which case we do not
1553 // have to spill anything.
1554 //
1555 // Secondly, if the next block has multiple incoming edges, we always have
1556 // to spill and remove from the register assignment. Otherwise, we
1557 // only need to spill values if they are alive in any successor which is not
1558 // the next block.
1559 //
1560 // Values which are only read from PHI-Nodes and have no extended lifetimes
1561 // do not need to be spilled as they die at the edge.
1562
1563 using RegBitSet = typename RegisterFile::RegBitSet;
1564
1565 assert(may_change_value_state());
1566
1567 const IRBlockRef cur_block_ref = analyzer.block_ref(cur_block_idx);
1568 // Earliest succeeding block after the current block that is not the
1569 // immediately succeeding block. Used to determine whether a value needs to
1570 // be spilled.
1571 BlockIndex earliest_next_succ = Analyzer<Adaptor>::INVALID_BLOCK_IDX;
1572
1573 bool must_spill = force_spill;
1574 if (!must_spill) {
1575 // We must always spill if no block is immediately succeeding or that block
1576 // has multiple incoming edges.
1577 auto next_block_is_succ = false;
1578 auto next_block_has_multiple_incoming = false;
1579 u32 succ_count = 0;
1580 for (const IRBlockRef succ : adaptor->block_succs(cur_block_ref)) {
1581 ++succ_count;
1582 BlockIndex succ_idx = analyzer.block_idx(succ);
1583 if (u32(succ_idx) == u32(cur_block_idx) + 1) {
1584 next_block_is_succ = true;
1585 if (analyzer.block_has_multiple_incoming(succ)) {
1586 next_block_has_multiple_incoming = true;
1587 }
1588 } else if (succ_idx > cur_block_idx && succ_idx < earliest_next_succ) {
1589 earliest_next_succ = succ_idx;
1590 }
1591 }
1592
1593 must_spill = !next_block_is_succ || next_block_has_multiple_incoming;
1594
1595 if (succ_count == 1 && !must_spill) {
1596 return RegBitSet{};
1597 }
1598 }
1599
1600 auto release_regs = RegBitSet{};
1601 // TODO(ts): just use register_file.used_nonfixed_regs()?
1602 for (auto reg : register_file.used_regs()) {
1603 auto local_idx = register_file.reg_local_idx(Reg{reg});
1604 auto part = register_file.reg_part(Reg{reg});
1605 if (local_idx == INVALID_VAL_LOCAL_IDX) {
1606 // scratch regs can never be held across blocks
1607 continue;
1608 }
1609 AssignmentPartRef ap{val_assignment(local_idx), part};
1610 if (ap.fixed_assignment()) {
1611 // fixed registers do not need to be spilled
1612 continue;
1613 }
1614
1615 // Remove from register assignment if the next block cannot rely on the
1616 // value being in the specific register.
1617 if (must_spill) {
1618 release_regs |= RegBitSet{1ull} << reg;
1619 }
1620
1621 if (!ap.modified() || ap.variable_ref()) {
1622 // No need to spill values that were already spilled or are variable refs.
1623 continue;
1624 }
1625
1626 const auto &liveness = analyzer.liveness_info(local_idx);
1627 if (liveness.last <= cur_block_idx) {
1628 // No need to spill value if it dies immediately after the block.
1629 continue;
1630 }
1631
1632 // If the value is live in a different block, we might need to spill it.
1633 // Cases:
1634 // - Live in successor that precedes current block => ignore, value must
1635 // have been spilled already due to RPO.
1636 // - Live only in successor that immediately follows this block => no need
1637 // to spill, value can be kept in register, register state for next block
1638 // is still valid.
1639 // - Live in successor in some distance after current block => spill
1640 if (must_spill || earliest_next_succ <= liveness.last) {
1641 spill(ap);
1642 }
1643 }
1644
1645 return release_regs;
1646}
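A minimal standalone sketch (not part of CompilerBase.hpp) of the spill policy implemented by spill_before_branch above, restated on a toy CFG where block indices follow the layout order, so cur + 1 is the block emitted next. The names ToyBlock and spill_decision are hypothetical and only illustrate the rules described in the comments.

#include <cstdint>
#include <limits>
#include <utility>
#include <vector>

struct ToyBlock {
  std::vector<uint32_t> succs;     // successor indices in layout order
  bool multiple_incoming = false;  // block has more than one incoming edge
};

// Returns {must_spill, earliest_next_succ}. must_spill means values must also
// leave the register state; otherwise only values still live at or beyond
// earliest_next_succ have to be written back to their stack slot.
inline std::pair<bool, uint32_t>
spill_decision(const std::vector<ToyBlock> &blocks, uint32_t cur) {
  bool next_is_succ = false, next_multi_in = false;
  uint32_t earliest = std::numeric_limits<uint32_t>::max();
  for (uint32_t s : blocks[cur].succs) {
    if (s == cur + 1) {
      next_is_succ = true;
      next_multi_in = blocks[s].multiple_incoming;
    } else if (s > cur && s < earliest) {
      earliest = s;
    }
  }
  return {!next_is_succ || next_multi_in, earliest};
}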
1647
1648template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1649void CompilerBase<Adaptor, Derived, Config>::release_spilled_regs(
1650 typename RegisterFile::RegBitSet regs) {
1651 assert(may_change_value_state());
1652
1653 // TODO(ts): needs changes for other RegisterFile impls
1654 for (auto reg_id : util::BitSetIterator<>{regs & register_file.used}) {
1655 if (!register_file.is_fixed(Reg{reg_id})) {
1656 free_reg(Reg{reg_id});
1657 }
1658 }
1659}
1660
1661template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1662void CompilerBase<Adaptor, Derived, Config>::release_regs_after_return() {
1663 // we essentially have to free all non-fixed registers
1664 for (auto reg_id : register_file.used_regs()) {
1665 if (!register_file.is_fixed(Reg{reg_id})) {
1666 free_reg(Reg{reg_id});
1667 }
1668 }
1669}
1670
1671template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1672template <typename Jump>
1673void CompilerBase<Adaptor, Derived, Config>::generate_branch_to_block(
1674 Jump jmp, IRBlockRef target, bool needs_split, bool last_inst) {
1675 BlockIndex target_idx = this->analyzer.block_idx(target);
1676 Label target_label = this->block_labels[u32(target_idx)];
1677 if (!needs_split) {
1678 move_to_phi_nodes(target_idx);
1679 if (!last_inst || target_idx != this->next_block()) {
1680 derived()->generate_raw_jump(jmp, target_label);
1681 }
1682 } else {
1683 Label tmp_label = this->text_writer.label_create();
1684 derived()->generate_raw_jump(derived()->invert_jump(jmp), tmp_label);
1685 move_to_phi_nodes(target_idx);
1686 derived()->generate_raw_jump(Jump::jmp, target_label);
1687 this->label_place(tmp_label);
1688 }
1689}
1690
1691template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1692void CompilerBase<Adaptor, Derived, Config>::generate_uncond_branch(
1693 IRBlockRef target) {
1694 auto spilled = spill_before_branch();
1695 begin_branch_region();
1696
1697 generate_branch_to_block(Derived::Jump::jmp, target, false, true);
1698
1699 end_branch_region();
1700 release_spilled_regs(spilled);
1701}
1702
1703template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1704template <typename Jump>
1705void CompilerBase<Adaptor, Derived, Config>::generate_cond_branch(
1706 Jump jmp, IRBlockRef true_target, IRBlockRef false_target) {
1707 IRBlockRef next = analyzer.block_ref(next_block());
1708
1709 bool true_needs_split = branch_needs_split(true_target);
1710 bool false_needs_split = branch_needs_split(false_target);
1711
1712 auto spilled = spill_before_branch();
1713 begin_branch_region();
1714
1715 if (next == true_target || (next != false_target && true_needs_split)) {
1716 generate_branch_to_block(
1717 derived()->invert_jump(jmp), false_target, false_needs_split, false);
1718 generate_branch_to_block(Derived::Jump::jmp, true_target, false, true);
1719 } else if (next == false_target) {
1720 generate_branch_to_block(jmp, true_target, true_needs_split, false);
1721 generate_branch_to_block(Derived::Jump::jmp, false_target, false, true);
1722 } else {
1723 assert(!true_needs_split);
1724 generate_branch_to_block(jmp, true_target, false, false);
1725 generate_branch_to_block(Derived::Jump::jmp, false_target, false, true);
1726 }
1727
1728 end_branch_region();
1729 release_spilled_regs(spilled);
1730}
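The three layout cases distinguished above can be summarized by a small decision helper. This is an illustrative sketch rather than TPDE API; pick_cond_branch_plan and the enum are hypothetical names, and blocks are reduced to plain layout indices.

enum class CondBrPlan {
  InvertedJumpToFalse,  // conditional jump with inverted condition to the
                        // false target first; the true target is handled last
                        // (falls through if it is the next block)
  JumpToTrue,           // conditional jump to the true target (possibly via a
                        // split edge); the false target is handled last
  JumpToTrueThenFalse   // neither target is next: conditional jump to the true
                        // target, then an unconditional jump to the false one
};

inline CondBrPlan pick_cond_branch_plan(unsigned next, unsigned true_target,
                                        unsigned false_target,
                                        bool true_needs_split) {
  if (next == true_target || (next != false_target && true_needs_split)) {
    return CondBrPlan::InvertedJumpToFalse;
  }
  if (next == false_target) {
    return CondBrPlan::JumpToTrue;
  }
  return CondBrPlan::JumpToTrueThenFalse;
}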
1731
1732template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1733void CompilerBase<Adaptor, Derived, Config>::generate_switch(
1734 ScratchReg &&cond,
1735 u32 width,
1736 IRBlockRef default_block,
1737 std::span<const std::pair<u64, IRBlockRef>> cases) {
1738 // This function takes cond as a ScratchReg as opposed to a ValuePart, because
1739 // the ValueRef for the condition must be ref-counted before we enter the
1740 // branch region.
1741
1742 assert(width <= 64);
1743 // We don't support sections larger than 4 GiB, so switches with more than
1744 // 4 billion cases cannot occur.
1745 assert(cases.size() < UINT32_MAX && "large switches are unsupported");
1746
1747 AsmReg cmp_reg = cond.cur_reg();
1748 bool width_is_32 = width <= 32;
1749 if (u32 dst_width = util::align_up(width, 32); width != dst_width) {
1750 derived()->generate_raw_intext(cmp_reg, cmp_reg, false, width, dst_width);
1751 }
1752
1753 // We must not evict any registers in the branching code, as we don't track
1754 // the individual value states per block. Hence, we must not allocate any
1755 // registers (e.g., for constants, jump table address) below.
1756 ScratchReg tmp_scratch{this};
1757 AsmReg tmp_reg = tmp_scratch.alloc_gp();
1758
1759 const auto spilled = this->spill_before_branch();
1760 this->begin_branch_region();
1761
1762 tpde::util::SmallVector<tpde::Label, 64> case_labels;
1763 // Labels that need an intermediate block to set up registers. This is kept
1764 // separate because most switch targets don't need it.
1765 tpde::util::SmallVector<std::pair<tpde::Label, IRBlockRef>, 64> case_blocks;
1766 for (auto i = 0u; i < cases.size(); ++i) {
1767 // If the target might need additional register moves, we can't branch there
1768 // immediately.
1769 // TODO: more precise condition?
1770 BlockIndex target = this->analyzer.block_idx(cases[i].second);
1771 if (analyzer.block_has_phis(target)) {
1772 case_labels.push_back(this->text_writer.label_create());
1773 case_blocks.emplace_back(case_labels.back(), cases[i].second);
1774 } else {
1775 case_labels.push_back(this->block_labels[u32(target)]);
1776 }
1777 }
1778
1779 const auto default_label = this->text_writer.label_create();
1780
1781 const auto build_range = [&,
1782 this](size_t begin, size_t end, const auto &self) {
1783 assert(begin <= end);
1784 const auto num_cases = end - begin;
1785 if (num_cases <= 4) {
1786 // if there are four or fewer cases, we just compare the value
1787 // against each of them
1788 for (auto i = 0u; i < num_cases; ++i) {
1789 derived()->switch_emit_cmpeq(case_labels[begin + i],
1790 cmp_reg,
1791 tmp_reg,
1792 cases[begin + i].first,
1793 width_is_32);
1794 }
1795
1796 derived()->generate_raw_jump(Derived::Jump::jmp, default_label);
1797 return;
1798 }
1799
1800 // check if the density of the values is high enough to warrant building
1801 // a jump table
1802 u64 low_bound = cases[begin].first;
1803 u64 high_bound = cases[end - 1].first;
1804 auto range = high_bound - low_bound + 1;
1805 // range wraps to 0 if the case values span the full 64-bit range, which
1806 // would give wrong results, so skip the jump table in that case
1807 if (range != 0 && (range / num_cases) < 8) {
1808 // GCC seems to build a jump table if there are fewer than 8 values
1809 // per case, so we do that, too
1810
1811 // Give target the option to emit a jump table.
1812 auto *jt = derived()->switch_create_jump_table(
1813 default_label, cmp_reg, tmp_reg, low_bound, high_bound, width_is_32);
1814 if (jt) {
1815 if (range == num_cases) {
1816 std::copy(case_labels.begin() + begin,
1817 case_labels.begin() + end,
1818 jt->labels().begin());
1819 } else {
1820 std::ranges::fill(jt->labels(), default_label);
1821 for (auto i = begin; i != end; ++i) {
1822 jt->labels()[cases[i].first - low_bound] = case_labels[i];
1823 }
1824 }
1825 return;
1826 }
1827 }
1828
1829 // do a binary search step
1830 const auto half_len = num_cases / 2;
1831 const auto half_value = cases[begin + half_len].first;
1832 const auto gt_label = this->text_writer.label_create();
1833
1834 // this compares against the input value and jumps to the case label if it
1835 // is equal, or to gt_label if the value is greater. Otherwise it falls
1836 // through
1837 derived()->switch_emit_binary_step(case_labels[begin + half_len],
1838 gt_label,
1839 cmp_reg,
1840 tmp_reg,
1841 half_value,
1842 width_is_32);
1843 // search the lower half
1844 self(begin, begin + half_len, self);
1845
1846 // and the upper half
1847 this->label_place(gt_label);
1848 self(begin + half_len + 1, end, self);
1849 };
1850
1851 build_range(0, case_labels.size(), build_range);
1852
1853 // write out the labels
1854 this->label_place(default_label);
1855 derived()->generate_branch_to_block(
1856 Derived::Jump::jmp, default_block, false, false);
1857
1858 for (const auto &[label, target] : case_blocks) {
1859 // Branch predictors typically have problems if too many branches follow too
1860 // closely. Ensure a minimum alignment.
1861 this->text_writer.align(8);
1862 this->label_place(label);
1863 derived()->generate_branch_to_block(
1864 Derived::Jump::jmp, target, false, false);
1865 }
1866
1867 this->end_branch_region();
1868 this->release_spilled_regs(spilled);
1869}
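As a rough illustration of the case-partitioning strategy above (linear compares for at most four cases, a jump table when the value range is dense enough, a binary-search split otherwise), here is a standalone planner over sorted case values. It is a sketch using the same thresholds, not part of TPDE; plan_switch is a hypothetical name and it only prints the chosen plan.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

void plan_switch(const std::vector<uint64_t> &cases, size_t begin, size_t end) {
  const size_t n = end - begin;
  if (n <= 4) {
    std::printf("compare against %zu case value(s) individually\n", n);
    return;
  }
  const uint64_t lo = cases[begin], hi = cases[end - 1];
  const uint64_t range = hi - lo + 1;  // wraps to 0 if the full u64 range is covered
  if (range != 0 && range / n < 8) {   // dense: fewer than 8 table slots per case
    std::printf("jump table over [%llu, %llu] (%llu slots)\n",
                (unsigned long long)lo, (unsigned long long)hi,
                (unsigned long long)range);
    return;
  }
  const size_t half = n / 2;  // binary-search step on the median case value
  std::printf("binary split at %llu\n",
              (unsigned long long)cases[begin + half]);
  plan_switch(cases, begin, begin + half);    // lower half
  plan_switch(cases, begin + half + 1, end);  // upper half (median handled by the split)
}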
1870
1871template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1872void CompilerBase<Adaptor, Derived, Config>::move_to_phi_nodes_impl(
1873 BlockIndex target) {
1874 // PHI-nodes are always moved to their stack-slot (unless they are fixed)
1875 //
1876 // However, we need to take care of PHI dependencies (cycles and chains)
1877 // so as not to overwrite values which might still be needed.
1878 //
1879 // In most cases, we expect the number of PHIs to be small, but we want to
1880 // stay reasonably efficient even with larger numbers of PHIs.
1881
1882 struct ScratchWrapper {
1883 Derived *self;
1884 AsmReg cur_reg = AsmReg::make_invalid();
1885 bool backed_up = false;
1886 bool was_modified = false;
1887 u8 part = 0;
1888 ValLocalIdx local_idx = INVALID_VAL_LOCAL_IDX;
1889
1890 ScratchWrapper(Derived *self) : self{self} {}
1891
1892 ~ScratchWrapper() { reset(); }
1893
1894 void reset() {
1895 if (cur_reg.invalid()) {
1896 return;
1897 }
1898
1899 self->register_file.unmark_fixed(cur_reg);
1900 self->register_file.unmark_used(cur_reg);
1901
1902 if (backed_up) {
1903 // restore the register state
1904 // TODO(ts): do we actually need the reload?
1905 auto *assignment = self->val_assignment(local_idx);
1906 // if the value was freed, we don't need to restore
1907 // it
1908 if (assignment) {
1909 auto ap = AssignmentPartRef{assignment, part};
1910 if (!ap.variable_ref()) {
1911 // TODO(ts): assert that this always happens?
1912 assert(ap.stack_valid());
1913 self->load_from_stack(cur_reg, ap.frame_off(), ap.part_size());
1914 }
1915 ap.set_reg(cur_reg);
1916 ap.set_register_valid(true);
1917 ap.set_modified(was_modified);
1918 self->register_file.mark_used(cur_reg, local_idx, part);
1919 }
1920 backed_up = false;
1921 }
1922 cur_reg = AsmReg::make_invalid();
1923 }
1924
1925 AsmReg alloc_from_bank(RegBank bank) {
1926 if (cur_reg.valid() && self->register_file.reg_bank(cur_reg) == bank) {
1927 return cur_reg;
1928 }
1929 if (cur_reg.valid()) {
1930 reset();
1931 }
1932
1933 // TODO(ts): try to first find a non callee-saved/clobbered
1934 // register...
1935 auto &reg_file = self->register_file;
1936 auto reg = reg_file.find_first_free_excluding(bank, 0);
1937 if (reg.invalid()) {
1938 // TODO(ts): use clock here?
1939 reg = reg_file.find_first_nonfixed_excluding(bank, 0);
1940 if (reg.invalid()) {
1941 TPDE_FATAL("ran out of registers for scratch registers");
1942 }
1943
1944 backed_up = true;
1945 local_idx = reg_file.reg_local_idx(reg);
1946 part = reg_file.reg_part(reg);
1947 AssignmentPartRef ap{self->val_assignment(local_idx), part};
1948 was_modified = ap.modified();
1949 // TODO(ts): this does not spill for variable refs
1950 // We don't use evict_reg here, as we know that we can't change the
1951 // value state.
1952 assert(ap.register_valid() && ap.get_reg() == reg);
1953 if (!ap.stack_valid() && !ap.variable_ref()) {
1954 self->allocate_spill_slot(ap);
1955 self->spill_reg(ap.get_reg(), ap.frame_off(), ap.part_size());
1956 ap.set_stack_valid();
1957 }
1958 ap.set_register_valid(false);
1959 reg_file.unmark_used(reg);
1960 }
1961
1962 reg_file.mark_used(reg, INVALID_VAL_LOCAL_IDX, 0);
1963 reg_file.mark_clobbered(reg);
1964 reg_file.mark_fixed(reg);
1965 cur_reg = reg;
1966 return reg;
1967 }
1968
1969 ScratchWrapper &operator=(const ScratchWrapper &) = delete;
1970 ScratchWrapper &operator=(ScratchWrapper &&) = delete;
1971 };
1972
1973 IRBlockRef target_ref = analyzer.block_ref(target);
1974 IRBlockRef cur_ref = analyzer.block_ref(cur_block_idx);
1975
1976 // collect all the nodes
1977 struct NodeEntry {
1978 IRValueRef phi;
1979 IRValueRef incoming_val;
1980 ValLocalIdx phi_local_idx;
1981 // local idx of same-block phi node that needs special handling
1982 ValLocalIdx incoming_phi_local_idx = INVALID_VAL_LOCAL_IDX;
1983 // bool incoming_is_phi;
1984 u32 ref_count;
1985
1986 bool operator<(const NodeEntry &other) const {
1987 return phi_local_idx < other.phi_local_idx;
1988 }
1989
1990 bool operator<(ValLocalIdx other) const { return phi_local_idx < other; }
1991 };
1992
1993 util::SmallVector<NodeEntry, 16> nodes;
1994 for (IRValueRef phi : adaptor->block_phis(target_ref)) {
1995 ValLocalIdx phi_local_idx = adaptor->val_local_idx(phi);
1996 auto incoming = adaptor->val_as_phi(phi).incoming_val_for_block(cur_ref);
1997 nodes.emplace_back(NodeEntry{
1998 .phi = phi, .incoming_val = incoming, .phi_local_idx = phi_local_idx});
1999 }
2000
2001 // We check that the block has phi nodes before getting here.
2002 assert(!nodes.empty() && "block marked as having phi nodes has none");
2003
2004 ScratchWrapper scratch{derived()};
2005 const auto move_to_phi = [this, &scratch](IRValueRef phi,
2006 IRValueRef incoming_val) {
2007 auto phi_vr = derived()->result_ref(phi);
2008 auto val_vr = derived()->val_ref(incoming_val);
2009 if (phi == incoming_val) {
2010 return;
2011 }
2012
2013 u32 part_count = phi_vr.assignment()->part_count;
2014 for (u32 i = 0; i < part_count; ++i) {
2015 AssignmentPartRef phi_ap{phi_vr.assignment(), i};
2016 ValuePartRef val_vpr = val_vr.part(i);
2017
2018 if (phi_ap.fixed_assignment()) {
2019 if (AsmReg reg = val_vpr.cur_reg_unlocked(); reg.valid()) {
2020 derived()->mov(phi_ap.get_reg(), reg, phi_ap.part_size());
2021 } else {
2022 val_vpr.reload_into_specific_fixed(phi_ap.get_reg());
2023 }
2024 } else {
2025 AsmReg reg = val_vpr.cur_reg_unlocked();
2026 if (!reg.valid()) {
2027 reg = scratch.alloc_from_bank(val_vpr.bank());
2028 val_vpr.reload_into_specific_fixed(reg);
2029 }
2030 allocate_spill_slot(phi_ap);
2031 derived()->spill_reg(reg, phi_ap.frame_off(), phi_ap.part_size());
2032 phi_ap.set_stack_valid();
2033 }
2034 }
2035 };
2036
2037 if (nodes.size() == 1) {
2038 move_to_phi(nodes[0].phi, nodes[0].incoming_val);
2039 return;
2040 }
2041
2042 // sort so we can binary search later
2043 std::sort(nodes.begin(), nodes.end());
2044
2045 // fill in the refcount
2046 auto all_zero_ref = true;
2047 for (auto &node : nodes) {
2048 // We don't need to do anything for PHIs whose incoming value is not another
2049 // PHI, nor for self-referencing PHIs.
2050 bool incoming_is_phi = adaptor->val_is_phi(node.incoming_val);
2051 if (!incoming_is_phi || node.incoming_val == node.phi) {
2052 continue;
2053 }
2054
2055 ValLocalIdx inc_local_idx = adaptor->val_local_idx(node.incoming_val);
2056 auto it = std::lower_bound(nodes.begin(), nodes.end(), inc_local_idx);
2057 if (it == nodes.end() || it->phi != node.incoming_val) {
2058 // Incoming value is a PHI node, but not one of the PHIs of the target block,
2059 // so we don't need to be particularly careful when assigning values.
2060 continue;
2061 }
2062 node.incoming_phi_local_idx = inc_local_idx;
2063 ++it->ref_count;
2064 all_zero_ref = false;
2065 }
2066
2067 if (all_zero_ref) {
2068 // no cycles/chain that we need to take care of
2069 for (auto &node : nodes) {
2070 move_to_phi(node.phi, node.incoming_val);
2071 }
2072 return;
2073 }
2074
2075 // TODO(ts): this is rather inefficient...
2076 util::SmallVector<u32, 32> ready_indices;
2077 ready_indices.reserve(nodes.size());
2078 util::SmallBitSet<256> waiting_nodes;
2079 waiting_nodes.resize(nodes.size());
2080 for (u32 i = 0; i < nodes.size(); ++i) {
2081 if (nodes[i].ref_count) {
2082 waiting_nodes.mark_set(i);
2083 } else {
2084 ready_indices.push_back(i);
2085 }
2086 }
2087
2088 u32 handled_count = 0;
2089 u32 cur_tmp_part_count = 0;
2090 i32 cur_tmp_slot = 0;
2091 u32 cur_tmp_slot_size = 0;
2092 IRValueRef cur_tmp_val = Adaptor::INVALID_VALUE_REF;
2093 ScratchWrapper tmp_reg1{derived()}, tmp_reg2{derived()};
2094
2095 const auto move_from_tmp_phi = [&](IRValueRef target_phi) {
2096 auto phi_vr = val_ref(target_phi);
2097 if (cur_tmp_part_count <= 2) {
2098 AssignmentPartRef ap{phi_vr.assignment(), 0};
2099 assert(!tmp_reg1.cur_reg.invalid());
2100 if (ap.fixed_assignment()) {
2101 derived()->mov(ap.get_reg(), tmp_reg1.cur_reg, ap.part_size());
2102 } else {
2103 derived()->spill_reg(tmp_reg1.cur_reg, ap.frame_off(), ap.part_size());
2104 }
2105
2106 if (cur_tmp_part_count == 2) {
2107 AssignmentPartRef ap_high{phi_vr.assignment(), 1};
2108 assert(!ap_high.fixed_assignment());
2109 assert(!tmp_reg2.cur_reg.invalid());
2110 derived()->spill_reg(
2111 tmp_reg2.cur_reg, ap_high.frame_off(), ap_high.part_size());
2112 }
2113 return;
2114 }
2115
2116 for (u32 i = 0; i < cur_tmp_part_count; ++i) {
2117 AssignmentPartRef phi_ap{phi_vr.assignment(), i};
2118 assert(!phi_ap.fixed_assignment());
2119
2120 auto slot_off = cur_tmp_slot + phi_ap.part_off();
2121 auto reg = tmp_reg1.alloc_from_bank(phi_ap.bank());
2122 derived()->load_from_stack(reg, slot_off, phi_ap.part_size());
2123 derived()->spill_reg(reg, phi_ap.frame_off(), phi_ap.part_size());
2124 }
2125 };
2126
2127 while (handled_count != nodes.size()) {
2128 if (ready_indices.empty()) {
2129 // need to break a cycle
2130 auto cur_idx_opt = waiting_nodes.first_set();
2131 assert(cur_idx_opt);
2132 auto cur_idx = *cur_idx_opt;
2133 assert(nodes[cur_idx].ref_count == 1);
2134 assert(cur_tmp_val == Adaptor::INVALID_VALUE_REF);
2135
2136 auto phi_val = nodes[cur_idx].phi;
2137 auto phi_vr = this->val_ref(phi_val);
2138 auto *assignment = phi_vr.assignment();
2139 cur_tmp_part_count = assignment->part_count;
2140 cur_tmp_val = phi_val;
2141
2142 if (cur_tmp_part_count > 2) {
2143 // use a stack slot to store the temporaries
2144 cur_tmp_slot_size = assignment->size();
2145 cur_tmp_slot = allocate_stack_slot(cur_tmp_slot_size);
2146
2147 for (u32 i = 0; i < cur_tmp_part_count; ++i) {
2148 auto ap = AssignmentPartRef{assignment, i};
2149 assert(!ap.fixed_assignment());
2150 auto slot_off = cur_tmp_slot + ap.part_off();
2151
2152 if (ap.register_valid()) {
2153 auto reg = ap.get_reg();
2154 derived()->spill_reg(reg, slot_off, ap.part_size());
2155 } else {
2156 auto reg = tmp_reg1.alloc_from_bank(ap.bank());
2157 assert(ap.stack_valid());
2158 derived()->load_from_stack(reg, ap.frame_off(), ap.part_size());
2159 derived()->spill_reg(reg, slot_off, ap.part_size());
2160 }
2161 }
2162 } else {
2163 // TODO(ts): if the PHI is not fixed, then we can just reuse its
2164 // register if it has one
2165 auto phi_vpr = phi_vr.part(0);
2166 auto reg = tmp_reg1.alloc_from_bank(phi_vpr.bank());
2167 phi_vpr.reload_into_specific_fixed(this, reg);
2168
2169 if (cur_tmp_part_count == 2) {
2170 // TODO(ts): just change the part ref on the lower ref?
2171 auto phi_vpr_high = phi_vr.part(1);
2172 auto reg_high = tmp_reg2.alloc_from_bank(phi_vpr_high.bank());
2173 phi_vpr_high.reload_into_specific_fixed(this, reg_high);
2174 }
2175 }
2176
2177 nodes[cur_idx].ref_count = 0;
2178 ready_indices.push_back(cur_idx);
2179 waiting_nodes.mark_unset(cur_idx);
2180 }
2181
2182 for (u32 i = 0; i < ready_indices.size(); ++i) {
2183 ++handled_count;
2184 auto cur_idx = ready_indices[i];
2185 auto phi_val = nodes[cur_idx].phi;
2186 IRValueRef incoming_val = nodes[cur_idx].incoming_val;
2187 if (incoming_val == phi_val) {
2188 // no need to do anything, except ref-counting
2189 (void)val_ref(incoming_val);
2190 (void)val_ref(phi_val);
2191 continue;
2192 }
2193
2194 if (incoming_val == cur_tmp_val) {
2195 move_from_tmp_phi(phi_val);
2196
2197 if (cur_tmp_part_count > 2) {
2198 free_stack_slot(cur_tmp_slot, cur_tmp_slot_size);
2199 cur_tmp_slot = 0xFFFF'FFFF;
2200 cur_tmp_slot_size = 0;
2201 }
2202 cur_tmp_val = Adaptor::INVALID_VALUE_REF;
2203 // skip the code below as the ref count of the temp val is
2204 // already 0 and we don't want to reinsert it into the ready
2205 // list
2206 continue;
2207 }
2208
2209 move_to_phi(phi_val, incoming_val);
2210
2211 if (nodes[cur_idx].incoming_phi_local_idx == INVALID_VAL_LOCAL_IDX) {
2212 continue;
2213 }
2214
2215 auto it = std::lower_bound(
2216 nodes.begin(), nodes.end(), nodes[cur_idx].incoming_phi_local_idx);
2217 assert(it != nodes.end() && it->phi == incoming_val &&
2218 "incoming_phi_local_idx set incorrectly");
2219
2220 assert(it->ref_count > 0);
2221 if (--it->ref_count == 0) {
2222 auto node_idx = static_cast<u32>(it - nodes.begin());
2223 ready_indices.push_back(node_idx);
2224 waiting_nodes.mark_unset(node_idx);
2225 }
2226 }
2227 ready_indices.clear();
2228 }
2229}
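The dependency handling above (reference counts, a ready list, and breaking remaining cycles through a temporary) is the usual parallel-copy resolution scheme. Below is a minimal standalone sketch of that scheme on plain integer values; it is not TPDE code, all names are hypothetical, and register/stack handling is reduced to printing the move sequence.

#include <cstddef>
#include <cstdio>
#include <optional>
#include <unordered_map>
#include <vector>

struct PhiMove {
  int dst;  // PHI value to define
  int src;  // incoming value read on this edge
};

void schedule_phi_moves(std::vector<PhiMove> moves) {
  std::unordered_map<int, size_t> dst_index;  // which move defines a value
  std::vector<unsigned> ref_count(moves.size(), 0);
  for (size_t i = 0; i < moves.size(); ++i) {
    dst_index[moves[i].dst] = i;
  }
  for (const PhiMove &m : moves) {
    if (m.dst != m.src && dst_index.count(m.src)) {
      ++ref_count[dst_index[m.src]];  // m reads another PHI's old value
    }
  }

  std::vector<size_t> ready;
  std::vector<bool> done(moves.size(), false);
  for (size_t i = 0; i < moves.size(); ++i) {
    if (ref_count[i] == 0) {
      ready.push_back(i);
    }
  }

  size_t handled = 0;
  std::optional<int> tmp_holds;  // value currently saved in the temporary
  while (handled != moves.size()) {
    if (ready.empty()) {  // only cycles remain: break one via the temporary
      for (size_t i = 0; i < moves.size(); ++i) {
        if (!done[i] && ref_count[i] != 0) {
          std::printf("tmp = v%d\n", moves[i].dst);
          tmp_holds = moves[i].dst;
          ref_count[i] = 0;
          ready.push_back(i);
          break;
        }
      }
    }
    for (size_t k = 0; k < ready.size(); ++k) {  // ready may grow while iterating
      size_t i = ready[k];
      done[i] = true;
      ++handled;
      if (tmp_holds && moves[i].src == *tmp_holds) {
        std::printf("v%d = tmp\n", moves[i].dst);
        tmp_holds.reset();
      } else if (moves[i].dst != moves[i].src) {
        std::printf("v%d = v%d\n", moves[i].dst, moves[i].src);
      }
      auto it = dst_index.find(moves[i].src);
      if (it != dst_index.end() && ref_count[it->second] > 0 &&
          --ref_count[it->second] == 0) {
        ready.push_back(it->second);  // its last reader has now run
      }
    }
    ready.clear();
  }
}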
2230
2231template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
2234 return static_cast<BlockIndex>(static_cast<u32>(cur_block_idx) + 1);
2235}
2236
2237template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
2239 SymRef personality_sym;
2240 if (this->adaptor->cur_needs_unwind_info()) {
2241 SymRef personality_func = derived()->cur_personality_func();
2242 if (personality_func.valid()) {
2243 for (const auto &[fn_sym, ptr_sym] : personality_syms) {
2244 if (fn_sym == personality_func) {
2245 personality_sym = ptr_sym;
2246 break;
2247 }
2248 }
2249
2250 if (!personality_sym.valid()) {
2251 // create symbol that contains the address of the personality
2252 // function
2253 u32 off;
2254 static constexpr std::array<u8, 8> zero{};
2255
2256 auto rodata =
2257 this->assembler.get_default_section(SectionKind::DataRelRO);
2258 personality_sym = this->assembler.sym_def_data(
2259 rodata, "", zero, 8, Assembler::SymBinding::LOCAL, &off);
2260 this->assembler.reloc_abs(rodata, personality_func, off, 0);
2261
2262 personality_syms.emplace_back(personality_func, personality_sym);
2263 }
2264 }
2265 }
2266 return personality_sym;
2267}
2268
2269template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
2271 const u32 func_idx) {
2272 if (!adaptor->switch_func(func)) {
2273 return false;
2274 }
2275 derived()->analysis_start();
2276 analyzer.switch_func(func);
2277 derived()->analysis_end();
2278
2279 if constexpr (WithAsserts) {
2280 stack.frame_size = ~0u;
2281 }
2282 for (auto &e : stack.fixed_free_lists) {
2283 e.clear();
2284 }
2285 stack.dynamic_free_lists.clear();
2286 // TODO: sort out the inconsistency about adaptor vs. compiler methods.
2287 stack.has_dynamic_alloca = this->adaptor->cur_has_dynamic_alloca();
2288 stack.is_leaf_function = !derived()->cur_func_may_emit_calls();
2289 stack.generated_call = false;
2290 stack.frame_used = false;
2291
2292 assignments.cur_fixed_assignment_count = {};
2293 assert(std::ranges::none_of(assignments.value_ptrs, std::identity{}));
2294 if (assignments.value_ptrs.size() < analyzer.liveness.size()) {
2295 assignments.value_ptrs.resize(analyzer.liveness.size());
2296 }
2297
2298 assignments.allocator.reset();
2299 assignments.variable_ref_list = INVALID_VAL_LOCAL_IDX;
2300 assignments.delayed_free_lists.clear();
2301 assignments.delayed_free_lists.resize(analyzer.block_layout.size(),
2302 INVALID_VAL_LOCAL_IDX);
2303
2304 cur_block_idx =
2305 static_cast<BlockIndex>(analyzer.block_idx(adaptor->cur_entry_block()));
2306
2307 register_file.reset();
2308#ifndef NDEBUG
2309 generating_branch = false;
2310#endif
2311
2312 // Simple heuristic for initial allocation size
2313 u32 expected_code_size = 0x8 * analyzer.num_insts + 0x40;
2314 this->text_writer.begin_func(/*alignment=*/16, expected_code_size);
2315
2316 derived()->start_func(func_idx);
2317
2318 block_labels.clear();
2319 block_labels.resize_uninitialized(analyzer.block_layout.size());
2320 for (u32 i = 0; i < analyzer.block_layout.size(); ++i) {
2321 block_labels[i] = text_writer.label_create();
2322 }
2323
2324 // TODO(ts): place function label
2325 // TODO(ts): make function labels optional?
2326
2327 CCAssigner *cc_assigner = derived()->cur_cc_assigner();
2328 assert(cc_assigner != nullptr);
2329
2330 register_file.allocatable = cc_assigner->get_ccinfo().allocatable_regs;
2331
2332 cc_assigner->reset();
2333 // Temporarily prevent argument registers from being assigned.
2334 const CCInfo &cc_info = cc_assigner->get_ccinfo();
2335 assert((cc_info.allocatable_regs & cc_info.arg_regs) == cc_info.arg_regs &&
2336 "argument registers must also be allocatable");
2337 this->register_file.allocatable &= ~cc_info.arg_regs;
2338
2339 // Begin prologue, prepare for handling arguments.
2340 derived()->prologue_begin(cc_assigner);
2341 u32 arg_idx = 0;
2342 for (const IRValueRef arg : this->adaptor->cur_args()) {
2343 // Init assignment for all arguments. This can be substituted for more
2344 // complex mappings of arguments to value parts.
2345 derived()->prologue_assign_arg(cc_assigner, arg_idx++, arg);
2346 }
2347 // Finish prologue, storing relevant data from the argument cc_assigner.
2348 derived()->prologue_end(cc_assigner);
2349
2350 this->register_file.allocatable |= cc_info.arg_regs;
2351
2352 // Small allocas get a stack slot; larger allocas need dynamic allocation.
2353 util::SmallVector<std::tuple<IRValueRef, u32, u32>> dyn_allocas;
2354 for (const IRValueRef alloca : adaptor->cur_static_allocas()) {
2355 auto size = adaptor->val_alloca_size(alloca);
2356 auto align = adaptor->val_alloca_align(alloca);
2357 if (align > 16 || size > Derived::MaxStaticAllocaSize) {
2358 stack.has_dynamic_alloca = true;
2359 dyn_allocas.emplace_back(alloca, size, align);
2360 continue;
2361 }
2362
2363 ValLocalIdx local_idx = adaptor->val_local_idx(alloca);
2364 init_variable_ref(local_idx, 0);
2365 ValueAssignment *assignment = val_assignment(local_idx);
2366 assignment->stack_variable = true;
2367 assignment->frame_off = allocate_stack_slot(util::align_up(size, align));
2368 }
2369
2370 if constexpr (!Config::DEFAULT_VAR_REF_HANDLING) {
2371 derived()->setup_var_ref_assignments();
2372 }
2373
2374 for (auto &[alloca, size, align] : dyn_allocas) {
2375 auto [_, vr] = this->result_ref_single(alloca);
2376 derived()->alloca_fixed(size, align, vr);
2377 }
2378
2379 for (u32 i = 0; i < analyzer.block_layout.size(); ++i) {
2380 const auto block_ref = analyzer.block_layout[i];
2381 TPDE_LOG_TRACE(
2382 "Compiling block {} ({})", i, adaptor->block_fmt_ref(block_ref));
2383 if (!derived()->compile_block(block_ref, i)) [[unlikely]] {
2384 TPDE_LOG_ERR("Failed to compile block {} ({})",
2385 i,
2386 adaptor->block_fmt_ref(block_ref));
2387 // Ensure invariant that value_ptrs only contains nullptr at the end.
2388 assignments.value_ptrs.clear();
2389 return false;
2390 }
2391 }
2392
2393 // Reset all variable-ref assignment pointers to nullptr.
2394 ValLocalIdx variable_ref_list = assignments.variable_ref_list;
2395 while (variable_ref_list != INVALID_VAL_LOCAL_IDX) {
2396 u32 idx = u32(variable_ref_list);
2397 ValLocalIdx next = assignments.value_ptrs[idx]->next_delayed_free_entry;
2398 assignments.value_ptrs[idx] = nullptr;
2399 variable_ref_list = next;
2400 }
2401
2402 assert(std::ranges::none_of(assignments.value_ptrs, std::identity{}) &&
2403 "found non-freed ValueAssignment, maybe missing ref-count?");
2404
2405 derived()->finish_func(func_idx);
2406 this->text_writer.finish_func();
2407
2408 return true;
2409}
2410
2411template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
2412bool CompilerBase<Adaptor, Derived, Config>::compile_block(
2413 const IRBlockRef block, const u32 block_idx) {
2414 cur_block_idx =
2415 static_cast<typename Analyzer<Adaptor>::BlockIndex>(block_idx);
2416
2417 label_place(block_labels[block_idx]);
2418 auto &&val_range = adaptor->block_insts(block);
2419 auto end = val_range.end();
2420 for (auto it = val_range.begin(); it != end; ++it) {
2421 const IRInstRef inst = *it;
2422 if (this->adaptor->inst_fused(inst)) {
2423 continue;
2424 }
2425
2426 auto it_cpy = it;
2427 ++it_cpy;
2428 if (!derived()->compile_inst(inst, InstRange{.from = it_cpy, .to = end}))
2429 [[unlikely]] {
2430 TPDE_LOG_ERR("Failed to compile instruction {}",
2431 this->adaptor->inst_fmt_ref(inst));
2432 return false;
2433 }
2434 }
2435
2436 if constexpr (WithAsserts) {
2437 // Some consistency checks. Register assignment information must match, all
2438 // used registers must have an assignment (no temporaries across blocks),
2439 // and fixed registers must be fixed assignments.
2440 for (auto reg_id : register_file.used_regs()) {
2441 Reg reg{reg_id};
2442 assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);
2443 AssignmentPartRef ap{val_assignment(register_file.reg_local_idx(reg)),
2444 register_file.reg_part(reg)};
2445 assert(ap.register_valid());
2446 assert(ap.get_reg() == reg);
2447 assert(!register_file.is_fixed(reg) || ap.fixed_assignment());
2448 }
2449 }
2450
2451 if (static_cast<u32>(assignments.delayed_free_lists[block_idx]) != ~0u) {
2452 auto list_entry = assignments.delayed_free_lists[block_idx];
2453 while (static_cast<u32>(list_entry) != ~0u) {
2454 auto *assignment = assignments.value_ptrs[static_cast<u32>(list_entry)];
2455 auto next_entry = assignment->next_delayed_free_entry;
2456 derived()->free_assignment(list_entry, assignment);
2457 list_entry = next_entry;
2458 }
2459 }
2460 return true;
2461}
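The delayed-free walk at the end of compile_block threads an intrusive singly-linked list through the assignments themselves (next_delayed_free_entry), so no extra allocation is needed to remember which values die at a block boundary. A minimal sketch of that pattern, with hypothetical names and plain heap allocation standing in for the arena-backed assignments:

#include <cstdint>
#include <vector>

struct ToyAssignment {
  uint32_t next_delayed_free_entry = ~0u;  // index of the next entry, ~0u ends the list
};

// head is the first value index in the per-block delayed-free list.
void run_delayed_frees(std::vector<ToyAssignment *> &value_ptrs, uint32_t head) {
  while (head != ~0u) {
    ToyAssignment *assignment = value_ptrs[head];
    uint32_t next = assignment->next_delayed_free_entry;  // read before freeing
    delete assignment;  // stands in for freeing the arena-backed assignment
    value_ptrs[head] = nullptr;
    head = next;
  }
}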
2462
2463} // namespace tpde