TPDE
Loading...
Searching...
No Matches
CompilerBase.hpp
1// SPDX-FileCopyrightText: 2025 Contributors to TPDE <https://tpde.org>
2//
3// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4#pragma once
5
6#include <algorithm>
7#include <functional>
8#include <unordered_map>
9#include <variant>
10
11#include "Analyzer.hpp"
12#include "Compiler.hpp"
13#include "CompilerConfig.hpp"
14#include "IRAdaptor.hpp"
15#include "tpde/AssignmentPartRef.hpp"
16#include "tpde/FunctionWriter.hpp"
17#include "tpde/RegisterFile.hpp"
18#include "tpde/ValLocalIdx.hpp"
19#include "tpde/ValueAssignment.hpp"
20#include "tpde/base.hpp"
21#include "tpde/util/function_ref.hpp"
22#include "tpde/util/misc.hpp"
23
24namespace tpde {
25// TODO(ts): formulate concept for full compiler so that there is *some* check
26// whether all the required derived methods are implemented?
27
28/// Thread-local storage access mode
29 enum class TLSModel {
   /// General-dynamic model: TLS address resolved at run time; valid from any
   /// module, including dlopen'ed ones. Most general, slowest.
30 GlobalDynamic,
   /// Local-dynamic model: like GlobalDynamic, but for TLS variables local to
   /// the defining module (one runtime resolution per module).
31 LocalDynamic,
   /// Initial-exec model: offset into the static TLS block loaded from the
   /// GOT; module must be loaded at program startup.
32 InitialExec,
   /// Local-exec model: offset fixed at link time; main executable only.
33 LocalExec,
34};
35
36struct CCAssignment {
37 Reg reg = Reg::make_invalid(); ///< Assigned register, invalid implies stack.
38
39 /// If non-zero, indicates that this and the next N values must be
40 /// assigned to consecutive registers, or to the stack. The following
41 /// values must be in the same register bank as this value. u8 is sufficient,
42 /// no architecture has more than 255 parameter registers in a single bank.
43 u8 consecutive = 0;
44 bool sret : 1 = false; ///< Argument is return value pointer.
45
46 /// The argument is passed by value on the stack. The provided argument is a
47 /// pointer; for the call, size bytes will be copied into the corresponding
48 /// stack slot. Behaves like LLVM's byval.
49 ///
50 /// Note: On x86-64 SysV, this is used to pass larger structs in memory. Note
51 /// that AArch64 AAPCS doesn't use byval for structs, instead, the pointer is
52 /// passed without byval and it is the responsibility of the caller to
53 /// explicitly copy the value.
54 bool byval : 1 = false;
55
56 /// Extend integer argument. Highest bit indicates signed-ness, lower bits
57 /// indicate the source width from which the argument should be extended.
   /// Encoding: bit 7 = sign flag, bits 0-5 = source bit width; 0 means no
   /// extension (consumers test int_ext != 0, int_ext >> 7, int_ext & 0x3f).
58 u8 int_ext = 0;
59
60 u8 align = 0; ///< Argument alignment
61 RegBank bank = RegBank{}; ///< Register bank to assign the value to.
62 u32 size = 0; ///< Argument size, for byval the stack slot size.
63 u32 stack_off = 0; ///< Assigned stack slot, only valid if reg is invalid.
64};
65
/// Static description of a calling convention's register usage.
66struct CCInfo {
67 // TODO: use RegBitSet
   /// Bit mask of registers the register allocator may use.
68 const u64 allocatable_regs;
   /// Bit mask of registers preserved across calls; registers outside this
   /// set are treated as clobbered when a call is emitted.
69 const u64 callee_saved_regs;
70 /// Possible argument registers; these registers will not be allocated until
71 /// all arguments have been assigned.
72 const u64 arg_regs;
73 /// Size of red zone below stack pointer.
74 const u8 red_zone_size = 0;
75};
76
/// Abstract calling-convention assigner: maps argument and return value parts
/// to registers or stack slots by filling in CCAssignment::reg / stack_off.
77class CCAssigner {
78public:
79 const CCInfo *ccinfo;
80
81 CCAssigner(const CCInfo &ccinfo) noexcept : ccinfo(&ccinfo) {}
82 virtual ~CCAssigner() noexcept {}
83
   /// Reset assignment state so the assigner can be reused (e.g. for the
   /// return values after the arguments, or for the next call).
84 virtual void reset() noexcept = 0;
85
86 const CCInfo &get_ccinfo() const noexcept { return *ccinfo; }
87
   /// Assign the next argument part: fills cca.reg, or cca.stack_off when no
   /// register is assigned.
88 virtual void assign_arg(CCAssignment &cca) noexcept = 0;
   /// Stack space consumed by the stack-passed arguments assigned so far.
89 virtual u32 get_stack_size() noexcept = 0;
90 /// Some calling conventions need different call behavior when calling a
91 /// vararg function.
92 virtual bool is_vararg() const noexcept { return false; }
   /// Assign the next return value part; must yield a register (see add_ret).
93 virtual void assign_ret(CCAssignment &cca) noexcept = 0;
94};
95
96/// The base class for the compiler.
97/// It implements the main platform independent compilation logic and houses the
98/// analyzer
99template <IRAdaptor Adaptor,
100 typename Derived,
101 CompilerConfig Config = CompilerConfigDefault>
103 // some forwards for the IR type defs
104 using IRValueRef = typename Adaptor::IRValueRef;
105 using IRInstRef = typename Adaptor::IRInstRef;
106 using IRBlockRef = typename Adaptor::IRBlockRef;
107 using IRFuncRef = typename Adaptor::IRFuncRef;
108
109 using BlockIndex = typename Analyzer<Adaptor>::BlockIndex;
110
111 using AsmReg = typename Config::AsmReg;
112
113 using RegisterFile = tpde::RegisterFile<Config::NUM_BANKS, 32>;
114
115 /// A default implementation for ValRefSpecial.
116 // Note: Subclasses can override this, always use Derived::ValRefSpecial.
118 uint8_t mode = 4;
119 u64 const_data;
120 };
121
122#pragma region CompilerData
123 Adaptor *adaptor;
124 Analyzer<Adaptor> analyzer;
125
126 // data for frame management
127
128 struct {
129 /// The current size of the stack frame
130 u32 frame_size = 0;
131 /// Whether the stack frame might have dynamic alloca. Dynamic allocas may
132 /// require a different and less efficient frame setup.
134 /// Whether the function is guaranteed to be a leaf function. Throughout the
135 /// entire function, the compiler may assume the absence of function calls.
137 /// Whether the function actually includes a call. There are cases, where it
138 /// is not clear from the beginning whether a function has function calls.
139 /// If a function has no calls, this will allow using the red zone
140 /// guaranteed by some ABIs.
142 /// Whether the stack frame is used. If the stack frame is never used, the
143 /// frame setup can in some cases be omitted entirely.
145 /// Free-Lists for 1/2/4/8/16 sized allocations
146 // TODO(ts): make the allocations for 4/8 different from the others
147 // since they are probably the one's most used?
148 util::SmallVector<i32, 16> fixed_free_lists[5] = {};
149 /// Free-Lists for all other sizes
150 // TODO(ts): think about which data structure we want here
151 std::unordered_map<u32, std::vector<i32>> dynamic_free_lists{};
152 } stack = {};
153
154 typename Analyzer<Adaptor>::BlockIndex cur_block_idx;
155
156 // Assignments
157
158 static constexpr ValLocalIdx INVALID_VAL_LOCAL_IDX =
159 static_cast<ValLocalIdx>(~0u);
160
161 // TODO(ts): think about different ways to store this that are maybe more
162 // compact?
163 struct {
164 AssignmentAllocator allocator;
165
166 std::array<u32, Config::NUM_BANKS> cur_fixed_assignment_count = {};
167 util::SmallVector<ValueAssignment *, Analyzer<Adaptor>::SMALL_VALUE_NUM>
168 value_ptrs;
169
170 ValLocalIdx variable_ref_list;
171 util::SmallVector<ValLocalIdx, Analyzer<Adaptor>::SMALL_BLOCK_NUM>
172 delayed_free_lists;
173 } assignments = {};
174
175 RegisterFile register_file;
176#ifndef NDEBUG
177 /// Whether we are currently in the middle of generating branch-related code
178 /// and therefore must not change any value-related state.
179 bool generating_branch = false;
180#endif
181
182private:
183 /// Default CCAssigner if the implementation doesn't override cur_cc_assigner.
184 typename Config::DefaultCCAssigner default_cc_assigner;
185
186public:
187 typename Config::Assembler assembler;
188 Config::FunctionWriter text_writer;
189 // TODO(ts): smallvector?
190 std::vector<SymRef> func_syms;
191 // TODO(ts): combine this with the block vectors in the analyzer to save on
192 // allocations
193 util::SmallVector<Label> block_labels;
194
195 util::SmallVector<std::pair<SymRef, SymRef>, 4> personality_syms = {};
196
197 struct ScratchReg;
198 class ValuePart;
199 struct ValuePartRef;
200 struct ValueRef;
201 struct GenericValuePart;
202#pragma endregion
203
   /// Iterator range over the instructions of a block, with the iterator
   /// types produced by Adaptor::block_insts().
204 struct InstRange {
205 using Range = decltype(std::declval<Adaptor>().block_insts(
206 std::declval<IRBlockRef>()));
207 using Iter = decltype(std::declval<Range>().begin());
208 using EndIter = decltype(std::declval<Range>().end());
209 Iter from;
210 EndIter to;
211 };
212
213 /// Call argument, enhancing an IRValueRef with information on how to pass it.
214 struct CallArg {
215 enum class Flag : u8 {
216 none, ///< No extra handling.
217 zext, ///< Scalar integer, zero-extend to target-specific size.
218 sext, ///< Scalar integer, sign-extend to target-specific size.
219 sret, ///< Struct return pointer.
220 byval, ///< Value is copied into corresponding stack slot.
221 allow_split, ///< Value parts can be split across stack/registers.
222 };
223
224 explicit CallArg(IRValueRef value,
225 Flag flags = Flag::none,
226 u8 byval_align = 1,
227 u32 byval_size = 0)
228 : value(value),
229 flag(flags),
232
233 IRValueRef value; ///< Argument IR value.
234 Flag flag; ///< Value handling flag.
235 u8 byval_align; ///< For Flag::byval, the stack alignment.
236 u8 ext_bits = 0; ///< For Flag::zext and Flag::sext, the source bit width.
237 u32 byval_size; ///< For Flag::byval, the argument size.
238 };
239
240 /// Base class for target-specific CallBuilder implementations.
    /// CRTP helper that assigns call arguments via the CCAssigner, emits the
    /// call, and assigns the returned values.
241 template <typename CBDerived>
242 class CallBuilderBase {
243 protected:
244 Derived &compiler;
245 CCAssigner &assigner;
246
    /// Registers holding already-assigned arguments; removed from the
    /// allocatable set until the call is emitted (see add_arg/call).
247 RegisterFile::RegBitSet arg_regs{};
248
249 CallBuilderBase(Derived &compiler, CCAssigner &assigner) noexcept
250 : compiler(compiler), assigner(assigner) {}
251
252 // CBDerived needs:
253 // void add_arg_byval(ValuePart &vp, CCAssignment &cca) noexcept;
254 // void add_arg_stack(ValuePart &vp, CCAssignment &cca) noexcept;
255 // void call_impl(std::variant<SymRef, ValuePart> &&) noexcept;
256 CBDerived *derived() noexcept { return static_cast<CBDerived *>(this); }
257
258 public:
259 /// Add a value part as argument. cca must be populated with information
260 /// about the argument, except for the reg/stack_off, which are set by the
261 /// CCAssigner. If no register bank is assigned, the register bank and size
262 /// are retrieved from the value part, otherwise, the size must be set, too.
263 void add_arg(ValuePart &&vp, CCAssignment cca) noexcept;
264 /// Add a full IR value as argument, with an explicit number of parts.
265 /// Values are decomposed into their parts and are typically either fully
266 /// in registers or fully on the stack (except CallArg::Flag::allow_split).
267 void add_arg(const CallArg &arg, u32 part_count) noexcept;
268 /// Add a full IR value as argument. The number of value parts must be
269 /// exposed via val_parts. Values are decomposed into their parts and are
270 /// typically either fully in registers or fully on the stack (except
271 /// CallArg::Flag::allow_split).
272 void add_arg(const CallArg &arg) noexcept {
273 add_arg(std::move(arg), compiler.val_parts(arg.value).count());
274 }
275
276 /// Generate the function call (evict registers, call, reset stack frame).
277 void call(std::variant<SymRef, ValuePart>) noexcept;
278
279 /// Assign next return value part to vp.
280 void add_ret(ValuePart &vp, CCAssignment cca) noexcept;
281 /// Assign next return value part to vp.
282 void add_ret(ValuePart &&vp, CCAssignment cca) noexcept {
283 add_ret(vp, cca);
284 }
285 /// Assign return values to the IR value.
286 void add_ret(ValueRef &vr) noexcept;
287 };
288
    /// Builds a function return: assigns each return value part to its ABI
    /// location via the CCAssigner, then emits epilogue and return (ret()).
289 class RetBuilder {
290 Derived &compiler;
291 CCAssigner &assigner;
292
    /// Registers holding assigned return values; withheld from the allocator
    /// until ret() is emitted.
293 RegisterFile::RegBitSet ret_regs{};
294
295 public:
296 RetBuilder(Derived &compiler, CCAssigner &assigner) noexcept
297 : compiler(compiler), assigner(assigner) {
298 assigner.reset();
299 }
300
    /// Move a value part into its return register.
301 void add(ValuePart &&vp, CCAssignment cca) noexcept;
    /// Add all parts of an IR value as return values.
302 void add(IRValueRef val) noexcept;
303
    /// Emit function epilogue and return; releases the return registers.
304 void ret() noexcept;
305 };
306
307 /// Initialize a CompilerBase, should be called by the derived classes
308 explicit CompilerBase(Adaptor *adaptor)
309 : adaptor(adaptor), analyzer(adaptor), assembler() {
310 static_assert(std::is_base_of_v<CompilerBase, Derived>);
311 static_assert(Compiler<Derived, Config>);
312 }
313
314 /// shortcut for casting to the Derived class so that overloading
315 /// works
316 Derived *derived() { return static_cast<Derived *>(this); }
317
318 const Derived *derived() const { return static_cast<const Derived *>(this); }
319
    /// Local index of an IR value, as numbered by the adaptor.
320 [[nodiscard]] ValLocalIdx val_idx(const IRValueRef value) const noexcept {
321 return analyzer.adaptor->val_local_idx(value);
322 }
323
    /// Current ValueAssignment for a local value index; nullptr if no
    /// assignment has been initialized for it.
324 [[nodiscard]] ValueAssignment *
325 val_assignment(const ValLocalIdx idx) noexcept {
326 return assignments.value_ptrs[static_cast<u32>(idx)];
327 }
328
329 /// Compile the functions returned by Adaptor::funcs
330 ///
331 /// \warning If you intend to call this multiple times, you must call reset
332 /// in-between the calls.
333 ///
334 /// \returns Whether the compilation was successful
335 bool compile();
336
337 /// Reset any leftover data from the previous compilation such that it will
338 /// not affect the next compilation
339 void reset();
340
341 /// Get CCAssigner for current function.
342 CCAssigner *cur_cc_assigner() noexcept { return &default_cc_assigner; }
343
344 void init_assignment(IRValueRef value, ValLocalIdx local_idx) noexcept;
345
346private:
347 /// Frees an assignment, its stack slot and registers
348 void free_assignment(ValLocalIdx local_idx, ValueAssignment *) noexcept;
349
350public:
351 /// Release an assignment when reference count drops to zero, either frees
352 /// the assignment immediately or delays free to the end of the live range.
353 void release_assignment(ValLocalIdx local_idx, ValueAssignment *) noexcept;
354
355 /// Init a variable-ref assignment
356 void init_variable_ref(ValLocalIdx local_idx, u32 var_ref_data) noexcept;
357 /// Init a variable-ref assignment
358 void init_variable_ref(IRValueRef value, u32 var_ref_data) noexcept {
359 init_variable_ref(adaptor->val_local_idx(value), var_ref_data);
360 }
361
362 /// \name Stack Slots
363 /// @{
364
365 /// Allocate a static stack slot.
366 i32 allocate_stack_slot(u32 size) noexcept;
367 /// Free a static stack slot.
368 void free_stack_slot(u32 slot, u32 size) noexcept;
369
370 /// @}
371
372 /// Assign function argument in prologue. \ref align can be used to increase
373 /// the minimal stack alignment of the first part of the argument. If \ref
374 /// allow_split is set, the argument can be passed partially in registers,
375 /// otherwise (default) it must be either passed completely in registers or
376 /// completely on the stack.
377 void prologue_assign_arg(CCAssigner *cc_assigner,
378 u32 arg_idx,
379 IRValueRef arg,
380 u32 align = 1,
381 bool allow_split = false) noexcept;
382
383 /// \name Value References
384 /// @{
385
386 /// Get a using reference to a value.
387 ValueRef val_ref(IRValueRef value) noexcept;
388
389 /// Get a using reference to a single-part value and provide direct access to
390 /// the only part. This is a convenience function; note that the ValueRef must
391 /// outlive the ValuePartRef (i.e. auto p = val_ref().part(0); won't work, as
392 /// the value will possibly be deallocated when the ValueRef is destroyed).
393 std::pair<ValueRef, ValuePartRef> val_ref_single(IRValueRef value) noexcept;
394
395 /// Get a defining reference to a value.
396 ValueRef result_ref(IRValueRef value) noexcept;
397
398 /// Get a defining reference to a single-part value and provide direct access
399 /// to the only part. Similar to val_ref_single().
400 std::pair<ValueRef, ValuePartRef>
401 result_ref_single(IRValueRef value) noexcept;
402
403 /// Make dst an alias for src, which must be a non-constant value with an
404 /// identical part configuration. src must be in its last use (is_owned()),
405 /// and the assignment will be repurposed for dst, keeping all assigned
406 /// registers and stack slots.
407 ValueRef result_ref_alias(IRValueRef dst, ValueRef &&src) noexcept;
408
409 /// Initialize value as a pointer into a stack variable (i.e., a value
410 /// allocated from cur_static_allocas() or similar) with an offset. The
411 /// result value will be a stack variable itself.
412 ValueRef result_ref_stack_slot(IRValueRef value,
413 AssignmentPartRef base,
414 i32 off) noexcept;
415
416 /// @}
417
418 [[deprecated("Use ValuePartRef::set_value")]]
419 void set_value(ValuePartRef &val_ref, ScratchReg &scratch) noexcept;
420 [[deprecated("Use ValuePartRef::set_value")]]
421 void set_value(ValuePartRef &&val_ref, ScratchReg &scratch) noexcept {
422 set_value(val_ref, scratch);
423 }
424
425 /// Get generic value part into a single register, evaluating expressions
426 /// and materializing immediates as required.
427 AsmReg gval_as_reg(GenericValuePart &gv) noexcept;
428
429 /// Like gval_as_reg; if the GenericValuePart owns a reusable register
430 /// (either a ScratchReg, possibly due to materialization, or a reusable
431 /// ValuePartRef), store it in dst.
432 AsmReg gval_as_reg_reuse(GenericValuePart &gv, ScratchReg &dst) noexcept;
433
434 /// Like gval_as_reg; if the GenericValuePart owns a reusable register
435 /// (either a ScratchReg, possibly due to materialization, or a reusable
436 /// ValuePartRef), store it in dst.
437 AsmReg gval_as_reg_reuse(GenericValuePart &gv, ValuePart &dst) noexcept;
438
439private:
440 /// @internal Select register when a value needs to be evicted.
441 Reg select_reg_evict(RegBank bank) noexcept;
442
443public:
444 /// \name Low-Level Assignment Register Handling
445 /// @{
446
447 /// Select an available register, evicting loaded values if needed.
448 Reg select_reg(RegBank bank) noexcept {
449 Reg res = register_file.find_first_free_excluding(bank, 0);
450 if (res.valid()) [[likely]] {
451 return res;
452 }
453 return select_reg_evict(bank);
454 }
455
456 /// Reload a value part from memory or recompute variable address.
457 void reload_to_reg(AsmReg dst, AssignmentPartRef ap) noexcept;
458
459 /// Allocate a stack slot for an assignment.
460 void allocate_spill_slot(AssignmentPartRef ap) noexcept;
461
462 /// Ensure the value is spilled in its stack slot (except variable refs).
463 void spill(AssignmentPartRef ap) noexcept;
464
465 /// Evict the value from its register, spilling if needed, and free register.
466 void evict(AssignmentPartRef ap) noexcept;
467
468 /// Evict the value from the register, spilling if needed, and free register.
469 void evict_reg(Reg reg) noexcept;
470
471 /// Free the register. Requires that the contained value is already spilled.
472 void free_reg(Reg reg) noexcept;
473
474 /// @}
475
476 /// \name High-Level Branch Generation
477 /// @{
478
479 /// Generate an unconditional branch at the end of a basic block. No further
480 /// instructions must follow. If target is the next block in the block order,
481 /// the branch is omitted.
482 void generate_uncond_branch(IRBlockRef target) noexcept;
483
484 /// Generate a conditional branch at the end of a basic block.
485 template <typename Jump>
486 void generate_cond_branch(Jump jmp,
487 IRBlockRef true_target,
488 IRBlockRef false_target) noexcept;
489
490 /// Generate a switch at the end of a basic block. Only the lowest bits of the
491 /// condition are considered. The condition must be a general-purpose
492 /// register. The cases must be sorted and every case value must appear at
493 /// most once.
495 ScratchReg &&cond,
496 u32 width,
497 IRBlockRef default_block,
498 std::span<const std::pair<u64, IRBlockRef>> cases) noexcept;
499
500 /// @}
501
502 /// \name Low-Level Branch Primitives
503 /// The general flow of using these low-level primitive is:
504 /// 1. spill_before_branch()
505 /// 2. begin_branch_region()
506 /// 3. One or more calls to generate_branch_to_block()
507 /// 4. end_branch_region()
508 /// 5. release_spilled_regs()
509 /// @{
510
511 // TODO(ts): switch to a branch_spill_before naming style?
512 /// Spill values that need to be spilled for later blocks. Returns the set
513 /// of registers that will be free'd at the end of the block; pass this to
514 /// release_spilled_regs().
515 typename RegisterFile::RegBitSet
516 spill_before_branch(bool force_spill = false) noexcept;
517 /// Free registers marked by spill_before_branch().
518 void release_spilled_regs(typename RegisterFile::RegBitSet) noexcept;
519
520 /// When reaching a point in the function where no other blocks will be
521 /// reached anymore, use this function to release register assignments after
522 /// the end of that block so the compiler does not accidentally use
523 /// registers which don't contain any values
525
526 /// Indicate beginning of region where value-state must not change.
    /// Debug-only bookkeeping: sets the generating_branch flag checked by
    /// may_change_value_state(); a no-op in release builds.
527 void begin_branch_region() noexcept {
528#ifndef NDEBUG
529 assert(!generating_branch);
530 generating_branch = true;
531#endif
532 }
533
534 /// Indicate end of region where value-state must not change.
    /// Debug-only counterpart to begin_branch_region(); clears the flag.
535 void end_branch_region() noexcept {
536#ifndef NDEBUG
537 assert(generating_branch);
538 generating_branch = false;
539#endif
540 }
541
542 /// Generate a branch to a basic block; execution continues afterwards.
543 /// Multiple calls to this function can be used to build conditional branches.
544 /// @tparam Jump Target-defined Jump type (e.g., CompilerX64::Jump).
545 /// @param needs_split Result of branch_needs_split(); pass false for an
546 /// unconditional branch.
547 /// @param last_inst Whether fall-through to target is possible.
548 template <typename Jump>
550 IRBlockRef target,
551 bool needs_split,
552 bool last_inst) noexcept;
553
554#ifndef NDEBUG
555 bool may_change_value_state() const noexcept { return !generating_branch; }
556#endif
557
    /// Emit the moves for target's PHI nodes; no-op when the block has none.
558 void move_to_phi_nodes(BlockIndex target) noexcept {
559 if (analyzer.block_has_phis(target)) {
560 move_to_phi_nodes_impl(target);
561 }
562 }
563
    /// Out-of-line worker for move_to_phi_nodes(); only called when the
    /// target block actually has PHI nodes.
564 void move_to_phi_nodes_impl(BlockIndex target) noexcept;
565
566 /// Whether branch to a block requires additional instructions and therefore
567 /// a direct jump to the block is not possible.
568 bool branch_needs_split(IRBlockRef target) noexcept {
569 // for now, if the target has PHI-nodes, we split
570 return analyzer.block_has_phis(target);
571 }
572
573 /// @}
574
    /// Index of the block following the current one in the computed block
    /// order (used to elide fall-through branches).
575 BlockIndex next_block() const noexcept;
576
    /// Hook for Derived: return true to force a fixed register assignment
    /// for the given value. Default: never force.
577 bool try_force_fixed_assignment(IRValueRef) const noexcept { return false; }
578
    /// Hook called by compile() after all function symbols were created;
    /// returning false aborts compilation.
579 bool hook_post_func_sym_init() noexcept { return true; }
580
    /// Hook for Derived; NOTE(review): presumably invoked around the analyzer
    /// pass — call site not visible in this chunk, confirm in Derived usage.
581 void analysis_start() noexcept {}
582
583 void analysis_end() noexcept {}
584
    /// Add a relocation against sym in the text section at the given offset.
585 void reloc_text(SymRef sym, u32 type, u64 offset, i64 addend = 0) noexcept {
586 this->assembler.reloc_sec(
587 text_writer.get_sec_ref(), sym, type, offset, addend);
588 }
589
590 /// Convenience function to place a label at the current position.
591 void label_place(Label label) noexcept {
592 this->text_writer.label_place(label, text_writer.offset());
593 }
594
595protected:
596 SymRef get_personality_sym() noexcept;
597
598 bool compile_func(IRFuncRef func, u32 func_idx) noexcept;
599
600 bool compile_block(IRBlockRef block, u32 block_idx) noexcept;
601};
602} // namespace tpde
603
604#include "GenericValuePart.hpp"
605#include "ScratchReg.hpp"
606#include "ValuePartRef.hpp"
607#include "ValueRef.hpp"
608
609namespace tpde {
610
611template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
612template <typename CBDerived>
614 CBDerived>::add_arg(ValuePart &&vp, CCAssignment cca) noexcept {
615 if (!cca.byval && cca.bank == RegBank{}) {
616 cca.bank = vp.bank();
617 cca.size = vp.part_size();
618 }
619
620 assigner.assign_arg(cca);
621 bool needs_ext = cca.int_ext != 0;
622 bool ext_sign = cca.int_ext >> 7;
623 unsigned ext_bits = cca.int_ext & 0x3f;
624
625 if (cca.byval) {
626 derived()->add_arg_byval(vp, cca);
627 vp.reset(&compiler);
628 } else if (!cca.reg.valid()) {
629 if (needs_ext) {
630 auto ext = std::move(vp).into_extended(&compiler, ext_sign, ext_bits, 64);
631 derived()->add_arg_stack(ext, cca);
632 ext.reset(&compiler);
633 } else {
634 derived()->add_arg_stack(vp, cca);
635 }
636 vp.reset(&compiler);
637 } else {
638 u32 size = vp.part_size();
639 if (vp.is_in_reg(cca.reg)) {
640 if (!vp.can_salvage()) {
641 compiler.evict_reg(cca.reg);
642 } else {
643 vp.salvage(&compiler);
644 }
645 if (needs_ext) {
646 compiler.generate_raw_intext(cca.reg, cca.reg, ext_sign, ext_bits, 64);
647 }
648 } else {
649 if (compiler.register_file.is_used(cca.reg)) {
650 compiler.evict_reg(cca.reg);
651 }
652 if (vp.can_salvage()) {
653 AsmReg vp_reg = vp.salvage(&compiler);
654 if (needs_ext) {
655 compiler.generate_raw_intext(cca.reg, vp_reg, ext_sign, ext_bits, 64);
656 } else {
657 compiler.mov(cca.reg, vp_reg, size);
658 }
659 } else if (needs_ext && vp.is_const()) {
660 u64 val = vp.const_data()[0];
661 u64 extended =
662 ext_sign ? util::sext(val, ext_bits) : util::zext(val, ext_bits);
663 compiler.materialize_constant(&extended, cca.bank, 8, cca.reg);
664 } else {
665 vp.reload_into_specific_fixed(&compiler, cca.reg);
666 if (needs_ext) {
667 compiler.generate_raw_intext(
668 cca.reg, cca.reg, ext_sign, ext_bits, 64);
669 }
670 }
671 }
672 vp.reset(&compiler);
673 assert(!compiler.register_file.is_used(cca.reg));
674 compiler.register_file.mark_clobbered(cca.reg);
675 compiler.register_file.allocatable &= ~(u64{1} << cca.reg.id());
676 arg_regs |= (1ull << cca.reg.id());
677 }
678}
679
680template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
681template <typename CBDerived>
683 CBDerived>::add_arg(const CallArg &arg, u32 part_count) noexcept {
684 ValueRef vr = compiler.val_ref(arg.value);
685
686 if (arg.flag == CallArg::Flag::byval) {
687 assert(part_count == 1);
688 add_arg(vr.part(0),
689 CCAssignment{
690 .byval = true,
691 .align = arg.byval_align,
692 .size = arg.byval_size,
693 });
694 return;
695 }
696
697 u32 align = arg.byval_align;
698 bool allow_split = arg.flag == CallArg::Flag::allow_split;
699
700 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
701 u8 int_ext = 0;
702 if (arg.flag == CallArg::Flag::sext || arg.flag == CallArg::Flag::zext) {
703 assert(arg.ext_bits != 0 && "cannot extend zero-bit integer");
704 int_ext = arg.ext_bits | (arg.flag == CallArg::Flag::sext ? 0x80 : 0);
705 }
706 u32 remaining = part_count < 256 ? part_count - part_idx - 1 : 255;
707 derived()->add_arg(vr.part(part_idx),
708 CCAssignment{
709 .consecutive = u8(allow_split ? 0 : remaining),
710 .sret = arg.flag == CallArg::Flag::sret,
711 .int_ext = int_ext,
712 .align = u8(part_idx == 0 ? align : 1),
713 });
714 }
715}
716
717template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
718template <typename CBDerived>
720 std::variant<SymRef, ValuePart> target) noexcept {
721 assert(!compiler.stack.is_leaf_function && "leaf func must not have calls");
722 compiler.stack.generated_call = true;
723 typename RegisterFile::RegBitSet skip_evict = arg_regs;
724 if (auto *vp = std::get_if<ValuePart>(&target); vp && vp->can_salvage()) {
725 // call_impl will reset vp, thereby unlock+free the register.
726 assert(vp->cur_reg_unlocked().valid() && "can_salvage implies register");
727 skip_evict |= (1ull << vp->cur_reg_unlocked().id());
728 }
729
730 auto clobbered = ~assigner.get_ccinfo().callee_saved_regs;
731 for (auto reg_id : util::BitSetIterator<>{compiler.register_file.used &
732 clobbered & ~skip_evict}) {
733 compiler.evict_reg(AsmReg{reg_id});
734 compiler.register_file.mark_clobbered(Reg{reg_id});
735 }
736
737 derived()->call_impl(std::move(target));
738
739 assert((compiler.register_file.allocatable & arg_regs) == 0);
740 compiler.register_file.allocatable |= arg_regs;
741}
742
743template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
744template <typename CBDerived>
746 CBDerived>::add_ret(ValuePart &vp, CCAssignment cca) noexcept {
747 cca.bank = vp.bank();
748 cca.size = vp.part_size();
749 assigner.assign_ret(cca);
750 assert(cca.reg.valid() && "return value must be in register");
751 vp.set_value_reg(&compiler, cca.reg);
752}
753
754template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
755template <typename CBDerived>
757 CBDerived>::add_ret(ValueRef &vr) noexcept {
758 assert(vr.has_assignment());
759 u32 part_count = vr.assignment()->part_count;
760 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
761 CCAssignment cca;
762 add_ret(vr.part(part_idx), cca);
763 }
764}
765
766template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
767void CompilerBase<Adaptor, Derived, Config>::RetBuilder::add(
768 ValuePart &&vp, CCAssignment cca) noexcept {
769 cca.bank = vp.bank();
770 u32 size = cca.size = vp.part_size();
771 assigner.assign_ret(cca);
772 assert(cca.reg.valid() && "indirect return value must use sret argument");
773
774 bool needs_ext = cca.int_ext != 0;
775 bool ext_sign = cca.int_ext >> 7;
776 unsigned ext_bits = cca.int_ext & 0x3f;
777
778 if (vp.is_in_reg(cca.reg)) {
779 if (!vp.can_salvage()) {
780 compiler.evict_reg(cca.reg);
781 } else {
782 vp.salvage(&compiler);
783 }
784 if (needs_ext) {
785 compiler.generate_raw_intext(cca.reg, cca.reg, ext_sign, ext_bits, 64);
786 }
787 } else {
788 if (compiler.register_file.is_used(cca.reg)) {
789 compiler.evict_reg(cca.reg);
790 }
791 if (vp.can_salvage()) {
792 AsmReg vp_reg = vp.salvage(&compiler);
793 if (needs_ext) {
794 compiler.generate_raw_intext(cca.reg, vp_reg, ext_sign, ext_bits, 64);
795 } else {
796 compiler.mov(cca.reg, vp_reg, size);
797 }
798 } else {
799 vp.reload_into_specific_fixed(&compiler, cca.reg);
800 if (needs_ext) {
801 compiler.generate_raw_intext(cca.reg, cca.reg, ext_sign, ext_bits, 64);
802 }
803 }
804 }
805 vp.reset(&compiler);
806 assert(!compiler.register_file.is_used(cca.reg));
807 compiler.register_file.mark_clobbered(cca.reg);
808 compiler.register_file.allocatable &= ~(u64{1} << cca.reg.id());
809 ret_regs |= (1ull << cca.reg.id());
810}
811
812template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
813void CompilerBase<Adaptor, Derived, Config>::RetBuilder::add(
814 IRValueRef val) noexcept {
815 u32 part_count = compiler.val_parts(val).count();
816 ValueRef vr = compiler.val_ref(val);
817 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
818 add(vr.part(part_idx), CCAssignment{});
819 }
820}
821
822template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
823void CompilerBase<Adaptor, Derived, Config>::RetBuilder::ret() noexcept {
824 assert((compiler.register_file.allocatable & ret_regs) == 0);
825 compiler.register_file.allocatable |= ret_regs;
826
827 compiler.gen_func_epilog();
828 compiler.release_regs_after_return();
829}
830
831template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
833 // create function symbols
834 text_writer.begin_module(assembler);
835 text_writer.switch_section(
836 assembler.get_section(assembler.get_text_section()));
837
838 assert(func_syms.empty());
839 for (const IRFuncRef func : adaptor->funcs()) {
840 auto binding = Assembler::SymBinding::GLOBAL;
841 if (adaptor->func_has_weak_linkage(func)) {
843 } else if (adaptor->func_only_local(func)) {
845 }
846 if (adaptor->func_extern(func)) {
847 func_syms.push_back(derived()->assembler.sym_add_undef(
848 adaptor->func_link_name(func), binding));
849 } else {
850 func_syms.push_back(derived()->assembler.sym_predef_func(
851 adaptor->func_link_name(func), binding));
852 }
853 derived()->define_func_idx(func, func_syms.size() - 1);
854 }
855
856 if (!derived()->hook_post_func_sym_init()) {
857 TPDE_LOG_ERR("hook_pust_func_sym_init failed");
858 return false;
859 }
860
861 // TODO(ts): create function labels?
862
863 bool success = true;
864
865 u32 func_idx = 0;
866 for (const IRFuncRef func : adaptor->funcs()) {
867 if (adaptor->func_extern(func)) {
868 TPDE_LOG_TRACE("Skipping compilation of func {}",
869 adaptor->func_link_name(func));
870 ++func_idx;
871 continue;
872 }
873
874 TPDE_LOG_TRACE("Compiling func {}", adaptor->func_link_name(func));
875 if (!derived()->compile_func(func, func_idx)) {
876 TPDE_LOG_ERR("Failed to compile function {}",
877 adaptor->func_link_name(func));
878 success = false;
879 }
880 ++func_idx;
881 }
882
883 text_writer.flush();
884 text_writer.end_module();
885 assembler.finalize();
886
887 // TODO(ts): generate object/map?
888
889 return success;
890}
891
892template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
894 adaptor->reset();
895
896 for (auto &e : stack.fixed_free_lists) {
897 e.clear();
898 }
899 stack.dynamic_free_lists.clear();
900
901 assembler.reset();
902 func_syms.clear();
903 block_labels.clear();
904 personality_syms.clear();
905}
906
907template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
908void CompilerBase<Adaptor, Derived, Config>::init_assignment(
909 IRValueRef value, ValLocalIdx local_idx) noexcept {
910 assert(val_assignment(local_idx) == nullptr);
911 TPDE_LOG_TRACE("Initializing assignment for value {}",
912 static_cast<u32>(local_idx));
913
914 const auto parts = derived()->val_parts(value);
915 const u32 part_count = parts.count();
916 assert(part_count > 0);
917 auto *assignment = assignments.allocator.allocate(part_count);
918 assignments.value_ptrs[static_cast<u32>(local_idx)] = assignment;
919
920 u32 max_part_size = 0;
921 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
922 auto ap = AssignmentPartRef{assignment, part_idx};
923 ap.reset();
924 ap.set_bank(parts.reg_bank(part_idx));
925 const u32 size = parts.size_bytes(part_idx);
926 assert(size > 0);
927 max_part_size = std::max(max_part_size, size);
928 ap.set_part_size(size);
929 }
930
931 const auto &liveness = analyzer.liveness_info(local_idx);
932
933 // if there is only one part, try to hand out a fixed assignment
934 // if the value is used for longer than one block and there aren't too many
935 // definitions in child loops this could interfere with
936 // TODO(ts): try out only fixed assignments if the value is live for more
937 // than two blocks?
938 // TODO(ts): move this to ValuePartRef::alloc_reg to be able to defer this
939 // for results?
940 if (part_count == 1) {
941 const auto &cur_loop =
942 analyzer.loop_from_idx(analyzer.block_loop_idx(cur_block_idx));
943 auto ap = AssignmentPartRef{assignment, 0};
944
945 auto try_fixed =
946 liveness.last > cur_block_idx &&
947 cur_loop.definitions_in_childs +
948 assignments.cur_fixed_assignment_count[ap.bank().id()] <
949 Derived::NUM_FIXED_ASSIGNMENTS[ap.bank().id()];
950 if (derived()->try_force_fixed_assignment(value)) {
951 try_fixed = assignments.cur_fixed_assignment_count[ap.bank().id()] <
952 Derived::NUM_FIXED_ASSIGNMENTS[ap.bank().id()];
953 }
954
955 if (try_fixed) {
956 // check if there is a fixed register available
957 AsmReg reg = derived()->select_fixed_assignment_reg(ap, value);
958 TPDE_LOG_TRACE("Trying to assign fixed reg to value {}",
959 static_cast<u32>(local_idx));
960
961 // TODO: if the register is used, we can free it most of the time, but not
962 // always, e.g. for PHI nodes. Detect this case and free_reg otherwise.
963 if (!reg.invalid() && !register_file.is_used(reg)) {
964 TPDE_LOG_TRACE("Assigning fixed assignment to reg {} for value {}",
965 reg.id(),
966 static_cast<u32>(local_idx));
967 ap.set_reg(reg);
968 ap.set_register_valid(true);
969 ap.set_fixed_assignment(true);
970 register_file.mark_used(reg, local_idx, 0);
971 register_file.inc_lock_count(reg); // fixed assignments always locked
972 register_file.mark_clobbered(reg);
973 ++assignments.cur_fixed_assignment_count[ap.bank().id()];
974 }
975 }
976 }
977
978 const auto last_full = liveness.last_full;
979 const auto ref_count = liveness.ref_count;
980
981 assert(max_part_size <= 256);
982 assignment->max_part_size = max_part_size;
983 assignment->pending_free = false;
984 assignment->variable_ref = false;
985 assignment->stack_variable = false;
986 assignment->delay_free = last_full;
987 assignment->part_count = part_count;
988 assignment->frame_off = 0;
989 assignment->references_left = ref_count;
990}
991
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
// NOTE(review): the qualified signature line (free_assignment, judging from
// the call sites below) appears to be missing from this extraction -- verify.
    ValLocalIdx local_idx, ValueAssignment *assignment) noexcept {
  // Release everything the assignment holds: fixed and non-fixed registers,
  // the spill slot (variable refs have none), and the assignment storage.
  TPDE_LOG_TRACE("Freeing assignment for value {}",
                 static_cast<u32>(local_idx));

  assert(assignments.value_ptrs[static_cast<u32>(local_idx)] == assignment);
  assignments.value_ptrs[static_cast<u32>(local_idx)] = nullptr;
  const auto is_var_ref = assignment->variable_ref;
  const u32 part_count = assignment->part_count;

  // free registers
  for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
    auto ap = AssignmentPartRef{assignment, part_idx};
    if (ap.fixed_assignment()) [[unlikely]] {
      // Fixed parts also give back their slot in the per-bank fixed budget.
      const auto reg = ap.get_reg();
      assert(register_file.is_fixed(reg));
      assert(register_file.reg_local_idx(reg) == local_idx);
      assert(register_file.reg_part(reg) == part_idx);
      --assignments.cur_fixed_assignment_count[ap.bank().id()];
      register_file.dec_lock_count_must_zero(reg); // release lock for fixed reg
      register_file.unmark_used(reg);
    } else if (ap.register_valid()) {
      const auto reg = ap.get_reg();
      assert(!register_file.is_fixed(reg));
      register_file.unmark_used(reg);
    }
  }

#ifdef TPDE_ASSERTS
  for (auto reg_id : register_file.used_regs()) {
    assert(register_file.reg_local_idx(AsmReg{reg_id}) != local_idx &&
           "freeing assignment that is still referenced by a register");
  }
#endif

  // variable references do not have a stack slot
  // frame_off == 0 (or >= 0 with negative frame indexing) means no slot.
  bool has_stack = Config::FRAME_INDEXING_NEGATIVE ? assignment->frame_off < 0
                                                   : assignment->frame_off != 0;
  if (!is_var_ref && has_stack) {
    free_stack_slot(assignment->frame_off, assignment->size());
  }

  assignments.allocator.deallocate(assignment);
}
1037
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
[[gnu::noinline]] void
// NOTE(review): the qualified function-name line appears to be missing from
// this extraction -- verify against the original file.
    ValLocalIdx local_idx, ValueAssignment *assignment) noexcept {
  // Free an assignment immediately, or, when delay_free is set, queue it on
  // the delayed-free list of its last live block so it is reclaimed only
  // once that block has been processed.
  if (!assignment->delay_free) {
    free_assignment(local_idx, assignment);
    return;
  }

  // need to wait until release
  TPDE_LOG_TRACE("Delay freeing assignment for value {}",
                 static_cast<u32>(local_idx));
  // Push onto the intrusive singly-linked list headed at the value's last
  // live block.
  const auto &liveness = analyzer.liveness_info(local_idx);
  auto &free_list_head = assignments.delayed_free_lists[u32(liveness.last)];
  assignment->next_delayed_free_entry = free_list_head;
  assignment->pending_free = true;
  free_list_head = local_idx;
}
1056
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
// NOTE(review): the qualified signature line (init_variable_ref, judging
// from the call sites below) appears missing from this extraction -- verify.
    ValLocalIdx local_idx, u32 var_ref_data) noexcept {
  // Create a single-part, pointer-sized variable-ref assignment for the
  // value and link it into the variable-ref list.
  TPDE_LOG_TRACE("Initializing variable-ref assignment for value {}",
                 static_cast<u32>(local_idx));

  assert(val_assignment(local_idx) == nullptr);
  auto *assignment = assignments.allocator.allocate_slow(1, true);
  assignments.value_ptrs[static_cast<u32>(local_idx)] = assignment;

  assignment->max_part_size = Config::PLATFORM_POINTER_SIZE;
  assignment->variable_ref = true;
  assignment->stack_variable = false;
  assignment->part_count = 1;
  // Custom payload supplied by the caller.
  assignment->var_ref_custom_idx = var_ref_data;
  // Variable refs are not delay-freed; the next_delayed_free_entry field is
  // reused here as the link of the variable_ref_list.
  assignment->next_delayed_free_entry = assignments.variable_ref_list;

  assignments.variable_ref_list = local_idx;

  // Single GP-bank, pointer-sized part.
  AssignmentPartRef ap{assignment, 0};
  ap.reset();
  ap.set_bank(Config::GP_BANK);
  ap.set_part_size(Config::PLATFORM_POINTER_SIZE);
}
1081
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
// NOTE(review): the qualified signature line (allocate_stack_slot, judging
// from the call site in the spill-slot helper) appears missing -- verify.
    u32 size) noexcept {
  // Allocate a stack slot of at least `size` bytes and return its frame
  // offset (negated when Config::FRAME_INDEXING_NEGATIVE). Slot 0 is the
  // reserved "invalid" slot and is returned for zero-sized requests.
  this->stack.frame_used = true;
  unsigned align_bits = 4;
  if (size == 0) {
    return 0; // 0 is the "invalid" stack slot
  } else if (size <= 16) {
    // Align up to next power of two.
    u32 free_list_idx = size == 1 ? 0 : 32 - util::cnt_lz<u32>(size - 1);
    assert(size <= 1u << free_list_idx);
    size = 1 << free_list_idx;
    align_bits = free_list_idx;

    // Reuse a previously freed slot from this power-of-two bucket.
    if (!stack.fixed_free_lists[free_list_idx].empty()) {
      auto slot = stack.fixed_free_lists[free_list_idx].back();
      stack.fixed_free_lists[free_list_idx].pop_back();
      return slot;
    }
  } else {
    // Large slots are 16-byte aligned and recycled via a size-keyed map.
    size = util::align_up(size, 16);
    auto it = stack.dynamic_free_lists.find(size);
    if (it != stack.dynamic_free_lists.end() && !it->second.empty()) {
      const auto slot = it->second.back();
      it->second.pop_back();
      return slot;
    }
  }

  assert(stack.frame_size != ~0u &&
         "cannot allocate stack slot before stack frame is initialized");

  // Align frame_size to align_bits
  // The padding created while aligning is donated to the smaller fixed
  // free lists so it is not wasted.
  for (u32 list_idx = util::cnt_tz(stack.frame_size); list_idx < align_bits;
       list_idx = util::cnt_tz(stack.frame_size)) {
    i32 slot = stack.frame_size;
    if constexpr (Config::FRAME_INDEXING_NEGATIVE) {
      slot = -(slot + (1ull << list_idx));
    }
    stack.fixed_free_lists[list_idx].push_back(slot);
    stack.frame_size += 1ull << list_idx;
  }

  auto slot = stack.frame_size;
  assert(slot != 0 && "stack slot 0 is reserved");
  stack.frame_size += size;

  if constexpr (Config::FRAME_INDEXING_NEGATIVE) {
    slot = -(slot + size);
  }
  return slot;
}
1134
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
// NOTE(review): the qualified signature line (free_stack_slot, judging from
// the call site in free_assignment) appears missing -- verify.
    u32 slot, u32 size) noexcept {
  // Return a stack slot to the matching free list; sizes are bucketed the
  // same way as in the allocation path.
  if (size == 0) [[unlikely]] {
    assert(slot == 0 && "unexpected slot for zero-sized stack-slot?");
    // Do nothing.
  } else if (size <= 16) [[likely]] {
    // Same power-of-two bucket computation as in allocation.
    u32 free_list_idx = size == 1 ? 0 : 32 - util::cnt_lz<u32>(size - 1);
    stack.fixed_free_lists[free_list_idx].push_back(slot);
  } else {
    size = util::align_up(size, 16);
    stack.dynamic_free_lists[size].push_back(slot);
  }
}
1149
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
// NOTE(review): the qualified signature line appears missing from this
// extraction (the function assigns one incoming function argument) -- verify.
    CCAssigner *cc_assigner,
    u32 arg_idx,
    IRValueRef arg,
    u32 align,
    bool allow_split) noexcept {
  // Assign one incoming function argument according to the calling
  // convention: byval arguments may become stack variable refs, sret gets
  // the return-pointer treatment, everything else is assigned part by part.
  ValueRef vr = derived()->result_ref(arg);
  if (adaptor->cur_arg_is_byval(arg_idx)) {
    // Pass-by-value on the stack (see CCAssignment::byval).
    CCAssignment cca{
        .byval = true,
        .align = u8(adaptor->cur_arg_byval_align(arg_idx)),
        .size = adaptor->cur_arg_byval_size(arg_idx),
    };
    cc_assigner->assign_arg(cca);
    std::optional<i32> byval_frame_off =
        derived()->prologue_assign_arg_part(vr.part(0), cca);

    if (byval_frame_off) {
      // We need to convert the assignment into a stack variable ref.
      ValLocalIdx local_idx = val_idx(arg);
      // TODO: we shouldn't create the result_ref for such cases in the first
      // place. However, this is not easy to detect up front, it depends on the
      // target and the calling convention whether this is possible.
      vr.reset();
      // Value assignment might have been free'd by ValueRef reset.
      if (ValueAssignment *assignment = val_assignment(local_idx)) {
        free_assignment(local_idx, assignment);
      }
      init_variable_ref(local_idx, 0);
      ValueAssignment *assignment = this->val_assignment(local_idx);
      assignment->stack_variable = true;
      assignment->frame_off = *byval_frame_off;
    }
    return;
  }

  if (adaptor->cur_arg_is_sret(arg_idx)) {
    // Return-value pointer argument; always a single pointer-sized part.
    assert(vr.assignment()->part_count == 1 && "sret must be single-part");
    ValuePartRef vp = vr.part(0);
    CCAssignment cca{
        .sret = true, .bank = vp.bank(), .size = Config::PLATFORM_POINTER_SIZE};
    cc_assigner->assign_arg(cca);
    derived()->prologue_assign_arg_part(std::move(vp), cca);
    return;
  }

  // Regular argument: assign each part. `consecutive` requests adjacent
  // registers for the remaining parts unless splitting is allowed.
  const u32 part_count = vr.assignment()->part_count;
  for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
    ValuePartRef vp = vr.part(part_idx);
    // `consecutive` is a u8, so clamp for values with very many parts.
    u32 remaining = part_count < 256 ? part_count - part_idx - 1 : 255;
    CCAssignment cca{
        .consecutive = u8(allow_split ? 0 : remaining),
        .align = u8(part_idx == 0 ? align : 1),
        .bank = vp.bank(),
        .size = vp.part_size(),
    };
    cc_assigner->assign_arg(cca);
    derived()->prologue_assign_arg_part(std::move(vp), cca);
  }
}
1211
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::ValueRef
// NOTE(review): the line with the qualified name and parameter (presumably
// `CompilerBase<...>::val_ref(IRValueRef value) noexcept {`) appears to be
// missing from this extraction -- verify against the original file.
  // Obtain a use-reference to a value. Values with a special representation
  // are resolved through the target's val_ref_special hook; everything else
  // must already have an assignment (use after def).
  if (auto special = derived()->val_ref_special(value); special) {
    return ValueRef{this, std::move(*special)};
  }

  const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(value);
  assert(val_assignment(local_idx) != nullptr && "value use before def");
  return ValueRef{this, local_idx};
}
1223
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
std::pair<typename CompilerBase<Adaptor, Derived, Config>::ValueRef,
          typename CompilerBase<Adaptor, Derived, Config>::ValuePartRef>
// NOTE(review): the qualified signature line appears to be missing from this
// extraction -- verify against the original file.
    IRValueRef value) noexcept {
  // Convenience: a ValueRef plus a reference to its first part. The pair is
  // built first so the ValueRef outlives the part reference derived from it.
  std::pair<ValueRef, ValuePartRef> res{val_ref(value), this};
  res.second = res.first.part(0);
  return res;
}
1233
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::ValueRef
// NOTE(review): the qualified name line (result_ref, judging from the call
// sites above/below) appears missing from this extraction -- verify.
    IRValueRef value) noexcept {
  // Obtain a definition-reference to a value, lazily creating its
  // assignment on first definition.
  const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(value);
  if (val_assignment(local_idx) == nullptr) {
    init_assignment(value, local_idx);
  }
  return ValueRef{this, local_idx};
}
1244
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
std::pair<typename CompilerBase<Adaptor, Derived, Config>::ValueRef,
          typename CompilerBase<Adaptor, Derived, Config>::ValuePartRef>
// NOTE(review): the qualified signature line appears to be missing from this
// extraction -- verify against the original file.
    IRValueRef value) noexcept {
  // Convenience: a result ValueRef plus a reference to its first part. The
  // pair is built first so the ValueRef outlives the part reference.
  std::pair<ValueRef, ValuePartRef> res{result_ref(value), this};
  res.second = res.first.part(0);
  return res;
}
1254
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::ValueRef
// NOTE(review): the qualified signature line appears to be missing from this
// extraction -- verify against the original file.
    IRValueRef dst, ValueRef &&src) noexcept {
  // Transfer src's singly-owned assignment to dst so dst reuses the same
  // storage and registers; src is consumed.
  const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(dst);
  assert(!val_assignment(local_idx) && "alias target already defined");
  assert(src.has_assignment() && "alias src must have an assignment");
  // Consider implementing aliases where multiple values share the same
  // assignment, e.g. for implementing trunc as a no-op sharing registers. Live
  // ranges can be merged and reference counts added. However, this needs
  // additional infrastructure to clear one of the value_ptrs.
  assert(src.is_owned() && "alias src must be owned");

  ValueAssignment *assignment = src.assignment();
  u32 part_count = assignment->part_count;
  assert(!assignment->pending_free);
  assert(!assignment->variable_ref);
  // NOTE(review): duplicate of the pending_free assert two lines above;
  // harmless, but one of the two could be removed.
  assert(!assignment->pending_free);
#ifndef NDEBUG
  {
    const auto &src_liveness = analyzer.liveness_info(src.local_idx());
    assert(!src_liveness.last_full); // implied by is_owned()
    assert(assignment->references_left == 1); // implied by is_owned()

    // Validate that part configuration is identical.
    const auto parts = derived()->val_parts(dst);
    assert(parts.count() == part_count);
    for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
      AssignmentPartRef ap{assignment, part_idx};
      assert(parts.reg_bank(part_idx) == ap.bank());
      assert(parts.size_bytes(part_idx) == ap.part_size());
    }
  }
#endif

  // Update local_idx of registers.
  for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
    AssignmentPartRef ap{assignment, part_idx};
    if (ap.register_valid()) {
      register_file.update_reg_assignment(ap.get_reg(), local_idx, part_idx);
    }
  }

  // Re-key the liveness bookkeeping to dst and move the value_ptrs entry.
  const auto &liveness = analyzer.liveness_info(local_idx);
  assignment->delay_free = liveness.last_full;
  assignment->references_left = liveness.ref_count;
  assignments.value_ptrs[static_cast<u32>(src.local_idx())] = nullptr;
  assignments.value_ptrs[static_cast<u32>(local_idx)] = assignment;
  src.disown();
  src.reset();

  return ValueRef{this, local_idx};
}
1308
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::ValueRef
// NOTE(review): the qualified signature line appears to be missing from this
// extraction -- verify against the original file.
    IRValueRef dst, AssignmentPartRef base, i32 off) noexcept {
  // Define dst as a stack variable ref located at a fixed offset from an
  // existing stack variable `base`.
  const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(dst);
  assert(!val_assignment(local_idx) && "new value already defined");
  init_variable_ref(local_idx, 0);
  ValueAssignment *assignment = this->val_assignment(local_idx);
  assignment->stack_variable = true;
  assignment->frame_off = base.variable_stack_off() + off;
  return ValueRef{this, local_idx};
}
1321
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::set_value(
    ValuePartRef &val_ref, ScratchReg &scratch) noexcept {
  // Thin forwarding helper: moves the scratch register into the value part.
  val_ref.set_value(std::move(scratch));
}
1327
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::AsmReg
// NOTE(review): the qualified name line (gval_as_reg, judging from the
// overloads below) appears missing from this extraction -- verify.
    GenericValuePart &gv) noexcept {
  // Materialize a GenericValuePart into a register:
  // - ScratchReg: already in a register, return it.
  // - ValuePartRef: use its current register or load it into one.
  // - Expr: a plain base (no index, zero displacement) can reuse the base
  //   register, otherwise the target computes the expression into a reg.
  if (std::holds_alternative<ScratchReg>(gv.state)) {
    return std::get<ScratchReg>(gv.state).cur_reg();
  }
  if (std::holds_alternative<ValuePartRef>(gv.state)) {
    auto &vpr = std::get<ValuePartRef>(gv.state);
    if (vpr.has_reg()) {
      return vpr.cur_reg();
    }
    return vpr.load_to_reg();
  }
  if (auto *expr = std::get_if<typename GenericValuePart::Expr>(&gv.state)) {
    if (expr->has_base() && !expr->has_index() && expr->disp == 0) {
      return expr->base_reg();
    }
    return derived()->gval_expr_as_reg(gv);
  }
  TPDE_UNREACHABLE("gval_as_reg on empty GenericValuePart");
}
1350
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::AsmReg
// NOTE(review): the qualified signature line appears to be missing from this
// extraction -- verify against the original file.
    GenericValuePart &gv, ScratchReg &dst) noexcept {
  // Like gval_as_reg(gv), but additionally tries to transfer ownership of
  // the resulting register into dst: either by stealing the scratch register
  // or by salvaging the value part's register when it permits it.
  AsmReg reg = gval_as_reg(gv);
  if (!dst.has_reg()) {
    if (auto *scratch = std::get_if<ScratchReg>(&gv.state)) {
      dst = std::move(*scratch);
    } else if (auto *val_ref = std::get_if<ValuePartRef>(&gv.state)) {
      if (val_ref->can_salvage()) {
        dst.alloc_specific(val_ref->salvage());
        assert(dst.cur_reg() == reg && "salvaging unsuccessful");
      }
    }
  }
  return reg;
}
1368
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::AsmReg
// NOTE(review): the qualified signature line appears to be missing from this
// extraction -- verify against the original file.
    GenericValuePart &gv, ValuePart &dst) noexcept {
  // Like gval_as_reg(gv), but additionally tries to move ownership of the
  // resulting register into the value part dst -- unless dst already holds
  // a register or is a fixed assignment.
  AsmReg reg = gval_as_reg(gv);
  if (!dst.has_reg() &&
      (!dst.has_assignment() || !dst.assignment().fixed_assignment())) {
    // TODO: make this less expensive
    if (auto *scratch = std::get_if<ScratchReg>(&gv.state)) {
      dst.set_value(this, std::move(*scratch));
      if (dst.has_assignment()) {
        dst.lock(this);
      }
    } else if (auto *val_ref = std::get_if<ValuePartRef>(&gv.state)) {
      if (val_ref->can_salvage()) {
        dst.set_value(this, std::move(*val_ref));
        if (dst.has_assignment()) {
          dst.lock(this);
        }
      }
    }
  }
  return reg;
}
1393
1394template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1395Reg CompilerBase<Adaptor, Derived, Config>::select_reg_evict(
1396 RegBank bank) noexcept {
1397 TPDE_LOG_DBG("select_reg_evict for bank {}", bank.id());
1398 auto candidates = register_file.used & register_file.bank_regs(bank);
1399
1400 Reg candidate = Reg::make_invalid();
1401 u32 max_score = 0;
1402 for (auto reg_id : util::BitSetIterator<>(candidates)) {
1403 Reg reg{reg_id};
1404 if (register_file.is_fixed(reg)) {
1405 continue;
1406 }
1407
1408 // Must be an evictable value, not a temporary.
1409 auto local_idx = register_file.reg_local_idx(reg);
1410 u32 part = register_file.reg_part(Reg{reg});
1411 assert(local_idx != INVALID_VAL_LOCAL_IDX);
1412 ValueAssignment *va = val_assignment(local_idx);
1413 AssignmentPartRef ap{va, part};
1414
1415 // We want to sort registers by the following (ordered by priority):
1416 // - stack variable ref (~1 add/sub to reconstruct)
1417 // - other variable ref (1-2 instrs to reconstruct)
1418 // - already spilled (no store needed)
1419 // - last use farthest away (most likely to get spilled anyhow, so there's
1420 // not much harm in spilling earlier)
1421 // - lowest ref-count (least used)
1422 //
1423 // TODO: evaluate and refine this heuristic
1424
1425 // TODO: evict stack variable refs before others
1426 if (ap.variable_ref()) {
1427 TPDE_LOG_DBG(" r{} ({}) is variable-ref", reg_id, u32(local_idx));
1428 candidate = reg;
1429 break;
1430 }
1431
1432 u32 score = 0;
1433 if (ap.stack_valid()) {
1434 score |= u32{1} << 31;
1435 }
1436
1437 const auto &liveness = analyzer.liveness_info(local_idx);
1438 u32 last_use_dist = u32(liveness.last) - u32(cur_block_idx);
1439 score |= (last_use_dist < 0x8000 ? 0x8000 - last_use_dist : 0) << 16;
1440
1441 u32 refs_left = va->pending_free ? 0 : va->references_left;
1442 score |= (refs_left < 0xffff ? 0x10000 - refs_left : 1);
1443
1444 TPDE_LOG_DBG(" r{} ({}:{}) rc={}/{} live={}-{}{} spilled={} score={:#x}",
1445 reg_id,
1446 u32(local_idx),
1447 part,
1448 refs_left,
1449 liveness.ref_count,
1450 u32(liveness.first),
1451 u32(liveness.last),
1452 &"*"[!liveness.last_full],
1453 ap.stack_valid(),
1454 score);
1455
1456 assert(score != 0);
1457 if (score > max_score) {
1458 candidate = reg;
1459 max_score = score;
1460 }
1461 }
1462 if (candidate.invalid()) [[unlikely]] {
1463 TPDE_FATAL("ran out of registers for scratch registers");
1464 }
1465 TPDE_LOG_DBG(" selected r{}", candidate.id());
1466 evict_reg(candidate);
1467 return candidate;
1468}
1469
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
// NOTE(review): the qualified signature line appears to be missing from this
// extraction -- verify against the original file.
    AsmReg dst, AssignmentPartRef ap) noexcept {
  // Rematerialize an assignment part into dst: regular values reload from
  // their spill slot, stack variables take their address, other variable
  // refs go through the target hook when custom handling is configured.
  if (!ap.variable_ref()) {
    assert(ap.stack_valid());
    derived()->load_from_stack(dst, ap.frame_off(), ap.part_size());
  } else if (ap.is_stack_variable()) {
    derived()->load_address_of_stack_var(dst, ap);
  } else if constexpr (!Config::DEFAULT_VAR_REF_HANDLING) {
    derived()->load_address_of_var_reference(dst, ap);
  } else {
    TPDE_UNREACHABLE("non-stack-variable needs custom var-ref handling");
  }
}
1484
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
// NOTE(review): the qualified signature line appears to be missing from this
// extraction -- verify against the original file.
    AssignmentPartRef ap) noexcept {
  // Lazily reserve a spill slot for the whole assignment; frame_off == 0
  // means "no slot yet" (slot 0 itself is the reserved invalid slot).
  assert(!ap.variable_ref() && "cannot allocate spill slot for variable ref");
  if (ap.assignment()->frame_off == 0) {
    assert(!ap.stack_valid() && "stack-valid set without spill slot");
    ap.assignment()->frame_off = allocate_stack_slot(ap.assignment()->size());
    assert(ap.assignment()->frame_off != 0);
  }
}
1495
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
// NOTE(review): the qualified signature line (spill(AssignmentPartRef),
// judging from the derived()->spill(ap) call sites below) appears missing.
    AssignmentPartRef ap) noexcept {
  // Write the part's register to its spill slot, unless the value is
  // already on the stack or is a variable ref (rematerialized instead).
  assert(may_change_value_state());
  if (!ap.stack_valid() && !ap.variable_ref()) {
    assert(ap.register_valid() && "cannot spill uninitialized assignment part");
    // NOTE(review): one line appears to be missing here in this dump --
    // presumably the spill-slot allocation (allocate_spill_slot(ap)?) that
    // must precede the store; verify against the original file.
    derived()->spill_reg(ap.get_reg(), ap.frame_off(), ap.part_size());
    ap.set_stack_valid();
  }
}
1507
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
// NOTE(review): the qualified signature line appears to be missing from this
// extraction -- verify against the original file.
    AssignmentPartRef ap) noexcept {
  // Spill the part (if needed) and drop it from its register, returning the
  // register to the allocator.
  assert(may_change_value_state());
  assert(ap.register_valid());
  derived()->spill(ap);
  ap.set_register_valid(false);
  register_file.unmark_used(ap.get_reg());
}
1517
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
// NOTE(review): the qualified signature line (evict_reg(Reg), judging from
// the call site in select_reg_evict) appears missing -- verify.
  // Evict the value part currently held in `reg`: spill it and release the
  // register. Must not be called on fixed or unoccupied registers.
  assert(may_change_value_state());
  assert(!register_file.is_fixed(reg));
  assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);

  ValLocalIdx local_idx = register_file.reg_local_idx(reg);
  auto part = register_file.reg_part(reg);
  AssignmentPartRef evict_part{val_assignment(local_idx), part};
  assert(evict_part.register_valid());
  assert(evict_part.get_reg() == reg);
  derived()->spill(evict_part);
  evict_part.set_register_valid(false);
  register_file.unmark_used(reg);
}
1533
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
// NOTE(review): the qualified signature line (free_reg(Reg), judging from
// the call sites in release_spilled_regs below) appears missing -- verify.
  // Drop the value part held in `reg` WITHOUT spilling; only legal when the
  // register content is unmodified or a variable ref (see assert below).
  assert(may_change_value_state());
  assert(!register_file.is_fixed(reg));
  assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);

  ValLocalIdx local_idx = register_file.reg_local_idx(reg);
  auto part = register_file.reg_part(reg);
  AssignmentPartRef ap{val_assignment(local_idx), part};
  assert(ap.register_valid());
  assert(ap.get_reg() == reg);
  // Modified values would lose data if dropped un-spilled.
  assert(!ap.modified() || ap.variable_ref());
  ap.set_register_valid(false);
  register_file.unmark_used(reg);
}
1549
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::RegisterFile::RegBitSet
// NOTE(review): the line with the qualified name and first parameter
// (spill_before_branch, judging from the call sites below) appears missing.
    bool force_spill) noexcept {
  // Spill live, modified, non-fixed registers before branching and return
  // the set of registers that must be released afterwards (see
  // release_spilled_regs).
  //
  // since we do not explicitly keep track of register assignments per block,
  // whenever we might branch off to a block that we do not directly compile
  // afterwards (i.e. the register assignments might change in between), we
  // need to spill all registers which are not fixed and remove them from the
  // register state.
  //
  // This leads to worse codegen but saves a significant overhead to
  // store/manage the register assignment for each block (256 bytes/block for
  // x64) and possible compile-time as there might be additional logic to move
  // values around

  // First, we consider the case that the current block only has one successor
  // which is compiled directly after the current one, in which case we do not
  // have to spill anything.
  //
  // Secondly, if the next block has multiple incoming edges, we always have
  // to spill and remove from the register assignment. Otherwise, we
  // only need to spill values if they are alive in any successor which is not
  // the next block.
  //
  // Values which are only read from PHI-Nodes and have no extended lifetimes,
  // do not need to be spilled as they die at the edge.

  using RegBitSet = typename RegisterFile::RegBitSet;

  assert(may_change_value_state());

  const IRBlockRef cur_block_ref = analyzer.block_ref(cur_block_idx);
  // Earliest succeeding block after the current block that is not the
  // immediately succeeding block. Used to determine whether a value needs to
  // be spilled.
  BlockIndex earliest_next_succ = Analyzer<Adaptor>::INVALID_BLOCK_IDX;

  bool must_spill = force_spill;
  if (!must_spill) {
    // We must always spill if no block is immediately succeeding or that block
    // has multiple incoming edges.
    auto next_block_is_succ = false;
    auto next_block_has_multiple_incoming = false;
    u32 succ_count = 0;
    for (const IRBlockRef succ : adaptor->block_succs(cur_block_ref)) {
      ++succ_count;
      BlockIndex succ_idx = analyzer.block_idx(succ);
      if (u32(succ_idx) == u32(cur_block_idx) + 1) {
        next_block_is_succ = true;
        if (analyzer.block_has_multiple_incoming(succ)) {
          next_block_has_multiple_incoming = true;
        }
      } else if (succ_idx > cur_block_idx && succ_idx < earliest_next_succ) {
        earliest_next_succ = succ_idx;
      }
    }

    must_spill = !next_block_is_succ || next_block_has_multiple_incoming;

    // Single fall-through successor: nothing to spill or release.
    if (succ_count == 1 && !must_spill) {
      return RegBitSet{};
    }
  }

  auto release_regs = RegBitSet{};
  // TODO(ts): just use register_file.used_nonfixed_regs()?
  for (auto reg : register_file.used_regs()) {
    auto local_idx = register_file.reg_local_idx(Reg{reg});
    auto part = register_file.reg_part(Reg{reg});
    if (local_idx == INVALID_VAL_LOCAL_IDX) {
      // scratch regs can never be held across blocks
      continue;
    }
    AssignmentPartRef ap{val_assignment(local_idx), part};
    if (ap.fixed_assignment()) {
      // fixed registers do not need to be spilled
      continue;
    }

    // Remove from register assignment if the next block cannot rely on the
    // value being in the specific register.
    if (must_spill) {
      release_regs |= RegBitSet{1ull} << reg;
    }

    if (!ap.modified() || ap.variable_ref()) {
      // No need to spill values that were already spilled or are variable refs.
      continue;
    }

    const auto &liveness = analyzer.liveness_info(local_idx);
    if (liveness.last <= cur_block_idx) {
      // No need to spill value if it dies immediately after the block.
      continue;
    }

    // If the value is live in a different block, we might need to spill it.
    // Cases:
    // - Live in successor that precedes current block => ignore, value must
    //   have been spilled already due to RPO.
    // - Live only in successor that immediately follows this block => no need
    //   to spill, value can be kept in register, register state for next block
    //   is still valid.
    // - Live in successor in some distance after current block => spill
    if (must_spill || earliest_next_succ <= liveness.last) {
      spill(ap);
    }
  }

  return release_regs;
}
1661
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
// NOTE(review): the qualified signature line (release_spilled_regs, judging
// from the call sites below) appears missing -- verify.
    typename RegisterFile::RegBitSet regs) noexcept {
  // Free all non-fixed registers in `regs` (the set previously returned by
  // spill_before_branch) after the branch instructions have been emitted.
  assert(may_change_value_state());

  // TODO(ts): needs changes for other RegisterFile impls
  for (auto reg_id : util::BitSetIterator<>{regs & register_file.used}) {
    if (!register_file.is_fixed(Reg{reg_id})) {
      free_reg(Reg{reg_id});
    }
  }
}
1674
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
// NOTE(review): a line appears missing between the template header and the
// function name below (likely `void CompilerBase<...>::`) -- verify.
    release_regs_after_return() noexcept {
  // After a return no value needs to survive in a register:
  // we essentially have to free all non-fixed registers
  for (auto reg_id : register_file.used_regs()) {
    if (!register_file.is_fixed(Reg{reg_id})) {
      free_reg(Reg{reg_id});
    }
  }
}
1685
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
template <typename Jump>
// NOTE(review): the qualified signature line (generate_branch_to_block,
// judging from the call sites below) appears missing -- verify.
    Jump jmp, IRBlockRef target, bool needs_split, bool last_inst) noexcept {
  // Emit a branch to `target` with PHI moves placed on the edge. When the
  // edge needs splitting, jump around the PHI-move block using the inverted
  // condition so the moves only execute on the taken edge.
  BlockIndex target_idx = this->analyzer.block_idx(target);
  Label target_label = this->block_labels[u32(target_idx)];
  if (!needs_split) {
    move_to_phi_nodes(target_idx);
    // Omit the jump when we can simply fall through to the next block.
    if (!last_inst || target_idx != this->next_block()) {
      derived()->generate_raw_jump(jmp, target_label);
    }
  } else {
    Label tmp_label = this->text_writer.label_create();
    derived()->generate_raw_jump(derived()->invert_jump(jmp), tmp_label);
    move_to_phi_nodes(target_idx);
    derived()->generate_raw_jump(Jump::jmp, target_label);
    this->label_place(tmp_label);
  }
}
1705
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
// NOTE(review): the qualified signature line appears to be missing from this
// extraction -- verify against the original file.
    IRBlockRef target) noexcept {
  // Unconditional branch: spill per branch policy, emit the jump (with PHI
  // moves), then release the spilled registers.
  auto spilled = spill_before_branch();
  // NOTE(review): a line appears to be missing here in this dump (possibly
  // begin_branch_region(), cf. the switch generator) -- verify.

  generate_branch_to_block(Derived::Jump::jmp, target, false, true);

  // NOTE(review): a line appears to be missing here as well (possibly
  // end_branch_region()) -- verify.
  release_spilled_regs(spilled);
}
1717
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
template <typename Jump>
// NOTE(review): the qualified signature line appears to be missing from this
// extraction -- verify against the original file.
    Jump jmp, IRBlockRef true_target, IRBlockRef false_target) noexcept {
  // Conditional branch: order the two edges so that, where possible, the
  // fall-through goes to the next block in layout order, and edges that need
  // a PHI-move block are split.
  IRBlockRef next = analyzer.block_ref(next_block());

  bool true_needs_split = branch_needs_split(true_target);
  bool false_needs_split = branch_needs_split(false_target);

  auto spilled = spill_before_branch();
  // NOTE(review): a line appears to be missing here in this dump (possibly
  // begin_branch_region()) -- verify.

  if (next == true_target || (next != false_target && true_needs_split)) {
    // Branch to the false target on the inverted condition, then handle the
    // true target (possibly falling through).
    // NOTE(review): the head of the next call (generate_branch_to_block()
    // appears to be missing from this dump -- verify.
        derived()->invert_jump(jmp), false_target, false_needs_split, false);
    generate_branch_to_block(Derived::Jump::jmp, true_target, false, true);
  } else if (next == false_target) {
    generate_branch_to_block(jmp, true_target, true_needs_split, false);
    generate_branch_to_block(Derived::Jump::jmp, false_target, false, true);
  } else {
    assert(!true_needs_split);
    generate_branch_to_block(jmp, true_target, false, false);
    generate_branch_to_block(Derived::Jump::jmp, false_target, false, true);
  }

  // NOTE(review): a line appears to be missing here (possibly
  // end_branch_region()) -- verify.
  release_spilled_regs(spilled);
}
1746
1747template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1749 ScratchReg &&cond,
1750 u32 width,
1751 IRBlockRef default_block,
1752 std::span<const std::pair<u64, IRBlockRef>> cases) noexcept {
1753 // This function takes cond as a ScratchReg as opposed to a ValuePart, because
1754 // the ValueRef for the condition must be ref-counted before we enter the
1755 // branch region.
1756
1757 assert(width <= 64);
1758 // We don't support sections with more than 4 GiB, so switches with more than
1759 // 4G cases are impossible to support.
1760 assert(cases.size() < UINT32_MAX && "large switches are unsupported");
1761
1762 AsmReg cmp_reg = cond.cur_reg();
1763 bool width_is_32 = width <= 32;
1764 if (u32 dst_width = util::align_up(width, 32); width != dst_width) {
1765 derived()->generate_raw_intext(cmp_reg, cmp_reg, false, width, dst_width);
1766 }
1767
1768 // We must not evict any registers in the branching code, as we don't track
1769 // the individual value states per block. Hence, we must not allocate any
1770 // registers (e.g., for constants, jump table address) below.
1771 ScratchReg tmp_scratch{this};
1772 AsmReg tmp_reg = tmp_scratch.alloc_gp();
1773
1774 const auto spilled = this->spill_before_branch();
1775 this->begin_branch_region();
1776
1777 tpde::util::SmallVector<tpde::Label, 64> case_labels;
1778 // Labels that need an intermediate block to setup registers. This is
1779 // separate, because most switch targets don't need this.
1780 tpde::util::SmallVector<std::pair<tpde::Label, IRBlockRef>, 64> case_blocks;
1781 for (auto i = 0u; i < cases.size(); ++i) {
1782 // If the target might need additional register moves, we can't branch there
1783 // immediately.
1784 // TODO: more precise condition?
1785 BlockIndex target = this->analyzer.block_idx(cases[i].second);
1786 if (analyzer.block_has_phis(target)) {
1787 case_labels.push_back(this->text_writer.label_create());
1788 case_blocks.emplace_back(case_labels.back(), cases[i].second);
1789 } else {
1790 case_labels.push_back(this->block_labels[u32(target)]);
1791 }
1792 }
1793
1794 const auto default_label = this->text_writer.label_create();
1795
1796 const auto build_range = [&,
1797 this](size_t begin, size_t end, const auto &self) {
1798 assert(begin <= end);
1799 const auto num_cases = end - begin;
1800 if (num_cases <= 4) {
1801 // if there are four or less cases we just compare the values
1802 // against each of them
1803 for (auto i = 0u; i < num_cases; ++i) {
1804 derived()->switch_emit_cmpeq(case_labels[begin + i],
1805 cmp_reg,
1806 tmp_reg,
1807 cases[begin + i].first,
1808 width_is_32);
1809 }
1810
1811 derived()->generate_raw_jump(Derived::Jump::jmp, default_label);
1812 return;
1813 }
1814
1815 // check if the density of the values is high enough to warrant building
1816 // a jump table
1817 auto range = cases[end - 1].first - cases[begin].first;
1818 // we will get wrong results if range is -1 so skip the jump table if
1819 // that is the case
1820 if (range != 0xFFFF'FFFF'FFFF'FFFF && (range / num_cases) < 8) {
1821 // for gcc, it seems that if there are less than 8 values per
1822 // case it will build a jump table so we do that, too
1823
1824 // the actual range is one greater than the result we get from
1825 // subtracting so adjust for that
1826 range += 1;
1827
1828 tpde::util::SmallVector<tpde::Label, 32> label_vec;
1829 std::span<tpde::Label> labels;
1830 if (range == num_cases) {
1831 labels = std::span{case_labels.begin() + begin, num_cases};
1832 } else {
1833 label_vec.resize(range, default_label);
1834 for (auto i = 0u; i < num_cases; ++i) {
1835 label_vec[cases[begin + i].first - cases[begin].first] =
1836 case_labels[begin + i];
1837 }
1838 labels = std::span{label_vec.begin(), range};
1839 }
1840
1841 // Give target the option to emit a jump table.
1842 if (derived()->switch_emit_jump_table(default_label,
1843 labels,
1844 cmp_reg,
1845 tmp_reg,
1846 cases[begin].first,
1847 cases[end - 1].first,
1848 width_is_32)) {
1849 return;
1850 }
1851 }
1852
1853 // do a binary search step
1854 const auto half_len = num_cases / 2;
1855 const auto half_value = cases[begin + half_len].first;
1856 const auto gt_label = this->text_writer.label_create();
1857
1858 // this will cmp against the input value, jump to the case if it is
1859 // equal or to gt_label if the value is greater. Otherwise it will
1860 // fall-through
1861 derived()->switch_emit_binary_step(case_labels[begin + half_len],
1862 gt_label,
1863 cmp_reg,
1864 tmp_reg,
1865 half_value,
1866 width_is_32);
1867 // search the lower half
1868 self(begin, begin + half_len, self);
1869
1870 // and the upper half
1871 this->label_place(gt_label);
1872 self(begin + half_len + 1, end, self);
1873 };
1874
1875 build_range(0, case_labels.size(), build_range);
1876
1877 // write out the labels
1878 this->label_place(default_label);
1879 derived()->generate_branch_to_block(
1880 Derived::Jump::jmp, default_block, false, false);
1881
1882 for (const auto &[label, target] : case_blocks) {
1883 // Branch predictors typically have problems if too many branches follow too
1884 // closely. Ensure a minimum alignment.
1885 this->text_writer.align(8);
1886 this->label_place(label);
1887 derived()->generate_branch_to_block(
1888 Derived::Jump::jmp, target, false, false);
1889 }
1890
1891 this->end_branch_region();
1892 this->release_spilled_regs(spilled);
1893}
1894
1895template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1896void CompilerBase<Adaptor, Derived, Config>::move_to_phi_nodes_impl(
1897 BlockIndex target) noexcept {
1898 // PHI-nodes are always moved to their stack-slot (unless they are fixed)
1899 //
1900 // However, we need to take care of PHI-dependencies (cycles and chains)
1901 // as to not overwrite values which might be needed.
1902 //
1903 // In most cases, we expect the number of PHIs to be small but we want to
1904 // stay reasonably efficient even with larger numbers of PHIs
1905
1906 struct ScratchWrapper {
1907 Derived *self;
1908 AsmReg cur_reg = AsmReg::make_invalid();
1909 bool backed_up = false;
1910 bool was_modified = false;
1911 u8 part = 0;
1912 ValLocalIdx local_idx = INVALID_VAL_LOCAL_IDX;
1913
1914 ScratchWrapper(Derived *self) : self{self} {}
1915
1916 ~ScratchWrapper() { reset(); }
1917
1918 void reset() {
1919 if (cur_reg.invalid()) {
1920 return;
1921 }
1922
1923 self->register_file.unmark_fixed(cur_reg);
1924 self->register_file.unmark_used(cur_reg);
1925
1926 if (backed_up) {
1927 // restore the register state
1928 // TODO(ts): do we actually need the reload?
1929 auto *assignment = self->val_assignment(local_idx);
1930 // check if the value was free'd, then we dont need to restore
1931 // it
1932 if (assignment) {
1933 auto ap = AssignmentPartRef{assignment, part};
1934 if (!ap.variable_ref()) {
1935 // TODO(ts): assert that this always happens?
1936 assert(ap.stack_valid());
1937 self->load_from_stack(cur_reg, ap.frame_off(), ap.part_size());
1938 }
1939 ap.set_reg(cur_reg);
1940 ap.set_register_valid(true);
1941 ap.set_modified(was_modified);
1942 self->register_file.mark_used(cur_reg, local_idx, part);
1943 }
1944 backed_up = false;
1945 }
1946 cur_reg = AsmReg::make_invalid();
1947 }
1948
1949 AsmReg alloc_from_bank(RegBank bank) {
1950 if (cur_reg.valid() && self->register_file.reg_bank(cur_reg) == bank) {
1951 return cur_reg;
1952 }
1953 if (cur_reg.valid()) {
1954 reset();
1955 }
1956
1957 // TODO(ts): try to first find a non callee-saved/clobbered
1958 // register...
1959 auto &reg_file = self->register_file;
1960 auto reg = reg_file.find_first_free_excluding(bank, 0);
1961 if (reg.invalid()) {
1962 // TODO(ts): use clock here?
1963 reg = reg_file.find_first_nonfixed_excluding(bank, 0);
1964 if (reg.invalid()) {
1965 TPDE_FATAL("ran out of registers for scratch registers");
1966 }
1967
1968 backed_up = true;
1969 local_idx = reg_file.reg_local_idx(reg);
1970 part = reg_file.reg_part(reg);
1971 AssignmentPartRef ap{self->val_assignment(local_idx), part};
1972 was_modified = ap.modified();
1973 // TODO(ts): this does not spill for variable refs
1974 // We don't use evict_reg here, as we know that we can't change the
1975 // value state.
1976 assert(ap.register_valid() && ap.get_reg() == reg);
1977 if (!ap.stack_valid() && !ap.variable_ref()) {
1978 self->allocate_spill_slot(ap);
1979 self->spill_reg(ap.get_reg(), ap.frame_off(), ap.part_size());
1980 ap.set_stack_valid();
1981 }
1982 ap.set_register_valid(false);
1983 reg_file.unmark_used(reg);
1984 }
1985
1986 reg_file.mark_used(reg, INVALID_VAL_LOCAL_IDX, 0);
1987 reg_file.mark_clobbered(reg);
1988 reg_file.mark_fixed(reg);
1989 cur_reg = reg;
1990 return reg;
1991 }
1992
1993 ScratchWrapper &operator=(const ScratchWrapper &) = delete;
1994 ScratchWrapper &operator=(ScratchWrapper &&) = delete;
1995 };
1996
1997 IRBlockRef target_ref = analyzer.block_ref(target);
1998 IRBlockRef cur_ref = analyzer.block_ref(cur_block_idx);
1999
2000 // collect all the nodes
2001 struct NodeEntry {
2002 IRValueRef phi;
2003 IRValueRef incoming_val;
2004 ValLocalIdx phi_local_idx;
2005 // local idx of same-block phi node that needs special handling
2006 ValLocalIdx incoming_phi_local_idx = INVALID_VAL_LOCAL_IDX;
2007 // bool incoming_is_phi;
2008 u32 ref_count;
2009
2010 bool operator<(const NodeEntry &other) const noexcept {
2011 return phi_local_idx < other.phi_local_idx;
2012 }
2013
2014 bool operator<(ValLocalIdx other) const noexcept {
2015 return phi_local_idx < other;
2016 }
2017 };
2018
2019 util::SmallVector<NodeEntry, 16> nodes;
2020 for (IRValueRef phi : adaptor->block_phis(target_ref)) {
2021 ValLocalIdx phi_local_idx = adaptor->val_local_idx(phi);
2022 auto incoming = adaptor->val_as_phi(phi).incoming_val_for_block(cur_ref);
2023 nodes.emplace_back(NodeEntry{
2024 .phi = phi, .incoming_val = incoming, .phi_local_idx = phi_local_idx});
2025 }
2026
2027 // We check that the block has phi nodes before getting here.
2028 assert(!nodes.empty() && "block marked has having phi nodes has none");
2029
2030 ScratchWrapper scratch{derived()};
2031 const auto move_to_phi = [this, &scratch](IRValueRef phi,
2032 IRValueRef incoming_val) {
2033 auto phi_vr = derived()->result_ref(phi);
2034 auto val_vr = derived()->val_ref(incoming_val);
2035 if (phi == incoming_val) {
2036 return;
2037 }
2038
2039 u32 part_count = phi_vr.assignment()->part_count;
2040 for (u32 i = 0; i < part_count; ++i) {
2041 AssignmentPartRef phi_ap{phi_vr.assignment(), i};
2042 ValuePartRef val_vpr = val_vr.part(i);
2043
2044 if (phi_ap.fixed_assignment()) {
2045 if (AsmReg reg = val_vpr.cur_reg_unlocked(); reg.valid()) {
2046 derived()->mov(phi_ap.get_reg(), reg, phi_ap.part_size());
2047 } else {
2048 val_vpr.reload_into_specific_fixed(phi_ap.get_reg());
2049 }
2050 } else {
2051 AsmReg reg = val_vpr.cur_reg_unlocked();
2052 if (!reg.valid()) {
2053 reg = scratch.alloc_from_bank(val_vpr.bank());
2054 val_vpr.reload_into_specific_fixed(reg);
2055 }
2056 allocate_spill_slot(phi_ap);
2057 derived()->spill_reg(reg, phi_ap.frame_off(), phi_ap.part_size());
2058 phi_ap.set_stack_valid();
2059 }
2060 }
2061 };
2062
2063 if (nodes.size() == 1) {
2064 move_to_phi(nodes[0].phi, nodes[0].incoming_val);
2065 return;
2066 }
2067
2068 // sort so we can binary search later
2069 std::sort(nodes.begin(), nodes.end());
2070
2071 // fill in the refcount
2072 auto all_zero_ref = true;
2073 for (auto &node : nodes) {
2074 // We don't need to do anything for PHIs that don't reference other PHIs or
2075 // self-referencing PHIs.
2076 bool incoming_is_phi = adaptor->val_is_phi(node.incoming_val);
2077 if (!incoming_is_phi || node.incoming_val == node.phi) {
2078 continue;
2079 }
2080
2081 ValLocalIdx inc_local_idx = adaptor->val_local_idx(node.incoming_val);
2082 auto it = std::lower_bound(nodes.begin(), nodes.end(), inc_local_idx);
2083 if (it == nodes.end() || it->phi != node.incoming_val) {
2084 // Incoming value is a PHI node, but it's not from our block, so we don't
2085 // need to be particularly careful when assigning values.
2086 continue;
2087 }
2088 node.incoming_phi_local_idx = inc_local_idx;
2089 ++it->ref_count;
2090 all_zero_ref = false;
2091 }
2092
2093 if (all_zero_ref) {
2094 // no cycles/chain that we need to take care of
2095 for (auto &node : nodes) {
2096 move_to_phi(node.phi, node.incoming_val);
2097 }
2098 return;
2099 }
2100
2101 // TODO(ts): this is rather inefficient...
2102 util::SmallVector<u32, 32> ready_indices;
2103 ready_indices.reserve(nodes.size());
2104 util::SmallBitSet<256> waiting_nodes;
2105 waiting_nodes.resize(nodes.size());
2106 for (u32 i = 0; i < nodes.size(); ++i) {
2107 if (nodes[i].ref_count) {
2108 waiting_nodes.mark_set(i);
2109 } else {
2110 ready_indices.push_back(i);
2111 }
2112 }
2113
2114 u32 handled_count = 0;
2115 u32 cur_tmp_part_count = 0;
2116 i32 cur_tmp_slot = 0;
2117 u32 cur_tmp_slot_size = 0;
2118 IRValueRef cur_tmp_val = Adaptor::INVALID_VALUE_REF;
2119 ScratchWrapper tmp_reg1{derived()}, tmp_reg2{derived()};
2120
2121 const auto move_from_tmp_phi = [&](IRValueRef target_phi) {
2122 auto phi_vr = val_ref(target_phi);
2123 if (cur_tmp_part_count <= 2) {
2124 AssignmentPartRef ap{phi_vr.assignment(), 0};
2125 assert(!tmp_reg1.cur_reg.invalid());
2126 if (ap.fixed_assignment()) {
2127 derived()->mov(ap.get_reg(), tmp_reg1.cur_reg, ap.part_size());
2128 } else {
2129 derived()->spill_reg(tmp_reg1.cur_reg, ap.frame_off(), ap.part_size());
2130 }
2131
2132 if (cur_tmp_part_count == 2) {
2133 AssignmentPartRef ap_high{phi_vr.assignment(), 1};
2134 assert(!ap_high.fixed_assignment());
2135 assert(!tmp_reg2.cur_reg.invalid());
2136 derived()->spill_reg(
2137 tmp_reg2.cur_reg, ap_high.frame_off(), ap_high.part_size());
2138 }
2139 return;
2140 }
2141
2142 for (u32 i = 0; i < cur_tmp_part_count; ++i) {
2143 AssignmentPartRef phi_ap{phi_vr.assignment(), i};
2144 assert(!phi_ap.fixed_assignment());
2145
2146 auto slot_off = cur_tmp_slot + phi_ap.part_off();
2147 auto reg = tmp_reg1.alloc_from_bank(phi_ap.bank());
2148 derived()->load_from_stack(reg, slot_off, phi_ap.part_size());
2149 derived()->spill_reg(reg, phi_ap.frame_off(), phi_ap.part_size());
2150 }
2151 };
2152
2153 while (handled_count != nodes.size()) {
2154 if (ready_indices.empty()) {
2155 // need to break a cycle
2156 auto cur_idx_opt = waiting_nodes.first_set();
2157 assert(cur_idx_opt);
2158 auto cur_idx = *cur_idx_opt;
2159 assert(nodes[cur_idx].ref_count == 1);
2160 assert(cur_tmp_val == Adaptor::INVALID_VALUE_REF);
2161
2162 auto phi_val = nodes[cur_idx].phi;
2163 auto phi_vr = this->val_ref(phi_val);
2164 auto *assignment = phi_vr.assignment();
2165 cur_tmp_part_count = assignment->part_count;
2166 cur_tmp_val = phi_val;
2167
2168 if (cur_tmp_part_count > 2) {
2169 // use a stack slot to store the temporaries
2170 cur_tmp_slot_size = assignment->size();
2171 cur_tmp_slot = allocate_stack_slot(cur_tmp_slot_size);
2172
2173 for (u32 i = 0; i < cur_tmp_part_count; ++i) {
2174 auto ap = AssignmentPartRef{assignment, i};
2175 assert(!ap.fixed_assignment());
2176 auto slot_off = cur_tmp_slot + ap.part_off();
2177
2178 if (ap.register_valid()) {
2179 auto reg = ap.get_reg();
2180 derived()->spill_reg(reg, slot_off, ap.part_size());
2181 } else {
2182 auto reg = tmp_reg1.alloc_from_bank(ap.bank());
2183 assert(ap.stack_valid());
2184 derived()->load_from_stack(reg, ap.frame_off(), ap.part_size());
2185 derived()->spill_reg(reg, slot_off, ap.part_size());
2186 }
2187 }
2188 } else {
2189 // TODO(ts): if the PHI is not fixed, then we can just reuse its
2190 // register if it has one
2191 auto phi_vpr = phi_vr.part(0);
2192 auto reg = tmp_reg1.alloc_from_bank(phi_vpr.bank());
2193 phi_vpr.reload_into_specific_fixed(this, reg);
2194
2195 if (cur_tmp_part_count == 2) {
2196 // TODO(ts): just change the part ref on the lower ref?
2197 auto phi_vpr_high = phi_vr.part(1);
2198 auto reg_high = tmp_reg2.alloc_from_bank(phi_vpr_high.bank());
2199 phi_vpr_high.reload_into_specific_fixed(this, reg_high);
2200 }
2201 }
2202
2203 nodes[cur_idx].ref_count = 0;
2204 ready_indices.push_back(cur_idx);
2205 waiting_nodes.mark_unset(cur_idx);
2206 }
2207
2208 for (u32 i = 0; i < ready_indices.size(); ++i) {
2209 ++handled_count;
2210 auto cur_idx = ready_indices[i];
2211 auto phi_val = nodes[cur_idx].phi;
2212 IRValueRef incoming_val = nodes[cur_idx].incoming_val;
2213 if (incoming_val == phi_val) {
2214 // no need to do anything, except ref-counting
2215 (void)val_ref(incoming_val);
2216 (void)val_ref(phi_val);
2217 continue;
2218 }
2219
2220 if (incoming_val == cur_tmp_val) {
2221 move_from_tmp_phi(phi_val);
2222
2223 if (cur_tmp_part_count > 2) {
2224 free_stack_slot(cur_tmp_slot, cur_tmp_slot_size);
2225 cur_tmp_slot = 0xFFFF'FFFF;
2226 cur_tmp_slot_size = 0;
2227 }
2228 cur_tmp_val = Adaptor::INVALID_VALUE_REF;
2229 // skip the code below as the ref count of the temp val is
2230 // already 0 and we don't want to reinsert it into the ready
2231 // list
2232 continue;
2233 }
2234
2235 move_to_phi(phi_val, incoming_val);
2236
2237 if (nodes[cur_idx].incoming_phi_local_idx == INVALID_VAL_LOCAL_IDX) {
2238 continue;
2239 }
2240
2241 auto it = std::lower_bound(
2242 nodes.begin(), nodes.end(), nodes[cur_idx].incoming_phi_local_idx);
2243 assert(it != nodes.end() && it->phi == incoming_val &&
2244 "incoming_phi_local_idx set incorrectly");
2245
2246 assert(it->ref_count > 0);
2247 if (--it->ref_count == 0) {
2248 auto node_idx = static_cast<u32>(it - nodes.begin());
2249 ready_indices.push_back(node_idx);
2250 waiting_nodes.mark_unset(node_idx);
2251 }
2252 }
2253 ready_indices.clear();
2254 }
2255}
2256
2257template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
2260 return static_cast<BlockIndex>(static_cast<u32>(cur_block_idx) + 1);
2261}
2262
2263template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
2265 SymRef personality_sym;
2266 if (this->adaptor->cur_needs_unwind_info()) {
2267 SymRef personality_func = derived()->cur_personality_func();
2268 if (personality_func.valid()) {
2269 for (const auto &[fn_sym, ptr_sym] : personality_syms) {
2270 if (fn_sym == personality_func) {
2271 personality_sym = ptr_sym;
2272 break;
2273 }
2274 }
2275
2276 if (!personality_sym.valid()) {
2277 // create symbol that contains the address of the personality
2278 // function
2279 u32 off;
2280 static constexpr std::array<u8, 8> zero{};
2281
2282 auto rodata = this->assembler.get_data_section(true, true);
2283 personality_sym = this->assembler.sym_def_data(
2284 rodata, "", zero, 8, Assembler::SymBinding::LOCAL, &off);
2285 this->assembler.reloc_abs(rodata, personality_func, off, 0);
2286
2287 personality_syms.emplace_back(personality_func, personality_sym);
2288 }
2289 }
2290 }
2291 return personality_sym;
2292}
2293
2294template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
2296 const IRFuncRef func, const u32 func_idx) noexcept {
2297 if (!adaptor->switch_func(func)) {
2298 return false;
2299 }
2300 derived()->analysis_start();
2301 analyzer.switch_func(func);
2302 derived()->analysis_end();
2303
2304#ifndef NDEBUG
2305 stack.frame_size = ~0u;
2306#endif
2307 for (auto &e : stack.fixed_free_lists) {
2308 e.clear();
2309 }
2310 stack.dynamic_free_lists.clear();
2311 // TODO: sort out the inconsistency about adaptor vs. compiler methods.
2312 stack.has_dynamic_alloca = this->adaptor->cur_has_dynamic_alloca();
2313 stack.is_leaf_function = !derived()->cur_func_may_emit_calls();
2314 stack.generated_call = false;
2315 stack.frame_used = false;
2316
2317 assignments.cur_fixed_assignment_count = {};
2318 assert(std::ranges::none_of(assignments.value_ptrs, std::identity{}));
2319 if (assignments.value_ptrs.size() < analyzer.liveness.size()) {
2320 assignments.value_ptrs.resize(analyzer.liveness.size());
2321 }
2322
2323 assignments.allocator.reset();
2324 assignments.variable_ref_list = INVALID_VAL_LOCAL_IDX;
2325 assignments.delayed_free_lists.clear();
2326 assignments.delayed_free_lists.resize(analyzer.block_layout.size(),
2327 INVALID_VAL_LOCAL_IDX);
2328
2329 cur_block_idx =
2330 static_cast<BlockIndex>(analyzer.block_idx(adaptor->cur_entry_block()));
2331
2332 register_file.reset();
2333#ifndef NDEBUG
2334 generating_branch = false;
2335#endif
2336
2337 // Simple heuristic for initial allocation size
2338 u32 expected_code_size = 0x8 * analyzer.num_insts + 0x40;
2339 this->text_writer.begin_func(/*alignment=*/16, expected_code_size);
2340
2341 derived()->start_func(func_idx);
2342
2343 block_labels.clear();
2344 block_labels.resize_uninitialized(analyzer.block_layout.size());
2345 for (u32 i = 0; i < analyzer.block_layout.size(); ++i) {
2346 block_labels[i] = text_writer.label_create();
2347 }
2348
2349 // TODO(ts): place function label
2350 // TODO(ts): make function labels optional?
2351
2352 CCAssigner *cc_assigner = derived()->cur_cc_assigner();
2353 assert(cc_assigner != nullptr);
2354
2355 register_file.allocatable = cc_assigner->get_ccinfo().allocatable_regs;
2356
2357 cc_assigner->reset();
2358 // Temporarily prevent argument registers from being assigned.
2359 const CCInfo &cc_info = cc_assigner->get_ccinfo();
2360 assert((cc_info.allocatable_regs & cc_info.arg_regs) == cc_info.arg_regs &&
2361 "argument registers must also be allocatable");
2362 this->register_file.allocatable &= ~cc_info.arg_regs;
2363
2364 // Begin prologue, prepare for handling arguments.
2365 derived()->prologue_begin(cc_assigner);
2366 u32 arg_idx = 0;
2367 for (const IRValueRef arg : this->adaptor->cur_args()) {
2368 // Init assignment for all arguments. This can be substituted for more
2369 // complex mappings of arguments to value parts.
2370 derived()->prologue_assign_arg(cc_assigner, arg_idx++, arg);
2371 }
2372 // Finish prologue, storing relevant data from the argument cc_assigner.
2373 derived()->prologue_end(cc_assigner);
2374
2375 this->register_file.allocatable |= cc_info.arg_regs;
2376
2377 for (const IRValueRef alloca : adaptor->cur_static_allocas()) {
2378 auto size = adaptor->val_alloca_size(alloca);
2379 size = util::align_up(size, adaptor->val_alloca_align(alloca));
2380
2381 ValLocalIdx local_idx = adaptor->val_local_idx(alloca);
2382 init_variable_ref(local_idx, 0);
2383 ValueAssignment *assignment = val_assignment(local_idx);
2384 assignment->stack_variable = true;
2385 assignment->frame_off = allocate_stack_slot(size);
2386 }
2387
2388 if constexpr (!Config::DEFAULT_VAR_REF_HANDLING) {
2389 derived()->setup_var_ref_assignments();
2390 }
2391
2392 for (u32 i = 0; i < analyzer.block_layout.size(); ++i) {
2393 const auto block_ref = analyzer.block_layout[i];
2394 TPDE_LOG_TRACE(
2395 "Compiling block {} ({})", i, adaptor->block_fmt_ref(block_ref));
2396 if (!derived()->compile_block(block_ref, i)) [[unlikely]] {
2397 TPDE_LOG_ERR("Failed to compile block {} ({})",
2398 i,
2399 adaptor->block_fmt_ref(block_ref));
2400 // Ensure invariant that value_ptrs only contains nullptr at the end.
2401 assignments.value_ptrs.clear();
2402 return false;
2403 }
2404 }
2405
2406 // Reset all variable-ref assignment pointers to nullptr.
2407 ValLocalIdx variable_ref_list = assignments.variable_ref_list;
2408 while (variable_ref_list != INVALID_VAL_LOCAL_IDX) {
2409 u32 idx = u32(variable_ref_list);
2410 ValLocalIdx next = assignments.value_ptrs[idx]->next_delayed_free_entry;
2411 assignments.value_ptrs[idx] = nullptr;
2412 variable_ref_list = next;
2413 }
2414
2415 assert(std::ranges::none_of(assignments.value_ptrs, std::identity{}) &&
2416 "found non-freed ValueAssignment, maybe missing ref-count?");
2417
2418 derived()->finish_func(func_idx);
2419 this->text_writer.finish_func();
2420
2421 return true;
2422}
2423
2424template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
2426 const IRBlockRef block, const u32 block_idx) noexcept {
2427 cur_block_idx =
2428 static_cast<typename Analyzer<Adaptor>::BlockIndex>(block_idx);
2429
2430 label_place(block_labels[block_idx]);
2431 auto &&val_range = adaptor->block_insts(block);
2432 auto end = val_range.end();
2433 for (auto it = val_range.begin(); it != end; ++it) {
2434 const IRInstRef inst = *it;
2435 if (this->adaptor->inst_fused(inst)) {
2436 continue;
2437 }
2438
2439 auto it_cpy = it;
2440 ++it_cpy;
2441 if (!derived()->compile_inst(inst, InstRange{.from = it_cpy, .to = end}))
2442 [[unlikely]] {
2443 TPDE_LOG_ERR("Failed to compile instruction {}",
2444 this->adaptor->inst_fmt_ref(inst));
2445 return false;
2446 }
2447 }
2448
2449#ifndef NDEBUG
2450 // Some consistency checks. Register assignment information must match, all
2451 // used registers must have an assignment (no temporaries across blocks), and
2452 // fixed registers must be fixed assignments.
2453 for (auto reg_id : register_file.used_regs()) {
2454 Reg reg{reg_id};
2455 assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);
2456 AssignmentPartRef ap{val_assignment(register_file.reg_local_idx(reg)),
2457 register_file.reg_part(reg)};
2458 assert(ap.register_valid());
2459 assert(ap.get_reg() == reg);
2460 assert(!register_file.is_fixed(reg) || ap.fixed_assignment());
2461 }
2462#endif
2463
2464 if (static_cast<u32>(assignments.delayed_free_lists[block_idx]) != ~0u) {
2465 auto list_entry = assignments.delayed_free_lists[block_idx];
2466 while (static_cast<u32>(list_entry) != ~0u) {
2467 auto *assignment = assignments.value_ptrs[static_cast<u32>(list_entry)];
2468 auto next_entry = assignment->next_delayed_free_entry;
2469 derived()->free_assignment(list_entry, assignment);
2470 list_entry = next_entry;
2471 }
2472 }
2473 return true;
2474}
2475
2476} // namespace tpde
@ LOCAL
Symbol with local linkage, must be defined.
Base class for target-specific CallBuilder implementations.
void call(std::variant< SymRef, ValuePart >) noexcept
Generate the function call (evict registers, call, reset stack frame).
void add_arg(const CallArg &arg) noexcept
Add a full IR value as argument.
void add_ret(ValuePart &&vp, CCAssignment cca) noexcept
Assign next return value part to vp.
void add_ret(ValuePart &vp, CCAssignment cca) noexcept
Assign next return value part to vp.
void add_arg(ValuePart &&vp, CCAssignment cca) noexcept
Add a value part as argument.
The IRAdaptor specifies the interface with which the IR-independent parts of the compiler interact wi...
Definition IRAdaptor.hpp:91
Call argument, enhancing an IRValueRef with information on how to pass it.
u8 ext_bits
For Flag::zext and Flag::sext, the source bit width.
@ byval
Value is copied into corresponding stack slot.
@ zext
Scalar integer, zero-extend to target-specific size.
@ sext
Scalar integer, sign-extend to target-specific size.
@ allow_split
Value parts can be split across stack/registers.
Flag flag
Value handling flag.
u8 byval_align
For Flag::byval, the stack alignment.
u32 byval_size
For Flag::byval, the argument size.
IRValueRef value
Argument IR value.
Owned unspillable and unevictable temporary register with RAII semantics.
AsmReg alloc_gp() noexcept
Allocate a general-purpose register.
A default implementation for ValRefSpecial.
bool has_dynamic_alloca
Whether the stack frame might have dynamic alloca.
AsmReg gval_as_reg_reuse(GenericValuePart &gv, ScratchReg &dst) noexcept
Like gval_as_reg; if the GenericValuePart owns a reusable register (either a ScratchReg,...
void reset()
Reset any leftover data from the previous compilation such that it will not affect the next compilati...
ValueRef result_ref_stack_slot(IRValueRef value, AssignmentPartRef base, i32 off) noexcept
bool compile()
Compile the functions returned by Adaptor::funcs.
void label_place(Label label) noexcept
Convenience function to place a label at the current position.
AsmReg gval_as_reg_reuse(GenericValuePart &gv, ValuePart &dst) noexcept
Like gval_as_reg; if the GenericValuePart owns a reusable register (either a ScratchReg,...
bool generated_call
Whether the function actually includes a call.
void spill(AssignmentPartRef ap) noexcept
Ensure the value is spilled in its stack slot (except variable refs).
Reg select_reg(RegBank bank) noexcept
Select an available register, evicting loaded values if needed.
void evict(AssignmentPartRef ap) noexcept
Evict the value from its register, spilling if needed, and free register.
void prologue_assign_arg(CCAssigner *cc_assigner, u32 arg_idx, IRValueRef arg, u32 align=1, bool allow_split=false) noexcept
Assign function argument in prologue.
RegisterFile::RegBitSet spill_before_branch(bool force_spill=false) noexcept
Spill values that need to be spilled for later blocks.
void init_variable_ref(ValLocalIdx local_idx, u32 var_ref_data) noexcept
Init a variable-ref assignment.
std::unordered_map< u32, std::vector< i32 > > dynamic_free_lists
Free-Lists for all other sizes.
AsmReg gval_as_reg(GenericValuePart &gv) noexcept
Get generic value part into a single register, evaluating expressions and materializing immediates as...
u32 frame_size
The current size of the stack frame.
void generate_switch(ScratchReg &&cond, u32 width, IRBlockRef default_block, std::span< const std::pair< u64, IRBlockRef > > cases) noexcept
Generate a switch at the end of a basic block.
void begin_branch_region() noexcept
Indicate beginning of region where value-state must not change.
ValueRef result_ref_alias(IRValueRef dst, ValueRef &&src) noexcept
std::pair< ValueRef, ValuePartRef > val_ref_single(IRValueRef value) noexcept
void init_variable_ref(IRValueRef value, u32 var_ref_data) noexcept
Init a variable-ref assignment.
void end_branch_region() noexcept
Indicate end of region where value-state must not change.
void reload_to_reg(AsmReg dst, AssignmentPartRef ap) noexcept
Reload a value part from memory or recompute variable address.
void free_stack_slot(u32 slot, u32 size) noexcept
Free a static stack slot.
bool frame_used
Whether the stack frame is used.
bool generating_branch
Whether we are currently in the middle of generating branch-related code and therefore must not chang...
CCAssigner * cur_cc_assigner() noexcept
Get CCAssigner for current function.
Derived * derived()
shortcut for casting to the Derived class so that overloading works
void free_reg(Reg reg) noexcept
Free the register. Requires that the contained value is already spilled.
void generate_cond_branch(Jump jmp, IRBlockRef true_target, IRBlockRef false_target) noexcept
Generate an conditional branch at the end of a basic block.
bool branch_needs_split(IRBlockRef target) noexcept
Whether branch to a block requires additional instructions and therefore a direct jump to the block i...
util::SmallVector< i32, 16 > fixed_free_lists[5]
Free-Lists for 1/2/4/8/16 sized allocations.
void generate_uncond_branch(IRBlockRef target) noexcept
Generate an unconditional branch at the end of a basic block.
std::pair< ValueRef, ValuePartRef > result_ref_single(IRValueRef value) noexcept
CompilerBase(Adaptor *adaptor)
Initialize a CompilerBase, should be called by the derived classes.
void evict_reg(Reg reg) noexcept
Evict the value from the register, spilling if needed, and free register.
bool is_leaf_function
Whether the function is guaranteed to be a leaf function.
void release_assignment(ValLocalIdx local_idx, ValueAssignment *) noexcept
Release an assignment when reference count drops to zero, either frees the assignment immediately or ...
void allocate_spill_slot(AssignmentPartRef ap) noexcept
Allocate a stack slot for an assignment.
i32 allocate_stack_slot(u32 size) noexcept
Allocate a static stack slot.
void generate_branch_to_block(Jump jmp, IRBlockRef target, bool needs_split, bool last_inst) noexcept
Generate a branch to a basic block; execution continues afterwards.
void release_spilled_regs(typename RegisterFile::RegBitSet) noexcept