CompilerBase.hpp
// SPDX-FileCopyrightText: 2025 Contributors to TPDE <https://tpde.org>
//
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#pragma once

#include <algorithm>
#include <functional>
#include <unordered_map>

#include "Analyzer.hpp"
#include "Compiler.hpp"
#include "CompilerConfig.hpp"
#include "IRAdaptor.hpp"
#include "tpde/AssignmentPartRef.hpp"
#include "tpde/RegisterFile.hpp"
#include "tpde/ValLocalIdx.hpp"
#include "tpde/ValueAssignment.hpp"
#include "tpde/base.hpp"
#include "tpde/util/function_ref.hpp"
#include "tpde/util/misc.hpp"

namespace tpde {
// TODO(ts): formulate concept for full compiler so that there is *some* check
// whether all the required derived methods are implemented?

/// Thread-local storage access mode
enum class TLSModel {
  GlobalDynamic,
  LocalDynamic,
  InitialExec,
  LocalExec,
};

struct CCAssignment {
  Reg reg = Reg::make_invalid();

  /// If non-zero, indicates that this and the next N values must be
  /// assigned to consecutive registers, or to the stack. The following
  /// values must be in the same register bank as this value. u8 is
  /// sufficient; no architecture has more than 255 parameter registers in a
  /// single bank.
  u8 consecutive = 0;
  bool sret : 1 = false;  ///< Argument is return value pointer.
  bool sext : 1 = false;  ///< Sign-extend single integer.
  bool zext : 1 = false;  ///< Zero-extend single integer.
  bool byval : 1 = false;
  u8 align = 0;
  u8 size = 0;
  RegBank bank = RegBank{};
  u32 byval_align = 0;
  u32 byval_size = 0; // only used if byval is set
  u32 stack_off = 0;  // only used if reg is invalid
};

struct CCInfo {
  // TODO: use RegBitSet
  const u64 allocatable_regs;
  const u64 callee_saved_regs;
  /// Possible argument registers; these registers will not be allocated until
  /// all arguments have been assigned.
  const u64 arg_regs;
};

class CCAssigner {
public:
  const CCInfo *ccinfo;

  CCAssigner(const CCInfo &ccinfo) noexcept : ccinfo(&ccinfo) {}
  virtual ~CCAssigner() noexcept {}

  virtual void reset() noexcept = 0;

  const CCInfo &get_ccinfo() const noexcept { return *ccinfo; }

  virtual void assign_arg(CCAssignment &cca) noexcept = 0;
  virtual u32 get_stack_size() noexcept = 0;
  /// Some calling conventions need different call behavior when calling a
  /// vararg function.
  virtual bool is_vararg() const noexcept { return false; }
  virtual void assign_ret(CCAssignment &cca) noexcept = 0;
};

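// Illustrative sketch (not part of this header): a minimal CCAssigner for a
// hypothetical calling convention with two GP argument registers and stack
// fallback. The register names and the CCInfo contents are assumptions.
//
//   class ExampleCCAssigner : public CCAssigner {
//     static constexpr CCInfo INFO{/* allocatable/callee-saved/arg regs */};
//     u32 gp_cnt = 0;
//     u32 stack_size = 0;
//
//   public:
//     ExampleCCAssigner() noexcept : CCAssigner(INFO) {}
//     void reset() noexcept override { gp_cnt = stack_size = 0; }
//
//     void assign_arg(CCAssignment &cca) noexcept override {
//       // Honor the consecutive-register constraint: only hand out a
//       // register if this part and the next cca.consecutive parts fit.
//       if (gp_cnt + cca.consecutive < 2) {
//         cca.reg = Reg{/* id of argument register gp_cnt */};
//         ++gp_cnt;
//       } else {
//         stack_size = util::align_up(stack_size, cca.align ? cca.align : 8);
//         cca.stack_off = stack_size;
//         stack_size += 8;
//       }
//     }
//
//     u32 get_stack_size() noexcept override { return stack_size; }
//     void assign_ret(CCAssignment &cca) noexcept override { /* ... */ }
//   };
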
/// The base class for the compiler.
/// It implements the main platform-independent compilation logic and houses
/// the analyzer.
template <IRAdaptor Adaptor,
          typename Derived,
          CompilerConfig Config = CompilerConfigDefault>
struct CompilerBase {
  // some forwards for the IR type defs
  using IRValueRef = typename Adaptor::IRValueRef;
  using IRInstRef = typename Adaptor::IRInstRef;
  using IRBlockRef = typename Adaptor::IRBlockRef;
  using IRFuncRef = typename Adaptor::IRFuncRef;

  using BlockIndex = typename Analyzer<Adaptor>::BlockIndex;

  using Assembler = typename Config::Assembler;
  using AsmReg = typename Config::AsmReg;

  using RegisterFile = RegisterFile<Config::NUM_BANKS, 32>;

  /// A default implementation for ValRefSpecial.
  // Note: Subclasses can override this; always use Derived::ValRefSpecial.
  struct ValRefSpecial {
    uint8_t mode = 4;
    u64 const_data;
  };

#pragma region CompilerData
  Adaptor *adaptor;
  Analyzer<Adaptor> analyzer;

  // data for frame management

  struct {
    /// The current size of the stack frame
    u32 frame_size = 0;
    /// Free lists for 1/2/4/8/16-byte allocations
    // TODO(ts): make the allocations for 4/8 different from the others
    // since they are probably the ones most used?
    util::SmallVector<i32, 16> fixed_free_lists[5] = {};
    /// Free lists for all other sizes
    // TODO(ts): think about which data structure we want here
    std::unordered_map<u32, std::vector<i32>> dynamic_free_lists{};
  } stack = {};

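  // Worked example for the free lists above: a 12-byte slot is rounded up to
  // the 16-byte list (fixed_free_lists[4]); a 24-byte slot is aligned up to
  // 32 bytes and served from dynamic_free_lists[32]. Freed slots are pushed
  // back onto the matching list for reuse (see allocate_stack_slot and
  // free_stack_slot below).
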
  typename Analyzer<Adaptor>::BlockIndex cur_block_idx;

  // Assignments

  static constexpr ValLocalIdx INVALID_VAL_LOCAL_IDX =
      static_cast<ValLocalIdx>(~0u);

  // TODO(ts): think about different ways to store this that are maybe more
  // compact?
  struct {
    AssignmentAllocator allocator;

    std::array<u32, Config::NUM_BANKS> cur_fixed_assignment_count = {};
    util::SmallVector<ValueAssignment *, Analyzer<Adaptor>::SMALL_VALUE_NUM>
        value_ptrs;

    ValLocalIdx variable_ref_list;
    util::SmallVector<ValLocalIdx, Analyzer<Adaptor>::SMALL_BLOCK_NUM>
        delayed_free_lists;
  } assignments = {};

  RegisterFile register_file;
#ifndef NDEBUG
  /// Whether we are currently in the middle of generating branch-related code
  /// and therefore must not change any value-related state.
  bool generating_branch = false;
#endif

private:
  /// Default CCAssigner if the implementation doesn't override
  /// cur_cc_assigner.
  typename Config::DefaultCCAssigner default_cc_assigner;

public:
  Assembler assembler;
  Assembler::SectionWriter text_writer;
  // TODO(ts): smallvector?
  std::vector<typename Assembler::SymRef> func_syms;
  // TODO(ts): combine this with the block vectors in the analyzer to save on
  // allocations
  util::SmallVector<typename Assembler::Label> block_labels;

  util::SmallVector<
      std::pair<typename Assembler::SymRef, typename Assembler::SymRef>,
      4>
      personality_syms = {};

  struct ScratchReg;
  class ValuePart;
  struct ValuePartRef;
  struct ValueRef;
  struct GenericValuePart;
#pragma endregion

  struct InstRange {
    using Range = decltype(std::declval<Adaptor>().block_insts(
        std::declval<IRBlockRef>()));
    using Iter = decltype(std::declval<Range>().begin());
    using EndIter = decltype(std::declval<Range>().end());
    Iter from;
    EndIter to;
  };

  struct CallArg {
    enum class Flag : u8 {
      none,
      zext,
      sext,
      sret,
      byval
    };

    explicit CallArg(IRValueRef value,
                     Flag flags = Flag::none,
                     u32 byval_align = 0,
                     u32 byval_size = 0)
        : value(value),
          flag(flags),
          byval_align(byval_align),
          byval_size(byval_size) {}

    IRValueRef value;
    Flag flag;
    u32 byval_align;
    u32 byval_size;
  };

  template <typename CBDerived>
  class CallBuilderBase {
  protected:
    Derived &compiler;
    CCAssigner &assigner;

    RegisterFile::RegBitSet arg_regs{};

  public:
    CallBuilderBase(Derived &compiler, CCAssigner &assigner) noexcept
        : compiler(compiler), assigner(assigner) {}

    // CBDerived needs:
    // void add_arg_byval(ValuePart &vp, CCAssignment &cca) noexcept;
    // void add_arg_stack(ValuePart &vp, CCAssignment &cca) noexcept;
    // void call_impl(std::variant<Assembler::SymRef, ValuePart> &&) noexcept;
    CBDerived *derived() noexcept { return static_cast<CBDerived *>(this); }

    void add_arg(ValuePart &&vp, CCAssignment cca) noexcept;
    void add_arg(CallArg &&arg) noexcept;

    // evict registers, do call, reset stack frame
    void call(std::variant<typename Assembler::SymRef, ValuePart>) noexcept;

    void add_ret(ValuePart &vp, CCAssignment cca) noexcept;
    void add_ret(ValuePart &&vp, CCAssignment cca) noexcept {
      add_ret(vp, cca);
    }
    void add_ret(ValueRef &vr) noexcept;
  };

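  // Illustrative usage sketch (assumes a Derived-provided CallBuilder
  // subclass and value refs; the names here are examples, not part of this
  // header):
  //
  //   typename Derived::CallBuilder cb{*derived(), *cur_cc_assigner()};
  //   cb.add_arg(CallArg{arg_value});                  // plain argument
  //   cb.add_arg(CallArg{ptr_value, CallArg::Flag::byval, 8, 16});
  //   cb.call(func_sym);        // evicts clobbered regs, emits the call
  //   cb.add_ret(result_ref);   // binds return value(s) to their registers
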
  class RetBuilder {
    Derived &compiler;
    CCAssigner &assigner;

    RegisterFile::RegBitSet ret_regs{};

  public:
    RetBuilder(Derived &compiler, CCAssigner &assigner) noexcept
        : compiler(compiler), assigner(assigner) {
      assigner.reset();
    }

    void add(ValuePart &&vp, CCAssignment cca) noexcept;
    void add(IRValueRef val, CallArg::Flag flag = CallArg::Flag::none) noexcept;

    void ret() noexcept;
  };

  /// Initialize a CompilerBase; should be called by the derived classes.
  explicit CompilerBase(Adaptor *adaptor)
      : adaptor(adaptor), analyzer(adaptor), assembler() {
    static_assert(std::is_base_of_v<CompilerBase, Derived>);
    static_assert(Compiler<Derived, Config>);
  }

  /// Shortcut for casting to the Derived class so that overloading
  /// works.
  Derived *derived() { return static_cast<Derived *>(this); }

  const Derived *derived() const { return static_cast<const Derived *>(this); }

  [[nodiscard]] ValLocalIdx val_idx(const IRValueRef value) const noexcept {
    return analyzer.adaptor->val_local_idx(value);
  }

  [[nodiscard]] ValueAssignment *
      val_assignment(const ValLocalIdx idx) noexcept {
    return assignments.value_ptrs[static_cast<u32>(idx)];
  }

  /// Compile the functions returned by Adaptor::funcs
  ///
  /// \warning If you intend to call this multiple times, you must call reset
  /// in between the calls.
  ///
  /// \returns Whether the compilation was successful
  bool compile();

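  // Typical driver sketch (MyAdaptor/MyCompiler are assumed names):
  //
  //   MyAdaptor adaptor{/* IR module */};
  //   MyCompiler compiler{&adaptor};
  //   if (!compiler.compile()) { /* report error */ }
  //   compiler.reset();  // required before compiling again
  //   // ... point the adaptor at new IR, then call compile() again
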
  /// Reset any leftover data from the previous compilation such that it will
  /// not affect the next compilation
  void reset();

  /// Get CCAssigner for current function.
  CCAssigner *cur_cc_assigner() noexcept { return &default_cc_assigner; }

  void init_assignment(IRValueRef value, ValLocalIdx local_idx) noexcept;

  /// Frees an assignment, its stack slot and registers
  void free_assignment(ValLocalIdx local_idx) noexcept;

  /// Init a variable-ref assignment
  void init_variable_ref(ValLocalIdx local_idx, u32 var_ref_data) noexcept;
  /// Init a variable-ref assignment
  void init_variable_ref(IRValueRef value, u32 var_ref_data) noexcept {
    init_variable_ref(adaptor->val_local_idx(value), var_ref_data);
  }

  i32 allocate_stack_slot(u32 size) noexcept;
  void free_stack_slot(u32 slot, u32 size) noexcept;

  template <typename Fn>
  void handle_func_arg(u32 arg_idx, IRValueRef arg, Fn add_arg) noexcept;

  ValueRef val_ref(IRValueRef value) noexcept;

  std::pair<ValueRef, ValuePartRef> val_ref_single(IRValueRef value) noexcept;

  /// Get a defining reference to a value
  ValueRef result_ref(IRValueRef value) noexcept;

  std::pair<ValueRef, ValuePartRef>
      result_ref_single(IRValueRef value) noexcept;

  void set_value(ValuePartRef &val_ref, ScratchReg &scratch) noexcept;
  void set_value(ValuePartRef &&val_ref, ScratchReg &scratch) noexcept {
    set_value(val_ref, scratch);
  }

  /// Get generic value part into a single register, evaluating expressions
  /// and materializing immediates as required.
  AsmReg gval_as_reg(GenericValuePart &gv) noexcept;

  /// Like gval_as_reg; if the GenericValuePart owns a reusable register
  /// (either a ScratchReg, possibly due to materialization, or a reusable
  /// ValuePartRef), store it in dst.
  AsmReg gval_as_reg_reuse(GenericValuePart &gv, ScratchReg &dst) noexcept;

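  // Usage sketch: obtain the value in a register and try to take ownership
  // of that register for the result (hedged example; constructor argument
  // and surrounding code are assumptions):
  //
  //   ScratchReg scratch{derived()};
  //   AsmReg reg = gval_as_reg_reuse(gv, scratch);
  //   // If scratch now holds a register, it is safe to overwrite reg and
  //   // hand it to set_value as the result register.
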
  /// Reload a value part from memory or recompute variable address.
  void reload_to_reg(AsmReg dst, AssignmentPartRef ap) noexcept;

  void allocate_spill_slot(AssignmentPartRef ap) noexcept;

  /// Ensure the value is spilled in its stack slot (except variable refs).
  void spill(AssignmentPartRef ap) noexcept;

  /// Evict the value from its register, spilling if needed, and free register.
  void evict(AssignmentPartRef ap) noexcept;

  /// Evict the value from the register, spilling if needed, and free register.
  void evict_reg(Reg reg) noexcept;

  /// Free the register. Requires that the contained value is already spilled.
  void free_reg(Reg reg) noexcept;

  // TODO(ts): switch to a branch_spill_before naming style?
  typename RegisterFile::RegBitSet
      spill_before_branch(bool force_spill = false) noexcept;
  void release_spilled_regs(typename RegisterFile::RegBitSet) noexcept;

  /// When reaching a point in the function where no other blocks will be
  /// reached anymore, use this function to release register assignments after
  /// the end of that block so the compiler does not accidentally use
  /// registers which don't contain any values.
  void release_regs_after_return() noexcept;

  /// Indicate beginning of region where value-state must not change.
  void begin_branch_region() noexcept {
#ifndef NDEBUG
    assert(!generating_branch);
    generating_branch = true;
#endif
  }

  /// Indicate end of region where value-state must not change.
  void end_branch_region() noexcept {
#ifndef NDEBUG
    assert(generating_branch);
    generating_branch = false;
#endif
  }

#ifndef NDEBUG
  bool may_change_value_state() const noexcept { return !generating_branch; }
#endif

  void move_to_phi_nodes(BlockIndex target) noexcept {
    if (analyzer.block_has_phis(target)) {
      move_to_phi_nodes_impl(target);
    }
  }

  void move_to_phi_nodes_impl(BlockIndex target) noexcept;

  bool branch_needs_split(IRBlockRef target) noexcept {
    // for now, if the target has PHI-nodes, we split
    return analyzer.block_has_phis(target);
  }

  BlockIndex next_block() const noexcept;

  bool try_force_fixed_assignment(IRValueRef) const noexcept { return false; }

  bool hook_post_func_sym_init() noexcept { return true; }

  void analysis_start() noexcept {}

  void analysis_end() noexcept {}

  void reloc_text(Assembler::SymRef sym,
                  u32 type,
                  u64 offset,
                  i64 addend = 0) noexcept {
    this->assembler.reloc_sec(
        text_writer.get_sec_ref(), sym, type, offset, addend);
  }

  void label_place(Assembler::Label label) noexcept {
    this->assembler.label_place(
        label, text_writer.get_sec_ref(), text_writer.offset());
  }

protected:
  Assembler::SymRef get_personality_sym() noexcept;

  bool compile_func(IRFuncRef func, u32 func_idx) noexcept;

  bool compile_block(IRBlockRef block, u32 block_idx) noexcept;
};
} // namespace tpde

#include "GenericValuePart.hpp"
#include "ScratchReg.hpp"
#include "ValuePartRef.hpp"
#include "ValueRef.hpp"

namespace tpde {

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
template <typename CBDerived>
void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
    CBDerived>::add_arg(ValuePart &&vp, CCAssignment cca) noexcept {
  cca.bank = vp.bank();
  cca.size = vp.part_size();

  assigner.assign_arg(cca);

  if (cca.byval) {
    derived()->add_arg_byval(vp, cca);
    vp.reset(&compiler);
  } else if (!cca.reg.valid()) {
    derived()->add_arg_stack(vp, cca);
    vp.reset(&compiler);
  } else {
    u32 size = vp.part_size();
    if (vp.is_in_reg(cca.reg)) {
      if (!vp.can_salvage()) {
        compiler.evict_reg(cca.reg);
      } else {
        vp.salvage(&compiler);
      }
      if (cca.sext || cca.zext) {
        compiler.generate_raw_intext(cca.reg, cca.reg, cca.sext, 8 * size, 64);
      }
    } else {
      if (compiler.register_file.is_used(cca.reg)) {
        compiler.evict_reg(cca.reg);
      }
      if (vp.can_salvage()) {
        AsmReg vp_reg = vp.salvage(&compiler);
        if (cca.sext || cca.zext) {
          compiler.generate_raw_intext(cca.reg, vp_reg, cca.sext, 8 * size, 64);
        } else {
          compiler.mov(cca.reg, vp_reg, size);
        }
      } else {
        vp.reload_into_specific_fixed(&compiler, cca.reg);
        if (cca.sext || cca.zext) {
          compiler.generate_raw_intext(
              cca.reg, cca.reg, cca.sext, 8 * size, 64);
        }
      }
    }
    vp.reset(&compiler);
    assert(!compiler.register_file.is_used(cca.reg));
    compiler.register_file.mark_used(
        cca.reg, CompilerBase::INVALID_VAL_LOCAL_IDX, 0);
    compiler.register_file.mark_fixed(cca.reg);
    compiler.register_file.mark_clobbered(cca.reg);
    arg_regs |= (1ull << cca.reg.id());
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
template <typename CBDerived>
void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
    CBDerived>::add_arg(CallArg &&arg) noexcept {
  ValueRef vr = compiler.val_ref(arg.value);
  const u32 part_count = compiler.val_parts(arg.value).count();

  if (arg.flag == CallArg::Flag::byval) {
    assert(part_count == 1);
    add_arg(vr.part(0),
            CCAssignment{
                .byval = true,
                .byval_align = arg.byval_align,
                .byval_size = arg.byval_size,
            });
    return;
  }

  u32 align = 1;
  bool consecutive = false;
  u32 consec_def = 0;
  if (compiler.arg_is_int128(arg.value)) {
    // TODO: this also applies to composites with 16-byte alignment
    align = 16;
    consecutive = true;
  } else if (part_count > 1 &&
             !compiler.arg_allow_split_reg_stack_passing(arg.value)) {
    consecutive = true;
    if (part_count > UINT8_MAX) {
      // Must be completely passed on the stack.
      consecutive = false;
      consec_def = -1;
    }
  }

  for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
    derived()->add_arg(
        vr.part(part_idx),
        CCAssignment{
            .consecutive =
                u8(consecutive ? part_count - part_idx - 1 : consec_def),
            .sret = arg.flag == CallArg::Flag::sret,
            .sext = arg.flag == CallArg::Flag::sext,
            .zext = arg.flag == CallArg::Flag::zext,
            .align = u8(part_idx == 0 ? align : 1),
        });
  }
}

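// Worked example for the loop above: an i128 argument with two 8-byte parts
// is emitted as part 0 with consecutive=1 and align=16 (both parts must land
// in adjacent registers of the same bank, or entirely on the stack) and
// part 1 with consecutive=0 and align=1.
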
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
template <typename CBDerived>
void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<CBDerived>::call(
    std::variant<typename Assembler::SymRef, ValuePart> target) noexcept {
  typename RegisterFile::RegBitSet skip_evict = arg_regs;
  if (auto *vp = std::get_if<ValuePart>(&target); vp && vp->can_salvage()) {
    // call_impl will reset vp, thereby unlocking and freeing the register.
    assert(vp->cur_reg_unlocked().valid() && "can_salvage implies register");
    skip_evict |= (1ull << vp->cur_reg_unlocked().id());
  }

  auto clobbered = ~assigner.get_ccinfo().callee_saved_regs;
  for (auto reg_id : util::BitSetIterator<>{compiler.register_file.used &
                                            clobbered & ~skip_evict}) {
    compiler.evict_reg(AsmReg{reg_id});
    compiler.register_file.mark_clobbered(Reg{reg_id});
  }

  derived()->call_impl(std::move(target));

  for (auto reg_id : util::BitSetIterator<>{arg_regs}) {
    compiler.register_file.unmark_fixed(Reg{reg_id});
    compiler.register_file.unmark_used(Reg{reg_id});
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
template <typename CBDerived>
void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
    CBDerived>::add_ret(ValuePart &vp, CCAssignment cca) noexcept {
  cca.bank = vp.bank();
  cca.size = vp.part_size();
  assigner.assign_ret(cca);
  assert(cca.reg.valid() && "return value must be in register");
  vp.set_value_reg(&compiler, cca.reg);
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
template <typename CBDerived>
void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
    CBDerived>::add_ret(ValueRef &vr) noexcept {
  assert(vr.has_assignment());
  u32 part_count = vr.assignment()->part_count;
  for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
    CCAssignment cca;
    add_ret(vr.part(part_idx), cca);
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::RetBuilder::add(
    ValuePart &&vp, CCAssignment cca) noexcept {
  cca.bank = vp.bank();
  u32 size = cca.size = vp.part_size();
  assigner.assign_ret(cca);
  assert(cca.reg.valid() && "indirect return value must use sret argument");

  if (vp.is_in_reg(cca.reg)) {
    if (!vp.can_salvage()) {
      compiler.evict_reg(cca.reg);
    } else {
      vp.salvage(&compiler);
    }
    if (cca.sext || cca.zext) {
      compiler.generate_raw_intext(cca.reg, cca.reg, cca.sext, 8 * size, 64);
    }
  } else {
    if (compiler.register_file.is_used(cca.reg)) {
      compiler.evict_reg(cca.reg);
    }
    if (vp.can_salvage()) {
      AsmReg vp_reg = vp.salvage(&compiler);
      if (cca.sext || cca.zext) {
        compiler.generate_raw_intext(cca.reg, vp_reg, cca.sext, 8 * size, 64);
      } else {
        compiler.mov(cca.reg, vp_reg, size);
      }
    } else {
      vp.reload_into_specific_fixed(&compiler, cca.reg);
      if (cca.sext || cca.zext) {
        compiler.generate_raw_intext(cca.reg, cca.reg, cca.sext, 8 * size, 64);
      }
    }
  }
  vp.reset(&compiler);
  assert(!compiler.register_file.is_used(cca.reg));
  compiler.register_file.mark_used(
      cca.reg, CompilerBase::INVALID_VAL_LOCAL_IDX, 0);
  compiler.register_file.mark_fixed(cca.reg);
  compiler.register_file.mark_clobbered(cca.reg);
  ret_regs |= (1ull << cca.reg.id());
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::RetBuilder::add(
    IRValueRef val, CallArg::Flag flag) noexcept {
  u32 part_count = compiler.val_parts(val).count();
  ValueRef vr = compiler.val_ref(val);
  for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
    add(vr.part(part_idx),
        CCAssignment{
            .sext = flag == CallArg::Flag::sext,
            .zext = flag == CallArg::Flag::zext,
        });
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::RetBuilder::ret() noexcept {
  for (auto reg_id : util::BitSetIterator<>{ret_regs}) {
    compiler.register_file.unmark_fixed(Reg{reg_id});
    compiler.register_file.unmark_used(Reg{reg_id});
  }

  compiler.gen_func_epilog();
  compiler.release_regs_after_return();
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
bool CompilerBase<Adaptor, Derived, Config>::compile() {
  // create function symbols
  text_writer.switch_section(
      assembler.get_section(assembler.get_text_section()));

  assert(func_syms.empty());
  for (const IRFuncRef func : adaptor->funcs()) {
    auto binding = Assembler::SymBinding::GLOBAL;
    if (adaptor->func_has_weak_linkage(func)) {
      binding = Assembler::SymBinding::WEAK;
    } else if (adaptor->func_only_local(func)) {
      binding = Assembler::SymBinding::LOCAL;
    }
    if (adaptor->func_extern(func)) {
      func_syms.push_back(derived()->assembler.sym_add_undef(
          adaptor->func_link_name(func), binding));
    } else {
      func_syms.push_back(derived()->assembler.sym_predef_func(
          adaptor->func_link_name(func), binding));
    }
    derived()->define_func_idx(func, func_syms.size() - 1);
  }

  if (!derived()->hook_post_func_sym_init()) {
    TPDE_LOG_ERR("hook_post_func_sym_init failed");
    return false;
  }

  // TODO(ts): create function labels?

  bool success = true;

  u32 func_idx = 0;
  for (const IRFuncRef func : adaptor->funcs()) {
    if (adaptor->func_extern(func)) {
      TPDE_LOG_TRACE("Skipping compilation of func {}",
                     adaptor->func_link_name(func));
      ++func_idx;
      continue;
    }

    TPDE_LOG_TRACE("Compiling func {}", adaptor->func_link_name(func));
    if (!derived()->compile_func(func, func_idx)) {
      TPDE_LOG_ERR("Failed to compile function {}",
                   adaptor->func_link_name(func));
      success = false;
    }
    ++func_idx;
  }

  text_writer.flush();
  assembler.finalize();

  // TODO(ts): generate object/map?

  return success;
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::reset() {
  adaptor->reset();

  for (auto &e : stack.fixed_free_lists) {
    e.clear();
  }
  stack.dynamic_free_lists.clear();

  assembler.reset();
  func_syms.clear();
  block_labels.clear();
  personality_syms.clear();
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::init_assignment(
    IRValueRef value, ValLocalIdx local_idx) noexcept {
  assert(val_assignment(local_idx) == nullptr);
  TPDE_LOG_TRACE("Initializing assignment for value {}",
                 static_cast<u32>(local_idx));

  const auto parts = derived()->val_parts(value);
  const u32 part_count = parts.count();
  assert(part_count > 0);
  auto *assignment = assignments.allocator.allocate(part_count);
  assignments.value_ptrs[static_cast<u32>(local_idx)] = assignment;

  u32 max_part_size = 0;
  for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
    auto ap = AssignmentPartRef{assignment, part_idx};
    ap.reset();
    ap.set_bank(parts.reg_bank(part_idx));
    const u32 size = parts.size_bytes(part_idx);
    assert(size > 0);
    max_part_size = std::max(max_part_size, size);
    ap.set_part_size(size);
  }

  const auto &liveness = analyzer.liveness_info(local_idx);

  // if there is only one part, try to hand out a fixed assignment
  // if the value is used for longer than one block and there aren't too many
  // definitions in child loops this could interfere with
  // TODO(ts): try out only fixed assignments if the value is live for more
  // than two blocks?
  // TODO(ts): move this to ValuePartRef::alloc_reg to be able to defer this
  // for results?
  if (part_count == 1) {
    const auto &cur_loop =
        analyzer.loop_from_idx(analyzer.block_loop_idx(cur_block_idx));
    auto ap = AssignmentPartRef{assignment, 0};

    auto try_fixed =
        liveness.last > cur_block_idx &&
        cur_loop.definitions_in_childs +
                assignments.cur_fixed_assignment_count[ap.bank().id()] <
            Derived::NUM_FIXED_ASSIGNMENTS[ap.bank().id()];
    if (derived()->try_force_fixed_assignment(value)) {
      try_fixed = assignments.cur_fixed_assignment_count[ap.bank().id()] <
                  Derived::NUM_FIXED_ASSIGNMENTS[ap.bank().id()];
    }

    if (try_fixed) {
      // check if there is a fixed register available
      AsmReg reg = derived()->select_fixed_assignment_reg(ap.bank(), value);
      TPDE_LOG_TRACE("Trying to assign fixed reg to value {}",
                     static_cast<u32>(local_idx));

      // TODO: if the register is used, we can free it most of the time, but
      // not always, e.g. for PHI nodes. Detect this case and free_reg
      // otherwise.
      if (!reg.invalid() && !register_file.is_used(reg)) {
        TPDE_LOG_TRACE("Assigning fixed assignment to reg {} for value {}",
                       reg.id(),
                       static_cast<u32>(local_idx));
        ap.set_reg(reg);
        ap.set_register_valid(true);
        ap.set_fixed_assignment(true);
        register_file.mark_used(reg, local_idx, 0);
        register_file.inc_lock_count(reg); // fixed assignments always locked
        register_file.mark_clobbered(reg);
        ++assignments.cur_fixed_assignment_count[ap.bank().id()];
      }
    }
  }

  const auto last_full = liveness.last_full;
  const auto ref_count = liveness.ref_count;

  assert(max_part_size <= 256);
  assignment->max_part_size = max_part_size;
#ifndef NDEBUG
  assignment->pending_free = false;
#endif
  assignment->variable_ref = false;
  assignment->stack_variable = false;
  assignment->delay_free = last_full;
  assignment->part_count = part_count;
  assignment->frame_off = 0;
  assignment->references_left = ref_count;
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::free_assignment(
    const ValLocalIdx local_idx) noexcept {
  TPDE_LOG_TRACE("Freeing assignment for value {}",
                 static_cast<u32>(local_idx));

  ValueAssignment *assignment =
      assignments.value_ptrs[static_cast<u32>(local_idx)];
  assignments.value_ptrs[static_cast<u32>(local_idx)] = nullptr;
  const auto is_var_ref = assignment->variable_ref;
  const u32 part_count = assignment->part_count;

  // free registers
  for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
    auto ap = AssignmentPartRef{assignment, part_idx};
    if (ap.fixed_assignment()) [[unlikely]] {
      const auto reg = ap.get_reg();
      assert(register_file.is_fixed(reg));
      assert(register_file.reg_local_idx(reg) == local_idx);
      assert(register_file.reg_part(reg) == part_idx);
      --assignments.cur_fixed_assignment_count[ap.bank().id()];
      register_file.dec_lock_count_must_zero(reg); // release lock for fixed reg
      register_file.unmark_used(reg);
    } else if (ap.register_valid()) {
      const auto reg = ap.get_reg();
      assert(!register_file.is_fixed(reg));
      register_file.unmark_used(reg);
    }
  }

#ifdef TPDE_ASSERTS
  for (auto reg_id : register_file.used_regs()) {
    assert(register_file.reg_local_idx(AsmReg{reg_id}) != local_idx &&
           "freeing assignment that is still referenced by a register");
  }
#endif

  // variable references do not have a stack slot
  if (!is_var_ref && assignment->frame_off != 0) {
    free_stack_slot(assignment->frame_off, assignment->size());
  }

  assignments.allocator.deallocate(assignment);
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::init_variable_ref(
    ValLocalIdx local_idx, u32 var_ref_data) noexcept {
  TPDE_LOG_TRACE("Initializing variable-ref assignment for value {}",
                 static_cast<u32>(local_idx));

  assert(val_assignment(local_idx) == nullptr);
  auto *assignment = assignments.allocator.allocate_slow(1, true);
  assignments.value_ptrs[static_cast<u32>(local_idx)] = assignment;

  assignment->max_part_size = Config::PLATFORM_POINTER_SIZE;
  assignment->variable_ref = true;
  assignment->stack_variable = false;
  assignment->part_count = 1;
  assignment->var_ref_custom_idx = var_ref_data;
  assignment->next_delayed_free_entry = assignments.variable_ref_list;

  assignments.variable_ref_list = local_idx;

  AssignmentPartRef ap{assignment, 0};
  ap.reset();
  ap.set_bank(Config::GP_BANK);
  ap.set_part_size(Config::PLATFORM_POINTER_SIZE);
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
i32 CompilerBase<Adaptor, Derived, Config>::allocate_stack_slot(
    u32 size) noexcept {
  unsigned align_bits = 4;
  if (size == 0) {
    return 0; // 0 is the "invalid" stack slot
  } else if (size <= 16) {
    // Align up to next power of two.
    u32 free_list_idx = size == 1 ? 0 : 32 - util::cnt_lz<u32>(size - 1);
    assert(size <= 1u << free_list_idx);
    size = 1 << free_list_idx;
    align_bits = free_list_idx;

    if (!stack.fixed_free_lists[free_list_idx].empty()) {
      auto slot = stack.fixed_free_lists[free_list_idx].back();
      stack.fixed_free_lists[free_list_idx].pop_back();
      return slot;
    }
  } else {
    size = util::align_up(size, 16);
    auto it = stack.dynamic_free_lists.find(size);
    if (it != stack.dynamic_free_lists.end() && !it->second.empty()) {
      const auto slot = it->second.back();
      it->second.pop_back();
      return slot;
    }
  }

  assert(stack.frame_size != ~0u &&
         "cannot allocate stack slot before stack frame is initialized");

  // Align frame_size to align_bits
  for (u32 list_idx = util::cnt_tz(stack.frame_size); list_idx < align_bits;
       list_idx = util::cnt_tz(stack.frame_size)) {
    i32 slot = stack.frame_size;
    if constexpr (Config::FRAME_INDEXING_NEGATIVE) {
      slot = -(slot + (1ull << list_idx));
    }
    stack.fixed_free_lists[list_idx].push_back(slot);
    stack.frame_size += 1ull << list_idx;
  }

  auto slot = stack.frame_size;
  assert(slot != 0 && "stack slot 0 is reserved");
  stack.frame_size += size;

  if constexpr (Config::FRAME_INDEXING_NEGATIVE) {
    slot = -(slot + size);
  }
  return slot;
}

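// Worked example (assuming FRAME_INDEXING_NEGATIVE is false and a frame that
// is already 16 bytes large): allocate_stack_slot(12) rounds the request up
// to 16 bytes, finds free list 4 empty, returns slot 16 and grows the frame
// to 32. A following allocate_stack_slot(24) aligns the request to 32 bytes
// and returns slot 32. free_stack_slot(16, 12) then pushes slot 16 back onto
// the 16-byte free list for reuse.
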
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::free_stack_slot(
    u32 slot, u32 size) noexcept {
  if (size == 0) [[unlikely]] {
    assert(slot == 0 && "unexpected slot for zero-sized stack-slot?");
    // Do nothing.
  } else if (size <= 16) [[likely]] {
    u32 free_list_idx = size == 1 ? 0 : 32 - util::cnt_lz<u32>(size - 1);
    stack.fixed_free_lists[free_list_idx].push_back(slot);
  } else {
    size = util::align_up(size, 16);
    stack.dynamic_free_lists[size].push_back(slot);
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
template <typename Fn>
void CompilerBase<Adaptor, Derived, Config>::handle_func_arg(
    u32 arg_idx, IRValueRef arg, Fn add_arg) noexcept {
  ValueRef vr = derived()->result_ref(arg);
  if (adaptor->cur_arg_is_byval(arg_idx)) {
    add_arg(vr.part(0),
            CCAssignment{
                .byval = true,
                .byval_align = adaptor->cur_arg_byval_align(arg_idx),
                .byval_size = adaptor->cur_arg_byval_size(arg_idx),
            });
    return;
  }

  if (adaptor->cur_arg_is_sret(arg_idx)) {
    add_arg(vr.part(0), CCAssignment{.sret = true});
    return;
  }

  const u32 part_count = derived()->val_parts(arg).count();

  u32 align = 1;
  u32 consecutive = 0;
  u32 consec_def = 0;
  if (derived()->arg_is_int128(arg)) {
    // TODO: this also applies to composites with 16-byte alignment
    align = 16;
    consecutive = 1;
  } else if (part_count > 1 &&
             !derived()->arg_allow_split_reg_stack_passing(arg)) {
    consecutive = 1;
    if (part_count > UINT8_MAX) {
      // Must be completely passed on the stack.
      consecutive = 0;
      consec_def = -1;
    }
  }

  for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
    add_arg(vr.part(part_idx),
            CCAssignment{
                .consecutive =
                    u8(consecutive ? part_count - part_idx - 1 : consec_def),
                .align = u8(part_idx == 0 ? align : 1),
            });
  }
}

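// Usage sketch for handle_func_arg: a Derived prologue can funnel every IR
// argument through it and let the callback consume the computed CCAssignment
// (adaptor->cur_args() and the callback body are assumptions here):
//
//   u32 arg_idx = 0;
//   for (IRValueRef arg : adaptor->cur_args()) {
//     handle_func_arg(arg_idx++, arg,
//                     [&](ValuePart &&vp, CCAssignment cca) {
//                       cur_cc_assigner()->assign_arg(cca);
//                       // copy cca.reg or cca.stack_off into vp ...
//                     });
//   }
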
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::ValueRef
    CompilerBase<Adaptor, Derived, Config>::val_ref(IRValueRef value) noexcept {
  if (auto special = derived()->val_ref_special(value); special) {
    return ValueRef{this, std::move(*special)};
  }

  const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(value);
  assert(val_assignment(local_idx) != nullptr && "value use before def");
  return ValueRef{this, local_idx};
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
std::pair<typename CompilerBase<Adaptor, Derived, Config>::ValueRef,
          typename CompilerBase<Adaptor, Derived, Config>::ValuePartRef>
    CompilerBase<Adaptor, Derived, Config>::val_ref_single(
        IRValueRef value) noexcept {
  std::pair<ValueRef, ValuePartRef> res{val_ref(value), this};
  res.second = res.first.part(0);
  return res;
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::ValueRef
    CompilerBase<Adaptor, Derived, Config>::result_ref(
        IRValueRef value) noexcept {
  const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(value);
  if (val_assignment(local_idx) == nullptr) {
    init_assignment(value, local_idx);
  }
  return ValueRef{this, local_idx};
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
std::pair<typename CompilerBase<Adaptor, Derived, Config>::ValueRef,
          typename CompilerBase<Adaptor, Derived, Config>::ValuePartRef>
    CompilerBase<Adaptor, Derived, Config>::result_ref_single(
        IRValueRef value) noexcept {
  std::pair<ValueRef, ValuePartRef> res{result_ref(value), this};
  res.second = res.first.part(0);
  return res;
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::set_value(
    ValuePartRef &val_ref, ScratchReg &scratch) noexcept {
  auto ap = val_ref.assignment();
  assert(scratch.has_reg());
  auto reg = scratch.cur_reg();

  if (ap.fixed_assignment()) {
    auto cur_reg = ap.get_reg();
    assert(register_file.is_used(cur_reg));
    assert(register_file.is_fixed(cur_reg));
    assert(register_file.reg_local_idx(cur_reg) == val_ref.local_idx());

    if (cur_reg.id() != reg.id()) {
      derived()->mov(cur_reg, reg, ap.part_size());
    }

    ap.set_register_valid(true);
    ap.set_modified(true);
    return;
  }

  if (ap.register_valid()) {
    auto cur_reg = ap.get_reg();
    if (cur_reg.id() == reg.id()) {
      ap.set_modified(true);
      return;
    }
    val_ref.unlock();
    assert(!register_file.is_fixed(cur_reg));
    register_file.unmark_used(cur_reg);
  }

  // ScratchReg's reg is fixed and used => unfix, keep used, update assignment
  assert(register_file.is_used(reg));
  assert(register_file.is_fixed(reg));
  assert(register_file.is_clobbered(reg));
  scratch.force_set_reg(AsmReg::make_invalid());
  register_file.unmark_fixed(reg);
  register_file.update_reg_assignment(reg, val_ref.local_idx(), val_ref.part());
  ap.set_reg(reg);
  ap.set_register_valid(true);
  ap.set_modified(true);
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::AsmReg
    CompilerBase<Adaptor, Derived, Config>::gval_as_reg(
        GenericValuePart &gv) noexcept {
  if (std::holds_alternative<ScratchReg>(gv.state)) {
    return std::get<ScratchReg>(gv.state).cur_reg();
  }
  if (std::holds_alternative<ValuePartRef>(gv.state)) {
    auto &vpr = std::get<ValuePartRef>(gv.state);
    if (vpr.has_reg()) {
      return vpr.cur_reg();
    }
    return vpr.load_to_reg();
  }
  if (auto *expr = std::get_if<typename GenericValuePart::Expr>(&gv.state)) {
    if (expr->has_base() && !expr->has_index() && expr->disp == 0) {
      return expr->base_reg();
    }
    return derived()->gval_expr_as_reg(gv);
  }
  TPDE_UNREACHABLE("gval_as_reg on empty GenericValuePart");
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::AsmReg
    CompilerBase<Adaptor, Derived, Config>::gval_as_reg_reuse(
        GenericValuePart &gv, ScratchReg &dst) noexcept {
  AsmReg reg = gval_as_reg(gv);
  if (!dst.has_reg()) {
    if (auto *scratch = std::get_if<ScratchReg>(&gv.state)) {
      dst = std::move(*scratch);
    } else if (auto *val_ref = std::get_if<ValuePartRef>(&gv.state)) {
      if (val_ref->can_salvage()) {
        dst.alloc_specific(val_ref->salvage());
        assert(dst.cur_reg() == reg && "salvaging unsuccessful");
      }
    }
  }
  return reg;
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::reload_to_reg(
    AsmReg dst, AssignmentPartRef ap) noexcept {
  if (!ap.variable_ref()) {
    assert(ap.stack_valid());
    derived()->load_from_stack(dst, ap.frame_off(), ap.part_size());
  } else if (ap.is_stack_variable()) {
    derived()->load_address_of_stack_var(dst, ap);
  } else if constexpr (!Config::DEFAULT_VAR_REF_HANDLING) {
    derived()->load_address_of_var_reference(dst, ap);
  } else {
    TPDE_UNREACHABLE("non-stack-variable needs custom var-ref handling");
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::allocate_spill_slot(
    AssignmentPartRef ap) noexcept {
  assert(!ap.variable_ref() && "cannot allocate spill slot for variable ref");
  if (ap.assignment()->frame_off == 0) {
    assert(!ap.stack_valid() && "stack-valid set without spill slot");
    ap.assignment()->frame_off = allocate_stack_slot(ap.assignment()->size());
    assert(ap.assignment()->frame_off != 0);
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::spill(
    AssignmentPartRef ap) noexcept {
  assert(may_change_value_state());
  if (!ap.stack_valid() && !ap.variable_ref()) {
    assert(ap.register_valid() && "cannot spill uninitialized assignment part");
    allocate_spill_slot(ap);
    derived()->spill_reg(ap.get_reg(), ap.frame_off(), ap.part_size());
    ap.set_stack_valid();
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::evict(
    AssignmentPartRef ap) noexcept {
  assert(may_change_value_state());
  assert(ap.register_valid());
  derived()->spill(ap);
  ap.set_register_valid(false);
  register_file.unmark_used(ap.get_reg());
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::evict_reg(Reg reg) noexcept {
  assert(may_change_value_state());
  assert(!register_file.is_fixed(reg));
  assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);

  ValLocalIdx local_idx = register_file.reg_local_idx(reg);
  auto part = register_file.reg_part(reg);
  AssignmentPartRef evict_part{val_assignment(local_idx), part};
  assert(evict_part.register_valid());
  assert(evict_part.get_reg() == reg);
  derived()->spill(evict_part);
  evict_part.set_register_valid(false);
  register_file.unmark_used(reg);
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::free_reg(Reg reg) noexcept {
  assert(may_change_value_state());
  assert(!register_file.is_fixed(reg));
  assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);

  ValLocalIdx local_idx = register_file.reg_local_idx(reg);
  auto part = register_file.reg_part(reg);
  AssignmentPartRef ap{val_assignment(local_idx), part};
  assert(ap.register_valid());
  assert(ap.get_reg() == reg);
  assert(!ap.modified() || ap.variable_ref());
  ap.set_register_valid(false);
  register_file.unmark_used(reg);
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::RegisterFile::RegBitSet
    CompilerBase<Adaptor, Derived, Config>::spill_before_branch(
        bool force_spill) noexcept {
  // Since we do not explicitly keep track of register assignments per block,
  // whenever we might branch off to a block that we do not directly compile
  // afterwards (i.e. the register assignments might change in between), we
  // need to spill all registers which are not fixed and remove them from the
  // register state.
  //
  // This leads to worse codegen but saves a significant overhead to
  // store/manage the register assignment for each block (256 bytes/block for
  // x64) and possibly compile time, as there might be additional logic to
  // move values around.

  // First, we consider the case that the current block only has one successor
  // which is compiled directly after the current one, in which case we do not
  // have to spill anything.
  //
  // Secondly, if the next block has multiple incoming edges, we always have
  // to spill and remove from the register assignment. Otherwise, we
  // only need to spill values if they are alive in any successor which is not
  // the next block.
  //
  // Values which are only read from PHI-nodes and have no extended lifetimes
  // do not need to be spilled as they die at the edge.

  using RegBitSet = typename RegisterFile::RegBitSet;

  assert(may_change_value_state());

  const IRBlockRef cur_block_ref = analyzer.block_ref(cur_block_idx);

  bool must_spill = force_spill;
  if (!must_spill) {
    // We must always spill if no block is immediately succeeding or that block
    // has multiple incoming edges.
    auto next_block_is_succ = false;
    auto next_block_has_multiple_incoming = false;
    u32 succ_count = 0;
    for (const IRBlockRef succ : adaptor->block_succs(cur_block_ref)) {
      ++succ_count;
      if (static_cast<u32>(analyzer.block_idx(succ)) ==
          static_cast<u32>(cur_block_idx) + 1) {
        next_block_is_succ = true;
        if (analyzer.block_has_multiple_incoming(succ)) {
          next_block_has_multiple_incoming = true;
        }
      }
    }

    must_spill = !next_block_is_succ || next_block_has_multiple_incoming;

    if (succ_count == 1 && !must_spill) {
      return RegBitSet{};
    }
  }

  /*if (!next_block_is_succ) {
    // spill and remove from reg assignment
    auto spilled = RegisterFile::RegBitSet{};
    for (auto reg : register_file.used_regs()) {
      auto local_idx = register_file.reg_local_idx(AsmReg{reg});
      auto part = register_file.reg_part(AsmReg{reg});
      if (local_idx == INVALID_VAL_LOCAL_IDX) {
        // TODO(ts): can this actually happen?
        continue;
      }
      auto ap = AssignmentPartRef{val_assignment(local_idx), part};
      ap.spill_if_needed(this);
      spilled |= RegisterFile::RegBitSet{1ull} << reg;
    }
    return spilled;
  }*/

  const auto spill_reg_if_needed = [&](const auto reg) {
    auto local_idx = register_file.reg_local_idx(AsmReg{reg});
    auto part = register_file.reg_part(AsmReg{reg});
    if (local_idx == INVALID_VAL_LOCAL_IDX) {
      // scratch regs can never be held across blocks
      return true;
    }
    auto *assignment = val_assignment(local_idx);
    auto ap = AssignmentPartRef{assignment, part};
    if (ap.fixed_assignment()) {
      // fixed registers do not need to be spilled
      return true;
    }

    if (!ap.modified()) {
      // no need to spill if the value was not modified
      return false;
    }

    if (ap.variable_ref()) {
      return false;
    }

    const auto &liveness = analyzer.liveness_info(local_idx);
    if (liveness.last <= cur_block_idx) {
      // no need to spill value if it dies immediately after the block
      return false;
    }

    if (must_spill) {
      spill(ap);
      return false;
    }

    // spill if the value is alive in any successor that is not the next
    // block
    for (const IRBlockRef succ : adaptor->block_succs(cur_block_ref)) {
      const auto block_idx = analyzer.block_idx(succ);
      if (static_cast<u32>(block_idx) == static_cast<u32>(cur_block_idx) + 1) {
        continue;
      }
      if (block_idx >= liveness.first && block_idx <= liveness.last) {
        spill(ap);
        return false;
      }
    }

    return false;
  };

  auto spilled = RegBitSet{};
  // TODO(ts): just use register_file.used_nonfixed_regs()?
  for (auto reg : register_file.used_regs()) {
    const auto reg_fixed = spill_reg_if_needed(reg);
    // remove from register assignment if the next block cannot rely on the
    // value being in the specific register
    if (!reg_fixed && must_spill) {
      // TODO(ts): this needs to be changed if this is supposed to work
      // with other RegisterFile implementations
      spilled |= RegBitSet{1ull} << reg;
    }
  }

  return spilled;
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::release_spilled_regs(
    typename RegisterFile::RegBitSet regs) noexcept {
  assert(may_change_value_state());

  // TODO(ts): needs changes for other RegisterFile impls
  for (auto reg_id : util::BitSetIterator<>{regs}) {
    auto reg = AsmReg{reg_id};
    if (!register_file.is_used(reg)) {
      continue;
    }
    if (register_file.is_fixed(reg)) {
      // we don't need to release fixed assignments but they should be
      // real assignments from values
      assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);
      continue;
    }

    free_reg(reg);
  }
}

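// Sketch of the intended pairing around a conditional branch (the emission
// steps in the middle are placeholders):
//
//   auto spilled = spill_before_branch();
//   begin_branch_region();
//   // ... move_to_phi_nodes(target), emit branch instructions ...
//   end_branch_region();
//   release_spilled_regs(spilled);
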
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::
    release_regs_after_return() noexcept {
  // we essentially have to free all non-fixed registers
  for (auto reg_id : register_file.used_nonfixed_regs()) {
    free_reg(Reg{reg_id});
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::move_to_phi_nodes_impl(
    BlockIndex target) noexcept {
  // PHI-nodes are always moved to their stack-slot (unless they are fixed)
  //
  // However, we need to take care of PHI-dependencies (cycles and chains)
  // so as to not overwrite values which might be needed.
  //
  // In most cases, we expect the number of PHIs to be small, but we want to
  // stay reasonably efficient even with larger numbers of PHIs.

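  // Example: for the PHI cycle a = phi(..., b) and b = phi(..., a), neither
  // move can go first without clobbering the other value, so one of the two
  // values is copied into a temporary (register pair or stack slot, see
  // below) to break the cycle. Chains like a = phi(b), b = phi(c) are
  // instead ordered via the ref-counting below.
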
1402 struct ScratchWrapper {
1403 Derived *self;
1404 AsmReg cur_reg = AsmReg::make_invalid();
1405 bool backed_up = false;
1406 bool was_modified = false;
1407 u8 part = 0;
1408 ValLocalIdx local_idx = INVALID_VAL_LOCAL_IDX;
1409
1410 ScratchWrapper(Derived *self) : self{self} {}
1411
1412 ~ScratchWrapper() { reset(); }
1413
1414 void reset() {
1415 if (cur_reg.invalid()) {
1416 return;
1417 }
1418
1419 self->register_file.unmark_fixed(cur_reg);
1420 self->register_file.unmark_used(cur_reg);
1421
1422 if (backed_up) {
1423 // restore the register state
1424 // TODO(ts): do we actually need the reload?
1425 auto *assignment = self->val_assignment(local_idx);
1426 // check if the value was free'd, then we dont need to restore
1427 // it
1428 if (assignment) {
1429 auto ap = AssignmentPartRef{assignment, part};
1430 if (!ap.variable_ref()) {
1431 // TODO(ts): assert that this always happens?
1432 assert(ap.stack_valid());
1433 self->load_from_stack(cur_reg, ap.frame_off(), ap.part_size());
1434 }
1435 ap.set_reg(cur_reg);
1436 ap.set_register_valid(true);
1437 ap.set_modified(was_modified);
1438 self->register_file.mark_used(cur_reg, local_idx, part);
1439 }
1440 backed_up = false;
1441 }
1442 cur_reg = AsmReg::make_invalid();
1443 }
1444
1445 AsmReg alloc_from_bank(RegBank bank) {
1446 if (cur_reg.valid() && self->register_file.reg_bank(cur_reg) == bank) {
1447 return cur_reg;
1448 }
1449 if (cur_reg.valid()) {
1450 reset();
1451 }
1452
1453 // TODO(ts): try to first find a non callee-saved/clobbered
1454 // register...
1455 auto &reg_file = self->register_file;
1456 auto reg = reg_file.find_first_free_excluding(bank, 0);
1457 if (reg.invalid()) {
1458 // TODO(ts): use clock here?
1459 reg = reg_file.find_first_nonfixed_excluding(bank, 0);
1460 if (reg.invalid()) {
1461 TPDE_FATAL("ran out of registers for scratch registers");
1462 }
1463
1464 backed_up = true;
1465 local_idx = reg_file.reg_local_idx(reg);
1466 part = reg_file.reg_part(reg);
1467 AssignmentPartRef ap{self->val_assignment(local_idx), part};
1468 was_modified = ap.modified();
1469 // TODO(ts): this does not spill for variable refs
1470 // We don't use evict_reg here, as we know that we can't change the
1471 // value state.
1472 assert(ap.register_valid() && ap.get_reg() == reg);
1473 if (!ap.stack_valid() && !ap.variable_ref()) {
1474 self->allocate_spill_slot(ap);
1475 self->spill_reg(ap.get_reg(), ap.frame_off(), ap.part_size());
1476 ap.set_stack_valid();
1477 }
1478 ap.set_register_valid(false);
1479 reg_file.unmark_used(reg);
1480 }
1481
1482 reg_file.mark_used(reg, INVALID_VAL_LOCAL_IDX, 0);
1483 reg_file.mark_clobbered(reg);
1484 reg_file.mark_fixed(reg);
1485 cur_reg = reg;
1486 return reg;
1487 }
1488
1489 ScratchWrapper &operator=(const ScratchWrapper &) = delete;
1490 ScratchWrapper &operator=(ScratchWrapper &&) = delete;
1491 };
1492
1493 IRBlockRef target_ref = analyzer.block_ref(target);
1494 IRBlockRef cur_ref = analyzer.block_ref(cur_block_idx);
1495
1496 // collect all the nodes
1497 struct NodeEntry {
1498 IRValueRef phi;
1499 IRValueRef incoming_val;
1500 ValLocalIdx phi_local_idx;
1501 // local idx of same-block phi node that needs special handling
1502 ValLocalIdx incoming_phi_local_idx = INVALID_VAL_LOCAL_IDX;
1503 // bool incoming_is_phi;
1504 u32 ref_count;
1505
1506 bool operator<(const NodeEntry &other) const noexcept {
1507 return phi_local_idx < other.phi_local_idx;
1508 }
1509
1510 bool operator<(ValLocalIdx other) const noexcept {
1511 return phi_local_idx < other;
1512 }
1513 };
1514
1515 util::SmallVector<NodeEntry, 16> nodes;
1516 for (IRValueRef phi : adaptor->block_phis(target_ref)) {
1517 ValLocalIdx phi_local_idx = adaptor->val_local_idx(phi);
1518 auto incoming = adaptor->val_as_phi(phi).incoming_val_for_block(cur_ref);
1519 nodes.emplace_back(NodeEntry{
1520 .phi = phi, .incoming_val = incoming, .phi_local_idx = phi_local_idx});
1521 }
1522
1523 // We check that the block has phi nodes before getting here.
1524 assert(!nodes.empty() && "block marked has having phi nodes has none");
1525
1526 ScratchWrapper scratch{derived()};
1527 const auto move_to_phi = [this, &scratch](IRValueRef phi,
1528 IRValueRef incoming_val) {
1529 // TODO(ts): if phi==incoming_val, we should be able to elide the move
1530 // even if the phi is in a fixed register, no?
1531
1532 auto phi_vr = derived()->result_ref(phi);
1533 auto val_vr = derived()->val_ref(incoming_val);
1534 if (phi == incoming_val) {
1535 return;
1536 }
1537
1538 u32 part_count = phi_vr.assignment()->part_count;
1539 for (u32 i = 0; i < part_count; ++i) {
1540 AssignmentPartRef phi_ap{phi_vr.assignment(), i};
1541
1542 AsmReg reg{};
1543 ValuePartRef val_vpr = val_vr.part(i);
1544 if (val_vpr.is_const()) {
1545 reg = scratch.alloc_from_bank(val_vpr.bank());
1546 val_vpr.reload_into_specific_fixed(reg);
1547 } else if (val_vpr.assignment().register_valid() ||
1548 val_vpr.assignment().fixed_assignment()) {
1549 reg = val_vpr.assignment().get_reg();
1550 } else {
1551 reg = val_vpr.reload_into_specific_fixed(
1552 this, scratch.alloc_from_bank(phi_ap.bank()));
1553 }
1554
1555 if (phi_ap.fixed_assignment()) {
1556 derived()->mov(phi_ap.get_reg(), reg, phi_ap.part_size());
1557 } else {
1558 allocate_spill_slot(phi_ap);
1559 derived()->spill_reg(reg, phi_ap.frame_off(), phi_ap.part_size());
1560 phi_ap.set_stack_valid();
1561 }
1562 }
1563 };
1564
1565 if (nodes.size() == 1) {
1566 move_to_phi(nodes[0].phi, nodes[0].incoming_val);
1567 return;
1568 }
1569
1570 // sort so we can binary search later
1571 std::sort(nodes.begin(), nodes.end());
1572
1573 // fill in the refcount
1574 auto all_zero_ref = true;
1575 for (auto &node : nodes) {
1576 // We don't need to do anything for PHIs that don't reference other PHIs or
1577 // self-referencing PHIs.
1578 bool incoming_is_phi = adaptor->val_is_phi(node.incoming_val);
1579 if (!incoming_is_phi || node.incoming_val == node.phi) {
1580 continue;
1581 }
1582
1583 ValLocalIdx inc_local_idx = adaptor->val_local_idx(node.incoming_val);
1584 auto it = std::lower_bound(nodes.begin(), nodes.end(), inc_local_idx);
1585 if (it == nodes.end() || it->phi != node.incoming_val) {
1586 // Incoming value is a PHI node, but it's not from our block, so we don't
1587 // need to be particularly careful when assigning values.
1588 continue;
1589 }
1590 node.incoming_phi_local_idx = inc_local_idx;
1591 ++it->ref_count;
1592 all_zero_ref = false;
1593 }
1594
1595 if (all_zero_ref) {
1596 // no cycles/chain that we need to take care of
1597 for (auto &node : nodes) {
1598 move_to_phi(node.phi, node.incoming_val);
1599 }
1600 return;
1601 }
1602
1603 // TODO(ts): this is rather inefficient...
1604 util::SmallVector<u32, 32> ready_indices;
1605 ready_indices.reserve(nodes.size());
1606 util::SmallBitSet<256> waiting_nodes;
1607 waiting_nodes.resize(nodes.size());
1608 for (u32 i = 0; i < nodes.size(); ++i) {
1609 if (nodes[i].ref_count) {
1610 waiting_nodes.mark_set(i);
1611 } else {
1612 ready_indices.push_back(i);
1613 }
1614 }
1615
1616 u32 handled_count = 0;
1617 u32 cur_tmp_part_count = 0;
1618 i32 cur_tmp_slot = 0;
1619 u32 cur_tmp_slot_size = 0;
1620 IRValueRef cur_tmp_val = Adaptor::INVALID_VALUE_REF;
1621 ScratchWrapper tmp_reg1{derived()}, tmp_reg2{derived()};
1622
1623 const auto move_from_tmp_phi = [&](IRValueRef target_phi) {
1624 auto phi_vr = val_ref(target_phi);
1625 if (cur_tmp_part_count <= 2) {
1626 AssignmentPartRef ap{phi_vr.assignment(), 0};
1627 assert(!tmp_reg1.cur_reg.invalid());
1628 if (ap.fixed_assignment()) {
1629 derived()->mov(ap.get_reg(), tmp_reg1.cur_reg, ap.part_size());
1630 } else {
1631 derived()->spill_reg(tmp_reg1.cur_reg, ap.frame_off(), ap.part_size());
1632 }
1633
1634 if (cur_tmp_part_count == 2) {
1635 AssignmentPartRef ap_high{phi_vr.assignment(), 1};
1636 assert(!ap_high.fixed_assignment());
1637 assert(!tmp_reg2.cur_reg.invalid());
1638 derived()->spill_reg(
1639 tmp_reg2.cur_reg, ap_high.frame_off(), ap_high.part_size());
1640 }
1641 return;
1642 }
1643
1644 for (u32 i = 0; i < cur_tmp_part_count; ++i) {
1645 AssignmentPartRef phi_ap{phi_vr.assignment(), i};
1646 assert(!phi_ap.fixed_assignment());
1647
1648 auto slot_off = cur_tmp_slot + phi_ap.part_off();
1649 auto reg = tmp_reg1.alloc_from_bank(phi_ap.bank());
1650 derived()->load_from_stack(reg, slot_off, phi_ap.part_size());
1651 derived()->spill_reg(reg, phi_ap.frame_off(), phi_ap.part_size());
1652 }
1653 };
1654
1655 while (handled_count != nodes.size()) {
1656 if (ready_indices.empty()) {
1657 // need to break a cycle
1658 auto cur_idx_opt = waiting_nodes.first_set();
1659 assert(cur_idx_opt);
1660 auto cur_idx = *cur_idx_opt;
1661 assert(nodes[cur_idx].ref_count == 1);
1662 assert(cur_tmp_val == Adaptor::INVALID_VALUE_REF);
1663
1664 auto phi_val = nodes[cur_idx].phi;
1665 auto phi_vr = this->val_ref(phi_val);
1666 auto *assignment = phi_vr.assignment();
1667 cur_tmp_part_count = assignment->part_count;
1668 cur_tmp_val = phi_val;
1669
1670 if (cur_tmp_part_count > 2) {
1671 // use a stack slot to store the temporaries
1672 cur_tmp_slot_size = assignment->size();
1673 cur_tmp_slot = allocate_stack_slot(cur_tmp_slot_size);
1674
1675 for (u32 i = 0; i < cur_tmp_part_count; ++i) {
1676 auto ap = AssignmentPartRef{assignment, i};
1677 assert(!ap.fixed_assignment());
1678 auto slot_off = cur_tmp_slot + ap.part_off();
1679
1680 if (ap.register_valid()) {
1681 auto reg = ap.get_reg();
1682 derived()->spill_reg(reg, slot_off, ap.part_size());
1683 } else {
1684 auto reg = tmp_reg1.alloc_from_bank(ap.bank());
1685 assert(ap.stack_valid());
1686 derived()->load_from_stack(reg, ap.frame_off(), ap.part_size());
1687 derived()->spill_reg(reg, slot_off, ap.part_size());
1688 }
1689 }
1690 } else {
1691 // TODO(ts): if the PHI is not fixed, then we can just reuse its
1692 // register if it has one
1693 auto phi_vpr = phi_vr.part(0);
1694 auto reg = tmp_reg1.alloc_from_bank(phi_vpr.bank());
1695 phi_vpr.reload_into_specific_fixed(this, reg);
1696
1697 if (cur_tmp_part_count == 2) {
1698 // TODO(ts): just change the part ref on the lower ref?
1699 auto phi_vpr_high = phi_vr.part(1);
1700 auto reg_high = tmp_reg2.alloc_from_bank(phi_vpr_high.bank());
1701 phi_vpr_high.reload_into_specific_fixed(this, reg_high);
1702 }
1703 }
1704
1705 nodes[cur_idx].ref_count = 0;
1706 ready_indices.push_back(cur_idx);
1707 waiting_nodes.mark_unset(cur_idx);
1708 }
1709
1710 for (u32 i = 0; i < ready_indices.size(); ++i) {
1711 ++handled_count;
1712 auto cur_idx = ready_indices[i];
1713 auto phi_val = nodes[cur_idx].phi;
1714 IRValueRef incoming_val = nodes[cur_idx].incoming_val;
1715 if (incoming_val == phi_val) {
1716 // no need to do anything, except ref-counting
1717 (void)val_ref(incoming_val);
1718 (void)val_ref(phi_val);
1719 continue;
1720 }
1721
1722 if (incoming_val == cur_tmp_val) {
1723 move_from_tmp_phi(phi_val);
1724
1725 if (cur_tmp_part_count > 2) {
1726 free_stack_slot(cur_tmp_slot, cur_tmp_slot_size);
1727 cur_tmp_slot = 0xFFFF'FFFF;
1728 cur_tmp_slot_size = 0;
1729 }
1730 cur_tmp_val = Adaptor::INVALID_VALUE_REF;
1731 // skip the code below as the ref count of the temp val is
1732 // already 0 and we don't want to reinsert it into the ready
1733 // list
1734 continue;
1735 }
1736
1737 move_to_phi(phi_val, incoming_val);
1738
1739 if (nodes[cur_idx].incoming_phi_local_idx == INVALID_VAL_LOCAL_IDX) {
1740 continue;
1741 }
1742
1743 auto it = std::lower_bound(
1744 nodes.begin(), nodes.end(), nodes[cur_idx].incoming_phi_local_idx);
1745 assert(it != nodes.end() && it->phi == incoming_val &&
1746 "incoming_phi_local_idx set incorrectly");
1747
1748 assert(it->ref_count > 0);
1749 if (--it->ref_count == 0) {
1750 auto node_idx = static_cast<u32>(it - nodes.begin());
1751 ready_indices.push_back(node_idx);
1752 waiting_nodes.mark_unset(node_idx);
1753 }
1754 }
1755 ready_indices.clear();
1756 }
1757}
1758
1759template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1760typename CompilerBase<Adaptor, Derived, Config>::BlockIndex
1761 CompilerBase<Adaptor, Derived, Config>::next_block() const noexcept {
1762 return static_cast<BlockIndex>(static_cast<u32>(cur_block_idx) + 1);
1763}
1764
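// Returns the indirection symbol for the current function's personality
// function, creating it on first use: an 8-byte zero-initialized symbol in
// the read-only data section carrying an absolute relocation to the
// personality function. Symbols are cached in personality_syms so every
// personality function gets exactly one slot. Returns an invalid SymRef if
// the function needs no unwind info or has no personality function.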
1765template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1766typename CompilerBase<Adaptor, Derived, Config>::Assembler::SymRef
1767 CompilerBase<Adaptor, Derived, Config>::get_personality_sym() noexcept {
1768 using SymRef = typename Assembler::SymRef;
1769 SymRef personality_sym;
1770 if (this->adaptor->cur_needs_unwind_info()) {
1771 SymRef personality_func = derived()->cur_personality_func();
1772 if (personality_func.valid()) {
1773 for (const auto &[fn_sym, ptr_sym] : personality_syms) {
1774 if (fn_sym == personality_func) {
1775 personality_sym = ptr_sym;
1776 break;
1777 }
1778 }
1779
1780 if (!personality_sym.valid()) {
1781 // create symbol that contains the address of the personality
1782 // function
1783 u32 off;
1784 static constexpr std::array<u8, 8> zero{};
1785
1786 auto rodata = this->assembler.get_data_section(true, true);
1787 personality_sym = this->assembler.sym_def_data(
1788 rodata, "", zero, 8, Assembler::SymBinding::LOCAL, &off);
1789 this->assembler.reloc_abs(rodata, personality_func, off, 0);
1790
1791 personality_syms.emplace_back(personality_func, personality_sym);
1792 }
1793 }
1794 }
1795 return personality_sym;
1796}
1797
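// Per-function compilation driver: switches the adaptor and analyzer to
// func, resets per-function state (stack frame, value assignments,
// register file, block labels), emits the prologue and argument moves,
// assigns frame slots to static allocas, compiles all blocks in layout
// order, and finally clears the remaining variable-ref assignments.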
1798template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1799bool CompilerBase<Adaptor, Derived, Config>::compile_func(
1800 const IRFuncRef func, const u32 func_idx) noexcept {
1801 if (!adaptor->switch_func(func)) {
1802 return false;
1803 }
1804 derived()->analysis_start();
1805 analyzer.switch_func(func);
1806 derived()->analysis_end();
1807
1808#ifndef NDEBUG
1809 stack.frame_size = ~0u;
1810#endif
1811 for (auto &e : stack.fixed_free_lists) {
1812 e.clear();
1813 }
1814 stack.dynamic_free_lists.clear();
1815
1816 assignments.cur_fixed_assignment_count = {};
1817 assert(std::ranges::none_of(assignments.value_ptrs, std::identity{}));
1818 if (assignments.value_ptrs.size() < analyzer.liveness.size()) {
1819 assignments.value_ptrs.resize(analyzer.liveness.size());
1820 }
1821
1822 assignments.allocator.reset();
1823 assignments.variable_ref_list = INVALID_VAL_LOCAL_IDX;
1824 assignments.delayed_free_lists.clear();
1825 assignments.delayed_free_lists.resize(analyzer.block_layout.size(),
1826 INVALID_VAL_LOCAL_IDX);
1827
1828 cur_block_idx =
1829 static_cast<BlockIndex>(analyzer.block_idx(adaptor->cur_entry_block()));
1830
1831 register_file.reset();
1832#ifndef NDEBUG
1833 generating_branch = false;
1834#endif
1835
1836 // Simple heuristic for the initial allocation size: eight bytes per instruction plus a 64-byte margin.
1837 u32 expected_code_size = 0x8 * analyzer.num_insts + 0x40;
1838 this->text_writer.growth_size = expected_code_size;
1839 this->text_writer.ensure_space(expected_code_size);
1840
1841 derived()->start_func(func_idx);
1842
1843 block_labels.clear();
1844 block_labels.resize_uninitialized(analyzer.block_layout.size());
1845 for (u32 i = 0; i < analyzer.block_layout.size(); ++i) {
1846 block_labels[i] = assembler.label_create();
1847 }
1848
1849 // TODO(ts): place function label
1850 // TODO(ts): make function labels optional?
1851
1852 CCAssigner *cc_assigner = derived()->cur_cc_assigner();
1853 assert(cc_assigner != nullptr);
1854
1855 register_file.allocatable = cc_assigner->get_ccinfo().allocatable_regs;
1856
1857 // This initializes the stack frame, which must reserve space for
1858 // callee-saved registers, vararg save area, etc.
1859 cc_assigner->reset();
1860 derived()->gen_func_prolog_and_args(cc_assigner);
1861
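// Static allocas receive fixed frame slots up front: each becomes a
// variable-ref assignment whose frame_off points to a stack slot of the
// alloca's size rounded up to its alignment.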
1862 for (const IRValueRef alloca : adaptor->cur_static_allocas()) {
1863 auto size = adaptor->val_alloca_size(alloca);
1864 size = util::align_up(size, adaptor->val_alloca_align(alloca));
1865
1866 ValLocalIdx local_idx = adaptor->val_local_idx(alloca);
1867 init_variable_ref(local_idx, 0);
1868 ValueAssignment *assignment = val_assignment(local_idx);
1869 assignment->stack_variable = true;
1870 assignment->frame_off = allocate_stack_slot(size);
1871 }
1872
1873 if constexpr (!Config::DEFAULT_VAR_REF_HANDLING) {
1874 derived()->setup_var_ref_assignments();
1875 }
1876
1877 for (u32 i = 0; i < analyzer.block_layout.size(); ++i) {
1878 const auto block_ref = analyzer.block_layout[i];
1879 TPDE_LOG_TRACE(
1880 "Compiling block {} ({})", i, adaptor->block_fmt_ref(block_ref));
1881 if (!derived()->compile_block(block_ref, i)) [[unlikely]] {
1882 TPDE_LOG_ERR("Failed to compile block {} ({})",
1883 i,
1884 adaptor->block_fmt_ref(block_ref));
1885 // Ensure invariant that value_ptrs only contains nullptr at the end.
1886 assignments.value_ptrs.clear();
1887 return false;
1888 }
1889 }
1890
1891 // Reset all variable-ref assignment pointers to nullptr.
1892 ValLocalIdx variable_ref_list = assignments.variable_ref_list;
1893 while (variable_ref_list != INVALID_VAL_LOCAL_IDX) {
1894 u32 idx = u32(variable_ref_list);
1895 ValLocalIdx next = assignments.value_ptrs[idx]->next_delayed_free_entry;
1896 assignments.value_ptrs[idx] = nullptr;
1897 variable_ref_list = next;
1898 }
1899
1900 assert(std::ranges::none_of(assignments.value_ptrs, std::identity{}) &&
1901 "found non-freed ValueAssignment, maybe missing ref-count?");
1902
1903 derived()->finish_func(func_idx);
1904
1905 return true;
1906}
1907
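// Compiles a single block: places the block's label, compiles each
// non-fused instruction (passing the range of remaining instructions so
// derived compilers can fuse ahead), and afterwards frees all assignments
// queued on this block's delayed-free list.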
1908template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
1909bool CompilerBase<Adaptor, Derived, Config>::compile_block(
1910 const IRBlockRef block, const u32 block_idx) noexcept {
1911 cur_block_idx =
1912 static_cast<typename Analyzer<Adaptor>::BlockIndex>(block_idx);
1913
1914 label_place(block_labels[block_idx]);
1915 auto &&val_range = adaptor->block_insts(block);
1916 auto end = val_range.end();
1917 for (auto it = val_range.begin(); it != end; ++it) {
1918 const IRInstRef inst = *it;
1919 if (this->adaptor->inst_fused(inst)) {
1920 continue;
1921 }
1922
1923 auto it_cpy = it;
1924 ++it_cpy;
1925 if (!derived()->compile_inst(inst, InstRange{.from = it_cpy, .to = end}))
1926 [[unlikely]] {
1927 TPDE_LOG_ERR("Failed to compile instruction {}",
1928 this->adaptor->inst_fmt_ref(inst));
1929 return false;
1930 }
1931 }
1932
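// Walk this block's delayed-free list; ~0u is the INVALID_VAL_LOCAL_IDX
// sentinel terminating the intrusive list. Presumably these are
// assignments whose last use lies in this block but whose storage had to
// remain valid until the block's end.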
1933 if (static_cast<u32>(assignments.delayed_free_lists[block_idx]) != ~0u) {
1934 auto list_entry = assignments.delayed_free_lists[block_idx];
1935 while (static_cast<u32>(list_entry) != ~0u) {
1936 auto next_entry = assignments.value_ptrs[static_cast<u32>(list_entry)]
1937 ->next_delayed_free_entry;
1938 derived()->free_assignment(list_entry);
1939 list_entry = next_entry;
1940 }
1941 }
1942 return true;
1943}
1944
1945} // namespace tpde