  template <typename Jump>
  void generate_branch_to_block(Jump jmp,
                                IRBlockRef target,
                                bool needs_split,
                                bool last_inst);

  void move_to_phi_nodes(BlockIndex target) {
    if (analyzer.block_has_phis(target)) {
      move_to_phi_nodes_impl(target);
    }
  }

  void move_to_phi_nodes_impl(BlockIndex target);

  bool branch_needs_split(IRBlockRef target) {
    // For now, split whenever the target has PHI nodes.
    return analyzer.block_has_phis(target);
  }

  BlockIndex next_block() const;

  bool try_force_fixed_assignment(IRValueRef) const { return false; }

  bool hook_post_func_sym_init() { return true; }

  void analysis_start() {}

  void analysis_end() {}

  void reloc_text(SymRef sym, u32 type, u64 offset, i64 addend = 0) {
    this->assembler.reloc_sec(
        text_writer.get_sec_ref(), sym, type, offset, addend);
  }

  void label_place(Label label) {
    this->text_writer.label_place(label, text_writer.offset());
  }

  SymRef get_personality_sym();

  bool compile_func(IRFuncRef func, u32 func_idx);

  bool compile_block(IRBlockRef block, u32 block_idx);
};

#include "GenericValuePart.hpp"
#include "ScratchReg.hpp"
#include "ValuePartRef.hpp"
#include "ValueRef.hpp"

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
template <typename CBDerived>
void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
    CBDerived>::add_arg(ValuePart &&vp, CCAssignment cca) {
  if (!cca.byval && cca.bank == RegBank{}) {
    cca.bank = vp.bank();
    cca.size = vp.part_size();
  }

  assigner.assign_arg(cca);
  bool needs_ext = cca.int_ext != 0;
  bool ext_sign = cca.int_ext >> 7;
  unsigned ext_bits = cca.int_ext & 0x3f;

  if (cca.byval) {
    derived()->add_arg_byval(vp, cca);
    vp.reset(&compiler);
  } else if (!cca.reg.valid()) {
    if (needs_ext) {
      auto ext = std::move(vp).into_extended(&compiler, ext_sign, ext_bits, 64);
      derived()->add_arg_stack(ext, cca);
      ext.reset(&compiler);
    } else {
      derived()->add_arg_stack(vp, cca);
      vp.reset(&compiler);
    }
  } else {
    u32 size = vp.part_size();
    if (vp.is_in_reg(cca.reg)) {
      if (!vp.can_salvage()) {
        compiler.evict_reg(cca.reg);
      } else {
        vp.salvage(&compiler);
      }
      if (needs_ext) {
        compiler.generate_raw_intext(cca.reg, cca.reg, ext_sign, ext_bits, 64);
      }
    } else {
      if (compiler.register_file.is_used(cca.reg)) {
        compiler.evict_reg(cca.reg);
      }
      if (vp.can_salvage()) {
        AsmReg vp_reg = vp.salvage(&compiler);
        if (needs_ext) {
          compiler.generate_raw_intext(cca.reg, vp_reg, ext_sign, ext_bits, 64);
        } else {
          compiler.mov(cca.reg, vp_reg, size);
        }
      } else if (needs_ext && vp.is_const()) {
        u64 val = vp.const_data()[0];
        u64 extended =
            ext_sign ? util::sext(val, ext_bits) : util::zext(val, ext_bits);
        compiler.materialize_constant(&extended, cca.bank, 8, cca.reg);
      } else {
        vp.reload_into_specific_fixed(&compiler, cca.reg);
        if (needs_ext) {
          compiler.generate_raw_intext(
              cca.reg, cca.reg, ext_sign, ext_bits, 64);
        }
      }
    }

    // The argument register stays blocked until call_impl() has emitted the
    // call.
    assert(!compiler.register_file.is_used(cca.reg));
    compiler.register_file.mark_clobbered(cca.reg);
    compiler.register_file.allocatable &= ~(u64{1} << cca.reg.id());
    arg_regs |= (1ull << cca.reg.id());
  }
}

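// The CallArg overload below fans a multi-part IR value out into individual
// CCAssignments; `consecutive` tells the assigner how many of the following
// parts still have to be placed in consecutive registers when splitting a
// value between registers and stack is not allowed.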
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
template <typename CBDerived>
void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
    CBDerived>::add_arg(CallArg &&arg) {
  ValueRef vr = compiler.val_ref(arg.value);
  const u32 part_count = compiler.val_parts(arg.value).count();

  if (arg.flag == CallArg::Flag::byval) {
    assert(part_count == 1);
    derived()->add_arg(vr.part(0),
                       CCAssignment{
                           .byval = true,
                           .align = arg.byval_align,
                           .size = arg.byval_size,
                       });
    return;
  }

  // ... (computes `align` and `allow_split` for the value)

  for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
    // (extension-flag packing reconstructed; mirrors the int_ext decoding in
    //  add_arg above)
    u8 ext = 0;
    if (arg.flag == CallArg::Flag::sext || arg.flag == CallArg::Flag::zext) {
      assert(arg.ext_bits != 0 && "cannot extend zero-bit integer");
      ext = u8((arg.flag == CallArg::Flag::sext) << 7 | arg.ext_bits);
    }
    u32 remaining = part_count < 256 ? part_count - part_idx - 1 : 255;
    derived()->add_arg(vr.part(part_idx),
                       CCAssignment{
                           .consecutive = u8(allow_split ? 0 : remaining),
                           .sret = arg.flag == CallArg::Flag::sret,
                           .align = u8(part_idx == 0 ? align : 1),
                           .int_ext = ext,
                       });
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
template <typename CBDerived>
void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<CBDerived>::call(
    std::variant<SymRef, ValuePart> target) {
  assert(!compiler.stack.is_leaf_function && "leaf func must not have calls");
  compiler.stack.generated_call = true;

  // Evict all call-clobbered registers, except the argument registers and a
  // register holding the call target itself.
  typename RegisterFile::RegBitSet skip_evict = arg_regs;
  if (auto *vp = std::get_if<ValuePart>(&target); vp && vp->can_salvage()) {
    assert(vp->cur_reg_unlocked().valid() && "can_salvage implies register");
    skip_evict |= (1ull << vp->cur_reg_unlocked().id());
  }

  auto clobbered = ~assigner.get_ccinfo().callee_saved_regs;
  for (auto reg_id : util::BitSetIterator<>{compiler.register_file.used &
                                            clobbered & ~skip_evict}) {
    compiler.evict_reg(AsmReg{reg_id});
    compiler.register_file.mark_clobbered(Reg{reg_id});
  }

  derived()->call_impl(std::move(target));

  assert((compiler.register_file.allocatable & arg_regs) == 0);
  compiler.register_file.allocatable |= arg_regs;
}

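// After the call, return values flow the other way: assign_ret picks the
// return register for each part and the ValuePart is bound to that register.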
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
template <typename CBDerived>
void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
    CBDerived>::add_ret(ValuePart &&vp, CCAssignment cca) {
  cca.bank = vp.bank();
  cca.size = vp.part_size();
  assigner.assign_ret(cca);
  assert(cca.reg.valid() && "return value must be in register");
  vp.set_value_reg(&compiler, cca.reg);
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
template <typename CBDerived>
void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
    CBDerived>::add_ret(ValueRef &vr) {
  assert(vr.has_assignment());
  u32 part_count = vr.assignment()->part_count;
  for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
    CCAssignment cca{};
    add_ret(vr.part(part_idx), cca);
  }
}

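// RetBuilder is the analogous helper for the function's own returns. A
// hypothetical backend return lowering (constructor arguments assumed here,
// not taken from the original source) would look like:
//
//   RetBuilder rb{*derived(), *cc_assigner};
//   rb.add(ret_val);  // move every part into its return register
//   rb.ret();         // emit the epilogue and release the registers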
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::RetBuilder::add(ValuePart &&vp,
                                                             CCAssignment cca) {
  cca.bank = vp.bank();
  u32 size = cca.size = vp.part_size();
  assigner.assign_ret(cca);
  assert(cca.reg.valid() && "indirect return value must use sret argument");

  bool needs_ext = cca.int_ext != 0;
  bool ext_sign = cca.int_ext >> 7;
  unsigned ext_bits = cca.int_ext & 0x3f;

  if (vp.is_in_reg(cca.reg)) {
    if (!vp.can_salvage()) {
      compiler.evict_reg(cca.reg);
    } else {
      vp.salvage(&compiler);
    }
    if (needs_ext) {
      compiler.generate_raw_intext(cca.reg, cca.reg, ext_sign, ext_bits, 64);
    }
  } else {
    if (compiler.register_file.is_used(cca.reg)) {
      compiler.evict_reg(cca.reg);
    }
    if (vp.can_salvage()) {
      AsmReg vp_reg = vp.salvage(&compiler);
      if (needs_ext) {
        compiler.generate_raw_intext(cca.reg, vp_reg, ext_sign, ext_bits, 64);
      } else {
        compiler.mov(cca.reg, vp_reg, size);
      }
    } else {
      vp.reload_into_specific_fixed(&compiler, cca.reg);
      if (needs_ext) {
        compiler.generate_raw_intext(cca.reg, cca.reg, ext_sign, ext_bits, 64);
      }
    }
  }

  assert(!compiler.register_file.is_used(cca.reg));
  compiler.register_file.mark_clobbered(cca.reg);
  compiler.register_file.allocatable &= ~(u64{1} << cca.reg.id());
  ret_regs |= (1ull << cca.reg.id());
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::RetBuilder::add(IRValueRef val) {
  u32 part_count = compiler.val_parts(val).count();
  ValueRef vr = compiler.val_ref(val);
  for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
    add(vr.part(part_idx), CCAssignment{});
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::RetBuilder::ret() {
  assert((compiler.register_file.allocatable & ret_regs) == 0);
  compiler.register_file.allocatable |= ret_regs;

  compiler.gen_func_epilog();
  compiler.release_regs_after_return();
}

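// compile() is the module-level driver: first define or predeclare a symbol
// for every function (so calls between them can be relocated), then compile
// each non-external body.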
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
bool CompilerBase<Adaptor, Derived, Config>::compile() {
  text_writer.begin_module(assembler);
  text_writer.switch_section(
      assembler.get_section(assembler.get_default_section(SectionKind::Text)));

  assert(func_syms.empty());
  for (const IRFuncRef func : adaptor->funcs()) {
    auto binding = SymBinding::GLOBAL;
    if (adaptor->func_has_weak_linkage(func)) {
      binding = SymBinding::WEAK;
    } else if (adaptor->func_only_local(func)) {
      binding = SymBinding::LOCAL;
    }
    if (adaptor->func_extern(func)) {
      func_syms.push_back(derived()->assembler.sym_add_undef(
          adaptor->func_link_name(func), binding));
    } else {
      func_syms.push_back(derived()->assembler.sym_predef_func(
          adaptor->func_link_name(func), binding));
    }
    derived()->define_func_idx(func, func_syms.size() - 1);
  }

  if (!derived()->hook_post_func_sym_init()) {
    TPDE_LOG_ERR("hook_post_func_sym_init failed");
    return false;
  }

  u32 func_idx = 0;
  for (const IRFuncRef func : adaptor->funcs()) {
    if (adaptor->func_extern(func)) {
      TPDE_LOG_TRACE("Skipping compilation of func {}",
                     adaptor->func_link_name(func));
      ++func_idx;
      continue;
    }

    TPDE_LOG_TRACE("Compiling func {}", adaptor->func_link_name(func));
    if (!derived()->compile_func(func, func_idx)) {
      TPDE_LOG_ERR("Failed to compile function {}",
                   adaptor->func_link_name(func));
      return false;
    }
    ++func_idx;
  }

  text_writer.end_module();
  assembler.finalize();
  return true;
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::reset() {
  for (auto &e : stack.fixed_free_lists) {
    e.clear();
  }
  stack.dynamic_free_lists.clear();
  // ...
  block_labels.clear();
  personality_syms.clear();
}

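// init_assignment decides where a value will live for its whole lifetime.
// Single-part values may get a "fixed" register assignment that pins them to
// one register across blocks; the heuristic below only grants this while the
// bank's budget (Derived::NUM_FIXED_ASSIGNMENTS) is not exhausted and the
// value outlives the current block.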
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::init_assignment(
    IRValueRef value, ValLocalIdx local_idx) {
  assert(val_assignment(local_idx) == nullptr);
  TPDE_LOG_TRACE("Initializing assignment for value {}",
                 static_cast<u32>(local_idx));

  const auto parts = derived()->val_parts(value);
  const u32 part_count = parts.count();
  assert(part_count > 0);
  auto *assignment = assignments.allocator.allocate(part_count);
  assignments.value_ptrs[static_cast<u32>(local_idx)] = assignment;

  u32 max_part_size = 0;
  for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
    auto ap = AssignmentPartRef{assignment, part_idx};
    ap.set_bank(parts.reg_bank(part_idx));
    const u32 size = parts.size_bytes(part_idx);
    max_part_size = std::max(max_part_size, size);
    ap.set_part_size(size);
  }

  const auto &liveness = analyzer.liveness_info(local_idx);

  if (part_count == 1) {
    const auto &cur_loop =
        analyzer.loop_from_idx(analyzer.block_loop_idx(cur_block_idx));
    auto ap = AssignmentPartRef{assignment, 0};
    bool try_fixed =
        liveness.last > cur_block_idx &&
        cur_loop.definitions_in_childs +
                assignments.cur_fixed_assignment_count[ap.bank().id()] <
            Derived::NUM_FIXED_ASSIGNMENTS[ap.bank().id()];
    if (derived()->try_force_fixed_assignment(value)) {
      try_fixed = assignments.cur_fixed_assignment_count[ap.bank().id()] <
                  Derived::NUM_FIXED_ASSIGNMENTS[ap.bank().id()];
    }

    if (try_fixed) {
      AsmReg reg = derived()->select_fixed_assignment_reg(ap, value);
      TPDE_LOG_TRACE("Trying to assign fixed reg to value {}",
                     static_cast<u32>(local_idx));

      if (!reg.invalid() && !register_file.is_used(reg)) {
        TPDE_LOG_TRACE("Assigning fixed assignment to reg {} for value {}",
                       reg.id(),
                       static_cast<u32>(local_idx));
        ap.set_reg(reg);
        ap.set_register_valid(true);
        ap.set_fixed_assignment(true);
        register_file.mark_used(reg, local_idx, 0);
        register_file.inc_lock_count(reg);
        register_file.mark_clobbered(reg);
        ++assignments.cur_fixed_assignment_count[ap.bank().id()];
      }
    }
  }

  const auto last_full = liveness.last_full;
  const auto ref_count = liveness.ref_count;

  assert(max_part_size <= 256);
  assignment->max_part_size = max_part_size;
  assignment->pending_free = false;
  assignment->variable_ref = false;
  assignment->stack_variable = false;
  assignment->delay_free = last_full;
  assignment->part_count = part_count;
  assignment->frame_off = 0;
  assignment->references_left = ref_count;
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::free_assignment(
    ValLocalIdx local_idx, ValueAssignment *assignment) {
  TPDE_LOG_TRACE("Freeing assignment for value {}",
                 static_cast<u32>(local_idx));

  assert(assignments.value_ptrs[static_cast<u32>(local_idx)] == assignment);
  assignments.value_ptrs[static_cast<u32>(local_idx)] = nullptr;
  const auto is_var_ref = assignment->variable_ref;
  const u32 part_count = assignment->part_count;

  for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
    auto ap = AssignmentPartRef{assignment, part_idx};
    if (ap.fixed_assignment()) [[unlikely]] {
      const auto reg = ap.get_reg();
      assert(register_file.is_fixed(reg));
      assert(register_file.reg_local_idx(reg) == local_idx);
      assert(register_file.reg_part(reg) == part_idx);
      --assignments.cur_fixed_assignment_count[ap.bank().id()];
      register_file.dec_lock_count_must_zero(reg);
      register_file.unmark_used(reg);
    } else if (ap.register_valid()) {
      const auto reg = ap.get_reg();
      assert(!register_file.is_fixed(reg));
      register_file.unmark_used(reg);
    }
  }

  if constexpr (WithAsserts) {
    for (auto reg_id : register_file.used_regs()) {
      assert(register_file.reg_local_idx(AsmReg{reg_id}) != local_idx &&
             "freeing assignment that is still referenced by a register");
    }
  }

  bool has_stack = Config::FRAME_INDEXING_NEGATIVE ? assignment->frame_off < 0
                                                   : assignment->frame_off != 0;
  if (!is_var_ref && has_stack) {
    free_stack_slot(assignment->frame_off, assignment->size());
  }

  assignments.allocator.deallocate(assignment);
}

// (The exact name of this function was lost in extraction; it frees an
// assignment immediately or, for delay_free values, queues it on the
// delayed-free list of the last block of its live range.)
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
[[gnu::noinline]] void
    CompilerBase<Adaptor, Derived, Config>::free_assignment_delayed(
        ValLocalIdx local_idx, ValueAssignment *assignment) {
  if (!assignment->delay_free) {
    free_assignment(local_idx, assignment);
    return;
  }

  TPDE_LOG_TRACE("Delay freeing assignment for value {}",
                 static_cast<u32>(local_idx));
  const auto &liveness = analyzer.liveness_info(local_idx);
  auto &free_list_head = assignments.delayed_free_lists[u32(liveness.last)];
  assignment->next_delayed_free_entry = free_list_head;
  assignment->pending_free = true;
  free_list_head = local_idx;
}

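// Variable refs (e.g. allocas) own no data of their own; their single "part"
// is a pointer that can always be rematerialized. They are therefore kept on
// a separate list and unlinked at function end instead of being ref-counted.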
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::init_variable_ref(
    ValLocalIdx local_idx, u32 var_ref_data) {
  TPDE_LOG_TRACE("Initializing variable-ref assignment for value {}",
                 static_cast<u32>(local_idx));

  assert(val_assignment(local_idx) == nullptr);
  auto *assignment = assignments.allocator.allocate_slow(1, true);
  assignments.value_ptrs[static_cast<u32>(local_idx)] = assignment;

  assignment->max_part_size = Config::PLATFORM_POINTER_SIZE;
  assignment->variable_ref = true;
  assignment->stack_variable = false;
  assignment->part_count = 1;
  assignment->var_ref_custom_idx = var_ref_data;
  assignment->next_delayed_free_entry = assignments.variable_ref_list;

  assignments.variable_ref_list = local_idx;

  AssignmentPartRef ap{assignment, 0};
  ap.set_bank(Config::GP_BANK);
  ap.set_part_size(Config::PLATFORM_POINTER_SIZE);
}

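// Stack slot allocation is bucketed: sizes 1..16 round up to a power of two
// and are served from fixed free lists (bucket index = ceil(log2(size)), so
// e.g. size 5 -> bucket 3 -> an 8-byte slot aligned to 8 bytes); larger
// sizes round up to a multiple of 16 and use per-size dynamic free lists.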
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
i32 CompilerBase<Adaptor, Derived, Config>::allocate_stack_slot(u32 size) {
  this->stack.frame_used = true;
  unsigned align_bits = 4;
  if (size == 0) [[unlikely]] {
    return 0;
  } else if (size <= 16) {
    u32 free_list_idx = size == 1 ? 0 : 32 - util::cnt_lz<u32>(size - 1);
    assert(size <= 1u << free_list_idx);
    size = 1 << free_list_idx;
    align_bits = free_list_idx;

    if (!stack.fixed_free_lists[free_list_idx].empty()) {
      auto slot = stack.fixed_free_lists[free_list_idx].back();
      stack.fixed_free_lists[free_list_idx].pop_back();
      return slot;
    }
  } else {
    size = util::align_up(size, 16);
    auto it = stack.dynamic_free_lists.find(size);
    if (it != stack.dynamic_free_lists.end() && !it->second.empty()) {
      const auto slot = it->second.back();
      it->second.pop_back();
      return slot;
    }
  }

  assert(stack.frame_size != ~0u &&
         "cannot allocate stack slot before stack frame is initialized");

  // Align the frame up to the required alignment, donating the padding to
  // the smaller fixed free lists.
  for (u32 list_idx = util::cnt_tz(stack.frame_size); list_idx < align_bits;
       list_idx = util::cnt_tz(stack.frame_size)) {
    i32 slot = stack.frame_size;
    if constexpr (Config::FRAME_INDEXING_NEGATIVE) {
      slot = -(slot + (1ull << list_idx));
    }
    stack.fixed_free_lists[list_idx].push_back(slot);
    stack.frame_size += 1ull << list_idx;
  }

  auto slot = stack.frame_size;
  assert(slot != 0 && "stack slot 0 is reserved");
  stack.frame_size += size;

  if constexpr (Config::FRAME_INDEXING_NEGATIVE) {
    slot = -(slot + size);
  }
  return slot;
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::free_stack_slot(i32 slot,
                                                             u32 size) {
  if (size == 0) [[unlikely]] {
    assert(slot == 0 && "unexpected slot for zero-sized stack-slot?");
  } else if (size <= 16) [[likely]] {
    u32 free_list_idx = size == 1 ? 0 : 32 - util::cnt_lz<u32>(size - 1);
    stack.fixed_free_lists[free_list_idx].push_back(slot);
  } else {
    size = util::align_up(size, 16);
    stack.dynamic_free_lists[size].push_back(slot);
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::prologue_assign_arg(
    CCAssigner *cc_assigner, u32 arg_idx, IRValueRef arg) {
  ValueRef vr = derived()->result_ref(arg);
  if (adaptor->cur_arg_is_byval(arg_idx)) {
    CCAssignment cca{
        .byval = true,
        .align = u8(adaptor->cur_arg_byval_align(arg_idx)),
        .size = adaptor->cur_arg_byval_size(arg_idx),
    };
    cc_assigner->assign_arg(cca);
    std::optional<i32> byval_frame_off =
        derived()->prologue_assign_arg_part(vr.part(0), cca);

    if (byval_frame_off) {
      // The argument lives on the stack; replace the assignment with a
      // stack-variable reference.
      ValLocalIdx local_idx = val_idx(arg);
      if (ValueAssignment *assignment = val_assignment(local_idx)) {
        free_assignment(local_idx, assignment);
      }
      init_variable_ref(local_idx, 0);
      ValueAssignment *assignment = this->val_assignment(local_idx);
      assignment->stack_variable = true;
      assignment->frame_off = *byval_frame_off;
    }
    return;
  }

  if (adaptor->cur_arg_is_sret(arg_idx)) {
    assert(vr.assignment()->part_count == 1 && "sret must be single-part");
    ValuePartRef vp = vr.part(0);
    CCAssignment cca{
        .sret = true, .bank = vp.bank(), .size = Config::PLATFORM_POINTER_SIZE};
    cc_assigner->assign_arg(cca);
    derived()->prologue_assign_arg_part(std::move(vp), cca);
    return;
  }

  // ... (computes `align` and `allow_split` for the argument)

  const u32 part_count = vr.assignment()->part_count;
  for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
    ValuePartRef vp = vr.part(part_idx);
    u32 remaining = part_count < 256 ? part_count - part_idx - 1 : 255;
    CCAssignment cca{
        .consecutive = u8(allow_split ? 0 : remaining),
        .align = u8(part_idx == 0 ? align : 1),
        .bank = vp.bank(),
        .size = vp.part_size(),
    };
    cc_assigner->assign_arg(cca);
    derived()->prologue_assign_arg_part(std::move(vp), cca);
  }
}

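// val_ref is for using an already-defined value (the adaptor may serve
// "special" values, e.g. constants, without an assignment); result_ref is
// for defining a value and creates its assignment on first use.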
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::ValueRef
    CompilerBase<Adaptor, Derived, Config>::val_ref(IRValueRef value) {
  if (auto special = derived()->val_ref_special(value); special) {
    return ValueRef{this, std::move(*special)};
  }

  const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(value);
  assert(val_assignment(local_idx) != nullptr && "value use before def");
  return ValueRef{this, local_idx};
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
std::pair<typename CompilerBase<Adaptor, Derived, Config>::ValueRef,
          typename CompilerBase<Adaptor, Derived, Config>::ValuePartRef>
    CompilerBase<Adaptor, Derived, Config>::val_ref_single(IRValueRef value) {
  std::pair<ValueRef, ValuePartRef> res{val_ref(value), this};
  res.second = res.first.part(0);
  return res;
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::ValueRef
    CompilerBase<Adaptor, Derived, Config>::result_ref(IRValueRef value) {
  const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(value);
  if (val_assignment(local_idx) == nullptr) {
    init_assignment(value, local_idx);
  }
  return ValueRef{this, local_idx};
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
std::pair<typename CompilerBase<Adaptor, Derived, Config>::ValueRef,
          typename CompilerBase<Adaptor, Derived, Config>::ValuePartRef>
    CompilerBase<Adaptor, Derived, Config>::result_ref_single(
        IRValueRef value) {
  std::pair<ValueRef, ValuePartRef> res{result_ref(value), this};
  res.second = res.first.part(0);
  return res;
}

// (The exact name of this definition was lost in extraction; it re-binds an
// owned source ValueRef's assignment as the result of `dst`.)
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::ValueRef
    CompilerBase<Adaptor, Derived, Config>::result_ref_alias(IRValueRef dst,
                                                             ValueRef &&src) {
  const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(dst);
  assert(!val_assignment(local_idx) && "alias target already defined");
  assert(src.has_assignment() && "alias src must have an assignment");
  assert(src.is_owned() && "alias src must be owned");

  ValueAssignment *assignment = src.assignment();
  u32 part_count = assignment->part_count;
  assert(!assignment->pending_free);
  assert(!assignment->variable_ref);
  if constexpr (WithAsserts) {
    const auto &src_liveness = analyzer.liveness_info(src.local_idx());
    assert(!src_liveness.last_full);
    assert(assignment->references_left == 1);

    const auto parts = derived()->val_parts(dst);
    assert(parts.count() == part_count);
    for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
      AssignmentPartRef ap{assignment, part_idx};
      assert(parts.reg_bank(part_idx) == ap.bank());
      assert(parts.size_bytes(part_idx) == ap.part_size());
    }
  }

  // Re-point registers currently holding parts of src at the new value.
  for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
    AssignmentPartRef ap{assignment, part_idx};
    if (ap.register_valid()) {
      register_file.update_reg_assignment(ap.get_reg(), local_idx, part_idx);
    }
  }

  const auto &liveness = analyzer.liveness_info(local_idx);
  assignment->delay_free = liveness.last_full;
  assignment->references_left = liveness.ref_count;
  assignments.value_ptrs[static_cast<u32>(src.local_idx())] = nullptr;
  assignments.value_ptrs[static_cast<u32>(local_idx)] = assignment;

  return ValueRef{this, local_idx};
}

// (Name lost in extraction: defines `dst` as a stack variable at a fixed
// offset from an existing stack-variable assignment.)
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::ValueRef
    CompilerBase<Adaptor, Derived, Config>::result_ref_stack_offset(
        IRValueRef dst, AssignmentPartRef base, i32 off) {
  const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(dst);
  assert(!val_assignment(local_idx) && "new value already defined");
  init_variable_ref(local_idx, 0);
  ValueAssignment *assignment = this->val_assignment(local_idx);
  assignment->stack_variable = true;
  assignment->frame_off = base.variable_stack_off() + off;
  return ValueRef{this, local_idx};
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::set_value(ValuePartRef &val_ref,
                                                       ScratchReg &scratch) {
  val_ref.set_value(std::move(scratch));
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::AsmReg
    CompilerBase<Adaptor, Derived, Config>::gval_as_reg(GenericValuePart &gv) {
  if (std::holds_alternative<ScratchReg>(gv.state)) {
    return std::get<ScratchReg>(gv.state).cur_reg();
  }
  if (std::holds_alternative<ValuePartRef>(gv.state)) {
    auto &vpr = std::get<ValuePartRef>(gv.state);
    if (vpr.has_reg()) {
      return vpr.cur_reg();
    }
    return vpr.load_to_reg();
  }
  if (auto *expr = std::get_if<typename GenericValuePart::Expr>(&gv.state)) {
    if (expr->has_base() && !expr->has_index() && expr->disp == 0) {
      return expr->base_reg();
    }
    return derived()->gval_expr_as_reg(gv);
  }
  TPDE_UNREACHABLE("gval_as_reg on empty GenericValuePart");
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::AsmReg
    CompilerBase<Adaptor, Derived, Config>::gval_as_reg_reuse(
        GenericValuePart &gv, ScratchReg &dst) {
  AsmReg reg = gval_as_reg(gv);
  if (auto *scratch = std::get_if<ScratchReg>(&gv.state)) {
    // Take ownership of the scratch register directly.
    dst = std::move(*scratch);
  } else if (auto *val_ref = std::get_if<ValuePartRef>(&gv.state)) {
    if (val_ref->can_salvage()) {
      // (salvaging call reconstructed)
      dst.alloc_specific(val_ref->salvage());
      assert(dst.cur_reg() == reg && "salvaging unsuccessful");
    }
  }
  return reg;
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::AsmReg
    CompilerBase<Adaptor, Derived, Config>::gval_as_reg_reuse(
        GenericValuePart &gv, ValuePart &dst) {
  AsmReg reg = gval_as_reg(gv);
  // Only reuse the register if dst has none yet and is not tied to a fixed
  // assignment.
  if (!dst.has_reg() &&
      (!dst.has_assignment() || !dst.assignment().fixed_assignment())) {
    if (auto *scratch = std::get_if<ScratchReg>(&gv.state)) {
      dst.set_value(this, std::move(*scratch));
      if (dst.has_assignment()) {
        // ...
      }
    } else if (auto *val_ref = std::get_if<ValuePartRef>(&gv.state)) {
      dst.set_value(this, std::move(*val_ref));
      if (dst.has_assignment()) {
        // ...
      }
    }
  }
  return reg;
}

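// Eviction candidates are scored so the cheapest victim wins: variable refs
// are taken immediately (they can be rematerialized), an already-spilled
// part sets the top bit (no store needed), and values whose live range ends
// soon or that have few remaining references score higher.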
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
Reg CompilerBase<Adaptor, Derived, Config>::select_reg_evict(RegBank bank) {
  TPDE_LOG_DBG("select_reg_evict for bank {}", bank.id());
  auto candidates = register_file.used & register_file.bank_regs(bank);

  Reg candidate = Reg::make_invalid();
  u32 max_score = 0;
  for (auto reg_id : util::BitSetIterator<>(candidates)) {
    Reg reg{reg_id};
    if (register_file.is_fixed(reg)) {
      continue;
    }

    auto local_idx = register_file.reg_local_idx(reg);
    u32 part = register_file.reg_part(Reg{reg});
    assert(local_idx != INVALID_VAL_LOCAL_IDX);
    ValueAssignment *va = val_assignment(local_idx);
    AssignmentPartRef ap{va, part};

    if (ap.variable_ref()) {
      TPDE_LOG_DBG("  r{} ({}) is variable-ref", reg_id, u32(local_idx));
      return reg;
    }

    u32 score = 0;
    if (ap.stack_valid()) {
      score |= u32{1} << 31;
    }

    const auto &liveness = analyzer.liveness_info(local_idx);
    u32 last_use_dist = u32(liveness.last) - u32(cur_block_idx);
    score |= (last_use_dist < 0x8000 ? 0x8000 - last_use_dist : 0) << 16;

    u32 refs_left = va->pending_free ? 0 : va->references_left;
    score |= (refs_left < 0xffff ? 0x10000 - refs_left : 1);

    TPDE_LOG_DBG("  r{} ({}:{}) rc={}/{} live={}-{}{} spilled={} score={:#x}",
                 reg_id,
                 u32(local_idx),
                 part,
                 refs_left,
                 liveness.ref_count,
                 u32(liveness.first),
                 u32(liveness.last),
                 &"*"[!liveness.last_full],
                 ap.stack_valid(),
                 score);
    if (score > max_score) {
      candidate = reg;
      max_score = score;
    }
  }

  if (candidate.invalid()) [[unlikely]] {
    TPDE_FATAL("ran out of registers for scratch registers");
  }
  TPDE_LOG_DBG("  selected r{}", candidate.id());
  return candidate;
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::reload_to_reg(
    AsmReg dst, AssignmentPartRef ap) {
  if (!ap.variable_ref()) {
    assert(ap.stack_valid());
    derived()->load_from_stack(dst, ap.frame_off(), ap.part_size());
  } else if (ap.is_stack_variable()) {
    derived()->load_address_of_stack_var(dst, ap);
  } else if constexpr (!Config::DEFAULT_VAR_REF_HANDLING) {
    derived()->load_address_of_var_reference(dst, ap);
  } else {
    TPDE_UNREACHABLE("non-stack-variable needs custom var-ref handling");
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::allocate_spill_slot(
    AssignmentPartRef ap) {
  assert(!ap.variable_ref() && "cannot allocate spill slot for variable ref");
  if (ap.assignment()->frame_off == 0) {
    assert(!ap.stack_valid() && "stack-valid set without spill slot");
    ap.assignment()->frame_off = allocate_stack_slot(ap.assignment()->size());
    assert(ap.assignment()->frame_off != 0);
  }
}

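// Register-state helpers: spill() only writes a register to its stack slot;
// the two eviction helpers below additionally give the register up (spilling
// first if necessary); the last one drops an unmodified register copy
// without emitting any store.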
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::spill(AssignmentPartRef ap) {
  assert(may_change_value_state());
  if (!ap.stack_valid() && !ap.variable_ref()) {
    assert(ap.register_valid() && "cannot spill uninitialized assignment part");
    allocate_spill_slot(ap);
    derived()->spill_reg(ap.get_reg(), ap.frame_off(), ap.part_size());
    ap.set_stack_valid();
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::evict(AssignmentPartRef ap) {
  assert(may_change_value_state());
  assert(ap.register_valid());
  spill(ap);
  ap.set_register_valid(false);
  register_file.unmark_used(ap.get_reg());
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::evict_reg(Reg reg) {
  assert(may_change_value_state());
  assert(!register_file.is_fixed(reg));
  assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);

  ValLocalIdx local_idx = register_file.reg_local_idx(reg);
  auto part = register_file.reg_part(reg);
  AssignmentPartRef evict_part{val_assignment(local_idx), part};
  assert(evict_part.register_valid());
  assert(evict_part.get_reg() == reg);
  spill(evict_part);
  evict_part.set_register_valid(false);
  register_file.unmark_used(reg);
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::free_reg(Reg reg) {
  assert(may_change_value_state());
  assert(!register_file.is_fixed(reg));
  assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);

  ValLocalIdx local_idx = register_file.reg_local_idx(reg);
  auto part = register_file.reg_part(reg);
  AssignmentPartRef ap{val_assignment(local_idx), part};
  assert(ap.register_valid());
  assert(ap.get_reg() == reg);
  assert(!ap.modified() || ap.variable_ref());
  ap.set_register_valid(false);
  register_file.unmark_used(reg);
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::RegisterFile::RegBitSet
    CompilerBase<Adaptor, Derived, Config>::spill_before_branch(
        bool force_spill) {
  using RegBitSet = typename RegisterFile::RegBitSet;

  assert(may_change_value_state());

  const IRBlockRef cur_block_ref = analyzer.block_ref(cur_block_idx);

  u32 succ_count = 0;
  BlockIndex earliest_next_succ = Analyzer<Adaptor>::INVALID_BLOCK_IDX;

  bool must_spill = force_spill;

  // Values must be spilled if the next block is not a direct successor or
  // can also be reached on another path.
  auto next_block_is_succ = false;
  auto next_block_has_multiple_incoming = false;

  for (const IRBlockRef succ : adaptor->block_succs(cur_block_ref)) {
    ++succ_count;
    BlockIndex succ_idx = analyzer.block_idx(succ);
    if (u32(succ_idx) == u32(cur_block_idx) + 1) {
      next_block_is_succ = true;
      if (analyzer.block_has_multiple_incoming(succ)) {
        next_block_has_multiple_incoming = true;
      }
    } else if (succ_idx > cur_block_idx && succ_idx < earliest_next_succ) {
      earliest_next_succ = succ_idx;
    }
  }

  must_spill |= !next_block_is_succ || next_block_has_multiple_incoming;

  if (succ_count == 1 && !must_spill) {
    return RegBitSet{};
  }

  auto release_regs = RegBitSet{};

  for (auto reg : register_file.used_regs()) {
    auto local_idx = register_file.reg_local_idx(Reg{reg});
    auto part = register_file.reg_part(Reg{reg});
    if (local_idx == INVALID_VAL_LOCAL_IDX) {
      continue;
    }

    AssignmentPartRef ap{val_assignment(local_idx), part};
    if (ap.fixed_assignment()) {
      continue;
    }

    // The register can be released once the branch code is emitted.
    release_regs |= RegBitSet{1ull} << reg;

    if (!ap.modified() || ap.variable_ref()) {
      continue;
    }

    const auto &liveness = analyzer.liveness_info(local_idx);
    if (liveness.last <= cur_block_idx) {
      continue;
    }

    // Spill if spilling is forced or the value is still needed in a later
    // successor that might be reached on another path.
    if (must_spill || earliest_next_succ <= liveness.last) {
      spill(ap);
    }
  }

  return release_regs;
}

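// The bit set returned above names registers whose values were released for
// the branch; it is handed to release_spilled_regs() once all outgoing edges
// have been emitted, so the registers only then become allocatable again.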
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::release_spilled_regs(
    typename RegisterFile::RegBitSet regs) {
  assert(may_change_value_state());

  for (auto reg_id : util::BitSetIterator<>{regs & register_file.used}) {
    if (!register_file.is_fixed(Reg{reg_id})) {
      evict_reg(Reg{reg_id});
    }
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::release_regs_after_return() {
  // After a return, no value is live anymore; drop all non-fixed registers.
  for (auto reg_id : register_file.used_regs()) {
    if (!register_file.is_fixed(Reg{reg_id})) {
      evict_reg(Reg{reg_id});
    }
  }
}

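// Branch emission: if the target has PHI nodes whose moves must not execute
// on other paths (needs_split), the jump is split: jump over a stub on the
// inverted condition, emit the PHI moves in the stub, then jump to the real
// target.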
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
template <typename Jump>
void CompilerBase<Adaptor, Derived, Config>::generate_branch_to_block(
    Jump jmp, IRBlockRef target, bool needs_split, bool last_inst) {
  BlockIndex target_idx = this->analyzer.block_idx(target);
  Label target_label = this->block_labels[u32(target_idx)];
  if (!needs_split) {
    move_to_phi_nodes(target_idx);
    if (!last_inst || target_idx != this->next_block()) {
      derived()->generate_raw_jump(jmp, target_label);
    }
  } else {
    Label tmp_label = this->text_writer.label_create();
    derived()->generate_raw_jump(derived()->invert_jump(jmp), tmp_label);

    move_to_phi_nodes(target_idx);
    derived()->generate_raw_jump(Jump::jmp, target_label);

    this->label_place(tmp_label);
  }
}

// (Name and body of this overload were lost in extraction; it presumably
// emits an unconditional branch to `target`.)
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::generate_branch_to_block(
    IRBlockRef target) {
  // ...
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
template <typename Jump>
void CompilerBase<Adaptor, Derived, Config>::generate_conditional_branch(
    Jump jmp, IRBlockRef true_target, IRBlockRef false_target) {
  IRBlockRef next = analyzer.block_ref(next_block());
  bool true_needs_split = branch_needs_split(true_target);
  bool false_needs_split = branch_needs_split(false_target);

  if (next == true_target || (next != false_target && true_needs_split)) {
    // Invert the condition so the true target can be reached by fall-through
    // (or get its PHI moves emitted on the split path).
    derived()->generate_branch_to_block(
        derived()->invert_jump(jmp), false_target, false_needs_split, false);
    derived()->generate_branch_to_block(
        Derived::Jump::jmp, true_target, true_needs_split, true);
  } else if (next == false_target) {
    derived()->generate_branch_to_block(jmp, true_target, true_needs_split,
                                        false);
    derived()->generate_branch_to_block(
        Derived::Jump::jmp, false_target, false_needs_split, true);
  } else {
    assert(!true_needs_split);
    derived()->generate_branch_to_block(jmp, true_target, false, false);
    derived()->generate_branch_to_block(
        Derived::Jump::jmp, false_target, false_needs_split, true);
  }
}

// (Parameter list reconstructed from the uses below.)
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::generate_switch(
    ValuePartRef cond, u32 width, IRBlockRef default_block,
    std::span<const std::pair<u64, IRBlockRef>> cases) {
  assert(width <= 64);
  assert(cases.size() < UINT32_MAX && "large switches are unsupported");

  AsmReg cmp_reg = cond.cur_reg();
  bool width_is_32 = width <= 32;
  if (u32 dst_width = util::align_up(width, 32); width != dst_width) {
    // Zero-extend the condition to a full 32/64-bit register.
    derived()->generate_raw_intext(cmp_reg, cmp_reg, false, width, dst_width);
  }

  ScratchReg tmp_scratch{derived()};
  AsmReg tmp_reg = tmp_scratch.alloc_gp();

  // Cases whose target has PHI nodes get a private label so the PHI moves
  // can be emitted before jumping to the actual block.
  util::SmallVector<tpde::Label, 64> case_labels;
  util::SmallVector<std::pair<tpde::Label, IRBlockRef>, 64> case_blocks;
  for (auto i = 0u; i < cases.size(); ++i) {
    BlockIndex target = this->analyzer.block_idx(cases[i].second);
    if (analyzer.block_has_phis(target)) {
      case_labels.push_back(this->text_writer.label_create());
      case_blocks.emplace_back(case_labels.back(), cases[i].second);
    } else {
      case_labels.push_back(this->block_labels[u32(target)]);
    }
  }

  const auto default_label = this->text_writer.label_create();

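  // Case dispatch strategy (recursive over a sorted case range): up to four
  // cases are compared linearly; a dense range (covered span less than 8x
  // the case count) becomes a jump table; otherwise the range is split in
  // half with a binary-search step. E.g. 100 cases spread over the values
  // 0..599 give range 600, and 600 / 100 = 6 < 8, so one table is emitted.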
  const auto build_range = [&,
                            this](size_t begin, size_t end, const auto &self) {
    assert(begin <= end);
    const auto num_cases = end - begin;
    if (num_cases <= 4) {
      // Few cases: compare-and-branch for each, then jump to default.
      for (auto i = 0u; i < num_cases; ++i) {
        derived()->switch_emit_cmpeq(case_labels[begin + i],
                                     cmp_reg,
                                     tmp_reg,
                                     cases[begin + i].first,
                                     width_is_32);
      }
      derived()->generate_raw_jump(Derived::Jump::jmp, default_label);
      return;
    }

    u64 low_bound = cases[begin].first;
    u64 high_bound = cases[end - 1].first;
    auto range = high_bound - low_bound + 1;

    // Dense enough? Then emit a jump table.
    if (range != 0 && (range / num_cases) < 8) {
      auto *jt = derived()->switch_create_jump_table(
          default_label, cmp_reg, tmp_reg, low_bound, high_bound, width_is_32);

      if (range == num_cases) {
        std::copy(case_labels.begin() + begin,
                  case_labels.begin() + end,
                  jt->labels().begin());
      } else {
        std::ranges::fill(jt->labels(), default_label);
        for (auto i = begin; i != end; ++i) {
          jt->labels()[cases[i].first - low_bound] = case_labels[i];
        }
      }
      return;
    }

    // Otherwise, split the range with a binary-search step.
    const auto half_len = num_cases / 2;
    const auto half_value = cases[begin + half_len].first;
    const auto gt_label = this->text_writer.label_create();

    derived()->switch_emit_binary_step(case_labels[begin + half_len],
                                       gt_label,
                                       cmp_reg,
                                       tmp_reg,
                                       half_value,
                                       width_is_32);
    self(begin, begin + half_len, self);

    this->label_place(gt_label);
    self(begin + half_len + 1, end, self);
  };
  build_range(0, case_labels.size(), build_range);

  this->label_place(default_label);
  derived()->generate_branch_to_block(
      Derived::Jump::jmp, default_block, false, false);

  // Stubs for case targets with PHI nodes.
  for (const auto &[label, target] : case_blocks) {
    this->text_writer.align(8);
    this->label_place(label);
    derived()->generate_branch_to_block(
        Derived::Jump::jmp, target, false, false);
  }
}

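// PHI moves form a parallel copy: every PHI must read its incoming value
// before another PHI of the same block overwrites it. The implementation
// below processes nodes in topological order of these read-after-write
// dependencies; genuine cycles (e.g. a swap a <-> b) are broken by parking
// one value in temporary registers or a temporary stack slot.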
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
void CompilerBase<Adaptor, Derived, Config>::move_to_phi_nodes_impl(
    BlockIndex target) {
  // Helper that owns a temporary register: if it has to steal a register
  // from a value, the value is spilled first and restored on reset().
  struct ScratchWrapper {
    Derived *self;
    AsmReg cur_reg = AsmReg::make_invalid();
    bool backed_up = false;
    bool was_modified = false;
    u32 part = 0;
    ValLocalIdx local_idx = INVALID_VAL_LOCAL_IDX;

    ScratchWrapper(Derived *self) : self{self} {}

    ~ScratchWrapper() { reset(); }

    void reset() {
      if (cur_reg.invalid()) {
        return;
      }
      self->register_file.unmark_fixed(cur_reg);
      self->register_file.unmark_used(cur_reg);
      if (backed_up) {
        // Restore the previous value into its register.
        auto *assignment = self->val_assignment(local_idx);
        auto ap = AssignmentPartRef{assignment, part};
        if (!ap.variable_ref()) {
          assert(ap.stack_valid());
          self->load_from_stack(cur_reg, ap.frame_off(), ap.part_size());
        }
        ap.set_reg(cur_reg);
        ap.set_register_valid(true);
        ap.set_modified(was_modified);
        self->register_file.mark_used(cur_reg, local_idx, part);
        backed_up = false;
      }
      cur_reg = AsmReg::make_invalid();
    }

    AsmReg alloc_from_bank(RegBank bank) {
      if (cur_reg.valid() && self->register_file.reg_bank(cur_reg) == bank) {
        return cur_reg;
      }
      if (cur_reg.valid()) {
        reset();
      }

      auto &reg_file = self->register_file;
      auto reg = reg_file.find_first_free_excluding(bank, 0);
      if (reg.invalid()) {
        // No free register; steal one and back its value up.
        reg = reg_file.find_first_nonfixed_excluding(bank, 0);
        if (reg.invalid()) {
          TPDE_FATAL("ran out of registers for scratch registers");
        }
        local_idx = reg_file.reg_local_idx(reg);
        part = reg_file.reg_part(reg);
        AssignmentPartRef ap{self->val_assignment(local_idx), part};
        was_modified = ap.modified();
        backed_up = true;

        assert(ap.register_valid() && ap.get_reg() == reg);
        if (!ap.stack_valid() && !ap.variable_ref()) {
          self->allocate_spill_slot(ap);
          self->spill_reg(ap.get_reg(), ap.frame_off(), ap.part_size());
          ap.set_stack_valid();
        }
        ap.set_register_valid(false);
        reg_file.unmark_used(reg);
      }

      reg_file.mark_used(reg, INVALID_VAL_LOCAL_IDX, 0);
      reg_file.mark_clobbered(reg);
      reg_file.mark_fixed(reg);
      cur_reg = reg;
      return reg;
    }

    ScratchWrapper &operator=(const ScratchWrapper &) = delete;
    ScratchWrapper &operator=(ScratchWrapper &&) = delete;
  };

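  // Each PHI becomes a NodeEntry; ref_count counts how many other PHIs of
  // this block still need to read this PHI's old value, and
  // incoming_phi_local_idx links a node to the PHI it reads from, forming
  // the dependency graph.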
  IRBlockRef target_ref = analyzer.block_ref(target);
  IRBlockRef cur_ref = analyzer.block_ref(cur_block_idx);

  struct NodeEntry {
    IRValueRef phi;
    IRValueRef incoming_val;
    ValLocalIdx phi_local_idx;
    u32 ref_count = 0;
    // Set if incoming_val is itself a PHI of the target block.
    ValLocalIdx incoming_phi_local_idx = INVALID_VAL_LOCAL_IDX;

    bool operator<(const NodeEntry &other) const {
      return phi_local_idx < other.phi_local_idx;
    }

    bool operator<(ValLocalIdx other) const { return phi_local_idx < other; }
  };

  util::SmallVector<NodeEntry, 16> nodes;
  for (IRValueRef phi : adaptor->block_phis(target_ref)) {
    ValLocalIdx phi_local_idx = adaptor->val_local_idx(phi);
    auto incoming = adaptor->val_as_phi(phi).incoming_val_for_block(cur_ref);
    nodes.emplace_back(NodeEntry{
        .phi = phi, .incoming_val = incoming, .phi_local_idx = phi_local_idx});
  }

  assert(!nodes.empty() && "block marked as having phi nodes has none");

  ScratchWrapper scratch{derived()};
  const auto move_to_phi = [this, &scratch](IRValueRef phi,
                                            IRValueRef incoming_val) {
    auto phi_vr = derived()->result_ref(phi);
    auto val_vr = derived()->val_ref(incoming_val);
    if (phi == incoming_val) {
      return;
    }

    u32 part_count = phi_vr.assignment()->part_count;
    for (u32 i = 0; i < part_count; ++i) {
      AssignmentPartRef phi_ap{phi_vr.assignment(), i};
      ValuePartRef val_vpr = val_vr.part(i);

      if (phi_ap.fixed_assignment()) {
        if (AsmReg reg = val_vpr.cur_reg_unlocked(); reg.valid()) {
          derived()->mov(phi_ap.get_reg(), reg, phi_ap.part_size());
        } else {
          val_vpr.reload_into_specific_fixed(phi_ap.get_reg());
        }
      } else {
        AsmReg reg = val_vpr.cur_reg_unlocked();
        if (reg.invalid()) {
          reg = scratch.alloc_from_bank(val_vpr.bank());
          val_vpr.reload_into_specific_fixed(reg);
        }
        allocate_spill_slot(phi_ap);
        derived()->spill_reg(reg, phi_ap.frame_off(), phi_ap.part_size());
        phi_ap.set_stack_valid();
      }
    }
  };

  if (nodes.size() == 1) {
    move_to_phi(nodes[0].phi, nodes[0].incoming_val);
    return;
  }

  // Sort by local index so PHI reads can be found with a binary search.
  std::sort(nodes.begin(), nodes.end());

  // Count, for every PHI, how many other PHIs of this block read it.
  auto all_zero_ref = true;
  for (auto &node : nodes) {
    bool incoming_is_phi = adaptor->val_is_phi(node.incoming_val);
    if (!incoming_is_phi || node.incoming_val == node.phi) {
      continue;
    }

    ValLocalIdx inc_local_idx = adaptor->val_local_idx(node.incoming_val);
    auto it = std::lower_bound(nodes.begin(), nodes.end(), inc_local_idx);
    if (it == nodes.end() || it->phi != node.incoming_val) {
      // The incoming value is a PHI of a different block.
      continue;
    }
    node.incoming_phi_local_idx = inc_local_idx;
    ++it->ref_count;
    all_zero_ref = false;
  }

  if (all_zero_ref) {
    // No dependencies at all; move in any order.
    for (auto &node : nodes) {
      move_to_phi(node.phi, node.incoming_val);
    }
    return;
  }

  util::SmallVector<u32, 32> ready_indices;
  ready_indices.reserve(nodes.size());
  util::SmallBitSet<256> waiting_nodes;
  waiting_nodes.resize(nodes.size());
  for (u32 i = 0; i < nodes.size(); ++i) {
    if (nodes[i].ref_count) {
      waiting_nodes.mark_set(i);
    } else {
      ready_indices.push_back(i);
    }
  }

  u32 handled_count = 0;
  u32 cur_tmp_part_count = 0;
  i32 cur_tmp_slot = 0;
  u32 cur_tmp_slot_size = 0;
  IRValueRef cur_tmp_val = Adaptor::INVALID_VALUE_REF;
  ScratchWrapper tmp_reg1{derived()};
  ScratchWrapper tmp_reg2{derived()};

  // Write the temporarily saved PHI value into its target.
  const auto move_from_tmp_phi = [&](IRValueRef target_phi) {
    auto phi_vr = val_ref(target_phi);
    if (cur_tmp_part_count <= 2) {
      AssignmentPartRef ap{phi_vr.assignment(), 0};
      assert(!tmp_reg1.cur_reg.invalid());
      if (ap.fixed_assignment()) {
        derived()->mov(ap.get_reg(), tmp_reg1.cur_reg, ap.part_size());
      } else {
        derived()->spill_reg(tmp_reg1.cur_reg, ap.frame_off(), ap.part_size());
      }

      if (cur_tmp_part_count == 2) {
        AssignmentPartRef ap_high{phi_vr.assignment(), 1};
        assert(!ap_high.fixed_assignment());
        assert(!tmp_reg2.cur_reg.invalid());
        derived()->spill_reg(
            tmp_reg2.cur_reg, ap_high.frame_off(), ap_high.part_size());
      }
    } else {
      for (u32 i = 0; i < cur_tmp_part_count; ++i) {
        AssignmentPartRef phi_ap{phi_vr.assignment(), i};
        assert(!phi_ap.fixed_assignment());

        auto slot_off = cur_tmp_slot + phi_ap.part_off();
        auto reg = tmp_reg1.alloc_from_bank(phi_ap.bank());
        derived()->load_from_stack(reg, slot_off, phi_ap.part_size());
        derived()->spill_reg(reg, phi_ap.frame_off(), phi_ap.part_size());
      }
    }
  };

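  // Worklist loop: ready nodes (ref_count == 0) can be written immediately.
  // If only waiting nodes remain, the dependencies form a cycle; one node's
  // current value is saved to tmp_reg1/tmp_reg2 (or a temporary stack slot
  // for more than two parts), which makes that node ready and unblocks the
  // rest of the cycle.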
  while (handled_count != nodes.size()) {
    if (ready_indices.empty()) {
      // Only cycles remain; break one by saving a node's current value.
      auto cur_idx_opt = waiting_nodes.first_set();
      assert(cur_idx_opt);
      auto cur_idx = *cur_idx_opt;
      assert(nodes[cur_idx].ref_count == 1);
      assert(cur_tmp_val == Adaptor::INVALID_VALUE_REF);

      auto phi_val = nodes[cur_idx].phi;
      auto phi_vr = this->val_ref(phi_val);
      auto *assignment = phi_vr.assignment();
      cur_tmp_part_count = assignment->part_count;
      cur_tmp_val = phi_val;

      if (cur_tmp_part_count > 2) {
        // Save to a temporary stack slot.
        cur_tmp_slot = allocate_stack_slot(assignment->size());
        cur_tmp_slot_size = assignment->size();

        for (u32 i = 0; i < cur_tmp_part_count; ++i) {
          auto ap = AssignmentPartRef{assignment, i};
          assert(!ap.fixed_assignment());
          auto slot_off = cur_tmp_slot + ap.part_off();

          if (ap.register_valid()) {
            auto reg = ap.get_reg();
            derived()->spill_reg(reg, slot_off, ap.part_size());
          } else {
            auto reg = tmp_reg1.alloc_from_bank(ap.bank());
            assert(ap.stack_valid());
            derived()->load_from_stack(reg, ap.frame_off(), ap.part_size());
            derived()->spill_reg(reg, slot_off, ap.part_size());
          }
        }
      } else {
        // One or two parts fit into the temporary registers.
        auto phi_vpr = phi_vr.part(0);
        auto reg = tmp_reg1.alloc_from_bank(phi_vpr.bank());
        phi_vpr.reload_into_specific_fixed(this, reg);

        if (cur_tmp_part_count == 2) {
          auto phi_vpr_high = phi_vr.part(1);
          auto reg_high = tmp_reg2.alloc_from_bank(phi_vpr_high.bank());
          phi_vpr_high.reload_into_specific_fixed(this, reg_high);
        }
      }

      nodes[cur_idx].ref_count = 0;
      ready_indices.push_back(cur_idx);
      waiting_nodes.mark_unset(cur_idx);
    }

    for (u32 i = 0; i < ready_indices.size(); ++i) {
      ++handled_count;
      auto cur_idx = ready_indices[i];
      auto phi_val = nodes[cur_idx].phi;
      IRValueRef incoming_val = nodes[cur_idx].incoming_val;
      if (incoming_val == phi_val) {
        continue;
      }

      if (incoming_val == cur_tmp_val) {
        move_from_tmp_phi(phi_val);

        if (cur_tmp_part_count > 2) {
          free_stack_slot(cur_tmp_slot, cur_tmp_slot_size);
          cur_tmp_slot = 0xFFFF'FFFF;
          cur_tmp_slot_size = 0;
        }
        cur_tmp_val = Adaptor::INVALID_VALUE_REF;
        continue;
      }

      move_to_phi(phi_val, incoming_val);

      if (nodes[cur_idx].incoming_phi_local_idx == INVALID_VAL_LOCAL_IDX) {
        continue;
      }

      // The incoming value was a PHI read; it now has one reader fewer.
      auto it = std::lower_bound(
          nodes.begin(), nodes.end(), nodes[cur_idx].incoming_phi_local_idx);
      assert(it != nodes.end() && it->phi == incoming_val &&
             "incoming_phi_local_idx set incorrectly");

      assert(it->ref_count > 0);
      if (--it->ref_count == 0) {
        auto node_idx = static_cast<u32>(it - nodes.begin());
        ready_indices.push_back(node_idx);
        waiting_nodes.mark_unset(node_idx);
      }
    }
    ready_indices.clear();
  }
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
typename CompilerBase<Adaptor, Derived, Config>::BlockIndex
    CompilerBase<Adaptor, Derived, Config>::next_block() const {
  return static_cast<BlockIndex>(static_cast<u32>(cur_block_idx) + 1);
}

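// The unwind info references the address of a slot holding the personality
// function pointer rather than the function address itself, so a relocated
// pointer-sized entry is created once per personality function and cached in
// personality_syms.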
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
SymRef CompilerBase<Adaptor, Derived, Config>::get_personality_sym() {
  SymRef personality_sym;
  if (this->adaptor->cur_needs_unwind_info()) {
    SymRef personality_func = derived()->cur_personality_func();
    if (personality_func.valid()) {
      for (const auto &[fn_sym, ptr_sym] : personality_syms) {
        if (fn_sym == personality_func) {
          personality_sym = ptr_sym;
          break;
        }
      }

      if (!personality_sym.valid()) {
        // Create a pointer-sized, zero-initialized slot and let an absolute
        // relocation fill in the personality function's address.
        static constexpr std::array<u8, 8> zero{};
        auto rodata =
            this->assembler.get_default_section(SectionKind::DataRelRO);
        personality_sym = this->assembler.sym_def_data(
            /* ... slot with `zero` in rodata, at offset `off` ... */);
        this->assembler.reloc_abs(rodata, personality_func, off, 0);

        personality_syms.emplace_back(personality_func, personality_sym);
      }
    }
  }
  return personality_sym;
}

template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
bool CompilerBase<Adaptor, Derived, Config>::compile_func(const IRFuncRef func,
                                                          const u32 func_idx) {
  if (!adaptor->switch_func(func)) {
    return false;
  }

  analyzer.switch_func(func);

  if constexpr (WithAsserts) {
    stack.frame_size = ~0u;
  }
  for (auto &e : stack.fixed_free_lists) {
    e.clear();
  }
  stack.dynamic_free_lists.clear();

  stack.has_dynamic_alloca = this->adaptor->cur_has_dynamic_alloca();
  stack.is_leaf_function = !derived()->cur_func_may_emit_calls();
  stack.generated_call = false;
  stack.frame_used = false;

  assignments.cur_fixed_assignment_count = {};
  assert(std::ranges::none_of(assignments.value_ptrs, std::identity{}));
  if (assignments.value_ptrs.size() < analyzer.liveness.size()) {
    assignments.value_ptrs.resize(analyzer.liveness.size());
  }

  assignments.allocator.reset();
  assignments.variable_ref_list = INVALID_VAL_LOCAL_IDX;
  assignments.delayed_free_lists.clear();
  assignments.delayed_free_lists.resize(analyzer.block_layout.size(),
                                        INVALID_VAL_LOCAL_IDX);

  cur_block_idx =
      static_cast<BlockIndex>(analyzer.block_idx(adaptor->cur_entry_block()));

  register_file.reset();

  // Rough initial buffer estimate: 8 bytes per instruction plus some room
  // for prologue/epilogue.
  u32 expected_code_size = 0x8 * analyzer.num_insts + 0x40;
  this->text_writer.begin_func(16, expected_code_size);

  derived()->start_func(func_idx);

  block_labels.clear();
  block_labels.resize_uninitialized(analyzer.block_layout.size());
  for (u32 i = 0; i < analyzer.block_layout.size(); ++i) {
    block_labels[i] = text_writer.label_create();
  }

  CCAssigner *cc_assigner = derived()->cur_cc_assigner();
  assert(cc_assigner != nullptr);

  register_file.allocatable = cc_assigner->get_ccinfo().allocatable_regs;

  cc_assigner->reset();

  const CCInfo &cc_info = cc_assigner->get_ccinfo();
  assert((cc_info.allocatable_regs & cc_info.arg_regs) == cc_info.arg_regs &&
         "argument registers must also be allocatable");
  // Block argument registers until the prologue has assigned all arguments.
  this->register_file.allocatable &= ~cc_info.arg_regs;

  derived()->prologue_begin(cc_assigner);

  u32 arg_idx = 0;
  for (const IRValueRef arg : this->adaptor->cur_args()) {
    derived()->prologue_assign_arg(cc_assigner, arg_idx++, arg);
  }

  derived()->prologue_end(cc_assigner);

  this->register_file.allocatable |= cc_info.arg_regs;

  // Static allocas that are too big or over-aligned are handled dynamically.
  util::SmallVector<std::tuple<IRValueRef, u32, u32>> dyn_allocas;
  for (const IRValueRef alloca : adaptor->cur_static_allocas()) {
    auto size = adaptor->val_alloca_size(alloca);
    auto align = adaptor->val_alloca_align(alloca);
    if (align > 16 || size > Derived::MaxStaticAllocaSize) {
      stack.has_dynamic_alloca = true;
      dyn_allocas.emplace_back(alloca, size, align);
      continue;
    }

    ValLocalIdx local_idx = adaptor->val_local_idx(alloca);
    init_variable_ref(local_idx, 0);
    ValueAssignment *assignment = val_assignment(local_idx);
    assignment->stack_variable = true;
    assignment->frame_off = allocate_stack_slot(size);
  }

  if constexpr (!Config::DEFAULT_VAR_REF_HANDLING) {
    derived()->setup_var_ref_assignments();
  }

  for (auto &[alloca, size, align] : dyn_allocas) {
    ValueRef vr = derived()->result_ref(alloca);
    derived()->alloca_fixed(size, align, vr);
  }

  for (u32 i = 0; i < analyzer.block_layout.size(); ++i) {
    const auto block_ref = analyzer.block_layout[i];
    TPDE_LOG_TRACE(
        "Compiling block {} ({})", i, adaptor->block_fmt_ref(block_ref));
    if (!derived()->compile_block(block_ref, i)) [[unlikely]] {
      TPDE_LOG_ERR("Failed to compile block {} ({})",
                   i,
                   adaptor->block_fmt_ref(block_ref));
      assignments.value_ptrs.clear();
      return false;
    }
  }

  // Variable refs are not ref-counted; unlink them here.
  ValLocalIdx variable_ref_list = assignments.variable_ref_list;
  while (variable_ref_list != INVALID_VAL_LOCAL_IDX) {
    u32 idx = u32(variable_ref_list);
    ValLocalIdx next = assignments.value_ptrs[idx]->next_delayed_free_entry;
    assignments.value_ptrs[idx] = nullptr;
    variable_ref_list = next;
  }

  assert(std::ranges::none_of(assignments.value_ptrs, std::identity{}) &&
         "found non-freed ValueAssignment, maybe missing ref-count?");

  derived()->finish_func(func_idx);
  this->text_writer.finish_func();

  return true;
}

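// compile_block drives per-instruction selection; inst_fused() lets the
// derived compiler skip instructions it already consumed as part of an
// earlier pattern, and InstRange gives compile_inst a lookahead window for
// such fusion.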
template <IRAdaptor Adaptor, typename Derived, CompilerConfig Config>
bool CompilerBase<Adaptor, Derived, Config>::compile_block(
    const IRBlockRef block, const u32 block_idx) {
  cur_block_idx =
      static_cast<typename Analyzer<Adaptor>::BlockIndex>(block_idx);
  label_place(block_labels[block_idx]);

  auto &&val_range = adaptor->block_insts(block);
  auto end = val_range.end();
  for (auto it = val_range.begin(); it != end; ++it) {
    const IRInstRef inst = *it;
    if (this->adaptor->inst_fused(inst)) {
      continue;
    }

    auto it_cpy = it;
    ++it_cpy;
    if (!derived()->compile_inst(inst, InstRange{.from = it_cpy, .to = end}))
        [[unlikely]] {
      TPDE_LOG_ERR("Failed to compile instruction {}",
                   this->adaptor->inst_fmt_ref(inst));
      return false;
    }
  }

  if constexpr (WithAsserts) {
    // All registers still marked used must be consistent with their
    // assignment parts.
    for (auto reg_id : register_file.used_regs()) {
      AsmReg reg{reg_id};
      assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);
      AssignmentPartRef ap{val_assignment(register_file.reg_local_idx(reg)),
                           register_file.reg_part(reg)};
      assert(ap.register_valid());
      assert(ap.get_reg() == reg);
      assert(!register_file.is_fixed(reg) || ap.fixed_assignment());
    }
  }

  if (static_cast<u32>(assignments.delayed_free_lists[block_idx]) != ~0u) {
    auto list_entry = assignments.delayed_free_lists[block_idx];
    while (static_cast<u32>(list_entry) != ~0u) {
      auto *assignment = assignments.value_ptrs[static_cast<u32>(list_entry)];
      auto next_entry = assignment->next_delayed_free_entry;
      derived()->free_assignment(list_entry, assignment);
      list_entry = next_entry;
    }
  }

  return true;
}