548 template <
typename Jump>
552 bool last_inst)
noexcept;
// NOTE(review): class-interior chunk; the enclosing class header is not
// visible and the embedded numbering shows several original lines are
// missing (e.g. closing braces). Code left byte-identical.
// Emits PHI-node moves for `target`, skipping the out-of-line worker when
// the analyzer reports the block has no PHIs.
558 void move_to_phi_nodes(BlockIndex target)
noexcept {
559 if (analyzer.block_has_phis(target)) {
560 move_to_phi_nodes_impl(target);
// Out-of-line worker that materializes the PHI moves (defined later).
564 void move_to_phi_nodes_impl(BlockIndex target)
noexcept;
// Fragment: body of a predicate whose signature is missing in this chunk;
// presumably "has PHIs in target block" — TODO confirm against full file.
570 return analyzer.block_has_phis(target);
575 BlockIndex next_block() const noexcept;
// Derived-class hook: by default never forces a fixed register assignment.
577 bool try_force_fixed_assignment(IRValueRef) const noexcept {
return false; }
// Derived-class hook run after function symbols are created; default: success.
579 bool hook_post_func_sym_init() noexcept {
return true; }
// Analysis lifecycle hooks; defaults are no-ops.
581 void analysis_start() noexcept {}
583 void analysis_end() noexcept {}
// Records a relocation against the text section at `offset`.
585 void reloc_text(SymRef sym, u32 type, u64 offset, i64 addend = 0) noexcept {
586 this->assembler.reloc_sec(
587 text_writer.get_sec_ref(), sym, type, offset, addend);
// Fragment: places `label` at the current text offset (signature missing here).
592 this->text_writer.label_place(label, text_writer.offset());
596 SymRef get_personality_sym() noexcept;
598 bool compile_func(IRFuncRef func, u32 func_idx) noexcept;
600 bool compile_block(IRBlockRef block, u32 block_idx) noexcept;
604#include "GenericValuePart.hpp"
605#include "ScratchReg.hpp"
606#include "ValuePartRef.hpp"
607#include "ValueRef.hpp"
// Assigns a calling-convention slot to one argument value part and moves the
// value into place (byval memory, stack slot, or argument register).
// cca.int_ext encodes an integer extension: bit 7 = signedness, low 6 bits =
// source bit width (see the masks below).
// NOTE(review): several interior lines are missing from this extraction
// (numbering jumps, e.g. 618-619, 624-627); code left byte-identical.
611template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
612template <
typename CBDerived>
614 CBDerived>
::add_arg(ValuePart &&vp, CCAssignment cca)
noexcept {
// Fill in bank/size from the value part unless the caller pre-set them.
615 if (!cca.byval && cca.bank == RegBank{}) {
616 cca.bank = vp.bank();
617 cca.size = vp.part_size();
620 assigner.assign_arg(cca);
// Decode the extension request packed into int_ext.
621 bool needs_ext = cca.int_ext != 0;
622 bool ext_sign = cca.int_ext >> 7;
623 unsigned ext_bits = cca.int_ext & 0x3f;
626 derived()->add_arg_byval(vp, cca);
628 }
else if (!cca.reg.valid()) {
// No register assigned: pass on the stack, extending first if required.
630 auto ext = std::move(vp).into_extended(&compiler, ext_sign, ext_bits, 64);
631 derived()->add_arg_stack(ext, cca);
632 ext.reset(&compiler);
634 derived()->add_arg_stack(vp, cca);
// Register argument: get the value into cca.reg.
638 u32 size = vp.part_size();
639 if (vp.is_in_reg(cca.reg)) {
// Already in the right register; evict other users unless we can salvage.
640 if (!vp.can_salvage()) {
641 compiler.evict_reg(cca.reg);
643 vp.salvage(&compiler);
646 compiler.generate_raw_intext(cca.reg, cca.reg, ext_sign, ext_bits, 64);
// Value is elsewhere: free the target register first.
649 if (compiler.register_file.is_used(cca.reg)) {
650 compiler.evict_reg(cca.reg);
652 if (vp.can_salvage()) {
653 AsmReg vp_reg = vp.salvage(&compiler);
655 compiler.generate_raw_intext(cca.reg, vp_reg, ext_sign, ext_bits, 64);
657 compiler.mov(cca.reg, vp_reg, size);
659 }
else if (needs_ext && vp.is_const()) {
// Constant: extend at compile time and materialize directly.
660 u64 val = vp.const_data()[0];
662 ext_sign ? util::sext(val, ext_bits) : util::zext(val, ext_bits);
663 compiler.materialize_constant(&extended, cca.bank, 8, cca.reg);
665 vp.reload_into_specific_fixed(&compiler, cca.reg);
667 compiler.generate_raw_intext(
668 cca.reg, cca.reg, ext_sign, ext_bits, 64);
// Reserve the argument register until the call is emitted.
673 assert(!compiler.register_file.is_used(cca.reg));
674 compiler.register_file.mark_clobbered(cca.reg);
675 compiler.register_file.allocatable &= ~(u64{1} << cca.reg.id());
676 arg_regs |= (1ull << cca.reg.id());
// Per-CallArg overload: resolves the IR value and forwards each part to the
// ValuePart overload with a CCAssignment describing byval/sret/consecutive
// placement. NOTE(review): the function signature line and several interior
// lines are missing from this extraction; code left byte-identical.
680template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
681template <
typename CBDerived>
684 ValueRef vr = compiler.val_ref(arg.value);
// byval arguments are passed as a single memory blob.
687 assert(part_count == 1);
691 .align = arg.byval_align,
692 .size = arg.byval_size,
697 u32 align = arg.byval_align;
700 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
703 assert(arg.ext_bits != 0 &&
"cannot extend zero-bit integer");
// `consecutive` caps at 255 because it is stored in a u8.
706 u32 remaining = part_count < 256 ? part_count - part_idx - 1 : 255;
707 derived()->add_arg(vr.part(part_idx),
709 .consecutive = u8(allow_split ? 0 : remaining),
710 .sret = arg.flag == CallArg::Flag::sret,
712 .align = u8(part_idx == 0 ? align : 1),
// Emits the call: evicts caller-saved registers (except argument registers
// and a salvageable call-target register), delegates to call_impl, then
// returns the argument registers to the allocatable set.
717template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
718template <
typename CBDerived>
720 std::variant<SymRef, ValuePart> target)
noexcept {
721 assert(!compiler.stack.is_leaf_function &&
"leaf func must not have calls");
722 compiler.stack.generated_call =
true;
// Argument registers are already reserved; don't evict them again.
723 typename RegisterFile::RegBitSet skip_evict = arg_regs;
724 if (
auto *vp = std::get_if<ValuePart>(&target); vp && vp->can_salvage()) {
726 assert(vp->cur_reg_unlocked().valid() &&
"can_salvage implies register");
// An indirect call target that can be salvaged keeps its register.
727 skip_evict |= (1ull << vp->cur_reg_unlocked().
id());
// Spill every used register the callee may clobber.
730 auto clobbered = ~assigner.get_ccinfo().callee_saved_regs;
731 for (
auto reg_id : util::BitSetIterator<>{compiler.register_file.used &
732 clobbered & ~skip_evict}) {
733 compiler.evict_reg(AsmReg{reg_id});
734 compiler.register_file.mark_clobbered(Reg{reg_id});
737 derived()->call_impl(std::move(target));
// Release the argument registers reserved by add_arg.
739 assert((compiler.register_file.allocatable & arg_regs) == 0);
740 compiler.register_file.allocatable |= arg_regs;
// Binds one return-value part to the register the CC assigner selects.
743template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
744template <
typename CBDerived>
746 CBDerived>
::add_ret(ValuePart &vp, CCAssignment cca)
noexcept {
747 cca.bank = vp.bank();
748 cca.size = vp.part_size();
749 assigner.assign_ret(cca);
750 assert(cca.reg.valid() &&
"return value must be in register");
751 vp.set_value_reg(&compiler, cca.reg);
// Convenience overload: binds every part of a multi-part return value.
// NOTE(review): signature lines are missing from this extraction.
754template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
755template <
typename CBDerived>
758 assert(vr.has_assignment());
759 u32 part_count = vr.assignment()->part_count;
760 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
762 add_ret(vr.part(part_idx), cca);
// RetBuilder::add — moves one value part into its return register,
// applying the integer extension packed into cca.int_ext (bit 7 = sign,
// low 6 bits = width). Mirrors CallBuilderBase::add_arg's register path.
// NOTE(review): interior lines are missing here (numbering jumps);
// code left byte-identical.
766template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
767void CompilerBase<Adaptor, Derived, Config>::RetBuilder::add(
768 ValuePart &&vp, CCAssignment cca)
noexcept {
769 cca.bank = vp.bank();
770 u32 size = cca.size = vp.part_size();
771 assigner.assign_ret(cca);
772 assert(cca.reg.valid() &&
"indirect return value must use sret argument");
// Decode the extension request.
774 bool needs_ext = cca.int_ext != 0;
775 bool ext_sign = cca.int_ext >> 7;
776 unsigned ext_bits = cca.int_ext & 0x3f;
778 if (vp.is_in_reg(cca.reg)) {
// Already in the return register; evict other users unless salvageable.
779 if (!vp.can_salvage()) {
780 compiler.evict_reg(cca.reg);
782 vp.salvage(&compiler);
785 compiler.generate_raw_intext(cca.reg, cca.reg, ext_sign, ext_bits, 64);
788 if (compiler.register_file.is_used(cca.reg)) {
789 compiler.evict_reg(cca.reg);
791 if (vp.can_salvage()) {
792 AsmReg vp_reg = vp.salvage(&compiler);
794 compiler.generate_raw_intext(cca.reg, vp_reg, ext_sign, ext_bits, 64);
796 compiler.mov(cca.reg, vp_reg, size);
799 vp.reload_into_specific_fixed(&compiler, cca.reg);
801 compiler.generate_raw_intext(cca.reg, cca.reg, ext_sign, ext_bits, 64);
// Reserve the return register until ret() is emitted.
806 assert(!compiler.register_file.is_used(cca.reg));
807 compiler.register_file.mark_clobbered(cca.reg);
808 compiler.register_file.allocatable &= ~(u64{1} << cca.reg.id());
809 ret_regs |= (1ull << cca.reg.id());
// Convenience overload: adds every part of an IR value as a return value.
812template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
813void CompilerBase<Adaptor, Derived, Config>::RetBuilder::add(
814 IRValueRef val)
noexcept {
815 u32 part_count = compiler.val_parts(val).count();
816 ValueRef vr = compiler.val_ref(val);
817 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
818 add(vr.part(part_idx), CCAssignment{});
// Finalizes the return: releases the reserved return registers and emits
// the function epilogue.
822template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
823void CompilerBase<Adaptor, Derived, Config>::RetBuilder::ret() noexcept {
824 assert((compiler.register_file.allocatable & ret_regs) == 0);
825 compiler.register_file.allocatable |= ret_regs;
827 compiler.gen_func_epilog();
828 compiler.release_regs_after_return();
// Module compilation driver: creates a symbol per function (undef for
// extern, predefined otherwise), runs the post-symbol hook, compiles each
// non-extern function, then finalizes the module.
// NOTE(review): the function signature line and several interior lines are
// missing from this extraction; code left byte-identical.
831template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
834 text_writer.begin_module(assembler);
835 text_writer.switch_section(
836 assembler.get_section(assembler.get_text_section()));
838 assert(func_syms.empty());
// Pass 1: create a symbol for every function so calls can be emitted
// before their targets are compiled.
839 for (
const IRFuncRef func : adaptor->funcs()) {
841 if (adaptor->func_has_weak_linkage(func)) {
843 }
else if (adaptor->func_only_local(func)) {
846 if (adaptor->func_extern(func)) {
847 func_syms.push_back(
derived()->assembler.sym_add_undef(
848 adaptor->func_link_name(func), binding));
850 func_syms.push_back(
derived()->assembler.sym_predef_func(
851 adaptor->func_link_name(func), binding));
853 derived()->define_func_idx(func, func_syms.size() - 1);
856 if (!
derived()->hook_post_func_sym_init()) {
// NOTE(review): message typo — "pust" should read "post"; left unchanged
// here because this is a runtime string.
857 TPDE_LOG_ERR(
"hook_pust_func_sym_init failed");
// Pass 2: compile every function body; extern functions have no body.
866 for (
const IRFuncRef func : adaptor->funcs()) {
867 if (adaptor->func_extern(func)) {
868 TPDE_LOG_TRACE(
"Skipping compilation of func {}",
869 adaptor->func_link_name(func));
874 TPDE_LOG_TRACE(
"Compiling func {}", adaptor->func_link_name(func));
875 if (!
derived()->compile_func(func, func_idx)) {
876 TPDE_LOG_ERR(
"Failed to compile function {}",
877 adaptor->func_link_name(func));
884 text_writer.end_module();
885 assembler.finalize();
// Fragment of a reset/cleanup routine: clears stack free lists, block
// labels and personality symbols (signature missing in this chunk).
892template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
896 for (
auto &e : stack.fixed_free_lists) {
899 stack.dynamic_free_lists.clear();
903 block_labels.clear();
904 personality_syms.clear();
// Creates the ValueAssignment for `value`: allocates per-part storage,
// records bank/size per part, and for single-part values optionally claims
// a fixed register when liveness and the fixed-assignment budget allow it.
// NOTE(review): interior lines are missing (e.g. 929-939, 944-945); code
// left byte-identical.
907template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
908void CompilerBase<Adaptor, Derived, Config>::init_assignment(
909 IRValueRef value, ValLocalIdx local_idx)
noexcept {
910 assert(val_assignment(local_idx) ==
nullptr);
911 TPDE_LOG_TRACE(
"Initializing assignment for value {}",
912 static_cast<u32
>(local_idx));
914 const auto parts =
derived()->val_parts(value);
915 const u32 part_count = parts.count();
916 assert(part_count > 0);
917 auto *assignment = assignments.allocator.allocate(part_count);
918 assignments.value_ptrs[
static_cast<u32
>(local_idx)] = assignment;
// Record bank and size for every part; track the largest part size.
920 u32 max_part_size = 0;
921 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
922 auto ap = AssignmentPartRef{assignment, part_idx};
924 ap.set_bank(parts.reg_bank(part_idx));
925 const u32 size = parts.size_bytes(part_idx);
927 max_part_size = std::max(max_part_size, size);
928 ap.set_part_size(size);
931 const auto &liveness = analyzer.liveness_info(local_idx);
// Single-part values may get a fixed register assignment when the value
// lives past this block and the per-bank budget is not exhausted.
940 if (part_count == 1) {
941 const auto &cur_loop =
942 analyzer.loop_from_idx(analyzer.block_loop_idx(cur_block_idx));
943 auto ap = AssignmentPartRef{assignment, 0};
946 liveness.last > cur_block_idx &&
947 cur_loop.definitions_in_childs +
948 assignments.cur_fixed_assignment_count[ap.bank().id()] <
949 Derived::NUM_FIXED_ASSIGNMENTS[ap.bank().id()];
// The derived class can force a fixed assignment (budget permitting).
950 if (
derived()->try_force_fixed_assignment(value)) {
951 try_fixed = assignments.cur_fixed_assignment_count[ap.bank().id()] <
952 Derived::NUM_FIXED_ASSIGNMENTS[ap.bank().id()];
957 AsmReg reg =
derived()->select_fixed_assignment_reg(ap, value);
958 TPDE_LOG_TRACE(
"Trying to assign fixed reg to value {}",
959 static_cast<u32
>(local_idx));
963 if (!reg.invalid() && !register_file.is_used(reg)) {
964 TPDE_LOG_TRACE(
"Assigning fixed assignment to reg {} for value {}",
966 static_cast<u32
>(local_idx));
// Lock the register for the value's whole lifetime.
968 ap.set_register_valid(
true);
969 ap.set_fixed_assignment(
true);
970 register_file.mark_used(reg, local_idx, 0);
971 register_file.inc_lock_count(reg);
972 register_file.mark_clobbered(reg);
973 ++assignments.cur_fixed_assignment_count[ap.bank().id()];
// Initialize the assignment header from liveness data.
978 const auto last_full = liveness.last_full;
979 const auto ref_count = liveness.ref_count;
981 assert(max_part_size <= 256);
982 assignment->max_part_size = max_part_size;
983 assignment->pending_free =
false;
984 assignment->variable_ref =
false;
985 assignment->stack_variable =
false;
986 assignment->delay_free = last_full;
987 assignment->part_count = part_count;
988 assignment->frame_off = 0;
989 assignment->references_left = ref_count;
// Releases a ValueAssignment: unregisters any fixed or valid registers,
// returns the spill slot (for non-variable-refs) and deallocates the
// assignment storage. NOTE(review): some interior lines missing; code
// left byte-identical.
992template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
994 ValLocalIdx local_idx, ValueAssignment *assignment)
noexcept {
995 TPDE_LOG_TRACE(
"Freeing assignment for value {}",
996 static_cast<u32
>(local_idx));
998 assert(assignments.value_ptrs[
static_cast<u32
>(local_idx)] == assignment);
999 assignments.value_ptrs[
static_cast<u32
>(local_idx)] =
nullptr;
1000 const auto is_var_ref = assignment->variable_ref;
1001 const u32 part_count = assignment->part_count;
// Release registers held by any part.
1004 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
1005 auto ap = AssignmentPartRef{assignment, part_idx};
1006 if (ap.fixed_assignment()) [[unlikely]] {
1007 const auto reg = ap.get_reg();
1008 assert(register_file.is_fixed(reg));
1009 assert(register_file.reg_local_idx(reg) == local_idx);
1010 assert(register_file.reg_part(reg) == part_idx);
1011 --assignments.cur_fixed_assignment_count[ap.bank().id()];
1012 register_file.dec_lock_count_must_zero(reg);
1013 register_file.unmark_used(reg);
1014 }
else if (ap.register_valid()) {
1015 const auto reg = ap.get_reg();
1016 assert(!register_file.is_fixed(reg));
1017 register_file.unmark_used(reg);
// Debug check: no register may still point at this value.
1022 for (
auto reg_id : register_file.used_regs()) {
1023 assert(register_file.reg_local_idx(AsmReg{reg_id}) != local_idx &&
1024 "freeing assignment that is still referenced by a register");
// A spill slot exists iff frame_off is on the allocated side of the frame.
1029 bool has_stack = Config::FRAME_INDEXING_NEGATIVE ? assignment->frame_off < 0
1030 : assignment->frame_off != 0;
1031 if (!is_var_ref && has_stack) {
1035 assignments.allocator.deallocate(assignment);
// Delayed-free variant: frees immediately unless delay_free is set, in
// which case the assignment is queued on the delayed-free list keyed by
// the value's last live block.
1038template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1039[[gnu::noinline]]
void
1041 ValLocalIdx local_idx, ValueAssignment *assignment)
noexcept {
1042 if (!assignment->delay_free) {
1043 free_assignment(local_idx, assignment);
1048 TPDE_LOG_TRACE(
"Delay freeing assignment for value {}",
1049 static_cast<u32
>(local_idx));
1050 const auto &liveness = analyzer.liveness_info(local_idx);
1051 auto &free_list_head = assignments.delayed_free_lists[u32(liveness.last)];
1052 assignment->next_delayed_free_entry = free_list_head;
1053 assignment->pending_free =
true;
1054 free_list_head = local_idx;
// Creates a single-part variable-ref assignment (pointer-sized, GP bank)
// and links it into the variable_ref_list.
1057template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1059 ValLocalIdx local_idx, u32 var_ref_data)
noexcept {
1060 TPDE_LOG_TRACE(
"Initializing variable-ref assignment for value {}",
1061 static_cast<u32
>(local_idx));
1063 assert(val_assignment(local_idx) ==
nullptr);
1064 auto *assignment = assignments.allocator.allocate_slow(1,
true);
1065 assignments.value_ptrs[
static_cast<u32
>(local_idx)] = assignment;
1067 assignment->max_part_size = Config::PLATFORM_POINTER_SIZE;
1068 assignment->variable_ref =
true;
1069 assignment->stack_variable =
false;
1070 assignment->part_count = 1;
// var_ref_custom_idx carries backend-specific data for the reference.
1071 assignment->var_ref_custom_idx = var_ref_data;
// next_delayed_free_entry is reused here as the variable_ref_list link.
1072 assignment->next_delayed_free_entry = assignments.variable_ref_list;
1074 assignments.variable_ref_list = local_idx;
1076 AssignmentPartRef ap{assignment, 0};
1078 ap.set_bank(Config::GP_BANK);
1079 ap.set_part_size(Config::PLATFORM_POINTER_SIZE);
// Stack-slot allocator: small slots (<= 16 bytes) come from power-of-two
// sized free lists; larger slots are 16-byte aligned and use per-size
// dynamic free lists; otherwise the frame is grown (with alignment padding
// pushed onto the smaller free lists). NOTE(review): some interior lines
// missing; code left byte-identical.
1082template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1084 u32 size)
noexcept {
1085 this->stack.frame_used =
true;
1086 unsigned align_bits = 4;
1089 }
else if (size <= 16) {
// Round up to the next power of two; free_list_idx = log2(size).
1091 u32 free_list_idx = size == 1 ? 0 : 32 - util::cnt_lz<u32>(size - 1);
1092 assert(size <= 1u << free_list_idx);
1093 size = 1 << free_list_idx;
1094 align_bits = free_list_idx;
1096 if (!stack.fixed_free_lists[free_list_idx].empty()) {
1097 auto slot = stack.fixed_free_lists[free_list_idx].back();
1098 stack.fixed_free_lists[free_list_idx].pop_back();
// Large slots: 16-byte aligned, recycled via per-size free lists.
1102 size = util::align_up(size, 16);
1103 auto it = stack.dynamic_free_lists.find(size);
1104 if (it != stack.dynamic_free_lists.end() && !it->second.empty()) {
1105 const auto slot = it->second.back();
1106 it->second.pop_back();
1111 assert(stack.frame_size != ~0u &&
1112 "cannot allocate stack slot before stack frame is initialized");
// Grow the frame to the needed alignment, donating the padding to the
// smaller fixed free lists so it is not wasted.
1115 for (u32 list_idx = util::cnt_tz(stack.frame_size); list_idx < align_bits;
1116 list_idx = util::cnt_tz(stack.frame_size)) {
1117 i32 slot = stack.frame_size;
1118 if constexpr (Config::FRAME_INDEXING_NEGATIVE) {
1119 slot = -(slot + (1ull << list_idx));
1121 stack.fixed_free_lists[list_idx].push_back(slot);
1122 stack.frame_size += 1ull << list_idx;
1125 auto slot = stack.frame_size;
1126 assert(slot != 0 &&
"stack slot 0 is reserved");
1127 stack.frame_size += size;
// Negative frame indexing returns offsets below the frame pointer.
1129 if constexpr (Config::FRAME_INDEXING_NEGATIVE) {
1130 slot = -(slot + size);
// Returns a slot to the matching free list (fixed for <=16, dynamic
// otherwise); zero-sized slots are a no-op.
1135template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1137 u32 slot, u32 size)
noexcept {
1138 if (size == 0) [[unlikely]] {
1139 assert(slot == 0 &&
"unexpected slot for zero-sized stack-slot?");
1141 }
else if (size <= 16) [[likely]] {
1142 u32 free_list_idx = size == 1 ? 0 : 32 - util::cnt_lz<u32>(size - 1);
1143 stack.fixed_free_lists[free_list_idx].push_back(slot);
1145 size = util::align_up(size, 16);
1146 stack.dynamic_free_lists[size].push_back(slot);
// Prologue argument handling: assigns incoming argument parts via the CC
// assigner and lets the derived class place them. byval arguments that land
// in the caller's frame become stack-variable assignments; sret gets a
// dedicated pointer-sized assignment. NOTE(review): the signature line and
// several interior lines are missing; code left byte-identical.
1150template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1152 CCAssigner *cc_assigner,
1156 bool allow_split)
noexcept {
1157 ValueRef vr =
derived()->result_ref(arg);
1158 if (adaptor->cur_arg_is_byval(arg_idx)) {
1161 .align = u8(adaptor->cur_arg_byval_align(arg_idx)),
1162 .size = adaptor->cur_arg_byval_size(arg_idx),
1164 cc_assigner->assign_arg(cca);
1165 std::optional<i32> byval_frame_off =
1166 derived()->prologue_assign_arg_part(vr.part(0), cca);
// When the byval blob already lives in the frame, re-point the value's
// assignment at that frame offset instead of copying.
1168 if (byval_frame_off) {
1170 ValLocalIdx local_idx = val_idx(arg);
1176 if (ValueAssignment *assignment = val_assignment(local_idx)) {
1177 free_assignment(local_idx, assignment);
1180 ValueAssignment *assignment = this->val_assignment(local_idx);
1181 assignment->stack_variable =
true;
1182 assignment->frame_off = *byval_frame_off;
1187 if (adaptor->cur_arg_is_sret(arg_idx)) {
1188 assert(vr.assignment()->part_count == 1 &&
"sret must be single-part");
1189 ValuePartRef vp = vr.part(0);
1191 .sret =
true, .bank = vp.bank(), .size = Config::PLATFORM_POINTER_SIZE};
1192 cc_assigner->assign_arg(cca);
1193 derived()->prologue_assign_arg_part(std::move(vp), cca);
// Regular arguments: one CCAssignment per part.
1197 const u32 part_count = vr.assignment()->part_count;
1198 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
1199 ValuePartRef vp = vr.part(part_idx);
// `consecutive` caps at 255 because it is stored in a u8.
1200 u32 remaining = part_count < 256 ? part_count - part_idx - 1 : 255;
1202 .consecutive = u8(allow_split ? 0 : remaining),
1203 .align = u8(part_idx == 0 ? align : 1),
1205 .size = vp.part_size(),
1207 cc_assigner->assign_arg(cca);
1208 derived()->prologue_assign_arg_part(std::move(vp), cca);
// val_ref: returns a reference to an already-defined value, routing
// "special" values (constants etc.) through the derived class first.
1212template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1213typename CompilerBase<Adaptor, Derived, Config>::ValueRef
1215 if (
auto special =
derived()->val_ref_special(value); special) {
1216 return ValueRef{
this, std::move(*special)};
1219 const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(value);
1220 assert(val_assignment(local_idx) !=
nullptr &&
"value use before def");
1221 return ValueRef{
this, local_idx};
// val_ref + first part in one call (common single-part case).
1224template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1225std::pair<typename CompilerBase<Adaptor, Derived, Config>::ValueRef,
1226 typename CompilerBase<Adaptor, Derived, Config>::ValuePartRef>
1228 IRValueRef value)
noexcept {
1229 std::pair<ValueRef, ValuePartRef> res{
val_ref(value),
this};
// part(0) must be taken after the ValueRef is constructed in the pair.
1230 res.second = res.first.part(0);
// result_ref: like val_ref but lazily creates the assignment on first use
// (values being defined rather than consumed).
1234template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1235typename CompilerBase<Adaptor, Derived, Config>::ValueRef
1237 IRValueRef value)
noexcept {
1238 const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(value);
1239 if (val_assignment(local_idx) ==
nullptr) {
1240 init_assignment(value, local_idx);
1242 return ValueRef{
this, local_idx};
// result_ref + first part in one call.
1245template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1246std::pair<typename CompilerBase<Adaptor, Derived, Config>::ValueRef,
1247 typename CompilerBase<Adaptor, Derived, Config>::ValuePartRef>
1249 IRValueRef value)
noexcept {
1250 std::pair<ValueRef, ValuePartRef> res{
result_ref(value),
this};
1251 res.second = res.first.part(0);
// Transfers src's entire assignment to dst (aliasing): requires src to be
// the sole owner with one reference left and matching part layout; registers
// are re-pointed and liveness metadata is reset for dst.
// NOTE(review): the signature line is missing and some interior lines were
// dropped; code left byte-identical.
1255template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1256typename CompilerBase<Adaptor, Derived, Config>::ValueRef
1258 IRValueRef dst, ValueRef &&src)
noexcept {
1259 const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(dst);
1260 assert(!val_assignment(local_idx) &&
"alias target already defined");
1261 assert(src.has_assignment() &&
"alias src must have an assignment");
1266 assert(src.is_owned() &&
"alias src must be owned");
1268 ValueAssignment *assignment = src.assignment();
1269 u32 part_count = assignment->part_count;
1270 assert(!assignment->pending_free);
1271 assert(!assignment->variable_ref);
1272 assert(!assignment->pending_free);
1275 const auto &src_liveness = analyzer.liveness_info(src.local_idx());
1276 assert(!src_liveness.last_full);
1277 assert(assignment->references_left == 1);
// Debug check: dst's part layout must match src's exactly.
1280 const auto parts =
derived()->val_parts(dst);
1281 assert(parts.count() == part_count);
1282 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
1283 AssignmentPartRef ap{assignment, part_idx};
1284 assert(parts.reg_bank(part_idx) == ap.bank());
1285 assert(parts.size_bytes(part_idx) == ap.part_size());
// Re-point any live registers from src's local index to dst's.
1291 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
1292 AssignmentPartRef ap{assignment, part_idx};
1293 if (ap.register_valid()) {
1294 register_file.update_reg_assignment(ap.get_reg(), local_idx, part_idx);
// Adopt dst's liveness; detach the assignment from src.
1298 const auto &liveness = analyzer.liveness_info(local_idx);
1299 assignment->delay_free = liveness.last_full;
1300 assignment->references_left = liveness.ref_count;
1301 assignments.value_ptrs[
static_cast<u32
>(src.local_idx())] =
nullptr;
1302 assignments.value_ptrs[
static_cast<u32
>(local_idx)] = assignment;
1306 return ValueRef{
this, local_idx};
// Defines dst as a stack variable at a fixed offset from an existing
// stack-based assignment part.
1309template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1310typename CompilerBase<Adaptor, Derived, Config>::ValueRef
1312 IRValueRef dst, AssignmentPartRef base, i32 off)
noexcept {
1313 const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(dst);
1314 assert(!val_assignment(local_idx) &&
"new value already defined");
1316 ValueAssignment *assignment = this->val_assignment(local_idx);
1317 assignment->stack_variable =
true;
1318 assignment->frame_off = base.variable_stack_off() + off;
1319 return ValueRef{
this, local_idx};
// Thin forwarder: installs a scratch register as the value of a part ref.
1322template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1323void CompilerBase<Adaptor, Derived, Config>::set_value(
1324 ValuePartRef &
val_ref, ScratchReg &scratch)
noexcept {
1325 val_ref.set_value(std::move(scratch));
// Materializes a GenericValuePart into a register, dispatching on its
// variant state: ScratchReg, ValuePartRef, or address Expr.
1328template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1329typename CompilerBase<Adaptor, Derived, Config>::AsmReg
1331 GenericValuePart &gv)
noexcept {
1332 if (std::holds_alternative<ScratchReg>(gv.state)) {
1333 return std::get<ScratchReg>(gv.state).cur_reg();
1335 if (std::holds_alternative<ValuePartRef>(gv.state)) {
1336 auto &vpr = std::get<ValuePartRef>(gv.state);
1337 if (vpr.has_reg()) {
1338 return vpr.cur_reg();
1340 return vpr.load_to_reg();
1342 if (
auto *expr = std::get_if<typename GenericValuePart::Expr>(&gv.state)) {
// A bare base register with no index/displacement needs no computation.
1343 if (expr->has_base() && !expr->has_index() && expr->disp == 0) {
1344 return expr->base_reg();
1346 return derived()->gval_expr_as_reg(gv);
1348 TPDE_UNREACHABLE(
"gval_as_reg on empty GenericValuePart");
// gval_as_reg overload that also tries to hand ownership of the resulting
// register to `dst` (a ScratchReg), salvaging from the GenericValuePart
// when possible. NOTE(review): several interior lines are missing
// (numbering jumps); code left byte-identical.
1351template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1352typename CompilerBase<Adaptor, Derived, Config>::AsmReg
1354 GenericValuePart &gv,
ScratchReg &dst)
noexcept {
1356 if (!dst.has_reg()) {
1357 if (
auto *scratch = std::get_if<ScratchReg>(&gv.state)) {
// Take over the scratch register directly.
1358 dst = std::move(*scratch);
1359 }
else if (
auto *
val_ref = std::get_if<ValuePartRef>(&gv.state)) {
// Salvage the value's register into dst.
1361 dst.alloc_specific(
val_ref->salvage());
1362 assert(dst.cur_reg() == reg &&
"salvaging unsuccessful");
// Same idea for a ValuePart destination: only transfer ownership when dst
// has no register and no fixed assignment of its own.
1369template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1370typename CompilerBase<Adaptor, Derived, Config>::AsmReg
1372 GenericValuePart &gv, ValuePart &dst)
noexcept {
1374 if (!dst.has_reg() &&
1375 (!dst.has_assignment() || !dst.assignment().fixed_assignment())) {
1377 if (
auto *scratch = std::get_if<ScratchReg>(&gv.state)) {
1378 dst.set_value(
this, std::move(*scratch));
1379 if (dst.has_assignment()) {
1382 }
else if (
auto *
val_ref = std::get_if<ValuePartRef>(&gv.state)) {
1384 dst.set_value(
this, std::move(*
val_ref));
1385 if (dst.has_assignment()) {
// Picks the best register in `bank` to evict: skips fixed registers and
// variable-refs, and scores the rest (already-spilled >> far last use >>
// few references left); highest score wins. NOTE(review): some interior
// lines are missing; code left byte-identical.
1394template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1395Reg CompilerBase<Adaptor, Derived, Config>::select_reg_evict(
1396 RegBank bank)
noexcept {
1397 TPDE_LOG_DBG(
"select_reg_evict for bank {}", bank.id());
1398 auto candidates = register_file.used & register_file.bank_regs(bank);
1400 Reg candidate = Reg::make_invalid();
1402 for (
auto reg_id : util::BitSetIterator<>(candidates)) {
// Fixed registers can never be evicted.
1404 if (register_file.is_fixed(reg)) {
1409 auto local_idx = register_file.reg_local_idx(reg);
1410 u32 part = register_file.reg_part(Reg{reg});
1411 assert(local_idx != INVALID_VAL_LOCAL_IDX);
1412 ValueAssignment *va = val_assignment(local_idx);
1413 AssignmentPartRef ap{va, part};
1426 if (ap.variable_ref()) {
1427 TPDE_LOG_DBG(
" r{} ({}) is variable-ref", reg_id, u32(local_idx));
// Bit 31: value already spilled, eviction is free.
1433 if (ap.stack_valid()) {
1434 score |= u32{1} << 31;
// Bits 16-30: prefer values whose last use is far away.
1437 const auto &liveness = analyzer.liveness_info(local_idx);
1438 u32 last_use_dist = u32(liveness.last) - u32(cur_block_idx);
1439 score |= (last_use_dist < 0x8000 ? 0x8000 - last_use_dist : 0) << 16;
// Low bits: prefer values with few references left.
1441 u32 refs_left = va->pending_free ? 0 : va->references_left;
1442 score |= (refs_left < 0xffff ? 0x10000 - refs_left : 1);
1444 TPDE_LOG_DBG(
" r{} ({}:{}) rc={}/{} live={}-{}{} spilled={} score={:#x}",
1450 u32(liveness.first),
1452 &
"*"[!liveness.last_full],
1457 if (score > max_score) {
1462 if (candidate.invalid()) [[unlikely]] {
1463 TPDE_FATAL(
"ran out of registers for scratch registers");
1465 TPDE_LOG_DBG(
" selected r{}", candidate.id());
// Reloads an assignment part into `dst`: spilled values load from the
// stack, stack variables load their address, and other variable-refs are
// delegated to the derived class when custom handling is configured.
1470template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1472 AsmReg dst, AssignmentPartRef ap)
noexcept {
1473 if (!ap.variable_ref()) {
1474 assert(ap.stack_valid());
1475 derived()->load_from_stack(dst, ap.frame_off(), ap.part_size());
1476 }
else if (ap.is_stack_variable()) {
1477 derived()->load_address_of_stack_var(dst, ap);
1478 }
else if constexpr (!Config::DEFAULT_VAR_REF_HANDLING) {
1479 derived()->load_address_of_var_reference(dst, ap);
1481 TPDE_UNREACHABLE(
"non-stack-variable needs custom var-ref handling");
// Lazily allocates the spill slot for an assignment (frame_off == 0 means
// "no slot yet"). NOTE(review): the allocating line itself is missing from
// this extraction; code left byte-identical.
1485template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1487 AssignmentPartRef ap)
noexcept {
1488 assert(!ap.variable_ref() &&
"cannot allocate spill slot for variable ref");
1489 if (ap.assignment()->frame_off == 0) {
1490 assert(!ap.stack_valid() &&
"stack-valid set without spill slot");
1492 assert(ap.assignment()->frame_off != 0);
// Spills a register-resident part to its stack slot (no-op when the stack
// copy is already valid or the part is a variable-ref).
1496template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1498 AssignmentPartRef ap)
noexcept {
1499 assert(may_change_value_state());
1500 if (!ap.stack_valid() && !ap.variable_ref()) {
1501 assert(ap.register_valid() &&
"cannot spill uninitialized assignment part");
1503 derived()->spill_reg(ap.get_reg(), ap.frame_off(), ap.part_size());
1504 ap.set_stack_valid();
// Drops the register of a part without spilling (caller must know the
// register copy is no longer needed).
1508template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1510 AssignmentPartRef ap)
noexcept {
1511 assert(may_change_value_state());
1512 assert(ap.register_valid());
1514 ap.set_register_valid(
false);
1515 register_file.unmark_used(ap.get_reg());
// Evicts `reg`: detaches it from the owning assignment part and marks it
// free. NOTE(review): the spill step appears to be on a missing line;
// code left byte-identical.
1518template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1520 assert(may_change_value_state());
1521 assert(!register_file.is_fixed(reg));
1522 assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);
1524 ValLocalIdx local_idx = register_file.reg_local_idx(reg);
1525 auto part = register_file.reg_part(reg);
1526 AssignmentPartRef evict_part{val_assignment(local_idx), part};
1527 assert(evict_part.register_valid());
1528 assert(evict_part.get_reg() == reg);
1530 evict_part.set_register_valid(
false);
1531 register_file.unmark_used(reg);
// Frees `reg` without spilling; asserts the register copy is not the only
// up-to-date copy (not modified, or a variable-ref).
1534template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1536 assert(may_change_value_state());
1537 assert(!register_file.is_fixed(reg));
1538 assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);
1540 ValLocalIdx local_idx = register_file.reg_local_idx(reg);
1541 auto part = register_file.reg_part(reg);
1542 AssignmentPartRef ap{val_assignment(local_idx), part};
1543 assert(ap.register_valid());
1544 assert(ap.get_reg() == reg);
1545 assert(!ap.modified() || ap.variable_ref());
1546 ap.set_register_valid(
false);
1547 register_file.unmark_used(reg);
// Decides which registers must be spilled/released before leaving the
// current block: spilling is mandatory unless the sole successor is the
// fall-through block with a single predecessor. Returns the set of
// registers to release. NOTE(review): many interior lines are missing
// (numbering jumps widely); code left byte-identical.
1550template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1551typename CompilerBase<Adaptor, Derived, Config>::RegisterFile::RegBitSet
1553 bool force_spill)
noexcept {
1577 using RegBitSet =
typename RegisterFile::RegBitSet;
1579 assert(may_change_value_state());
1581 const IRBlockRef cur_block_ref = analyzer.block_ref(cur_block_idx);
1585 BlockIndex earliest_next_succ = Analyzer<Adaptor>::INVALID_BLOCK_IDX;
1587 bool must_spill = force_spill;
// Inspect successors: is the next block a fall-through successor, and
// does it have multiple predecessors?
1591 auto next_block_is_succ =
false;
1592 auto next_block_has_multiple_incoming =
false;
1594 for (
const IRBlockRef succ : adaptor->block_succs(cur_block_ref)) {
1596 BlockIndex succ_idx = analyzer.block_idx(succ);
1597 if (u32(succ_idx) == u32(cur_block_idx) + 1) {
1598 next_block_is_succ =
true;
1599 if (analyzer.block_has_multiple_incoming(succ)) {
1600 next_block_has_multiple_incoming =
true;
1602 }
else if (succ_idx > cur_block_idx && succ_idx < earliest_next_succ) {
1603 earliest_next_succ = succ_idx;
1607 must_spill = !next_block_is_succ || next_block_has_multiple_incoming;
1609 if (succ_count == 1 && !must_spill) {
// Walk used registers and collect those whose values need no register
// across the edge.
1614 auto release_regs = RegBitSet{};
1616 for (
auto reg : register_file.used_regs()) {
1617 auto local_idx = register_file.reg_local_idx(Reg{reg});
1618 auto part = register_file.reg_part(Reg{reg});
1619 if (local_idx == INVALID_VAL_LOCAL_IDX) {
1623 AssignmentPartRef ap{val_assignment(local_idx), part};
1624 if (ap.fixed_assignment()) {
1632 release_regs |= RegBitSet{1ull} << reg;
// Unmodified or variable-ref parts need no spill, only release.
1635 if (!ap.modified() || ap.variable_ref()) {
1640 const auto &liveness = analyzer.liveness_info(local_idx);
1641 if (liveness.last <= cur_block_idx) {
1654 if (must_spill || earliest_next_succ <= liveness.last) {
1659 return release_regs;
// Releases (without spilling) the non-fixed registers in `regs` that are
// still marked used. NOTE(review): loop body line missing; code unchanged.
1662template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1664 typename RegisterFile::RegBitSet regs)
noexcept {
1665 assert(may_change_value_state());
1668 for (
auto reg_id : util::BitSetIterator<>{regs & register_file.used}) {
1669 if (!register_file.is_fixed(Reg{reg_id})) {
// After a return, all non-fixed registers can be dropped.
1675template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1679 for (
auto reg_id : register_file.used_regs()) {
1680 if (!register_file.is_fixed(Reg{reg_id})) {
// Emits a (possibly conditional) branch to `target`, inserting PHI moves.
// When the edge needs a split, the branch is inverted around a local label
// so PHI moves run only on the taken path. NOTE(review): interior lines
// are missing (numbering jumps); code left byte-identical.
1686template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1687template <
typename Jump>
1689 Jump jmp, IRBlockRef target,
bool needs_split,
bool last_inst)
noexcept {
1690 BlockIndex target_idx = this->analyzer.block_idx(target);
1691 Label target_label = this->block_labels[u32(target_idx)];
1693 move_to_phi_nodes(target_idx);
// Fall-through needs no jump when this is the last instruction.
1694 if (!last_inst || target_idx != this->next_block()) {
1695 derived()->generate_raw_jump(jmp, target_label);
// Split edge: skip over the PHI moves when the branch is not taken.
1698 Label tmp_label = this->text_writer.label_create();
1699 derived()->generate_raw_jump(
derived()->invert_jump(jmp), tmp_label);
1700 move_to_phi_nodes(target_idx);
1701 derived()->generate_raw_jump(Jump::jmp, target_label);
// Fragment: move_to_phi_nodes_impl — body not visible in this chunk.
1706template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1708 IRBlockRef target)
noexcept {
// Two-way conditional branch: orders the two edges so the fall-through
// block is reached without a jump where possible.
1718template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1719template <
typename Jump>
1721 Jump jmp, IRBlockRef true_target, IRBlockRef false_target)
noexcept {
1722 IRBlockRef next = analyzer.block_ref(next_block());
1730 if (next == true_target || (next != false_target && true_needs_split)) {
// Invert the condition so the false edge is branched and the true edge
// falls through.
1732 derived()->invert_jump(jmp), false_target, false_needs_split,
false);
1734 }
else if (next == false_target) {
1738 assert(!true_needs_split);
// Lower a switch on `cond` (an integer of `width` <= 64 bits) over `cases`
// (sorted (value, block) pairs) with fallback `default_block`.
// Strategy, chosen recursively per sub-range by `build_range`:
//   - <= 4 cases: a linear chain of compare-and-branch (switch_emit_cmpeq);
//   - dense range (range/num_cases < 8): a jump table
//     (switch_emit_jump_table), padding holes with the default label;
//   - otherwise: one binary-search step (switch_emit_binary_step) splitting
//     at the median value, recursing into both halves.
// Targets that have PHI nodes cannot be jumped to directly: they get a fresh
// trampoline label here, and after the dispatch code a small stub per target
// performs the PHI moves and jumps on (the case_blocks loop at the end).
// NOTE(review): many original lines are missing from this excerpt (1748-1750,
// 1753-1756, 1758-1759, 1761, 1766-1771, 1773-1776, 1778-1779, 1782-1784,
// 1789, 1791-1793, 1795, 1801-1802, 1805-1806, 1808-1816, 1818-1819,
// 1821-1827, 1832, 1837, 1839-1841, 1843-1853, 1857-1860, 1862-1867,
// 1869-1874, 1876-1878, 1881, 1883-1884, 1886, 1889+) — the function name
// line, `cond`/`width` parameters, several call arguments, and closing braces
// are not visible here.
1747template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1751 IRBlockRef default_block,
1752 std::span<
const std::pair<u64, IRBlockRef>> cases)
noexcept {
1757 assert(width <= 64);
// Case indices are stored as u32 below, so the case count must fit.
1760 assert(cases.size() < UINT32_MAX &&
"large switches are unsupported");
1762 AsmReg cmp_reg = cond.cur_reg();
1763 bool width_is_32 = width <= 32;
// Zero-extend the condition to a full 32/64-bit register so comparisons and
// table indexing see a canonical value.
1764 if (u32 dst_width = util::align_up(width, 32); width != dst_width) {
1765 derived()->generate_raw_intext(cmp_reg, cmp_reg,
false, width, dst_width);
1772 AsmReg tmp_reg = tmp_scratch.
alloc_gp();
// One label per case. Targets with PHIs get a private trampoline label;
// PHI-free targets can use the block's regular label directly.
1777 tpde::util::SmallVector<tpde::Label, 64> case_labels;
1780 tpde::util::SmallVector<std::pair<tpde::Label, IRBlockRef>, 64> case_blocks;
1781 for (
auto i = 0u; i < cases.size(); ++i) {
1785 BlockIndex target = this->analyzer.block_idx(cases[i].second);
1786 if (analyzer.block_has_phis(target)) {
1787 case_labels.push_back(this->text_writer.label_create());
1788 case_blocks.emplace_back(case_labels.back(), cases[i].second);
1790 case_labels.push_back(this->block_labels[u32(target)]);
1794 const auto default_label = this->text_writer.label_create();
// Recursive range lowering; `self` is passed explicitly so the lambda can
// recurse without std::function.
1796 const auto build_range = [&,
1797 this](
size_t begin,
size_t end,
const auto &self) {
1798 assert(begin <= end);
1799 const auto num_cases = end - begin;
// Few cases: straight-line compare-and-branch chain, then default.
1800 if (num_cases <= 4) {
1803 for (
auto i = 0u; i < num_cases; ++i) {
1804 derived()->switch_emit_cmpeq(case_labels[begin + i],
1807 cases[begin + i].first,
1811 derived()->generate_raw_jump(Derived::Jump::jmp, default_label);
// Value span of this sub-range; guards the density heuristic below.
1817 auto range = cases[end - 1].first - cases[begin].first;
// Dense enough for a jump table (and the span doesn't overflow).
1820 if (range != 0xFFFF'FFFF'FFFF'FFFF && (range / num_cases) < 8) {
1828 tpde::util::SmallVector<tpde::Label, 32> label_vec;
1829 std::span<tpde::Label> labels;
// If the range is fully populated, the existing labels serve directly;
// otherwise build a padded table with default_label in the holes.
1830 if (range == num_cases) {
1831 labels = std::span{case_labels.begin() + begin, num_cases};
1833 label_vec.resize(range, default_label);
1834 for (
auto i = 0u; i < num_cases; ++i) {
1835 label_vec[cases[begin + i].first - cases[begin].first] =
1836 case_labels[begin + i];
1838 labels = std::span{label_vec.begin(), range};
1842 if (
derived()->switch_emit_jump_table(default_label,
1847 cases[end - 1].first,
// Sparse: one binary-search step at the median, then recurse on each half.
1854 const auto half_len = num_cases / 2;
1855 const auto half_value = cases[begin + half_len].first;
1856 const auto gt_label = this->text_writer.label_create();
1861 derived()->switch_emit_binary_step(case_labels[begin + half_len],
1868 self(begin, begin + half_len, self);
1872 self(begin + half_len + 1, end, self);
1875 build_range(0, case_labels.size(), build_range);
// Default edge: a regular block branch (handles its PHI moves itself).
1879 derived()->generate_branch_to_block(
1880 Derived::Jump::jmp, default_block,
false,
false);
// Trampolines for PHI-carrying targets: moves + jump, emitted after the
// dispatch code so the dispatch stays compact.
1882 for (
const auto &[label, target] : case_blocks) {
1885 this->text_writer.align(8);
1887 derived()->generate_branch_to_block(
1888 Derived::Jump::jmp, target,
false,
false);
// Move the incoming values (for the edge cur_block_idx -> target) into the
// PHI nodes of `target`. PHIs may read other PHIs of the same block, so the
// moves form a dependency graph that is processed topologically; cycles are
// broken by parking one PHI's current value in a temporary (registers for
// 1-2 parts, a stack slot otherwise) before overwriting it.
// NOTE(review): this excerpt is missing many original lines (1898-1905, 1907,
// 1911, 1913, 1915, 1917-1918, 1920-1922, 1925-1928, 1930-1932, 1935, 1938,
// 1943-1945, 1947-1948, 1951-1952, 1954-1958, 1962, 1966-1968, 1973-1975,
// 1981, 1984-1985, 1989-1992, 1995-1996, 1999-2002, 2005, 2007-2009,
// 2012-2013, 2016-2018, 2025-2027, 2029, 2036-2038, 2043, 2047, 2049-2050,
// 2052, 2055-2056, 2059-2062, 2065-2068, 2070-2071, 2074-2075, 2078-2080,
// 2084-2087, 2089, 2091-2094, 2097-2101, 2109, 2111-2113, 2119-2120, 2128,
// 2130-2131, 2136, 2138-2141, 2145, 2150-2152, 2155, 2161, 2167, 2169,
// 2171-2172, 2177, 2181, 2186-2190, 2194, 2196, 2200-2202, 2206-2207, 2209,
// 2214-2219, 2222, 2224, 2227, 2229-2234, 2236, 2238-2240, 2245, 2251-2252,
// 2254+) — braces, else-arms, the tmp_reg1/tmp_reg2 declarations, ref_count
// maintenance, and handled_count updates are not visible here.
1895template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1896void CompilerBase<Adaptor, Derived, Config>::move_to_phi_nodes_impl(
1897 BlockIndex target)
noexcept {
// RAII scratch register: steals a register (evicting and spilling its owner
// value if necessary) and, on reset/destruction, restores the owner by
// reloading from its spill slot and re-marking the register as used.
1906 struct ScratchWrapper {
1908 AsmReg cur_reg = AsmReg::make_invalid();
// True if a live value was displaced and must be restored on reset.
1909 bool backed_up =
false;
// Preserves the displaced assignment's modified flag across the borrow.
1910 bool was_modified =
false;
// Owner of the displaced register part, so it can be restored later.
1912 ValLocalIdx local_idx = INVALID_VAL_LOCAL_IDX;
1914 ScratchWrapper(Derived *self) : self{self} {}
1916 ~ScratchWrapper() { reset(); }
// reset(): release the register and restore any displaced value.
1919 if (cur_reg.invalid()) {
1923 self->register_file.unmark_fixed(cur_reg);
1924 self->register_file.unmark_used(cur_reg);
1929 auto *assignment = self->val_assignment(local_idx);
1933 auto ap = AssignmentPartRef{assignment, part};
1934 if (!ap.variable_ref()) {
// The displaced value was spilled on acquisition; reload it.
1936 assert(ap.stack_valid());
1937 self->load_from_stack(cur_reg, ap.frame_off(), ap.part_size());
1939 ap.set_reg(cur_reg);
1940 ap.set_register_valid(
true);
1941 ap.set_modified(was_modified);
1942 self->register_file.mark_used(cur_reg, local_idx, part);
1946 cur_reg = AsmReg::make_invalid();
// Acquire a scratch register from `bank`, reusing the held one when the
// bank matches; prefers a free register, else displaces a non-fixed one
// (spilling its value to the stack first).
1949 AsmReg alloc_from_bank(RegBank bank) {
1950 if (cur_reg.valid() && self->register_file.reg_bank(cur_reg) == bank) {
1953 if (cur_reg.valid()) {
// NOTE(review): `®_file` below is a mojibake for `&reg_file` introduced
// by text extraction; the declared name is referenced as `reg_file` in
// the following lines. Left byte-identical here — fix in full source.
1959 auto ®_file = self->register_file;
1960 auto reg = reg_file.find_first_free_excluding(bank, 0);
1961 if (reg.invalid()) {
// No free register: displace a non-fixed one.
1963 reg = reg_file.find_first_nonfixed_excluding(bank, 0);
1964 if (reg.invalid()) {
1965 TPDE_FATAL(
"ran out of registers for scratch registers");
// Remember the displaced owner so reset() can restore it.
1969 local_idx = reg_file.reg_local_idx(reg);
1970 part = reg_file.reg_part(reg);
1971 AssignmentPartRef ap{self->val_assignment(local_idx), part};
1972 was_modified = ap.modified();
1976 assert(ap.register_valid() && ap.get_reg() == reg);
// Spill the displaced value unless it is already stack-valid or a
// variable-ref (which has no register-resident data to preserve).
1977 if (!ap.stack_valid() && !ap.variable_ref()) {
1978 self->allocate_spill_slot(ap);
1979 self->spill_reg(ap.get_reg(), ap.frame_off(), ap.part_size());
1980 ap.set_stack_valid();
1982 ap.set_register_valid(
false);
1983 reg_file.unmark_used(reg);
// Claim the register as a fixed, anonymous scratch.
1986 reg_file.mark_used(reg, INVALID_VAL_LOCAL_IDX, 0);
1987 reg_file.mark_clobbered(reg);
1988 reg_file.mark_fixed(reg);
// Non-copyable/non-movable: the wrapper owns its register exclusively.
1993 ScratchWrapper &operator=(
const ScratchWrapper &) =
delete;
1994 ScratchWrapper &operator=(ScratchWrapper &&) =
delete;
1997 IRBlockRef target_ref = analyzer.block_ref(target);
1998 IRBlockRef cur_ref = analyzer.block_ref(cur_block_idx);
// NodeEntry: one PHI of the target block plus its incoming value on this
// edge; incoming_phi_local_idx links to a sibling PHI it depends on (if any).
2003 IRValueRef incoming_val;
2004 ValLocalIdx phi_local_idx;
2006 ValLocalIdx incoming_phi_local_idx = INVALID_VAL_LOCAL_IDX;
// Ordered by PHI local index so dependencies can be found via binary search.
2010 bool operator<(
const NodeEntry &other)
const noexcept {
2011 return phi_local_idx < other.phi_local_idx;
2014 bool operator<(ValLocalIdx other)
const noexcept {
2015 return phi_local_idx < other;
2019 util::SmallVector<NodeEntry, 16> nodes;
// Collect (phi, incoming-value) pairs for this edge.
2020 for (IRValueRef phi : adaptor->block_phis(target_ref)) {
2021 ValLocalIdx phi_local_idx = adaptor->val_local_idx(phi);
2022 auto incoming = adaptor->val_as_phi(phi).incoming_val_for_block(cur_ref);
2023 nodes.emplace_back(NodeEntry{
2024 .phi = phi, .incoming_val = incoming, .phi_local_idx = phi_local_idx});
2028 assert(!nodes.empty() &&
"block marked has having phi nodes has none");
2030 ScratchWrapper scratch{
derived()};
// Copy one incoming value into one PHI's storage (all parts).
2031 const auto move_to_phi = [
this, &scratch](IRValueRef phi,
2032 IRValueRef incoming_val) {
2033 auto phi_vr =
derived()->result_ref(phi);
2034 auto val_vr =
derived()->val_ref(incoming_val);
// Self-reference: nothing to move.
2035 if (phi == incoming_val) {
2039 u32 part_count = phi_vr.assignment()->part_count;
2040 for (u32 i = 0; i < part_count; ++i) {
2041 AssignmentPartRef phi_ap{phi_vr.assignment(), i};
2042 ValuePartRef val_vpr = val_vr.part(i);
// Fixed-register PHI: move/reload directly into its register.
2044 if (phi_ap.fixed_assignment()) {
2045 if (AsmReg reg = val_vpr.cur_reg_unlocked(); reg.valid()) {
2046 derived()->mov(phi_ap.get_reg(), reg, phi_ap.part_size());
2048 val_vpr.reload_into_specific_fixed(phi_ap.get_reg());
// Stack-resident PHI: stage the value in a register (borrowing a
// scratch if needed) and spill it to the PHI's slot.
2051 AsmReg reg = val_vpr.cur_reg_unlocked();
2053 reg = scratch.alloc_from_bank(val_vpr.bank());
2054 val_vpr.reload_into_specific_fixed(reg);
2057 derived()->spill_reg(reg, phi_ap.frame_off(), phi_ap.part_size());
2058 phi_ap.set_stack_valid();
// Fast path: a single PHI cannot depend on a sibling.
2063 if (nodes.size() == 1) {
2064 move_to_phi(nodes[0].phi, nodes[0].incoming_val);
// Sort so sibling dependencies can be located with lower_bound.
2069 std::sort(nodes.begin(), nodes.end());
// Link each node to the sibling PHI it reads (if any) and track whether
// any dependency exists at all.
2072 auto all_zero_ref =
true;
2073 for (
auto &node : nodes) {
2076 bool incoming_is_phi = adaptor->val_is_phi(node.incoming_val);
// Not a PHI of this block, or a self-loop: no ordering constraint.
2077 if (!incoming_is_phi || node.incoming_val == node.phi) {
2081 ValLocalIdx inc_local_idx = adaptor->val_local_idx(node.incoming_val);
2082 auto it = std::lower_bound(nodes.begin(), nodes.end(), inc_local_idx);
2083 if (it == nodes.end() || it->phi != node.incoming_val) {
2088 node.incoming_phi_local_idx = inc_local_idx;
2090 all_zero_ref =
false;
// No cross-PHI dependencies: any order works.
2095 for (
auto &node : nodes) {
2096 move_to_phi(node.phi, node.incoming_val);
// General case: Kahn-style topological processing. ready_indices holds
// nodes whose value is no longer read by an unprocessed sibling;
// waiting_nodes tracks the rest.
2102 util::SmallVector<u32, 32> ready_indices;
2103 ready_indices.reserve(nodes.size());
2104 util::SmallBitSet<256> waiting_nodes;
2105 waiting_nodes.resize(nodes.size());
2106 for (u32 i = 0; i < nodes.size(); ++i) {
2107 if (nodes[i].ref_count) {
2108 waiting_nodes.mark_set(i);
2110 ready_indices.push_back(i);
2114 u32 handled_count = 0;
// State of the cycle-breaking temporary: the parked PHI value lives in
// tmp_reg1/tmp_reg2 (<= 2 parts) or in stack slot cur_tmp_slot.
2115 u32 cur_tmp_part_count = 0;
2116 i32 cur_tmp_slot = 0;
2117 u32 cur_tmp_slot_size = 0;
2118 IRValueRef cur_tmp_val = Adaptor::INVALID_VALUE_REF;
// Copy the parked temporary into `target_phi`'s storage.
2121 const auto move_from_tmp_phi = [&](IRValueRef target_phi) {
2122 auto phi_vr =
val_ref(target_phi);
2123 if (cur_tmp_part_count <= 2) {
2124 AssignmentPartRef ap{phi_vr.assignment(), 0};
2125 assert(!tmp_reg1.cur_reg.invalid());
2126 if (ap.fixed_assignment()) {
2127 derived()->mov(ap.get_reg(), tmp_reg1.cur_reg, ap.part_size());
2129 derived()->spill_reg(tmp_reg1.cur_reg, ap.frame_off(), ap.part_size());
2132 if (cur_tmp_part_count == 2) {
2133 AssignmentPartRef ap_high{phi_vr.assignment(), 1};
2134 assert(!ap_high.fixed_assignment());
2135 assert(!tmp_reg2.cur_reg.invalid());
2137 tmp_reg2.cur_reg, ap_high.frame_off(), ap_high.part_size());
// > 2 parts: shuttle each part from the temp stack slot via a register.
2142 for (u32 i = 0; i < cur_tmp_part_count; ++i) {
2143 AssignmentPartRef phi_ap{phi_vr.assignment(), i};
2144 assert(!phi_ap.fixed_assignment());
2146 auto slot_off = cur_tmp_slot + phi_ap.part_off();
2147 auto reg = tmp_reg1.alloc_from_bank(phi_ap.bank());
2148 derived()->load_from_stack(reg, slot_off, phi_ap.part_size());
2149 derived()->spill_reg(reg, phi_ap.frame_off(), phi_ap.part_size());
// Main loop: drain ready nodes; when none remain but nodes are still
// waiting, a cycle exists — break it by parking one waiting PHI's value.
2153 while (handled_count != nodes.size()) {
2154 if (ready_indices.empty()) {
2156 auto cur_idx_opt = waiting_nodes.first_set();
2157 assert(cur_idx_opt);
2158 auto cur_idx = *cur_idx_opt;
// In a simple cycle every node is read exactly once, and the temp
// must be free.
2159 assert(nodes[cur_idx].ref_count == 1);
2160 assert(cur_tmp_val == Adaptor::INVALID_VALUE_REF);
2162 auto phi_val = nodes[cur_idx].phi;
2163 auto phi_vr = this->
val_ref(phi_val);
2164 auto *assignment = phi_vr.assignment();
2165 cur_tmp_part_count = assignment->part_count;
2166 cur_tmp_val = phi_val;
// Large values: park all parts in a dedicated stack slot.
2168 if (cur_tmp_part_count > 2) {
2170 cur_tmp_slot_size = assignment->size();
2173 for (u32 i = 0; i < cur_tmp_part_count; ++i) {
2174 auto ap = AssignmentPartRef{assignment, i};
2175 assert(!ap.fixed_assignment());
2176 auto slot_off = cur_tmp_slot + ap.part_off();
2178 if (ap.register_valid()) {
2179 auto reg = ap.get_reg();
2180 derived()->spill_reg(reg, slot_off, ap.part_size());
2182 auto reg = tmp_reg1.alloc_from_bank(ap.bank());
2183 assert(ap.stack_valid());
2184 derived()->load_from_stack(reg, ap.frame_off(), ap.part_size());
2185 derived()->spill_reg(reg, slot_off, ap.part_size());
// Small values: park part 0 (and part 1) in scratch registers.
2191 auto phi_vpr = phi_vr.part(0);
2192 auto reg = tmp_reg1.alloc_from_bank(phi_vpr.bank());
2193 phi_vpr.reload_into_specific_fixed(
this, reg);
2195 if (cur_tmp_part_count == 2) {
2197 auto phi_vpr_high = phi_vr.part(1);
2198 auto reg_high = tmp_reg2.alloc_from_bank(phi_vpr_high.bank());
2199 phi_vpr_high.reload_into_specific_fixed(
this, reg_high);
// The parked node no longer blocks anyone: mark it ready.
2203 nodes[cur_idx].ref_count = 0;
2204 ready_indices.push_back(cur_idx);
2205 waiting_nodes.mark_unset(cur_idx);
// Process all currently-ready nodes; completing one may unblock the
// sibling it reads from.
2208 for (u32 i = 0; i < ready_indices.size(); ++i) {
2210 auto cur_idx = ready_indices[i];
2211 auto phi_val = nodes[cur_idx].phi;
2212 IRValueRef incoming_val = nodes[cur_idx].incoming_val;
2213 if (incoming_val == phi_val) {
// Source was parked in the temp earlier: copy from the temp and free it.
2220 if (incoming_val == cur_tmp_val) {
2221 move_from_tmp_phi(phi_val);
2223 if (cur_tmp_part_count > 2) {
// Poison the slot offset so stale use is detectable.
2225 cur_tmp_slot = 0xFFFF'FFFF;
2226 cur_tmp_slot_size = 0;
2228 cur_tmp_val = Adaptor::INVALID_VALUE_REF;
2235 move_to_phi(phi_val, incoming_val);
// No sibling dependency: nothing to unblock.
2237 if (nodes[cur_idx].incoming_phi_local_idx == INVALID_VAL_LOCAL_IDX) {
// Find the sibling PHI we just consumed and release one reference;
// when its ref_count hits zero it becomes ready.
2241 auto it = std::lower_bound(
2242 nodes.begin(), nodes.end(), nodes[cur_idx].incoming_phi_local_idx);
2243 assert(it != nodes.end() && it->phi == incoming_val &&
2244 "incoming_phi_local_idx set incorrectly");
2246 assert(it->ref_count > 0);
2247 if (--it->ref_count == 0) {
2248 auto node_idx =
static_cast<u32
>(it - nodes.begin());
2249 ready_indices.push_back(node_idx);
2250 waiting_nodes.mark_unset(node_idx);
2253 ready_indices.clear();
// next_block(): index of the block that follows the current one in layout
// order — simply cur_block_idx + 1 (block indices are layout-ordered).
// Used by branch emission to detect fall-through opportunities.
// NOTE(review): the function signature line (original 2258-2259, presumably
// `CompilerBase<...>::next_block() const noexcept {`) is missing from this
// excerpt; only the template header and return statement survive.
2257template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
2260 return static_cast<BlockIndex
>(
static_cast<u32
>(cur_block_idx) + 1);
// get_personality_sym(): return (creating and caching on first use) a data
// symbol that holds a pointer to the current function's personality routine,
// for use in unwind information. Returns an invalid SymRef when the function
// needs no unwind info or has no personality function.
// NOTE(review): original lines 2264, 2272-2275, 2277-2279, 2281, 2284, 2286,
// 2288-2290, 2292-2293 are missing from this excerpt — the function name
// line, loop/if closers, the `off` computation, and sym_def_data arguments
// are not visible.
2263template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
2265 SymRef personality_sym;
2266 if (this->adaptor->cur_needs_unwind_info()) {
2267 SymRef personality_func =
derived()->cur_personality_func();
2268 if (personality_func.valid()) {
// Linear scan of the per-module cache of (function sym, pointer sym) pairs.
2269 for (
const auto &[fn_sym, ptr_sym] : personality_syms) {
2270 if (fn_sym == personality_func) {
2271 personality_sym = ptr_sym;
// Cache miss: emit an 8-byte zero-initialized pointer slot in a read-only
// data section and attach an absolute relocation to the personality
// function, then remember the pair for later functions.
2276 if (!personality_sym.valid()) {
2280 static constexpr std::array<u8, 8> zero{};
2282 auto rodata = this->assembler.get_data_section(
true,
true);
2283 personality_sym = this->assembler.sym_def_data(
2285 this->assembler.reloc_abs(rodata, personality_func, off, 0);
2287 personality_syms.emplace_back(personality_func, personality_sym);
2291 return personality_sym;
// compile_func(func, func_idx): top-level per-function compilation driver.
// Switches the adaptor and analyzer to `func`, resets all per-function state
// (stack frame, value assignments, register file), creates labels for every
// block, runs the calling-convention prologue and argument assignment, sets
// up static allocas and variable-refs, compiles each block in layout order,
// and finally releases leftover variable-ref assignments and finishes the
// function in the text writer. Returns false on failure.
// NOTE(review): original lines 2295, 2298-2300, 2302-2304, 2306, 2308-2309,
// 2311, 2316, 2321-2322, 2328-2329, 2331, 2333-2337, 2340, 2342, 2347-2351,
// 2354, 2356, 2358, 2363-2364, 2366, 2368-2369, 2371-2372, 2374, 2376, 2380,
// 2382, 2385-2387, 2390-2391, 2394, 2398, 2400, 2402-2406, 2413-2414, 2417,
// 2420-2423 are missing from this excerpt — the function name line, early
// returns, `arg_idx`/`off` declarations, several closers, and the
// TPDE_LOG_DBG call head are not visible.
2294template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
2296 const IRFuncRef func,
const u32 func_idx)
noexcept {
// Point the IR adaptor and the liveness/layout analyzer at this function.
2297 if (!adaptor->switch_func(func)) {
2301 analyzer.switch_func(func);
// Reset stack-frame bookkeeping; frame_size is finalized later.
2305 stack.frame_size = ~0u;
2307 for (
auto &e : stack.fixed_free_lists) {
2310 stack.dynamic_free_lists.clear();
2312 stack.has_dynamic_alloca = this->adaptor->cur_has_dynamic_alloca();
2313 stack.is_leaf_function = !
derived()->cur_func_may_emit_calls();
2314 stack.generated_call =
false;
2315 stack.frame_used =
false;
// Reset value-assignment storage; value_ptrs must be empty (all freed)
// from the previous function.
2317 assignments.cur_fixed_assignment_count = {};
2318 assert(std::ranges::none_of(assignments.value_ptrs, std::identity{}));
2319 if (assignments.value_ptrs.size() < analyzer.liveness.size()) {
2320 assignments.value_ptrs.resize(analyzer.liveness.size());
2323 assignments.allocator.reset();
2324 assignments.variable_ref_list = INVALID_VAL_LOCAL_IDX;
2325 assignments.delayed_free_lists.clear();
2326 assignments.delayed_free_lists.resize(analyzer.block_layout.size(),
2327 INVALID_VAL_LOCAL_IDX);
2330 static_cast<BlockIndex
>(analyzer.block_idx(adaptor->cur_entry_block()));
2332 register_file.reset();
// Heuristic code-size estimate to pre-size the text buffer.
2338 u32 expected_code_size = 0x8 * analyzer.num_insts + 0x40;
2339 this->text_writer.begin_func(16, expected_code_size);
2341 derived()->start_func(func_idx);
// One label per block in layout order.
2343 block_labels.clear();
2344 block_labels.resize_uninitialized(analyzer.block_layout.size());
2345 for (u32 i = 0; i < analyzer.block_layout.size(); ++i) {
2346 block_labels[i] = text_writer.label_create();
// Calling-convention setup for this function.
2352 CCAssigner *cc_assigner =
derived()->cur_cc_assigner();
2353 assert(cc_assigner !=
nullptr);
2355 register_file.allocatable = cc_assigner->get_ccinfo().allocatable_regs;
2357 cc_assigner->reset();
2359 const CCInfo &cc_info = cc_assigner->get_ccinfo();
2360 assert((cc_info.allocatable_regs & cc_info.arg_regs) == cc_info.arg_regs &&
2361 "argument registers must also be allocatable");
// Reserve the argument registers while the prologue consumes them …
2362 this->register_file.allocatable &= ~cc_info.arg_regs;
2365 derived()->prologue_begin(cc_assigner);
2367 for (
const IRValueRef arg : this->adaptor->cur_args()) {
2370 derived()->prologue_assign_arg(cc_assigner, arg_idx++, arg);
2373 derived()->prologue_end(cc_assigner);
// … then make them generally allocatable again.
2375 this->register_file.allocatable |= cc_info.arg_regs;
// Give every static alloca a stack-variable assignment sized and aligned
// per the IR.
2377 for (
const IRValueRef alloca : adaptor->cur_static_allocas()) {
2378 auto size = adaptor->val_alloca_size(alloca);
2379 size = util::align_up(size, adaptor->val_alloca_align(alloca));
2381 ValLocalIdx local_idx = adaptor->val_local_idx(alloca);
2383 ValueAssignment *assignment = val_assignment(local_idx);
2384 assignment->stack_variable =
true;
// Target hook for custom variable-ref setup when the default is disabled.
2388 if constexpr (!Config::DEFAULT_VAR_REF_HANDLING) {
2389 derived()->setup_var_ref_assignments();
// Compile every block in the analyzer's layout order.
2392 for (u32 i = 0; i < analyzer.block_layout.size(); ++i) {
2393 const auto block_ref = analyzer.block_layout[i];
2395 "Compiling block {} ({})", i, adaptor->block_fmt_ref(block_ref));
2396 if (!
derived()->compile_block(block_ref, i)) [[unlikely]] {
2397 TPDE_LOG_ERR(
"Failed to compile block {} ({})",
2399 adaptor->block_fmt_ref(block_ref));
// Failure path: drop all assignment pointers before bailing out.
2401 assignments.value_ptrs.clear();
// Variable-ref assignments are not freed by normal ref-counting; walk the
// intrusive list and null out their slots now.
2407 ValLocalIdx variable_ref_list = assignments.variable_ref_list;
2408 while (variable_ref_list != INVALID_VAL_LOCAL_IDX) {
2409 u32 idx = u32(variable_ref_list);
2410 ValLocalIdx next = assignments.value_ptrs[idx]->next_delayed_free_entry;
2411 assignments.value_ptrs[idx] =
nullptr;
2412 variable_ref_list = next;
2415 assert(std::ranges::none_of(assignments.value_ptrs, std::identity{}) &&
2416 "found non-freed ValueAssignment, maybe missing ref-count?");
2418 derived()->finish_func(func_idx);
2419 this->text_writer.finish_func();
2424template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
2426 const IRBlockRef block,
const u32 block_idx)
noexcept {
2428 static_cast<typename Analyzer<Adaptor>::BlockIndex
>(block_idx);
2431 auto &&val_range = adaptor->block_insts(block);
2432 auto end = val_range.end();
2433 for (
auto it = val_range.begin(); it != end; ++it) {
2434 const IRInstRef inst = *it;
2435 if (this->adaptor->inst_fused(inst)) {
2441 if (!
derived()->compile_inst(inst, InstRange{.from = it_cpy, .to = end}))
2443 TPDE_LOG_ERR(
"Failed to compile instruction {}",
2444 this->adaptor->inst_fmt_ref(inst));
2453 for (
auto reg_id : register_file.used_regs()) {
2455 assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);
2456 AssignmentPartRef ap{val_assignment(register_file.reg_local_idx(reg)),
2457 register_file.reg_part(reg)};
2458 assert(ap.register_valid());
2459 assert(ap.get_reg() == reg);
2460 assert(!register_file.is_fixed(reg) || ap.fixed_assignment());
2464 if (
static_cast<u32
>(assignments.delayed_free_lists[block_idx]) != ~0u) {
2465 auto list_entry = assignments.delayed_free_lists[block_idx];
2466 while (
static_cast<u32
>(list_entry) != ~0u) {
2467 auto *assignment = assignments.value_ptrs[
static_cast<u32
>(list_entry)];
2468 auto next_entry = assignment->next_delayed_free_entry;
2469 derived()->free_assignment(list_entry, assignment);
2470 list_entry = next_entry;