543 template <
typename Jump>
547 bool last_inst)
noexcept;
553 void move_to_phi_nodes(BlockIndex target)
noexcept {
554 if (analyzer.block_has_phis(target)) {
555 move_to_phi_nodes_impl(target);
559 void move_to_phi_nodes_impl(BlockIndex target)
noexcept;
565 return analyzer.block_has_phis(target);
570 BlockIndex next_block() const noexcept;
572 bool try_force_fixed_assignment(IRValueRef) const noexcept {
return false; }
574 bool hook_post_func_sym_init() noexcept {
return true; }
576 void analysis_start() noexcept {}
578 void analysis_end() noexcept {}
580 void reloc_text(SymRef sym, u32 type, u64 offset, i64 addend = 0) noexcept {
581 this->assembler.reloc_sec(
582 text_writer.get_sec_ref(), sym, type, offset, addend);
587 this->text_writer.label_place(label, text_writer.offset());
591 SymRef get_personality_sym() noexcept;
593 bool compile_func(IRFuncRef func, u32 func_idx) noexcept;
595 bool compile_block(IRBlockRef block, u32 block_idx) noexcept;
599#include "GenericValuePart.hpp"
600#include "ScratchReg.hpp"
601#include "ValuePartRef.hpp"
602#include "ValueRef.hpp"
606template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
607template <
typename CBDerived>
609 CBDerived>
::add_arg(ValuePart &&vp, CCAssignment cca)
noexcept {
610 if (!cca.byval && cca.bank == RegBank{}) {
611 cca.bank = vp.bank();
612 cca.size = vp.part_size();
615 assigner.assign_arg(cca);
616 bool needs_ext = cca.int_ext != 0;
617 bool ext_sign = cca.int_ext >> 7;
618 unsigned ext_bits = cca.int_ext & 0x3f;
621 derived()->add_arg_byval(vp, cca);
623 }
else if (!cca.reg.valid()) {
625 auto ext = std::move(vp).into_extended(&compiler, ext_sign, ext_bits, 64);
626 derived()->add_arg_stack(ext, cca);
627 ext.reset(&compiler);
629 derived()->add_arg_stack(vp, cca);
633 u32 size = vp.part_size();
634 if (vp.is_in_reg(cca.reg)) {
635 if (!vp.can_salvage()) {
636 compiler.evict_reg(cca.reg);
638 vp.salvage(&compiler);
641 compiler.generate_raw_intext(cca.reg, cca.reg, ext_sign, ext_bits, 64);
644 if (compiler.register_file.is_used(cca.reg)) {
645 compiler.evict_reg(cca.reg);
647 if (vp.can_salvage()) {
648 AsmReg vp_reg = vp.salvage(&compiler);
650 compiler.generate_raw_intext(cca.reg, vp_reg, ext_sign, ext_bits, 64);
652 compiler.mov(cca.reg, vp_reg, size);
654 }
else if (needs_ext && vp.is_const()) {
655 u64 val = vp.const_data()[0];
657 ext_sign ? util::sext(val, ext_bits) : util::zext(val, ext_bits);
658 compiler.materialize_constant(&extended, cca.bank, 8, cca.reg);
660 vp.reload_into_specific_fixed(&compiler, cca.reg);
662 compiler.generate_raw_intext(
663 cca.reg, cca.reg, ext_sign, ext_bits, 64);
668 assert(!compiler.register_file.is_used(cca.reg));
669 compiler.register_file.mark_clobbered(cca.reg);
670 compiler.register_file.allocatable &= ~(u64{1} << cca.reg.id());
671 arg_regs |= (1ull << cca.reg.id());
675template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
676template <
typename CBDerived>
679 ValueRef vr = compiler.val_ref(arg.value);
682 assert(part_count == 1);
686 .align = arg.byval_align,
687 .size = arg.byval_size,
692 u32 align = arg.byval_align;
695 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
698 assert(arg.ext_bits != 0 &&
"cannot extend zero-bit integer");
701 u32 remaining = part_count < 256 ? part_count - part_idx - 1 : 255;
702 derived()->add_arg(vr.part(part_idx),
704 .consecutive = u8(allow_split ? 0 : remaining),
705 .sret = arg.flag == CallArg::Flag::sret,
707 .align = u8(part_idx == 0 ? align : 1),
712template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
713template <
typename CBDerived>
715 std::variant<SymRef, ValuePart> target)
noexcept {
716 assert(!compiler.stack.is_leaf_function &&
"leaf func must not have calls");
717 compiler.stack.generated_call =
true;
718 typename RegisterFile::RegBitSet skip_evict = arg_regs;
719 if (
auto *vp = std::get_if<ValuePart>(&target); vp && vp->can_salvage()) {
721 assert(vp->cur_reg_unlocked().valid() &&
"can_salvage implies register");
722 skip_evict |= (1ull << vp->cur_reg_unlocked().
id());
725 auto clobbered = ~assigner.get_ccinfo().callee_saved_regs;
726 for (
auto reg_id : util::BitSetIterator<>{compiler.register_file.used &
727 clobbered & ~skip_evict}) {
728 compiler.evict_reg(AsmReg{reg_id});
729 compiler.register_file.mark_clobbered(Reg{reg_id});
732 derived()->call_impl(std::move(target));
734 assert((compiler.register_file.allocatable & arg_regs) == 0);
735 compiler.register_file.allocatable |= arg_regs;
738template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
739template <
typename CBDerived>
741 CBDerived>
::add_ret(ValuePart &vp, CCAssignment cca)
noexcept {
742 cca.bank = vp.bank();
743 cca.size = vp.part_size();
744 assigner.assign_ret(cca);
745 assert(cca.reg.valid() &&
"return value must be in register");
746 vp.set_value_reg(&compiler, cca.reg);
749template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
750template <
typename CBDerived>
753 assert(vr.has_assignment());
754 u32 part_count = vr.assignment()->part_count;
755 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
757 add_ret(vr.part(part_idx), cca);
761template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
// Assign one value part to its calling-convention return register and move
// the value into place, then reserve that register until ret() runs.
// NOTE(review): interior lines of this definition are elided in this chunk;
// comments below describe only the statements that are visible.
762void CompilerBase<Adaptor, Derived, Config>::RetBuilder::add(
763 ValuePart &&vp, CCAssignment cca)
noexcept {
764 cca.bank = vp.bank();
765 u32 size = cca.size = vp.part_size();
766 assigner.assign_ret(cca);
767 assert(cca.reg.valid() &&
"indirect return value must use sret argument");
// cca.int_ext packs the integer-extension request: bit 7 = sign flag,
// low 6 bits = source bit width (0 means no extension needed).
769 bool needs_ext = cca.int_ext != 0;
770 bool ext_sign = cca.int_ext >> 7;
771 unsigned ext_bits = cca.int_ext & 0x3f;
773 if (vp.is_in_reg(cca.reg)) {
// Value already lives in the target register; take ownership of it
// (evicting the current occupant if the part cannot be salvaged).
774 if (!vp.can_salvage()) {
775 compiler.evict_reg(cca.reg);
777 vp.salvage(&compiler);
// In-place extension to the full 64-bit register width.
780 compiler.generate_raw_intext(cca.reg, cca.reg, ext_sign, ext_bits, 64);
783 if (compiler.register_file.is_used(cca.reg)) {
784 compiler.evict_reg(cca.reg);
// Move (and possibly extend) the value from its current register.
786 if (vp.can_salvage()) {
787 AsmReg vp_reg = vp.salvage(&compiler);
789 compiler.generate_raw_intext(cca.reg, vp_reg, ext_sign, ext_bits, 64);
791 compiler.mov(cca.reg, vp_reg, size);
// Value is not in a register: reload it directly into the target.
794 vp.reload_into_specific_fixed(&compiler, cca.reg);
796 compiler.generate_raw_intext(cca.reg, cca.reg, ext_sign, ext_bits, 64);
// Reserve the return register so later code cannot allocate it;
// ret() re-adds ret_regs to the allocatable set.
801 assert(!compiler.register_file.is_used(cca.reg));
802 compiler.register_file.mark_clobbered(cca.reg);
803 compiler.register_file.allocatable &= ~(u64{1} << cca.reg.id());
804 ret_regs |= (1ull << cca.reg.id());
807template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
// Convenience overload: add every part of an IR value as a return value,
// letting the CC assigner pick the register for each part.
808void CompilerBase<Adaptor, Derived, Config>::RetBuilder::add(
809 IRValueRef val)
noexcept {
810 u32 part_count = compiler.val_parts(val).count();
811 ValueRef vr = compiler.val_ref(val);
812 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
// Default-constructed CCAssignment: bank/size are filled in by the
// ValuePart overload above.
813 add(vr.part(part_idx), CCAssignment{});
817template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
// Finish the return sequence: release the return registers that add()
// reserved, then emit the function epilogue.
818void CompilerBase<Adaptor, Derived, Config>::RetBuilder::ret() noexcept {
// add() removed every ret reg from the allocatable set; verify that
// invariant before handing them back.
819 assert((compiler.register_file.allocatable & ret_regs) == 0);
820 compiler.register_file.allocatable |= ret_regs;
822 compiler.gen_func_epilog();
823 compiler.release_regs_after_return();
826template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
// Module compilation driver (fragment): creates a symbol for every IR
// function (undef for extern, predef otherwise, with linkage-derived
// binding), runs the post-symbol-init hook, then compiles each non-extern
// function and finalizes the assembler output.
// NOTE(review): interior lines of this definition are elided in this chunk.
829 text_writer.begin_module(assembler);
830 text_writer.switch_section(
831 assembler.get_section(assembler.get_text_section()));
833 assert(func_syms.empty());
// Pass 1: create one symbol per function so cross-references resolve
// before any code is generated.
834 for (
const IRFuncRef func : adaptor->funcs()) {
836 if (adaptor->func_has_weak_linkage(func)) {
838 }
else if (adaptor->func_only_local(func)) {
841 if (adaptor->func_extern(func)) {
842 func_syms.push_back(
derived()->assembler.sym_add_undef(
843 adaptor->func_link_name(func), binding));
845 func_syms.push_back(
derived()->assembler.sym_predef_func(
846 adaptor->func_link_name(func), binding));
848 derived()->define_func_idx(func, func_syms.size() - 1);
851 if (!
derived()->hook_post_func_sym_init()) {
// BUGFIX: error message previously said "hook_pust_func_sym_init",
// which does not match the hook's actual name.
852 TPDE_LOG_ERR(
"hook_post_func_sym_init failed");
// Pass 2: compile function bodies; extern functions have no body here.
861 for (
const IRFuncRef func : adaptor->funcs()) {
862 if (adaptor->func_extern(func)) {
863 TPDE_LOG_TRACE(
"Skipping compilation of func {}",
864 adaptor->func_link_name(func));
869 TPDE_LOG_TRACE(
"Compiling func {}", adaptor->func_link_name(func));
870 if (!
derived()->compile_func(func, func_idx)) {
871 TPDE_LOG_ERR(
"Failed to compile function {}",
872 adaptor->func_link_name(func));
879 text_writer.end_module();
880 assembler.finalize();
887template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
891 for (
auto &e : stack.fixed_free_lists) {
894 stack.dynamic_free_lists.clear();
898 block_labels.clear();
899 personality_syms.clear();
902template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
903void CompilerBase<Adaptor, Derived, Config>::init_assignment(
904 IRValueRef value, ValLocalIdx local_idx)
noexcept {
905 assert(val_assignment(local_idx) ==
nullptr);
906 TPDE_LOG_TRACE(
"Initializing assignment for value {}",
907 static_cast<u32
>(local_idx));
909 const auto parts =
derived()->val_parts(value);
910 const u32 part_count = parts.count();
911 assert(part_count > 0);
912 auto *assignment = assignments.allocator.allocate(part_count);
913 assignments.value_ptrs[
static_cast<u32
>(local_idx)] = assignment;
915 u32 max_part_size = 0;
916 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
917 auto ap = AssignmentPartRef{assignment, part_idx};
919 ap.set_bank(parts.reg_bank(part_idx));
920 const u32 size = parts.size_bytes(part_idx);
922 max_part_size = std::max(max_part_size, size);
923 ap.set_part_size(size);
926 const auto &liveness = analyzer.liveness_info(local_idx);
935 if (part_count == 1) {
936 const auto &cur_loop =
937 analyzer.loop_from_idx(analyzer.block_loop_idx(cur_block_idx));
938 auto ap = AssignmentPartRef{assignment, 0};
941 liveness.last > cur_block_idx &&
942 cur_loop.definitions_in_childs +
943 assignments.cur_fixed_assignment_count[ap.bank().id()] <
944 Derived::NUM_FIXED_ASSIGNMENTS[ap.bank().id()];
945 if (
derived()->try_force_fixed_assignment(value)) {
946 try_fixed = assignments.cur_fixed_assignment_count[ap.bank().id()] <
947 Derived::NUM_FIXED_ASSIGNMENTS[ap.bank().id()];
952 AsmReg reg =
derived()->select_fixed_assignment_reg(ap, value);
953 TPDE_LOG_TRACE(
"Trying to assign fixed reg to value {}",
954 static_cast<u32
>(local_idx));
958 if (!reg.invalid() && !register_file.is_used(reg)) {
959 TPDE_LOG_TRACE(
"Assigning fixed assignment to reg {} for value {}",
961 static_cast<u32
>(local_idx));
963 ap.set_register_valid(
true);
964 ap.set_fixed_assignment(
true);
965 register_file.mark_used(reg, local_idx, 0);
966 register_file.inc_lock_count(reg);
967 register_file.mark_clobbered(reg);
968 ++assignments.cur_fixed_assignment_count[ap.bank().id()];
973 const auto last_full = liveness.last_full;
974 const auto ref_count = liveness.ref_count;
976 assert(max_part_size <= 256);
977 assignment->max_part_size = max_part_size;
978 assignment->pending_free =
false;
979 assignment->variable_ref =
false;
980 assignment->stack_variable =
false;
981 assignment->delay_free = last_full;
982 assignment->part_count = part_count;
983 assignment->frame_off = 0;
984 assignment->references_left = ref_count;
987template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
989 ValLocalIdx local_idx, ValueAssignment *assignment)
noexcept {
990 TPDE_LOG_TRACE(
"Freeing assignment for value {}",
991 static_cast<u32
>(local_idx));
993 assert(assignments.value_ptrs[
static_cast<u32
>(local_idx)] == assignment);
994 assignments.value_ptrs[
static_cast<u32
>(local_idx)] =
nullptr;
995 const auto is_var_ref = assignment->variable_ref;
996 const u32 part_count = assignment->part_count;
999 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
1000 auto ap = AssignmentPartRef{assignment, part_idx};
1001 if (ap.fixed_assignment()) [[unlikely]] {
1002 const auto reg = ap.get_reg();
1003 assert(register_file.is_fixed(reg));
1004 assert(register_file.reg_local_idx(reg) == local_idx);
1005 assert(register_file.reg_part(reg) == part_idx);
1006 --assignments.cur_fixed_assignment_count[ap.bank().id()];
1007 register_file.dec_lock_count_must_zero(reg);
1008 register_file.unmark_used(reg);
1009 }
else if (ap.register_valid()) {
1010 const auto reg = ap.get_reg();
1011 assert(!register_file.is_fixed(reg));
1012 register_file.unmark_used(reg);
1017 for (
auto reg_id : register_file.used_regs()) {
1018 assert(register_file.reg_local_idx(AsmReg{reg_id}) != local_idx &&
1019 "freeing assignment that is still referenced by a register");
1024 bool has_stack = Config::FRAME_INDEXING_NEGATIVE ? assignment->frame_off < 0
1025 : assignment->frame_off != 0;
1026 if (!is_var_ref && has_stack) {
1030 assignments.allocator.deallocate(assignment);
1033template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1034[[gnu::noinline]]
void
1036 ValLocalIdx local_idx, ValueAssignment *assignment)
noexcept {
1037 if (!assignment->delay_free) {
1038 free_assignment(local_idx, assignment);
1043 TPDE_LOG_TRACE(
"Delay freeing assignment for value {}",
1044 static_cast<u32
>(local_idx));
1045 const auto &liveness = analyzer.liveness_info(local_idx);
1046 auto &free_list_head = assignments.delayed_free_lists[u32(liveness.last)];
1047 assignment->next_delayed_free_entry = free_list_head;
1048 assignment->pending_free =
true;
1049 free_list_head = local_idx;
1052template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1054 ValLocalIdx local_idx, u32 var_ref_data)
noexcept {
1055 TPDE_LOG_TRACE(
"Initializing variable-ref assignment for value {}",
1056 static_cast<u32
>(local_idx));
1058 assert(val_assignment(local_idx) ==
nullptr);
1059 auto *assignment = assignments.allocator.allocate_slow(1,
true);
1060 assignments.value_ptrs[
static_cast<u32
>(local_idx)] = assignment;
1062 assignment->max_part_size = Config::PLATFORM_POINTER_SIZE;
1063 assignment->variable_ref =
true;
1064 assignment->stack_variable =
false;
1065 assignment->part_count = 1;
1066 assignment->var_ref_custom_idx = var_ref_data;
1067 assignment->next_delayed_free_entry = assignments.variable_ref_list;
1069 assignments.variable_ref_list = local_idx;
1071 AssignmentPartRef ap{assignment, 0};
1073 ap.set_bank(Config::GP_BANK);
1074 ap.set_part_size(Config::PLATFORM_POINTER_SIZE);
1077template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
// Stack-slot allocator (fragment; the signature line is elided in this
// chunk — presumably CompilerBase::allocate_stack_slot(u32 size), TODO
// confirm). Sizes <= 16 are rounded up to a power of two and served from
// per-size fixed free lists; larger sizes are 16-byte aligned and served
// from per-size dynamic free lists; otherwise the frame is grown.
1079 u32 size)
noexcept {
1080 this->stack.frame_used =
true;
// Default alignment: 16 bytes (2^4) for large/dynamic slots.
1081 unsigned align_bits = 4;
1084 }
else if (size <= 16) {
// Bucket index = ceil(log2(size)); size is rounded up to that bucket.
1086 u32 free_list_idx = size == 1 ? 0 : 32 - util::cnt_lz<u32>(size - 1);
1087 assert(size <= 1u << free_list_idx);
1088 size = 1 << free_list_idx;
1089 align_bits = free_list_idx;
// Reuse a previously freed slot of the same bucket if available.
1091 if (!stack.fixed_free_lists[free_list_idx].empty()) {
1092 auto slot = stack.fixed_free_lists[free_list_idx].back();
1093 stack.fixed_free_lists[free_list_idx].pop_back();
1097 size = util::align_up(size, 16);
1098 auto it = stack.dynamic_free_lists.find(size);
1099 if (it != stack.dynamic_free_lists.end() && !it->second.empty()) {
1100 const auto slot = it->second.back();
1101 it->second.pop_back();
1106 assert(stack.frame_size != ~0u &&
1107 "cannot allocate stack slot before stack frame is initialized");
// Grow the frame to the required alignment, donating each padding
// chunk (a power-of-two run of bytes) to the matching fixed free list.
1110 for (u32 list_idx = util::cnt_tz(stack.frame_size); list_idx < align_bits;
1111 list_idx = util::cnt_tz(stack.frame_size)) {
1112 i32 slot = stack.frame_size;
1113 if constexpr (Config::FRAME_INDEXING_NEGATIVE) {
// Negative frame indexing: offsets grow downward from the frame base.
1114 slot = -(slot + (1ull << list_idx));
1116 stack.fixed_free_lists[list_idx].push_back(slot);
1117 stack.frame_size += 1ull << list_idx;
1120 auto slot = stack.frame_size;
1121 assert(slot != 0 &&
"stack slot 0 is reserved");
1122 stack.frame_size += size;
1124 if constexpr (Config::FRAME_INDEXING_NEGATIVE) {
1125 slot = -(slot + size);
1130template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
// Return a stack slot to the allocator's free lists (fragment; signature
// line elided — presumably CompilerBase::free_stack_slot(slot, size), TODO
// confirm). Mirrors the bucketing used at allocation time.
1132 u32 slot, u32 size)
noexcept {
1133 if (size == 0) [[unlikely]] {
1134 assert(slot == 0 &&
"unexpected slot for zero-sized stack-slot?");
1136 }
else if (size <= 16) [[likely]] {
// Same bucket computation as allocation: ceil(log2(size)).
1137 u32 free_list_idx = size == 1 ? 0 : 32 - util::cnt_lz<u32>(size - 1);
1138 stack.fixed_free_lists[free_list_idx].push_back(slot);
1140 size = util::align_up(size, 16);
1141 stack.dynamic_free_lists[size].push_back(slot);
1145template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1147 CCAssigner *cc_assigner,
1151 bool allow_split)
noexcept {
1152 ValueRef vr =
derived()->result_ref(arg);
1153 if (adaptor->cur_arg_is_byval(arg_idx)) {
1156 .align = u8(adaptor->cur_arg_byval_align(arg_idx)),
1157 .size = adaptor->cur_arg_byval_size(arg_idx),
1159 cc_assigner->assign_arg(cca);
1160 std::optional<i32> byval_frame_off =
1161 derived()->prologue_assign_arg_part(vr.part(0), cca);
1163 if (byval_frame_off) {
1165 ValLocalIdx local_idx = val_idx(arg);
1171 if (ValueAssignment *assignment = val_assignment(local_idx)) {
1172 free_assignment(local_idx, assignment);
1175 ValueAssignment *assignment = this->val_assignment(local_idx);
1176 assignment->stack_variable =
true;
1177 assignment->frame_off = *byval_frame_off;
1182 if (adaptor->cur_arg_is_sret(arg_idx)) {
1183 assert(vr.assignment()->part_count == 1 &&
"sret must be single-part");
1184 ValuePartRef vp = vr.part(0);
1186 .sret =
true, .bank = vp.bank(), .size = Config::PLATFORM_POINTER_SIZE};
1187 cc_assigner->assign_arg(cca);
1188 derived()->prologue_assign_arg_part(std::move(vp), cca);
1192 const u32 part_count = vr.assignment()->part_count;
1193 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
1194 ValuePartRef vp = vr.part(part_idx);
1195 u32 remaining = part_count < 256 ? part_count - part_idx - 1 : 255;
1197 .consecutive = u8(allow_split ? 0 : remaining),
1198 .align = u8(part_idx == 0 ? align : 1),
1200 .size = vp.part_size(),
1202 cc_assigner->assign_arg(cca);
1203 derived()->prologue_assign_arg_part(std::move(vp), cca);
1207template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1208typename CompilerBase<Adaptor, Derived, Config>::ValueRef
1210 if (
auto special =
derived()->val_ref_special(value); special) {
1211 return ValueRef{
this, std::move(*special)};
1214 const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(value);
1215 assert(val_assignment(local_idx) !=
nullptr &&
"value use before def");
1216 return ValueRef{
this, local_idx};
1219template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1220std::pair<typename CompilerBase<Adaptor, Derived, Config>::ValueRef,
1221 typename CompilerBase<Adaptor, Derived, Config>::ValuePartRef>
1223 IRValueRef value)
noexcept {
1224 std::pair<ValueRef, ValuePartRef> res{
val_ref(value),
this};
1225 res.second = res.first.part(0);
1229template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1230typename CompilerBase<Adaptor, Derived, Config>::ValueRef
1232 IRValueRef value)
noexcept {
1233 const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(value);
1234 if (val_assignment(local_idx) ==
nullptr) {
1235 init_assignment(value, local_idx);
1237 return ValueRef{
this, local_idx};
1240template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1241std::pair<typename CompilerBase<Adaptor, Derived, Config>::ValueRef,
1242 typename CompilerBase<Adaptor, Derived, Config>::ValuePartRef>
1244 IRValueRef value)
noexcept {
1245 std::pair<ValueRef, ValuePartRef> res{
result_ref(value),
this};
1246 res.second = res.first.part(0);
1250template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1251typename CompilerBase<Adaptor, Derived, Config>::ValueRef
1253 IRValueRef dst, ValueRef &&src)
noexcept {
1254 const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(dst);
1255 assert(!val_assignment(local_idx) &&
"alias target already defined");
1256 assert(src.has_assignment() &&
"alias src must have an assignment");
1261 assert(src.is_owned() &&
"alias src must be owned");
1263 ValueAssignment *assignment = src.assignment();
1264 u32 part_count = assignment->part_count;
1265 assert(!assignment->pending_free);
1266 assert(!assignment->variable_ref);
1267 assert(!assignment->pending_free);
1270 const auto &src_liveness = analyzer.liveness_info(src.local_idx());
1271 assert(!src_liveness.last_full);
1272 assert(assignment->references_left == 1);
1275 const auto parts =
derived()->val_parts(dst);
1276 assert(parts.count() == part_count);
1277 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
1278 AssignmentPartRef ap{assignment, part_idx};
1279 assert(parts.reg_bank(part_idx) == ap.bank());
1280 assert(parts.size_bytes(part_idx) == ap.part_size());
1286 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
1287 AssignmentPartRef ap{assignment, part_idx};
1288 if (ap.register_valid()) {
1289 register_file.update_reg_assignment(ap.get_reg(), local_idx, part_idx);
1293 const auto &liveness = analyzer.liveness_info(local_idx);
1294 assignment->delay_free = liveness.last_full;
1295 assignment->references_left = liveness.ref_count;
1296 assignments.value_ptrs[
static_cast<u32
>(src.local_idx())] =
nullptr;
1297 assignments.value_ptrs[
static_cast<u32
>(local_idx)] = assignment;
1301 return ValueRef{
this, local_idx};
1304template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1305typename CompilerBase<Adaptor, Derived, Config>::ValueRef
1307 IRValueRef dst, AssignmentPartRef base, i32 off)
noexcept {
1308 const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(dst);
1309 assert(!val_assignment(local_idx) &&
"new value already defined");
1311 ValueAssignment *assignment = this->val_assignment(local_idx);
1312 assignment->stack_variable =
true;
1313 assignment->frame_off = base.variable_stack_off() + off;
1314 return ValueRef{
this, local_idx};
1317template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
// Thin forwarding helper: transfers ownership of the scratch register's
// contents into the value part.
1318void CompilerBase<Adaptor, Derived, Config>::set_value(
1319 ValuePartRef &
val_ref, ScratchReg &scratch)
noexcept {
1320 val_ref.set_value(std::move(scratch));
1323template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
// Materialize a GenericValuePart into a register, dispatching on which
// alternative its variant state currently holds.
1324typename CompilerBase<Adaptor, Derived, Config>::AsmReg
1326 GenericValuePart &gv)
noexcept {
// Case 1: already a scratch register — use it directly.
1327 if (std::holds_alternative<ScratchReg>(gv.state)) {
1328 return std::get<ScratchReg>(gv.state).cur_reg();
// Case 2: a value part — reuse its register or load it into one.
1330 if (std::holds_alternative<ValuePartRef>(gv.state)) {
1331 auto &vpr = std::get<ValuePartRef>(gv.state);
1332 if (vpr.has_reg()) {
1333 return vpr.cur_reg();
1335 return vpr.load_to_reg();
// Case 3: an address expression — a plain base (no index, zero
// displacement) is already a register; otherwise defer to the derived
// compiler to compute the expression into a register.
1337 if (
auto *expr = std::get_if<typename GenericValuePart::Expr>(&gv.state)) {
1338 if (expr->has_base() && !expr->has_index() && expr->disp == 0) {
1339 return expr->base_reg();
1341 return derived()->gval_expr_as_reg(gv);
1343 TPDE_UNREACHABLE(
"gval_as_reg on empty GenericValuePart");
1346template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1347typename CompilerBase<Adaptor, Derived, Config>::AsmReg
1349 GenericValuePart &gv,
ScratchReg &dst)
noexcept {
1351 if (!dst.has_reg()) {
1352 if (
auto *scratch = std::get_if<ScratchReg>(&gv.state)) {
1353 dst = std::move(*scratch);
1354 }
else if (
auto *
val_ref = std::get_if<ValuePartRef>(&gv.state)) {
1356 dst.alloc_specific(
val_ref->salvage());
1357 assert(dst.cur_reg() == reg &&
"salvaging unsuccessful");
1364template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1365Reg CompilerBase<Adaptor, Derived, Config>::select_reg_evict(
1366 RegBank bank, u64 exclusion_mask)
noexcept {
1367 TPDE_LOG_DBG(
"select_reg_evict for bank {}", bank.id());
1369 register_file.used & register_file.bank_regs(bank) & ~exclusion_mask;
1371 Reg candidate = Reg::make_invalid();
1373 for (
auto reg_id : util::BitSetIterator<>(candidates)) {
1375 if (register_file.is_fixed(reg)) {
1380 auto local_idx = register_file.reg_local_idx(reg);
1381 u32 part = register_file.reg_part(Reg{reg});
1382 assert(local_idx != INVALID_VAL_LOCAL_IDX);
1383 ValueAssignment *va = val_assignment(local_idx);
1384 AssignmentPartRef ap{va, part};
1397 if (ap.variable_ref()) {
1398 TPDE_LOG_DBG(
" r{} ({}) is variable-ref", reg_id, u32(local_idx));
1404 if (ap.stack_valid()) {
1405 score |= u32{1} << 31;
1408 const auto &liveness = analyzer.liveness_info(local_idx);
1409 u32 last_use_dist = u32(liveness.last) - u32(cur_block_idx);
1410 score |= (last_use_dist < 0x8000 ? 0x8000 - last_use_dist : 0) << 16;
1412 u32 refs_left = va->pending_free ? 0 : va->references_left;
1413 score |= (refs_left < 0xffff ? 0x10000 - refs_left : 1);
1415 TPDE_LOG_DBG(
" r{} ({}:{}) rc={}/{} live={}-{}{} spilled={} score={:#x}",
1421 u32(liveness.first),
1423 &
"*"[!liveness.last_full],
1428 if (score > max_score) {
1433 if (candidate.invalid()) [[unlikely]] {
1434 TPDE_FATAL(
"ran out of registers for scratch registers");
1436 TPDE_LOG_DBG(
" selected r{}", candidate.id());
1441template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1443 AsmReg dst, AssignmentPartRef ap)
noexcept {
1444 if (!ap.variable_ref()) {
1445 assert(ap.stack_valid());
1446 derived()->load_from_stack(dst, ap.frame_off(), ap.part_size());
1447 }
else if (ap.is_stack_variable()) {
1448 derived()->load_address_of_stack_var(dst, ap);
1449 }
else if constexpr (!Config::DEFAULT_VAR_REF_HANDLING) {
1450 derived()->load_address_of_var_reference(dst, ap);
1452 TPDE_UNREACHABLE(
"non-stack-variable needs custom var-ref handling");
1456template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1458 AssignmentPartRef ap)
noexcept {
1459 assert(!ap.variable_ref() &&
"cannot allocate spill slot for variable ref");
1460 if (ap.assignment()->frame_off == 0) {
1461 assert(!ap.stack_valid() &&
"stack-valid set without spill slot");
1463 assert(ap.assignment()->frame_off != 0);
1467template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
// Write an assignment part's register back to its stack slot if the stack
// copy is not already valid (fragment; signature line elided — presumably
// CompilerBase::spill(AssignmentPartRef), TODO confirm).
1469 AssignmentPartRef ap)
noexcept {
1470 assert(may_change_value_state());
// Variable refs have no spill slot; a valid stack copy needs no store.
1471 if (!ap.stack_valid() && !ap.variable_ref()) {
1472 assert(ap.register_valid() &&
"cannot spill uninitialized assignment part");
1474 derived()->spill_reg(ap.get_reg(), ap.frame_off(), ap.part_size());
1475 ap.set_stack_valid();
1479template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1481 AssignmentPartRef ap)
noexcept {
1482 assert(may_change_value_state());
1483 assert(ap.register_valid());
1485 ap.set_register_valid(
false);
1486 register_file.unmark_used(ap.get_reg());
1489template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
// Evict the value part currently occupying `reg` (fragment; the signature
// line and the spill step around original line 1500 are elided in this
// chunk — presumably CompilerBase::evict_reg(AsmReg reg), TODO confirm).
// Drops the register<->assignment link and frees the register.
1491 assert(may_change_value_state());
1492 assert(!register_file.is_fixed(reg));
1493 assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);
// Look up which value/part the register holds and verify consistency.
1495 ValLocalIdx local_idx = register_file.reg_local_idx(reg);
1496 auto part = register_file.reg_part(reg);
1497 AssignmentPartRef evict_part{val_assignment(local_idx), part};
1498 assert(evict_part.register_valid());
1499 assert(evict_part.get_reg() == reg);
1501 evict_part.set_register_valid(
false);
1502 register_file.unmark_used(reg);
1505template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
// Drop `reg` from its assignment part WITHOUT spilling (fragment; the
// signature line is elided — presumably CompilerBase::free_reg(AsmReg reg),
// TODO confirm). The assert below demands the part is unmodified (or a
// variable ref), i.e. no state would be lost by discarding the register.
1507 assert(may_change_value_state());
1508 assert(!register_file.is_fixed(reg));
1509 assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);
1511 ValLocalIdx local_idx = register_file.reg_local_idx(reg);
1512 auto part = register_file.reg_part(reg);
1513 AssignmentPartRef ap{val_assignment(local_idx), part};
1514 assert(ap.register_valid());
1515 assert(ap.get_reg() == reg);
// Discarding a modified, non-variable-ref register would lose data.
1516 assert(!ap.modified() || ap.variable_ref());
1517 ap.set_register_valid(
false);
1518 register_file.unmark_used(reg);
1521template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1522typename CompilerBase<Adaptor, Derived, Config>::RegisterFile::RegBitSet
1524 bool force_spill)
noexcept {
1548 using RegBitSet =
typename RegisterFile::RegBitSet;
1550 assert(may_change_value_state());
1552 const IRBlockRef cur_block_ref = analyzer.block_ref(cur_block_idx);
1556 BlockIndex earliest_next_succ = Analyzer<Adaptor>::INVALID_BLOCK_IDX;
1558 bool must_spill = force_spill;
1562 auto next_block_is_succ =
false;
1563 auto next_block_has_multiple_incoming =
false;
1565 for (
const IRBlockRef succ : adaptor->block_succs(cur_block_ref)) {
1567 BlockIndex succ_idx = analyzer.block_idx(succ);
1568 if (u32(succ_idx) == u32(cur_block_idx) + 1) {
1569 next_block_is_succ =
true;
1570 if (analyzer.block_has_multiple_incoming(succ)) {
1571 next_block_has_multiple_incoming =
true;
1573 }
else if (succ_idx > cur_block_idx && succ_idx < earliest_next_succ) {
1574 earliest_next_succ = succ_idx;
1578 must_spill = !next_block_is_succ || next_block_has_multiple_incoming;
1580 if (succ_count == 1 && !must_spill) {
1585 auto release_regs = RegBitSet{};
1587 for (
auto reg : register_file.used_regs()) {
1588 auto local_idx = register_file.reg_local_idx(Reg{reg});
1589 auto part = register_file.reg_part(Reg{reg});
1590 if (local_idx == INVALID_VAL_LOCAL_IDX) {
1594 AssignmentPartRef ap{val_assignment(local_idx), part};
1595 if (ap.fixed_assignment()) {
1603 release_regs |= RegBitSet{1ull} << reg;
1606 if (!ap.modified() || ap.variable_ref()) {
1611 const auto &liveness = analyzer.liveness_info(local_idx);
1612 if (liveness.last <= cur_block_idx) {
1625 if (must_spill || earliest_next_succ <= liveness.last) {
1630 return release_regs;
1633template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1635 typename RegisterFile::RegBitSet regs)
noexcept {
1636 assert(may_change_value_state());
1639 for (
auto reg_id : util::BitSetIterator<>{regs & register_file.used}) {
1640 if (!register_file.is_fixed(Reg{reg_id})) {
1646template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1650 for (
auto reg_id : register_file.used_regs()) {
1651 if (!register_file.is_fixed(Reg{reg_id})) {
1657template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1658template <
typename Jump>
// Emit a (possibly conditional) branch to `target`, performing any PHI
// moves first. NOTE(review): interior lines are elided in this chunk; the
// visible tail appears to be the needs_split path, which inverts the jump
// over a local label so PHI moves run only on the taken edge — confirm
// against the full source.
1660 Jump jmp, IRBlockRef target,
bool needs_split,
bool last_inst)
noexcept {
1661 BlockIndex target_idx = this->analyzer.block_idx(target);
1662 Label target_label = this->block_labels[u32(target_idx)];
1664 move_to_phi_nodes(target_idx);
// Skip the jump only when this is the final instruction and the target
// is the fall-through block.
1665 if (!last_inst || target_idx != this->next_block()) {
1666 derived()->generate_raw_jump(jmp, target_label);
// Split path: inverted jump skips over the PHI moves + unconditional jump.
1669 Label tmp_label = this->text_writer.label_create();
1670 derived()->generate_raw_jump(
derived()->invert_jump(jmp), tmp_label);
1671 move_to_phi_nodes(target_idx);
1672 derived()->generate_raw_jump(Jump::jmp, target_label);
1677template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1679 IRBlockRef target)
noexcept {
1689template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1690template <
typename Jump>
1692 Jump jmp, IRBlockRef true_target, IRBlockRef false_target)
noexcept {
1693 IRBlockRef next = analyzer.block_ref(next_block());
1701 if (next == true_target || (next != false_target && true_needs_split)) {
1703 derived()->invert_jump(jmp), false_target, false_needs_split,
false);
1705 }
else if (next == false_target) {
1709 assert(!true_needs_split);
// Lowers a switch over an integer condition. Per sub-range of the (sorted)
// cases, the recursive build_range lambda picks a strategy:
//   - <= 4 cases: linear compare-and-branch chain (switch_emit_cmpeq),
//   - dense key range (range/num_cases < 8): jump table,
//   - otherwise: binary search step (switch_emit_binary_step) + recursion.
// Case targets that have PHI nodes get a private label so the PHI moves can
// be emitted before jumping to the real block (trailing case_blocks loop).
// NOTE(review): the function name line and parts of the signature
// (condition value 'cond', bit 'width') are on lines missing from this
// extract -- confirm against the full source.
1718template <IRAdaptor Adaptor,
          typename Derived, CompilerConfig Config>
1722                                                     IRBlockRef default_block,
1723    std::span<
    const std::pair<u64, IRBlockRef>> cases)
    noexcept {
  // Condition values are at most 64 bits wide; case count must fit in u32.
1728  assert(width <= 64);
1731  assert(cases.size() < UINT32_MAX &&
         "large switches are unsupported");
1733  AsmReg cmp_reg = cond.cur_reg();
1734  bool width_is_32 = width <= 32;
  // Zero-extend the condition up to the next 32/64-bit boundary so the
  // comparisons below see a canonical value.
1735  if (u32 dst_width = util::align_up(width, 32); width != dst_width) {
1736    derived()->generate_raw_intext(cmp_reg, cmp_reg,
        false, width, dst_width);
1743  AsmReg tmp_reg = tmp_scratch.
      alloc_gp();
  // One label per case. Targets with PHIs get a freshly created label,
  // deferred to the case_blocks loop at the end; others jump straight to the
  // block's regular label.
1748  tpde::util::SmallVector<tpde::Label, 64> case_labels;
1751  tpde::util::SmallVector<std::pair<tpde::Label, IRBlockRef>, 64> case_blocks;
1752  for (
      auto i = 0u; i < cases.size(); ++i) {
1756    BlockIndex target = this->analyzer.block_idx(cases[i].second);
1757    if (analyzer.block_has_phis(target)) {
1758      case_labels.push_back(this->text_writer.label_create());
1759      case_blocks.emplace_back(case_labels.back(), cases[i].second);
1761      case_labels.push_back(this->block_labels[u32(target)]);
1765  const auto default_label = this->text_writer.label_create();
  // Recursive range builder over cases[begin, end); 'self' is passed
  // explicitly so the lambda can recurse.
1767  const auto build_range = [&,
1768                            this](
      size_t begin,
      size_t end,
      const auto &self) {
1769    assert(begin <= end);
1770    const auto num_cases = end - begin;
    // Few cases: linear compare-and-jump chain, then fall back to default.
1771    if (num_cases <= 4) {
1774      for (
          auto i = 0u; i < num_cases; ++i) {
1775        derived()->switch_emit_cmpeq(case_labels[begin + i],
1778                                     cases[begin + i].first,
1782      derived()->generate_raw_jump(Derived::Jump::jmp, default_label);
    // Key span of this sub-range; relies on cases being sorted by key.
1788    auto range = cases[end - 1].first - cases[begin].first;
    // Dense enough for a jump table. The first test guards against u64
    // overflow when the keys span the entire 64-bit domain.
1791    if (range != 0xFFFF'FFFF'FFFF'FFFF && (range / num_cases) < 8) {
1799      tpde::util::SmallVector<tpde::Label, 32> label_vec;
1800      std::span<tpde::Label> labels;
      // Contiguous keys: reuse case_labels directly; otherwise build a table
      // pre-filled with default_label and patch in the present keys.
1801      if (range == num_cases) {
1802        labels = std::span{case_labels.begin() + begin, num_cases};
1804        label_vec.resize(range, default_label);
1805        for (
            auto i = 0u; i < num_cases; ++i) {
1806          label_vec[cases[begin + i].first - cases[begin].first] =
1807              case_labels[begin + i];
1809        labels = std::span{label_vec.begin(), range};
      // NOTE(review): the if-body is not visible in this extract; presumably
      // the target may decline to emit a jump table and control falls back to
      // the binary-search strategy below -- confirm.
1813      if (
          derived()->switch_emit_jump_table(default_label,
1818                                        cases[end - 1].first,
    // Binary search: compare against the median case, branch to gt_label for
    // the upper half, then recurse into both halves (median excluded).
1825      const auto half_len = num_cases / 2;
1826      const auto half_value = cases[begin + half_len].first;
1827      const auto gt_label = this->text_writer.label_create();
1832      derived()->switch_emit_binary_step(case_labels[begin + half_len],
1839      self(begin, begin + half_len, self);
1843      self(begin + half_len + 1, end, self);
1846  build_range(0, case_labels.size(), build_range);
  // Default edge: a full branch (generate_branch_to_block handles PHI moves
  // for the target block when needed).
1850  derived()->generate_branch_to_block(
1851      Derived::Jump::jmp, default_block,
      false,
      false);
  // Deferred stubs for case targets with PHI nodes: align, then branch with
  // PHI moves to the real target block.
  // NOTE(review): the label_place call for 'label' is on a line not visible
  // in this extract -- confirm.
1853  for (
      const auto &[label, target] : case_blocks) {
1856    this->text_writer.align(8);
1858    derived()->generate_branch_to_block(
1859        Derived::Jump::jmp, target,
        false,
        false);
// Emits the register/stack moves that realize PHI-node dataflow on the edge
// from the current block (cur_block_idx) to 'target'. PHI-to-PHI
// dependencies are handled by topologically ordering the moves (ref_count /
// waiting_nodes worklist); dependency cycles are broken by saving one PHI's
// value into temporary registers (<= 2 parts) or a temporary stack slot
// (> 2 parts) and replaying it via move_from_tmp_phi.
1866template <IRAdaptor Adaptor,
          typename Derived, CompilerConfig Config>
1867void CompilerBase<Adaptor, Derived, Config>::move_to_phi_nodes_impl(
1868    BlockIndex target)
    noexcept {
  // RAII helper that temporarily claims a register as scratch. If the chosen
  // register currently holds a value part, the part is spilled first and
  // restored by reset() (reloaded into cur_reg and re-marked in the register
  // file with its original modified state).
1877  struct ScratchWrapper {
1879    AsmReg cur_reg = AsmReg::make_invalid();
1880    bool backed_up =
        false;
1881    bool was_modified =
        false;
1883    ValLocalIdx local_idx = INVALID_VAL_LOCAL_IDX;
1885    ScratchWrapper(Derived *self) : self{self} {}
1887    ~ScratchWrapper() { reset(); }
      // reset(): release the scratch register and, if a value part was
      // displaced, restore it from its stack slot.
1890      if (cur_reg.invalid()) {
1894      self->register_file.unmark_fixed(cur_reg);
1895      self->register_file.unmark_used(cur_reg);
        // Restore the displaced value part into the register we stole.
1900        auto *assignment = self->val_assignment(local_idx);
1904        auto ap = AssignmentPartRef{assignment, part};
1905        if (!ap.variable_ref()) {
1907          assert(ap.stack_valid());
1908          self->load_from_stack(cur_reg, ap.frame_off(), ap.part_size());
1910        ap.set_reg(cur_reg);
1911        ap.set_register_valid(
            true);
        // Preserve the part's pre-displacement dirty bit.
1912        ap.set_modified(was_modified);
1913        self->register_file.mark_used(cur_reg, local_idx, part);
1917      cur_reg = AsmReg::make_invalid();
    // Claims a scratch register from 'bank', reusing the current one when it
    // matches. Falls back to evicting a non-fixed register (spilling its
    // value part first) when no free register exists.
1920    AsmReg alloc_from_bank(RegBank bank) {
1921      if (cur_reg.valid() && self->register_file.reg_bank(cur_reg) == bank) {
1924      if (cur_reg.valid()) {
      // NOTE(review): '®_file' below looks like mojibake for '&reg_file'
      // (an HTML-entity/encoding artifact) -- confirm against the original
      // source; left untouched here.
1930      auto ®_file = self->register_file;
1931      auto reg = reg_file.find_first_free_excluding(bank, 0);
1932      if (reg.invalid()) {
        // No free register: evict the first non-fixed one.
1934        reg = reg_file.find_first_nonfixed_excluding(bank, 0);
1935        if (reg.invalid()) {
1936          TPDE_FATAL(
              "ran out of registers for scratch registers");
1940        local_idx = reg_file.reg_local_idx(reg);
1941        part = reg_file.reg_part(reg);
1942        AssignmentPartRef ap{self->val_assignment(local_idx), part};
1943        was_modified = ap.modified();
1947        assert(ap.register_valid() && ap.get_reg() == reg);
        // Spill before stealing so the value survives in its stack slot.
1948        if (!ap.stack_valid() && !ap.variable_ref()) {
1949          self->allocate_spill_slot(ap);
1950          self->spill_reg(ap.get_reg(), ap.frame_off(), ap.part_size());
1951          ap.set_stack_valid();
1953        ap.set_register_valid(
            false);
1954        reg_file.unmark_used(reg);
      // Claim the register: fixed so nothing else reallocates it while we
      // use it as scratch.
1957      reg_file.mark_used(reg, INVALID_VAL_LOCAL_IDX, 0);
1958      reg_file.mark_clobbered(reg);
1959      reg_file.mark_fixed(reg);
    // Non-copyable/non-movable: owns the claimed register.
1964    ScratchWrapper &operator=(
        const ScratchWrapper &) =
        delete;
1965    ScratchWrapper &operator=(ScratchWrapper &&) =
        delete;
1968  IRBlockRef target_ref = analyzer.block_ref(target);
1969  IRBlockRef cur_ref = analyzer.block_ref(cur_block_idx);
  // One entry per PHI in the target block. Sorted by phi_local_idx so
  // std::lower_bound can find a PHI by its local index (operator< overloads
  // below support both entry-entry and entry-index comparison).
1974    IRValueRef incoming_val;
1975    ValLocalIdx phi_local_idx;
    // Local index of the incoming value when it is itself one of these PHIs
    // (i.e. a dependency edge); INVALID otherwise.
1977    ValLocalIdx incoming_phi_local_idx = INVALID_VAL_LOCAL_IDX;
1981    bool operator<(
        const NodeEntry &other)
        const noexcept {
1982      return phi_local_idx < other.phi_local_idx;
1985    bool operator<(ValLocalIdx other)
        const noexcept {
1986      return phi_local_idx < other;
1990  util::SmallVector<NodeEntry, 16> nodes;
1991  for (IRValueRef phi : adaptor->block_phis(target_ref)) {
1992    ValLocalIdx phi_local_idx = adaptor->val_local_idx(phi);
1993    auto incoming = adaptor->val_as_phi(phi).incoming_val_for_block(cur_ref);
1994    nodes.emplace_back(NodeEntry{
1995        .phi = phi, .incoming_val = incoming, .phi_local_idx = phi_local_idx});
  // (Message typo "has having" is in the original assert string.)
1999  assert(!nodes.empty() &&
         "block marked has having phi nodes has none");
2001  ScratchWrapper scratch{
      derived()};
  // Copies one PHI's incoming value into the PHI's assignment, part by part:
  // fixed-register parts get a mov/reload into the fixed register; other
  // parts are materialized in a register (scratch if needed) and spilled to
  // the PHI's stack slot.
2002  const auto move_to_phi = [
      this, &scratch](IRValueRef phi,
2003                                          IRValueRef incoming_val) {
2004    auto phi_vr =
        derived()->result_ref(phi);
2005    auto val_vr =
        derived()->val_ref(incoming_val);
    // Self-referencing PHI: nothing to move.
2006    if (phi == incoming_val) {
2010    u32 part_count = phi_vr.assignment()->part_count;
2011    for (u32 i = 0; i < part_count; ++i) {
2012      AssignmentPartRef phi_ap{phi_vr.assignment(), i};
2013      ValuePartRef val_vpr = val_vr.part(i);
2015      if (phi_ap.fixed_assignment()) {
2016        if (AsmReg reg = val_vpr.cur_reg_unlocked(); reg.valid()) {
2017          derived()->mov(phi_ap.get_reg(), reg, phi_ap.part_size());
2019          val_vpr.reload_into_specific_fixed(phi_ap.get_reg());
2022        AsmReg reg = val_vpr.cur_reg_unlocked();
2024          reg = scratch.alloc_from_bank(val_vpr.bank());
2025          val_vpr.reload_into_specific_fixed(reg);
2028        derived()->spill_reg(reg, phi_ap.frame_off(), phi_ap.part_size());
2029        phi_ap.set_stack_valid();
  // Fast path: a single PHI cannot depend on another one.
2034  if (nodes.size() == 1) {
2035    move_to_phi(nodes[0].phi, nodes[0].incoming_val);
  // Sort by local index to enable lower_bound lookups below.
2040  std::sort(nodes.begin(), nodes.end());
  // Build dependency edges: for each node whose incoming value is another
  // PHI of this block (and not itself), record the dependency.
  // NOTE(review): ref_count increments are on lines not visible in this
  // extract; the worklist below relies on them.
2043  auto all_zero_ref =
      true;
2044  for (
      auto &node : nodes) {
2047    bool incoming_is_phi = adaptor->val_is_phi(node.incoming_val);
2048    if (!incoming_is_phi || node.incoming_val == node.phi) {
2052    ValLocalIdx inc_local_idx = adaptor->val_local_idx(node.incoming_val);
2053    auto it = std::lower_bound(nodes.begin(), nodes.end(), inc_local_idx);
2054    if (it == nodes.end() || it->phi != node.incoming_val) {
2059    node.incoming_phi_local_idx = inc_local_idx;
2061    all_zero_ref =
        false;
    // No dependencies at all: emit the moves in any order.
2066    for (
        auto &node : nodes) {
2067      move_to_phi(node.phi, node.incoming_val);
  // Worklist state: indices whose moves can be emitted now (ready) vs. those
  // still referenced as someone's incoming value (waiting).
2073  util::SmallVector<u32, 32> ready_indices;
2074  ready_indices.reserve(nodes.size());
2075  util::SmallBitSet<256> waiting_nodes;
2076  waiting_nodes.resize(nodes.size());
2077  for (u32 i = 0; i < nodes.size(); ++i) {
2078    if (nodes[i].ref_count) {
2079      waiting_nodes.mark_set(i);
2081      ready_indices.push_back(i);
  // Cycle-breaking temporary: holds one PHI's value in tmp_reg1/tmp_reg2
  // (<= 2 parts) or in a stack slot of cur_tmp_slot_size (> 2 parts).
2085  u32 handled_count = 0;
2086  u32 cur_tmp_part_count = 0;
2087  i32 cur_tmp_slot = 0;
2088  u32 cur_tmp_slot_size = 0;
2089  IRValueRef cur_tmp_val = Adaptor::INVALID_VALUE_REF;
  // Writes the saved temporary into target_phi's assignment (registers for
  // the <= 2 part case, stack-slot round trip otherwise).
2092  const auto move_from_tmp_phi = [&](IRValueRef target_phi) {
2093    auto phi_vr =
        val_ref(target_phi);
2094    if (cur_tmp_part_count <= 2) {
2095      AssignmentPartRef ap{phi_vr.assignment(), 0};
2096      assert(!tmp_reg1.cur_reg.invalid());
2097      if (ap.fixed_assignment()) {
2098        derived()->mov(ap.get_reg(), tmp_reg1.cur_reg, ap.part_size());
2100        derived()->spill_reg(tmp_reg1.cur_reg, ap.frame_off(), ap.part_size());
      // Second part (if any) always lives in tmp_reg2; only part 0 may be a
      // fixed assignment.
2103      if (cur_tmp_part_count == 2) {
2104        AssignmentPartRef ap_high{phi_vr.assignment(), 1};
2105        assert(!ap_high.fixed_assignment());
2106        assert(!tmp_reg2.cur_reg.invalid());
2108            tmp_reg2.cur_reg, ap_high.frame_off(), ap_high.part_size());
      // > 2 parts: copy each part from the temporary stack slot into the
      // PHI's own stack slot through a scratch register.
2113      for (u32 i = 0; i < cur_tmp_part_count; ++i) {
2114        AssignmentPartRef phi_ap{phi_vr.assignment(), i};
2115        assert(!phi_ap.fixed_assignment());
2117        auto slot_off = cur_tmp_slot + phi_ap.part_off();
2118        auto reg = tmp_reg1.alloc_from_bank(phi_ap.bank());
2119        derived()->load_from_stack(reg, slot_off, phi_ap.part_size());
2120        derived()->spill_reg(reg, phi_ap.frame_off(), phi_ap.part_size());
  // Main scheduling loop: drain ready nodes; when none are ready we are in a
  // cycle -- save one waiting PHI to the temporary and mark it ready.
2124  while (handled_count != nodes.size()) {
2125    if (ready_indices.empty()) {
2127      auto cur_idx_opt = waiting_nodes.first_set();
2128      assert(cur_idx_opt);
2129      auto cur_idx = *cur_idx_opt;
      // In a simple cycle every member is referenced exactly once.
2130      assert(nodes[cur_idx].ref_count == 1);
2131      assert(cur_tmp_val == Adaptor::INVALID_VALUE_REF);
2133      auto phi_val = nodes[cur_idx].phi;
2134      auto phi_vr = this->
          val_ref(phi_val);
2135      auto *assignment = phi_vr.assignment();
2136      cur_tmp_part_count = assignment->part_count;
2137      cur_tmp_val = phi_val;
      // Many parts: save the whole value into a temporary stack slot.
2139      if (cur_tmp_part_count > 2) {
2141        cur_tmp_slot_size = assignment->size();
2144        for (u32 i = 0; i < cur_tmp_part_count; ++i) {
2145          auto ap = AssignmentPartRef{assignment, i};
2146          assert(!ap.fixed_assignment());
2147          auto slot_off = cur_tmp_slot + ap.part_off();
          // Part already in a register: spill it straight to the slot;
          // otherwise round-trip through a scratch register.
2149          if (ap.register_valid()) {
2150            auto reg = ap.get_reg();
2151            derived()->spill_reg(reg, slot_off, ap.part_size());
2153            auto reg = tmp_reg1.alloc_from_bank(ap.bank());
2154            assert(ap.stack_valid());
2155            derived()->load_from_stack(reg, ap.frame_off(), ap.part_size());
2156            derived()->spill_reg(reg, slot_off, ap.part_size());
        // 1-2 parts: keep the value live in tmp_reg1 (and tmp_reg2).
2162        auto phi_vpr = phi_vr.part(0);
2163        auto reg = tmp_reg1.alloc_from_bank(phi_vpr.bank());
2164        phi_vpr.reload_into_specific_fixed(
            this, reg);
2166        if (cur_tmp_part_count == 2) {
2168          auto phi_vpr_high = phi_vr.part(1);
2169          auto reg_high = tmp_reg2.alloc_from_bank(phi_vpr_high.bank());
2170          phi_vpr_high.reload_into_specific_fixed(
              this, reg_high);
      // The saved PHI no longer blocks its consumer: schedule it.
2174      nodes[cur_idx].ref_count = 0;
2175      ready_indices.push_back(cur_idx);
2176      waiting_nodes.mark_unset(cur_idx);
    // Emit moves for all currently-ready nodes; completing a move may
    // release its dependency and extend ready_indices within this loop.
2179    for (u32 i = 0; i < ready_indices.size(); ++i) {
2181      auto cur_idx = ready_indices[i];
2182      auto phi_val = nodes[cur_idx].phi;
2183      IRValueRef incoming_val = nodes[cur_idx].incoming_val;
      // Self-reference: nothing to emit.
2184      if (incoming_val == phi_val) {
      // Incoming value was saved to the temporary (cycle break): replay it
      // from there, then release the temporary.
2191      if (incoming_val == cur_tmp_val) {
2192        move_from_tmp_phi(phi_val);
2194        if (cur_tmp_part_count > 2) {
          // NOTE(review): the slot-free call is on a line not visible in
          // this extract; cur_tmp_slot is poisoned afterwards.
2196          cur_tmp_slot = 0xFFFF'FFFF;
2197          cur_tmp_slot_size = 0;
2199        cur_tmp_val = Adaptor::INVALID_VALUE_REF;
2206        move_to_phi(phi_val, incoming_val);
      // No dependency edge: done with this node.
2208      if (nodes[cur_idx].incoming_phi_local_idx == INVALID_VAL_LOCAL_IDX) {
      // Otherwise decrement the provider's ref_count; when it reaches zero
      // the provider's own move becomes safe to emit.
2212      auto it = std::lower_bound(
2213          nodes.begin(), nodes.end(), nodes[cur_idx].incoming_phi_local_idx);
2214      assert(it != nodes.end() && it->phi == incoming_val &&
2215             "incoming_phi_local_idx set incorrectly");
2217      assert(it->ref_count > 0);
2218      if (--it->ref_count == 0) {
2219        auto node_idx =
            static_cast<u32
            >(it - nodes.begin());
2220        ready_indices.push_back(node_idx);
2221        waiting_nodes.mark_unset(node_idx);
2224    ready_indices.clear();
// Returns the index of the block immediately following the current one in
// the analyzer's layout order (cur_block_idx + 1). Callers (e.g. the
// conditional-branch emitter above) compare it against branch targets to
// decide when a branch can fall through.
2228template <IRAdaptor Adaptor,
          typename Derived, CompilerConfig Config>
// NOTE(review): the signature line is missing from this extract; per the
// in-class declaration this is next_block() const noexcept -- confirm.
2231  return static_cast<BlockIndex
      >(
      static_cast<u32
      >(cur_block_idx) + 1);
// Returns the symbol of a read-only data slot holding a pointer to the
// current function's personality routine, creating and caching it on first
// use (memoized in personality_syms, keyed by the personality function's
// symbol). Returns an invalid SymRef when the function needs no unwind info
// or has no personality function.
2234template <IRAdaptor Adaptor,
          typename Derived, CompilerConfig Config>
// NOTE(review): the signature line is missing from this extract; per the
// in-class declaration this is get_personality_sym() -- confirm.
2236  SymRef personality_sym;
2237  if (this->adaptor->cur_needs_unwind_info()) {
2238    SymRef personality_func =
        derived()->cur_personality_func();
2239    if (personality_func.valid()) {
      // Linear-scan cache lookup; reuse the pointer symbol if this
      // personality function was already materialized.
2240      for (
          const auto &[fn_sym, ptr_sym] : personality_syms) {
2241        if (fn_sym == personality_func) {
2242          personality_sym = ptr_sym;
      // Cache miss: emit an 8-byte zero-initialized rodata object with an
      // absolute relocation against the personality function, then remember
      // the (function, pointer-symbol) pair.
2247      if (!personality_sym.valid()) {
2251        static constexpr std::array<u8, 8> zero{};
2253        auto rodata = this->assembler.get_data_section(
            true,
            true);
2254        personality_sym = this->assembler.sym_def_data(
        // NOTE(review): 'off' is defined on a line not visible in this
        // extract (presumably the data offset of the new symbol) -- confirm.
2256        this->assembler.reloc_abs(rodata, personality_func, off, 0);
2258        personality_syms.emplace_back(personality_func, personality_sym);
2262  return personality_sym;
// Compiles one IR function into machine code: switches the adaptor and
// analyzer to the function, resets all per-function state (stack frame,
// value assignments, register file), emits the prologue and argument
// assignment, materializes static allocas, compiles every block in layout
// order, then frees remaining variable-ref assignments and finalizes the
// function in the text writer. Returns false on failure.
2265template <IRAdaptor Adaptor,
          typename Derived, CompilerConfig Config>
// NOTE(review): the line carrying the function name is missing from this
// extract; per the in-class declaration this is
// compile_func(IRFuncRef, u32) -- confirm.
2267    const IRFuncRef func,
    const u32 func_idx)
    noexcept {
  // Point adaptor and analyzer at the function to compile.
2268  if (!adaptor->switch_func(func)) {
2272  analyzer.switch_func(func);
  // Reset stack-frame bookkeeping; frame_size is poisoned until the
  // prologue computes it.
2276  stack.frame_size = ~0u;
2278  for (
      auto &e : stack.fixed_free_lists) {
2281  stack.dynamic_free_lists.clear();
2283  stack.has_dynamic_alloca = this->adaptor->cur_has_dynamic_alloca();
2284  stack.is_leaf_function = !
      derived()->cur_func_may_emit_calls();
2285  stack.generated_call =
      false;
2286  stack.frame_used =
      false;
  // Reset value-assignment state; all slots must have been freed by the
  // previous function.
2288  assignments.cur_fixed_assignment_count = {};
2289  assert(std::ranges::none_of(assignments.value_ptrs, std::identity{}));
2290  if (assignments.value_ptrs.size() < analyzer.liveness.size()) {
2291    assignments.value_ptrs.resize(analyzer.liveness.size());
2294  assignments.allocator.reset();
2295  assignments.variable_ref_list = INVALID_VAL_LOCAL_IDX;
2296  assignments.delayed_free_lists.clear();
2297  assignments.delayed_free_lists.resize(analyzer.block_layout.size(),
2298                                        INVALID_VAL_LOCAL_IDX);
2301      static_cast<BlockIndex
      >(analyzer.block_idx(adaptor->cur_entry_block()));
2303  register_file.reset();
  // Rough code-size estimate to pre-size the text section: 8 bytes per
  // instruction plus 0x40 bytes of slack.
2309  u32 expected_code_size = 0x8 * analyzer.num_insts + 0x40;
2310  this->text_writer.begin_func(16, expected_code_size);
2312  derived()->start_func(func_idx);
  // One label per block in layout order; placed when each block is compiled.
2314  block_labels.clear();
2315  block_labels.resize_uninitialized(analyzer.block_layout.size());
2316  for (u32 i = 0; i < analyzer.block_layout.size(); ++i) {
2317    block_labels[i] = text_writer.label_create();
  // Calling-convention setup for this function.
2323  CCAssigner *cc_assigner =
      derived()->cur_cc_assigner();
2324  assert(cc_assigner !=
         nullptr);
2326  register_file.allocatable = cc_assigner->get_ccinfo().allocatable_regs;
2328  cc_assigner->reset();
2330  const CCInfo &cc_info = cc_assigner->get_ccinfo();
2331  assert((cc_info.allocatable_regs & cc_info.arg_regs) == cc_info.arg_regs &&
2332         "argument registers must also be allocatable");
  // Temporarily reserve the argument registers so prologue/argument
  // assignment does not clobber incoming values; re-enabled after the
  // prologue below.
2333  this->register_file.allocatable &= ~cc_info.arg_regs;
2336  derived()->prologue_begin(cc_assigner);
  // NOTE(review): arg_idx is declared on a line not visible in this extract.
2338  for (
      const IRValueRef arg : this->adaptor->cur_args()) {
2341    derived()->prologue_assign_arg(cc_assigner, arg_idx++, arg);
2344  derived()->prologue_end(cc_assigner);
2346  this->register_file.allocatable |= cc_info.arg_regs;
  // Reserve frame slots for static allocas, rounded up to their alignment.
2348  for (
      const IRValueRef alloca : adaptor->cur_static_allocas()) {
2349    auto size = adaptor->val_alloca_size(alloca);
2350    size = util::align_up(size, adaptor->val_alloca_align(alloca));
2352    ValLocalIdx local_idx = adaptor->val_local_idx(alloca);
2354    ValueAssignment *assignment = val_assignment(local_idx);
2355    assignment->stack_variable =
        true;
  // Target-specific variable-ref setup when the default handling is off.
2359  if constexpr (!Config::DEFAULT_VAR_REF_HANDLING) {
2360    derived()->setup_var_ref_assignments();
  // Compile every block in layout order; on failure, log, clear assignment
  // pointers and bail out.
2363  for (u32 i = 0; i < analyzer.block_layout.size(); ++i) {
2364    const auto block_ref = analyzer.block_layout[i];
2366        "Compiling block {} ({})", i, adaptor->block_fmt_ref(block_ref));
2367    if (!
        derived()->compile_block(block_ref, i)) [[unlikely]] {
2368      TPDE_LOG_ERR(
          "Failed to compile block {} ({})",
2370                   adaptor->block_fmt_ref(block_ref));
2372      assignments.value_ptrs.clear();
  // Walk the singly-linked variable-ref list and null out the surviving
  // assignment pointers (their storage is owned by the arena allocator).
2378  ValLocalIdx variable_ref_list = assignments.variable_ref_list;
2379  while (variable_ref_list != INVALID_VAL_LOCAL_IDX) {
2380    u32 idx = u32(variable_ref_list);
2381    ValLocalIdx next = assignments.value_ptrs[idx]->next_delayed_free_entry;
2382    assignments.value_ptrs[idx] =
        nullptr;
2383    variable_ref_list = next;
  // Every assignment must be freed by now, or a ref-count is missing.
2386  assert(std::ranges::none_of(assignments.value_ptrs, std::identity{}) &&
2387         "found non-freed ValueAssignment, maybe missing ref-count?");
2389  derived()->finish_func(func_idx);
2390  this->text_writer.finish_func();
2395template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
2397 const IRBlockRef block,
const u32 block_idx)
noexcept {
2399 static_cast<typename Analyzer<Adaptor>::BlockIndex
>(block_idx);
2402 auto &&val_range = adaptor->block_insts(block);
2403 auto end = val_range.end();
2404 for (
auto it = val_range.begin(); it != end; ++it) {
2405 const IRInstRef inst = *it;
2406 if (this->adaptor->inst_fused(inst)) {
2412 if (!
derived()->compile_inst(inst, InstRange{.from = it_cpy, .to = end}))
2414 TPDE_LOG_ERR(
"Failed to compile instruction {}",
2415 this->adaptor->inst_fmt_ref(inst));
2424 for (
auto reg_id : register_file.used_regs()) {
2426 assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);
2427 AssignmentPartRef ap{val_assignment(register_file.reg_local_idx(reg)),
2428 register_file.reg_part(reg)};
2429 assert(ap.register_valid());
2430 assert(ap.get_reg() == reg);
2431 assert(!register_file.is_fixed(reg) || ap.fixed_assignment());
2435 if (
static_cast<u32
>(assignments.delayed_free_lists[block_idx]) != ~0u) {
2436 auto list_entry = assignments.delayed_free_lists[block_idx];
2437 while (
static_cast<u32
>(list_entry) != ~0u) {
2438 auto *assignment = assignments.value_ptrs[
static_cast<u32
>(list_entry)];
2439 auto next_entry = assignment->next_delayed_free_entry;
2440 derived()->free_assignment(list_entry, assignment);
2441 list_entry = next_entry;