436 IRBlockRef default_block,
437 std::span<const std::pair<u64, IRBlockRef>> cases) noexcept;
// Emit the register/stack moves that materialize PHI inputs when branching
// to `target`; cheap no-op when the analyzer saw no PHIs in that block.
// NOTE(review): closing braces for this method were elided in this extract.
459 void move_to_phi_nodes(BlockIndex target)
noexcept {
460 if (analyzer.block_has_phis(target)) {
461 move_to_phi_nodes_impl(target);
// Out-of-line worker for move_to_phi_nodes; performs the actual PHI moves.
465 void move_to_phi_nodes_impl(BlockIndex target)
noexcept;
// A conditional branch edge needs its own split block exactly when the
// target has PHI nodes, since PHI moves must execute on that edge only.
467 bool branch_needs_split(IRBlockRef target)
noexcept {
469 return analyzer.block_has_phis(target);
// Index of the next block in layout order (used for fall-through decisions).
472 BlockIndex next_block() const noexcept;
// CRTP hook: Derived may opt specific values into fixed register
// assignments; the base default never does.
474 bool try_force_fixed_assignment(IRValueRef) const noexcept {
return false; }
// CRTP hook invoked after function symbols are created (see compile());
// return false to abort compilation. Default: succeed.
476 bool hook_post_func_sym_init() noexcept {
return true; }
// CRTP hook fired before IR analysis of a function; default is a no-op.
478 void analysis_start() noexcept {}
// CRTP hook fired after IR analysis of a function; default is a no-op.
480 void analysis_end() noexcept {}
// Record a relocation of kind `type` against `sym` at `offset` within the
// current text section; `addend` defaults to 0.
// NOTE(review): closing brace elided in this extract.
482 void reloc_text(SymRef sym, u32 type, u64 offset, i64 addend = 0) noexcept {
483 this->assembler.reloc_sec(
484 text_writer.get_sec_ref(), sym, type, offset, addend);
// Bind `label` to the current emission offset of the text writer.
487 void label_place(Label label)
noexcept {
488 this->text_writer.label_place(label, text_writer.offset());
// Symbol for the EH personality routine of the current function.
492 SymRef get_personality_sym() noexcept;
// Compile one function / one basic block; return false on failure.
494 bool compile_func(IRFuncRef func, u32 func_idx) noexcept;
496 bool compile_block(IRBlockRef block, u32 block_idx) noexcept;
500#include "GenericValuePart.hpp"
501#include "ScratchReg.hpp"
502#include "ValuePartRef.hpp"
503#include "ValueRef.hpp"
// Assign one value part `vp` as a call argument per the calling convention.
// `cca.int_ext` encodes integer extension: bit 7 = sign flag, low 6 bits =
// source bit-width (see the >>7 / &0x3f decode below). Paths: byval memory
// copy, stack slot (with extension applied first), or a CC register —
// salvaging `vp`'s register when possible, otherwise evicting/moving into
// the fixed arg register. The arg register is then removed from the
// allocatable set and remembered in `arg_regs` until call() releases it.
// NOTE(review): several interior lines (branch heads, closing braces) were
// elided in this extract; do not assume the visible text is complete.
507template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
508template <
typename CBDerived>
509void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
510 CBDerived>::add_arg(ValuePart &&vp, CCAssignment cca)
noexcept {
512 cca.bank = vp.bank();
513 cca.size = vp.part_size();
516 assigner.assign_arg(cca);
// Decode the extension request packed into int_ext.
517 bool needs_ext = cca.int_ext != 0;
518 bool ext_sign = cca.int_ext >> 7;
519 unsigned ext_bits = cca.int_ext & 0x3f;
// byval arguments are copied to the stack by the derived impl.
522 derived()->add_arg_byval(vp, cca);
524 }
else if (!cca.reg.valid()) {
// No register assigned: argument goes to the stack, extended to 64 bits
// first when an extension was requested.
526 auto ext = std::move(vp).into_extended(&compiler, ext_sign, ext_bits, 64);
527 derived()->add_arg_stack(ext, cca);
528 ext.reset(&compiler);
530 derived()->add_arg_stack(vp, cca);
534 u32 size = vp.part_size();
535 if (vp.is_in_reg(cca.reg)) {
// Value already sits in the required arg register; pin it (salvage) or
// evict the other user first.
536 if (!vp.can_salvage()) {
537 compiler.evict_reg(cca.reg);
539 vp.salvage(&compiler);
542 compiler.generate_raw_intext(cca.reg, cca.reg, ext_sign, ext_bits, 64);
545 if (compiler.register_file.is_used(cca.reg)) {
546 compiler.evict_reg(cca.reg);
// Move (or extend-move) the value into the arg register.
548 if (vp.can_salvage()) {
549 AsmReg vp_reg = vp.salvage(&compiler);
551 compiler.generate_raw_intext(cca.reg, vp_reg, ext_sign, ext_bits, 64);
553 compiler.mov(cca.reg, vp_reg, size);
556 vp.reload_into_specific_fixed(&compiler, cca.reg);
558 compiler.generate_raw_intext(
559 cca.reg, cca.reg, ext_sign, ext_bits, 64);
// Reserve the arg register until the call is emitted.
564 assert(!compiler.register_file.is_used(cca.reg));
565 compiler.register_file.mark_clobbered(cca.reg);
566 compiler.register_file.allocatable &= ~(u64{1} << cca.reg.id());
567 arg_regs |= (1ull << cca.reg.id());
571template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
572template <
typename CBDerived>
573void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
574 CBDerived>::add_arg(
const CallArg &arg, u32 part_count)
noexcept {
575 ValueRef vr = compiler.val_ref(arg.value);
577 if (arg.flag == CallArg::Flag::byval) {
578 assert(part_count == 1);
582 .align = arg.byval_align,
583 .size = arg.byval_size,
589 bool consecutive =
false;
591 if (compiler.arg_is_int128(arg.value)) {
595 }
else if (part_count > 1 &&
596 !compiler.arg_allow_split_reg_stack_passing(arg.value)) {
598 if (part_count > UINT8_MAX) {
605 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
607 if (arg.flag == CallArg::Flag::sext || arg.flag == CallArg::Flag::zext) {
608 assert(arg.ext_bits != 0 &&
"cannot extend zero-bit integer");
609 int_ext = arg.ext_bits | (arg.flag == CallArg::Flag::sext ? 0x80 : 0);
615 u8(consecutive ? part_count - part_idx - 1 : consec_def),
616 .sret = arg.flag == CallArg::Flag::sret,
618 .align = u8(part_idx == 0 ? align : 1),
// Emit the actual call to `target` (symbol or computed address). Before the
// call, evicts every live caller-saved register except the already-reserved
// arg registers and (when salvageable) the register holding the call target
// itself. Afterwards the arg registers are returned to the allocatable set.
// NOTE(review): some interior lines/braces were elided in this extract.
623template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
624template <
typename CBDerived>
625void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<CBDerived>::call(
626 std::variant<SymRef, ValuePart> target)
noexcept {
627 assert(!compiler.stack.is_leaf_function &&
"leaf func must not have calls");
628 compiler.stack.generated_call =
true;
629 typename RegisterFile::RegBitSet skip_evict = arg_regs;
// Don't evict the register that holds the (salvageable) call target.
630 if (
auto *vp = std::get_if<ValuePart>(&target); vp && vp->can_salvage()) {
632 assert(vp->cur_reg_unlocked().valid() &&
"can_salvage implies register");
633 skip_evict |= (1ull << vp->cur_reg_unlocked().
id());
// Evict all used caller-saved registers not explicitly skipped.
636 auto clobbered = ~assigner.get_ccinfo().callee_saved_regs;
637 for (
auto reg_id : util::BitSetIterator<>{compiler.register_file.used &
638 clobbered & ~skip_evict}) {
639 compiler.evict_reg(AsmReg{reg_id});
640 compiler.register_file.mark_clobbered(Reg{reg_id});
643 derived()->call_impl(std::move(target));
// Release the arg registers reserved by add_arg().
645 assert((compiler.register_file.allocatable & arg_regs) == 0);
646 compiler.register_file.allocatable |= arg_regs;
// Bind one return-value part: ask the CC assigner for the return location
// (must be a register) and attach that register to `vp`.
649template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
650template <
typename CBDerived>
651void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
652 CBDerived>::add_ret(ValuePart &vp, CCAssignment cca)
noexcept {
653 cca.bank = vp.bank();
654 cca.size = vp.part_size();
655 assigner.assign_ret(cca);
656 assert(cca.reg.valid() &&
"return value must be in register");
657 vp.set_value_reg(&compiler, cca.reg);
// Bind all parts of `vr` as return values by delegating to the per-part
// add_ret overload above.
// NOTE(review): line declaring `cca` (original 667) was elided here —
// presumably a default-constructed CCAssignment per part; confirm upstream.
660template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
661template <
typename CBDerived>
662void CompilerBase<Adaptor, Derived, Config>::CallBuilderBase<
663 CBDerived>::add_ret(ValueRef &vr)
noexcept {
664 assert(vr.has_assignment());
665 u32 part_count = vr.assignment()->part_count;
666 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
668 add_ret(vr.part(part_idx), cca);
672template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
673void CompilerBase<Adaptor, Derived, Config>::RetBuilder::add(
674 ValuePart &&vp, CCAssignment cca)
noexcept {
675 cca.bank = vp.bank();
676 u32 size = cca.size = vp.part_size();
677 assigner.assign_ret(cca);
678 assert(cca.reg.valid() &&
"indirect return value must use sret argument");
680 bool needs_ext = cca.int_ext != 0;
681 bool ext_sign = cca.int_ext >> 7;
682 unsigned ext_bits = cca.int_ext & 0x3f;
684 if (vp.is_in_reg(cca.reg)) {
685 if (!vp.can_salvage()) {
686 compiler.evict_reg(cca.reg);
688 vp.salvage(&compiler);
691 compiler.generate_raw_intext(cca.reg, cca.reg, ext_sign, ext_bits, 64);
694 if (compiler.register_file.is_used(cca.reg)) {
695 compiler.evict_reg(cca.reg);
697 if (vp.can_salvage()) {
698 AsmReg vp_reg = vp.salvage(&compiler);
700 compiler.generate_raw_intext(cca.reg, vp_reg, ext_sign, ext_bits, 64);
702 compiler.mov(cca.reg, vp_reg, size);
705 vp.reload_into_specific_fixed(&compiler, cca.reg);
707 compiler.generate_raw_intext(cca.reg, cca.reg, ext_sign, ext_bits, 64);
712 assert(!compiler.register_file.is_used(cca.reg));
713 compiler.register_file.mark_clobbered(cca.reg);
714 compiler.register_file.allocatable &= ~(u64{1} << cca.reg.id());
715 ret_regs |= (1ull << cca.reg.id());
// Add every part of IR value `val` as a function return value, using a
// fresh (default) CCAssignment for each part.
718template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
719void CompilerBase<Adaptor, Derived, Config>::RetBuilder::add(
720 IRValueRef val)
noexcept {
721 u32 part_count = compiler.val_parts(val).count();
722 ValueRef vr = compiler.val_ref(val);
723 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
724 add(vr.part(part_idx), CCAssignment{});
// Finish the return: release the reserved return-value registers back to
// the allocator, then emit the epilogue and drop register state.
728template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
729void CompilerBase<Adaptor, Derived, Config>::RetBuilder::ret() noexcept {
730 assert((compiler.register_file.allocatable & ret_regs) == 0);
731 compiler.register_file.allocatable |= ret_regs;
733 compiler.gen_func_epilog();
734 compiler.release_regs_after_return();
// Top-level compilation driver (signature line elided in this extract —
// presumably CompilerBase::compile()). Pass 1: create a symbol per IR
// function (undef for extern, predef otherwise; GLOBAL/WEAK/LOCAL binding
// from the adaptor). Then run the post-sym-init hook. Pass 2: compile each
// non-extern function body. Finally, finalize the assembler output.
// NOTE(review): many interior lines are elided; control flow is incomplete.
737template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
740 text_writer.switch_section(
741 assembler.get_section(assembler.get_text_section()));
743 assert(func_syms.empty());
744 for (
const IRFuncRef func : adaptor->funcs()) {
745 auto binding = Assembler::SymBinding::GLOBAL;
746 if (adaptor->func_has_weak_linkage(func)) {
747 binding = Assembler::SymBinding::WEAK;
748 }
else if (adaptor->func_only_local(func)) {
749 binding = Assembler::SymBinding::LOCAL;
751 if (adaptor->func_extern(func)) {
752 func_syms.push_back(
derived()->assembler.sym_add_undef(
753 adaptor->func_link_name(func), binding));
755 func_syms.push_back(
derived()->assembler.sym_predef_func(
756 adaptor->func_link_name(func), binding));
758 derived()->define_func_idx(func, func_syms.size() - 1);
761 if (!
derived()->hook_post_func_sym_init()) {
// NOTE(review): typo in log text — "hook_pust_..." should read
// "hook_post_func_sym_init"; fix when the full definition is editable.
762 TPDE_LOG_ERR(
"hook_pust_func_sym_init failed");
771 for (
const IRFuncRef func : adaptor->funcs()) {
772 if (adaptor->func_extern(func)) {
773 TPDE_LOG_TRACE(
"Skipping compilation of func {}",
774 adaptor->func_link_name(func));
779 TPDE_LOG_TRACE(
"Compiling func {}", adaptor->func_link_name(func));
780 if (!
derived()->compile_func(func, func_idx)) {
781 TPDE_LOG_ERR(
"Failed to compile function {}",
782 adaptor->func_link_name(func));
789 assembler.finalize();
796template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
800 for (
auto &e : stack.fixed_free_lists) {
803 stack.dynamic_free_lists.clear();
807 block_labels.clear();
808 personality_syms.clear();
811template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
812void CompilerBase<Adaptor, Derived, Config>::init_assignment(
813 IRValueRef value, ValLocalIdx local_idx)
noexcept {
814 assert(val_assignment(local_idx) ==
nullptr);
815 TPDE_LOG_TRACE(
"Initializing assignment for value {}",
816 static_cast<u32
>(local_idx));
818 const auto parts =
derived()->val_parts(value);
819 const u32 part_count = parts.count();
820 assert(part_count > 0);
821 auto *assignment = assignments.allocator.allocate(part_count);
822 assignments.value_ptrs[
static_cast<u32
>(local_idx)] = assignment;
824 u32 max_part_size = 0;
825 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
826 auto ap = AssignmentPartRef{assignment, part_idx};
828 ap.set_bank(parts.reg_bank(part_idx));
829 const u32 size = parts.size_bytes(part_idx);
831 max_part_size = std::max(max_part_size, size);
832 ap.set_part_size(size);
835 const auto &liveness = analyzer.liveness_info(local_idx);
844 if (part_count == 1) {
845 const auto &cur_loop =
846 analyzer.loop_from_idx(analyzer.block_loop_idx(cur_block_idx));
847 auto ap = AssignmentPartRef{assignment, 0};
850 liveness.last > cur_block_idx &&
851 cur_loop.definitions_in_childs +
852 assignments.cur_fixed_assignment_count[ap.bank().id()] <
853 Derived::NUM_FIXED_ASSIGNMENTS[ap.bank().id()];
854 if (
derived()->try_force_fixed_assignment(value)) {
855 try_fixed = assignments.cur_fixed_assignment_count[ap.bank().id()] <
856 Derived::NUM_FIXED_ASSIGNMENTS[ap.bank().id()];
861 AsmReg reg =
derived()->select_fixed_assignment_reg(ap, value);
862 TPDE_LOG_TRACE(
"Trying to assign fixed reg to value {}",
863 static_cast<u32
>(local_idx));
867 if (!reg.invalid() && !register_file.is_used(reg)) {
868 TPDE_LOG_TRACE(
"Assigning fixed assignment to reg {} for value {}",
870 static_cast<u32
>(local_idx));
872 ap.set_register_valid(
true);
873 ap.set_fixed_assignment(
true);
874 register_file.mark_used(reg, local_idx, 0);
875 register_file.inc_lock_count(reg);
876 register_file.mark_clobbered(reg);
877 ++assignments.cur_fixed_assignment_count[ap.bank().id()];
882 const auto last_full = liveness.last_full;
883 const auto ref_count = liveness.ref_count;
885 assert(max_part_size <= 256);
886 assignment->max_part_size = max_part_size;
887 assignment->pending_free =
false;
888 assignment->variable_ref =
false;
889 assignment->stack_variable =
false;
890 assignment->delay_free = last_full;
891 assignment->part_count = part_count;
892 assignment->frame_off = 0;
893 assignment->references_left = ref_count;
896template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
898 ValLocalIdx local_idx, ValueAssignment *assignment)
noexcept {
899 TPDE_LOG_TRACE(
"Freeing assignment for value {}",
900 static_cast<u32
>(local_idx));
902 assert(assignments.value_ptrs[
static_cast<u32
>(local_idx)] == assignment);
903 assignments.value_ptrs[
static_cast<u32
>(local_idx)] =
nullptr;
904 const auto is_var_ref = assignment->variable_ref;
905 const u32 part_count = assignment->part_count;
908 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
909 auto ap = AssignmentPartRef{assignment, part_idx};
910 if (ap.fixed_assignment()) [[unlikely]] {
911 const auto reg = ap.get_reg();
912 assert(register_file.is_fixed(reg));
913 assert(register_file.reg_local_idx(reg) == local_idx);
914 assert(register_file.reg_part(reg) == part_idx);
915 --assignments.cur_fixed_assignment_count[ap.bank().id()];
916 register_file.dec_lock_count_must_zero(reg);
917 register_file.unmark_used(reg);
918 }
else if (ap.register_valid()) {
919 const auto reg = ap.get_reg();
920 assert(!register_file.is_fixed(reg));
921 register_file.unmark_used(reg);
926 for (
auto reg_id : register_file.used_regs()) {
927 assert(register_file.reg_local_idx(AsmReg{reg_id}) != local_idx &&
928 "freeing assignment that is still referenced by a register");
933 if (!is_var_ref && assignment->frame_off != 0) {
934 free_stack_slot(assignment->frame_off, assignment->size());
937 assignments.allocator.deallocate(assignment);
940template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
941[[gnu::noinline]]
void
943 ValLocalIdx local_idx, ValueAssignment *assignment)
noexcept {
944 if (!assignment->delay_free) {
945 free_assignment(local_idx, assignment);
950 TPDE_LOG_TRACE(
"Delay freeing assignment for value {}",
951 static_cast<u32
>(local_idx));
952 const auto &liveness = analyzer.liveness_info(local_idx);
953 auto &free_list_head = assignments.delayed_free_lists[u32(liveness.last)];
954 assignment->next_delayed_free_entry = free_list_head;
955 assignment->pending_free =
true;
956 free_list_head = local_idx;
959template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
961 ValLocalIdx local_idx, u32 var_ref_data)
noexcept {
962 TPDE_LOG_TRACE(
"Initializing variable-ref assignment for value {}",
963 static_cast<u32
>(local_idx));
965 assert(val_assignment(local_idx) ==
nullptr);
966 auto *assignment = assignments.allocator.allocate_slow(1,
true);
967 assignments.value_ptrs[
static_cast<u32
>(local_idx)] = assignment;
969 assignment->max_part_size = Config::PLATFORM_POINTER_SIZE;
970 assignment->variable_ref =
true;
971 assignment->stack_variable =
false;
972 assignment->part_count = 1;
973 assignment->var_ref_custom_idx = var_ref_data;
974 assignment->next_delayed_free_entry = assignments.variable_ref_list;
976 assignments.variable_ref_list = local_idx;
978 AssignmentPartRef ap{assignment, 0};
980 ap.set_bank(Config::GP_BANK);
981 ap.set_part_size(Config::PLATFORM_POINTER_SIZE);
// Allocate a stack slot and return its frame offset. Small sizes (<=16) are
// rounded up to a power of two and served from per-size free lists; larger
// sizes are 16-byte aligned and served from a size-keyed dynamic free list.
// When no free slot exists, the frame is grown — first padded with filler
// slots (pushed onto the fixed free lists) until frame_size reaches the
// required alignment, then bumped by `size`. With FRAME_INDEXING_NEGATIVE,
// offsets are negated relative to the frame end.
// NOTE(review): several interior lines (returns, braces) were elided here.
984template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
985i32 CompilerBase<Adaptor, Derived, Config>::allocate_stack_slot(
987 unsigned align_bits = 4;
990 }
else if (size <= 16) {
// Round size up to the next power of two; that power is both the free
// list index and the slot alignment.
992 u32 free_list_idx = size == 1 ? 0 : 32 - util::cnt_lz<u32>(size - 1);
993 assert(size <= 1u << free_list_idx);
994 size = 1 << free_list_idx;
995 align_bits = free_list_idx;
997 if (!stack.fixed_free_lists[free_list_idx].empty()) {
998 auto slot = stack.fixed_free_lists[free_list_idx].back();
999 stack.fixed_free_lists[free_list_idx].pop_back();
1003 size = util::align_up(size, 16);
1004 auto it = stack.dynamic_free_lists.find(size);
1005 if (it != stack.dynamic_free_lists.end() && !it->second.empty()) {
1006 const auto slot = it->second.back();
1007 it->second.pop_back();
1012 assert(stack.frame_size != ~0u &&
1013 "cannot allocate stack slot before stack frame is initialized");
// Pad the frame with progressively larger filler slots until it is
// aligned for this allocation; fillers are recycled via the free lists.
1016 for (u32 list_idx = util::cnt_tz(stack.frame_size); list_idx < align_bits;
1017 list_idx = util::cnt_tz(stack.frame_size)) {
1018 i32 slot = stack.frame_size;
1019 if constexpr (Config::FRAME_INDEXING_NEGATIVE) {
1020 slot = -(slot + (1ull << list_idx));
1022 stack.fixed_free_lists[list_idx].push_back(slot);
1023 stack.frame_size += 1ull << list_idx;
1026 auto slot = stack.frame_size;
1027 assert(slot != 0 &&
"stack slot 0 is reserved");
1028 stack.frame_size += size;
1030 if constexpr (Config::FRAME_INDEXING_NEGATIVE) {
1031 slot = -(slot + size);
// Return a stack slot to the matching free list (mirror of
// allocate_stack_slot's size rounding). Zero-sized slots are a no-op.
1036template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1038 u32 slot, u32 size)
noexcept {
1039 if (size == 0) [[unlikely]] {
1040 assert(slot == 0 &&
"unexpected slot for zero-sized stack-slot?");
1042 }
else if (size <= 16) [[likely]] {
// Same power-of-two bucketing as in allocate_stack_slot.
1043 u32 free_list_idx = size == 1 ? 0 : 32 - util::cnt_lz<u32>(size - 1);
1044 stack.fixed_free_lists[free_list_idx].push_back(slot);
1046 size = util::align_up(size, 16);
1047 stack.dynamic_free_lists[size].push_back(slot);
1051template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1052template <
typename Fn>
1054 u32 arg_idx, IRValueRef arg, Fn add_arg)
noexcept {
1055 ValueRef vr =
derived()->result_ref(arg);
1056 if (adaptor->cur_arg_is_byval(arg_idx)) {
1057 std::optional<i32> byval_frame_off =
1061 .align = u8(adaptor->cur_arg_byval_align(arg_idx)),
1062 .size = adaptor->cur_arg_byval_size(arg_idx),
1065 if (byval_frame_off) {
1067 ValLocalIdx local_idx = val_idx(arg);
1073 if (ValueAssignment *assignment = val_assignment(local_idx)) {
1074 free_assignment(local_idx, assignment);
1077 ValueAssignment *assignment = this->val_assignment(local_idx);
1078 assignment->stack_variable =
true;
1079 assignment->frame_off = *byval_frame_off;
1084 if (adaptor->cur_arg_is_sret(arg_idx)) {
1085 add_arg(vr.part(0), CCAssignment{.sret = true});
1089 const u32 part_count = vr.assignment()->part_count;
1092 u32 consecutive = 0;
1094 if (
derived()->arg_is_int128(arg)) {
1098 }
else if (part_count > 1 &&
1099 !
derived()->arg_allow_split_reg_stack_passing(arg)) {
1101 if (part_count > UINT8_MAX) {
1108 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
1109 add_arg(vr.part(part_idx),
1112 u8(consecutive ? part_count - part_idx - 1 : consec_def),
1113 .align = u8(part_idx == 0 ? align : 1),
1118template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1121 if (
auto special =
derived()->val_ref_special(value); special) {
1122 return ValueRef{
this, std::move(*special)};
1125 const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(value);
1126 assert(val_assignment(local_idx) !=
nullptr &&
"value use before def");
1127 return ValueRef{
this, local_idx};
1130template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1131std::pair<typename CompilerBase<Adaptor, Derived, Config>::ValueRef,
1134 IRValueRef value)
noexcept {
1135 std::pair<ValueRef, ValuePartRef> res{val_ref(value),
this};
1136 res.second = res.first.part(0);
1140template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1143 IRValueRef value)
noexcept {
1144 const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(value);
1145 if (val_assignment(local_idx) ==
nullptr) {
1146 init_assignment(value, local_idx);
1148 return ValueRef{
this, local_idx};
1151template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1152std::pair<typename CompilerBase<Adaptor, Derived, Config>::ValueRef,
1153 typename CompilerBase<Adaptor, Derived, Config>::ValuePartRef>
1154 CompilerBase<Adaptor, Derived, Config>::result_ref_single(
1155 IRValueRef value)
noexcept {
1156 std::pair<ValueRef, ValuePartRef> res{
result_ref(value),
this};
1157 res.second = res.first.part(0);
1161template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1164 IRValueRef dst, ValueRef &&src)
noexcept {
1165 const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(dst);
1166 assert(!val_assignment(local_idx) &&
"alias target already defined");
1167 assert(src.has_assignment() &&
"alias src must have an assignment");
1172 assert(src.is_owned() &&
"alias src must be owned");
1174 ValueAssignment *assignment = src.assignment();
1175 u32 part_count = assignment->part_count;
1176 assert(!assignment->pending_free);
1177 assert(!assignment->variable_ref);
1178 assert(!assignment->pending_free);
1181 const auto &src_liveness = analyzer.liveness_info(src.local_idx());
1182 assert(!src_liveness.last_full);
1183 assert(assignment->references_left == 1);
1186 const auto parts =
derived()->val_parts(dst);
1187 assert(parts.count() == part_count);
1188 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
1189 AssignmentPartRef ap{assignment, part_idx};
1190 assert(parts.reg_bank(part_idx) == ap.bank());
1191 assert(parts.size_bytes(part_idx) == ap.part_size());
1197 for (u32 part_idx = 0; part_idx < part_count; ++part_idx) {
1198 AssignmentPartRef ap{assignment, part_idx};
1199 if (ap.register_valid()) {
1200 register_file.update_reg_assignment(ap.get_reg(), local_idx, part_idx);
1204 const auto &liveness = analyzer.liveness_info(local_idx);
1205 assignment->delay_free = liveness.last_full;
1206 assignment->references_left = liveness.ref_count;
1207 assignments.value_ptrs[
static_cast<u32
>(src.local_idx())] =
nullptr;
1208 assignments.value_ptrs[
static_cast<u32
>(local_idx)] = assignment;
1212 return ValueRef{
this, local_idx};
1215template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1216typename CompilerBase<Adaptor, Derived, Config>::ValueRef
1218 IRValueRef dst, AssignmentPartRef base, i32 off)
noexcept {
1219 const ValLocalIdx local_idx = analyzer.adaptor->val_local_idx(dst);
1220 assert(!val_assignment(local_idx) &&
"new value already defined");
1222 ValueAssignment *assignment = this->val_assignment(local_idx);
1223 assignment->stack_variable =
true;
1224 assignment->frame_off = base.variable_stack_off() + off;
1225 return ValueRef{
this, local_idx};
1228template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1229void CompilerBase<Adaptor, Derived, Config>::set_value(
1230 ValuePartRef &val_ref, ScratchReg &scratch)
noexcept {
1231 val_ref.set_value(std::move(scratch));
// Materialize a GenericValuePart into a register. ScratchReg: use its
// current register. ValuePartRef: use the cached register or load it.
// Expr: a bare base (no index, zero displacement) is already a register;
// otherwise the derived impl computes the address expression.
1234template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1235typename CompilerBase<Adaptor, Derived, Config>::AsmReg
1237 GenericValuePart &gv)
noexcept {
1238 if (std::holds_alternative<ScratchReg>(gv.state)) {
1239 return std::get<ScratchReg>(gv.state).cur_reg();
1241 if (std::holds_alternative<ValuePartRef>(gv.state)) {
1242 auto &vpr = std::get<ValuePartRef>(gv.state);
1243 if (vpr.has_reg()) {
1244 return vpr.cur_reg();
1246 return vpr.load_to_reg();
1248 if (
auto *expr = std::get_if<typename GenericValuePart::Expr>(&gv.state)) {
1249 if (expr->has_base() && !expr->has_index() && expr->disp == 0) {
1250 return expr->base_reg();
1252 return derived()->gval_expr_as_reg(gv);
1254 TPDE_UNREACHABLE(
"gval_as_reg on empty GenericValuePart");
1257template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1258typename CompilerBase<Adaptor, Derived, Config>::AsmReg
1260 GenericValuePart &gv, ScratchReg &dst)
noexcept {
1262 if (!dst.has_reg()) {
1263 if (
auto *scratch = std::get_if<ScratchReg>(&gv.state)) {
1264 dst = std::move(*scratch);
1265 }
else if (
auto *val_ref = std::get_if<ValuePartRef>(&gv.state)) {
1266 if (val_ref->can_salvage()) {
1267 dst.alloc_specific(val_ref->salvage());
1268 assert(dst.cur_reg() == reg &&
"salvaging unsuccessful");
1275template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1276Reg CompilerBase<Adaptor, Derived, Config>::select_reg_evict(
1277 RegBank bank, u64 exclusion_mask)
noexcept {
1278 TPDE_LOG_DBG(
"select_reg_evict for bank {}", bank.id());
1280 register_file.used & register_file.bank_regs(bank) & ~exclusion_mask;
1282 Reg candidate = Reg::make_invalid();
1284 for (
auto reg_id : util::BitSetIterator<>(candidates)) {
1286 if (register_file.is_fixed(reg)) {
1291 auto local_idx = register_file.reg_local_idx(reg);
1292 u32 part = register_file.reg_part(Reg{reg});
1293 assert(local_idx != INVALID_VAL_LOCAL_IDX);
1294 ValueAssignment *va = val_assignment(local_idx);
1295 AssignmentPartRef ap{va, part};
1308 if (ap.variable_ref()) {
1309 TPDE_LOG_DBG(
" r{} ({}) is variable-ref", reg_id, u32(local_idx));
1315 if (ap.stack_valid()) {
1316 score |= u32{1} << 31;
1319 const auto &liveness = analyzer.liveness_info(local_idx);
1320 u32 last_use_dist = u32(liveness.last) - u32(cur_block_idx);
1321 score |= (last_use_dist < 0x8000 ? 0x8000 - last_use_dist : 0) << 16;
1323 u32 refs_left = va->pending_free ? 0 : va->references_left;
1324 score |= (refs_left < 0xffff ? 0x10000 - refs_left : 1);
1326 TPDE_LOG_DBG(
" r{} ({}:{}) rc={}/{} live={}-{}{} spilled={} score={:#x}",
1332 u32(liveness.first),
1334 &
"*"[!liveness.last_full],
1339 if (score > max_score) {
1344 if (candidate.invalid()) [[unlikely]] {
1345 TPDE_FATAL(
"ran out of registers for scratch registers");
1347 TPDE_LOG_DBG(
" selected r{}", candidate.id());
1352template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1354 AsmReg dst, AssignmentPartRef ap)
noexcept {
1355 if (!ap.variable_ref()) {
1356 assert(ap.stack_valid());
1357 derived()->load_from_stack(dst, ap.frame_off(), ap.part_size());
1358 }
else if (ap.is_stack_variable()) {
1359 derived()->load_address_of_stack_var(dst, ap);
1360 }
else if constexpr (!Config::DEFAULT_VAR_REF_HANDLING) {
1361 derived()->load_address_of_var_reference(dst, ap);
1363 TPDE_UNREACHABLE(
"non-stack-variable needs custom var-ref handling");
// Lazily reserve a stack spill slot for the whole assignment backing `ap`
// (frame_off == 0 means "no slot yet"; slot 0 itself is reserved).
1367template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1368void CompilerBase<Adaptor, Derived, Config>::allocate_spill_slot(
1369 AssignmentPartRef ap)
noexcept {
1370 assert(!ap.variable_ref() &&
"cannot allocate spill slot for variable ref");
1371 if (ap.assignment()->frame_off == 0) {
1372 assert(!ap.stack_valid() &&
"stack-valid set without spill slot");
1373 ap.assignment()->frame_off = allocate_stack_slot(ap.assignment()->size());
1374 assert(ap.assignment()->frame_off != 0);
// Spill helper (signature line elided in this extract — presumably
// CompilerBase::spill(AssignmentPartRef)): if the part's value is not yet
// valid on the stack, allocate a slot, store the register copy, and mark
// the stack copy valid.
1378template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1380 AssignmentPartRef ap)
noexcept {
1381 assert(may_change_value_state());
1382 if (!ap.stack_valid() && !ap.variable_ref()) {
1383 assert(ap.register_valid() &&
"cannot spill uninitialized assignment part");
1384 allocate_spill_slot(ap);
1385 derived()->spill_reg(ap.get_reg(), ap.frame_off(), ap.part_size());
1386 ap.set_stack_valid();
1390template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1392 AssignmentPartRef ap)
noexcept {
1393 assert(may_change_value_state());
1394 assert(ap.register_valid());
1396 ap.set_register_valid(
false);
1397 register_file.unmark_used(ap.get_reg());
// Register eviction helper (signature line elided — presumably
// CompilerBase::evict_reg(AsmReg reg)): detach the value currently held in
// `reg` from that register and release it. The elided line between the
// asserts and the state update (original 1411) presumably spills the part
// first — TODO confirm against the full source.
1400template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1402 assert(may_change_value_state());
1403 assert(!register_file.is_fixed(reg));
1404 assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);
1406 ValLocalIdx local_idx = register_file.reg_local_idx(reg);
1407 auto part = register_file.reg_part(reg);
1408 AssignmentPartRef evict_part{val_assignment(local_idx), part};
1409 assert(evict_part.register_valid());
1410 assert(evict_part.get_reg() == reg);
1412 evict_part.set_register_valid(
false);
1413 register_file.unmark_used(reg);
1416template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1418 assert(may_change_value_state());
1419 assert(!register_file.is_fixed(reg));
1420 assert(register_file.reg_local_idx(reg) != INVALID_VAL_LOCAL_IDX);
1422 ValLocalIdx local_idx = register_file.reg_local_idx(reg);
1423 auto part = register_file.reg_part(reg);
1424 AssignmentPartRef ap{val_assignment(local_idx), part};
1425 assert(ap.register_valid());
1426 assert(ap.get_reg() == reg);
1427 assert(!ap.modified() || ap.variable_ref());
1428 ap.set_register_valid(
false);
1429 register_file.unmark_used(reg);
1432template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1433typename CompilerBase<Adaptor, Derived, Config>::RegisterFile::RegBitSet
1434 CompilerBase<Adaptor, Derived, Config>::spill_before_branch(
1435 bool force_spill)
noexcept {
1459 using RegBitSet =
typename RegisterFile::RegBitSet;
1461 assert(may_change_value_state());
1463 const IRBlockRef cur_block_ref = analyzer.block_ref(cur_block_idx);
1467 BlockIndex earliest_next_succ = Analyzer<Adaptor>::INVALID_BLOCK_IDX;
1469 bool must_spill = force_spill;
1473 auto next_block_is_succ =
false;
1474 auto next_block_has_multiple_incoming =
false;
1476 for (
const IRBlockRef succ : adaptor->block_succs(cur_block_ref)) {
1478 BlockIndex succ_idx = analyzer.block_idx(succ);
1479 if (u32(succ_idx) == u32(cur_block_idx) + 1) {
1480 next_block_is_succ =
true;
1481 if (analyzer.block_has_multiple_incoming(succ)) {
1482 next_block_has_multiple_incoming =
true;
1484 }
else if (succ_idx > cur_block_idx && succ_idx < earliest_next_succ) {
1485 earliest_next_succ = succ_idx;
1489 must_spill = !next_block_is_succ || next_block_has_multiple_incoming;
1491 if (succ_count == 1 && !must_spill) {
1496 auto release_regs = RegBitSet{};
1498 for (
auto reg : register_file.used_regs()) {
1499 auto local_idx = register_file.reg_local_idx(Reg{reg});
1500 auto part = register_file.reg_part(Reg{reg});
1501 if (local_idx == INVALID_VAL_LOCAL_IDX) {
1505 AssignmentPartRef ap{val_assignment(local_idx), part};
1506 if (ap.fixed_assignment()) {
1514 release_regs |= RegBitSet{1ull} << reg;
1517 if (!ap.modified() || ap.variable_ref()) {
1522 const auto &liveness = analyzer.liveness_info(local_idx);
1523 if (liveness.last <= cur_block_idx) {
1536 if (must_spill || earliest_next_succ <= liveness.last) {
1541 return release_regs;
// Release helper (signature line elided — called as release_spilled_regs()
// after branch generation): frees every non-fixed register in `regs` that
// is still marked used. Loop body's elided line presumably calls free_reg —
// TODO confirm against the full source.
1544template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1546 typename RegisterFile::RegBitSet regs)
noexcept {
1547 assert(may_change_value_state());
1550 for (
auto reg_id : util::BitSetIterator<>{regs & register_file.used}) {
1551 if (!register_file.is_fixed(Reg{reg_id})) {
1557template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1561 for (
auto reg_id : register_file.used_regs()) {
1562 if (!register_file.is_fixed(Reg{reg_id})) {
1568template <IRAdaptor Adaptor,
typename Derived, CompilerConfig Config>
1572 IRBlockRef default_block,
1573 std::span<
const std::pair<u64, IRBlockRef>> cases)
noexcept {
1578 assert(width <= 64);
1581 assert(cases.size() < UINT32_MAX &&
"large switches are unsupported");
1583 AsmReg cmp_reg = cond.cur_reg();
1584 bool width_is_32 = width <= 32;
1585 if (u32 dst_width = util::align_up(width, 32); width != dst_width) {
1586 derived()->generate_raw_intext(cmp_reg, cmp_reg,
false, width, dst_width);
1592 ScratchReg tmp_scratch{
this};
1593 AsmReg tmp_reg = tmp_scratch.alloc_gp();
1595 const auto spilled = this->spill_before_branch();
1603 tpde::util::SmallVector<tpde::Label, 64> case_labels;
1604 for (
auto i = 0u; i < cases.size(); ++i) {
1605 case_labels.push_back(this->text_writer.label_create());
1608 const auto default_label = this->text_writer.label_create();
1610 const auto build_range = [&,
1611 this](
size_t begin,
size_t end,
const auto &self) {
1612 assert(begin <= end);
1613 const auto num_cases = end - begin;
1614 if (num_cases <= 4) {
1617 for (
auto i = 0u; i < num_cases; ++i) {
1618 derived()->switch_emit_cmpeq(case_labels[begin + i],
1621 cases[begin + i].first,
1625 derived()->generate_raw_jump(Derived::Jump::jmp, default_label);
1631 auto range = cases[end - 1].first - cases[begin].first;
1634 if (range != 0xFFFF'FFFF'FFFF'FFFF && (range / num_cases) < 8) {
1642 tpde::util::SmallVector<tpde::Label, 32> label_vec;
1643 std::span<tpde::Label> labels;
1644 if (range == num_cases) {
1645 labels = std::span{case_labels.begin() + begin, num_cases};
1647 label_vec.resize(range, default_label);
1648 for (
auto i = 0u; i < num_cases; ++i) {
1649 label_vec[cases[begin + i].first - cases[begin].first] =
1650 case_labels[begin + i];
1652 labels = std::span{label_vec.begin(), range};
1656 if (
derived()->switch_emit_jump_table(default_label,
1661 cases[end - 1].first,
1668 const auto half_len = num_cases / 2;
1669 const auto half_value = cases[begin + half_len].first;
1670 const auto gt_label = this->text_writer.label_create();
1675 derived()->switch_emit_binary_step(case_labels[begin + half_len],
1682 self(begin, begin + half_len, self);
1685 this->label_place(gt_label);
1686 self(begin + half_len + 1, end, self);
1689 build_range(0, case_labels.size(), build_range);
1692 this->label_place(default_label);
1693 derived()->generate_branch_to_block(
1694 Derived::Jump::jmp, default_block,
false,
false);
1696 for (
auto i = 0u; i < cases.size(); ++i) {
1697 this->label_place(case_labels[i]);
1698 derived()->generate_branch_to_block(
1699 Derived::Jump::jmp, cases[i].second,
false,
false);
1703 this->release_spilled_regs(spilled);