6#include "tpde/AssemblerElf.hpp"
7#include "tpde/AssignmentPartRef.hpp"
8#include "tpde/CompilerBase.hpp"
9#include "tpde/arm64/FunctionWriterA64.hpp"
10#include "tpde/base.hpp"
11#include "tpde/util/SmallVector.hpp"
12#include "tpde/util/misc.hpp"
#if defined(ASM) || defined(ASMNC) || defined(ASMC)
  #error Got definition for ASM macros from somewhere else. Maybe you included compilers for multiple architectures?
#endif

#define ASMC(compiler, op, ...) \
  ((compiler)->text_writer.write_inst(de64_##op(__VA_ARGS__)))

#define ASM(...) ASMC(this, __VA_ARGS__)

#define ASMNC(op, ...) \
  (this->text_writer.write_inst_unchecked(de64_##op(__VA_ARGS__)))

#define ASMIFC(compiler, op, ...) \
  ((compiler)->text_writer.try_write_inst(de64_##op(__VA_ARGS__)))

#define ASMIF(...) ASMIFC(this, __VA_ARGS__)
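// For example, `ASM(ADDxi, dst, src, 16)` expands to
// `this->text_writer.write_inst(de64_ADDxi(dst, src, 16))`: the de64_*
// encoder produces the instruction word and the writer appends it. ASMNC
// skips the bounds check (the caller must ensure_space beforehand), and
// ASMIF/ASMIFC return false when the operands are not encodable (e.g. an
// out-of-range immediate), letting callers fall back to a longer sequence.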
  constexpr explicit AsmReg() noexcept : Reg((u8)0xFF) {}

  constexpr AsmReg(const REG id) noexcept : Reg((u8)id) {}

  constexpr AsmReg(const Reg base) noexcept : Reg(base) {}

  constexpr explicit AsmReg(const u8 id) noexcept : Reg(id) {
    assert(id <= SP || (id >= V0 && id <= V31));
  }

  constexpr explicit AsmReg(const u64 id) noexcept : Reg(id) {
    assert(id <= SP || (id >= V0 && id <= V31));
  }

  operator DA_GReg() const noexcept { return DA_GReg{reg_id}; }

  operator DA_GRegZR() const noexcept {
    assert(reg_id != SP);
    return DA_GRegZR{reg_id};
  }

  operator DA_GRegSP() const noexcept {
    assert(reg_id <= SP);
    return DA_GRegSP{reg_id};
  }

  operator DA_VReg() const noexcept {
    assert(reg_id >= V0 && reg_id <= V31);
    return DA_VReg{static_cast<u8>(reg_id - V0)};
  }
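  // These conversion operators let an AsmReg be passed straight to the
  // de64_* encoders; the asserts enforce the GP (R0..SP) vs. vector
  // (V0..V31) register split at the call site.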
constexpr static u64 create_bitmask(
    const std::initializer_list<AsmReg::REG> regs) {
  u64 set = 0;
  for (const auto reg : regs) {
    set |= 1ull << reg;
  }
  return set;
}

template <size_t N>
constexpr static u64 create_bitmask(const std::array<AsmReg, N> regs) {
  u64 set = 0;
  for (const auto reg : regs) {
    set |= 1ull << reg.id();
  }
  return set;
}
class CCAssignerAAPCS : public CCAssigner {
  static constexpr CCInfo Info{
      // All registers except SP/FP and the compiler-reserved scratch
      // registers x16/x17 are allocatable.
      .allocatable_regs =
          0xFFFF'FFFF'FFFF'FFFF &
          ~create_bitmask({AsmReg::SP, AsmReg::FP, AsmReg::R16, AsmReg::R17}),
      // AAPCS64: x19-x28 and (the low 64 bits of) v8-v15 are callee-saved.
      .callee_saved_regs = create_bitmask({
          AsmReg::R19,
          AsmReg::R20,
          AsmReg::R21,
          AsmReg::R22,
          AsmReg::R23,
          AsmReg::R24,
          AsmReg::R25,
          AsmReg::R26,
          AsmReg::R27,
          AsmReg::R28,
          AsmReg::V8,
          AsmReg::V9,
          AsmReg::V10,
          AsmReg::V11,
          AsmReg::V12,
          AsmReg::V13,
          AsmReg::V14,
          AsmReg::V15,
      }),
      // x0-x7 (plus x8 for indirect results) and v0-v7 carry arguments.
      .arg_regs = create_bitmask({
          AsmReg::R0,
          AsmReg::R1,
          AsmReg::R2,
          AsmReg::R3,
          AsmReg::R4,
          AsmReg::R5,
          AsmReg::R6,
          AsmReg::R7,
          AsmReg::R8,
          AsmReg::V0,
          AsmReg::V1,
          AsmReg::V2,
          AsmReg::V3,
          AsmReg::V4,
          AsmReg::V5,
          AsmReg::V6,
          AsmReg::V7,
      }),
  };

  u32 ngrn = 0, nsrn = 0, nsaa = 0;
  u32 ret_ngrn = 0, ret_nsrn = 0;

public:
  CCAssignerAAPCS() noexcept : CCAssigner(Info) {}
  void reset() noexcept override {
    ngrn = nsrn = nsaa = ret_ngrn = ret_nsrn = 0;
  }
  void assign_arg(CCAssignment &arg) noexcept override {
    if (arg.byval) [[unlikely]] {
      nsaa = util::align_up(nsaa, arg.align < 8 ? 8 : arg.align);
      arg.stack_off = nsaa;
      nsaa += arg.size;
      return;
    }

    if (arg.sret) [[unlikely]] {
      // The indirect result location is passed in x8.
      arg.reg = AsmReg{AsmReg::R8};
      return;
    }

    if (arg.bank == RegBank{0}) {
      if (arg.align > 8) {
        ngrn = util::align_up(ngrn, 2);
      }
      if (ngrn + arg.consecutive < 8) {
        arg.reg = Reg{AsmReg::R0 + ngrn};
        ngrn += 1;
      } else {
        ngrn = 8;
        nsaa = util::align_up(nsaa, arg.align < 8 ? 8 : arg.align);
        arg.stack_off = nsaa;
        nsaa += util::align_up(arg.size, 8);
      }
    } else {
      if (nsrn + arg.consecutive < 8) {
        arg.reg = Reg{AsmReg::V0 + nsrn};
        nsrn += 1;
      } else {
        nsrn = 8;
        u32 size = util::align_up(arg.size, 8);
        nsaa = util::align_up(nsaa, size);
        arg.stack_off = nsaa;
        nsaa += size;
      }
    }
  }
  u32 get_stack_size() noexcept override { return nsaa; }
  void assign_ret(CCAssignment &arg) noexcept override {
    assert(!arg.byval && !arg.sret);
    if (arg.bank == RegBank{0}) {
      if (arg.align > 8) {
        ret_ngrn = util::align_up(ret_ngrn, 2);
      }
      if (ret_ngrn + arg.consecutive < 8) {
        arg.reg = Reg{AsmReg::R0 + ret_ngrn};
        ret_ngrn += 1;
      }
    } else {
      if (ret_nsrn + arg.consecutive < 8) {
        arg.reg = Reg{AsmReg::V0 + ret_nsrn};
        ret_nsrn += 1;
      }
    }
  }
};
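// Example (AAPCS): for a call `long f(long a, double b, long c)`, assign_arg
// places a in x0 (ngrn 0 -> 1), b in v0 (nsrn 0 -> 1) and c in x1; once the
// eight GP argument registers are exhausted, integer arguments go to the
// stack at the 8-byte-aligned offsets tracked by nsaa.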
struct PlatformConfig : CompilerConfigDefault {
  using Assembler = AssemblerElfA64;
  using AsmReg = tpde::a64::AsmReg;
  using DefaultCCAssigner = CCAssignerAAPCS;
  using FunctionWriter = FunctionWriterA64;

  static constexpr RegBank GP_BANK{0};
  static constexpr RegBank FP_BANK{1};
  static constexpr bool FRAME_INDEXING_NEGATIVE = false;
  static constexpr u32 PLATFORM_POINTER_SIZE = 8;
  static constexpr u32 NUM_BANKS = 2;
};
namespace concepts {
template <typename T, typename Config>
concept Compiler = tpde::Compiler<T, Config> && requires(T a) {
  {
    a.arg_is_int128(std::declval<typename T::IRValueRef>())
  } -> std::convertible_to<bool>;

  {
    a.arg_allow_split_reg_stack_passing(std::declval<typename T::IRValueRef>())
  } -> std::convertible_to<bool>;
};
} // namespace concepts
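// Hooks the derived compiler must provide on top of tpde::Compiler:
// arg_is_int128 and arg_allow_split_reg_stack_passing are queried per IR
// value while call/function arguments are assigned.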
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy =
              CompilerBase,
          typename Config = PlatformConfig>
struct CompilerA64 : BaseTy<Adaptor, Derived, Config> {
  using Base = BaseTy<Adaptor, Derived, Config>;
  using IRValueRef = typename Base::IRValueRef;
  using IRBlockRef = typename Base::IRBlockRef;
  using IRFuncRef = typename Base::IRFuncRef;

  using ScratchReg = typename Base::ScratchReg;
  using ValuePartRef = typename Base::ValuePartRef;
  using ValuePart = typename Base::ValuePart;
  using GenericValuePart = typename Base::GenericValuePart;

  using Assembler = typename PlatformConfig::Assembler;
  using RegisterFile = typename Base::RegisterFile;

  using CallArg = typename Base::CallArg;
  static constexpr u32 NUM_FIXED_ASSIGNMENTS[PlatformConfig::NUM_BANKS] = {5,
                                                                           5};
  enum CPU_FEATURES : u32 {
    CPU_BASELINE = 0, // ARMv8.0
  };

  CPU_FEATURES cpu_feats = CPU_BASELINE;
  u64 fixed_assignment_nonallocatable_mask =
      create_bitmask({AsmReg::R0, AsmReg::R1});
  u32 func_start_off = 0u, func_prologue_alloc = 0u, func_epilogue_alloc = 0u;
  u32 func_arg_stack_add_off = ~0u;
  AsmReg func_arg_stack_add_reg = AsmReg::make_invalid();

  AsmReg permanent_scratch_reg = AsmReg::R16;

  u32 scalar_arg_count = 0xFFFF'FFFF, vec_arg_count = 0xFFFF'FFFF;
  u32 reg_save_frame_off = 0;
  util::SmallVector<u32, 8> func_ret_offs = {};
  class CallBuilder : public Base::template CallBuilderBase<CallBuilder> {
    u32 stack_adjust_off = 0;

    void set_stack_used() noexcept;

  public:
    CallBuilder(Derived &compiler, CCAssigner &assigner) noexcept
        : Base::template CallBuilderBase<CallBuilder>(compiler, assigner) {}

    void add_arg_byval(ValuePart &vp, CCAssignment &cca) noexcept;
    void add_arg_stack(ValuePart &vp, CCAssignment &cca) noexcept;
    void call_impl(std::variant<SymRef, ValuePart> &&) noexcept;
    void reset_stack() noexcept;
  };
  explicit CompilerA64(Adaptor *adaptor,
                       const CPU_FEATURES cpu_features = CPU_BASELINE)
      : Base{adaptor}, cpu_feats(cpu_features) {
    static_assert(std::is_base_of_v<CompilerA64, Derived>);
    static_assert(concepts::Compiler<Derived, PlatformConfig>);
  }
  void start_func(u32 func_idx) noexcept;

  void gen_func_prolog_and_args(CCAssigner *cc_assigner) noexcept;

  void finish_func(u32 func_idx) noexcept;

  void reset() noexcept;

  void gen_func_epilog() noexcept;

  void
      spill_reg(const AsmReg reg, const u32 frame_off, const u32 size) noexcept;

  void load_from_stack(AsmReg dst,
                       i32 frame_off,
                       u32 size,
                       bool sign_extend = false) noexcept;

  void load_address_of_stack_var(AsmReg dst, AssignmentPartRef ap) noexcept;

  void mov(AsmReg dst, AsmReg src, u32 size) noexcept;

  GenericValuePart val_spill_slot(AssignmentPartRef ap) noexcept {
    assert(ap.stack_valid() && !ap.variable_ref());
    return typename GenericValuePart::Expr(AsmReg::R29, ap.frame_off());
  }

  AsmReg gval_expr_as_reg(GenericValuePart &gv) noexcept;

  void alloca_fixed(u64 size, u32 align, ValuePart &res) noexcept;

  void alloca_dynamic(u64 elem_size,
                      ValuePart &&count,
                      u32 align,
                      ValuePart &res) noexcept;

  void materialize_constant(const u64 *data,
                            RegBank bank,
                            u32 size,
                            AsmReg dst) noexcept;

  void materialize_constant(u64 const_u64,
                            RegBank bank,
                            u32 size,
                            AsmReg dst) noexcept {
    assert(size <= sizeof(const_u64));
    materialize_constant(&const_u64, bank, size, dst);
  }

  AsmReg select_fixed_assignment_reg(AssignmentPartRef, IRValueRef) noexcept;
  struct Jump {
    enum Kind : uint8_t {
      Jeq,
      Jne,
      Jcs,
      Jcc,
      Jmi,
      Jpl,
      Jvs,
      Jvc,
      Jhi,
      Jls,
      Jge,
      Jlt,
      Jgt,
      Jle,
      jmp,
      Cbz,
      Cbnz,
      Tbz,
      Tbnz,
    };

    Kind kind;
    AsmReg cmp_reg;
    bool cmp_is_32;
    u8 test_bit;

    constexpr Jump() : kind(Kind::jmp) {}

    constexpr Jump(Kind kind) : kind(kind), cmp_is_32(false), test_bit(0) {
      assert(kind != Cbz && kind != Cbnz && kind != Tbz && kind != Tbnz);
    }

    constexpr Jump(Kind kind, AsmReg cmp_reg, bool cmp_is_32)
        : kind(kind), cmp_reg(cmp_reg), cmp_is_32(cmp_is_32), test_bit(0) {
      assert(kind == Cbz || kind == Cbnz);
    }

    constexpr Jump(Kind kind, AsmReg cmp_reg, u8 test_bit)
        : kind(kind), cmp_reg(cmp_reg), cmp_is_32(false), test_bit(test_bit) {
      assert(kind == Tbz || kind == Tbnz);
    }

    constexpr Jump change_kind(Kind new_kind) const {
      auto cpy = *this;
      cpy.kind = new_kind;
      return cpy;
    }
  };
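  // Examples: Jump{Jump::Jeq} branches on the condition flags,
  // Jump{Jump::Cbnz, reg, true} compares a 32-bit register against zero,
  // and Jump{Jump::Tbz, reg, 3} tests bit 3 of reg.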
  Jump invert_jump(Jump jmp) noexcept;
  Jump swap_jump(Jump jmp) noexcept;
  void generate_branch_to_block(Jump jmp,
                                IRBlockRef target,
                                bool needs_split,
                                bool last_inst) noexcept;

  void generate_raw_jump(Jump jmp, Label target) noexcept;

  Da64Cond jump_to_cond(Jump jmp) noexcept;

  void generate_raw_set(Jump cc, AsmReg dst) noexcept;
  void generate_raw_mask(Jump cc, AsmReg dst) noexcept;

  void generate_raw_select(Jump cc,
                           AsmReg dst,
                           AsmReg true_select,
                           AsmReg false_select,
                           bool is_64) noexcept;

  void generate_raw_intext(
      AsmReg dst, AsmReg src, bool sign, u32 from, u32 to) noexcept;
  void generate_raw_bfi(AsmReg dst, AsmReg src, u32 lsb, u32 width) noexcept {
    ASM(BFIx, dst, src, lsb, width);
  }

  void generate_raw_bfiz(AsmReg dst, AsmReg src, u32 lsb, u32 width) noexcept {
    ASM(UBFIZx, dst, src, lsb, width);
  }
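  // For example, generate_raw_bfi(dst, src, 8, 16) inserts bits [15:0] of
  // src into bits [23:8] of dst (BFI), while generate_raw_bfiz additionally
  // zeroes the remaining destination bits (UBFIZ).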
  void generate_call(std::variant<SymRef, ValuePart> &&target,
                     std::span<CallArg> arguments,
                     typename Base::ValueRef *result,
                     bool variable_args = false);

  void switch_emit_cmp(AsmReg cmp_reg,
                       AsmReg tmp_reg,
                       u64 case_value,
                       bool width_is_32) noexcept;

  void switch_emit_cmpeq(Label case_label,
                         AsmReg cmp_reg,
                         AsmReg tmp_reg,
                         u64 case_value,
                         bool width_is_32) noexcept;

  bool switch_emit_jump_table(Label default_label,
                              std::span<const Label> labels,
                              AsmReg cmp_reg,
                              AsmReg tmp_reg,
                              u64 low_bound,
                              u64 high_bound,
                              bool width_is_32) noexcept;

  void switch_emit_binary_step(Label case_label,
                               Label gt_label,
                               AsmReg cmp_reg,
                               AsmReg tmp_reg,
                               u64 case_value,
                               bool width_is_32) noexcept;

  ScratchReg tls_get_addr(SymRef sym, TLSModel model) noexcept;

  bool has_cpu_feats(CPU_FEATURES feats) const noexcept {
    return ((cpu_feats & feats) == feats);
  }
};
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::CallBuilder::
    set_stack_used() noexcept {
  if (stack_adjust_off == 0) {
    this->compiler.text_writer.ensure_space(16);
    stack_adjust_off = this->compiler.text_writer.offset();
    this->compiler.text_writer.cur_ptr() += 4;
  }
}
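// The four bytes skipped above are a placeholder that call_impl patches with
// `sub sp, sp, #<size>` once the final size of the argument area is known.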
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::CallBuilder::add_arg_byval(
    ValuePart &vp, CCAssignment &cca) noexcept {
  AsmReg ptr_reg = vp.load_to_reg(&this->compiler);
  AsmReg tmp_reg = AsmReg::R16;

  auto size = cca.size;
  // Copy the byval argument onto the stack in the largest power-of-two
  // chunks, going through the scratch register x16.
  for (u32 off = 0; off < size;) {
    if (size - off >= 8) {
      ASMC(&this->compiler, LDRxu, tmp_reg, ptr_reg, off);
      ASMC(&this->compiler, STRxu, tmp_reg, DA_SP, cca.stack_off + off);
      off += 8;
    } else if (size - off >= 4) {
      ASMC(&this->compiler, LDRwu, tmp_reg, ptr_reg, off);
      ASMC(&this->compiler, STRwu, tmp_reg, DA_SP, cca.stack_off + off);
      off += 4;
    } else if (size - off >= 2) {
      ASMC(&this->compiler, LDRHu, tmp_reg, ptr_reg, off);
      ASMC(&this->compiler, STRHu, tmp_reg, DA_SP, cca.stack_off + off);
      off += 2;
    } else {
      ASMC(&this->compiler, LDRBu, tmp_reg, ptr_reg, off);
      ASMC(&this->compiler, STRBu, tmp_reg, DA_SP, cca.stack_off + off);
      off += 1;
    }
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::CallBuilder::add_arg_stack(
    ValuePart &vp, CCAssignment &cca) noexcept {
  this->set_stack_used();

  auto reg = vp.has_reg() ? vp.cur_reg() : vp.load_to_reg(&this->compiler);
  if (this->compiler.register_file.reg_bank(reg) == Config::GP_BANK) {
    switch (cca.size) {
    case 1: ASMC(&this->compiler, STRBu, reg, DA_SP, cca.stack_off); break;
    case 2: ASMC(&this->compiler, STRHu, reg, DA_SP, cca.stack_off); break;
    case 4: ASMC(&this->compiler, STRwu, reg, DA_SP, cca.stack_off); break;
    case 8: ASMC(&this->compiler, STRxu, reg, DA_SP, cca.stack_off); break;
    default: TPDE_UNREACHABLE("invalid GP reg size");
    }
  } else {
    assert(this->compiler.register_file.reg_bank(reg) == Config::FP_BANK);
    switch (cca.size) {
    case 1: ASMC(&this->compiler, STRbu, reg, DA_SP, cca.stack_off); break;
    case 2: ASMC(&this->compiler, STRhu, reg, DA_SP, cca.stack_off); break;
    case 4: ASMC(&this->compiler, STRsu, reg, DA_SP, cca.stack_off); break;
    case 8: ASMC(&this->compiler, STRdu, reg, DA_SP, cca.stack_off); break;
    case 16: ASMC(&this->compiler, STRqu, reg, DA_SP, cca.stack_off); break;
    default: TPDE_UNREACHABLE("invalid FP reg size");
    }
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::CallBuilder::call_impl(
    std::variant<SymRef, ValuePart> &&target) noexcept {
  u32 sub = 0;
  if (stack_adjust_off != 0) {
    auto *text_data = this->compiler.text_writer.begin_ptr();
    u32 *write_ptr = reinterpret_cast<u32 *>(text_data + stack_adjust_off);
    u32 stack_size = this->assigner.get_stack_size();
    sub = util::align_up(stack_size, stack_size < 0x1000 ? 0x10 : 0x1000);
    *write_ptr = de64_SUBxi(DA_SP, DA_SP, sub);
  } else {
    assert(this->assigner.get_stack_size() == 0);
  }

  // FP callee-saved registers only preserve their low 64 bits across calls,
  // so larger value parts must be spilled first.
  auto fp_regs = RegisterFile::bank_regs(Config::FP_BANK);
  auto fp_csrs = fp_regs & this->assigner.get_ccinfo().callee_saved_regs;
  auto used_fp_csrs = fp_csrs & this->compiler.register_file.used;
  for (auto reg_id : util::BitSetIterator<>{used_fp_csrs}) {
    auto reg = AsmReg{reg_id};
    ValLocalIdx local_idx = this->compiler.register_file.reg_local_idx(reg);
    auto part = this->compiler.register_file.reg_part(reg);
    AssignmentPartRef ap{this->compiler.val_assignment(local_idx), part};
    if (ap.part_size() > 8) {
      this->compiler.evict(ap);
    }
  }

  if (auto *sym = std::get_if<SymRef>(&target)) {
    ASMC(&this->compiler, BL, 0);
    this->compiler.reloc_text(
        *sym, R_AARCH64_CALL26, this->compiler.text_writer.offset() - 4);
  } else {
    ValuePart &tvp = std::get<ValuePart>(target);
    if (tvp.can_salvage()) {
      ASMC(&this->compiler, BLR, tvp.salvage(&this->compiler));
    } else {
      AsmReg reg = this->compiler.permanent_scratch_reg;
      tvp.reload_into_specific_fixed(&this->compiler, reg);
      ASMC(&this->compiler, BLR, reg);
    }
    tvp.reset(&this->compiler);
  }

  if (stack_adjust_off != 0) {
    ASMC(&this->compiler, ADDxi, DA_SP, DA_SP, sub);
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::start_func(
    const u32 /*func_idx*/) noexcept {
  this->assembler.except_begin_func();
  this->text_writer.align(16);
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::gen_func_prolog_and_args(
    CCAssigner *cc_assigner) noexcept {
  func_ret_offs.clear();
  func_start_off = this->text_writer.offset();

  const CCInfo &cc_info = cc_assigner->get_ccinfo();

  // The FP/LR pair sits at the bottom of the frame.
  this->stack.frame_size = 16;

  auto csr = cc_info.callee_saved_regs;
  auto csr_gp = csr & this->register_file.bank_regs(Config::GP_BANK);
  auto csr_fp = csr & this->register_file.bank_regs(Config::FP_BANK);
  u32 gp_saves = std::popcount(csr_gp);
  u32 fp_saves = std::popcount(csr_fp);
  // One four-byte STP per register pair.
  u32 reg_save_size = 4 * ((gp_saves + 1) / 2 + (fp_saves + 1) / 2);

  this->stack.frame_size += util::align_up(gp_saves * 8 + fp_saves * 8, 16);

  // Reserve space for the prologue (sub sp, stp x29/x30, mov x29 = 12 bytes
  // plus the register saves); it is written in finish_func once the frame
  // size is known.
  func_prologue_alloc = reg_save_size + 12;
  this->text_writer.ensure_space(func_prologue_alloc);
  this->text_writer.cur_ptr() += func_prologue_alloc;

  // ldp x29/x30, add sp and ret, plus the register restores.
  func_epilogue_alloc = reg_save_size + 12;
  // Restoring SP from the frame pointer costs one extra instruction.
  func_epilogue_alloc += this->stack.has_dynamic_alloca ? 4 : 0;
  if (this->adaptor->cur_is_vararg()) [[unlikely]] {
    reg_save_frame_off = this->stack.frame_size;
    // Register save area for va_start/va_arg: eight GP registers (64 bytes),
    // eight vector registers (128 bytes) and the pointer to the stack
    // arguments, which is stored at offset 192 further below.
    this->stack.frame_size += 8 * 8 + 8 * 16 + 16;
    this->text_writer.ensure_space(4 * 8);
    ASMNC(STPx, DA_GP(0), DA_GP(1), DA_SP, reg_save_frame_off);
    ASMNC(STPx, DA_GP(2), DA_GP(3), DA_SP, reg_save_frame_off + 16);
    ASMNC(STPx, DA_GP(4), DA_GP(5), DA_SP, reg_save_frame_off + 32);
    ASMNC(STPx, DA_GP(6), DA_GP(7), DA_SP, reg_save_frame_off + 48);
    ASMNC(STPq, DA_V(0), DA_V(1), DA_SP, reg_save_frame_off + 64);
    ASMNC(STPq, DA_V(2), DA_V(3), DA_SP, reg_save_frame_off + 96);
    ASMNC(STPq, DA_V(4), DA_V(5), DA_SP, reg_save_frame_off + 128);
    ASMNC(STPq, DA_V(6), DA_V(7), DA_SP, reg_save_frame_off + 160);
  }

  assert((cc_info.allocatable_regs & cc_info.arg_regs) == cc_info.arg_regs &&
         "argument registers must also be allocatable");
  // Temporarily take the argument registers out of the allocatable set so
  // they are not clobbered while the arguments are assigned.
  this->register_file.allocatable &= ~cc_info.arg_regs;
  this->func_arg_stack_add_off = ~0u;

  u32 arg_idx = 0;
  for (const IRValueRef arg : this->adaptor->cur_args()) {
    derived()->handle_func_arg(
        arg_idx,
        arg,
        [&](ValuePart &&vp, CCAssignment cca) -> std::optional<i32> {
          cca.bank = vp.bank();
          cca.size = vp.part_size();

          cc_assigner->assign_arg(cca);

          if (cca.reg.valid()) [[likely]] {
            vp.set_value_reg(this, cca.reg);
            // The argument register now holds a value and may be allocated
            // again.
            this->register_file.allocatable |= u64{1} << cca.reg.id();
            return {};
          }

          // Stack argument: load it relative to the incoming-argument base.
          AsmReg dst = vp.alloc_reg(this);

          this->text_writer.ensure_space(8);
          AsmReg stack_reg = AsmReg::R17;
          assert(
              !(this->register_file.allocatable & (u64{1} << stack_reg.id())) &&
              "x17 must not be allocatable");
          if (this->func_arg_stack_add_off == ~0u) {
            this->func_arg_stack_add_off = this->text_writer.offset();
            this->func_arg_stack_add_reg = stack_reg;
            // Placeholder; finish_func patches in the final frame size.
            ASMNC(ADDxi, stack_reg, DA_SP, 0);
          }

          if (cca.byval) {
            ASMNC(ADDxi, dst, stack_reg, cca.stack_off);
          } else if (cca.bank == Config::GP_BANK) {
            switch (cca.size) {
            case 1: ASMNC(LDRBu, dst, stack_reg, cca.stack_off); break;
            case 2: ASMNC(LDRHu, dst, stack_reg, cca.stack_off); break;
            case 4: ASMNC(LDRwu, dst, stack_reg, cca.stack_off); break;
            case 8: ASMNC(LDRxu, dst, stack_reg, cca.stack_off); break;
            default: TPDE_UNREACHABLE("invalid GP reg size");
            }
          } else {
            assert(cca.bank == Config::FP_BANK);
            switch (cca.size) {
            case 1: ASMNC(LDRbu, dst, stack_reg, cca.stack_off); break;
            case 2: ASMNC(LDRhu, dst, stack_reg, cca.stack_off); break;
            case 4: ASMNC(LDRsu, dst, stack_reg, cca.stack_off); break;
            case 8: ASMNC(LDRdu, dst, stack_reg, cca.stack_off); break;
            case 16: ASMNC(LDRqu, dst, stack_reg, cca.stack_off); break;
            default: TPDE_UNREACHABLE("invalid FP reg size");
            }
          }
          return {};
        });
    ++arg_idx;
  }
  if (this->adaptor->cur_is_vararg()) [[unlikely]] {
    AsmReg stack_reg = AsmReg::R17;
    assert(!(this->register_file.allocatable & (u64{1} << stack_reg.id())) &&
           "x17 must not be allocatable");
    if (this->func_arg_stack_add_off == ~0u) {
      this->func_arg_stack_add_off = this->text_writer.offset();
      this->func_arg_stack_add_reg = stack_reg;
      // Placeholder; finish_func patches in the final frame size.
      ASMC(this, ADDxi, stack_reg, DA_SP, 0);
    }
    ASM(ADDxi, stack_reg, stack_reg, cc_assigner->get_stack_size());
    ASM(STRxu, stack_reg, DA_GP(29), this->reg_save_frame_off + 192);
  }
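  // Recover how many GP/vector argument registers were consumed: every
  // assigned register was re-marked allocatable above, so ngrn/nsrn is one
  // past the highest such register (the | 0x80 sentinel makes cnt_lz return
  // 8, i.e. a count of 0, for an empty mask).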
  auto arg_regs = this->register_file.allocatable & cc_info.arg_regs;
  u32 ngrn = 8 - util::cnt_lz<u16>((arg_regs & 0xff) << 8 | 0x80);
  u32 nsrn = 8 - util::cnt_lz<u16>(((arg_regs >> 32) & 0xff) << 8 | 0x80);
  this->scalar_arg_count = ngrn;
  this->vec_arg_count = nsrn;

  this->register_file.allocatable |= cc_info.arg_regs;
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::finish_func(
    u32 func_idx) noexcept {
  auto csr = derived()->cur_cc_assigner()->get_ccinfo().callee_saved_regs;
  u64 saved_regs = this->register_file.clobbered & csr;

  auto stack_reg = DA_SP;
  if (this->stack.has_dynamic_alloca) {
    stack_reg = DA_GP(29);
  }

  auto final_frame_size = util::align_up(this->stack.frame_size, 16);
  if (final_frame_size > 4095) {
    // SUBxi only encodes a 12-bit immediate (optionally shifted by 12).
    final_frame_size = util::align_up(final_frame_size, 4096);
    assert(final_frame_size < 16 * 1024 * 1024);
  }
  auto fde_off = this->assembler.eh_begin_fde(this->get_personality_sym());

  // Build the prologue now that the frame size is known; it is copied into
  // the space reserved by gen_func_prolog_and_args below.
  util::SmallVector<u32, 16> prologue;
  prologue.push_back(de64_SUBxi(DA_SP, DA_SP, final_frame_size));
  this->assembler.eh_write_inst(dwarf::DW_CFA_advance_loc, 1);
  this->assembler.eh_write_inst(dwarf::DW_CFA_def_cfa_offset,
                                final_frame_size);
  prologue.push_back(de64_STPx(DA_GP(29), DA_GP(30), DA_SP, 0));
  prologue.push_back(de64_MOV_SPx(DA_GP(29), DA_SP));
  this->assembler.eh_write_inst(dwarf::DW_CFA_advance_loc, 2);
  this->assembler.eh_write_inst(dwarf::DW_CFA_def_cfa_register,
                                dwarf::a64::DW_reg_fp);
  this->assembler.eh_write_inst(
      dwarf::DW_CFA_offset, dwarf::a64::DW_reg_fp, final_frame_size / 8);
  this->assembler.eh_write_inst(
      dwarf::DW_CFA_offset, dwarf::a64::DW_reg_lr, final_frame_size / 8 - 1);

  // Patched below once the number of prologue instructions is known.
  auto fde_prologue_adv_off = this->assembler.eh_writer.size();
  this->assembler.eh_write_inst(dwarf::DW_CFA_advance_loc, 0);
  u32 frame_off = 16;
  AsmReg last_reg = AsmReg::make_invalid();
  // Saved registers get 8-byte slots in ascending register order; adjacent
  // registers of the same bank share a single STP.
  for (auto reg : util::BitSetIterator{saved_regs}) {
    if (last_reg.valid()) {
      const auto reg_bank = this->register_file.reg_bank(AsmReg{reg});
      const auto last_bank = this->register_file.reg_bank(last_reg);
      if (reg_bank == last_bank) {
        if (reg_bank == Config::GP_BANK) {
          prologue.push_back(
              de64_STPx(last_reg, AsmReg{reg}, stack_reg, frame_off));
        } else {
          prologue.push_back(
              de64_STPd(last_reg, AsmReg{reg}, stack_reg, frame_off));
        }
        frame_off += 8;
        last_reg = AsmReg::make_invalid();
      } else {
        // Bank switch: the pending GP register is stored on its own.
        assert(last_bank == Config::GP_BANK && reg_bank == Config::FP_BANK);
        prologue.push_back(de64_STRxu(last_reg, stack_reg, frame_off));
        frame_off += 8;
        last_reg = AsmReg{reg};
      }
    } else {
      last_reg = AsmReg{reg};
    }

    // Unwind info for the current register, whose slot is at frame_off.
    u8 dwarf_base = reg < 32 ? dwarf::a64::DW_reg_x0 : dwarf::a64::DW_reg_v0;
    u8 dwarf_reg = dwarf_base + reg % 32;
    u32 cfa_off = (final_frame_size - frame_off) / 8;
    if ((dwarf_reg & dwarf::DWARF_CFI_PRIMARY_OPCODE_MASK) == 0) {
      this->assembler.eh_write_inst(dwarf::DW_CFA_offset, dwarf_reg, cfa_off);
    } else {
      this->assembler.eh_write_inst(
          dwarf::DW_CFA_offset_extended, dwarf_reg, cfa_off);
    }

    if (!last_reg.valid()) {
      frame_off += 8;
    }
  }
  if (last_reg.valid()) {
    if (this->register_file.reg_bank(last_reg) == Config::GP_BANK) {
      prologue.push_back(de64_STRxu(last_reg, stack_reg, frame_off));
    } else {
      assert(this->register_file.reg_bank(last_reg) == Config::FP_BANK);
      prologue.push_back(de64_STRdu(last_reg, stack_reg, frame_off));
    }
  }

  assert(prologue.size() * sizeof(u32) <= func_prologue_alloc);
  // The advance delta must fit into DW_CFA_advance_loc's 6-bit operand; the
  // first three instructions were already advanced over above.
  assert(prologue.size() < 0x4c);
  this->assembler.eh_writer.data()[fde_prologue_adv_off] =
      dwarf::DW_CFA_advance_loc | (prologue.size() - 3);
  // Pad with nops up to the next 16-byte boundary, then skip over any
  // remaining fully unused prologue space instead of executing it.
  const auto nop_count = (func_prologue_alloc / 4 - prologue.size()) % 4;
  const auto nop = de64_NOP();
  for (auto i = 0u; i < nop_count; ++i) {
    prologue.push_back(nop);
  }

  u32 skip = util::align_down(func_prologue_alloc - prologue.size() * 4, 16);
  std::memset(this->text_writer.begin_ptr() + func_start_off, 0, skip);
  func_start_off += skip;
  std::memcpy(this->text_writer.begin_ptr() + func_start_off,
              prologue.data(),
              prologue.size() * sizeof(u32));
  // Patch the placeholder that computes the incoming-stack-argument base.
  if (func_arg_stack_add_off != ~0u) {
    auto *inst_ptr = this->text_writer.begin_ptr() + func_arg_stack_add_off;
    *reinterpret_cast<u32 *>(inst_ptr) =
        de64_ADDxi(func_arg_stack_add_reg, DA_SP, final_frame_size);
  }
  auto func_sym = this->func_syms[func_idx];
  auto func_sec = this->text_writer.get_sec_ref();

  if (func_ret_offs.empty()) {
    auto func_size = this->text_writer.offset() - func_start_off;
    this->assembler.sym_def(func_sym, func_sec, func_start_off, func_size);
    this->assembler.eh_end_fde(fde_off, func_sym);
    this->assembler.except_encode_func(func_sym,
                                       this->text_writer.label_offsets.data());
    return;
  }
  auto *text_data = this->text_writer.begin_ptr();
  u32 first_ret_off = func_ret_offs[0];
  u32 ret_size;
  {
    // Write the first epilogue into its reserved space.
    u32 *write_ptr = reinterpret_cast<u32 *>(text_data + first_ret_off);
    const auto ret_start = write_ptr;
    if (this->stack.has_dynamic_alloca) {
      *write_ptr++ = de64_MOV_SPx(DA_SP, DA_GP(29));
    } else {
      *write_ptr++ = de64_LDPx(DA_GP(29), DA_GP(30), DA_SP, 0);
    }

    // Mirror of the register-save loop above, with loads instead of stores.
    u32 frame_off = 16;
    AsmReg last_reg = AsmReg::make_invalid();
    for (auto reg : util::BitSetIterator{saved_regs}) {
      if (last_reg.valid()) {
        const auto reg_bank = this->register_file.reg_bank(AsmReg{reg});
        const auto last_bank = this->register_file.reg_bank(last_reg);
        if (reg_bank == last_bank) {
          if (reg_bank == Config::GP_BANK) {
            *write_ptr++ =
                de64_LDPx(last_reg, AsmReg{reg}, stack_reg, frame_off);
          } else {
            *write_ptr++ =
                de64_LDPd(last_reg, AsmReg{reg}, stack_reg, frame_off);
          }
          frame_off += 8;
          last_reg = AsmReg::make_invalid();
        } else {
          assert(last_bank == Config::GP_BANK && reg_bank == Config::FP_BANK);
          *write_ptr++ = de64_LDRxu(last_reg, stack_reg, frame_off);
          frame_off += 8;
          last_reg = AsmReg{reg};
        }
      } else {
        last_reg = AsmReg{reg};
      }
      if (!last_reg.valid()) {
        frame_off += 8;
      }
    }

    if (last_reg.valid()) {
      if (this->register_file.reg_bank(last_reg) == Config::GP_BANK) {
        *write_ptr++ = de64_LDRxu(last_reg, stack_reg, frame_off);
      } else {
        *write_ptr++ = de64_LDRdu(last_reg, stack_reg, frame_off);
      }
    }

    if (this->stack.has_dynamic_alloca) {
      *write_ptr++ = de64_LDPx(DA_GP(29), DA_GP(30), DA_SP, 0);
    }

    *write_ptr++ = de64_ADDxi(DA_SP, DA_SP, final_frame_size);
    *write_ptr++ = de64_RET(DA_GP(30));

    ret_size = (write_ptr - ret_start) * 4;
    assert(ret_size <= func_epilogue_alloc);
    std::memset(write_ptr, 0, func_epilogue_alloc - ret_size);
  }
  // Copy the first epilogue to all other return sites.
  for (u32 i = 1; i < func_ret_offs.size(); ++i) {
    std::memcpy(text_data + func_ret_offs[i],
                text_data + first_ret_off,
                func_epilogue_alloc);
  }

  // If the last epilogue ends the function, return its unused padding.
  u32 func_end_ret_off = this->text_writer.offset() - func_epilogue_alloc;
  if (func_ret_offs.back() == func_end_ret_off) {
    this->text_writer.cur_ptr() -= func_epilogue_alloc - ret_size;
  }

  auto func_size = this->text_writer.offset() - func_start_off;
  this->assembler.sym_def(func_sym, func_sec, func_start_off, func_size);
  this->assembler.eh_end_fde(fde_off, func_sym);
  this->assembler.except_encode_func(func_sym,
                                     this->text_writer.label_offsets.data());
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::reset() noexcept {
  func_ret_offs.clear();
  Base::reset();
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::gen_func_epilog() noexcept {
  // The epilogue is emitted into this reserved space by finish_func once
  // the frame layout is final.
  func_ret_offs.push_back(this->text_writer.offset());
  this->text_writer.ensure_space(func_epilogue_alloc);
  this->text_writer.cur_ptr() += func_epilogue_alloc;
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::spill_reg(
    const AsmReg reg, const u32 frame_off, const u32 size) noexcept {
  assert((size & (size - 1)) == 0);
  assert(util::align_up(frame_off, size) == frame_off);
  assert(frame_off < 0x1'000'000);
  this->text_writer.ensure_space(8);

  u32 off = frame_off;
  auto addr_base = AsmReg{AsmReg::FP};
  if (off >= 0x1000 * size) [[unlikely]] {
    // The unsigned-offset addressing mode only covers 12 bits scaled by the
    // access size; materialize the upper bits separately.
    ASMNC(ADDxi, permanent_scratch_reg, DA_GP(29), off & ~0xfff);
    off &= 0xfff;
    addr_base = permanent_scratch_reg;
  }

  assert(-static_cast<i32>(frame_off) < 0);
  if (reg.id() <= AsmReg::R30) {
    switch (size) {
    case 1: ASMNC(STRBu, reg, addr_base, off); break;
    case 2: ASMNC(STRHu, reg, addr_base, off); break;
    case 4: ASMNC(STRwu, reg, addr_base, off); break;
    case 8: ASMNC(STRxu, reg, addr_base, off); break;
    default: TPDE_UNREACHABLE("invalid register spill size");
    }
  } else {
    switch (size) {
    case 1: ASMNC(STRbu, reg, addr_base, off); break;
    case 2: ASMNC(STRhu, reg, addr_base, off); break;
    case 4: ASMNC(STRsu, reg, addr_base, off); break;
    case 8: ASMNC(STRdu, reg, addr_base, off); break;
    case 16: ASMNC(STRqu, reg, addr_base, off); break;
    default: TPDE_UNREACHABLE("invalid register spill size");
    }
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::load_from_stack(
    const AsmReg dst,
    const i32 frame_off,
    const u32 size,
    const bool sign_extend) noexcept {
  assert((size & (size - 1)) == 0);
  assert(util::align_up(frame_off, size) == frame_off);
  assert(frame_off >= 0 && frame_off < 0x1'000'000);
  this->text_writer.ensure_space(8);

  u32 off = frame_off;
  auto addr_base = AsmReg{AsmReg::FP};
  if (off >= 0x1000 * size) [[unlikely]] {
    // Materialize the upper offset bits; a GP destination can serve as the
    // address base itself.
    addr_base = dst.id() <= AsmReg::R30 ? dst : permanent_scratch_reg;
    ASMNC(ADDxi, addr_base, DA_GP(29), off & ~0xfff);
    off &= 0xfff;
  }

  if (dst.id() <= AsmReg::R30) {
    if (!sign_extend) {
      switch (size) {
      case 1: ASMNC(LDRBu, dst, addr_base, off); break;
      case 2: ASMNC(LDRHu, dst, addr_base, off); break;
      case 4: ASMNC(LDRwu, dst, addr_base, off); break;
      case 8: ASMNC(LDRxu, dst, addr_base, off); break;
      default: TPDE_UNREACHABLE("invalid register spill size");
      }
    } else {
      switch (size) {
      case 1: ASMNC(LDRSBwu, dst, addr_base, off); break;
      case 2: ASMNC(LDRSHwu, dst, addr_base, off); break;
      case 4: ASMNC(LDRSWxu, dst, addr_base, off); break;
      case 8: ASMNC(LDRxu, dst, addr_base, off); break;
      default: TPDE_UNREACHABLE("invalid register spill size");
      }
    }
    return;
  }

  assert(!sign_extend);
  switch (size) {
  case 1: ASMNC(LDRbu, dst, addr_base, off); break;
  case 2: ASMNC(LDRhu, dst, addr_base, off); break;
  case 4: ASMNC(LDRsu, dst, addr_base, off); break;
  case 8: ASMNC(LDRdu, dst, addr_base, off); break;
  case 16: ASMNC(LDRqu, dst, addr_base, off); break;
  default: TPDE_UNREACHABLE("invalid register spill size");
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::load_address_of_stack_var(
    const AsmReg dst, const AssignmentPartRef ap) noexcept {
  auto frame_off = ap.variable_stack_off();
  assert(frame_off >= 0);
  if (!ASMIF(ADDxi, dst, DA_GP(29), frame_off)) {
    materialize_constant(frame_off, Config::GP_BANK, 4, dst);
    ASM(ADDx_uxtw, dst, DA_GP(29), dst, 0);
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::mov(
    const AsmReg dst, const AsmReg src, const u32 size) noexcept {
  this->text_writer.ensure_space(4);
  assert(dst.valid());
  assert(src.valid());
  if (dst.id() <= AsmReg::SP && src.id() <= AsmReg::SP) {
    assert(dst.id() != AsmReg::SP && src.id() != AsmReg::SP);
    if (size > 4) {
      ASMNC(MOVx, dst, src);
    } else {
      ASMNC(MOVw, dst, src);
    }
  } else if (dst.id() >= AsmReg::V0 && src.id() >= AsmReg::V0) {
    // Full 128-bit copy between vector registers.
    ASMNC(ORR16b, dst, src, src);
  } else if (dst.id() <= AsmReg::SP) {
    assert(dst.id() != AsmReg::SP);
    assert(src.id() >= AsmReg::V0);
    assert(size <= 8);
    if (size <= 4) {
      ASMNC(FMOVws, dst, src);
    } else {
      ASMNC(FMOVxd, dst, src);
    }
  } else {
    assert(src.id() <= AsmReg::R30);
    assert(dst.id() >= AsmReg::V0);
    assert(size <= 8);
    if (size <= 4) {
      ASMNC(FMOVsw, dst, src);
    } else {
      ASMNC(FMOVdx, dst, src);
    }
  }
}
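// Materializes a GenericValuePart expression (base + index * scale + disp)
// into a single GP register, reusing a ScratchReg owned by the expression
// when possible.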
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
AsmReg CompilerA64<Adaptor, Derived, BaseTy, Config>::gval_expr_as_reg(
    GenericValuePart &gv) noexcept {
  auto &expr = std::get<typename GenericValuePart::Expr>(gv.state);

  ScratchReg scratch{derived()};
  if (!expr.has_base() && !expr.has_index()) {
    AsmReg dst = scratch.alloc_gp();
    derived()->materialize_constant(expr.disp, Config::GP_BANK, 8, dst);
    expr.disp = 0;
  } else if (!expr.has_base() && expr.has_index()) {
    AsmReg index_reg = expr.index_reg();
    if (std::holds_alternative<ScratchReg>(expr.index)) {
      scratch = std::move(std::get<ScratchReg>(expr.index));
    } else {
      (void)scratch.alloc_gp();
    }
    AsmReg dst = scratch.cur_reg();
    if ((expr.scale & (expr.scale - 1)) == 0) {
      const auto shift = util::cnt_tz<u64>(expr.scale);
      ASM(LSLxi, dst, index_reg, shift);
    } else {
      AsmReg tmp2 = permanent_scratch_reg;
      derived()->materialize_constant(expr.scale, Config::GP_BANK, 8, tmp2);
      ASM(MULx, dst, index_reg, tmp2);
    }
  } else if (expr.has_base() && expr.has_index()) {
    AsmReg base_reg = expr.base_reg();
    AsmReg index_reg = expr.index_reg();
    if (std::holds_alternative<ScratchReg>(expr.base)) {
      scratch = std::move(std::get<ScratchReg>(expr.base));
    } else if (std::holds_alternative<ScratchReg>(expr.index)) {
      scratch = std::move(std::get<ScratchReg>(expr.index));
    } else {
      (void)scratch.alloc_gp();
    }
    AsmReg dst = scratch.cur_reg();
    if ((expr.scale & (expr.scale - 1)) == 0) {
      const auto shift = util::cnt_tz<u64>(expr.scale);
      ASM(ADDx_lsl, dst, base_reg, index_reg, shift);
    } else {
      AsmReg tmp2 = permanent_scratch_reg;
      derived()->materialize_constant(expr.scale, Config::GP_BANK, 8, tmp2);
      ASM(MADDx, dst, index_reg, tmp2, base_reg);
    }
  } else if (expr.has_base() && !expr.has_index()) {
    AsmReg base_reg = expr.base_reg();
    if (std::holds_alternative<ScratchReg>(expr.base)) {
      scratch = std::move(std::get<ScratchReg>(expr.base));
    } else {
      (void)scratch.alloc_gp();
    }
    AsmReg dst = scratch.cur_reg();
    if (expr.disp != 0 && ASMIF(ADDxi, dst, base_reg, expr.disp)) {
      expr.disp = 0;
    } else if (dst != base_reg) {
      ASM(MOVx, dst, base_reg);
    }
  } else {
    TPDE_UNREACHABLE("inconsistent GenericValuePart::Expr");
  }

  AsmReg dst = scratch.cur_reg();
  if (expr.disp != 0) {
    if (!ASMIF(ADDxi, dst, dst, expr.disp)) {
      AsmReg tmp2 = permanent_scratch_reg;
      derived()->materialize_constant(expr.disp, Config::GP_BANK, 8, tmp2);
      ASM(ADDx, dst, dst, tmp2);
    }
  }
  gv.state = std::move(scratch);
  return dst;
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::alloca_fixed(
    u64 size, u32 align, ValuePart &res) noexcept {
  assert(align != 0 && (align & (align - 1)) == 0 && "invalid alignment");
  size = tpde::util::align_up(size, 16);
  AsmReg res_reg = res.alloc_reg(this);
  if (size >= 0x10'0000) {
    auto tmp = permanent_scratch_reg;
    materialize_constant(size, Config::GP_BANK, 8, tmp);
    ASM(SUBx_uxtx, res_reg, DA_SP, tmp, 0);
  } else if (size >= 0x1000) {
    ASM(SUBxi, res_reg, DA_SP, size & 0xff'f000);
    ASM(SUBxi, res_reg, res_reg, size & 0xfff);
  } else {
    ASM(SUBxi, res_reg, DA_SP, size & 0xfff);
  }

  if (align > 16) {
    ASM(ANDxi, res_reg, res_reg, ~(u64{align} - 1));
  }

  ASM(MOV_SPx, DA_SP, res_reg);
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::alloca_dynamic(
    u64 elem_size, ValuePart &&count, u32 align, ValuePart &res) noexcept {
  assert(align != 0 && (align & (align - 1)) == 0 && "invalid alignment");
  AsmReg size_reg = count.has_reg() ? count.cur_reg() : count.load_to_reg(this);
  AsmReg res_reg = res.alloc_try_reuse(this, count);

  if (elem_size == 0) {
    ASM(MOVZw, res_reg, 0);
  } else if ((elem_size & (elem_size - 1)) == 0) {
    const auto shift = util::cnt_tz(elem_size);
    if (shift <= 4) {
      // SUB with extended register folds shifts up to 4.
      ASM(SUBx_uxtx, res_reg, DA_SP, size_reg, shift);
    } else {
      ASM(LSLxi, res_reg, size_reg, shift);
      ASM(SUBx_uxtx, res_reg, DA_SP, res_reg, 0);
    }
  } else {
    auto tmp = permanent_scratch_reg;
    materialize_constant(elem_size, Config::GP_BANK, 8, tmp);
    ASM(MULx, res_reg, size_reg, tmp);
    ASM(SUBx_uxtx, res_reg, DA_SP, res_reg, 0);
  }

  align = align > 16 ? align : 16;
  if (elem_size & (align - 1)) {
    ASM(ANDxi, res_reg, res_reg, ~(u64{align} - 1));
  }

  ASM(MOV_SPx, DA_SP, res_reg);
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::materialize_constant(
    const u64 *data, const RegBank bank, const u32 size, AsmReg dst) noexcept {
  this->text_writer.ensure_space(5 * 4);

  const auto const_u64 = data[0];
  if (bank == Config::GP_BANK) {
    assert(size <= 8);
    if (const_u64 == 0) {
      ASMNC(MOVZw, dst, 0);
      return;
    }
    // de64_MOVconst emits a MOVZ/MOVN/MOVK sequence and reports the number
    // of instructions written.
    this->text_writer.cur_ptr() +=
        4 * de64_MOVconst(reinterpret_cast<u32 *>(this->text_writer.cur_ptr()),
                          dst,
                          const_u64);
    return;
  }

  assert(bank == Config::FP_BANK);
  if (size == 4) {
    if (ASMIF(FMOVsi, dst, std::bit_cast<float>((u32)const_u64))) {
      return;
    } else if (ASMIF(MOVId, dst, static_cast<u32>(const_u64))) {
      return;
    }
  } else if (size == 8) {
    if (ASMIF(FMOVdi, dst, std::bit_cast<double>(const_u64))) {
      return;
    } else if (ASMIF(MOVId, dst, const_u64)) {
      return;
    }
  } else if (size == 16) {
    const auto high_u64 = data[1];
    if (const_u64 == high_u64 && ASMIF(MOVI2d, dst, const_u64)) {
      return;
    } else if (high_u64 == 0 && ASMIF(MOVId, dst, const_u64)) {
      return;
    }
  }

  this->register_file.mark_clobbered(permanent_scratch_reg);
  if (size <= 8) {
    // Materialize in a GP register and move it over.
    materialize_constant(data, Config::GP_BANK, size, permanent_scratch_reg);
    if (size <= 4) {
      ASMNC(FMOVsw, dst, permanent_scratch_reg);
    } else {
      ASMNC(FMOVdx, dst, permanent_scratch_reg);
    }
    return;
  }

  if (size == 16) {
    // Fall back to a 16-byte constant-pool entry in .rodata.
    auto rodata = this->assembler.get_data_section(true, false);
    std::span<const u8> raw_data{reinterpret_cast<const u8 *>(data), size};
    auto sym = this->assembler.sym_def_data(
        rodata, "", raw_data, 16, Assembler::SymBinding::LOCAL);
    this->text_writer.ensure_space(8);
    this->reloc_text(
        sym, R_AARCH64_ADR_PREL_PG_HI21, this->text_writer.offset(), 0);
    ASMNC(ADRP, permanent_scratch_reg, 0, 0);
    this->reloc_text(
        sym, R_AARCH64_LDST128_ABS_LO12_NC, this->text_writer.offset(), 0);
    ASMNC(LDRqu, dst, permanent_scratch_reg, 0);
    return;
  }

  TPDE_FATAL("unable to materialize constant");
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
AsmReg
    CompilerA64<Adaptor, Derived, BaseTy, Config>::select_fixed_assignment_reg(
        AssignmentPartRef ap, IRValueRef) noexcept {
  RegBank bank = ap.bank();
  if (bank == Config::FP_BANK && ap.part_size() > 8) {
    // Fixed FP assignments only preserve 64 bits across calls.
    return AsmReg::make_invalid();
  }

  assert(bank.id() <= Config::NUM_BANKS);
  auto reg_mask = this->register_file.bank_regs(bank);
  reg_mask &= ~fixed_assignment_nonallocatable_mask;

  const auto find_possible_regs = [this,
                                   reg_mask](const u64 preferred_regs) -> u64 {
    u64 free_regs = this->register_file.allocatable & ~this->register_file.used;
    return free_regs & preferred_regs & reg_mask;
  };

  u64 possible_regs;
  auto csr = derived()->cur_cc_assigner()->get_ccinfo().callee_saved_regs;
  if (!this->stack.is_leaf_function) {
    // Prefer callee-saved registers in functions that make calls.
    possible_regs = find_possible_regs(csr);
  } else {
    possible_regs = find_possible_regs(~csr);
    if (possible_regs == 0) {
      possible_regs = find_possible_regs(csr);
    }
  }

  if (possible_regs == 0) {
    return AsmReg::make_invalid();
  }

  // Prefer completely unused registers.
  if ((possible_regs & ~this->register_file.used) != 0) {
    return AsmReg{util::cnt_tz(possible_regs & ~this->register_file.used)};
  }

  for (const auto reg_id : util::BitSetIterator<>{possible_regs}) {
    const auto reg = AsmReg{reg_id};

    assert(!this->register_file.is_fixed(reg));

    const auto local_idx = this->register_file.reg_local_idx(reg);
    const auto part = this->register_file.reg_part(reg);
    assert(local_idx != Base::INVALID_VAL_LOCAL_IDX);

    auto *assignment = this->val_assignment(local_idx);
    auto ap = AssignmentPartRef{assignment, part};
    if (ap.modified()) {
      continue;
    }

    return reg;
  }

  return AsmReg::make_invalid();
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
typename CompilerA64<Adaptor, Derived, BaseTy, Config>::Jump
    CompilerA64<Adaptor, Derived, BaseTy, Config>::invert_jump(
        Jump jmp) noexcept {
  switch (jmp.kind) {
  case Jump::Jeq: return jmp.change_kind(Jump::Jne);
  case Jump::Jne: return jmp.change_kind(Jump::Jeq);
  case Jump::Jcs: return jmp.change_kind(Jump::Jcc);
  case Jump::Jcc: return jmp.change_kind(Jump::Jcs);
  case Jump::Jmi: return jmp.change_kind(Jump::Jpl);
  case Jump::Jpl: return jmp.change_kind(Jump::Jmi);
  case Jump::Jvs: return jmp.change_kind(Jump::Jvc);
  case Jump::Jvc: return jmp.change_kind(Jump::Jvs);
  case Jump::Jhi: return jmp.change_kind(Jump::Jls);
  case Jump::Jls: return jmp.change_kind(Jump::Jhi);
  case Jump::Jge: return jmp.change_kind(Jump::Jlt);
  case Jump::Jlt: return jmp.change_kind(Jump::Jge);
  case Jump::Jgt: return jmp.change_kind(Jump::Jle);
  case Jump::Jle: return jmp.change_kind(Jump::Jgt);
  case Jump::jmp: return jmp;
  case Jump::Cbz: return jmp.change_kind(Jump::Cbnz);
  case Jump::Cbnz: return jmp.change_kind(Jump::Cbz);
  case Jump::Tbz: return jmp.change_kind(Jump::Tbnz);
  case Jump::Tbnz: return jmp.change_kind(Jump::Tbz);
  default: TPDE_UNREACHABLE("invalid jump kind");
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
typename CompilerA64<Adaptor, Derived, BaseTy, Config>::Jump
    CompilerA64<Adaptor, Derived, BaseTy, Config>::swap_jump(
        Jump jmp) noexcept {
  switch (jmp.kind) {
  case Jump::Jeq: return jmp.change_kind(Jump::Jeq);
  case Jump::Jne: return jmp.change_kind(Jump::Jne);
  case Jump::Jcc: return jmp.change_kind(Jump::Jhi);
  case Jump::Jcs: return jmp.change_kind(Jump::Jls);
  case Jump::Jhi: return jmp.change_kind(Jump::Jcc);
  case Jump::Jls: return jmp.change_kind(Jump::Jcs);
  case Jump::Jge: return jmp.change_kind(Jump::Jle);
  case Jump::Jlt: return jmp.change_kind(Jump::Jgt);
  case Jump::Jgt: return jmp.change_kind(Jump::Jlt);
  case Jump::Jle: return jmp.change_kind(Jump::Jge);
  case Jump::jmp: return jmp;
  default: TPDE_UNREACHABLE("invalid jump kind for swap_jump");
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::generate_branch_to_block(
    const Jump jmp,
    IRBlockRef target,
    const bool needs_split,
    const bool last_inst) noexcept {
  const auto target_idx = this->analyzer.block_idx(target);
  if (!needs_split || jmp.kind == Jump::jmp) {
    this->derived()->move_to_phi_nodes(target_idx);

    if (!last_inst || this->analyzer.block_idx(target) != this->next_block()) {
      generate_raw_jump(jmp, this->block_labels[(u32)target_idx]);
    }
  } else {
    // Phi moves are needed on a conditional edge: branch over them using
    // the inverted condition.
    auto tmp_label = this->text_writer.label_create();
    generate_raw_jump(invert_jump(jmp), tmp_label);

    this->derived()->move_to_phi_nodes(target_idx);

    generate_raw_jump(Jump::jmp, this->block_labels[(u32)target_idx]);

    this->label_place(tmp_label);
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::generate_raw_jump(
    Jump jmp, Label target_label) noexcept {
  const auto is_pending = this->text_writer.label_is_pending(target_label);
  this->text_writer.ensure_space(4);
  if (jmp.kind == Jump::jmp) {
    if (is_pending) {
      ASMNC(B, 0);
      this->text_writer.label_ref(target_label,
                                  this->text_writer.offset() - 4,
                                  LabelFixupKind::AARCH64_BR);
    } else {
      const auto label_off = this->text_writer.label_offset(target_label);
      const auto cur_off = this->text_writer.offset();
      assert(cur_off >= label_off);
      const auto diff = cur_off - label_off;
      assert((diff & 0b11) == 0);
      assert(diff < 128 * 1024 * 1024);

      ASMNC(B, -static_cast<ptrdiff_t>(diff) / 4);
    }
    return;
  }

  if (jmp.kind == Jump::Cbz || jmp.kind == Jump::Cbnz) {
    u32 off = 0;
    if (!is_pending) {
      const auto label_off = this->text_writer.label_offset(target_label);
      const auto cur_off = this->text_writer.offset();
      assert(cur_off >= label_off);
      off = cur_off - label_off;
      assert((off & 0b11) == 0);
      assert(off < 128 * 1024 * 1024);
    }

    // CBZ/CBNZ reach +/-1 MiB.
    if (off <= 1024 * 1024) {
      auto imm19 = -static_cast<ptrdiff_t>(off) / 4;
      if (jmp.kind == Jump::Cbz) {
        if (jmp.cmp_is_32) {
          ASMNC(CBZw, jmp.cmp_reg, imm19);
        } else {
          ASMNC(CBZx, jmp.cmp_reg, imm19);
        }
      } else {
        if (jmp.cmp_is_32) {
          ASMNC(CBNZw, jmp.cmp_reg, imm19);
        } else {
          ASMNC(CBNZx, jmp.cmp_reg, imm19);
        }
      }
      if (is_pending) {
        this->text_writer.label_ref(target_label,
                                    this->text_writer.offset() - 4,
                                    LabelFixupKind::AARCH64_COND_BR);
      }
    } else {
      // Out of range: invert the condition and branch over a plain B.
      assert(!is_pending);
      this->text_writer.ensure_space(2 * 4);

      if (jmp.kind == Jump::Cbz) {
        if (jmp.cmp_is_32) {
          ASMNC(CBNZw, jmp.cmp_reg, 2);
        } else {
          ASMNC(CBNZx, jmp.cmp_reg, 2);
        }
      } else {
        if (jmp.cmp_is_32) {
          ASMNC(CBZw, jmp.cmp_reg, 2);
        } else {
          ASMNC(CBZx, jmp.cmp_reg, 2);
        }
      }

      ASMNC(B, -static_cast<ptrdiff_t>(off + 4) / 4);
    }
    return;
  }

  if (jmp.kind == Jump::Tbz || jmp.kind == Jump::Tbnz) {
    u32 off = 0;
    if (!is_pending) {
      const auto label_off = this->text_writer.label_offset(target_label);
      const auto cur_off = this->text_writer.offset();
      assert(cur_off >= label_off);
      off = cur_off - label_off;
      assert((off & 0b11) == 0);
      assert(off < 128 * 1024 * 1024);
    }

    // TBZ/TBNZ only reach +/-32 KiB.
    if (off <= 32 * 1024) {
      auto imm14 = -static_cast<ptrdiff_t>(off) / 4;
      if (jmp.kind == Jump::Tbz) {
        ASMNC(TBZ, jmp.cmp_reg, jmp.test_bit, imm14);
      } else {
        ASMNC(TBNZ, jmp.cmp_reg, jmp.test_bit, imm14);
      }
      if (is_pending) {
        this->text_writer.label_ref(target_label,
                                    this->text_writer.offset() - 4,
                                    LabelFixupKind::AARCH64_TEST_BR);
      }
    } else {
      assert(!is_pending);
      this->text_writer.ensure_space(2 * 4);

      if (jmp.kind == Jump::Tbz) {
        ASMNC(TBNZ, jmp.cmp_reg, jmp.test_bit, 2);
      } else {
        ASMNC(TBZ, jmp.cmp_reg, jmp.test_bit, 2);
      }

      ASMNC(B, -static_cast<ptrdiff_t>(off + 4) / 4);
    }
    return;
  }

  Da64Cond cond, cond_compl;
  switch (jmp.kind) {
  case Jump::Jeq: cond = DA_EQ; cond_compl = DA_NE; break;
  case Jump::Jne: cond = DA_NE; cond_compl = DA_EQ; break;
  case Jump::Jcs: cond = DA_CS; cond_compl = DA_CC; break;
  case Jump::Jcc: cond = DA_CC; cond_compl = DA_CS; break;
  case Jump::Jmi: cond = DA_MI; cond_compl = DA_PL; break;
  case Jump::Jpl: cond = DA_PL; cond_compl = DA_MI; break;
  case Jump::Jvs: cond = DA_VS; cond_compl = DA_VC; break;
  case Jump::Jvc: cond = DA_VC; cond_compl = DA_VS; break;
  case Jump::Jhi: cond = DA_HI; cond_compl = DA_LS; break;
  case Jump::Jls: cond = DA_LS; cond_compl = DA_HI; break;
  case Jump::Jge: cond = DA_GE; cond_compl = DA_LT; break;
  case Jump::Jlt: cond = DA_LT; cond_compl = DA_GE; break;
  case Jump::Jgt: cond = DA_GT; cond_compl = DA_LE; break;
  case Jump::Jle: cond = DA_LE; cond_compl = DA_GT; break;
  default: TPDE_UNREACHABLE("invalid jump kind");
  }

  u32 off = 0;
  if (!is_pending) {
    const auto label_off = this->text_writer.label_offset(target_label);
    const auto cur_off = this->text_writer.offset();
    assert(cur_off >= label_off);
    off = cur_off - label_off;
    assert((off & 0b11) == 0);
    assert(off < 128 * 1024 * 1024);
  }

  // B.cond reaches +/-1 MiB.
  if (off <= 1024 * 1024) {
    ASMNC(BCOND, cond, -static_cast<ptrdiff_t>(off) / 4);
    if (is_pending) {
      this->text_writer.label_ref(target_label,
                                  this->text_writer.offset() - 4,
                                  LabelFixupKind::AARCH64_COND_BR);
    }
  } else {
    assert(!is_pending);
    this->text_writer.ensure_space(2 * 4);

    ASMNC(BCOND, cond_compl, 2);
    ASMNC(B, -static_cast<ptrdiff_t>(off + 4) / 4);
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
Da64Cond CompilerA64<Adaptor, Derived, BaseTy, Config>::jump_to_cond(
    Jump jmp) noexcept {
  switch (jmp.kind) {
  case Jump::Jeq: return DA_EQ;
  case Jump::Jne: return DA_NE;
  case Jump::Jcs: return DA_CS;
  case Jump::Jcc: return DA_CC;
  case Jump::Jmi: return DA_MI;
  case Jump::Jpl: return DA_PL;
  case Jump::Jvs: return DA_VS;
  case Jump::Jvc: return DA_VC;
  case Jump::Jhi: return DA_HI;
  case Jump::Jls: return DA_LS;
  case Jump::Jge: return DA_GE;
  case Jump::Jlt: return DA_LT;
  case Jump::Jgt: return DA_GT;
  case Jump::Jle: return DA_LE;
  case Jump::jmp: return DA_AL;
  default: TPDE_UNREACHABLE("invalid jump kind for conversion to Da64Cond");
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::generate_raw_set(
    Jump cc, AsmReg dst) noexcept {
  ASM(CSETw, dst, jump_to_cond(cc));
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::generate_raw_mask(
    Jump cc, AsmReg dst) noexcept {
  ASM(CSETMx, dst, jump_to_cond(cc));
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::generate_raw_select(
    Jump cc,
    AsmReg dst,
    AsmReg true_select,
    AsmReg false_select,
    bool is_64) noexcept {
  this->text_writer.ensure_space(4);
  Da64Cond cond = jump_to_cond(cc);
  if (is_64) {
    ASMNC(CSELx, dst, true_select, false_select, cond);
  } else {
    ASMNC(CSELw, dst, true_select, false_select, cond);
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::generate_raw_intext(
    AsmReg dst, AsmReg src, bool sign, u32 from, u32 to) noexcept {
  assert(from < to && to <= 64);
  if (sign) {
    if (to <= 32) {
      ASM(SBFXw, dst, src, 0, from);
    } else {
      ASM(SBFXx, dst, src, 0, from);
    }
  } else {
    if (to <= 32) {
      ASM(UBFXw, dst, src, 0, from);
    } else {
      ASM(UBFXx, dst, src, 0, from);
    }
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::generate_call(
    std::variant<SymRef, ValuePart> &&target,
    std::span<CallArg> arguments,
    typename Base::ValueRef *result,
    bool /*variable_args*/) {
  CCAssignerAAPCS assigner;
  CallBuilder cb{*derived(), assigner};
  for (auto &arg : arguments) {
    cb.add_arg(std::move(arg));
  }
  cb.call(std::move(target));
  if (result) {
    cb.add_ret(*result);
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::switch_emit_cmp(
    AsmReg cmp_reg, AsmReg tmp_reg, u64 case_value, bool width_is_32) noexcept {
  if (width_is_32) {
    if (!ASMIF(CMPwi, cmp_reg, case_value)) {
      materialize_constant(case_value, Config::GP_BANK, 4, tmp_reg);
      ASM(CMPw, cmp_reg, tmp_reg);
    }
  } else {
    if (!ASMIF(CMPxi, cmp_reg, case_value)) {
      materialize_constant(case_value, Config::GP_BANK, 8, tmp_reg);
      ASM(CMPx, cmp_reg, tmp_reg);
    }
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::switch_emit_cmpeq(
    Label case_label,
    AsmReg cmp_reg,
    AsmReg tmp_reg,
    u64 case_value,
    bool width_is_32) noexcept {
  switch_emit_cmp(cmp_reg, tmp_reg, case_value, width_is_32);
  generate_raw_jump(Jump::Jeq, case_label);
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
bool CompilerA64<Adaptor, Derived, BaseTy, Config>::switch_emit_jump_table(
    Label default_label,
    std::span<const Label> labels,
    AsmReg cmp_reg,
    AsmReg tmp_reg,
    u64 low_bound,
    u64 high_bound,
    bool width_is_32) noexcept {
  if (low_bound != 0) {
    switch_emit_cmp(cmp_reg, tmp_reg, low_bound, width_is_32);
    generate_raw_jump(Jump::Jcc, default_label);
  }
  switch_emit_cmp(cmp_reg, tmp_reg, high_bound, width_is_32);
  generate_raw_jump(Jump::Jhi, default_label);

  // Rebase the index to zero.
  if (low_bound != 0) {
    if (!ASMIF(SUBxi, cmp_reg, cmp_reg, low_bound)) {
      this->materialize_constant(&low_bound, Config::GP_BANK, 8, tmp_reg);
      ASM(SUBx, cmp_reg, cmp_reg, tmp_reg);
    }
  }

  this->text_writer.ensure_space(4 * 4 + 4 * labels.size());

  Label jump_table = this->text_writer.label_create();
  u32 adr_off = this->text_writer.offset();
  // Placeholder for the ADR; patched below once the table is placed.
  this->text_writer.write_unchecked(u32(0));

  if (width_is_32) {
    ASMNC(LDRSWxr_uxtw, cmp_reg, tmp_reg, cmp_reg, true);
  } else {
    ASMNC(LDRSWxr_lsl, cmp_reg, tmp_reg, cmp_reg, true);
  }
  ASMNC(ADDx, tmp_reg, tmp_reg, cmp_reg);
  ASMNC(BR, tmp_reg);

  u32 table_off = this->text_writer.offset();
  this->text_writer.label_place(jump_table, table_off);
  for (Label label : labels) {
    this->text_writer.label_ref(
        label, this->text_writer.offset(), LabelFixupKind::AARCH64_JUMP_TABLE);
    // Entry placeholder; the fixup rewrites it as a table-relative offset.
    this->text_writer.write_unchecked(table_off);
  }

  assert(table_off - adr_off <= 1 * 1024 * 1024);
  u32 *adr = reinterpret_cast<u32 *>(this->text_writer.begin_ptr() + adr_off);
  *adr = de64_ADR(tmp_reg, adr_off, table_off);

  return true;
}
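// For reference, switch_emit_jump_table produces a sequence along these
// lines (with cmp_reg holding the zero-based index):
//   adr   tmp, table                 ; patched in after the table is placed
//   ldrsw cmp, [tmp, cmp, uxtw #2]   ; sign-extended 32-bit table entry
//   add   tmp, tmp, cmp
//   br    tmp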
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::switch_emit_binary_step(
    Label case_label,
    Label gt_label,
    AsmReg cmp_reg,
    AsmReg tmp_reg,
    u64 case_value,
    bool width_is_32) noexcept {
  switch_emit_cmpeq(case_label, cmp_reg, tmp_reg, case_value, width_is_32);
  generate_raw_jump(Jump::Jhi, gt_label);
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
typename CompilerA64<Adaptor, Derived, BaseTy, Config>::ScratchReg
    CompilerA64<Adaptor, Derived, BaseTy, Config>::tls_get_addr(
        SymRef sym, TLSModel model) noexcept {
  switch (model) {
  default: // TODO: implement optimized access for the other TLS models
  case TLSModel::GlobalDynamic: {
    ScratchReg r0_scratch{this};
    AsmReg r0 = r0_scratch.alloc_specific(AsmReg::R0);
    ScratchReg r1_scratch{this};
    AsmReg r1 = r1_scratch.alloc_specific(AsmReg::R1);
    // The TLS descriptor call clobbers the link register.
    if (this->register_file.is_used(Reg{AsmReg::LR})) {
      this->evict_reg(Reg{AsmReg::LR});
    }
    this->text_writer.ensure_space(0x18);
    this->reloc_text(
        sym, R_AARCH64_TLSDESC_ADR_PAGE21, this->text_writer.offset(), 0);
    ASMNC(ADRP, r0, 0, 0);
    this->reloc_text(
        sym, R_AARCH64_TLSDESC_LD64_LO12, this->text_writer.offset(), 0);
    ASMNC(LDRxu, r1, r0, 0);
    this->reloc_text(
        sym, R_AARCH64_TLSDESC_ADD_LO12, this->text_writer.offset(), 0);
    ASMNC(ADDxi, r0, r0, 0);
    this->reloc_text(
        sym, R_AARCH64_TLSDESC_CALL, this->text_writer.offset(), 0);
    ASMNC(BLR, r1);
    // Add the thread pointer (TPIDR_EL0) to the descriptor's offset.
    ASMNC(MRS, r1, 0xde82);
    ASMNC(ADDx, r0, r1, r0);
    return r0_scratch;
  }
  }
}