#include "AssemblerElfA64.hpp"
#include "tpde/CompilerBase.hpp"
#include "tpde/base.hpp"
#include "tpde/util/SmallVector.hpp"
#include "tpde/util/misc.hpp"

#include <bit>
#include <cstring>

namespace tpde::a64 {
#if defined(ASM) || defined(ASMNC) || defined(ASMC)
  #error Got definition for ASM macros from somewhere else. Maybe you included compilers for multiple architectures?
#endif

// Emit an instruction, growing the text buffer if necessary.
#define ASMC(compiler, op, ...) \
  ((compiler)->text_writer.write_inst(de64_##op(__VA_ARGS__)))

#define ASM(...) ASMC(this, __VA_ARGS__)

// Emit an instruction without checking for buffer space ("no check").
#define ASMNC(op, ...) \
  (this->text_writer.write_inst_unchecked(de64_##op(__VA_ARGS__)))

// Emit an instruction only if its operands are encodeable; evaluates to bool.
#define ASMIFC(compiler, op, ...) \
  ((compiler)->text_writer.try_write_inst(de64_##op(__VA_ARGS__)))

#define ASMIF(...) ASMIFC(this, __VA_ARGS__)
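// Usage sketch (hypothetical operands): ASM checks for buffer space first,
// ASMNC assumes ensure_space() was already called, and ASMIF only writes the
// instruction if the operands are encodeable, returning whether it did:
//
//   ASM(ADDxi, dst, src, 16);           // checked emission
//   this->text_writer.ensure_space(4);
//   ASMNC(ADDxi, dst, src, 16);         // unchecked emission
//   if (!ASMIF(ADDxi, dst, src, imm)) { // imm did not fit the encoding
//     /* materialize imm into a register and use the register form */
//   }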
struct AsmReg : Reg {
  enum REG : u8 {
    R0, R1, R2, R3, R4, R5, R6, R7,
    R8, R9, R10, R11, R12, R13, R14, R15,
    R16, R17, R18, R19, R20, R21, R22, R23,
    R24, R25, R26, R27, R28, R29, R30, SP,
    FP = R29,
    LR = R30,
    V0 = 32, V1, V2, V3, V4, V5, V6, V7,
    V8, V9, V10, V11, V12, V13, V14, V15,
    V16, V17, V18, V19, V20, V21, V22, V23,
    V24, V25, V26, V27, V28, V29, V30, V31,
  };

  constexpr explicit AsmReg() noexcept : Reg((u8)0xFF) {}

  constexpr AsmReg(const REG id) noexcept : Reg((u8)id) {}

  constexpr AsmReg(const Reg base) noexcept : Reg(base) {}

  constexpr explicit AsmReg(const u8 id) noexcept : Reg(id) {
    assert(id <= SP || (id >= V0 && id <= V31));
  }

  constexpr explicit AsmReg(const u64 id) noexcept : Reg(id) {
    assert(id <= SP || (id >= V0 && id <= V31));
  }
  operator DA_GReg() const noexcept { return DA_GReg{reg_id}; }

  operator DA_GRegZR() const noexcept {
    assert(reg_id != SP);
    return DA_GRegZR{reg_id};
  }

  operator DA_GRegSP() const noexcept {
    assert(reg_id <= SP);
    return DA_GRegSP{reg_id};
  }

  operator DA_VReg() const noexcept {
    assert(reg_id >= V0 && reg_id <= V31);
    return DA_VReg{static_cast<u8>(reg_id - V0)};
  }
};
constexpr static u64
    create_bitmask(const std::initializer_list<AsmReg::REG> regs) {
  u64 set = 0;
  for (const auto reg : regs) {
    set |= 1ull << reg;
  }
  return set;
}

template <size_t N>
constexpr static u64 create_bitmask(const std::array<AsmReg, N> regs) {
  u64 set = 0;
  for (const auto reg : regs) {
    set |= 1ull << reg.id();
  }
  return set;
}
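// E.g., create_bitmask({AsmReg::R0, AsmReg::R1}) == 0b11, and since vector
// registers start at id 32, create_bitmask({AsmReg::V0}) == 1ull << 32.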
class CCAssignerAAPCS : public CCAssigner {
  static constexpr CCInfo Info{
      // we reserve SP, FP (x29) and the scratch registers x16/x17
      .allocatable_regs =
          0xFFFF'FFFF'FFFF'FFFF &
          ~create_bitmask({AsmReg::SP, AsmReg::FP, AsmReg::R16, AsmReg::R17}),
      .callee_saved_regs = create_bitmask({
          AsmReg::R19, AsmReg::R20, AsmReg::R21, AsmReg::R22, AsmReg::R23,
          AsmReg::R24, AsmReg::R25, AsmReg::R26, AsmReg::R27, AsmReg::R28,
          AsmReg::V8, AsmReg::V9, AsmReg::V10, AsmReg::V11,
          AsmReg::V12, AsmReg::V13, AsmReg::V14, AsmReg::V15,
      }),
      .arg_regs = create_bitmask({
          AsmReg::R0, AsmReg::R1, AsmReg::R2, AsmReg::R3,
          AsmReg::R4, AsmReg::R5, AsmReg::R6, AsmReg::R7,
          AsmReg::R8, // sret pointer
          AsmReg::V0, AsmReg::V1, AsmReg::V2, AsmReg::V3,
          AsmReg::V4, AsmReg::V5, AsmReg::V6, AsmReg::V7,
      }),
  };

  // AAPCS64 assignment state: next GP reg (ngrn), next SIMD/FP reg (nsrn)
  // and next stacked-argument address offset (nsaa)
  u32 ngrn = 0, nsrn = 0, nsaa = 0;
  u32 ret_ngrn = 0, ret_nsrn = 0;

public:
  CCAssignerAAPCS() noexcept : CCAssigner(Info) {}

  void reset() noexcept override {
    ngrn = nsrn = nsaa = ret_ngrn = ret_nsrn = 0;
  }

  void assign_arg(CCAssignment &arg) noexcept override {
    if (arg.byval) [[unlikely]] {
      nsaa = util::align_up(nsaa, arg.byval_align < 8 ? 8 : arg.byval_align);
      arg.stack_off = nsaa;
      nsaa += arg.byval_size;
      return;
    }

    if (arg.sret) [[unlikely]] {
      arg.reg = AsmReg{AsmReg::R8};
      return;
    }

    if (arg.bank == RegBank{0}) {
      if (arg.align > 8) {
        ngrn = util::align_up(ngrn, 2);
      }
      if (ngrn + arg.consecutive < 8) {
        arg.reg = Reg{AsmReg::R0 + ngrn};
        ngrn += 1;
      } else {
        ngrn = 8;
        nsaa = util::align_up(nsaa, arg.align < 8 ? 8 : arg.align);
        arg.stack_off = nsaa;
        nsaa += util::align_up(arg.size, 8);
      }
    } else {
      if (nsrn + arg.consecutive < 8) {
        arg.reg = Reg{AsmReg::V0 + nsrn};
        nsrn += 1;
      } else {
        nsrn = 8;
        u32 size = util::align_up(arg.size, 8);
        nsaa = util::align_up(nsaa, size);
        arg.stack_off = nsaa;
        nsaa += size;
      }
    }
  }

  u32 get_stack_size() noexcept override { return nsaa; }

  void assign_ret(CCAssignment &arg) noexcept override {
    assert(!arg.byval && !arg.sret);
    if (arg.bank == RegBank{0}) {
      if (arg.align > 8) {
        ret_ngrn = util::align_up(ret_ngrn, 2);
      }
      if (ret_ngrn + arg.consecutive < 8) {
        arg.reg = Reg{AsmReg::R0 + ret_ngrn};
        ret_ngrn += 1;
      }
    } else {
      if (ret_nsrn + arg.consecutive < 8) {
        arg.reg = Reg{AsmReg::V0 + ret_nsrn};
        ret_nsrn += 1;
      }
    }
  }
};
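// Worked example: for f(i64 a, double b, i64 c), assign_arg hands out x0
// (ngrn 0 -> 1) for a, v0 (nsrn 0 -> 1) for b and x1 (ngrn 1 -> 2) for c.
// Once ngrn/nsrn reach 8, later arguments get 8-byte-aligned slots at nsaa,
// which get_stack_size() reports as the call-frame size.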
struct PlatformConfig : CompilerConfigDefault {
  using Assembler = AssemblerElfA64;
  using AsmReg = tpde::a64::AsmReg;
  using DefaultCCAssigner = CCAssignerAAPCS;

  static constexpr RegBank GP_BANK{0};
  static constexpr RegBank FP_BANK{1};
  static constexpr bool FRAME_INDEXING_NEGATIVE = false;
  static constexpr u32 PLATFORM_POINTER_SIZE = 8;
  static constexpr u32 NUM_BANKS = 2;
};
namespace concepts {
template <typename T, typename Config>
concept Compiler = tpde::Compiler<T, Config> && requires(T a) {
  {
    a.arg_is_int128(std::declval<typename T::IRValueRef>())
  } -> std::convertible_to<bool>;

  {
    a.arg_allow_split_reg_stack_passing(std::declval<typename T::IRValueRef>())
  } -> std::convertible_to<bool>;
};
} // namespace concepts
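// A derived compiler satisfies this concept by providing the two hooks,
// e.g. (sketch with hypothetical types MyAdaptor/MyCompiler):
//
//   struct MyCompiler : CompilerA64<MyAdaptor, MyCompiler> {
//     bool arg_is_int128(IRValueRef) noexcept { return false; }
//     bool arg_allow_split_reg_stack_passing(IRValueRef) noexcept {
//       return false;
//     }
//   };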
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy =
              CompilerBase,
          typename Config = PlatformConfig>
struct CompilerA64 : BaseTy<Adaptor, Derived, Config> {
  using Base = BaseTy<Adaptor, Derived, Config>;

  using IRValueRef = typename Base::IRValueRef;
  using IRBlockRef = typename Base::IRBlockRef;
  using IRFuncRef = typename Base::IRFuncRef;

  using ScratchReg = typename Base::ScratchReg;
  using ValuePartRef = typename Base::ValuePartRef;
  using ValuePart = typename Base::ValuePart;
  using GenericValuePart = typename Base::GenericValuePart;

  using Assembler = typename PlatformConfig::Assembler;
  using RegisterFile = typename Base::RegisterFile;

  using CallArg = typename Base::CallArg;

  // CRTP accessor
  Derived *derived() noexcept { return static_cast<Derived *>(this); }
  static constexpr u32 NUM_FIXED_ASSIGNMENTS[PlatformConfig::NUM_BANKS] = {5,
                                                                           5};
  enum CPU_FEATURES : u32 {
    CPU_BASELINE = 0, // ARMv8.0
  };

  CPU_FEATURES cpu_feats = CPU_BASELINE;

  // When handling function arguments, we must prevent argument registers
  // from being handed out as fixed assignments
  u64 fixed_assignment_nonallocatable_mask =
      create_bitmask({AsmReg::R0, AsmReg::R1});
  u32 func_start_off = 0u, func_prologue_alloc = 0u, func_epilogue_alloc = 0u;
  // Offset of the ADD that materializes the stack-argument base address in
  // func_arg_stack_add_reg; patched in finish_func. ~0u if unused.
  u32 func_arg_stack_add_off = ~0u;
  AsmReg func_arg_stack_add_reg = AsmReg::make_invalid();

  // x16 is reserved as a permanent scratch register, e.g. for materializing
  // constants and out-of-range stack offsets
  AsmReg permanent_scratch_reg = AsmReg::R16;

  u32 scalar_arg_count = 0xFFFF'FFFF, vec_arg_count = 0xFFFF'FFFF;
  u32 reg_save_frame_off = 0;
  util::SmallVector<u32, 8> func_ret_offs = {};
  class CallBuilder : public Base::template CallBuilderBase<CallBuilder> {
    u32 stack_adjust_off = 0;

    void set_stack_used() noexcept;

  public:
    CallBuilder(Derived &compiler, CCAssigner &assigner) noexcept
        : Base::template CallBuilderBase<CallBuilder>(compiler, assigner) {}

    void add_arg_byval(ValuePart &vp, CCAssignment &cca) noexcept;
    void add_arg_stack(ValuePart &vp, CCAssignment &cca) noexcept;
    void call_impl(
        std::variant<typename Assembler::SymRef, ValuePart> &&) noexcept;
    void reset_stack() noexcept;
  };
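  // Intended use (mirrors generate_call below): arguments are assigned
  // through the CCAssigner, the sp adjustment is patched in lazily, and the
  // call is emitted last:
  //
  //   CCAssignerAAPCS assigner;
  //   CallBuilder cb{*derived(), assigner};
  //   cb.add_arg(std::move(arg));  // may store to the argument area
  //   cb.call(std::move(target));  // BL <sym> or BLR <reg>
  //   cb.add_ret(*result);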
  explicit CompilerA64(Adaptor *adaptor,
                       const CPU_FEATURES cpu_features = CPU_BASELINE)
      : Base{adaptor}, cpu_feats(cpu_features) {
    static_assert(std::is_base_of_v<CompilerA64, Derived>);
    static_assert(concepts::Compiler<Derived, PlatformConfig>);
  }

  void start_func(u32 func_idx) noexcept;

  void gen_func_prolog_and_args(CCAssigner *cc_assigner) noexcept;

  void finish_func(u32 func_idx) noexcept;

  void reset() noexcept;

  void gen_func_epilog() noexcept;

  void
      spill_reg(const AsmReg reg, const u32 frame_off, const u32 size) noexcept;

  void load_from_stack(AsmReg dst,
                       i32 frame_off,
                       u32 size,
                       bool sign_extend = false) noexcept;

  void load_address_of_stack_var(AsmReg dst, AssignmentPartRef ap) noexcept;

  void mov(AsmReg dst, AsmReg src, u32 size) noexcept;

  GenericValuePart val_spill_slot(ValuePart &val_ref) noexcept {
    const auto ap = val_ref.assignment();
    assert(ap.stack_valid() && !ap.variable_ref());
    return typename GenericValuePart::Expr(AsmReg::R29, ap.frame_off());
  }

  AsmReg gval_expr_as_reg(GenericValuePart &gv) noexcept;

  void materialize_constant(const u64 *data,
                            RegBank bank,
                            u32 size,
                            AsmReg dst) noexcept;
  void materialize_constant(u64 const_u64,
                            RegBank bank,
                            u32 size,
                            AsmReg dst) noexcept {
    assert(size <= sizeof(const_u64));
    materialize_constant(&const_u64, bank, size, dst);
  }
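  // E.g. materialize_constant(0x1234, Config::GP_BANK, 4, dst) emits a short
  // MOV-immediate sequence; the pointer overload also handles 16-byte vector
  // constants (see the definition below).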
  AsmReg select_fixed_assignment_reg(RegBank bank, IRValueRef) noexcept;
  struct Jump {
    enum Kind : uint8_t {
      Jeq,
      Jne,
      Jcs,
      Jcc,
      Jmi,
      Jpl,
      Jvs,
      Jvc,
      Jhi,
      Jls,
      Jge,
      Jlt,
      Jgt,
      Jle,
      jmp,
      Cbz,
      Cbnz,
      Tbz,
      Tbnz,
    };

    Kind kind;
    AsmReg cmp_reg;
    bool cmp_is_32;
    u8 test_bit;

    constexpr Jump() : kind(Kind::jmp) {}

    constexpr Jump(Kind kind) : kind(kind), cmp_is_32(false), test_bit(0) {
      assert(kind != Cbz && kind != Cbnz && kind != Tbz && kind != Tbnz);
    }

    constexpr Jump(Kind kind, AsmReg cmp_reg, bool cmp_is_32)
        : kind(kind), cmp_reg(cmp_reg), cmp_is_32(cmp_is_32), test_bit(0) {
      assert(kind == Cbz || kind == Cbnz);
    }

    constexpr Jump(Kind kind, AsmReg cmp_reg, u8 test_bit)
        : kind(kind), cmp_reg(cmp_reg), cmp_is_32(false), test_bit(test_bit) {
      assert(kind == Tbz || kind == Tbnz);
    }

    constexpr Jump change_kind(Kind new_kind) const {
      auto cpy = *this;
      cpy.kind = new_kind;
      return cpy;
    }
  };
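  // Construction examples: Jump{Jump::Jeq} after a CMP; Jump{Jump::Cbnz, reg,
  // /*cmp_is_32=*/true} for a compare-and-branch on a 32-bit register;
  // Jump{Jump::Tbz, reg, /*test_bit=*/63} to branch on a cleared sign bit.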
  Jump invert_jump(Jump jmp) noexcept;
  Jump swap_jump(Jump jmp) noexcept;

  void generate_branch_to_block(Jump jmp,
                                IRBlockRef target,
                                bool needs_split,
                                bool last_inst) noexcept;

  void generate_raw_jump(Jump jmp, Assembler::Label target) noexcept;

  Da64Cond jump_to_cond(Jump jmp) noexcept;
  void generate_raw_set(Jump cc, AsmReg dst) noexcept;
  void generate_raw_mask(Jump cc, AsmReg dst) noexcept;

  void generate_raw_select(Jump cc,
                           AsmReg dst,
                           AsmReg true_select,
                           AsmReg false_select,
                           bool is_64) noexcept;

  void generate_raw_intext(
      AsmReg dst, AsmReg src, bool sign, u32 from, u32 to) noexcept;

  void generate_call(std::variant<Assembler::SymRef, ValuePart> &&target,
                     std::span<CallArg> arguments,
                     typename Base::ValueRef *result,
                     bool variable_args = false);

  ScratchReg tls_get_addr(Assembler::SymRef sym, TLSModel model) noexcept;

  bool has_cpu_feats(CPU_FEATURES feats) const noexcept {
    return ((cpu_feats & feats) == feats);
  }
};
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::CallBuilder::
    set_stack_used() noexcept {
  if (stack_adjust_off == 0) {
    // reserve space for "sub sp, sp, #size"; patched in call_impl once the
    // final argument-area size is known
    this->compiler.text_writer.ensure_space(16);
    stack_adjust_off = this->compiler.text_writer.offset();
    this->compiler.text_writer.cur_ptr() += 4;
  }
}
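// The 4 skipped bytes are patched in call_impl with "sub sp, sp, #size";
// calls without stack arguments never reserve the slot and therefore skip
// both the SUB and the matching ADD after the call.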
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::CallBuilder::add_arg_byval(
    ValuePart &vp, CCAssignment &cca) noexcept {
  AsmReg ptr_reg = vp.load_to_reg(&this->compiler);
  AsmReg tmp_reg = AsmReg::R16;

  auto size = cca.byval_size;
  for (u32 off = 0; off < size;) {
    if (size - off >= 8) {
      ASMC(&this->compiler, LDRxu, tmp_reg, ptr_reg, off);
      ASMC(&this->compiler, STRxu, tmp_reg, DA_SP, cca.stack_off + off);
      off += 8;
    } else if (size - off >= 4) {
      ASMC(&this->compiler, LDRwu, tmp_reg, ptr_reg, off);
      ASMC(&this->compiler, STRwu, tmp_reg, DA_SP, cca.stack_off + off);
      off += 4;
    } else if (size - off >= 2) {
      ASMC(&this->compiler, LDRHu, tmp_reg, ptr_reg, off);
      ASMC(&this->compiler, STRHu, tmp_reg, DA_SP, cca.stack_off + off);
      off += 2;
    } else {
      ASMC(&this->compiler, LDRBu, tmp_reg, ptr_reg, off);
      ASMC(&this->compiler, STRBu, tmp_reg, DA_SP, cca.stack_off + off);
      off += 1;
    }
  }
}
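// E.g. a 13-byte byval argument is copied with one 8-, one 4- and one 1-byte
// load/store pair, bouncing through x16.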
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::CallBuilder::add_arg_stack(
    ValuePart &vp, CCAssignment &cca) noexcept {
  this->set_stack_used();

  auto reg = vp.load_to_reg(&this->compiler);
  if (this->compiler.register_file.reg_bank(reg) == Config::GP_BANK) {
    switch (cca.size) {
    case 1: ASMC(&this->compiler, STRBu, reg, DA_SP, cca.stack_off); break;
    case 2: ASMC(&this->compiler, STRHu, reg, DA_SP, cca.stack_off); break;
    case 4: ASMC(&this->compiler, STRwu, reg, DA_SP, cca.stack_off); break;
    case 8: ASMC(&this->compiler, STRxu, reg, DA_SP, cca.stack_off); break;
    default: TPDE_UNREACHABLE("invalid GP reg size");
    }
  } else {
    assert(this->compiler.register_file.reg_bank(reg) == Config::FP_BANK);
    switch (cca.size) {
    case 1: ASMC(&this->compiler, STRbu, reg, DA_SP, cca.stack_off); break;
    case 2: ASMC(&this->compiler, STRhu, reg, DA_SP, cca.stack_off); break;
    case 4: ASMC(&this->compiler, STRsu, reg, DA_SP, cca.stack_off); break;
    case 8: ASMC(&this->compiler, STRdu, reg, DA_SP, cca.stack_off); break;
    case 16: ASMC(&this->compiler, STRqu, reg, DA_SP, cca.stack_off); break;
    default: TPDE_UNREACHABLE("invalid FP reg size");
    }
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::CallBuilder::call_impl(
    std::variant<typename Assembler::SymRef, ValuePart> &&target) noexcept {
  u32 sub = 0;
  if (stack_adjust_off != 0) {
    // patch the reserved stack adjustment now that the size is known
    auto *text_data = this->compiler.text_writer.begin_ptr();
    u32 *write_ptr = reinterpret_cast<u32 *>(text_data + stack_adjust_off);
    u32 stack_size = this->assigner.get_stack_size();
    sub = util::align_up(stack_size, stack_size < 0x1000 ? 0x10 : 0x1000);
    *write_ptr = de64_SUBxi(DA_SP, DA_SP, sub);
  } else {
    assert(this->assigner.get_stack_size() == 0);
  }

  if (auto *sym = std::get_if<typename Assembler::SymRef>(&target)) {
    ASMC(&this->compiler, BL, 0);
    this->compiler.reloc_text(
        *sym, R_AARCH64_CALL26, this->compiler.text_writer.offset() - 4);
  } else {
    ValuePart &tvp = std::get<ValuePart>(target);
    AsmReg reg = tvp.cur_reg_unlocked();
    if (!reg.valid()) {
      reg = tvp.reload_into_specific_fixed(&this->compiler, AsmReg::R16);
    }
    ASMC(&this->compiler, BLR, reg);
    tvp.reset(&this->compiler);
  }

  if (stack_adjust_off != 0) {
    ASMC(&this->compiler, ADDxi, DA_SP, DA_SP, sub);
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::start_func(
    const u32 /*func_idx*/) noexcept {
  this->assembler.except_begin_func();
  this->text_writer.align(16);
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::gen_func_prolog_and_args(
    CCAssigner *cc_assigner) noexcept {
  // The prologue is written in finish_func once the frame layout is known;
  // here we only reserve space for it:
  //   sub sp, sp, #frame_size
  //   stp x29, x30, [sp]
  //   mov x29, sp
  //   <save callee-saved registers>
  func_ret_offs.clear();
  func_start_off = this->text_writer.offset();

  const CCInfo &cc_info = cc_assigner->get_ccinfo();

  // space for FP and LR
  this->stack.frame_size = 16;

  auto csr = cc_info.callee_saved_regs;
  auto csr_gp = csr & this->register_file.bank_regs(Config::GP_BANK);
  auto csr_fp = csr & this->register_file.bank_regs(Config::FP_BANK);
  u32 gp_saves = std::popcount(csr_gp);
  u32 fp_saves = std::popcount(csr_fp);
  // one STP per register pair plus one STR for a leftover register
  u32 reg_save_size = 4 * ((gp_saves + 1) / 2 + (fp_saves + 1) / 2);
  this->stack.frame_size += util::align_up(gp_saves * 8 + fp_saves * 8, 16);

  // sub + stp + mov = 12 bytes
  func_prologue_alloc = reg_save_size + 12;
  this->text_writer.ensure_space(func_prologue_alloc);
  this->text_writer.cur_ptr() += func_prologue_alloc;

  // ldp + add + ret = 12 bytes
  func_epilogue_alloc = reg_save_size + 12;
  // restoring sp from x29 with dynamic allocas takes one more instruction
  func_epilogue_alloc += this->adaptor->cur_has_dynamic_alloca() ? 4 : 0;

  if (this->adaptor->cur_is_vararg()) [[unlikely]] {
    reg_save_frame_off = this->stack.frame_size;
    // register save area: x0-x7 (64 bytes), q0-q7 (128 bytes) and the
    // pointer to the stack arguments (8 bytes, padded to 16)
    this->stack.frame_size += 8 * 8 + 8 * 16 + 16;
    this->text_writer.ensure_space(4 * 8);
    ASMNC(STPx, DA_GP(0), DA_GP(1), DA_SP, reg_save_frame_off);
    ASMNC(STPx, DA_GP(2), DA_GP(3), DA_SP, reg_save_frame_off + 16);
    ASMNC(STPx, DA_GP(4), DA_GP(5), DA_SP, reg_save_frame_off + 32);
    ASMNC(STPx, DA_GP(6), DA_GP(7), DA_SP, reg_save_frame_off + 48);
    ASMNC(STPq, DA_V(0), DA_V(1), DA_SP, reg_save_frame_off + 64);
    ASMNC(STPq, DA_V(2), DA_V(3), DA_SP, reg_save_frame_off + 96);
    ASMNC(STPq, DA_V(4), DA_V(5), DA_SP, reg_save_frame_off + 128);
    ASMNC(STPq, DA_V(6), DA_V(7), DA_SP, reg_save_frame_off + 160);
  }

  assert((cc_info.allocatable_regs & cc_info.arg_regs) == cc_info.arg_regs &&
         "argument registers must also be allocatable");
  this->register_file.allocatable &= ~cc_info.arg_regs;

  this->func_arg_stack_add_off = ~0u;

  u32 arg_idx = 0;
  for (const IRValueRef arg : this->adaptor->cur_args()) {
    derived()->handle_func_arg(
        arg_idx++,
        arg,
        [&](ValuePart &&vp, CCAssignment cca) -> std::optional<i32> {
          cca.bank = vp.bank();
          cca.size = vp.part_size();

          cc_assigner->assign_arg(cca);

          if (cca.reg.valid()) [[likely]] {
            vp.set_value_reg(this, cca.reg);
            // argument registers stay reserved until all arguments are
            // assigned; release this one for allocation again
            this->register_file.allocatable |= u64{1} << cca.reg.id();
            return {};
          }

          // the argument lives on the stack; address it relative to the
          // (not yet known) frame size through x17
          this->text_writer.ensure_space(8);
          AsmReg stack_reg = AsmReg::R17;
          assert(
              !(this->register_file.allocatable & (u64{1} << stack_reg.id())) &&
              "x17 must not be allocatable");
          if (this->func_arg_stack_add_off == ~0u) {
            this->func_arg_stack_add_off = this->text_writer.offset();
            this->func_arg_stack_add_reg = stack_reg;
            // patched to "add x17, sp, #frame_size" in finish_func
            ASMNC(ADDxi, stack_reg, DA_SP, 0);
          }

          AsmReg dst = vp.alloc_reg(this);
          if (cca.byval) {
            ASM(ADDxi, dst, stack_reg, cca.stack_off);
          } else if (cca.bank == Config::GP_BANK) {
            switch (cca.size) {
            case 1: ASMNC(LDRBu, dst, stack_reg, cca.stack_off); break;
            case 2: ASMNC(LDRHu, dst, stack_reg, cca.stack_off); break;
            case 4: ASMNC(LDRwu, dst, stack_reg, cca.stack_off); break;
            case 8: ASMNC(LDRxu, dst, stack_reg, cca.stack_off); break;
            default: TPDE_UNREACHABLE("invalid GP reg size");
            }
          } else {
            assert(cca.bank == Config::FP_BANK);
            switch (cca.size) {
            case 1: ASMNC(LDRbu, dst, stack_reg, cca.stack_off); break;
            case 2: ASMNC(LDRhu, dst, stack_reg, cca.stack_off); break;
            case 4: ASMNC(LDRsu, dst, stack_reg, cca.stack_off); break;
            case 8: ASMNC(LDRdu, dst, stack_reg, cca.stack_off); break;
            case 16: ASMNC(LDRqu, dst, stack_reg, cca.stack_off); break;
            default: TPDE_UNREACHABLE("invalid FP reg size");
            }
          }
          return {};
        });
  }

  if (this->adaptor->cur_is_vararg()) [[unlikely]] {
    AsmReg stack_reg = AsmReg::R17;
    assert(!(this->register_file.allocatable & (u64{1} << stack_reg.id())) &&
           "x17 must not be allocatable");
    if (this->func_arg_stack_add_off == ~0u) {
      this->func_arg_stack_add_off = this->text_writer.offset();
      this->func_arg_stack_add_reg = stack_reg;
      ASMC(this, ADDxi, stack_reg, DA_SP, 0);
    }
    // store the pointer to the first stacked vararg behind the register
    // save area
    ASM(ADDxi, stack_reg, stack_reg, cc_assigner->get_stack_size());
    ASM(STRxu, stack_reg, DA_GP(29), this->reg_save_frame_off + 192);

    // count the used scalar/vector argument registers for va_start
    auto arg_regs = this->register_file.allocatable & cc_info.arg_regs;
    u32 ngrn = 8 - util::cnt_lz<u16>((arg_regs & 0xff) << 8 | 0x80);
    u32 nsrn = 8 - util::cnt_lz<u16>(((arg_regs >> 32) & 0xff) << 8 | 0x80);
    this->scalar_arg_count = ngrn;
    this->vec_arg_count = nsrn;
  }

  this->register_file.allocatable |= cc_info.arg_regs;
}
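// Shape of the final prologue (sketch; the callee-saved stores are filled in
// by finish_func once the clobbered registers are known):
//   sub sp, sp, #frame_size
//   stp x29, x30, [sp]
//   mov x29, sp
//   stp x19, x20, [sp, #16]   // one STP/STR per saved pair/register
//   ...
//   nop                       // padding up to func_prologue_alloc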
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::finish_func(
    u32 func_idx) noexcept {
  auto csr = derived()->cur_cc_assigner()->get_ccinfo().callee_saved_regs;
  u64 saved_regs = this->register_file.clobbered & csr;

  const auto dyn_alloca = this->adaptor->cur_has_dynamic_alloca();
  auto stack_reg = DA_SP;
  if (dyn_alloca) {
    stack_reg = DA_GP(29);
  }

  auto final_frame_size = util::align_up(this->stack.frame_size, 16);
  if (final_frame_size > 4095) {
    // round up to a multiple of 4096 so the frame fits a single shifted
    // add/sub immediate
    final_frame_size = util::align_up(final_frame_size, 4096);
    assert(final_frame_size < 16 * 1024 * 1024);
  }

  auto fde_off = this->assembler.eh_begin_fde(this->get_personality_sym());

  // assemble the prologue into a buffer and copy it into the space reserved
  // by gen_func_prolog_and_args
  util::SmallVector<u32, 16> prologue;
  prologue.push_back(de64_SUBxi(DA_SP, DA_SP, final_frame_size));
  this->assembler.eh_write_inst(dwarf::DW_CFA_advance_loc, 1);
  this->assembler.eh_write_inst(dwarf::DW_CFA_def_cfa_offset,
                                final_frame_size);
  prologue.push_back(de64_STPx(DA_GP(29), DA_GP(30), DA_SP, 0));
  prologue.push_back(de64_MOV_SPx(DA_GP(29), DA_SP));
  this->assembler.eh_write_inst(dwarf::DW_CFA_advance_loc, 2);
  this->assembler.eh_write_inst(dwarf::DW_CFA_def_cfa_register,
                                dwarf::a64::DW_reg_fp);
  this->assembler.eh_write_inst(
      dwarf::DW_CFA_offset, dwarf::a64::DW_reg_fp, final_frame_size / 8);
  this->assembler.eh_write_inst(
      dwarf::DW_CFA_offset, dwarf::a64::DW_reg_lr, final_frame_size / 8 - 1);

  // placeholder for the advance over the register saves; patched below
  auto fde_prologue_adv_off = this->assembler.eh_writer.size();
  this->assembler.eh_write_inst(dwarf::DW_CFA_advance_loc, 0);
  u32 frame_off = 16;
  AsmReg last_reg = AsmReg::make_invalid();
  for (auto reg : util::BitSetIterator{saved_regs}) {
    // every saved register gets an 8-byte slot in id order; adjacent
    // same-bank registers are stored pairwise with STP
    u32 slot_off = last_reg.valid() ? frame_off + 8 : frame_off;
    u8 dwarf_base = reg < 32 ? dwarf::a64::DW_reg_x0 : dwarf::a64::DW_reg_v0;
    u8 dwarf_reg = dwarf_base + reg % 32;
    u32 cfa_off = (final_frame_size - slot_off) / 8;
    if ((dwarf_reg & dwarf::DWARF_CFI_PRIMARY_OPCODE_MASK) == 0) {
      this->assembler.eh_write_inst(dwarf::DW_CFA_offset, dwarf_reg, cfa_off);
    } else {
      this->assembler.eh_write_inst(
          dwarf::DW_CFA_offset_extended, dwarf_reg, cfa_off);
    }

    if (last_reg.valid()) {
      const auto reg_bank = this->register_file.reg_bank(AsmReg{reg});
      const auto last_bank = this->register_file.reg_bank(last_reg);
      if (reg_bank == last_bank) {
        if (reg_bank == Config::GP_BANK) {
          prologue.push_back(
              de64_STPx(last_reg, AsmReg{reg}, stack_reg, frame_off));
        } else {
          prologue.push_back(
              de64_STPd(last_reg, AsmReg{reg}, stack_reg, frame_off));
        }
        frame_off += 16;
        last_reg = AsmReg::make_invalid();
      } else {
        assert(last_bank == Config::GP_BANK && reg_bank == Config::FP_BANK);
        prologue.push_back(de64_STRxu(last_reg, stack_reg, frame_off));
        frame_off += 8;
        last_reg = AsmReg{reg};
      }
    } else {
      last_reg = AsmReg{reg};
    }
  }

  if (last_reg.valid()) {
    if (this->register_file.reg_bank(last_reg) == Config::GP_BANK) {
      prologue.push_back(de64_STRxu(last_reg, stack_reg, frame_off));
    } else {
      assert(this->register_file.reg_bank(last_reg) == Config::FP_BANK);
      prologue.push_back(de64_STRdu(last_reg, stack_reg, frame_off));
    }
  }

  assert(prologue.size() * sizeof(u32) <= func_prologue_alloc);
  // DW_CFA_advance_loc carries the advance in its low 6 bits
  assert(prologue.size() < 0x4c);
  this->assembler.eh_writer.data()[fde_prologue_adv_off] =
      dwarf::DW_CFA_advance_loc | (prologue.size() - 3);

  // pad the prologue with nops so that its end stays 16-byte aligned
  const auto nop_count = (func_prologue_alloc / 4 - prologue.size()) % 4;
  const auto nop = de64_NOP();
  for (auto i = 0u; i < nop_count; ++i) {
    prologue.push_back(nop);
  }

  // skip the unused start of the reserved space, keeping the entry aligned
  u32 skip = util::align_down(func_prologue_alloc - prologue.size() * 4, 16);
  std::memset(this->text_writer.begin_ptr() + func_start_off, 0, skip);
  func_start_off += skip;
  this->assembler.sym_set_value(this->func_syms[func_idx], func_start_off);
  std::memcpy(this->text_writer.begin_ptr() + func_start_off,
              prologue.data(),
              prologue.size() * sizeof(u32));
  if (func_arg_stack_add_off != ~0u) {
    // patch the stack-argument base address now that the frame size is known
    auto *inst_ptr = this->text_writer.begin_ptr() + func_arg_stack_add_off;
    *reinterpret_cast<u32 *>(inst_ptr) =
        de64_ADDxi(func_arg_stack_add_reg, DA_SP, final_frame_size);
  }

  auto func_sym = this->func_syms[func_idx];
  auto func_sec = this->text_writer.get_sec_ref();

  if (func_ret_offs.empty()) {
    auto func_size = this->text_writer.offset() - func_start_off;
    this->assembler.sym_def(func_sym, func_sec, func_start_off, func_size);
    this->assembler.eh_end_fde(fde_off, func_sym);
    this->assembler.except_encode_func(func_sym);
    return;
  }
  auto *text_data = this->text_writer.begin_ptr();
  u32 first_ret_off = func_ret_offs[0];
  u32 ret_size = 0;
  {
    // assemble the epilogue into the space reserved by gen_func_epilog
    u32 *write_ptr = reinterpret_cast<u32 *>(text_data + first_ret_off);
    const auto ret_start = write_ptr;
    if (dyn_alloca) {
      // dynamic allocas may have moved sp; restore it from the frame pointer
      *write_ptr++ = de64_MOV_SPx(DA_SP, DA_GP(29));
    } else {
      *write_ptr++ = de64_LDPx(DA_GP(29), DA_GP(30), DA_SP, 0);
    }

    AsmReg last_reg = AsmReg::make_invalid();
    u32 frame_off = 16;
    for (auto reg : util::BitSetIterator{saved_regs}) {
      if (last_reg.valid()) {
        const auto reg_bank = this->register_file.reg_bank(AsmReg{reg});
        const auto last_bank = this->register_file.reg_bank(last_reg);
        if (reg_bank == last_bank) {
          if (reg_bank == Config::GP_BANK) {
            *write_ptr++ =
                de64_LDPx(last_reg, AsmReg{reg}, stack_reg, frame_off);
          } else {
            *write_ptr++ =
                de64_LDPd(last_reg, AsmReg{reg}, stack_reg, frame_off);
          }
          frame_off += 16;
          last_reg = AsmReg::make_invalid();
        } else {
          assert(last_bank == Config::GP_BANK && reg_bank == Config::FP_BANK);
          *write_ptr++ = de64_LDRxu(last_reg, stack_reg, frame_off);
          frame_off += 8;
          last_reg = AsmReg{reg};
        }
      } else {
        last_reg = AsmReg{reg};
      }
    }

    if (last_reg.valid()) {
      if (this->register_file.reg_bank(last_reg) == Config::GP_BANK) {
        *write_ptr++ = de64_LDRxu(last_reg, stack_reg, frame_off);
      } else {
        *write_ptr++ = de64_LDRdu(last_reg, stack_reg, frame_off);
      }
    }

    if (dyn_alloca) {
      // x29 was needed as the restore base above, so reload it last
      *write_ptr++ = de64_LDPx(DA_GP(29), DA_GP(30), DA_SP, 0);
    }
    *write_ptr++ = de64_ADDxi(DA_SP, DA_SP, final_frame_size);
    *write_ptr++ = de64_RET(DA_GP(30));

    ret_size = (write_ptr - ret_start) * 4;
    assert(ret_size <= func_epilogue_alloc);
    // the padding after the epilogue is never executed
    std::memset(write_ptr, 0, func_epilogue_alloc - ret_size);
  }

  // copy the finished epilogue to all other return sites
  for (u32 i = 1; i < func_ret_offs.size(); ++i) {
    std::memcpy(text_data + func_ret_offs[i],
                text_data + first_ret_off,
                func_epilogue_alloc);
  }

  // if the last epilogue ends the function, give the padding back
  u32 func_end_ret_off = this->text_writer.offset() - func_epilogue_alloc;
  if (func_ret_offs.back() == func_end_ret_off) {
    this->text_writer.cur_ptr() -= func_epilogue_alloc - ret_size;
  }

  auto func_size = this->text_writer.offset() - func_start_off;
  this->assembler.sym_def(func_sym, func_sec, func_start_off, func_size);
  this->assembler.eh_end_fde(fde_off, func_sym);
  this->assembler.except_encode_func(func_sym);
}
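// Shape of the emitted epilogue (sketch): without dynamic allocas
//   ldp x29, x30, [sp]; <restore callee-saved>; add sp, sp, #frame_size; ret
// with dynamic allocas sp is first restored from x29 and x29/x30 are
// reloaded last, which is why func_epilogue_alloc reserves 4 extra bytes.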
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::reset() noexcept {
  func_ret_offs.clear();
  Base::reset();
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::gen_func_epilog() noexcept {
  // the epilogue is assembled in finish_func since the frame layout is only
  // known then; just reserve space for it here
  func_ret_offs.push_back(this->text_writer.offset());
  this->text_writer.ensure_space(func_epilogue_alloc);
  this->text_writer.cur_ptr() += func_epilogue_alloc;
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::spill_reg(
    const AsmReg reg, const u32 frame_off, const u32 size) noexcept {
  assert((size & (size - 1)) == 0);
  assert(util::align_up(frame_off, size) == frame_off);
  // frame offsets must stay addressable with add+str
  assert(frame_off < 0x1'000'000);

  u32 off = frame_off;
  auto addr_base = AsmReg{AsmReg::FP};
  if (off >= 0x1000 * size) [[unlikely]] {
    // the offset does not fit the scaled unsigned 12-bit immediate; split it
    ASM(ADDxi, permanent_scratch_reg, DA_GP(29), off & ~0xfff);
    off &= 0xfff;
    addr_base = permanent_scratch_reg;
  }

  this->text_writer.ensure_space(4);
  assert(-static_cast<i32>(frame_off) < 0);
  if (reg.id() <= AsmReg::R30) {
    switch (size) {
    case 1: ASMNC(STRBu, reg, addr_base, off); break;
    case 2: ASMNC(STRHu, reg, addr_base, off); break;
    case 4: ASMNC(STRwu, reg, addr_base, off); break;
    case 8: ASMNC(STRxu, reg, addr_base, off); break;
    default: TPDE_UNREACHABLE("invalid register spill size");
    }
  } else {
    switch (size) {
    case 1: ASMNC(STRbu, reg, addr_base, off); break;
    case 2: ASMNC(STRhu, reg, addr_base, off); break;
    case 4: ASMNC(STRsu, reg, addr_base, off); break;
    case 8: ASMNC(STRdu, reg, addr_base, off); break;
    case 16: ASMNC(STRqu, reg, addr_base, off); break;
    default: TPDE_UNREACHABLE("invalid register spill size");
    }
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::load_from_stack(
    const AsmReg dst,
    const i32 frame_off,
    const u32 size,
    const bool sign_extend) noexcept {
  assert((size & (size - 1)) == 0);
  assert(util::align_up(frame_off, size) == frame_off);
  assert(frame_off >= 0 && frame_off < 0x1'000'000);

  u32 off = frame_off;
  auto addr_base = AsmReg{AsmReg::FP};
  if (off >= 0x1000 * size) [[unlikely]] {
    // the offset does not fit the scaled unsigned 12-bit immediate; split it
    addr_base = dst.id() <= AsmReg::R30 ? dst : permanent_scratch_reg;
    ASM(ADDxi, addr_base, DA_GP(29), off & ~0xfff);
    off &= 0xfff;
  }

  this->text_writer.ensure_space(4);
  if (dst.id() <= AsmReg::R30) {
    if (!sign_extend) {
      switch (size) {
      case 1: ASMNC(LDRBu, dst, addr_base, off); break;
      case 2: ASMNC(LDRHu, dst, addr_base, off); break;
      case 4: ASMNC(LDRwu, dst, addr_base, off); break;
      case 8: ASMNC(LDRxu, dst, addr_base, off); break;
      default: TPDE_UNREACHABLE("invalid register spill size");
      }
    } else {
      switch (size) {
      case 1: ASMNC(LDRSBwu, dst, addr_base, off); break;
      case 2: ASMNC(LDRSHwu, dst, addr_base, off); break;
      case 4: ASMNC(LDRSWxu, dst, addr_base, off); break;
      case 8: ASMNC(LDRxu, dst, addr_base, off); break;
      default: TPDE_UNREACHABLE("invalid register spill size");
      }
    }
    return;
  }

  assert(!sign_extend);
  switch (size) {
  case 1: ASMNC(LDRbu, dst, addr_base, off); break;
  case 2: ASMNC(LDRhu, dst, addr_base, off); break;
  case 4: ASMNC(LDRsu, dst, addr_base, off); break;
  case 8: ASMNC(LDRdu, dst, addr_base, off); break;
  case 16: ASMNC(LDRqu, dst, addr_base, off); break;
  default: TPDE_UNREACHABLE("invalid register spill size");
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::load_address_of_stack_var(
    const AsmReg dst, const AssignmentPartRef ap) noexcept {
  auto frame_off = ap.variable_stack_off();
  assert(frame_off >= 0);
  if (!ASMIF(ADDxi, dst, DA_GP(29), frame_off)) {
    // the offset is not encodeable as an add immediate
    materialize_constant(frame_off, Config::GP_BANK, 4, dst);
    ASM(ADDx_uxtw, dst, DA_GP(29), dst, 0);
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::mov(
    const AsmReg dst, const AsmReg src, const u32 size) noexcept {
  assert(dst.valid());
  assert(src.valid());
  if (dst.id() <= AsmReg::SP && src.id() <= AsmReg::SP) {
    assert(dst.id() != AsmReg::SP && src.id() != AsmReg::SP);
    if (size > 4) {
      ASM(MOVx, dst, src);
    } else {
      ASM(MOVw, dst, src);
    }
  } else if (dst.id() >= AsmReg::V0 && src.id() >= AsmReg::V0) {
    ASM(ORR16b, dst, src, src);
  } else if (dst.id() <= AsmReg::SP) {
    // vector to GP register
    assert(dst.id() != AsmReg::SP);
    assert(src.id() >= AsmReg::V0);
    assert(size <= 8);
    if (size <= 4) {
      ASM(FMOVws, dst, src);
    } else {
      ASM(FMOVxd, dst, src);
    }
  } else {
    // GP to vector register
    assert(src.id() <= AsmReg::R30);
    assert(dst.id() >= AsmReg::V0);
    assert(size <= 8);
    if (size <= 4) {
      ASM(FMOVsw, dst, src);
    } else {
      ASM(FMOVdx, dst, src);
    }
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
AsmReg CompilerA64<Adaptor, Derived, BaseTy, Config>::gval_expr_as_reg(
    GenericValuePart &gv) noexcept {
  auto &expr = std::get<typename GenericValuePart::Expr>(gv.state);

  ScratchReg scratch{derived()};
  if (!expr.has_base() && !expr.has_index()) {
    AsmReg dst = scratch.alloc_gp();
    derived()->materialize_constant(expr.disp, Config::GP_BANK, 8, dst);
    expr.disp = 0;
  } else if (!expr.has_base() && expr.has_index()) {
    AsmReg index_reg = expr.index_reg();
    if (std::holds_alternative<ScratchReg>(expr.index)) {
      scratch = std::move(std::get<ScratchReg>(expr.index));
    } else {
      (void)scratch.alloc_gp();
    }
    AsmReg dst = scratch.cur_reg();
    if ((expr.scale & (expr.scale - 1)) == 0) {
      const auto shift = util::cnt_tz<u64>(expr.scale);
      ASM(LSLxi, dst, index_reg, shift);
    } else {
      AsmReg tmp2 = permanent_scratch_reg;
      derived()->materialize_constant(expr.scale, Config::GP_BANK, 8, tmp2);
      ASM(MULx, dst, index_reg, tmp2);
    }
  } else if (expr.has_base() && expr.has_index()) {
    AsmReg base_reg = expr.base_reg();
    AsmReg index_reg = expr.index_reg();
    if (std::holds_alternative<ScratchReg>(expr.base)) {
      scratch = std::move(std::get<ScratchReg>(expr.base));
    } else if (std::holds_alternative<ScratchReg>(expr.index)) {
      scratch = std::move(std::get<ScratchReg>(expr.index));
    } else {
      (void)scratch.alloc_gp();
    }
    AsmReg dst = scratch.cur_reg();
    if ((expr.scale & (expr.scale - 1)) == 0) {
      const auto shift = util::cnt_tz<u64>(expr.scale);
      ASM(ADDx_lsl, dst, base_reg, index_reg, shift);
    } else {
      AsmReg tmp2 = permanent_scratch_reg;
      derived()->materialize_constant(expr.scale, Config::GP_BANK, 8, tmp2);
      ASM(MADDx, dst, index_reg, tmp2, base_reg);
    }
  } else if (expr.has_base() && !expr.has_index()) {
    AsmReg base_reg = expr.base_reg();
    if (std::holds_alternative<ScratchReg>(expr.base)) {
      scratch = std::move(std::get<ScratchReg>(expr.base));
    } else {
      (void)scratch.alloc_gp();
    }
    AsmReg dst = scratch.cur_reg();
    if (expr.disp != 0 && ASMIF(ADDxi, dst, base_reg, expr.disp)) {
      expr.disp = 0;
    } else if (dst != base_reg) {
      ASM(MOVx, dst, base_reg);
    }
  } else {
    TPDE_UNREACHABLE("inconsistent GenericValuePart::Expr");
  }

  AsmReg dst = scratch.cur_reg();
  if (expr.disp != 0) {
    if (!ASMIF(ADDxi, dst, dst, expr.disp)) {
      AsmReg tmp2 = permanent_scratch_reg;
      derived()->materialize_constant(expr.disp, Config::GP_BANK, 8, tmp2);
      ASM(ADDx, dst, dst, tmp2);
    }
  }

  gv.state = std::move(scratch);
  return dst;
}
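// Example: an Expr with base, index, scale 8 and disp 16 lowers to (sketch)
//   add dst, base, index, lsl #3
//   add dst, dst, #16
// whereas a non-power-of-two scale materializes the scale into x16 and uses
// MADDx instead of the shift.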
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::materialize_constant(
    const u64 *data, const RegBank bank, const u32 size, AsmReg dst) noexcept {
  const auto const_u64 = data[0];
  if (bank == Config::GP_BANK) {
    assert(size <= 8);
    if (const_u64 == 0) {
      ASM(MOVZw, dst, 0);
      return;
    }

    this->text_writer.ensure_space(5 * 4);
    this->text_writer.cur_ptr() +=
        4 *
        de64_MOVconst(reinterpret_cast<u32 *>(this->text_writer.cur_ptr()),
                      dst,
                      const_u64);
    return;
  }

  assert(bank == Config::FP_BANK);
  if (size == 4) {
    if (ASMIF(FMOVsi, dst, std::bit_cast<float>((u32)const_u64))) {
      return;
    } else if (ASMIF(MOVId, dst, static_cast<u32>(const_u64))) {
      return;
    }
  } else if (size == 8) {
    if (ASMIF(FMOVdi, dst, std::bit_cast<double>(const_u64))) {
      return;
    } else if (ASMIF(MOVId, dst, const_u64)) {
      return;
    }
  } else if (size == 16) {
    const auto high_u64 = data[1];
    if (const_u64 == high_u64 && ASMIF(MOVI2d, dst, const_u64)) {
      return;
    } else if (high_u64 == 0 && ASMIF(MOVId, dst, const_u64)) {
      return;
    }
  }

  // fall back to materializing through the scratch GP register
  this->register_file.mark_clobbered(permanent_scratch_reg);
  if (size <= 8) {
    materialize_constant(data, Config::GP_BANK, size, permanent_scratch_reg);
    this->text_writer.ensure_space(4);
    if (size <= 4) {
      ASMNC(FMOVsw, dst, permanent_scratch_reg);
    } else {
      ASMNC(FMOVdx, dst, permanent_scratch_reg);
    }
    return;
  }

  if (size == 16) {
    // load the constant from .rodata
    auto rodata = this->assembler.get_data_section(true, false);
    std::span<const u8> raw_data{reinterpret_cast<const u8 *>(data), size};
    auto sym = this->assembler.sym_def_data(
        rodata, "", raw_data, 16, Assembler::SymBinding::LOCAL);
    this->text_writer.ensure_space(8);
    this->reloc_text(
        sym, R_AARCH64_ADR_PREL_PG_HI21, this->text_writer.offset(), 0);
    ASMNC(ADRP, permanent_scratch_reg, 0, 0);
    this->reloc_text(
        sym, R_AARCH64_LDST128_ABS_LO12_NC, this->text_writer.offset(), 0);
    ASMNC(LDRqu, dst, permanent_scratch_reg, 0);
    return;
  }

  TPDE_FATAL("unable to materialize constant");
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
AsmReg
    CompilerA64<Adaptor, Derived, BaseTy, Config>::select_fixed_assignment_reg(
        const RegBank bank, IRValueRef) noexcept {
  assert(bank.id() < Config::NUM_BANKS);
  auto reg_mask = this->register_file.bank_regs(bank);
  reg_mask &= ~fixed_assignment_nonallocatable_mask;

  const auto find_possible_regs = [this,
                                   reg_mask](const u64 preferred_regs) -> u64 {
    // try to first get an unused reg, otherwise an unfixed reg
    u64 free_regs = this->register_file.allocatable & ~this->register_file.used;
    u64 possible_regs = free_regs & preferred_regs & reg_mask;
    if (possible_regs == 0) {
      possible_regs = (this->register_file.used & ~this->register_file.fixed) &
                      preferred_regs & reg_mask;
    }
    return possible_regs;
  };

  u64 possible_regs;
  auto csr = derived()->cur_cc_assigner()->get_ccinfo().callee_saved_regs;
  if (derived()->cur_func_may_emit_calls()) {
    // prefer callee-saved registers so the value survives calls
    possible_regs = find_possible_regs(csr);
  } else {
    // prefer caller-saved registers to avoid save/restore overhead
    possible_regs = find_possible_regs(~csr);
    if (possible_regs == 0) {
      possible_regs = find_possible_regs(csr);
    }
  }

  if (possible_regs == 0) {
    return AsmReg::make_invalid();
  }

  // try to first get an unused reg
  if ((possible_regs & ~this->register_file.used) != 0) {
    return AsmReg{util::cnt_tz(possible_regs & ~this->register_file.used)};
  }

  for (const auto reg_id : util::BitSetIterator<>{possible_regs}) {
    const auto reg = AsmReg{reg_id};
    assert(!this->register_file.is_fixed(reg));

    const auto local_idx = this->register_file.reg_local_idx(reg);
    const auto part = this->register_file.reg_part(reg);
    assert(local_idx != Base::INVALID_VAL_LOCAL_IDX);

    auto *assignment = this->val_assignment(local_idx);
    auto ap = AssignmentPartRef{assignment, part};
    if (ap.modified()) {
      // taking this register would require a spill store; try the next one
      continue;
    }
    return reg;
  }

  return AsmReg::make_invalid();
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
typename CompilerA64<Adaptor, Derived, BaseTy, Config>::Jump
    CompilerA64<Adaptor, Derived, BaseTy, Config>::invert_jump(
        Jump jmp) noexcept {
  switch (jmp.kind) {
  case Jump::Jeq: return jmp.change_kind(Jump::Jne);
  case Jump::Jne: return jmp.change_kind(Jump::Jeq);
  case Jump::Jcs: return jmp.change_kind(Jump::Jcc);
  case Jump::Jcc: return jmp.change_kind(Jump::Jcs);
  case Jump::Jmi: return jmp.change_kind(Jump::Jpl);
  case Jump::Jpl: return jmp.change_kind(Jump::Jmi);
  case Jump::Jvs: return jmp.change_kind(Jump::Jvc);
  case Jump::Jvc: return jmp.change_kind(Jump::Jvs);
  case Jump::Jhi: return jmp.change_kind(Jump::Jls);
  case Jump::Jls: return jmp.change_kind(Jump::Jhi);
  case Jump::Jge: return jmp.change_kind(Jump::Jlt);
  case Jump::Jlt: return jmp.change_kind(Jump::Jge);
  case Jump::Jgt: return jmp.change_kind(Jump::Jle);
  case Jump::Jle: return jmp.change_kind(Jump::Jgt);
  case Jump::jmp: return jmp;
  case Jump::Cbz: return jmp.change_kind(Jump::Cbnz);
  case Jump::Cbnz: return jmp.change_kind(Jump::Cbz);
  case Jump::Tbz: return jmp.change_kind(Jump::Tbnz);
  case Jump::Tbnz: return jmp.change_kind(Jump::Tbz);
  default: TPDE_UNREACHABLE("invalid jump kind");
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
typename CompilerA64<Adaptor, Derived, BaseTy, Config>::Jump
    CompilerA64<Adaptor, Derived, BaseTy, Config>::swap_jump(
        Jump jmp) noexcept {
  // condition that holds when the compare operands are swapped
  switch (jmp.kind) {
  case Jump::Jeq: return jmp.change_kind(Jump::Jeq);
  case Jump::Jne: return jmp.change_kind(Jump::Jne);
  case Jump::Jcc: return jmp.change_kind(Jump::Jhi);
  case Jump::Jcs: return jmp.change_kind(Jump::Jls);
  case Jump::Jhi: return jmp.change_kind(Jump::Jcc);
  case Jump::Jls: return jmp.change_kind(Jump::Jcs);
  case Jump::Jge: return jmp.change_kind(Jump::Jle);
  case Jump::Jlt: return jmp.change_kind(Jump::Jgt);
  case Jump::Jgt: return jmp.change_kind(Jump::Jlt);
  case Jump::Jle: return jmp.change_kind(Jump::Jge);
  case Jump::jmp: return jmp;
  default: TPDE_UNREACHABLE("invalid jump kind for swap_jump");
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::generate_branch_to_block(
    const Jump jmp,
    IRBlockRef target,
    const bool needs_split,
    const bool last_inst) noexcept {
  const auto target_idx = this->analyzer.block_idx(target);
  if (!needs_split || jmp.kind == Jump::jmp) {
    this->derived()->move_to_phi_nodes(target_idx);

    if (!last_inst || this->analyzer.block_idx(target) != this->next_block()) {
      generate_raw_jump(jmp, this->block_labels[(u32)target_idx]);
    }
  } else {
    auto tmp_label = this->assembler.label_create();
    generate_raw_jump(invert_jump(jmp), tmp_label);

    this->derived()->move_to_phi_nodes(target_idx);

    generate_raw_jump(Jump::jmp, this->block_labels[(u32)target_idx]);

    this->label_place(tmp_label);
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::generate_raw_jump(
    Jump jmp, Assembler::Label target_label) noexcept {
  const auto is_pending = this->assembler.label_is_pending(target_label);
  this->text_writer.ensure_space(4);
  if (jmp.kind == Jump::jmp) {
    if (is_pending) {
      ASMNC(B, 0);
      this->assembler.add_unresolved_entry(target_label,
                                           this->text_writer.get_sec_ref(),
                                           this->text_writer.offset() - 4,
                                           Assembler::UnresolvedEntryKind::BR);
    } else {
      const auto label_off = this->assembler.label_offset(target_label);
      const auto cur_off = this->text_writer.offset();
      assert(cur_off >= label_off);
      const auto diff = cur_off - label_off;
      assert((diff & 0b11) == 0);
      assert(diff < 128 * 1024 * 1024);

      ASMNC(B, -static_cast<ptrdiff_t>(diff) / 4);
    }
    return;
  }

  if (jmp.kind == Jump::Cbz || jmp.kind == Jump::Cbnz) {
    u32 off = 0;
    if (!is_pending) {
      const auto label_off = this->assembler.label_offset(target_label);
      const auto cur_off = this->text_writer.offset();
      assert(cur_off >= label_off);
      off = cur_off - label_off;
      assert((off & 0b11) == 0);
      assert(off < 128 * 1024 * 1024);
    }

    if (off <= 1024 * 1024) {
      auto imm19 = -static_cast<ptrdiff_t>(off) / 4;
      if (jmp.kind == Jump::Cbz) {
        if (jmp.cmp_is_32) {
          ASMNC(CBZw, jmp.cmp_reg, imm19);
        } else {
          ASMNC(CBZx, jmp.cmp_reg, imm19);
        }
      } else {
        if (jmp.cmp_is_32) {
          ASMNC(CBNZw, jmp.cmp_reg, imm19);
        } else {
          ASMNC(CBNZx, jmp.cmp_reg, imm19);
        }
      }

      if (is_pending) {
        this->assembler.add_unresolved_entry(
            target_label,
            this->text_writer.get_sec_ref(),
            this->text_writer.offset() - 4,
            Assembler::UnresolvedEntryKind::COND_BR);
      }
    } else {
      // the target is out of range for cbz/cbnz; invert the condition and
      // branch over an unconditional jump
      assert(!is_pending);
      this->text_writer.ensure_space(2 * 4);
      if (jmp.kind == Jump::Cbz) {
        if (jmp.cmp_is_32) {
          ASMNC(CBNZw, jmp.cmp_reg, 2);
        } else {
          ASMNC(CBNZx, jmp.cmp_reg, 2);
        }
      } else {
        if (jmp.cmp_is_32) {
          ASMNC(CBZw, jmp.cmp_reg, 2);
        } else {
          ASMNC(CBZx, jmp.cmp_reg, 2);
        }
      }
      ASMNC(B, -static_cast<ptrdiff_t>(off + 4) / 4);
    }
    return;
  }

  if (jmp.kind == Jump::Tbz || jmp.kind == Jump::Tbnz) {
    u32 off = 0;
    if (!is_pending) {
      const auto label_off = this->assembler.label_offset(target_label);
      const auto cur_off = this->text_writer.offset();
      assert(cur_off >= label_off);
      off = cur_off - label_off;
      assert((off & 0b11) == 0);
      assert(off < 128 * 1024 * 1024);
    }

    if (off <= 32 * 1024) {
      auto imm14 = -static_cast<ptrdiff_t>(off) / 4;
      if (jmp.kind == Jump::Tbz) {
        ASMNC(TBZ, jmp.cmp_reg, jmp.test_bit, imm14);
      } else {
        ASMNC(TBNZ, jmp.cmp_reg, jmp.test_bit, imm14);
      }

      if (is_pending) {
        this->assembler.add_unresolved_entry(
            target_label,
            this->text_writer.get_sec_ref(),
            this->text_writer.offset() - 4,
            Assembler::UnresolvedEntryKind::TEST_BR);
      }
    } else {
      // out of range for tbz/tbnz; invert and branch over a B
      assert(!is_pending);
      this->text_writer.ensure_space(2 * 4);
      if (jmp.kind == Jump::Tbz) {
        ASMNC(TBNZ, jmp.cmp_reg, jmp.test_bit, 2);
      } else {
        ASMNC(TBZ, jmp.cmp_reg, jmp.test_bit, 2);
      }
      ASMNC(B, -static_cast<ptrdiff_t>(off + 4) / 4);
    }
    return;
  }

  // conditional branch: translate the kind to a condition code and its
  // complement for the out-of-range case
  Da64Cond cond = jump_to_cond(jmp);
  Da64Cond cond_compl = jump_to_cond(invert_jump(jmp));

  u32 off = 0;
  if (!is_pending) {
    const auto label_off = this->assembler.label_offset(target_label);
    const auto cur_off = this->text_writer.offset();
    assert(cur_off >= label_off);
    off = cur_off - label_off;
    assert((off & 0b11) == 0);
    assert(off < 128 * 1024 * 1024);
  }

  if (off <= 1024 * 1024) {
    ASMNC(BCOND, cond, -static_cast<ptrdiff_t>(off) / 4);
    if (is_pending) {
      this->assembler.add_unresolved_entry(
          target_label,
          this->text_writer.get_sec_ref(),
          this->text_writer.offset() - 4,
          Assembler::UnresolvedEntryKind::COND_BR);
    }
  } else {
    // out of range for b.cond; invert and branch over a B
    assert(!is_pending);
    this->text_writer.ensure_space(2 * 4);
    ASMNC(BCOND, cond_compl, 2);
    ASMNC(B, -static_cast<ptrdiff_t>(off + 4) / 4);
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
Da64Cond CompilerA64<Adaptor, Derived, BaseTy, Config>::jump_to_cond(
    Jump jmp) noexcept {
  switch (jmp.kind) {
  case Jump::Jeq: return DA_EQ;
  case Jump::Jne: return DA_NE;
  case Jump::Jcs: return DA_CS;
  case Jump::Jcc: return DA_CC;
  case Jump::Jmi: return DA_MI;
  case Jump::Jpl: return DA_PL;
  case Jump::Jvs: return DA_VS;
  case Jump::Jvc: return DA_VC;
  case Jump::Jhi: return DA_HI;
  case Jump::Jls: return DA_LS;
  case Jump::Jge: return DA_GE;
  case Jump::Jlt: return DA_LT;
  case Jump::Jgt: return DA_GT;
  case Jump::Jle: return DA_LE;
  case Jump::jmp: return DA_AL;
  default: TPDE_UNREACHABLE("invalid jump kind for conversion to Da64Cond");
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::generate_raw_set(
    Jump cc, AsmReg dst) noexcept {
  ASM(CSETw, dst, jump_to_cond(cc));
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::generate_raw_mask(
    Jump cc, AsmReg dst) noexcept {
  ASM(CSETMx, dst, jump_to_cond(cc));
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::generate_raw_select(
    Jump cc,
    AsmReg dst,
    AsmReg true_select,
    AsmReg false_select,
    bool is_64) noexcept {
  this->text_writer.ensure_space(4);
  Da64Cond cond = jump_to_cond(cc);
  if (is_64) {
    ASMNC(CSELx, dst, true_select, false_select, cond);
  } else {
    ASMNC(CSELw, dst, true_select, false_select, cond);
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::generate_raw_intext(
    AsmReg dst, AsmReg src, bool sign, u32 from, u32 to) noexcept {
  assert(from < to && to <= 64);
  if (sign) {
    if (to <= 32) {
      ASM(SBFXw, dst, src, 0, from);
    } else {
      ASM(SBFXx, dst, src, 0, from);
    }
  } else {
    if (to <= 32) {
      ASM(UBFXw, dst, src, 0, from);
    } else {
      ASM(UBFXx, dst, src, 0, from);
    }
  }
}
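// Example: generate_raw_intext(dst, src, /*sign=*/true, 12, 32) emits
// "sbfx w<dst>, w<src>, #0, #12", sign-extending the low 12 bits; the
// unsigned variant uses UBFX and thus zero-extends.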
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::generate_call(
    std::variant<Assembler::SymRef, ValuePart> &&target,
    std::span<CallArg> arguments,
    typename Base::ValueRef *result,
    bool /*variable_args*/) {
  CCAssignerAAPCS assigner;
  CallBuilder cb{*derived(), assigner};
  for (auto &arg : arguments) {
    cb.add_arg(std::move(arg));
  }
  cb.call(std::move(target));
  if (result) {
    cb.add_ret(*result);
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
typename CompilerA64<Adaptor, Derived, BaseTy, Config>::ScratchReg
    CompilerA64<Adaptor, Derived, BaseTy, Config>::tls_get_addr(
        Assembler::SymRef sym, TLSModel model) noexcept {
  switch (model) {
  default: // other models fall back to the general-dynamic TLSDESC sequence
  case TLSModel::GlobalDynamic: {
    ScratchReg r0_scratch{this};
    AsmReg r0 = r0_scratch.alloc_specific(AsmReg::R0);
    ScratchReg r1_scratch{this};
    AsmReg r1 = r1_scratch.alloc_specific(AsmReg::R1);
    // the TLSDESC call clobbers lr as well
    if (this->register_file.is_used(Reg{AsmReg::LR})) {
      derived()->evict_reg(Reg{AsmReg::LR});
    }
    this->text_writer.ensure_space(0x18);
    this->reloc_text(
        sym, R_AARCH64_TLSDESC_ADR_PAGE21, this->text_writer.offset(), 0);
    ASMNC(ADRP, r0, 0, 0);
    this->reloc_text(
        sym, R_AARCH64_TLSDESC_LD64_LO12, this->text_writer.offset(), 0);
    ASMNC(LDRxu, r1, r0, 0);
    this->reloc_text(
        sym, R_AARCH64_TLSDESC_ADD_LO12, this->text_writer.offset(), 0);
    ASMNC(ADDxi, r0, r0, 0);
    this->reloc_text(
        sym, R_AARCH64_TLSDESC_CALL, this->text_writer.offset(), 0);
    ASMNC(BLR, r1);
    // read the thread pointer from TPIDR_EL0
    ASMNC(MRS, r1, 0xde82);
    ASMNC(ADDx, r0, r1, r0);
    return r0_scratch;
  }
  }
}