#include "tpde/AssemblerElf.hpp"
#include "tpde/AssignmentPartRef.hpp"
#include "tpde/CompilerBase.hpp"
#include "tpde/DWARF.hpp"
#include "tpde/ELF.hpp"
#include "tpde/arm64/FunctionWriterA64.hpp"
#include "tpde/base.hpp"
#include "tpde/util/SmallVector.hpp"
#include "tpde/util/misc.hpp"
#if defined(ASM) || defined(ASMNC) || defined(ASMC)
  #error Got definition for ASM macros from somewhere else. Maybe you included compilers for multiple architectures?
#endif
#define ASMC(compiler, op, ...)                                                \
  ((compiler)->text_writer.write_inst(de64_##op(__VA_ARGS__)))

#define ASM(...) ASMC(this, __VA_ARGS__)

#define ASMNC(op, ...)                                                         \
  (this->text_writer.write_inst_unchecked(de64_##op(__VA_ARGS__)))

#define ASMIFC(compiler, op, ...)                                              \
  ((compiler)->text_writer.try_write_inst(de64_##op(__VA_ARGS__)))

#define ASMIF(...) ASMIFC(this, __VA_ARGS__)
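// Illustration (added for this listing, not part of the original source):
// ASM(op, ...) encodes an instruction with the disarm de64_* encoder and
// appends it to the function text. For example,
//   ASM(ADDxi, dst, src, 16);
// expands to
//   this->text_writer.write_inst(de64_ADDxi(dst, src, 16));
// ASMNC ("no check") skips the buffer-space check and is only valid after an
// explicit text_writer.ensure_space(...); ASMIF only emits the instruction if
// the operands are encodable (e.g. an immediate fits) and returns whether it
// did, so callers can fall back to a longer sequence.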
  constexpr explicit AsmReg() : Reg((u8)0xFF) {}

  constexpr AsmReg(const REG id) : Reg((u8)id) {}

  constexpr AsmReg(const Reg base) : Reg(base) {}

  constexpr explicit AsmReg(const u64 id) : Reg(id) {
    assert(id <= SP || (id >= V0 && id <= V31));
  }

  operator DA_GReg() const { return DA_GReg{reg_id}; }

  operator DA_GRegZR() const {
    assert(reg_id != SP);
    return DA_GRegZR{reg_id};
  }

  operator DA_GRegSP() const {
    assert(reg_id <= SP);
    return DA_GRegSP{reg_id};
  }

  operator DA_VReg() const {
    assert(reg_id >= V0 && reg_id <= V31);
    return DA_VReg{static_cast<u8>(reg_id - V0)};
  }
constexpr static u64
    create_bitmask(const std::initializer_list<AsmReg::REG> regs) {
  u64 set = 0;
  for (const auto reg : regs) {
    set |= 1ull << reg;
  }
  return set;
}

template <size_t N>
constexpr static u64 create_bitmask(const std::array<AsmReg, N> regs) {
  u64 set = 0;
  for (const auto reg : regs) {
    set |= 1ull << reg.id();
  }
  return set;
}
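// Worked example (added for this listing): create_bitmask({AsmReg::R0,
// AsmReg::R1}) yields the mask 0b11, i.e. bit i is set for register id i.
// The complemented form used below, ~create_bitmask({SP, FP, R16, R17}),
// therefore marks every register except SP, the frame pointer and the two
// intra-procedure-call scratch registers x16/x17 as allocatable.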
class CCAssignerAAPCS : public CCAssigner {
  static constexpr CCInfo Info{
      .allocatable_regs =
          0xFFFF'FFFF'FFFF'FFFF &
          ~create_bitmask({AsmReg::SP, AsmReg::FP, AsmReg::R16, AsmReg::R17}),
      .callee_saved_regs = create_bitmask({/* ... */}),
      .arg_regs = create_bitmask({/* ... */}),
  };

  u32 ngrn = 0, nsrn = 0, nsaa = 0;
  u32 ret_ngrn = 0, ret_nsrn = 0;

public:
  CCAssignerAAPCS() : CCAssigner(Info) {}

  void reset() override { ngrn = nsrn = nsaa = ret_ngrn = ret_nsrn = 0; }

  void assign_arg(CCAssignment &arg) override {
    if (arg.byval) [[unlikely]] {
      nsaa = util::align_up(nsaa, arg.align < 8 ? 8 : arg.align);
      arg.stack_off = nsaa;
      // ...
      return;
    }

    if (arg.sret) [[unlikely]] {
      arg.reg = AsmReg{AsmReg::R8};
      return;
    }

    if (arg.bank == RegBank{0}) {
      // ...
      ngrn = util::align_up(ngrn, 2);
      // ...
      if (ngrn + arg.consecutive < 8) {
        arg.reg = Reg{AsmReg::R0 + ngrn};
        // ...
      } else {
        nsaa = util::align_up(nsaa, arg.align < 8 ? 8 : arg.align);
        arg.stack_off = nsaa;
        // ...
      }
    } else {
      if (nsrn + arg.consecutive < 8) {
        arg.reg = Reg{AsmReg::V0 + nsrn};
        // ...
      } else {
        u32 size = util::align_up(arg.size, 8);
        nsaa = util::align_up(nsaa, size);
        arg.stack_off = nsaa;
        // ...
      }
    }
  }

  u32 get_stack_size() override { return nsaa; }

  void assign_ret(CCAssignment &arg) override {
    assert(!arg.byval && !arg.sret);
    if (arg.bank == RegBank{0}) {
      // ...
      ret_ngrn = util::align_up(ret_ngrn, 2);
      // ...
      if (ret_ngrn + arg.consecutive < 8) {
        arg.reg = Reg{AsmReg::R0 + ret_ngrn};
        // ...
      }
    } else {
      if (ret_nsrn + arg.consecutive < 8) {
        arg.reg = Reg{AsmReg::V0 + ret_nsrn};
        // ...
      }
    }
  }
};
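// Worked example (added for this listing): for a call f(i64 a, double b,
// i64 c), assign_arg hands out a -> R0 (ngrn 0 -> 1), b -> V0 (nsrn 0 -> 1)
// and c -> R1 (ngrn 1 -> 2); nsaa stays 0, so get_stack_size() reports no
// stack-argument area. ngrn/nsrn/nsaa mirror the Next General-purpose
// Register Number, Next SIMD/FP Register Number and Next Stacked Argument
// Address counters from the AAPCS64 argument-passing algorithm.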
struct PlatformConfig : CompilerConfigDefault {
  using Assembler = tpde::elf::AssemblerElfA64;
  using AsmReg = tpde::a64::AsmReg;
  // ...

  static constexpr RegBank GP_BANK{0};
  static constexpr RegBank FP_BANK{1};
  static constexpr bool FRAME_INDEXING_NEGATIVE = false;
  static constexpr u32 PLATFORM_POINTER_SIZE = 8;
  static constexpr u32 NUM_BANKS = 2;
};
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy =
              CompilerBase,
          typename Config = PlatformConfig>
struct CompilerA64 : BaseTy<Adaptor, Derived, Config> {
  using Base = BaseTy<Adaptor, Derived, Config>;

  using IRValueRef = typename Base::IRValueRef;
  using IRBlockRef = typename Base::IRBlockRef;
  using IRFuncRef = typename Base::IRFuncRef;

  using ScratchReg = typename Base::ScratchReg;
  using ValuePartRef = typename Base::ValuePartRef;
  using ValuePart = typename Base::ValuePart;
  using GenericValuePart = typename Base::GenericValuePart;

  using RegisterFile = typename Base::RegisterFile;

  using CallArg = typename Base::CallArg;
  static constexpr u32 NUM_FIXED_ASSIGNMENTS[PlatformConfig::NUM_BANKS] = {
      5, /* ... */};

  static constexpr u32 MaxStaticAllocaSize = 0x100000;

  enum CPU_FEATURES : u32 {
    CPU_BASELINE,
    // ...
  };

  CPU_FEATURES cpu_feats = CPU_BASELINE;

  u64 fixed_assignment_nonallocatable_mask =
      create_bitmask({AsmReg::R0, AsmReg::R1});
  u32 func_start_off = 0u, func_prologue_alloc = 0u;

  /// Offset of the `add sp, sp, ...` instruction that argument handling uses
  /// to access stack arguments; patched in finish_func.
  u32 func_arg_stack_add_off = ~0u;
  AsmReg func_arg_stack_add_reg = AsmReg::make_invalid();

  u32 scalar_arg_count = 0xFFFF'FFFF, vec_arg_count = 0xFFFF'FFFF;
  u32 reg_save_frame_off = 0;
  util::SmallVector<u32, 8> func_ret_offs = {};
  /// Helper class for building call sequences.
  class CallBuilder : public Base::template CallBuilderBase<CallBuilder> {
    u32 stack_adjust_off = 0;

    void set_stack_used();

  public:
    CallBuilder(Derived &compiler, CCAssigner &assigner)
        : Base::template CallBuilderBase<CallBuilder>(compiler, assigner) {}

    void add_arg_byval(ValuePart &vp, CCAssignment &cca);
    void add_arg_stack(ValuePart &vp, CCAssignment &cca);
    void call_impl(std::variant<SymRef, ValuePart> &&);
  };
  CompilerA64(Adaptor *adaptor, const CPU_FEATURES cpu_features = CPU_BASELINE)
      : Base{adaptor}, cpu_feats(cpu_features) {
    static_assert(std::is_base_of_v<CompilerA64, Derived>);
  }
  void start_func(u32) {}

  void finish_func(u32 func_idx);

  void gen_func_epilog();

  void spill_reg(const AsmReg reg, const u32 frame_off, const u32 size);

  void load_from_stack(AsmReg dst,
                       i32 frame_off,
                       u32 size,
                       bool sign_extend = false);

  void load_address_of_stack_var(AsmReg dst, AssignmentPartRef ap);

  void mov(AsmReg dst, AsmReg src, u32 size);

  GenericValuePart val_spill_slot(AssignmentPartRef ap) {
    assert(ap.stack_valid() && !ap.variable_ref());
    return typename GenericValuePart::Expr(AsmReg::R29, ap.frame_off());
  }

  AsmReg gval_expr_as_reg(GenericValuePart &gv);

  /// Materialize constant into a register.
  void materialize_constant(const u64 *data, RegBank bank, u32 size, AsmReg dst);

  void materialize_constant(u64 const_u64, RegBank bank, u32 size, AsmReg dst) {
    assert(size <= sizeof(const_u64));
    // ...
  }

  AsmReg select_fixed_assignment_reg(AssignmentPartRef, IRValueRef);
  struct Jump {
    enum Kind : u8 {
      // ... flag-based conditions (Jeq/Jge/Jlt/...), plus the register
      // forms Cbz, Cbnz, Tbz and Tbnz and the unconditional kind jmp.
    };

    Kind kind;
    AsmReg cmp_reg;
    bool cmp_is_32;
    u8 test_bit;

    /// Unconditional or conditional branch based on flags.
    constexpr Jump(Kind kind) : kind(kind), cmp_is_32(false), test_bit(0) {
      assert(kind != Cbz && kind != Cbnz && kind != Tbz && kind != Tbnz);
    }

    /// Cbz/Cbnz branch.
    constexpr Jump(Kind kind, AsmReg cmp_reg, bool cmp_is_32)
        : kind(kind), cmp_reg(cmp_reg), cmp_is_32(cmp_is_32), test_bit(0) {
      assert(kind == Cbz || kind == Cbnz);
    }

    /// Tbz/Tbnz branch.
    constexpr Jump(Kind kind, AsmReg cmp_reg, u8 test_bit)
        : kind(kind), cmp_reg(cmp_reg), cmp_is_32(false), test_bit(test_bit) {
      assert(kind == Tbz || kind == Tbnz);
    }

    constexpr Jump change_kind(Kind new_kind) const {
      auto cpy = *this;
      cpy.kind = new_kind;
      return cpy;
    }
  };

  Jump invert_jump(Jump jmp);
  Jump swap_jump(Jump jmp);
  /// Moves true_select into dst if cc is true, otherwise moves false_select
  /// into dst.
  void generate_raw_select(Jump cc, AsmReg dst, AsmReg true_select,
                           AsmReg false_select, bool is_64);

  /// Bitfield insert. src is not modified.
  void generate_raw_bfi(AsmReg dst, AsmReg src, u32 lsb, u32 width) {
    ASM(BFIx, dst, src, lsb, width);
  }

  /// Bitfield insert in zero. src is not modified.
  void generate_raw_bfiz(AsmReg dst, AsmReg src, u32 lsb, u32 width) {
    ASM(UBFIZx, dst, src, lsb, width);
  }
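  // Example (added for this listing): generate_raw_bfi(dst, src, 8, 16)
  // emits BFI Xd, Xs, #8, #16, which copies the low 16 bits of src into
  // bits [23:8] of dst and leaves all other bits of dst unchanged; the
  // UBFIZ variant instead zeroes every bit of dst outside the inserted
  // field.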
  /// Generate a function call.
  void generate_call(std::variant<SymRef, ValuePart> &&target,
                     std::span<CallArg> arguments,
                     typename Base::ValueRef *result,
                     bool variable_args = false);

  void switch_emit_cmp(AsmReg cmp_reg,
                       AsmReg tmp_reg,
                       u64 case_value,
                       bool width_is_32);

  void switch_emit_cmpeq(Label case_label,
                         AsmReg cmp_reg,
                         AsmReg tmp_reg,
                         u64 case_value,
                         bool width_is_32);

  FunctionWriterBase::JumpTable *switch_create_jump_table(Label default_label,
                                                          AsmReg cmp_reg,
                                                          AsmReg tmp_reg,
                                                          u64 low_bound,
                                                          u64 high_bound,
                                                          bool width_is_32);

  void switch_emit_binary_step(Label case_label, /* ... */
                               AsmReg cmp_reg,
                               AsmReg tmp_reg,
                               u64 case_value,
                               bool width_is_32);
  bool has_cpu_feats(CPU_FEATURES feats) const {
    return ((cpu_feats & feats) == feats);
  }
};
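// Usage sketch (added for this listing; values are illustrative): a branch
// description is built from the Jump constructors above, e.g.
//   Jump j{Jump::Cbz, reg, /*cmp_is_32=*/true};
// describes "compare Wn against zero, branch if zero" and is later lowered
// by generate_raw_jump/generate_raw_select in the definitions below.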
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::CallBuilder::
    set_stack_used() {
  if (stack_adjust_off == 0) {
    // Reserve space for the stack-adjustment instruction; call_impl patches
    // it with the final SUB sp, sp, #imm once the argument area size is
    // known.
    this->compiler.text_writer.ensure_space(16);
    stack_adjust_off = this->compiler.text_writer.offset();
    this->compiler.text_writer.cur_ptr() += 4;
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::CallBuilder::add_arg_byval(
    ValuePart &vp, CCAssignment &cca) {
  AsmReg ptr_reg = vp.load_to_reg(&this->compiler);
  AsmReg tmp_reg = AsmReg::R16;

  auto size = cca.size;
  // Copy the byval argument into the stack-argument area in the largest
  // chunks possible.
  for (u32 off = 0; off < size;) {
    if (size - off >= 8) {
      ASMC(&this->compiler, LDRxu, tmp_reg, ptr_reg, off);
      ASMC(&this->compiler, STRxu, tmp_reg, DA_SP, cca.stack_off + off);
      off += 8;
    } else if (size - off >= 4) {
      ASMC(&this->compiler, LDRwu, tmp_reg, ptr_reg, off);
      ASMC(&this->compiler, STRwu, tmp_reg, DA_SP, cca.stack_off + off);
      off += 4;
    } else if (size - off >= 2) {
      ASMC(&this->compiler, LDRHu, tmp_reg, ptr_reg, off);
      ASMC(&this->compiler, STRHu, tmp_reg, DA_SP, cca.stack_off + off);
      off += 2;
    } else {
      ASMC(&this->compiler, LDRBu, tmp_reg, ptr_reg, off);
      ASMC(&this->compiler, STRBu, tmp_reg, DA_SP, cca.stack_off + off);
      off += 1;
    }
  }
}
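// Worked example (added for this listing): a 13-byte byval argument is
// copied as one 8-byte, one 4-byte and one 1-byte load/store pair
// (off = 0, 8, 12), so the copy never reads or writes past the end of the
// argument.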
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::CallBuilder::add_arg_stack(
    ValuePart &vp, CCAssignment &cca) {
  // ...
  auto reg = vp.has_reg() ? vp.cur_reg() : vp.load_to_reg(&this->compiler);
  if (this->compiler.register_file.reg_bank(reg) == Config::GP_BANK) {
    switch (cca.size) {
    case 1: ASMC(&this->compiler, STRBu, reg, DA_SP, cca.stack_off); break;
    case 2: ASMC(&this->compiler, STRHu, reg, DA_SP, cca.stack_off); break;
    case 4: ASMC(&this->compiler, STRwu, reg, DA_SP, cca.stack_off); break;
    case 8: ASMC(&this->compiler, STRxu, reg, DA_SP, cca.stack_off); break;
    default: TPDE_UNREACHABLE("invalid GP reg size");
    }
  } else {
    assert(this->compiler.register_file.reg_bank(reg) == Config::FP_BANK);
    switch (cca.size) {
    case 1: ASMC(&this->compiler, STRbu, reg, DA_SP, cca.stack_off); break;
    case 2: ASMC(&this->compiler, STRhu, reg, DA_SP, cca.stack_off); break;
    case 4: ASMC(&this->compiler, STRsu, reg, DA_SP, cca.stack_off); break;
    case 8: ASMC(&this->compiler, STRdu, reg, DA_SP, cca.stack_off); break;
    case 16: ASMC(&this->compiler, STRqu, reg, DA_SP, cca.stack_off); break;
    default: TPDE_UNREACHABLE("invalid FP reg size");
    }
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::CallBuilder::call_impl(
    std::variant<SymRef, ValuePart> &&target) {
  u32 sub = 0;
  if (stack_adjust_off != 0) {
    // Patch the reserved instruction with the final stack adjustment.
    auto *text_data = this->compiler.text_writer.begin_ptr();
    u32 *write_ptr = reinterpret_cast<u32 *>(text_data + stack_adjust_off);
    u32 stack_size = this->assigner.get_stack_size();
    sub = util::align_up(stack_size, stack_size < 0x1000 ? 0x10 : 0x1000);
    *write_ptr = de64_SUBxi(DA_SP, DA_SP, sub);
  } else {
    assert(this->assigner.get_stack_size() == 0);
  }

  // Only the low 64 bits of SIMD registers are callee-saved, so evict any
  // callee-saved FP register whose live part is larger than 8 bytes.
  auto fp_regs = RegisterFile::bank_regs(Config::FP_BANK);
  auto fp_csrs = fp_regs & this->assigner.get_ccinfo().callee_saved_regs;
  auto used_fp_csrs = fp_csrs & this->compiler.register_file.used;
  for (auto reg_id : util::BitSetIterator<>{used_fp_csrs}) {
    auto reg = Reg{reg_id};
    ValLocalIdx local_idx = this->compiler.register_file.reg_local_idx(reg);
    auto part = this->compiler.register_file.reg_part(reg);
    AssignmentPartRef ap{this->compiler.val_assignment(local_idx), part};
    if (ap.part_size() > 8) {
      this->compiler.evict(ap);
    }
  }

  if (auto *sym = std::get_if<SymRef>(&target)) {
    ASMC(&this->compiler, BL, 0);
    this->compiler.reloc_text(
        *sym, elf::R_AARCH64_CALL26, this->compiler.text_writer.offset() - 4);
  } else {
    ValuePart &tvp = std::get<ValuePart>(target);
    if (tvp.can_salvage()) {
      ASMC(&this->compiler, BLR, tvp.salvage(&this->compiler));
    } else {
      AsmReg reg = this->compiler.permanent_scratch_reg;
      tvp.reload_into_specific_fixed(&this->compiler, reg);
      ASMC(&this->compiler, BLR, reg);
    }
    tvp.reset(&this->compiler);
  }

  if (stack_adjust_off != 0) {
    ASMC(&this->compiler, ADDxi, DA_SP, DA_SP, sub);
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::prologue_begin(
    CCAssigner *cc_assigner) {
  func_ret_offs.clear();
  func_start_off = this->text_writer.offset();

  const CCInfo &cc_info = cc_assigner->get_ccinfo();
  // ...

  // FP and LR always occupy the first 16 bytes of the frame.
  this->stack.frame_size = 16;

  auto csr = cc_info.callee_saved_regs;
  auto csr_gp = csr & this->register_file.bank_regs(Config::GP_BANK);
  auto csr_fp = csr & this->register_file.bank_regs(Config::FP_BANK);
  u32 gp_saves = std::popcount(csr_gp);
  u32 fp_saves = std::popcount(csr_fp);
  // One 4-byte STP per register pair.
  u32 reg_save_size = 4 * ((gp_saves + 1) / 2 + (fp_saves + 1) / 2);

  this->stack.frame_size += util::align_up(gp_saves * 8 + fp_saves * 8, 16);

  // Reserve the maximum prologue size; unused bytes are removed again in
  // finish_func once the actual prologue is known.
  func_prologue_alloc = reg_save_size + 12;
  this->text_writer.ensure_space(func_prologue_alloc);
  this->text_writer.cur_ptr() += func_prologue_alloc;

  if (this->adaptor->cur_is_vararg()) [[unlikely]] {
    this->stack.frame_used = true;
    reg_save_frame_off = this->stack.frame_size;
    // Register save area for va_start: 8 GP registers, 8 vector registers,
    // plus space for the stack-argument pointer.
    this->stack.frame_size += 8 * 8 + 8 * 16 + 16;
    this->text_writer.ensure_space(4 * 8);
    ASMNC(STPx, DA_GP(0), DA_GP(1), DA_SP, reg_save_frame_off);
    ASMNC(STPx, DA_GP(2), DA_GP(3), DA_SP, reg_save_frame_off + 16);
    ASMNC(STPx, DA_GP(4), DA_GP(5), DA_SP, reg_save_frame_off + 32);
    ASMNC(STPx, DA_GP(6), DA_GP(7), DA_SP, reg_save_frame_off + 48);
    ASMNC(STPq, DA_V(0), DA_V(1), DA_SP, reg_save_frame_off + 64);
    ASMNC(STPq, DA_V(2), DA_V(3), DA_SP, reg_save_frame_off + 96);
    ASMNC(STPq, DA_V(4), DA_V(5), DA_SP, reg_save_frame_off + 128);
    ASMNC(STPq, DA_V(6), DA_V(7), DA_SP, reg_save_frame_off + 160);
  }
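  // Layout note (added for this listing): the vararg register save area is
  // 8 * 8 = 64 bytes for x0-x7 followed by 8 * 16 = 128 bytes for q0-q7;
  // the final 16 bytes hold the pointer to the incoming stack arguments
  // that prologue_end stores at reg_save_frame_off + 192 for va_start.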
  this->func_arg_stack_add_off = ~0u;
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
std::optional<i32>
    CompilerA64<Adaptor, Derived, BaseTy, Config>::prologue_assign_arg_part(
        ValuePart &&vp, CCAssignment cca) {
  if (cca.reg.valid()) [[likely]] {
    vp.set_value_reg(this, cca.reg);
    // ...
    this->register_file.allocatable |= u64{1} << cca.reg.id();
    return std::nullopt;
  }

  AsmReg dst = vp.alloc_reg(this);
  this->text_writer.ensure_space(8);
  AsmReg stack_reg = AsmReg::R17;
  assert(!(this->register_file.allocatable & (u64{1} << stack_reg.id())) &&
         "x17 must not be allocatable");
  if (this->func_arg_stack_add_off == ~0u) {
    // Placeholder; finish_func rewrites this instruction to compute the
    // base of the incoming stack arguments.
    this->func_arg_stack_add_off = this->text_writer.offset();
    this->func_arg_stack_add_reg = stack_reg;
    ASMNC(ADDxi, stack_reg, DA_SP, 0);
  }

  if (cca.byval) {
    ASMNC(ADDxi, dst, stack_reg, cca.stack_off);
  } else if (cca.bank == Config::GP_BANK) {
    switch (cca.size) {
    case 1: ASMNC(LDRBu, dst, stack_reg, cca.stack_off); break;
    case 2: ASMNC(LDRHu, dst, stack_reg, cca.stack_off); break;
    case 4: ASMNC(LDRwu, dst, stack_reg, cca.stack_off); break;
    case 8: ASMNC(LDRxu, dst, stack_reg, cca.stack_off); break;
    default: TPDE_UNREACHABLE("invalid GP reg size");
    }
  } else {
    assert(cca.bank == Config::FP_BANK);
    switch (cca.size) {
    case 1: ASMNC(LDRbu, dst, stack_reg, cca.stack_off); break;
    case 2: ASMNC(LDRhu, dst, stack_reg, cca.stack_off); break;
    case 4: ASMNC(LDRsu, dst, stack_reg, cca.stack_off); break;
    case 8: ASMNC(LDRdu, dst, stack_reg, cca.stack_off); break;
    case 16: ASMNC(LDRqu, dst, stack_reg, cca.stack_off); break;
    default: TPDE_UNREACHABLE("invalid FP reg size");
    }
  }
  return std::nullopt;
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::prologue_end(
    CCAssigner *cc_assigner) {
  // ...
  if (this->adaptor->cur_is_vararg()) [[unlikely]] {
    this->stack.frame_used = true;
    AsmReg stack_reg = AsmReg::R17;
    assert(!(this->register_file.allocatable & (u64{1} << stack_reg.id())) &&
           "x17 must not be allocatable");
    if (this->func_arg_stack_add_off == ~0u) {
      this->func_arg_stack_add_off = this->text_writer.offset();
      this->func_arg_stack_add_reg = stack_reg;
      ASMC(this, ADDxi, stack_reg, DA_SP, 0);
    }
    // Store the pointer to the incoming stack arguments for va_start.
    ASM(ADDxi, stack_reg, stack_reg, cc_assigner->get_stack_size());
    ASM(STRxu, stack_reg, DA_GP(29), this->reg_save_frame_off + 192);
  }

  const CCInfo &cc_info = cc_assigner->get_ccinfo();
  auto arg_regs = this->register_file.allocatable & cc_info.arg_regs;
  u32 ngrn = 8 - util::cnt_lz<u16>((arg_regs & 0xff) << 8 | 0x80);
  u32 nsrn = 8 - util::cnt_lz<u16>(((arg_regs >> 32) & 0xff) << 8 | 0x80);
  this->scalar_arg_count = ngrn;
  this->vec_arg_count = nsrn;
}
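// Bit-trick note (added for this listing): 8 - cnt_lz<u16>(m << 8 | 0x80)
// computes 1 + the index of the highest set bit of the 8-bit mask m, or 0
// when m is empty. E.g. m = 0b0000'0111: m << 8 | 0x80 = 0x0780, which has
// 5 leading zeros in 16 bits, giving 8 - 5 = 3. GP argument registers sit
// in mask bits 0-7 (x0-x7) and vector ones in bits 32-39 (v0-v7), hence
// the >> 32 for nsrn.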
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::finish_func(u32 func_idx) {
  auto csr = derived()->cur_cc_assigner()->get_ccinfo().callee_saved_regs;
  u64 saved_regs = this->register_file.clobbered & csr;

  auto stack_reg = DA_SP;
  if (this->stack.has_dynamic_alloca) {
    stack_reg = DA_GP(29);
  }

  auto final_frame_size = util::align_up(this->stack.frame_size, 16);
  if (final_frame_size > 4095) {
    // Round to a multiple of 4096 so the SUB/ADD immediate is encodable
    // with LSL #12.
    final_frame_size = util::align_up(final_frame_size, 4096);
    assert(final_frame_size < 16 * 1024 * 1024);
  }

  bool needs_stack_frame =
      this->stack.frame_used || this->stack.generated_call ||
      this->stack.has_dynamic_alloca || saved_regs != 0 ||
      (this->register_file.clobbered & (u64{1} << AsmReg::LR));

  this->text_writer.eh_begin_fde(this->get_personality_sym());

  u32 prologue_size = 0;
  if (needs_stack_frame) [[likely]] {
    util::SmallVector<u32, 16> prologue;
    if (!func_ret_offs.empty() && final_frame_size <= 0x1f8) {
      this->text_writer.eh_write_inst(dwarf::DW_CFA_remember_state);
    }
    this->text_writer.eh_write_inst(dwarf::DW_CFA_advance_loc, 1);
    this->text_writer.eh_write_inst(dwarf::DW_CFA_def_cfa_offset,
                                    final_frame_size);
    if (final_frame_size <= 0x1f8) {
      // Frame small enough for STP's pre-index immediate.
      prologue.push_back(
          de64_STPx_pre(DA_GP(29), DA_GP(30), DA_SP, -int(final_frame_size)));
      prologue.push_back(de64_MOV_SPx(DA_GP(29), DA_SP));
    } else {
      if (!func_ret_offs.empty()) {
        this->text_writer.eh_write_inst(dwarf::DW_CFA_remember_state);
      }
      prologue.push_back(de64_SUBxi(DA_SP, DA_SP, final_frame_size));
      prologue.push_back(de64_STPx(DA_GP(29), DA_GP(30), DA_SP, 0));
      prologue.push_back(de64_MOV_SPx(DA_GP(29), DA_SP));
    }

    auto fde_prologue_adv_off = this->text_writer.eh_writer.size();
    this->text_writer.eh_write_inst(dwarf::DW_CFA_advance_loc, 0);
    this->text_writer.eh_write_inst(dwarf::DW_CFA_def_cfa_register,
                                    dwarf::a64::DW_reg_fp);
    this->text_writer.eh_write_inst(
        dwarf::DW_CFA_offset, dwarf::a64::DW_reg_fp, final_frame_size / 8);
    this->text_writer.eh_write_inst(
        dwarf::DW_CFA_offset, dwarf::a64::DW_reg_lr, final_frame_size / 8 - 1);
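    // CFI note (added for this listing, assuming the usual AArch64 CIE with
    // data alignment factor -8): DW_CFA_offset N means "register saved at
    // CFA - 8*N", so fp at final_frame_size / 8 is the lowest address of
    // the frame and lr sits 8 bytes above it, matching the STP of x29/x30
    // at the bottom of the frame.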
    AsmReg last_reg = AsmReg::make_invalid();
    u32 frame_off = 16;
    for (auto reg : util::BitSetIterator{saved_regs}) {
      u8 dwarf_base = reg < 32 ? dwarf::a64::DW_reg_x0 : dwarf::a64::DW_reg_v0;
      u8 dwarf_reg = dwarf_base + reg % 32;
      u32 cfa_off = (final_frame_size - frame_off) / 8 - last_reg.valid();
      if ((dwarf_reg & dwarf::DWARF_CFI_PRIMARY_OPCODE_MASK) == 0) {
        this->text_writer.eh_write_inst(
            dwarf::DW_CFA_offset, dwarf_reg, cfa_off);
      } else {
        this->text_writer.eh_write_inst(
            dwarf::DW_CFA_offset_extended, dwarf_reg, cfa_off);
      }

      // Pair consecutive saves of the same bank into a single STP.
      if (last_reg.valid()) {
        const auto reg_bank = this->register_file.reg_bank(AsmReg{reg});
        const auto last_bank = this->register_file.reg_bank(last_reg);
        if (reg_bank == last_bank) {
          if (reg_bank == Config::GP_BANK) {
            prologue.push_back(
                de64_STPx(last_reg, AsmReg{reg}, stack_reg, frame_off));
          } else {
            prologue.push_back(
                de64_STPd(last_reg, AsmReg{reg}, stack_reg, frame_off));
          }
          frame_off += 16;
          last_reg = AsmReg::make_invalid();
        } else {
          assert(last_bank == Config::GP_BANK && reg_bank == Config::FP_BANK);
          prologue.push_back(de64_STRxu(last_reg, stack_reg, frame_off));
          frame_off += 8;
          last_reg = AsmReg{reg};
        }
      } else {
        last_reg = AsmReg{reg};
      }
    }
    if (last_reg.valid()) {
      if (this->register_file.reg_bank(last_reg) == Config::GP_BANK) {
        prologue.push_back(de64_STRxu(last_reg, stack_reg, frame_off));
      } else {
        assert(this->register_file.reg_bank(last_reg) == Config::FP_BANK);
        prologue.push_back(de64_STRdu(last_reg, stack_reg, frame_off));
      }
    }

    assert(prologue.size() * sizeof(u32) <= func_prologue_alloc);
    assert(prologue.size() < 0x4c);
    this->text_writer.eh_writer.data()[fde_prologue_adv_off] =
        dwarf::DW_CFA_advance_loc | (prologue.size() - 1);

    std::memcpy(this->text_writer.begin_ptr() + func_start_off,
                prologue.data(),
                prologue.size() * sizeof(u32));
    prologue_size = prologue.size() * sizeof(u32);
  }
  if (func_arg_stack_add_off != ~0u) {
    // Patch the placeholder emitted during argument assignment now that the
    // final frame size is known.
    u8 *raw_inst_ptr = this->text_writer.begin_ptr() + func_arg_stack_add_off;
    u32 *inst_ptr = reinterpret_cast<u32 *>(raw_inst_ptr);
    if (needs_stack_frame) {
      *inst_ptr = de64_ADDxi(func_arg_stack_add_reg, DA_SP, final_frame_size);
    } else {
      *inst_ptr = de64_MOV_SPx(func_arg_stack_add_reg, DA_SP);
    }
  }
  if (!func_ret_offs.empty()) {
    u8 *text_data = this->text_writer.begin_ptr();
    // A return at the very end of the function can fall through into the
    // epilogue instead of branching to it.
    if (func_ret_offs.back() == this->text_writer.offset() - 4) {
      this->text_writer.cur_ptr() -= 4;
      func_ret_offs.pop_back();
    }
    for (auto ret_off : func_ret_offs) {
      u32 *write_ptr = reinterpret_cast<u32 *>(text_data + ret_off);
      *write_ptr = de64_B((this->text_writer.offset() - ret_off) / 4);
    }
  }
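  // Encoding note (added for this listing): B takes its displacement in
  // instruction units, so a gen_func_epilog placeholder at ret_off branches
  // forward by (current_offset - ret_off) / 4 instructions to reach the
  // shared epilogue emitted below.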
  // Emit the epilogue.
  this->text_writer.ensure_space(prologue_size + 4);
  if (this->stack.has_dynamic_alloca) {
    ASMNC(MOV_SPx, DA_SP, DA_GP(29));
  }

  AsmReg last_reg = AsmReg::make_invalid();
  u32 frame_off = 16;
  for (auto reg : util::BitSetIterator{saved_regs}) {
    if (last_reg.valid()) {
      const auto reg_bank = this->register_file.reg_bank(AsmReg{reg});
      const auto last_bank = this->register_file.reg_bank(last_reg);
      if (reg_bank == last_bank) {
        if (reg_bank == Config::GP_BANK) {
          ASMNC(LDPx, last_reg, AsmReg{reg}, stack_reg, frame_off);
        } else {
          ASMNC(LDPd, last_reg, AsmReg{reg}, stack_reg, frame_off);
        }
        frame_off += 16;
        last_reg = AsmReg::make_invalid();
      } else {
        assert(last_bank == Config::GP_BANK && reg_bank == Config::FP_BANK);
        ASMNC(LDRxu, last_reg, stack_reg, frame_off);
        frame_off += 8;
        last_reg = AsmReg{reg};
      }
    } else {
      last_reg = AsmReg{reg};
    }
  }
  if (last_reg.valid()) {
    if (this->register_file.reg_bank(last_reg) == Config::GP_BANK) {
      ASMNC(LDRxu, last_reg, stack_reg, frame_off);
    } else {
      ASMNC(LDRdu, last_reg, stack_reg, frame_off);
    }
  }

  if (needs_stack_frame) {
    u32 body_start = func_start_off + func_prologue_alloc;
    this->text_writer.eh_advance(this->text_writer.offset() - body_start + 4);
    this->text_writer.eh_write_inst(dwarf::DW_CFA_restore_state);
    if (final_frame_size <= 0x1f8) {
      ASMNC(LDPx_post, DA_GP(29), DA_GP(30), DA_SP, final_frame_size);
    } else {
      ASMNC(LDPx, DA_GP(29), DA_GP(30), DA_SP, 0);
      ASMNC(ADDxi, DA_SP, DA_SP, final_frame_size);
    }
    this->text_writer.eh_write_inst(dwarf::DW_CFA_advance_loc, 1);
    this->text_writer.eh_write_inst(dwarf::DW_CFA_def_cfa_offset, 0);
  }

  ASMNC(RET, DA_GP(30));
  this->text_writer.remove_prologue_bytes(func_start_off + prologue_size,
                                          func_prologue_alloc - prologue_size);

  auto func_size = this->text_writer.offset() - func_start_off;
  auto func_sym = this->func_syms[func_idx];
  auto func_sec = this->text_writer.get_sec_ref();
  this->assembler.sym_def(func_sym, func_sec, func_start_off, func_size);

  this->text_writer.eh_end_fde();
  this->text_writer.except_encode_func();
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::gen_func_epilog() {
  // The epilogue itself is emitted in finish_func; here we only record the
  // offset and reserve space for the branch that is later patched to jump
  // there.
  func_ret_offs.push_back(this->text_writer.offset());
  this->text_writer.ensure_space(4);
  this->text_writer.cur_ptr() += 4;
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::spill_reg(
    const AsmReg reg, const u32 frame_off, const u32 size) {
  assert(this->stack.frame_used);
  assert((size & (size - 1)) == 0);
  assert(util::align_up(frame_off, size) == frame_off);
  assert(frame_off < 0x1'000'000);
  this->text_writer.ensure_space(8);

  u32 off = frame_off;
  auto addr_base = AsmReg{AsmReg::FP};
  if (off >= 0x1000 * size) [[unlikely]] {
    // Offset too large for the scaled unsigned-immediate addressing mode.
    // ...
  }

  assert(-static_cast<i32>(frame_off) < 0);
  if (reg.id() <= AsmReg::R30) {
    switch (size) {
    case 1: ASMNC(STRBu, reg, addr_base, off); break;
    case 2: ASMNC(STRHu, reg, addr_base, off); break;
    case 4: ASMNC(STRwu, reg, addr_base, off); break;
    case 8: ASMNC(STRxu, reg, addr_base, off); break;
    default: TPDE_UNREACHABLE("invalid register spill size");
    }
  } else {
    switch (size) {
    case 1: ASMNC(STRbu, reg, addr_base, off); break;
    case 2: ASMNC(STRhu, reg, addr_base, off); break;
    case 4: ASMNC(STRsu, reg, addr_base, off); break;
    case 8: ASMNC(STRdu, reg, addr_base, off); break;
    case 16: ASMNC(STRqu, reg, addr_base, off); break;
    default: TPDE_UNREACHABLE("invalid register spill size");
    }
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::load_from_stack(
    const AsmReg dst,
    const i32 frame_off,
    const u32 size,
    const bool sign_extend) {
  assert(this->stack.frame_used);
  assert((size & (size - 1)) == 0);
  assert(util::align_up(frame_off, size) == frame_off);
  assert(frame_off >= 0 && frame_off < 0x1'000'000);
  this->text_writer.ensure_space(8);

  u32 off = frame_off;
  auto addr_base = AsmReg{AsmReg::FP};
  if (off >= 0x1000 * size) [[unlikely]] {
    // Offset too large for the scaled unsigned-immediate addressing mode;
    // switch the base to a scratch register.
    addr_base = this->permanent_scratch_reg;
    ASMNC(ADDxi, addr_base, DA_GP(29), off & ~0xfff);
    off &= 0xfff;
  }

  if (dst.id() <= AsmReg::R30) {
    if (!sign_extend) {
      switch (size) {
      case 1: ASMNC(LDRBu, dst, addr_base, off); break;
      case 2: ASMNC(LDRHu, dst, addr_base, off); break;
      case 4: ASMNC(LDRwu, dst, addr_base, off); break;
      case 8: ASMNC(LDRxu, dst, addr_base, off); break;
      default: TPDE_UNREACHABLE("invalid register spill size");
      }
    } else {
      switch (size) {
      case 1: ASMNC(LDRSBwu, dst, addr_base, off); break;
      case 2: ASMNC(LDRSHwu, dst, addr_base, off); break;
      case 4: ASMNC(LDRSWxu, dst, addr_base, off); break;
      case 8: ASMNC(LDRxu, dst, addr_base, off); break;
      default: TPDE_UNREACHABLE("invalid register spill size");
      }
    }
    return;
  }

  assert(!sign_extend);
  switch (size) {
  case 1: ASMNC(LDRbu, dst, addr_base, off); break;
  case 2: ASMNC(LDRhu, dst, addr_base, off); break;
  case 4: ASMNC(LDRsu, dst, addr_base, off); break;
  case 8: ASMNC(LDRdu, dst, addr_base, off); break;
  case 16: ASMNC(LDRqu, dst, addr_base, off); break;
  default: TPDE_UNREACHABLE("invalid register spill size");
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::load_address_of_stack_var(
    const AsmReg dst, const AssignmentPartRef ap) {
  assert(this->stack.frame_used);
  auto frame_off = ap.variable_stack_off();
  assert(frame_off >= 0);
  if (!ASMIF(ADDxi, dst, DA_GP(29), frame_off)) {
    // Offset not encodable as an immediate: materialize it into dst first.
    // ...
    ASM(ADDx_uxtw, dst, DA_GP(29), dst, 0);
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::mov(
    const AsmReg dst, const AsmReg src, const u32 size) {
  this->text_writer.ensure_space(4);
  assert(dst.valid());
  assert(src.valid());
  if (dst.id() <= AsmReg::SP && src.id() <= AsmReg::SP) {
    // GP -> GP.
    assert(dst.id() != AsmReg::SP && src.id() != AsmReg::SP);
    if (size > 4) {
      ASMNC(MOVx, dst, src);
    } else {
      ASMNC(MOVw, dst, src);
    }
  } else if (dst.id() >= AsmReg::V0 && src.id() >= AsmReg::V0) {
    // FP -> FP: full 128-bit register move.
    ASMNC(ORR16b, dst, src, src);
  } else if (dst.id() <= AsmReg::SP) {
    // FP -> GP.
    assert(dst.id() != AsmReg::SP);
    assert(src.id() >= AsmReg::V0);
    if (size <= 4) {
      ASMNC(FMOVws, dst, src);
    } else {
      ASMNC(FMOVxd, dst, src);
    }
  } else {
    // GP -> FP.
    assert(src.id() <= AsmReg::R30);
    assert(dst.id() >= AsmReg::V0);
    if (size <= 4) {
      ASMNC(FMOVsw, dst, src);
    } else {
      ASMNC(FMOVdx, dst, src);
    }
  }
}
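// Selection summary (added for this listing): MOVw/MOVx for GP<->GP,
// ORR Vd.16b, Vs.16b, Vs.16b for vector moves (ORR of a register with
// itself copies it), and FMOVws/FMOVxd resp. FMOVsw/FMOVdx to move 32- or
// 64-bit values between the register banks without conversion.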
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
AsmReg CompilerA64<Adaptor, Derived, BaseTy, Config>::gval_expr_as_reg(
    GenericValuePart &gv) {
  auto &expr = std::get<typename GenericValuePart::Expr>(gv.state);

  ScratchReg scratch{derived()};
  if (!expr.has_base() && !expr.has_index()) {
    AsmReg dst = scratch.alloc_gp();
    derived()->materialize_constant(expr.disp, Config::GP_BANK, 8, dst);
    expr.disp = 0;
  } else if (!expr.has_base() && expr.has_index()) {
    AsmReg index_reg = expr.index_reg();
    if (std::holds_alternative<ScratchReg>(expr.index)) {
      scratch = std::move(std::get<ScratchReg>(expr.index));
    } else {
      (void)scratch.alloc_gp();
    }
    AsmReg dst = scratch.cur_reg();
    if ((expr.scale & (expr.scale - 1)) == 0) {
      const auto shift = util::cnt_tz<u64>(expr.scale);
      ASM(LSLxi, dst, index_reg, shift);
    } else {
      ScratchReg scratch2{derived()};
      AsmReg tmp2 = scratch2.alloc_gp();
      derived()->materialize_constant(expr.scale, Config::GP_BANK, 8, tmp2);
      ASM(MULx, dst, index_reg, tmp2);
    }
  } else if (expr.has_base() && expr.has_index()) {
    AsmReg base_reg = expr.base_reg();
    AsmReg index_reg = expr.index_reg();
    if (std::holds_alternative<ScratchReg>(expr.base)) {
      scratch = std::move(std::get<ScratchReg>(expr.base));
    } else if (std::holds_alternative<ScratchReg>(expr.index)) {
      scratch = std::move(std::get<ScratchReg>(expr.index));
    } else {
      (void)scratch.alloc_gp();
    }
    AsmReg dst = scratch.cur_reg();
    if ((expr.scale & (expr.scale - 1)) == 0) {
      const auto shift = util::cnt_tz<u64>(expr.scale);
      ASM(ADDx_lsl, dst, base_reg, index_reg, shift);
    } else {
      ScratchReg scratch2{derived()};
      AsmReg tmp2 = scratch2.alloc_gp();
      derived()->materialize_constant(expr.scale, Config::GP_BANK, 8, tmp2);
      ASM(MADDx, dst, index_reg, tmp2, base_reg);
    }
  } else if (expr.has_base() && !expr.has_index()) {
    AsmReg base_reg = expr.base_reg();
    if (std::holds_alternative<ScratchReg>(expr.base)) {
      scratch = std::move(std::get<ScratchReg>(expr.base));
    } else {
      (void)scratch.alloc_gp();
    }
    AsmReg dst = scratch.cur_reg();
    if (expr.disp != 0 && ASMIF(ADDxi, dst, base_reg, expr.disp)) {
      expr.disp = 0;
    } else if (dst != base_reg) {
      ASM(MOVx, dst, base_reg);
    }
  } else {
    TPDE_UNREACHABLE("inconsistent GenericValuePart::Expr");
  }

  AsmReg dst = scratch.cur_reg();
  if (expr.disp != 0) {
    if (!ASMIF(ADDxi, dst, dst, expr.disp)) {
      ScratchReg scratch2{derived()};
      AsmReg tmp2 = scratch2.alloc_gp();
      derived()->materialize_constant(expr.disp, Config::GP_BANK, 8, tmp2);
      ASM(ADDx, dst, dst, tmp2);
    }
  }
  gv.state = std::move(scratch);
  return dst;
}
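// Example (added for this listing): for an Expr with base, index, scale 8
// and disp 16, this emits
//   ADD Xd, Xbase, Xindex, LSL #3
//   ADD Xd, Xd, #16
// while a non-power-of-two scale like 24 goes through
// materialize_constant + MADD (Xd = Xindex * 24 + Xbase) instead of the
// shifted add.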
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::alloca_fixed(
    u64 size, u32 align, ValuePart &res) {
  assert(this->stack.has_dynamic_alloca &&
         "function marked as not having dynamic allocas can't have alloca");
  assert(align != 0 && (align & (align - 1)) == 0 && "invalid alignment");
  size = tpde::util::align_up(size, 16);
  AsmReg res_reg = res.alloc_reg(this);
  if (size >= 0x10'0000) {
    // Size doesn't fit the (optionally shifted) 12-bit immediate;
    // materialize it into a scratch register.
    ScratchReg tmp_scratch{this};
    AsmReg tmp = tmp_scratch.alloc_gp();
    derived()->materialize_constant(size, Config::GP_BANK, 8, tmp);
    ASM(SUBx_uxtx, res_reg, DA_SP, tmp, 0);
  } else if (size >= 0x1000) {
    ASM(SUBxi, res_reg, DA_SP, size & 0xff'f000);
    if (size & 0xfff) {
      ASM(SUBxi, res_reg, res_reg, size & 0xfff);
    }
  } else {
    ASM(SUBxi, res_reg, DA_SP, size & 0xfff);
  }
  if (align > 16) {
    ASM(ANDxi, res_reg, res_reg, ~(u64{align} - 1));
  }
  ASM(MOV_SPx, DA_SP, res_reg);
}
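// Worked example (added for this listing): alloca_fixed(0x2340, 16, res)
// emits SUB Xres, SP, #0x2000; SUB Xres, Xres, #0x340; MOV SP, Xres --
// splitting the constant because ADD/SUB immediates are 12 bits, optionally
// shifted left by 12.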
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::alloca_dynamic(
    u64 elem_size, ValuePart &&count, u32 align, ValuePart &res) {
  assert(this->stack.has_dynamic_alloca &&
         "function marked as not having dynamic allocas can't have alloca");
  assert(align != 0 && (align & (align - 1)) == 0 && "invalid alignment");
  AsmReg size_reg = count.has_reg() ? count.cur_reg() : count.load_to_reg(this);
  AsmReg res_reg = res.alloc_try_reuse(this, count);

  if (elem_size == 0) {
    ASM(MOVZw, res_reg, 0);
  } else if ((elem_size & (elem_size - 1)) == 0) {
    const auto shift = util::cnt_tz(elem_size);
    if (shift <= 4) {
      // The extended-register form of SUB supports shifts of 0-4.
      ASM(SUBx_uxtx, res_reg, DA_SP, size_reg, shift);
    } else {
      ASM(LSLxi, res_reg, size_reg, shift);
      ASM(SUBx_uxtx, res_reg, DA_SP, res_reg, 0);
    }
  } else {
    ScratchReg tmp_scratch{this};
    AsmReg tmp = tmp_scratch.alloc_gp();
    derived()->materialize_constant(elem_size, Config::GP_BANK, 8, tmp);
    ASM(MULx, res_reg, size_reg, tmp);
    ASM(SUBx_uxtx, res_reg, DA_SP, res_reg, 0);
  }

  align = align > 16 ? align : 16;
  if (elem_size & (align - 1)) {
    ASM(ANDxi, res_reg, res_reg, ~(u64{align} - 1));
  }
  ASM(MOV_SPx, DA_SP, res_reg);
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::materialize_constant(
    const u64 *data, const RegBank bank, const u32 size, AsmReg dst) {
  this->text_writer.ensure_space(5 * 4);

  const auto const_u64 = data[0];
  if (bank == Config::GP_BANK) {
    // ...
    if (const_u64 == 0) {
      ASMNC(MOVZw, dst, 0);
      return;
    }
    // de64_MOVconst emits a MOVZ/MOVN/MOVK sequence and returns the number
    // of instructions written.
    this->text_writer.cur_ptr() += 4 * de64_MOVconst(
        reinterpret_cast<u32 *>(this->text_writer.cur_ptr()), dst, const_u64);
    return;
  }

  assert(bank == Config::FP_BANK);
  if (size == 4) {
    if (ASMIF(FMOVsi, dst, std::bit_cast<float>((u32)const_u64))) {
      return;
    } else if (ASMIF(MOVId, dst, static_cast<u32>(const_u64))) {
      return;
    }
  } else if (size == 8) {
    if (ASMIF(FMOVdi, dst, std::bit_cast<double>(const_u64))) {
      return;
    } else if (ASMIF(MOVId, dst, const_u64)) {
      return;
    }
  } else if (size == 16) {
    const auto high_u64 = data[1];
    if (const_u64 == high_u64 && ASMIF(MOVI2d, dst, const_u64)) {
      return;
    } else if (high_u64 == 0 && ASMIF(MOVId, dst, const_u64)) {
      return;
    }
  }

  // Fall back to loading the constant from .rodata.
  auto rodata = this->assembler.get_default_section(SectionKind::ReadOnly);
  std::span<const u8> raw_data{reinterpret_cast<const u8 *>(data), size};
  auto sym = this->assembler.sym_def_data(
      rodata, /* ... */ raw_data, /* ... */);
  this->text_writer.ensure_space(8);
  this->reloc_text(
      sym, elf::R_AARCH64_ADR_PREL_PG_HI21, this->text_writer.offset(), 0);
  // ADRP (elided)
  this->reloc_text(
      sym, elf::R_AARCH64_LDST128_ABS_LO12_NC, this->text_writer.offset(), 0);
  // LDR and return (elided)
  // ...

  TPDE_FATAL("unable to materialize constant");
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
AsmReg
    CompilerA64<Adaptor, Derived, BaseTy, Config>::select_fixed_assignment_reg(
        AssignmentPartRef ap, IRValueRef) {
  RegBank bank = ap.bank();
  if (bank == Config::FP_BANK && ap.part_size() > 8) {
    // Only the low 64 bits of vector registers are callee-saved, so larger
    // parts cannot live in a fixed register across calls.
    return AsmReg::make_invalid();
  }

  assert(bank.id() < Config::NUM_BANKS);
  auto reg_mask = this->register_file.bank_regs(bank);
  reg_mask &= ~fixed_assignment_nonallocatable_mask;

  const auto find_possible_regs = [this,
                                   reg_mask](const u64 preferred_regs) -> u64 {
    u64 free_regs = this->register_file.allocatable & ~this->register_file.used;
    return free_regs & preferred_regs & reg_mask;
  };

  auto csr = derived()->cur_cc_assigner()->get_ccinfo().callee_saved_regs;
  u64 possible_regs;
  if (!this->stack.is_leaf_function) {
    // Prefer callee-saved registers in non-leaf functions...
    possible_regs = find_possible_regs(csr);
    // ...
  } else {
    // ...and caller-saved registers in leaf functions.
    possible_regs = find_possible_regs(~csr);
    if (possible_regs == 0) {
      possible_regs = find_possible_regs(csr);
    }
  }

  if (possible_regs == 0) {
    return AsmReg::make_invalid();
  }

  // Prefer a register that is currently unused.
  if ((possible_regs & ~this->register_file.used) != 0) {
    return AsmReg{util::cnt_tz(possible_regs & ~this->register_file.used)};
  }

  for (const auto reg_id : util::BitSetIterator<>{possible_regs}) {
    const auto reg = AsmReg{reg_id};
    assert(!this->register_file.is_fixed(reg));

    const auto local_idx = this->register_file.reg_local_idx(reg);
    const auto part = this->register_file.reg_part(reg);
    assert(local_idx != Base::INVALID_VAL_LOCAL_IDX);

    auto *assignment = this->val_assignment(local_idx);
    auto ap = AssignmentPartRef{assignment, part};
    if (ap.modified()) {
      continue;
    }
    // ...
  }

  return AsmReg::make_invalid();
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
typename CompilerA64<Adaptor, Derived, BaseTy, Config>::Jump
    CompilerA64<Adaptor, Derived, BaseTy, Config>::invert_jump(Jump jmp) {
  switch (jmp.kind) {
  // ...
  default: TPDE_UNREACHABLE("invalid jump kind");
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
typename CompilerA64<Adaptor, Derived, BaseTy, Config>::Jump
    CompilerA64<Adaptor, Derived, BaseTy, Config>::swap_jump(Jump jmp) {
  switch (jmp.kind) {
  // ...
  default: TPDE_UNREACHABLE("invalid jump kind for swap_jump");
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::generate_raw_jump(
    Jump jmp, Label target_label) {
  const auto is_pending = this->text_writer.label_is_pending(target_label);
  this->text_writer.ensure_space(4);
  if (jmp.kind == Jump::jmp) {
    // Unconditional branch.
    if (is_pending) {
      ASMNC(B, 0);
      this->text_writer.label_ref(target_label,
                                  this->text_writer.offset() - 4,
                                  LabelFixupKind::AARCH64_BR);
    } else {
      const auto label_off = this->text_writer.label_offset(target_label);
      const auto cur_off = this->text_writer.offset();
      assert(cur_off >= label_off);
      const auto diff = cur_off - label_off;
      assert((diff & 0b11) == 0);
      assert(diff < 128 * 1024 * 1024);

      ASMNC(B, -static_cast<ptrdiff_t>(diff) / 4);
    }
    return;
  }

  if (jmp.kind == Jump::Cbz || jmp.kind == Jump::Cbnz) {
    u32 off = 0;
    if (!is_pending) {
      const auto label_off = this->text_writer.label_offset(target_label);
      const auto cur_off = this->text_writer.offset();
      assert(cur_off >= label_off);
      off = cur_off - label_off;
      assert((off & 0b11) == 0);
      assert(off < 128 * 1024 * 1024);
    }

    if (off <= 1024 * 1024) {
      // Target within CBZ/CBNZ's +-1 MiB imm19 range.
      auto imm19 = -static_cast<ptrdiff_t>(off) / 4;
      if (jmp.kind == Jump::Cbz) {
        if (jmp.cmp_is_32) {
          ASMNC(CBZw, jmp.cmp_reg, imm19);
        } else {
          ASMNC(CBZx, jmp.cmp_reg, imm19);
        }
      } else {
        if (jmp.cmp_is_32) {
          ASMNC(CBNZw, jmp.cmp_reg, imm19);
        } else {
          ASMNC(CBNZx, jmp.cmp_reg, imm19);
        }
      }
      if (is_pending) {
        this->text_writer.label_ref(target_label,
                                    this->text_writer.offset() - 4,
                                    LabelFixupKind::AARCH64_COND_BR);
      }
    } else {
      // Out of range: invert the condition to skip an unconditional branch.
      assert(!is_pending);
      this->text_writer.ensure_space(2 * 4);
      if (jmp.kind == Jump::Cbz) {
        if (jmp.cmp_is_32) {
          ASMNC(CBNZw, jmp.cmp_reg, 2);
        } else {
          ASMNC(CBNZx, jmp.cmp_reg, 2);
        }
      } else {
        if (jmp.cmp_is_32) {
          ASMNC(CBZw, jmp.cmp_reg, 2);
        } else {
          ASMNC(CBZx, jmp.cmp_reg, 2);
        }
      }
      ASMNC(B, -static_cast<ptrdiff_t>(off + 4) / 4);
    }
    return;
  }

  if (jmp.kind == Jump::Tbz || jmp.kind == Jump::Tbnz) {
    u32 off = 0;
    if (!is_pending) {
      const auto label_off = this->text_writer.label_offset(target_label);
      const auto cur_off = this->text_writer.offset();
      assert(cur_off >= label_off);
      off = cur_off - label_off;
      assert((off & 0b11) == 0);
      assert(off < 128 * 1024 * 1024);
    }

    if (off <= 32 * 1024) {
      // Target within TBZ/TBNZ's +-32 KiB imm14 range.
      auto imm14 = -static_cast<ptrdiff_t>(off) / 4;
      if (jmp.kind == Jump::Tbz) {
        ASMNC(TBZ, jmp.cmp_reg, jmp.test_bit, imm14);
      } else {
        ASMNC(TBNZ, jmp.cmp_reg, jmp.test_bit, imm14);
      }
      if (is_pending) {
        this->text_writer.label_ref(target_label,
                                    this->text_writer.offset() - 4,
                                    LabelFixupKind::AARCH64_TEST_BR);
      }
    } else {
      assert(!is_pending);
      this->text_writer.ensure_space(2 * 4);
      if (jmp.kind == Jump::Tbz) {
        ASMNC(TBNZ, jmp.cmp_reg, jmp.test_bit, 2);
      } else {
        ASMNC(TBZ, jmp.cmp_reg, jmp.test_bit, 2);
      }
      ASMNC(B, -static_cast<ptrdiff_t>(off + 4) / 4);
    }
    return;
  }

  // Conditional branch based on flags.
  Da64Cond cond, cond_compl;
  switch (jmp.kind) {
  // ...
  default: TPDE_UNREACHABLE("invalid jump kind");
  }

  u32 off = 0;
  if (!is_pending) {
    const auto label_off = this->text_writer.label_offset(target_label);
    const auto cur_off = this->text_writer.offset();
    assert(cur_off >= label_off);
    off = cur_off - label_off;
    assert((off & 0b11) == 0);
    assert(off < 128 * 1024 * 1024);
  }

  if (off <= 1024 * 1024) {
    ASMNC(BCOND, cond, -static_cast<ptrdiff_t>(off) / 4);
    if (is_pending) {
      this->text_writer.label_ref(target_label,
                                  this->text_writer.offset() - 4,
                                  LabelFixupKind::AARCH64_COND_BR);
    }
  } else {
    assert(!is_pending);
    this->text_writer.ensure_space(2 * 4);
    ASMNC(BCOND, cond_compl, 2);
    ASMNC(B, -static_cast<ptrdiff_t>(off + 4) / 4);
  }
}
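// Range summary (added for this listing): B reaches +-128 MiB (imm26),
// B.cond/CBZ/CBNZ reach +-1 MiB (imm19) and TBZ/TBNZ only +-32 KiB (imm14),
// which is why each out-of-range case above flips the condition and pairs
// it with an unconditional B.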
/// Convert jump condition to disarm's Da64Cond.
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
Da64Cond CompilerA64<Adaptor, Derived, BaseTy, Config>::jump_to_cond(Jump jmp) {
  switch (jmp.kind) {
  // ...
  default: TPDE_UNREACHABLE("invalid jump kind for conversion to Da64Cond");
  }
}
/// Set dst to 1 if cc is true, otherwise set it to zero.
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::generate_raw_set(
    Jump cc, AsmReg dst) {
  // ...
}
/// Set all bits of dst to 1 if cc is true, otherwise set dst to zero.
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::generate_raw_mask(
    Jump cc, AsmReg dst) {
  // ...
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::generate_raw_select(
    Jump cc, AsmReg dst, AsmReg true_select, AsmReg false_select, bool is_64) {
  this->text_writer.ensure_space(4);
  const auto cond = jump_to_cond(cc);
  if (is_64) {
    ASMNC(CSELx, dst, true_select, false_select, cond);
  } else {
    ASMNC(CSELw, dst, true_select, false_select, cond);
  }
}
/// Integer extension. src is not modified.
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> class BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::generate_raw_intext(
    AsmReg dst, AsmReg src, bool sign, u32 from, u32 to) {
  assert(from < to && to <= 64);
  if (sign) {
    if (to <= 32) {
      ASM(SBFXw, dst, src, 0, from);
    } else {
      ASM(SBFXx, dst, src, 0, from);
    }
  } else {
    if (to <= 32) {
      ASM(UBFXw, dst, src, 0, from);
    } else {
      ASM(UBFXx, dst, src, 0, from);
    }
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::generate_call(
    std::variant<SymRef, ValuePart> &&target,
    std::span<CallArg> arguments,
    typename Base::ValueRef *result,
    bool variable_args) {
  // ...
  CallBuilder cb{/* ... */};
  for (auto &arg : arguments) {
    cb.add_arg(std::move(arg));
  }
  cb.call(std::move(target));
  if (result) {
    cb.add_ret(*result);
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::switch_emit_cmp(
    AsmReg cmp_reg, AsmReg tmp_reg, u64 case_value, bool width_is_32) {
  if (width_is_32) {
    if (!ASMIF(CMPwi, cmp_reg, case_value)) {
      derived()->materialize_constant(case_value, Config::GP_BANK, 4, tmp_reg);
      ASM(CMPw, cmp_reg, tmp_reg);
    }
  } else {
    if (!ASMIF(CMPxi, cmp_reg, case_value)) {
      derived()->materialize_constant(case_value, Config::GP_BANK, 8, tmp_reg);
      ASM(CMPx, cmp_reg, tmp_reg);
    }
  }
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::switch_emit_cmpeq(
    Label case_label,
    AsmReg cmp_reg,
    AsmReg tmp_reg,
    u64 case_value,
    bool width_is_32) {
  switch_emit_cmp(cmp_reg, tmp_reg, case_value, width_is_32);
  // Branch to case_label when the comparison was equal.
  // ...
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
FunctionWriterBase::JumpTable *
    CompilerA64<Adaptor, Derived, BaseTy, Config>::switch_create_jump_table(
        Label default_label,
        AsmReg cmp_reg,
        AsmReg tmp_reg,
        u64 low_bound,
        u64 high_bound,
        bool width_is_32) {
  // Rebase the selector to zero.
  if (low_bound > 0) {
    if (width_is_32) {
      if (!ASMIF(SUBwi, cmp_reg, cmp_reg, low_bound)) {
        derived()->materialize_constant(low_bound, Config::GP_BANK, 4, tmp_reg);
        ASM(SUBw, cmp_reg, cmp_reg, tmp_reg);
      }
    } else {
      if (!ASMIF(SUBxi, cmp_reg, cmp_reg, low_bound)) {
        derived()->materialize_constant(low_bound, Config::GP_BANK, 8, tmp_reg);
        ASM(SUBx, cmp_reg, cmp_reg, tmp_reg);
      }
    }
  }

  // Branch to default_label when the selector is out of range.
  switch_emit_cmp(cmp_reg, tmp_reg, high_bound - low_bound, width_is_32);
  // ...

  u64 range = high_bound - low_bound + 1;
  return &this->text_writer.create_jump_table(
      range, cmp_reg, tmp_reg, width_is_32);
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
void CompilerA64<Adaptor, Derived, BaseTy, Config>::switch_emit_binary_step(
    Label case_label, /* ... */
    AsmReg cmp_reg, AsmReg tmp_reg, u64 case_value, bool width_is_32) {
  // ...
  switch_emit_cmpeq(case_label, cmp_reg, tmp_reg, case_value, width_is_32);
  // ...
}
template <IRAdaptor Adaptor,
          typename Derived,
          template <typename, typename, typename> typename BaseTy,
          typename Config>
typename CompilerA64<Adaptor, Derived, BaseTy, Config>::ScratchReg
    CompilerA64<Adaptor, Derived, BaseTy, Config>::tls_get_addr(
        SymRef sym, TLSModel model) {
  switch (model) {
  // ...
  case TLSModel::GlobalDynamic: {
    assert(!this->stack.is_leaf_function);
    this->stack.generated_call = true;
    ScratchReg r0_scratch{this};
    AsmReg r0 = r0_scratch.alloc_specific(AsmReg::R0);
    ScratchReg r1_scratch{this};
    AsmReg r1 = r1_scratch.alloc_specific(AsmReg::R1);
    // ...
    if (this->register_file.is_used(Reg{AsmReg::LR})) {
      // ...
    }
    this->text_writer.ensure_space(0x18);
    this->reloc_text(
        sym, elf::R_AARCH64_TLSDESC_ADR_PAGE21, this->text_writer.offset(), 0);
    ASMNC(ADRP, r0, 0, 0);
    this->reloc_text(
        sym, elf::R_AARCH64_TLSDESC_LD64_LO12, this->text_writer.offset(), 0);
    ASMNC(LDRxu, r1, r0, 0);
    this->reloc_text(
        sym, elf::R_AARCH64_TLSDESC_ADD_LO12, this->text_writer.offset(), 0);
    ASMNC(ADDxi, r0, r0, 0);
    this->reloc_text(
        sym, elf::R_AARCH64_TLSDESC_CALL, this->text_writer.offset(), 0);
    ASMNC(BLR, r1);
    // Read TPIDR_EL0, the EL0 thread pointer.
    ASMNC(MRS, r1, 0xde82);
    ASMNC(ADDx, r0, r1, r0);
    return r0_scratch;
  }
  }
}
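// Sequence note (added for this listing): this is the standard AArch64
// TLSDESC code sequence; the four TLSDESC relocations let the linker relax
// it for the chosen TLS model. After the descriptor call, x0 holds the
// symbol's offset from the thread pointer, so adding TPIDR_EL0 (read via
// MRS; 0xde82 is its system-register encoding) yields the final address.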