INSERT_PASS_AFTER (pass_regrename, 1, pass_fma_steering);
INSERT_PASS_BEFORE (pass_reorder_blocks, 1, pass_track_speculation);
INSERT_PASS_BEFORE (pass_late_thread_prologue_and_epilogue, 1, pass_switch_pstate_sm);
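+/* The late copy of pass_track_speculation must run after
+   pass_switch_pstate_sm has inserted the conditional branches used
+   for streaming-compatible code; see aarch64-speculation.cc.  */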
+INSERT_PASS_BEFORE (pass_late_thread_prologue_and_epilogue, 1, pass_late_track_speculation);
INSERT_PASS_AFTER (pass_machine_reorg, 1, pass_tag_collision_avoidance);
INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_insert_bti);
INSERT_PASS_AFTER (pass_if_after_combine, 1, pass_cc_fusion);
rtl_opt_pass *make_pass_aarch64_early_ra (gcc::context *);
rtl_opt_pass *make_pass_fma_steering (gcc::context *);
rtl_opt_pass *make_pass_track_speculation (gcc::context *);
+rtl_opt_pass *make_pass_late_track_speculation (gcc::context *);
rtl_opt_pass *make_pass_tag_collision_avoidance (gcc::context *);
rtl_opt_pass *make_pass_insert_bti (gcc::context *ctxt);
rtl_opt_pass *make_pass_cc_fusion (gcc::context *ctxt);
(clobber (reg:CC CC_REGNUM))]
""
"bl\t__arm_sme_state"
+ [(set_attr "is_call" "yes")]
)
(define_insn "aarch64_read_svcr"
(clobber (reg:CC CC_REGNUM))]
""
"bl\t__arm_tpidr2_save"
+ [(set_attr "is_call" "yes")]
)
;; Set PSTATE.ZA to 1. If ZA was previously dormant or active,
(clobber (reg:CC CC_REGNUM))]
""
"bl\t__arm_tpidr2_restore"
+ [(set_attr "is_call" "yes")]
)
;; Check whether a lazy save set up by aarch64_save_za was committed
#include "insn-config.h"
#include "recog.h"
-/* This pass scans the RTL just before the final branch
- re-organisation pass. The aim is to identify all places where
- there is conditional control flow and to insert code that tracks
- any speculative execution of a conditional branch.
+/* This pass scans the RTL insns late in the RTL pipeline. The aim is
+ to identify all places where there is conditional control flow and
+ to insert code that tracks any speculative execution of a conditional
+ branch.
To do this we reserve a call-clobbered register (so that it can be
initialized very early in the function prologue) that can then be
carry the tracking state in SP for this period of time unless the
tracker value is needed at that point in time.
- We run the pass just before the final branch reorganization pass so
- that we can handle most of the conditional branch cases using the
- standard edge insertion code. The reorg pass will hopefully clean
- things up for us afterwards so that the results aren't too
- horrible. */
+ We run the pass while the CFG is still present so that we can handle
+ most of the conditional branch cases using the standard edge insertion
+ code. Where possible, we prefer to run the pass just before the final
+ branch reorganization pass. That pass will then hopefully clean things
+ up afterwards so that the results aren't too horrible.
+
+ However, we must run the pass after all conditional branches have
+ been inserted. switch_pstate_sm inserts conditional branches for
+ streaming-compatible code, and so for streaming-compatible functions,
+ this pass must run after that one.
+
+ We handle this by having two copies of the pass: the normal one that
+ runs before branch reorganization, and a "late" one that runs just
+ before late_thread_prologue_and_epilogue. The two passes have
+ mutually exclusive gates, with the normal pass being chosen wherever
+ possible. */
/* Generate a code sequence to clobber SP if speculating incorrectly.  */
static rtx_insn *
needs_tracking = true;
}
- if (CALL_P (insn))
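+ /* Also treat insns that carry the "is_call" attribute as calls: they
+ are calls in disguise, such as the SME insns that expand to
+ "bl __arm_sme_state" or "bl __arm_tpidr2_save". */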
+ if (CALL_P (insn)
+ || (NONDEBUG_INSN_P (insn)
+ && recog_memoized (insn) >= 0
+ && get_attr_is_call (insn) == IS_CALL_YES))
{
bool tailcall
- = (SIBLING_CALL_P (insn)
- || find_reg_note (insn, REG_NORETURN, NULL_RTX));
+ = (CALL_P (insn)
+ && (SIBLING_CALL_P (insn)
+ || find_reg_note (insn, REG_NORETURN, NULL_RTX)));
/* Tailcalls are like returns; we can eliminate the
transfer between the tracker register and SP if we
class pass_track_speculation : public rtl_opt_pass
{
- public:
- pass_track_speculation(gcc::context *ctxt)
- : rtl_opt_pass(pass_data_aarch64_track_speculation, ctxt)
- {}
+public:
+ pass_track_speculation(gcc::context *ctxt, bool is_late)
+ : rtl_opt_pass(pass_data_aarch64_track_speculation, ctxt),
+ is_late (is_late)
+ {}
/* opt_pass methods: */
virtual bool gate (function *)
{
- return aarch64_track_speculation;
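+ /* The two copies of this pass have mutually exclusive gates: the late
+ copy runs for streaming-compatible functions, whose extra conditional
+ branches are only created by switch_pstate_sm; the normal copy runs
+ for everything else. */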
+ return (aarch64_track_speculation
+ && (is_late == bool (TARGET_STREAMING_COMPATIBLE)));
}
virtual unsigned int execute (function *)
{
return aarch64_do_track_speculation ();
}
+
+private:
+ /* Whether this is the late pass that runs before late prologue/epilogue
+ insertion, or the normal pass that runs before branch reorganization. */
+ bool is_late;
}; // class pass_track_speculation.
} // anon namespace.
rtl_opt_pass *
make_pass_track_speculation (gcc::context *ctxt)
{
- return new pass_track_speculation (ctxt);
+ return new pass_track_speculation (ctxt, /*is_late=*/false);
+}
+
+rtl_opt_pass *
+make_pass_late_track_speculation (gcc::context *ctxt)
+{
+ return new pass_track_speculation (ctxt, /*is_late=*/true);
}
return gen_rtx_SET (pc_rtx, x);
}
+/* Return an rtx that branches to LABEL based on the value of bit BITNUM of X.
+ If CODE is NE, it branches to LABEL when the bit is set; if CODE is EQ,
+ it branches to LABEL when the bit is clear. When speculation tracking
+ is enabled, first emit a flag-setting TST of X against the bit mask, so
+ that the branch condition is available in the condition codes. */
+
+static rtx
+aarch64_gen_test_and_branch (rtx_code code, rtx x, int bitnum,
+ rtx_code_label *label)
+{
+ auto mode = GET_MODE (x);
+ if (aarch64_track_speculation)
+ {
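+ /* When tracking speculation, avoid the composite test-and-branch
+ instructions, which do not set the condition flags; instead emit an
+ explicit TST so that the branch condition is in CC and the tracking
+ pass can instrument it with conditional selects. */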
+ auto mask = gen_int_mode (HOST_WIDE_INT_1U << bitnum, mode);
+ emit_insn (gen_aarch64_and3nr_compare0 (mode, x, mask));
+ rtx cc_reg = gen_rtx_REG (CC_NZVmode, CC_REGNUM);
+ rtx cond = gen_rtx_fmt_ee (code, CC_NZVmode, cc_reg, const0_rtx);
+ return gen_condjump (cond, cc_reg, label);
+ }
+ return gen_aarch64_tb (code, mode, mode,
+ x, gen_int_mode (bitnum, mode), label);
+}
+
/* Consider the operation:
OPERANDS[0] = CODE (OPERANDS[1], OPERANDS[2]) + OPERANDS[3]
gcc_assert (local_mode != 0);
auto already_ok_cond = (local_mode & AARCH64_FL_SM_ON ? NE : EQ);
auto *label = gen_label_rtx ();
- auto *jump = emit_jump_insn (gen_aarch64_tb (already_ok_cond, DImode, DImode,
- old_svcr, const0_rtx, label));
+ auto branch = aarch64_gen_test_and_branch (already_ok_cond, old_svcr, 0,
+ label);
+ auto *jump = emit_jump_insn (branch);
JUMP_LABEL (jump) = label;
return label;
}
(define_enum_attr "arch" "arches" (const_string "any"))
+;; Whether a normal INSN in fact contains a call. Sometimes we represent
+;; calls to functions that use an ad-hoc ABI as normal insns, both for
+;; optimization reasons and to avoid the need to describe the ABI to
+;; target-independent code.
+(define_attr "is_call" "no,yes" (const_string "no"))
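+
+;; For example, the SME insns that expand to "bl __arm_sme_state" or
+;; "bl __arm_tpidr2_save" are normal insns with is_call set to "yes";
+;; the speculation-tracking pass checks the attribute so that it still
+;; treats them as calls.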
+
;; [For compatibility with Arm in pipeline models]
;; Attribute that specifies whether or not the instruction touches fp
;; registers.
[(set_attr "type" "alus_imm")]
)
-(define_insn "*and<mode>3nr_compare0"
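+;; The "@" prefix allows gen_aarch64_and3nr_compare0 to be called with
+;; the mode as a run-time argument, as aarch64_gen_test_and_branch does.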
+(define_insn "@aarch64_and<mode>3nr_compare0"
[(set (reg:CC_NZV CC_REGNUM)
(compare:CC_NZV
(and:GPI (match_operand:GPI 0 "register_operand")
--- /dev/null
+// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables -mtrack-speculation" }
+// { dg-final { check-function-bodies "**" "" } }
+
+void ns_callee ();
+void s_callee () [[arm::streaming]];
+void sc_callee () [[arm::streaming_compatible]];
+
+void ns_callee_stack (int, int, int, int, int, int, int, int, int);
+
+struct callbacks {
+ void (*ns_ptr) ();
+ void (*s_ptr) () [[arm::streaming]];
+ void (*sc_ptr) () [[arm::streaming_compatible]];
+};
+
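+/* In the bodies below, x15 holds the speculation tracker: the
+   "cmp sp, #0; csetm x15, ne" sequences set it to all-ones on the
+   correctly-speculated path, the csels update it at each conditional
+   branch, and the "and x14, x14, x15" sequences fold it into SP around
+   calls so that incorrect speculation clobbers SP.  */
+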
+/*
+** sc_caller_sme:
+** cmp sp, #?0
+** csetm x15, ne
+** stp x29, x30, \[sp, #?-96\]!
+** mov x29, sp
+** cntd x16
+** str x16, \[sp, #?24\]
+** stp d8, d9, \[sp, #?32\]
+** stp d10, d11, \[sp, #?48\]
+** stp d12, d13, \[sp, #?64\]
+** stp d14, d15, \[sp, #?80\]
+** mrs x16, svcr
+** str x16, \[x29, #?16\]
+** ldr x16, \[x29, #?16\]
+** tst x16, #?1
+** beq [^\n]*
+** csel x15, x15, xzr, ne
+** smstop sm
+** b [^\n]*
+** csel x15, x15, xzr, eq
+** mov x14, sp
+** and x14, x14, x15
+** mov sp, x14
+** bl ns_callee
+** cmp sp, #?0
+** csetm x15, ne
+** ldr x16, \[x29, #?16\]
+** tst x16, #?1
+** beq [^\n]*
+** csel x15, x15, xzr, ne
+** smstart sm
+** b [^\n]*
+** csel x15, x15, xzr, eq
+** ldr x16, \[x29, #?16\]
+** tst x16, #?1
+** bne [^\n]*
+** csel x15, x15, xzr, eq
+** smstart sm
+** b [^\n]*
+** csel x15, x15, xzr, ne
+** mov x14, sp
+** and x14, x14, x15
+** mov sp, x14
+** bl s_callee
+** cmp sp, #?0
+** csetm x15, ne
+** ldr x16, \[x29, #?16\]
+** tst x16, #?1
+** bne [^\n]*
+** csel x15, x15, xzr, eq
+** smstop sm
+** b [^\n]*
+** csel x15, x15, xzr, ne
+** mov x14, sp
+** and x14, x14, x15
+** mov sp, x14
+** bl sc_callee
+** cmp sp, #?0
+** csetm x15, ne
+** ldp d8, d9, \[sp, #?32\]
+** ldp d10, d11, \[sp, #?48\]
+** ldp d12, d13, \[sp, #?64\]
+** ldp d14, d15, \[sp, #?80\]
+** ldp x29, x30, \[sp\], #?96
+** mov x14, sp
+** and x14, x14, x15
+** mov sp, x14
+** ret
+*/
+void
+sc_caller_sme () [[arm::streaming_compatible]]
+{
+ ns_callee ();
+ s_callee ();
+ sc_callee ();
+}
+
+#pragma GCC target "+nosme"
+
+/*
+** sc_caller:
+** cmp sp, #?0
+** csetm x15, ne
+** stp x29, x30, \[sp, #?-96\]!
+** mov x29, sp
+** cntd x16
+** str x16, \[sp, #?24\]
+** stp d8, d9, \[sp, #?32\]
+** stp d10, d11, \[sp, #?48\]
+** stp d12, d13, \[sp, #?64\]
+** stp d14, d15, \[sp, #?80\]
+** mov x14, sp
+** and x14, x14, x15
+** mov sp, x14
+** bl __arm_sme_state
+** cmp sp, #?0
+** csetm x15, ne
+** str x0, \[x29, #?16\]
+** ...
+** bl sc_callee
+** cmp sp, #?0
+** csetm x15, ne
+** ldp d8, d9, \[sp, #?32\]
+** ldp d10, d11, \[sp, #?48\]
+** ldp d12, d13, \[sp, #?64\]
+** ldp d14, d15, \[sp, #?80\]
+** ldp x29, x30, \[sp\], #?96
+** mov x14, sp
+** and x14, x14, x15
+** mov sp, x14
+** ret
+*/
+void
+sc_caller () [[arm::streaming_compatible]]
+{
+ ns_callee ();
+ sc_callee ();
+}
+
+/*
+** sc_caller_x0:
+** ...
+** mov x10, x0
+** mov x14, sp
+** and x14, x14, x15
+** mov sp, x14
+** bl __arm_sme_state
+** ...
+** str wzr, \[x10\]
+** ...
+*/
+void
+sc_caller_x0 (int *ptr) [[arm::streaming_compatible]]
+{
+ *ptr = 0;
+ ns_callee ();
+ sc_callee ();
+}
+
+/*
+** sc_caller_x1:
+** ...
+** mov x10, x0
+** mov x11, x1
+** mov x14, sp
+** and x14, x14, x15
+** mov sp, x14
+** bl __arm_sme_state
+** ...
+** str w11, \[x10\]
+** ...
+*/
+void
+sc_caller_x1 (int *ptr, int a) [[arm::streaming_compatible]]
+{
+ *ptr = a;
+ ns_callee ();
+ sc_callee ();
+}
+
+/*
+** sc_caller_stack:
+** cmp sp, #?0
+** csetm x15, ne
+** sub sp, sp, #112
+** stp x29, x30, \[sp, #?16\]
+** add x29, sp, #?16
+** ...
+** stp d8, d9, \[sp, #?48\]
+** ...
+** bl __arm_sme_state
+** cmp sp, #?0
+** csetm x15, ne
+** str x0, \[x29, #?16\]
+** ...
+** bl ns_callee_stack
+** cmp sp, #?0
+** csetm x15, ne
+** ldr x16, \[x29, #?16\]
+** tst x16, #?1
+** beq [^\n]*
+** csel x15, x15, xzr, ne
+** smstart sm
+** ...
+*/
+void
+sc_caller_stack () [[arm::streaming_compatible]]
+{
+ ns_callee_stack (0, 0, 0, 0, 0, 0, 0, 0, 0);
+}
+
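+/* DWARF register 46 is VG.  The scans below check that each function's
+   unwind info records a save slot for VG (at CFA-72, matching the
+   "str x16, [sp, 24]" above) before the first "ret".  */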
+/* { dg-final { scan-assembler {sc_caller_sme:(?:(?!ret).)*\.cfi_offset 46, -72\n} } } */
+/* { dg-final { scan-assembler {sc_caller:(?:(?!ret).)*\.cfi_offset 46, -72\n} } } */