// { dg-options "-O -fno-optimize-sibling-calls" }
// { dg-final { check-function-bodies "**" "" } }
+#pragma GCC target "+sve"
+
void n_callee();
void s_callee() __arm_streaming;
void sc_callee() __arm_streaming_compatible;
--- /dev/null
+// { dg-options "-O -fno-optimize-sibling-calls" }
+// { dg-final { check-function-bodies "**" "" } }
+
+#pragma GCC target "+nosve"
+
+void n_callee();
+void s_callee() __arm_streaming;
+void sc_callee() __arm_streaming_compatible;
+
+void n_callee_ne() noexcept;
+void s_callee_ne() noexcept __arm_streaming;
+void sc_callee_ne() noexcept __arm_streaming_compatible;
+
+void n_caller1()
+{
+ try
+ {
+ n_callee();
+ sc_callee();
+ }
+ catch (...)
+ {
+ n_callee_ne();
+ sc_callee_ne();
+ }
+}
+// { dg-final { scan-assembler {_Z9n_caller1v:(?:(?!smstart|smstop).)*\tret} } }
+
+/*
+** _Z9n_caller2v:
+** ...
+** bl __arm_get_current_vg
+** str x0, [^\n]+
+** ...
+** bl __cxa_begin_catch
+** smstart sm
+** bl _Z11s_callee_nev
+** smstop sm
+** bl __cxa_end_catch
+** ...
+*/
+void n_caller2()
+{
+ try
+ {
+ n_callee();
+ sc_callee();
+ }
+ catch (...)
+ {
+ s_callee_ne();
+ }
+}
+
+/*
+** _Z9s_caller1v:
+** ...
+** bl __cxa_end_catch
+** smstart sm
+** ...
+*/
+int s_caller1() __arm_streaming
+{
+ try
+ {
+ s_callee();
+ return 1;
+ }
+ catch (...)
+ {
+ return 2;
+ }
+}
+
+/*
+** _Z9s_caller2v:
+** ...
+** bl __cxa_begin_catch
+** smstart sm
+** bl _Z11s_callee_nev
+** smstop sm
+** bl __cxa_end_catch
+** smstart sm
+** ...
+*/
+int s_caller2() __arm_streaming
+{
+ try
+ {
+ n_callee();
+ return 1;
+ }
+ catch (...)
+ {
+ s_callee_ne();
+ return 2;
+ }
+}
+
+/*
+** _Z10sc_caller1v:
+** ...
+** bl __arm_get_current_vg
+** str x0, [^\n]+
+** mrs (x[0-9]+), svcr
+** str \1, ([^\n]+)
+** ...
+** bl __cxa_end_catch
+** ldr (x[0-9]+), \2
+** tbz \3, 0, [^\n]+
+** smstart sm
+** ...
+*/
+int sc_caller1() __arm_streaming_compatible
+{
+ try
+ {
+ sc_callee();
+ return 1;
+ }
+ catch (...)
+ {
+ return 2;
+ }
+}
+
+/*
+** _Z10ls_caller1v:
+** ...
+** bl __arm_get_current_vg
+** str x0, [^\n]+
+** ...
+** bl __cxa_begin_catch
+** smstart sm
+** bl _Z12sc_callee_nev
+** smstop sm
+** bl __cxa_end_catch
+** ...
+*/
+__arm_locally_streaming void ls_caller1()
+{
+ try
+ {
+ sc_callee();
+ }
+ catch (...)
+ {
+ sc_callee_ne();
+ }
+}
** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
** ret
*/
-TEST_LOAD_ZA (ldr_vnum_za_0_0,
+TEST_LOAD_ZA_NOPRED (ldr_vnum_za_0_0,
svldr_vnum_za (0, x1, 0),
svldr_vnum_za (0, x1, 0))
** ldr za\[\1, 1\], \[x1(?:, #1, mul vl)?\]
** ret
*/
-TEST_LOAD_ZA (ldr_vnum_za_0_1,
+TEST_LOAD_ZA_NOPRED (ldr_vnum_za_0_1,
svldr_vnum_za (0, x1, 1),
svldr_vnum_za (0, x1, 1))
** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
** ret
*/
-TEST_LOAD_ZA (ldr_vnum_za_1_0,
+TEST_LOAD_ZA_NOPRED (ldr_vnum_za_1_0,
svldr_vnum_za (1, x1, 0),
svldr_vnum_za (1, x1, 0))
** ldr za\[\1, 2\], \[x1(?:, #2, mul vl)?\]
** ret
*/
-TEST_LOAD_ZA (ldr_vnum_za_1_2,
+TEST_LOAD_ZA_NOPRED (ldr_vnum_za_1_2,
svldr_vnum_za (1, x1, 2),
svldr_vnum_za (1, x1, 2))
** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
** ret
*/
-TEST_LOAD_ZA (ldr_vnum_za_w0_0,
+TEST_LOAD_ZA_NOPRED (ldr_vnum_za_w0_0,
svldr_vnum_za (w0, x1, 0),
svldr_vnum_za (w0, x1, 0))
** ldr za\[\1, 1\], \[x1, #1, mul vl\]
** ret
*/
-TEST_LOAD_ZA (ldr_vnum_za_w0_1,
+TEST_LOAD_ZA_NOPRED (ldr_vnum_za_w0_1,
svldr_vnum_za (w0, x1, 1),
svldr_vnum_za (w0, x1, 1))
** ldr za\[\1, 13\], \[x1, #13, mul vl\]
** ret
*/
-TEST_LOAD_ZA (ldr_vnum_za_w0_13,
+TEST_LOAD_ZA_NOPRED (ldr_vnum_za_w0_13,
svldr_vnum_za (w0, x1, 13),
svldr_vnum_za (w0, x1, 13))
** ldr za\[\1, 15\], \[x1, #15, mul vl\]
** ret
*/
-TEST_LOAD_ZA (ldr_vnum_za_w0_15,
+TEST_LOAD_ZA_NOPRED (ldr_vnum_za_w0_15,
svldr_vnum_za (w0, x1, 15),
svldr_vnum_za (w0, x1, 15))
** )
** ret
*/
-TEST_LOAD_ZA (ldr_vnum_za_w0_16,
+TEST_LOAD_ZA_NOPRED (ldr_vnum_za_w0_16,
svldr_vnum_za (w0, x1, 16),
svldr_vnum_za (w0, x1, 16))
** )
** ret
*/
-TEST_LOAD_ZA (ldr_vnum_za_w0_m1,
+TEST_LOAD_ZA_NOPRED (ldr_vnum_za_w0_m1,
svldr_vnum_za (w0, x1, -1),
svldr_vnum_za (w0, x1, -1))
** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
** ret
*/
-TEST_LOAD_ZA (ldr_vnum_za_w0p1_0,
+TEST_LOAD_ZA_NOPRED (ldr_vnum_za_w0p1_0,
svldr_vnum_za (w0 + 1, x1, 0),
svldr_vnum_za (w0 + 1, x1, 0))
** ldr za\[\1, 1\], \[x1(?:, #1, mul vl)?\]
** ret
*/
-TEST_LOAD_ZA (ldr_vnum_za_w0m1_1,
+TEST_LOAD_ZA_NOPRED (ldr_vnum_za_w0m1_1,
svldr_vnum_za (w0 - 1, x1, 1),
svldr_vnum_za (w0 - 1, x1, 1))
** ldr za\[\1, 3\], \[x1(?:, #3, mul vl)?\]
** ret
*/
-TEST_LOAD_ZA (ldr_vnum_za_w0p2_3,
+TEST_LOAD_ZA_NOPRED (ldr_vnum_za_w0p2_3,
svldr_vnum_za (w0 + 2, x1, 3),
svldr_vnum_za (w0 + 2, x1, 3))
** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
** ret
*/
-TEST_LOAD_ZA (ldr_za_0,
+TEST_LOAD_ZA_NOPRED (ldr_za_0,
svldr_za (0, x1),
svldr_za (0, x1))
** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
** ret
*/
-TEST_LOAD_ZA (ldr_za_1,
+TEST_LOAD_ZA_NOPRED (ldr_za_1,
svldr_za (1, x1),
svldr_za (1, x1))
** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
** ret
*/
-TEST_LOAD_ZA (ldr_za_w0,
+TEST_LOAD_ZA_NOPRED (ldr_za_w0,
svldr_za (w0, x1),
svldr_za (w0, x1))
** ldr za\[\1, 1\], \[x1, #1, mul vl\]
** ret
*/
-TEST_LOAD_ZA (ldr_za_w0_1_vnum,
+TEST_LOAD_ZA_NOPRED (ldr_za_w0_1_vnum,
svldr_za (w0 + 1, x1 + svcntsb ()),
svldr_za (w0 + 1, x1 + svcntsb ()))
** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
** ret
*/
-TEST_LOAD_ZA (ldr_za_w0p2,
+TEST_LOAD_ZA_NOPRED (ldr_za_w0p2,
svldr_za (w0 + 2, x1),
svldr_za (w0 + 2, x1))
** )
** ret
*/
-TEST_LOAD_ZA (ldr_za_offset,
+TEST_LOAD_ZA_NOPRED (ldr_za_offset,
svldr_za (w0, x1 + 1),
svldr_za (w0, x1 + 1))
** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
** ret
*/
-TEST_STORE_ZA (str_vnum_za_0_0,
+TEST_STORE_ZA_NOPRED (str_vnum_za_0_0,
svstr_vnum_za (0, x1, 0),
svstr_vnum_za (0, x1, 0))
** str za\[\1, 1\], \[x1(?:, #1, mul vl)?\]
** ret
*/
-TEST_STORE_ZA (str_vnum_za_0_1,
+TEST_STORE_ZA_NOPRED (str_vnum_za_0_1,
svstr_vnum_za (0, x1, 1),
svstr_vnum_za (0, x1, 1))
** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
** ret
*/
-TEST_STORE_ZA (str_vnum_za_1_0,
+TEST_STORE_ZA_NOPRED (str_vnum_za_1_0,
svstr_vnum_za (1, x1, 0),
svstr_vnum_za (1, x1, 0))
** str za\[\1, 2\], \[x1(?:, #2, mul vl)?\]
** ret
*/
-TEST_STORE_ZA (str_vnum_za_1_2,
+TEST_STORE_ZA_NOPRED (str_vnum_za_1_2,
svstr_vnum_za (1, x1, 2),
svstr_vnum_za (1, x1, 2))
** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
** ret
*/
-TEST_STORE_ZA (str_vnum_za_w0_0,
+TEST_STORE_ZA_NOPRED (str_vnum_za_w0_0,
svstr_vnum_za (w0, x1, 0),
svstr_vnum_za (w0, x1, 0))
** str za\[\1, 1\], \[x1, #1, mul vl\]
** ret
*/
-TEST_STORE_ZA (str_vnum_za_w0_1,
+TEST_STORE_ZA_NOPRED (str_vnum_za_w0_1,
svstr_vnum_za (w0, x1, 1),
svstr_vnum_za (w0, x1, 1))
** str za\[\1, 13\], \[x1, #13, mul vl\]
** ret
*/
-TEST_STORE_ZA (str_vnum_za_w0_13,
+TEST_STORE_ZA_NOPRED (str_vnum_za_w0_13,
svstr_vnum_za (w0, x1, 13),
svstr_vnum_za (w0, x1, 13))
** str za\[\1, 15\], \[x1, #15, mul vl\]
** ret
*/
-TEST_STORE_ZA (str_vnum_za_w0_15,
+TEST_STORE_ZA_NOPRED (str_vnum_za_w0_15,
svstr_vnum_za (w0, x1, 15),
svstr_vnum_za (w0, x1, 15))
** )
** ret
*/
-TEST_STORE_ZA (str_vnum_za_w0_16,
+TEST_STORE_ZA_NOPRED (str_vnum_za_w0_16,
svstr_vnum_za (w0, x1, 16),
svstr_vnum_za (w0, x1, 16))
** )
** ret
*/
-TEST_STORE_ZA (str_vnum_za_w0_m1,
+TEST_STORE_ZA_NOPRED (str_vnum_za_w0_m1,
svstr_vnum_za (w0, x1, -1),
svstr_vnum_za (w0, x1, -1))
** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
** ret
*/
-TEST_STORE_ZA (str_vnum_za_w0p1_0,
+TEST_STORE_ZA_NOPRED (str_vnum_za_w0p1_0,
svstr_vnum_za (w0 + 1, x1, 0),
svstr_vnum_za (w0 + 1, x1, 0))
** str za\[\1, 1\], \[x1(?:, #1, mul vl)?\]
** ret
*/
-TEST_STORE_ZA (str_vnum_za_w0m1_1,
+TEST_STORE_ZA_NOPRED (str_vnum_za_w0m1_1,
svstr_vnum_za (w0 - 1, x1, 1),
svstr_vnum_za (w0 - 1, x1, 1))
** str za\[\1, 3\], \[x1(?:, #3, mul vl)?\]
** ret
*/
-TEST_STORE_ZA (str_vnum_za_w0p2_3,
+TEST_STORE_ZA_NOPRED (str_vnum_za_w0p2_3,
svstr_vnum_za (w0 + 2, x1, 3),
svstr_vnum_za (w0 + 2, x1, 3))
** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
** ret
*/
-TEST_STORE_ZA (str_za_0,
+TEST_STORE_ZA_NOPRED (str_za_0,
svstr_za (0, x1),
svstr_za (0, x1))
** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
** ret
*/
-TEST_STORE_ZA (str_za_1,
+TEST_STORE_ZA_NOPRED (str_za_1,
svstr_za (1, x1),
svstr_za (1, x1))
** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
** ret
*/
-TEST_STORE_ZA (str_za_w0,
+TEST_STORE_ZA_NOPRED (str_za_w0,
svstr_za (w0, x1),
svstr_za (w0, x1))
** str za\[\1, 1\], \[x1, #1, mul vl\]
** ret
*/
-TEST_STORE_ZA (str_za_w0_1_vnum,
+TEST_STORE_ZA_NOPRED (str_za_w0_1_vnum,
svstr_za (w0 + 1, x1 + svcntsb ()),
svstr_za (w0 + 1, x1 + svcntsb ()))
** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
** ret
*/
-TEST_STORE_ZA (str_za_w0p2,
+TEST_STORE_ZA_NOPRED (str_za_w0p2,
svstr_za (w0 + 2, x1),
svstr_za (w0 + 2, x1))
** )
** ret
*/
-TEST_STORE_ZA (str_za_offset,
+TEST_STORE_ZA_NOPRED (str_za_offset,
svstr_za (w0, x1 + 1),
svstr_za (w0, x1 + 1))
INVOKE (CODE1, CODE2); \
}
+#define TEST_LOAD_ZA_NOPRED(NAME, CODE1, CODE2) \
+ PROTO (NAME, void, (int32_t w0, const char *x1, \
+ uint64_t x2)) \
+ { \
+ INVOKE (CODE1, CODE2); \
+ }
+
+#define TEST_STORE_ZA_NOPRED(NAME, CODE1, CODE2) \
+ PROTO (NAME, void, (int32_t w0, char *x1, \
+ uint64_t x2)) \
+ { \
+ INVOKE (CODE1, CODE2); \
+ }
+
#define TEST_READ_ZA(NAME, TYPE, CODE1, CODE2) \
PROTO (NAME, TYPE, (TYPE z0, TYPE z1, svbool_t p0, \
int32_t w0)) \
// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables" }
// { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } {\t\.inst} } }
+#pragma GCC target "+sve"
+
void ns_callee ();
void s_callee () [[arm::streaming]];
void sc_callee () [[arm::streaming_compatible]];
// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables -mtrack-speculation" }
// { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } {\t\.inst} } }
+#pragma GCC target "+sve"
void ns_callee ();
void s_callee () [[arm::streaming]];
--- /dev/null
+// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables -mtrack-speculation" }
+// { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } {\t\.inst} } }
+
+#pragma GCC target "+nosve"
+
+void ns_callee ();
+ void s_callee () [[arm::streaming]];
+ void sc_callee () [[arm::streaming_compatible]];
+
+void ns_callee_stack (int, int, int, int, int, int, int, int, int);
+
+struct callbacks {
+ void (*ns_ptr) ();
+ void (*s_ptr) () [[arm::streaming]];
+ void (*sc_ptr) () [[arm::streaming_compatible]];
+};
+
+/*
+** sc_caller_sme:
+** cmp sp, #?0
+** csetm x15, ne
+** stp x29, x30, \[sp, #?-96\]!
+** mov x29, sp
+** mov x14, sp
+** and x14, x14, x15
+** mov sp, x14
+** bl __arm_get_current_vg
+** cmp sp, #?0
+** csetm x15, ne
+** str x0, \[sp, #?24\]
+** stp d8, d9, \[sp, #?32\]
+** stp d10, d11, \[sp, #?48\]
+** stp d12, d13, \[sp, #?64\]
+** stp d14, d15, \[sp, #?80\]
+** mrs x16, svcr
+** str x16, \[x29, #?16\]
+** ldr x16, \[x29, #?16\]
+** tst x16, #?1
+** beq [^\n]*
+** csel x15, x15, xzr, ne
+** smstop sm
+** b [^\n]*
+** csel x15, x15, xzr, eq
+** mov x14, sp
+** and x14, x14, x15
+** mov sp, x14
+** bl ns_callee
+** cmp sp, #?0
+** csetm x15, ne
+** ldr x16, \[x29, #?16\]
+** tst x16, #?1
+** beq [^\n]*
+** csel x15, x15, xzr, ne
+** smstart sm
+** b [^\n]*
+** csel x15, x15, xzr, eq
+** ldr x16, \[x29, #?16\]
+** tst x16, #?1
+** bne [^\n]*
+** csel x15, x15, xzr, eq
+** smstart sm
+** b [^\n]*
+** csel x15, x15, xzr, ne
+** mov x14, sp
+** and x14, x14, x15
+** mov sp, x14
+** bl s_callee
+** cmp sp, #?0
+** csetm x15, ne
+** ldr x16, \[x29, #?16\]
+** tst x16, #?1
+** bne [^\n]*
+** csel x15, x15, xzr, eq
+** smstop sm
+** b [^\n]*
+** csel x15, x15, xzr, ne
+** mov x14, sp
+** and x14, x14, x15
+** mov sp, x14
+** bl sc_callee
+** cmp sp, #?0
+** csetm x15, ne
+** ldp d8, d9, \[sp, #?32\]
+** ldp d10, d11, \[sp, #?48\]
+** ldp d12, d13, \[sp, #?64\]
+** ldp d14, d15, \[sp, #?80\]
+** ldp x29, x30, \[sp\], #?96
+** mov x14, sp
+** and x14, x14, x15
+** mov sp, x14
+** ret
+*/
+void
+sc_caller_sme () [[arm::streaming_compatible]]
+{
+ ns_callee ();
+ s_callee ();
+ sc_callee ();
+}
+
+#pragma GCC target "+nosme"
+
+/*
+** sc_caller:
+** cmp sp, #?0
+** csetm x15, ne
+** stp x29, x30, \[sp, #?-96\]!
+** mov x29, sp
+** mov x14, sp
+** and x14, x14, x15
+** mov sp, x14
+** bl __arm_get_current_vg
+** str x0, \[sp, #?24\]
+** stp d8, d9, \[sp, #?32\]
+** stp d10, d11, \[sp, #?48\]
+** stp d12, d13, \[sp, #?64\]
+** stp d14, d15, \[sp, #?80\]
+** bl __arm_sme_state
+** cmp sp, #?0
+** csetm x15, ne
+** str x0, \[x29, #?16\]
+** ...
+** bl sc_callee
+** cmp sp, #?0
+** csetm x15, ne
+** ldp d8, d9, \[sp, #?32\]
+** ldp d10, d11, \[sp, #?48\]
+** ldp d12, d13, \[sp, #?64\]
+** ldp d14, d15, \[sp, #?80\]
+** ldp x29, x30, \[sp\], #?96
+** mov x14, sp
+** and x14, x14, x15
+** mov sp, x14
+** ret
+*/
+void
+sc_caller () [[arm::streaming_compatible]]
+{
+ ns_callee ();
+ sc_callee ();
+}
+
+/*
+** sc_caller_x0:
+** ...
+** mov x10, x0
+** mov x14, sp
+** and x14, x14, x15
+** mov sp, x14
+** bl __arm_get_current_vg
+** ...
+** bl __arm_sme_state
+** ...
+** str wzr, \[x10\]
+** ...
+*/
+void
+sc_caller_x0 (int *ptr) [[arm::streaming_compatible]]
+{
+ *ptr = 0;
+ ns_callee ();
+ sc_callee ();
+}
+
+/*
+** sc_caller_x1:
+** ...
+** mov x10, x0
+** mov x14, sp
+** and x14, x14, x15
+** mov sp, x14
+** bl __arm_get_current_vg
+** ...
+** mov x11, x1
+** bl __arm_sme_state
+** ...
+** str w11, \[x10\]
+** ...
+*/
+void
+sc_caller_x1 (int *ptr, int a) [[arm::streaming_compatible]]
+{
+ *ptr = a;
+ ns_callee ();
+ sc_callee ();
+}
+
+/*
+** sc_caller_stack:
+** cmp sp, #?0
+** csetm x15, ne
+** sub sp, sp, #112
+** stp x29, x30, \[sp, #?16\]
+** add x29, sp, #?16
+** ...
+** bl __arm_get_current_vg
+** ...
+** stp d8, d9, \[sp, #?48\]
+** ...
+** bl __arm_sme_state
+** cmp sp, #?0
+** csetm x15, ne
+** str x0, \[x29, #?16\]
+** ...
+** bl ns_callee_stack
+** cmp sp, #?0
+** csetm x15, ne
+** ldr x16, \[x29, #?16\]
+** tst x16, #?1
+** beq [^\n]*
+** csel x15, x15, xzr, ne
+** .inst 0xd503437f // smstart sm
+** ...
+*/
+void
+sc_caller_stack () [[arm::streaming_compatible]]
+{
+ ns_callee_stack (0, 0, 0, 0, 0, 0, 0, 0, 0);
+}
+
+/* { dg-final { scan-assembler {sc_caller_sme:(?:(?!ret).)*\.cfi_offset 46, -72\n} } } */
+/* { dg-final { scan-assembler {sc_caller:(?:(?!ret).)*\.cfi_offset 46, -72\n} } } */
--- /dev/null
+// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables" }
+// { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } {\t\.inst} } }
+
+#pragma GCC target "+nosve"
+
+void ns_callee ();
+ void s_callee () [[arm::streaming]];
+ void sc_callee () [[arm::streaming_compatible]];
+
+void ns_callee_stack (int, int, int, int, int, int, int, int, int);
+
+struct callbacks {
+ void (*ns_ptr) ();
+ void (*s_ptr) () [[arm::streaming]];
+ void (*sc_ptr) () [[arm::streaming_compatible]];
+};
+
+/*
+** n_caller: { target lp64 }
+** stp x30, (x19|x2[0-8]), \[sp, #?-96\]!
+** mov (x9|x1[0-5]), x0
+** bl __arm_get_current_vg
+** str x0, \[sp, #?16\]
+** stp d8, d9, \[sp, #?32\]
+** stp d10, d11, \[sp, #?48\]
+** stp d12, d13, \[sp, #?64\]
+** stp d14, d15, \[sp, #?80\]
+** mov \1, \2
+** bl ns_callee
+** smstart sm
+** bl s_callee
+** smstop sm
+** bl sc_callee
+** ldr (x[0-9]+), \[\1\]
+** blr \3
+** ldr (x[0-9]+), \[\1, #?8\]
+** smstart sm
+** blr \4
+** smstop sm
+** ldr (x[0-9]+), \[\1, #?16\]
+** blr \5
+** ldp d8, d9, \[sp, #?32\]
+** ldp d10, d11, \[sp, #?48\]
+** ldp d12, d13, \[sp, #?64\]
+** ldp d14, d15, \[sp, #?80\]
+** ldp x30, \1, \[sp\], #?96
+** ret
+*/
+void
+n_caller (struct callbacks *c)
+{
+ ns_callee ();
+ s_callee ();
+ sc_callee ();
+
+ c->ns_ptr ();
+ c->s_ptr ();
+ c->sc_ptr ();
+}
+
+/*
+** s_caller: { target lp64 }
+** stp x30, (x19|x2[0-8]), \[sp, #?-96\]!
+** cntd x16
+** str x16, \[sp, #?16\]
+** stp d8, d9, \[sp, #?32\]
+** stp d10, d11, \[sp, #?48\]
+** stp d12, d13, \[sp, #?64\]
+** stp d14, d15, \[sp, #?80\]
+** mov \1, x0
+** smstop sm
+** bl ns_callee
+** smstart sm
+** bl s_callee
+** bl sc_callee
+** ldr (x[0-9]+), \[\1\]
+** smstop sm
+** blr \2
+** smstart sm
+** ldr (x[0-9]+), \[\1, #?8\]
+** blr \3
+** ldr (x[0-9]+), \[\1, #?16\]
+** blr \4
+** ldp d8, d9, \[sp, #?32\]
+** ldp d10, d11, \[sp, #?48\]
+** ldp d12, d13, \[sp, #?64\]
+** ldp d14, d15, \[sp, #?80\]
+** ldp x30, \1, \[sp\], #?96
+** ret
+*/
+void
+s_caller (struct callbacks *c) [[arm::streaming]]
+{
+ ns_callee ();
+ s_callee ();
+ sc_callee ();
+
+ c->ns_ptr ();
+ c->s_ptr ();
+ c->sc_ptr ();
+}
+
+/*
+** sc_caller_sme:
+** stp x29, x30, \[sp, #?-96\]!
+** mov x29, sp
+** bl __arm_get_current_vg
+** str x0, \[sp, #?24\]
+** stp d8, d9, \[sp, #?32\]
+** stp d10, d11, \[sp, #?48\]
+** stp d12, d13, \[sp, #?64\]
+** stp d14, d15, \[sp, #?80\]
+** mrs x16, svcr
+** str x16, \[x29, #?16\]
+** ldr x16, \[x29, #?16\]
+** tbz x16, 0, .*
+** smstop sm
+** bl ns_callee
+** ldr x16, \[x29, #?16\]
+** tbz x16, 0, .*
+** smstart sm
+** ldr x16, \[x29, #?16\]
+** tbnz x16, 0, .*
+** smstart sm
+** bl s_callee
+** ldr x16, \[x29, #?16\]
+** tbnz x16, 0, .*
+** smstop sm
+** bl sc_callee
+** ldp d8, d9, \[sp, #?32\]
+** ldp d10, d11, \[sp, #?48\]
+** ldp d12, d13, \[sp, #?64\]
+** ldp d14, d15, \[sp, #?80\]
+** ldp x29, x30, \[sp\], #?96
+** ret
+*/
+void
+sc_caller_sme () [[arm::streaming_compatible]]
+{
+ ns_callee ();
+ s_callee ();
+ sc_callee ();
+}
+
+#pragma GCC target "+nosme"
+
+/*
+** sc_caller:
+** stp x29, x30, \[sp, #?-96\]!
+** mov x29, sp
+** bl __arm_get_current_vg
+** str x0, \[sp, #?24\]
+** stp d8, d9, \[sp, #?32\]
+** stp d10, d11, \[sp, #?48\]
+** stp d12, d13, \[sp, #?64\]
+** stp d14, d15, \[sp, #?80\]
+** bl __arm_sme_state
+** str x0, \[x29, #?16\]
+** ...
+** bl sc_callee
+** ldp d8, d9, \[sp, #?32\]
+** ldp d10, d11, \[sp, #?48\]
+** ldp d12, d13, \[sp, #?64\]
+** ldp d14, d15, \[sp, #?80\]
+** ldp x29, x30, \[sp\], #?96
+** ret
+*/
+void
+sc_caller () [[arm::streaming_compatible]]
+{
+ ns_callee ();
+ sc_callee ();
+}
+
+/*
+** sc_caller_x0:
+** ...
+** mov x10, x0
+** bl __arm_get_current_vg
+** ...
+** bl __arm_sme_state
+** ...
+** str wzr, \[x10\]
+** ...
+*/
+void
+sc_caller_x0 (int *ptr) [[arm::streaming_compatible]]
+{
+ *ptr = 0;
+ ns_callee ();
+ sc_callee ();
+}
+
+/*
+** sc_caller_x1:
+** ...
+** mov x10, x0
+** bl __arm_get_current_vg
+** ...
+** mov x11, x1
+** bl __arm_sme_state
+** ...
+** str w11, \[x10\]
+** ...
+*/
+void
+sc_caller_x1 (int *ptr, int a) [[arm::streaming_compatible]]
+{
+ *ptr = a;
+ ns_callee ();
+ sc_callee ();
+}
+
+/*
+** sc_caller_stack:
+** sub sp, sp, #112
+** stp x29, x30, \[sp, #?16\]
+** add x29, sp, #?16
+** ...
+** stp d8, d9, \[sp, #?48\]
+** ...
+** bl __arm_sme_state
+** str x0, \[x29, #?16\]
+** ...
+** bl ns_callee_stack
+** ldr x16, \[x29, #?16\]
+** tbz x16, 0, .*
+** .inst 0xd503437f // smstart sm
+** ...
+*/
+void
+sc_caller_stack () [[arm::streaming_compatible]]
+{
+ ns_callee_stack (0, 0, 0, 0, 0, 0, 0, 0, 0);
+}
+
+/* { dg-final { scan-assembler {n_caller:(?:(?!ret).)*\.cfi_offset 46, -80\n} } } */
+/* { dg-final { scan-assembler {s_caller:(?:(?!ret).)*\.cfi_offset 46, -80\n} } } */
+/* { dg-final { scan-assembler {sc_caller_sme:(?:(?!ret).)*\.cfi_offset 46, -72\n} } } */
+/* { dg-final { scan-assembler {sc_caller:(?:(?!ret).)*\.cfi_offset 46, -72\n} } } */
// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables" }
// { dg-final { check-function-bodies "**" "" } }
+#pragma GCC target "+sve"
+
__attribute__((aarch64_vector_pcs)) void ns_callee ();
__attribute__((aarch64_vector_pcs)) void s_callee () [[arm::streaming]];
__attribute__((aarch64_vector_pcs)) void sc_callee () [[arm::streaming_compatible]];
--- /dev/null
+// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables" }
+// { dg-final { check-function-bodies "**" "" } }
+
+#pragma GCC target "+nosve"
+
+__attribute__((aarch64_vector_pcs)) void ns_callee ();
+__attribute__((aarch64_vector_pcs)) void s_callee () [[arm::streaming]];
+__attribute__((aarch64_vector_pcs)) void sc_callee () [[arm::streaming_compatible]];
+
+struct callbacks {
+ __attribute__((aarch64_vector_pcs)) void (*ns_ptr) ();
+ __attribute__((aarch64_vector_pcs)) void (*s_ptr) () [[arm::streaming]];
+ __attribute__((aarch64_vector_pcs)) void (*sc_ptr) () [[arm::streaming_compatible]];
+};
+
+/*
+** n_caller: { target lp64 }
+** stp x30, (x19|x2[0-8]), \[sp, #?-288\]!
+** mov (x9|x1[0-5]), x0
+** bl __arm_get_current_vg
+** str x0, \[sp, #?16\]
+** stp q8, q9, \[sp, #?32\]
+** stp q10, q11, \[sp, #?64\]
+** stp q12, q13, \[sp, #?96\]
+** stp q14, q15, \[sp, #?128\]
+** stp q16, q17, \[sp, #?160\]
+** stp q18, q19, \[sp, #?192\]
+** stp q20, q21, \[sp, #?224\]
+** stp q22, q23, \[sp, #?256\]
+** mov \1, \2
+** bl ns_callee
+** smstart sm
+** bl s_callee
+** smstop sm
+** bl sc_callee
+** ldr (x[0-9]+), \[\1\]
+** blr \3
+** ldr (x[0-9]+), \[\1, #?8\]
+** smstart sm
+** blr \4
+** smstop sm
+** ldr (x[0-9]+), \[\1, #?16\]
+** blr \5
+** ldp q8, q9, \[sp, #?32\]
+** ldp q10, q11, \[sp, #?64\]
+** ldp q12, q13, \[sp, #?96\]
+** ldp q14, q15, \[sp, #?128\]
+** ldp q16, q17, \[sp, #?160\]
+** ldp q18, q19, \[sp, #?192\]
+** ldp q20, q21, \[sp, #?224\]
+** ldp q22, q23, \[sp, #?256\]
+** ldp x30, \1, \[sp\], #?288
+** ret
+*/
+void __attribute__((aarch64_vector_pcs))
+n_caller (struct callbacks *c)
+{
+ ns_callee ();
+ s_callee ();
+ sc_callee ();
+
+ c->ns_ptr ();
+ c->s_ptr ();
+ c->sc_ptr ();
+}
+
+/*
+** s_caller: { target lp64 }
+** stp x30, (x19|x2[0-8]), \[sp, #?-288\]!
+** cntd x16
+** str x16, \[sp, #?16\]
+** stp q8, q9, \[sp, #?32\]
+** stp q10, q11, \[sp, #?64\]
+** stp q12, q13, \[sp, #?96\]
+** stp q14, q15, \[sp, #?128\]
+** stp q16, q17, \[sp, #?160\]
+** stp q18, q19, \[sp, #?192\]
+** stp q20, q21, \[sp, #?224\]
+** stp q22, q23, \[sp, #?256\]
+** mov \1, x0
+** smstop sm
+** bl ns_callee
+** smstart sm
+** bl s_callee
+** bl sc_callee
+** ldr (x[0-9]+), \[\1\]
+** smstop sm
+** blr \2
+** smstart sm
+** ldr (x[0-9]+), \[\1, #?8\]
+** blr \3
+** ldr (x[0-9]+), \[\1, #?16\]
+** blr \4
+** ldp q8, q9, \[sp, #?32\]
+** ldp q10, q11, \[sp, #?64\]
+** ldp q12, q13, \[sp, #?96\]
+** ldp q14, q15, \[sp, #?128\]
+** ldp q16, q17, \[sp, #?160\]
+** ldp q18, q19, \[sp, #?192\]
+** ldp q20, q21, \[sp, #?224\]
+** ldp q22, q23, \[sp, #?256\]
+** ldp x30, \1, \[sp\], #?288
+** ret
+*/
+void __attribute__((aarch64_vector_pcs))
+s_caller (struct callbacks *c) [[arm::streaming]]
+{
+ ns_callee ();
+ s_callee ();
+ sc_callee ();
+
+ c->ns_ptr ();
+ c->s_ptr ();
+ c->sc_ptr ();
+}
+
+/*
+** sc_caller:
+** stp x29, x30, \[sp, #?-288\]!
+** mov x29, sp
+** bl __arm_get_current_vg
+** str x0, \[sp, #?24\]
+** stp q8, q9, \[sp, #?32\]
+** stp q10, q11, \[sp, #?64\]
+** stp q12, q13, \[sp, #?96\]
+** stp q14, q15, \[sp, #?128\]
+** stp q16, q17, \[sp, #?160\]
+** stp q18, q19, \[sp, #?192\]
+** stp q20, q21, \[sp, #?224\]
+** stp q22, q23, \[sp, #?256\]
+** mrs x16, svcr
+** str x16, \[x29, #?16\]
+** ldr x16, \[x29, #?16\]
+** tbz x16, 0, .*
+** smstop sm
+** bl ns_callee
+** ldr x16, \[x29, #?16\]
+** tbz x16, 0, .*
+** smstart sm
+** ldr x16, \[x29, #?16\]
+** tbnz x16, 0, .*
+** smstart sm
+** bl s_callee
+** ldr x16, \[x29, #?16\]
+** tbnz x16, 0, .*
+** smstop sm
+** bl sc_callee
+** ldp q8, q9, \[sp, #?32\]
+** ldp q10, q11, \[sp, #?64\]
+** ldp q12, q13, \[sp, #?96\]
+** ldp q14, q15, \[sp, #?128\]
+** ldp q16, q17, \[sp, #?160\]
+** ldp q18, q19, \[sp, #?192\]
+** ldp q20, q21, \[sp, #?224\]
+** ldp q22, q23, \[sp, #?256\]
+** ldp x29, x30, \[sp\], #?288
+** ret
+*/
+void __attribute__((aarch64_vector_pcs))
+sc_caller () [[arm::streaming_compatible]]
+{
+ ns_callee ();
+ s_callee ();
+ sc_callee ();
+}
+
+/* { dg-final { scan-assembler {n_caller:(?:(?!ret).)*\.cfi_offset 46, -272\n} } } */
+/* { dg-final { scan-assembler {s_caller:(?:(?!ret).)*\.cfi_offset 46, -272\n} } } */
+/* { dg-final { scan-assembler {sc_caller:(?:(?!ret).)*\.cfi_offset 46, -264\n} } } */
#include <arm_sve.h>
+#pragma GCC target "+sve"
+
svbool_t ns_callee ();
svbool_t s_callee () [[arm::streaming]];
svbool_t sc_callee () [[arm::streaming_compatible]];
#include <arm_sve.h>
+#pragma GCC target "+sve"
+
svbool_t ns_callee ();
svbool_t s_callee () [[arm::streaming]];
svbool_t sc_callee () [[arm::streaming_compatible]];
#include <arm_neon.h>
#include <arm_sme.h>
+#pragma GCC target "+sve"
+
uint8x16_t *neon;
svint64_t *sve;
int64_t *ptr;
#include <arm_neon.h>
#include <arm_sme.h>
+#pragma GCC target "+sve"
+
uint8x16_t *neon;
svint64_t *sve;
int64_t *ptr;
#include <arm_neon.h>
#include <arm_sme.h>
+#pragma GCC target "+sve"
+
uint8x16_t *neon;
svint64_t *sve;
int64_t *ptr;
// { dg-options "-O -fomit-frame-pointer -fno-stack-clash-protection" }
// { dg-final { check-function-bodies "**" "" } }
+#pragma GCC target "+sve"
+
void consume_za () [[arm::streaming, arm::inout("za")]];
/*
--- /dev/null
+// { dg-options "-O -fomit-frame-pointer -fno-stack-clash-protection" }
+// { dg-final { check-function-bodies "**" "" } }
+
+#pragma GCC target "+nosve"
+
+void consume_za () [[arm::streaming, arm::inout("za")]];
+
+/*
+** n_ls:
+** sub sp, sp, #?80
+** bl __arm_get_current_vg
+** str x0, \[sp\]
+** stp d8, d9, \[sp, #?16\]
+** stp d10, d11, \[sp, #?32\]
+** stp d12, d13, \[sp, #?48\]
+** stp d14, d15, \[sp, #?64\]
+** smstart sm
+** smstop sm
+** ldp d8, d9, \[sp, #?16\]
+** ldp d10, d11, \[sp, #?32\]
+** ldp d12, d13, \[sp, #?48\]
+** ldp d14, d15, \[sp, #?64\]
+** add sp, sp, #?80
+** ret
+*/
+[[arm::locally_streaming]] void
+n_ls ()
+{
+ asm ("");
+}
+
+/*
+** s_ls:
+** ret
+*/
+[[arm::locally_streaming]] void
+s_ls () [[arm::streaming]]
+{
+ asm ("");
+}
+
+/*
+** sc_ls:
+** stp x29, x30, \[sp, #?-96\]!
+** mov x29, sp
+** bl __arm_get_current_vg
+** str x0, \[sp, #?24\]
+** stp d8, d9, \[sp, #?32\]
+** stp d10, d11, \[sp, #?48\]
+** stp d12, d13, \[sp, #?64\]
+** stp d14, d15, \[sp, #?80\]
+** mrs x16, svcr
+** str x16, \[x29, #?16\]
+** tbnz x16, 0, [^\n]+
+** smstart sm
+** ldr x16, \[x29, #?16\]
+** tbnz x16, 0, [^\n]+
+** smstop sm
+** ldp d8, d9, \[sp, #?32\]
+** ldp d10, d11, \[sp, #?48\]
+** ldp d12, d13, \[sp, #?64\]
+** ldp d14, d15, \[sp, #?80\]
+** ldp x29, x30, \[sp\], #?96
+** ret
+*/
+[[arm::locally_streaming]] void
+sc_ls () [[arm::streaming_compatible]]
+{
+ asm ("");
+}
+
+/*
+** n_ls_new_za:
+** str x30, \[sp, #?-80\]!
+** bl __arm_get_current_vg
+** str x0, \[sp, #?8\]
+** stp d8, d9, \[sp, #?16\]
+** stp d10, d11, \[sp, #?32\]
+** stp d12, d13, \[sp, #?48\]
+** stp d14, d15, \[sp, #?64\]
+** smstart sm
+** mrs (x[0-9]+), tpidr2_el0
+** cbz \1, [^\n]+
+** bl __arm_tpidr2_save
+** msr tpidr2_el0, xzr
+** zero { za }
+** smstart za
+** bl consume_za
+** smstop za
+** smstop sm
+** ldp d8, d9, \[sp, #?16\]
+** ldp d10, d11, \[sp, #?32\]
+** ldp d12, d13, \[sp, #?48\]
+** ldp d14, d15, \[sp, #?64\]
+** ldr x30, \[sp\], #?80
+** ret
+*/
+[[arm::locally_streaming, arm::new("za")]] void
+n_ls_new_za ()
+{
+ consume_za ();
+ asm ("");
+}
+
+/*
+** s_ls_new_za:
+** str x30, \[sp, #?-16\]!
+** mrs (x[0-9]+), tpidr2_el0
+** cbz \1, [^\n]+
+** bl __arm_tpidr2_save
+** msr tpidr2_el0, xzr
+** zero { za }
+** smstart za
+** bl consume_za
+** smstop za
+** ldr x30, \[sp\], #?16
+** ret
+*/
+[[arm::locally_streaming, arm::new("za")]] void
+s_ls_new_za () [[arm::streaming]]
+{
+ consume_za ();
+ asm ("");
+}
+
+/*
+** sc_ls_new_za:
+** stp x29, x30, \[sp, #?-96\]!
+** mov x29, sp
+** bl __arm_get_current_vg
+** str x0, \[sp, #?24\]
+** stp d8, d9, \[sp, #?32\]
+** stp d10, d11, \[sp, #?48\]
+** stp d12, d13, \[sp, #?64\]
+** stp d14, d15, \[sp, #?80\]
+** mrs x16, svcr
+** str x16, \[x29, #?16\]
+** tbnz x16, 0, [^\n]+
+** smstart sm
+** mrs (x[0-9]+), tpidr2_el0
+** cbz \1, [^\n]+
+** bl __arm_tpidr2_save
+** msr tpidr2_el0, xzr
+** zero { za }
+** smstart za
+** bl consume_za
+** smstop za
+** ldr x16, \[x29, #?16\]
+** tbnz x16, 0, [^\n]+
+** smstop sm
+** ldp d8, d9, \[sp, #?32\]
+** ldp d10, d11, \[sp, #?48\]
+** ldp d12, d13, \[sp, #?64\]
+** ldp d14, d15, \[sp, #?80\]
+** ldp x29, x30, \[sp\], #?96
+** ret
+*/
+[[arm::locally_streaming, arm::new("za")]] void
+sc_ls_new_za () [[arm::streaming_compatible]]
+{
+ consume_za ();
+ asm ("");
+}
+
+/*
+** n_ls_shared_za:
+** str x30, \[sp, #?-80\]!
+** bl __arm_get_current_vg
+** str x0, \[sp, #?8\]
+** stp d8, d9, \[sp, #?16\]
+** stp d10, d11, \[sp, #?32\]
+** stp d12, d13, \[sp, #?48\]
+** stp d14, d15, \[sp, #?64\]
+** smstart sm
+** bl consume_za
+** smstop sm
+** ldp d8, d9, \[sp, #?16\]
+** ldp d10, d11, \[sp, #?32\]
+** ldp d12, d13, \[sp, #?48\]
+** ldp d14, d15, \[sp, #?64\]
+** ldr x30, \[sp\], #?80
+** ret
+*/
+[[arm::locally_streaming]] void
+n_ls_shared_za () [[arm::inout("za")]]
+{
+ consume_za ();
+ asm ("");
+}
+
+/*
+** s_ls_shared_za:
+** str x30, \[sp, #?-16\]!
+** bl consume_za
+** ldr x30, \[sp\], #?16
+** ret
+*/
+[[arm::locally_streaming]] void
+s_ls_shared_za () [[arm::streaming, arm::inout("za")]]
+{
+ consume_za ();
+ asm ("");
+}
+
+/*
+** sc_ls_shared_za:
+** stp x29, x30, \[sp, #?-96\]!
+** mov x29, sp
+** bl __arm_get_current_vg
+** str x0, \[sp, #?24\]
+** stp d8, d9, \[sp, #?32\]
+** stp d10, d11, \[sp, #?48\]
+** stp d12, d13, \[sp, #?64\]
+** stp d14, d15, \[sp, #?80\]
+** mrs x16, svcr
+** str x16, \[x29, #?16\]
+** tbnz x16, 0, [^\n]+
+** smstart sm
+** bl consume_za
+** ldr x16, \[x29, #?16\]
+** tbnz x16, 0, [^\n]+
+** smstop sm
+** ldp d8, d9, \[sp, #?32\]
+** ldp d10, d11, \[sp, #?48\]
+** ldp d12, d13, \[sp, #?64\]
+** ldp d14, d15, \[sp, #?80\]
+** ldp x29, x30, \[sp\], #?96
+** ret
+*/
+[[arm::locally_streaming]] void
+sc_ls_shared_za () [[arm::streaming_compatible, arm::inout("za")]]
+{
+ consume_za ();
+ asm ("");
+}
+
+/*
+** n_ls_vector_pcs:
+** sub sp, sp, #?272
+** bl __arm_get_current_vg
+** str x0, \[sp\]
+** stp q8, q9, \[sp, #?16\]
+** stp q10, q11, \[sp, #?48\]
+** stp q12, q13, \[sp, #?80\]
+** stp q14, q15, \[sp, #?112\]
+** stp q16, q17, \[sp, #?144\]
+** stp q18, q19, \[sp, #?176\]
+** stp q20, q21, \[sp, #?208\]
+** stp q22, q23, \[sp, #?240\]
+** smstart sm
+** smstop sm
+** ldp q8, q9, \[sp, #?16\]
+** ldp q10, q11, \[sp, #?48\]
+** ldp q12, q13, \[sp, #?80\]
+** ldp q14, q15, \[sp, #?112\]
+** ldp q16, q17, \[sp, #?144\]
+** ldp q18, q19, \[sp, #?176\]
+** ldp q20, q21, \[sp, #?208\]
+** ldp q22, q23, \[sp, #?240\]
+** add sp, sp, #?272
+** ret
+*/
+[[arm::locally_streaming]] void __attribute__((aarch64_vector_pcs))
+n_ls_vector_pcs ()
+{
+ asm ("");
+}
+
+/*
+** n_ls_sve_pcs: { target aarch64_little_endian }
+** sub sp, sp, #?16
+** bl __arm_get_current_vg
+** str x0, \[sp\]
+** addsvl sp, sp, #-18
+** str p4, \[sp\]
+** str p5, \[sp, #1, mul vl\]
+** str p6, \[sp, #2, mul vl\]
+** str p7, \[sp, #3, mul vl\]
+** str p8, \[sp, #4, mul vl\]
+** str p9, \[sp, #5, mul vl\]
+** str p10, \[sp, #6, mul vl\]
+** str p11, \[sp, #7, mul vl\]
+** str p12, \[sp, #8, mul vl\]
+** str p13, \[sp, #9, mul vl\]
+** str p14, \[sp, #10, mul vl\]
+** str p15, \[sp, #11, mul vl\]
+** str z8, \[sp, #2, mul vl\]
+** str z9, \[sp, #3, mul vl\]
+** str z10, \[sp, #4, mul vl\]
+** str z11, \[sp, #5, mul vl\]
+** str z12, \[sp, #6, mul vl\]
+** str z13, \[sp, #7, mul vl\]
+** str z14, \[sp, #8, mul vl\]
+** str z15, \[sp, #9, mul vl\]
+** str z16, \[sp, #10, mul vl\]
+** str z17, \[sp, #11, mul vl\]
+** str z18, \[sp, #12, mul vl\]
+** str z19, \[sp, #13, mul vl\]
+** str z20, \[sp, #14, mul vl\]
+** str z21, \[sp, #15, mul vl\]
+** str z22, \[sp, #16, mul vl\]
+** str z23, \[sp, #17, mul vl\]
+** addvl sp, sp, #-1
+** str p0, \[sp\]
+** smstart sm
+** ldr p0, \[sp\]
+** addvl sp, sp, #1
+** smstop sm
+** ldr z8, \[sp, #2, mul vl\]
+** ldr z9, \[sp, #3, mul vl\]
+** ldr z10, \[sp, #4, mul vl\]
+** ldr z11, \[sp, #5, mul vl\]
+** ldr z12, \[sp, #6, mul vl\]
+** ldr z13, \[sp, #7, mul vl\]
+** ldr z14, \[sp, #8, mul vl\]
+** ldr z15, \[sp, #9, mul vl\]
+** ldr z16, \[sp, #10, mul vl\]
+** ldr z17, \[sp, #11, mul vl\]
+** ldr z18, \[sp, #12, mul vl\]
+** ldr z19, \[sp, #13, mul vl\]
+** ldr z20, \[sp, #14, mul vl\]
+** ldr z21, \[sp, #15, mul vl\]
+** ldr z22, \[sp, #16, mul vl\]
+** ldr z23, \[sp, #17, mul vl\]
+** ldr p4, \[sp\]
+** ldr p5, \[sp, #1, mul vl\]
+** ldr p6, \[sp, #2, mul vl\]
+** ldr p7, \[sp, #3, mul vl\]
+** ldr p8, \[sp, #4, mul vl\]
+** ldr p9, \[sp, #5, mul vl\]
+** ldr p10, \[sp, #6, mul vl\]
+** ldr p11, \[sp, #7, mul vl\]
+** ldr p12, \[sp, #8, mul vl\]
+** ldr p13, \[sp, #9, mul vl\]
+** ldr p14, \[sp, #10, mul vl\]
+** ldr p15, \[sp, #11, mul vl\]
+** addsvl sp, sp, #18
+** add sp, sp, #?16
+** ret
+*/
+[[arm::locally_streaming]] void
+n_ls_sve_pcs (__SVBool_t x)
+{
+ asm ("");
+}
+
+/*
+** n_ls_v0:
+** addsvl sp, sp, #-1
+** ...
+** smstart sm
+** add x[0-9]+, [^\n]+
+** smstop sm
+** ...
+** addsvl sp, sp, #1
+** ...
+*/
+#define TEST(VN) __SVInt32_t VN; asm ("" :: "r" (&VN));
+[[arm::locally_streaming]] void
+n_ls_v0 ()
+{
+ TEST (v0);
+}
+
+/*
+** n_ls_v32:
+** addsvl sp, sp, #-32
+** ...
+** smstart sm
+** ...
+** smstop sm
+** ...
+** rdsvl (x[0-9]+), #1
+** lsl (x[0-9]+), \1, #?5
+** add sp, sp, \2
+** ...
+*/
+[[arm::locally_streaming]] void
+n_ls_v32 ()
+{
+ TEST (v0);
+ TEST (v1);
+ TEST (v2);
+ TEST (v3);
+ TEST (v4);
+ TEST (v5);
+ TEST (v6);
+ TEST (v7);
+ TEST (v8);
+ TEST (v9);
+ TEST (v10);
+ TEST (v11);
+ TEST (v12);
+ TEST (v13);
+ TEST (v14);
+ TEST (v15);
+ TEST (v16);
+ TEST (v17);
+ TEST (v18);
+ TEST (v19);
+ TEST (v20);
+ TEST (v21);
+ TEST (v22);
+ TEST (v23);
+ TEST (v24);
+ TEST (v25);
+ TEST (v26);
+ TEST (v27);
+ TEST (v28);
+ TEST (v29);
+ TEST (v30);
+ TEST (v31);
+}
+
+/*
+** n_ls_v33:
+** rdsvl (x[0-9]+), #1
+** mov (x[0-9]+), #?33
+** mul (x[0-9]+), (?:\1, \2|\2, \1)
+** sub sp, sp, \3
+** ...
+** smstart sm
+** ...
+** smstop sm
+** ...
+** rdsvl (x[0-9]+), #1
+** mov (x[0-9]+), #?33
+** mul (x[0-9]+), (?:\4, \5|\5, \4)
+** add sp, sp, \6
+** ...
+*/
+[[arm::locally_streaming]] void
+n_ls_v33 ()
+{
+ TEST (v0);
+ TEST (v1);
+ TEST (v2);
+ TEST (v3);
+ TEST (v4);
+ TEST (v5);
+ TEST (v6);
+ TEST (v7);
+ TEST (v8);
+ TEST (v9);
+ TEST (v10);
+ TEST (v11);
+ TEST (v12);
+ TEST (v13);
+ TEST (v14);
+ TEST (v15);
+ TEST (v16);
+ TEST (v17);
+ TEST (v18);
+ TEST (v19);
+ TEST (v20);
+ TEST (v21);
+ TEST (v22);
+ TEST (v23);
+ TEST (v24);
+ TEST (v25);
+ TEST (v26);
+ TEST (v27);
+ TEST (v28);
+ TEST (v29);
+ TEST (v30);
+ TEST (v31);
+ TEST (v32);
+}
ns_callee ();
}
-#pragma GCC target "+nosme"
+/* Add +sve to prevent passing +fcma to the assembler, since +fcma was
+ added to assemblers later than SME support. */
+#pragma GCC target "+sve+nosme"
/*
** sc_caller_nosme:
-/* { dg-additional-options "-O2 -ftree-vectorize" } */
+/* { dg-additional-options "-O2 -ftree-vectorize -mtune=generic-armv9-a" } */
#include <stdint.h>
#pragma GCC target "+sme2"
/*
** ldr_zt0_x0_vl1:
-** incb x0
+** addsvl x0, x0, #?1
** ldr zt0, \[x0\]
** ret
*/
-PROTO (ldr_zt0_x0_vl1, void, (char *x0)) { svldr_zt (0, x0 + svcntb()); }
+PROTO (ldr_zt0_x0_vl1, void, (char *x0)) { svldr_zt (0, x0 + svcntsb()); }
/*
** str_zt0_x0_vl1:
-** incb x0
+** addsvl x0, x0, #?1
** str zt0, \[x0\]
** ret
*/
-PROTO (str_zt0_x0_vl1, void, (char *x0)) { svstr_zt (0, x0 + svcntb()); }
+PROTO (str_zt0_x0_vl1, void, (char *x0)) { svstr_zt (0, x0 + svcntsb()); }