}
)
+;; Fold predicated loads/stores with a PTRUE predicate to unpredicated
+;; loads/stores after RA.
+;;
+;; Operand 1 is the governing predicate and must satisfy
+;; aarch64_simd_imm_one; operands 0 and 2 are the destination and source.
+;; The insn condition only accepts the pattern once reload has completed,
+;; and only when exactly one of operands 0 and 2 is a register and the
+;; other is memory, so register-to-register moves are never matched here.
+;; On big-endian targets the fold is restricted to VNx16QI.
+;; NOTE(review): presumably because LDR/STR always use little-endian
+;; ordering, as the comment on the following pattern suggests -- confirm.
+;;
+;; The "#" template means the insn is never output directly; it is always
+;; split (the "&& 1" split condition adds nothing to the insn condition)
+;; into a plain (set (match_dup 0) (match_dup 2)).
+(define_insn_and_split "*aarch64_sve_ptrue<mode>_ldr_str"
+ [(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand" "=Utr,w")
+ (unspec:SVE_FULL
+ [(match_operand:<VPRED> 1 "aarch64_simd_imm_one")
+ (match_operand:SVE_FULL 2 "aarch64_sve_nonimmediate_operand" "w,Utr")]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE && reload_completed
+ && (<MODE>mode == VNx16QImode || !BYTES_BIG_ENDIAN)
+ && ((REG_P (operands[0]) && MEM_P (operands[2]))
+ || (REG_P (operands[2]) && MEM_P (operands[0])))"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (match_dup 2))])
+
;; Unpredicated moves that cannot use LDR and STR, i.e. partial vectors
;; or vectors for which little-endian ordering isn't acceptable. Memory
;; accesses require secondary reloads.
}
/*
-** test_add_gnu:
+** test_add_gnu: {target aarch64_big_endian }
** (
** add (z[0-9]+\.b), (?:z0\.b, z1\.b|z1\.b, z0\.b)
** ptrue (p[0-7])\.b, vl32
** )
** ret
*/
+/*
+** test_add_gnu: {target aarch64_little_endian }
+** add (z[0-9]+)\.b, (?:z0\.b, z1\.b|z1\.b, z0\.b)
+** str \1, \[x8\]
+** ret
+*/
gnu_int8_t
test_add_gnu (fixed_int8_t x, fixed_int8_t y)
{
return res;
}
-/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 5 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 5 {target aarch64_big_endian} } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 1 {target aarch64_little_endian} } } */
+/* { dg-final { scan-assembler-times {\tldr\tz[0-9]+,} 4 {target aarch64_little_endian} } } */
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d,} 8 } } */
*dst++ = 1;
}
-/* { dg-final { scan-assembler-times {\tst1w\tz} 1 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz} 1 {target aarch64_big_endian} } } */
+/* { dg-final { scan-assembler-times {\tstr\tz} 1 {target aarch64_little_endian} } } */
*dst++ = 1;
}
-/* { dg-final { scan-assembler-times {\tst1w\tz} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz} 2 {target aarch64_big_endian} } } */
+/* { dg-final { scan-assembler-times {\tstr\tz} 2 {target aarch64_little_endian} } } */
/* { dg-final { scan-assembler-not {\tstp\tq} } } */
*dst++ = 1;
}
-/* { dg-final { scan-assembler-times {\tst1w\tz} 1 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz} 1 {target aarch64_big_endian} } } */
+/* { dg-final { scan-assembler-times {\tstr\tz} 1 {target aarch64_little_endian} } } */
*dst++ = 1;
}
-/* { dg-final { scan-assembler-times {\tst1w\tz} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz} 2 {target aarch64_big_endian} } } */
+/* { dg-final { scan-assembler-times {\tstr\tz} 2 {target aarch64_little_endian} } } */
#include <stdarg.h>
/*
-** callee_0:
+** callee_0: {target aarch64_big_endian}
** ...
** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x1\]
** ...
** ...
** ret
*/
+/*
+** callee_0: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x1\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_0 (int16_t *ptr, ...)
{
}
/*
-** caller_0:
+** caller_0: {target aarch64_big_endian}
** ...
** fmov (z[0-9]+\.h), #9\.0[^\n]*
** ...
** ...
** ret
*/
+/*
+** caller_0: {target aarch64_little_endian}
+** ...
+** fmov (z[0-9]+)\.h, #9\.0[^\n]*
+** ...
+** str \1, \[x1\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_0 (int16_t *ptr)
{
}
/*
-** callee_1:
+** callee_1: {target aarch64_big_endian}
** ...
** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x2\]
** ...
** ...
** ret
*/
+/*
+** callee_1: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x2\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_1 (int16_t *ptr, ...)
{
}
/*
-** caller_1:
+** caller_1: {target aarch64_big_endian}
** ...
** fmov (z[0-9]+\.h), #9\.0[^\n]*
** ...
** ...
** ret
*/
+/*
+** caller_1: {target aarch64_little_endian}
+** ...
+** fmov (z[0-9]+)\.h, #9\.0[^\n]*
+** ...
+** str \1, \[x2\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_1 (int16_t *ptr)
{
}
/*
-** callee_7:
+** callee_7: {target aarch64_big_endian}
** ...
** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x7\]
** ...
** ...
** ret
*/
+/*
+** callee_7: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x7\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_7 (int16_t *ptr, ...)
{
}
/*
-** caller_7:
+** caller_7: {target aarch64_big_endian}
** ...
** fmov (z[0-9]+\.h), #9\.0[^\n]*
** ...
** ...
** ret
*/
+/*
+** caller_7: {target aarch64_little_endian}
+** ...
+** fmov (z[0-9]+)\.h, #9\.0[^\n]*
+** ...
+** str \1, \[x7\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_7 (int16_t *ptr)
{
/* FIXME: We should be able to get rid of the va_list object. */
/*
-** callee_8:
+** callee_8: {target aarch64_big_endian}
** sub sp, sp, #([0-9]+)
** ...
** ldr (x[0-9]+), \[sp, \1\]
** ...
** ret
*/
+/*
+** callee_8: {target aarch64_little_endian}
+** sub sp, sp, #([0-9]+)
+** ...
+** ldr (x[0-9]+), \[sp, \1\]
+** ...
+** ldr (z[0-9]+), \[\2\]
+** ...
+** str \3, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_8 (int16_t *ptr, ...)
{
}
/*
-** caller_8:
+** caller_8: {target aarch64_big_endian}
** ...
** fmov (z[0-9]+\.h), #9\.0[^\n]*
** ...
** ...
** ret
*/
+/*
+** caller_8: {target aarch64_little_endian}
+** ...
+** fmov (z[0-9]+)\.h, #9\.0[^\n]*
+** ...
+** str \1, \[(x[0-9]+)\]
+** ...
+** str \2, \[sp\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_8 (int16_t *ptr)
{
#include <stdarg.h>
/*
-** callee_0:
+** callee_0: {target aarch64_big_endian}
** ...
** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x1\]
** ...
** ...
** ret
*/
+/*
+** callee_0: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x1\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_0 (int32_t *ptr, ...)
{
}
/*
-** caller_0:
+** caller_0: {target aarch64_big_endian}
** ...
** fmov (z[0-9]+\.s), #9\.0[^\n]*
** ...
** ...
** ret
*/
+/*
+** caller_0: {target aarch64_little_endian}
+** ...
+** fmov (z[0-9]+)\.s, #9\.0[^\n]*
+** ...
+** str \1, \[x1\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_0 (int32_t *ptr)
{
}
/*
-** callee_1:
+** callee_1: {target aarch64_big_endian}
** ...
** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x2\]
** ...
** ...
** ret
*/
+/*
+** callee_1: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x2\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_1 (int32_t *ptr, ...)
{
}
/*
-** caller_1:
+** caller_1: {target aarch64_big_endian}
** ...
** fmov (z[0-9]+\.s), #9\.0[^\n]*
** ...
** ...
** ret
*/
+/*
+** caller_1: {target aarch64_little_endian}
+** ...
+** fmov (z[0-9]+)\.s, #9\.0[^\n]*
+** ...
+** str \1, \[x2\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_1 (int32_t *ptr)
{
}
/*
-** callee_7:
+** callee_7: {target aarch64_big_endian}
** ...
** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x7\]
** ...
** ...
** ret
*/
+/*
+** callee_7: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x7\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_7 (int32_t *ptr, ...)
{
}
/*
-** caller_7:
+** caller_7: {target aarch64_big_endian}
** ...
** fmov (z[0-9]+\.s), #9\.0[^\n]*
** ...
** ...
** ret
*/
+/*
+** caller_7: {target aarch64_little_endian}
+** ...
+** fmov (z[0-9]+)\.s, #9\.0[^\n]*
+** ...
+** str \1, \[x7\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_7 (int32_t *ptr)
{
/* FIXME: We should be able to get rid of the va_list object. */
/*
-** callee_8:
+** callee_8: {target aarch64_big_endian}
** sub sp, sp, #([0-9]+)
** ...
** ldr (x[0-9]+), \[sp, \1\]
** ...
** ret
*/
+/*
+** callee_8: {target aarch64_little_endian}
+** sub sp, sp, #([0-9]+)
+** ...
+** ldr (x[0-9]+), \[sp, \1\]
+** ...
+** ldr (z[0-9]+), \[\2\]
+** ...
+** str \3, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_8 (int32_t *ptr, ...)
{
}
/*
-** caller_8:
+** caller_8: {target aarch64_big_endian}
** ...
** fmov (z[0-9]+\.s), #9\.0[^\n]*
** ...
** ...
** ret
*/
+/*
+** caller_8: {target aarch64_little_endian}
+** ...
+** fmov (z[0-9]+)\.s, #9\.0[^\n]*
+** ...
+** str \1, \[(x[0-9]+)\]
+** ...
+** str \2, \[sp\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_8 (int32_t *ptr)
{
#include <stdarg.h>
/*
-** callee_0:
+** callee_0: {target aarch64_big_endian}
** ...
** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x1\]
** ...
** ...
** ret
*/
+/*
+** callee_0: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x1\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_0 (int64_t *ptr, ...)
{
}
/*
-** caller_0:
+** caller_0: {target aarch64_big_endian}
** ...
** fmov (z[0-9]+\.d), #9\.0[^\n]*
** ...
** ...
** ret
*/
+/*
+** caller_0: {target aarch64_little_endian}
+** ...
+** fmov (z[0-9]+)\.d, #9\.0[^\n]*
+** ...
+** str \1, \[x1\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_0 (int64_t *ptr)
{
}
/*
-** callee_1:
+** callee_1: {target aarch64_big_endian}
** ...
** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x2\]
** ...
** ...
** ret
*/
+/*
+** callee_1: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x2\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_1 (int64_t *ptr, ...)
{
}
/*
-** caller_1:
+** caller_1: {target aarch64_big_endian}
** ...
** fmov (z[0-9]+\.d), #9\.0[^\n]*
** ...
** ...
** ret
*/
+/*
+** caller_1: {target aarch64_little_endian}
+** ...
+** fmov (z[0-9]+)\.d, #9\.0[^\n]*
+** ...
+** str \1, \[x2\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_1 (int64_t *ptr)
{
}
/*
-** callee_7:
+** callee_7: {target aarch64_big_endian}
** ...
** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x7\]
** ...
** ...
** ret
*/
+/*
+** callee_7: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x7\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_7 (int64_t *ptr, ...)
{
}
/*
-** caller_7:
+** caller_7: {target aarch64_big_endian}
** ...
** fmov (z[0-9]+\.d), #9\.0[^\n]*
** ...
** ...
** ret
*/
+/*
+** caller_7: {target aarch64_little_endian}
+** ...
+** fmov (z[0-9]+)\.d, #9\.0[^\n]*
+** ...
+** str \1, \[x7\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_7 (int64_t *ptr)
{
/* FIXME: We should be able to get rid of the va_list object. */
/*
-** callee_8:
+** callee_8: {target aarch64_big_endian}
** sub sp, sp, #([0-9]+)
** ...
** ldr (x[0-9]+), \[sp, \1\]
** ...
** ret
*/
+/*
+** callee_8: {target aarch64_little_endian}
+** sub sp, sp, #([0-9]+)
+** ...
+** ldr (x[0-9]+), \[sp, \1\]
+** ...
+** ldr (z[0-9]+), \[\2\]
+** ...
+** str \3, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_8 (int64_t *ptr, ...)
{
}
/*
-** caller_8:
+** caller_8: {target aarch64_big_endian}
** ...
** fmov (z[0-9]+\.d), #9\.0[^\n]*
** ...
** ...
** ret
*/
+/*
+** caller_8: {target aarch64_little_endian}
+** ...
+** fmov (z[0-9]+)\.d, #9\.0[^\n]*
+** ...
+** str \1, \[(x[0-9]+)\]
+** ...
+** str \2, \[sp\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_8 (int64_t *ptr)
{
/*
** callee_0:
** ...
-** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x1\]
+** ldr (z[0-9]+), \[x1\]
** ...
-** st1b \1, \2, \[x0\]
+** str \1, \[x0\]
** ...
** ret
*/
** ...
** umov (w[0-9]+), v0.b\[0\]
** ...
-** mov (z[0-9]+\.b), \1
+** mov (z[0-9]+)\.b, \1
** ...
-** st1b \2, p[0-7], \[x1\]
+** str \2, \[x1\]
** ...
** ret
*/
/*
** callee_1:
** ...
-** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x2\]
+** ldr (z[0-9]+), \[x2\]
** ...
-** st1b \1, p[0-7], \[x0\]
+** str \1, \[x0\]
** ...
** ret
*/
** ...
** umov (w[0-9]+), v0.b\[0\]
** ...
-** mov (z[0-9]+\.b), \1
+** mov (z[0-9]+)\.b, \1
** ...
-** st1b \2, p[0-7], \[x2\]
+** str \2, \[x2\]
** ...
** ret
*/
/*
** callee_7:
** ...
-** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x7\]
+** ldr (z[0-9]+), \[x7\]
** ...
-** st1b \1, p[0-7], \[x0\]
+** str \1, \[x0\]
** ...
** ret
*/
** ...
** umov (w[0-9]+), v0.b\[0\]
** ...
-** mov (z[0-9]+\.b), \1
+** mov (z[0-9]+)\.b, \1
** ...
-** st1b \2, p[0-7], \[x7\]
+** str \2, \[x7\]
** ...
** ret
*/
** ...
** ldr (x[0-9]+), \[sp, \1\]
** ...
-** ld1b (z[0-9]+\.b), (p[0-7])/z, \[\2\]
+** ldr (z[0-9]+), \[\2\]
** ...
-** st1b \3, \4, \[x0\]
+** str \3, \[x0\]
** ...
** ret
*/
** ...
** umov (w[0-9]+), v0.b\[0\]
** ...
-** mov (z[0-9]+\.b), \1
+** mov (z[0-9]+)\.b, \1
** ...
-** st1b \2, p[0-7], \[(x[0-9]+)\]
+** str \2, \[(x[0-9]+)\]
** ...
** str \3, \[sp\]
** ...
#include <stdarg.h>
/*
-** callee_0:
+** callee_0: {target aarch64_big_endian}
** ...
** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x1\]
** ...
** ...
** ret
*/
+/*
+** callee_0: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x1\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_0 (int16_t *ptr, ...)
{
}
/*
-** caller_0:
+** caller_0: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.h), #42
** ...
** ...
** ret
*/
+/*
+** caller_0: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.h, #42
+** ...
+** str \1, \[x1\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_0 (int16_t *ptr)
{
}
/*
-** callee_1:
+** callee_1: {target aarch64_big_endian}
** ...
** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x2\]
** ...
** ...
** ret
*/
+/*
+** callee_1: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x2\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_1 (int16_t *ptr, ...)
{
}
/*
-** caller_1:
+** caller_1: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.h), #42
** ...
** ...
** ret
*/
+/*
+** caller_1: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.h, #42
+** ...
+** str \1, \[x2\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_1 (int16_t *ptr)
{
}
/*
-** callee_7:
+** callee_7: {target aarch64_big_endian}
** ...
** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x7\]
** ...
** ...
** ret
*/
+/*
+** callee_7: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x7\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_7 (int16_t *ptr, ...)
{
}
/*
-** caller_7:
+** caller_7: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.h), #42
** ...
** ...
** ret
*/
+/*
+** caller_7: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.h, #42
+** ...
+** str \1, \[x7\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_7 (int16_t *ptr)
{
/* FIXME: We should be able to get rid of the va_list object. */
/*
-** callee_8:
+** callee_8: {target aarch64_big_endian}
** sub sp, sp, #([0-9]+)
** ...
** ldr (x[0-9]+), \[sp, \1\]
** ...
** ret
*/
+/*
+** callee_8: {target aarch64_little_endian}
+** sub sp, sp, #([0-9]+)
+** ...
+** ldr (x[0-9]+), \[sp, \1\]
+** ...
+** ldr (z[0-9]+), \[\2\]
+** ...
+** str \3, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_8 (int16_t *ptr, ...)
{
}
/*
-** caller_8:
+** caller_8: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.h), #42
** ...
** ...
** ret
*/
+/*
+** caller_8: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.h, #42
+** ...
+** str \1, \[(x[0-9]+)\]
+** ...
+** str \2, \[sp\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_8 (int16_t *ptr)
{
#include <stdarg.h>
/*
-** callee_0:
+** callee_0: {target aarch64_big_endian}
** ...
** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x1\]
** ...
** ...
** ret
*/
+/*
+** callee_0: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x1\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_0 (int32_t *ptr, ...)
{
}
/*
-** caller_0:
+** caller_0: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.s), #42
** ...
** ...
** ret
*/
+/*
+** caller_0: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.s, #42
+** ...
+** str \1, \[x1\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_0 (int32_t *ptr)
{
}
/*
-** callee_1:
+** callee_1: {target aarch64_big_endian}
** ...
** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x2\]
** ...
** ...
** ret
*/
+/*
+** callee_1: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x2\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_1 (int32_t *ptr, ...)
{
}
/*
-** caller_1:
+** caller_1: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.s), #42
** ...
** ...
** ret
*/
+/*
+** caller_1: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.s, #42
+** ...
+** str \1, \[x2\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_1 (int32_t *ptr)
{
}
/*
-** callee_7:
+** callee_7: {target aarch64_big_endian}
** ...
** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x7\]
** ...
** ...
** ret
*/
+/*
+** callee_7: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x7\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_7 (int32_t *ptr, ...)
{
}
/*
-** caller_7:
+** caller_7: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.s), #42
** ...
** ...
** ret
*/
+/*
+** caller_7: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.s, #42
+** ...
+** str \1, \[x7\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_7 (int32_t *ptr)
{
/* FIXME: We should be able to get rid of the va_list object. */
/*
-** callee_8:
+** callee_8: {target aarch64_big_endian}
** sub sp, sp, #([0-9]+)
** ...
** ldr (x[0-9]+), \[sp, \1\]
** ...
** ret
*/
+/*
+** callee_8: {target aarch64_little_endian}
+** sub sp, sp, #([0-9]+)
+** ...
+** ldr (x[0-9]+), \[sp, \1\]
+** ...
+** ldr (z[0-9]+), \[\2\]
+** ...
+** str \3, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_8 (int32_t *ptr, ...)
{
}
/*
-** caller_8:
+** caller_8: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.s), #42
** ...
** ...
** ret
*/
+/*
+** caller_8: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.s, #42
+** ...
+** str \1, \[(x[0-9]+)\]
+** ...
+** str \2, \[sp\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_8 (int32_t *ptr)
{
#include <stdarg.h>
/*
-** callee_0:
+** callee_0: {target aarch64_big_endian}
** ...
** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x1\]
** ...
** ...
** ret
*/
+/*
+** callee_0: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x1\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_0 (int64_t *ptr, ...)
{
}
/*
-** caller_0:
+** caller_0: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.d), #42
** ...
** ...
** ret
*/
+/*
+** caller_0: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.d, #42
+** ...
+** str \1, \[x1\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_0 (int64_t *ptr)
{
}
/*
-** callee_1:
+** callee_1: {target aarch64_big_endian}
** ...
** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x2\]
** ...
** ...
** ret
*/
+/*
+** callee_1: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x2\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_1 (int64_t *ptr, ...)
{
}
/*
-** caller_1:
+** caller_1: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.d), #42
** ...
** ...
** ret
*/
+/*
+** caller_1: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.d, #42
+** ...
+** str \1, \[x2\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_1 (int64_t *ptr)
{
}
/*
-** callee_7:
+** callee_7: {target aarch64_big_endian}
** ...
** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x7\]
** ...
** ...
** ret
*/
+/*
+** callee_7: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x7\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_7 (int64_t *ptr, ...)
{
}
/*
-** caller_7:
+** caller_7: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.d), #42
** ...
** ...
** ret
*/
+/*
+** caller_7: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.d, #42
+** ...
+** str \1, \[x7\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_7 (int64_t *ptr)
{
/* FIXME: We should be able to get rid of the va_list object. */
/*
-** callee_8:
+** callee_8: {target aarch64_big_endian}
** sub sp, sp, #([0-9]+)
** ...
** ldr (x[0-9]+), \[sp, \1\]
** ...
** ret
*/
+/*
+** callee_8: {target aarch64_little_endian}
+** sub sp, sp, #([0-9]+)
+** ...
+** ldr (x[0-9]+), \[sp, \1\]
+** ...
+** ldr (z[0-9]+), \[\2\]
+** ...
+** str \3, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_8 (int64_t *ptr, ...)
{
}
/*
-** caller_8:
+** caller_8: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.d), #42
** ...
** ...
** ret
*/
+/*
+** caller_8: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.d, #42
+** ...
+** str \1, \[(x[0-9]+)\]
+** ...
+** str \2, \[sp\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_8 (int64_t *ptr)
{
/*
** callee_0:
** ...
-** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x1\]
+** ldr (z[0-9]+), \[x1\]
** ...
-** st1b \1, \2, \[x0\]
+** str \1, \[x0\]
** ...
** ret
*/
/*
** caller_0:
** ...
-** mov (z[0-9]+\.b), #42
+** mov (z[0-9]+)\.b, #42
** ...
-** st1b \1, p[0-7], \[x1\]
+** str \1, \[x1\]
** ...
** ret
*/
/*
** callee_1:
** ...
-** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x2\]
+** ldr (z[0-9]+), \[x2\]
** ...
-** st1b \1, p[0-7], \[x0\]
+** str \1, \[x0\]
** ...
** ret
*/
/*
** caller_1:
** ...
-** mov (z[0-9]+\.b), #42
+** mov (z[0-9]+)\.b, #42
** ...
-** st1b \1, p[0-7], \[x2\]
+** str \1, \[x2\]
** ...
** ret
*/
/*
** callee_7:
** ...
-** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x7\]
+** ldr (z[0-9]+), \[x7\]
** ...
-** st1b \1, p[0-7], \[x0\]
+** str \1, \[x0\]
** ...
** ret
*/
/*
** caller_7:
** ...
-** mov (z[0-9]+\.b), #42
+** mov (z[0-9]+)\.b, #42
** ...
-** st1b \1, p[0-7], \[x7\]
+** str \1, \[x7\]
** ...
** ret
*/
** ...
** ldr (x[0-9]+), \[sp, \1\]
** ...
-** ld1b (z[0-9]+\.b), (p[0-7])/z, \[\2\]
+** ldr (z[0-9]+), \[\2\]
** ...
-** st1b \3, \4, \[x0\]
+** str \3, \[x0\]
** ...
** ret
*/
/*
** caller_8:
** ...
-** mov (z[0-9]+\.b), #42
+** mov (z[0-9]+)\.b, #42
** ...
-** st1b \1, p[0-7], \[(x[0-9]+)\]
+** str \1, \[(x[0-9]+)\]
** ...
** str \2, \[sp\]
** ...
#include <stdarg.h>
/*
-** callee_0:
+** callee_0: {target aarch64_big_endian}
** ...
** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x1\]
** ...
** ...
** ret
*/
+/*
+** callee_0: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x1\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_0 (int16_t *ptr, ...)
{
}
/*
-** caller_0:
+** caller_0: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.h), #42
** ...
** ...
** ret
*/
+/*
+** caller_0: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.h, #42
+** ...
+** str \1, \[x1\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_0 (int16_t *ptr)
{
}
/*
-** callee_1:
+** callee_1: {target aarch64_big_endian}
** ...
** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x2\]
** ...
** ...
** ret
*/
+/*
+** callee_1: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x2\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_1 (int16_t *ptr, ...)
{
}
/*
-** caller_1:
+** caller_1: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.h), #42
** ...
** ...
** ret
*/
+/*
+** caller_1: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.h, #42
+** ...
+** str \1, \[x2\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_1 (int16_t *ptr)
{
}
/*
-** callee_7:
+** callee_7: {target aarch64_big_endian}
** ...
** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x7\]
** ...
** ...
** ret
*/
+/*
+** callee_7: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x7\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_7 (int16_t *ptr, ...)
{
}
/*
-** caller_7:
+** caller_7: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.h), #42
** ...
** ...
** ret
*/
+/*
+** caller_7: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.h, #42
+** ...
+** str \1, \[x7\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_7 (int16_t *ptr)
{
/* FIXME: We should be able to get rid of the va_list object. */
/*
-** callee_8:
+** callee_8: {target aarch64_big_endian}
** sub sp, sp, #([0-9]+)
** ...
** ldr (x[0-9]+), \[sp, \1\]
** ...
** ret
*/
+/*
+** callee_8: {target aarch64_little_endian}
+** sub sp, sp, #([0-9]+)
+** ...
+** ldr (x[0-9]+), \[sp, \1\]
+** ...
+** ldr (z[0-9]+), \[\2\]
+** ...
+** str \3, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_8 (int16_t *ptr, ...)
{
}
/*
-** caller_8:
+** caller_8: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.h), #42
** ...
** ...
** ret
*/
+/*
+** caller_8: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.h, #42
+** ...
+** str \1, \[(x[0-9]+)\]
+** ...
+** str \2, \[sp\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_8 (int16_t *ptr)
{
#include <stdarg.h>
/*
-** callee_0:
+** callee_0: {target aarch64_big_endian}
** ...
** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x1\]
** ...
** ...
** ret
*/
+/*
+** callee_0: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x1\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_0 (int32_t *ptr, ...)
{
}
/*
-** caller_0:
+** caller_0: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.s), #42
** ...
** ...
** ret
*/
+/*
+** caller_0: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.s, #42
+** ...
+** str \1, \[x1\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_0 (int32_t *ptr)
{
}
/*
-** callee_1:
+** callee_1: {target aarch64_big_endian}
** ...
** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x2\]
** ...
** ...
** ret
*/
+/*
+** callee_1: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x2\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_1 (int32_t *ptr, ...)
{
}
/*
-** caller_1:
+** caller_1: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.s), #42
** ...
** ...
** ret
*/
+/*
+** caller_1: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.s, #42
+** ...
+** str \1, \[x2\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_1 (int32_t *ptr)
{
}
/*
-** callee_7:
+** callee_7: {target aarch64_big_endian}
** ...
** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x7\]
** ...
** ...
** ret
*/
+/*
+** callee_7: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x7\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_7 (int32_t *ptr, ...)
{
}
/*
-** caller_7:
+** caller_7: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.s), #42
** ...
** ...
** ret
*/
+/*
+** caller_7: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.s, #42
+** ...
+** str \1, \[x7\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_7 (int32_t *ptr)
{
/* FIXME: We should be able to get rid of the va_list object. */
/*
-** callee_8:
+** callee_8: {target aarch64_big_endian}
** sub sp, sp, #([0-9]+)
** ...
** ldr (x[0-9]+), \[sp, \1\]
** ...
** ret
*/
+/*
+** callee_8: {target aarch64_little_endian}
+** sub sp, sp, #([0-9]+)
+** ...
+** ldr (x[0-9]+), \[sp, \1\]
+** ...
+** ldr (z[0-9]+), \[\2\]
+** ...
+** str \3, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_8 (int32_t *ptr, ...)
{
}
/*
-** caller_8:
+** caller_8: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.s), #42
** ...
** ...
** ret
*/
+/*
+** caller_8: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.s, #42
+** ...
+** str \1, \[(x[0-9]+)\]
+** ...
+** str \2, \[sp\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_8 (int32_t *ptr)
{
#include <stdarg.h>
/*
-** callee_0:
+** callee_0: {target aarch64_big_endian}
** ...
** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x1\]
** ...
** ...
** ret
*/
+/*
+** callee_0: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x1\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_0 (int64_t *ptr, ...)
{
}
/*
-** caller_0:
+** caller_0: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.d), #42
** ...
** ...
** ret
*/
+/*
+** caller_0: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.d, #42
+** ...
+** str \1, \[x1\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_0 (int64_t *ptr)
{
}
/*
-** callee_1:
+** callee_1: {target aarch64_big_endian}
** ...
** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x2\]
** ...
** ...
** ret
*/
+/*
+** callee_1: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x2\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_1 (int64_t *ptr, ...)
{
}
/*
-** caller_1:
+** caller_1: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.d), #42
** ...
** ...
** ret
*/
+/*
+** caller_1: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.d, #42
+** ...
+** str \1, \[x2\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_1 (int64_t *ptr)
{
}
/*
-** callee_7:
+** callee_7: {target aarch64_big_endian}
** ...
** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x7\]
** ...
** ...
** ret
*/
+/*
+** callee_7: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x7\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_7 (int64_t *ptr, ...)
{
}
/*
-** caller_7:
+** caller_7: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.d), #42
** ...
** ...
** ret
*/
+/*
+** caller_7: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.d, #42
+** ...
+** str \1, \[x7\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_7 (int64_t *ptr)
{
/* FIXME: We should be able to get rid of the va_list object. */
/*
-** callee_8:
+** callee_8: {target aarch64_big_endian}
** sub sp, sp, #([0-9]+)
** ...
** ldr (x[0-9]+), \[sp, \1\]
** ...
** ret
*/
+/*
+** callee_8: {target aarch64_little_endian}
+** sub sp, sp, #([0-9]+)
+** ...
+** ldr (x[0-9]+), \[sp, \1\]
+** ...
+** ldr (z[0-9]+), \[\2\]
+** ...
+** str \3, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_8 (int64_t *ptr, ...)
{
}
/*
-** caller_8:
+** caller_8: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.d), #42
** ...
** ...
** ret
*/
+/*
+** caller_8: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.d, #42
+** ...
+** str \1, \[(x[0-9]+)\]
+** ...
+** str \2, \[sp\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_8 (int64_t *ptr)
{
/*
** callee_0:
** ...
-** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x1\]
+** ldr (z[0-9]+), \[x1\]
** ...
-** st1b \1, \2, \[x0\]
+** str \1, \[x0\]
** ...
** ret
*/
/*
** caller_0:
** ...
-** mov (z[0-9]+\.b), #42
+** mov (z[0-9]+)\.b, #42
** ...
-** st1b \1, p[0-7], \[x1\]
+** str \1, \[x1\]
** ...
** ret
*/
/*
** callee_1:
** ...
-** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x2\]
+** ldr (z[0-9]+), \[x2\]
** ...
-** st1b \1, p[0-7], \[x0\]
+** str \1, \[x0\]
** ...
** ret
*/
/*
** caller_1:
** ...
-** mov (z[0-9]+\.b), #42
+** mov (z[0-9]+)\.b, #42
** ...
-** st1b \1, p[0-7], \[x2\]
+** str \1, \[x2\]
** ...
** ret
*/
/*
** callee_7:
** ...
-** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x7\]
+** ldr (z[0-9]+), \[x7\]
** ...
-** st1b \1, p[0-7], \[x0\]
+** str \1, \[x0\]
** ...
** ret
*/
/*
** caller_7:
** ...
-** mov (z[0-9]+\.b), #42
+** mov (z[0-9]+)\.b, #42
** ...
-** st1b \1, p[0-7], \[x7\]
+** str \1, \[x7\]
** ...
** ret
*/
** ...
** ldr (x[0-9]+), \[sp, \1\]
** ...
-** ld1b (z[0-9]+\.b), (p[0-7])/z, \[\2\]
+** ldr (z[0-9]+), \[\2\]
** ...
-** st1b \3, \4, \[x0\]
+** str \3, \[x0\]
** ...
** ret
*/
/*
** caller_8:
** ...
-** mov (z[0-9]+\.b), #42
+** mov (z[0-9]+)\.b, #42
** ...
-** st1b \1, p[0-7], \[(x[0-9]+)\]
+** str \1, \[(x[0-9]+)\]
** ...
** str \2, \[sp\]
** ...
/* We should operate on aligned vectors. */
/* { dg-final { scan-assembler {\t(adrp|adr)\tx[0-9]+, (x|\.LANCHOR0)\n} } } */
/* We should unroll the loop three times. */
-/* { dg-final { scan-assembler-times "\tst1w\t" 3 } } */
+/* { dg-final { scan-assembler-times "\tst1w\t" 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times "\tst1w\t" 2 { target aarch64_little_endian } } } */
+/* { dg-final { scan-assembler-times "\tstr\t" 1 { target aarch64_little_endian } } } */
/* { dg-final { scan-assembler {\tptrue\t(p[0-9]+)\.s, vl7\n.*\teor\tp[0-9]+\.b, (p[0-9]+)/z, (\1\.b, \2\.b|\2\.b, \1\.b)\n} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target aarch64_little_endian } */
+
+#include <arm_sve.h>
+
+/* Define ld_<TY> and st_<TY> for element type TYPE.  ld_<TY> returns the
+   result of svld1_<TY> on X and st_<TY> passes DATA to svst1_<TY>, both
+   governed by svptrue_b<B> ().  The dg-final scans in this file expect
+   each function to use a plain LDR/STR of z0 at [x0].  */
+#define TEST(TYPE, TY, B) \
+ sv##TYPE ld_##TY (TYPE *x) \
+ { \
+ return svld1_##TY (svptrue_b##B (), x); \
+ } \
+ void st_##TY (TYPE *x, sv##TYPE data) \
+ { \
+ svst1_##TY (svptrue_b##B (), x, data); \
+ }
+
+TEST(bfloat16_t, bf16, 16)
+TEST(float16_t, f16, 16)
+TEST(float32_t, f32, 32)
+TEST(float64_t, f64, 64)
+TEST(uint8_t, u8, 8)
+TEST(uint16_t, u16, 16)
+TEST(uint32_t, u32, 32)
+TEST(uint64_t, u64, 64)
+TEST(int8_t, s8, 8)
+TEST(int16_t, s16, 16)
+TEST(int32_t, s32, 32)
+TEST(int64_t, s64, 64)
+
+/* { dg-final { scan-assembler-times {\tldr\tz0, \[x0\]} 12 } } */
+/* { dg-final { scan-assembler-times {\tstr\tz0, \[x0\]} 12 } } */
/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl32\n} 11 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl32\n} 9 { target aarch64_big_endian } } } */
-/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */
-/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */
-/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */
-/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 2 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 11 { target aarch64_little_endian } } } */
/* { dg-final { scan-assembler-not {\twhile} } } */
/* { dg-final { scan-assembler-not {\tb} } } */
/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl64\n} 11 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl64\n} 9 { target aarch64_big_endian } } } */
-/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */
-/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */
-/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */
-/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 2 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 11 { target aarch64_little_endian } } } */
/* { dg-final { scan-assembler-not {\twhile} } } */
/* { dg-final { scan-assembler-not {\tb} } } */
/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl128\n} 11 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl128\n} 9 { target aarch64_big_endian } } } */
-/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */
-/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */
-/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */
-/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 2 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 11 { target aarch64_little_endian } } } */
/* { dg-final { scan-assembler-not {\twhile} } } */
/* { dg-final { scan-assembler-not {\tb} } } */
/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl256\n} 11 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl256\n} 9 { target aarch64_big_endian } } } */
-/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */
-/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */
-/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */
-/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 2 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 11 { target aarch64_little_endian } } } */
/* { dg-final { scan-assembler-not {\twhile} } } */
/* { dg-final { scan-assembler-not {\tb} } } */