git.ipfire.org Git - thirdparty/gcc.git/commitdiff
aarch64: Extend HVLA permutations to big-endian
author Richard Sandiford <richard.sandiford@arm.com>
Mon, 21 Jul 2025 14:41:04 +0000 (15:41 +0100)
committer Richard Sandiford <richard.sandiford@arm.com>
Mon, 21 Jul 2025 14:41:04 +0000 (15:41 +0100)
TARGET_VECTORIZE_VEC_PERM_CONST has code to match the SVE2.1
"hybrid VLA" DUPQ, EXTQ, UZPQ{1,2}, and ZIPQ{1,2} instructions.
This matching was conditional on !BYTES_BIG_ENDIAN.

The ACLE code also lowered the associated SVE2.1 intrinsics into
suitable VEC_PERM_EXPRs.  This lowering was not conditional on
!BYTES_BIG_ENDIAN.

The mismatch led to lots of ICEs in the ACLE tests on big-endian
targets: we lowered to VEC_PERM_EXPRs that are not supported.

I think the !BYTES_BIG_ENDIAN restriction was unnecessary.
SVE maps the first memory element to the least significant end of
the register for both endiannesses, so no endian correction or lane
number adjustment is necessary.

This is in some ways a bit counterintuitive.  ZIPQ1 is conceptually
"apply Advanced SIMD ZIP1 to each 128-bit block" and endianness does
matter when choosing between Advanced SIMD ZIP1 and ZIP2.  For example,
the V4SI permute selector { 0, 4, 1, 5 } corresponds to ZIP1 for little-
endian and ZIP2 for big-endian.  But the difference between the hybrid
VLA and Advanced SIMD permute selectors is a consequence of the
difference between the SVE and Advanced SIMD element orders.

The same thing applies to ACLE intrinsics.  The current lowering of
svzipq1 etc. is correct for both endiannesses.  If ACLE code does:

  2x svld1_s32 + svzipq1_s32 + svst1_s32

then the byte-for-byte result is the same for both endiannesses.
On big-endian targets, this is different from using the Advanced SIMD
sequence below for each 128-bit block:

  2x LDR + ZIP1 + STR

In contrast, the byte-for-byte result of:

  2x svld1q_gather_s32 + svzipq1_s32 + svst1q_scatter_s32

depends on endianness, since the quadword gathers and scatters use
Advanced SIMD byte ordering for each 128-bit block.  This gather/scatter
sequence behaves in the same way as the Advanced SIMD LDR+ZIP1+STR
sequence for both endiannesses.

Programmers writing ACLE code have to be aware of this difference
if they want to support both endiannesses.

The patch includes some new execution tests to verify the expansion
of the VEC_PERM_EXPRs.

gcc/
* doc/sourcebuild.texi (aarch64_sve2_hw, aarch64_sve2p1_hw): Document.
* config/aarch64/aarch64.cc (aarch64_evpc_hvla): Extend to
BYTES_BIG_ENDIAN.

gcc/testsuite/
* lib/target-supports.exp (check_effective_target_aarch64_sve2p1_hw):
New proc.
* gcc.target/aarch64/sve2/dupq_1.c: Extend to big-endian.  Add
noipa attributes.
* gcc.target/aarch64/sve2/extq_1.c: Likewise.
* gcc.target/aarch64/sve2/uzpq_1.c: Likewise.
* gcc.target/aarch64/sve2/zipq_1.c: Likewise.
* gcc.target/aarch64/sve2/dupq_1_run.c: New test.
* gcc.target/aarch64/sve2/extq_1_run.c: Likewise.
* gcc.target/aarch64/sve2/uzpq_1_run.c: Likewise.
* gcc.target/aarch64/sve2/zipq_1_run.c: Likewise.

(cherry picked from commit 3b870131487d786a74f27a89d0415c8207770f14)

gcc/config/aarch64/aarch64.cc
gcc/doc/sourcebuild.texi
gcc/testsuite/gcc.target/aarch64/sve2/dupq_1.c
gcc/testsuite/gcc.target/aarch64/sve2/dupq_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve2/extq_1.c
gcc/testsuite/gcc.target/aarch64/sve2/extq_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve2/uzpq_1.c
gcc/testsuite/gcc.target/aarch64/sve2/uzpq_1_run.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve2/zipq_1.c
gcc/testsuite/gcc.target/aarch64/sve2/zipq_1_run.c [new file with mode: 0644]
gcc/testsuite/lib/target-supports.exp

index cfbf0a7eeba6b5a33e9e3d4cb84d754b186d8bea..656312d364413a66dbdb71c363118aefe89a6a22 100644 (file)
@@ -26518,7 +26518,6 @@ aarch64_evpc_hvla (struct expand_vec_perm_d *d)
   machine_mode vmode = d->vmode;
   if (!TARGET_SVE2p1
       || !TARGET_NON_STREAMING
-      || BYTES_BIG_ENDIAN
       || d->vec_flags != VEC_SVE_DATA
       || GET_MODE_UNIT_BITSIZE (vmode) > 64)
     return false;
index c29cd3f5207e80f9530d26d9b2d429dd349a7dfa..2e3159011d6d7603316c55f6f6c3089c00bbdd0a 100644 (file)
@@ -2373,6 +2373,12 @@ whether it does so by default).
 @itemx aarch64_sve1024_hw
 @itemx aarch64_sve2048_hw
 Like @code{aarch64_sve_hw}, but also test for an exact hardware vector length.
+@item aarch64_sve2_hw
+AArch64 target that is able to generate and execute SVE2 code (regardless of
+whether it does so by default).
+@item aarch64_sve2p1_hw
+AArch64 target that is able to generate and execute SVE2.1 code (regardless of
+whether it does so by default).
 
 @item aarch64_fjcvtzs_hw
 AArch64 target that is able to generate and execute armv8.3-a FJCVTZS
index 5472e30f812ac41d520e9c08ef5c1f170ff64c5d..9db60b1ea4f21270049506ba3e2e9faa1c1cd0c7 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-options "-O2 -msve-vector-bits=256" } */
-/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
 
 #include <arm_sve.h>
 
@@ -15,7 +15,7 @@ typedef svuint64_t fixed_uint64_t __attribute__((arm_sve_vector_bits(256)));
 **     trn1    z0\.d, z0\.d, z0\.d
 **     ret
 */
-fixed_uint64_t
+[[gnu::noipa]] fixed_uint64_t
 f1 (fixed_uint64_t z0)
 {
   return __builtin_shufflevector (z0, z0, 0, 0, 2, 2);
@@ -26,7 +26,7 @@ f1 (fixed_uint64_t z0)
 **     trn2    z0\.d, z0\.d, z0\.d
 **     ret
 */
-fixed_uint64_t
+[[gnu::noipa]] fixed_uint64_t
 f2 (fixed_uint64_t z0)
 {
   return __builtin_shufflevector (z0, z0, 1, 1, 3, 3);
@@ -37,7 +37,7 @@ f2 (fixed_uint64_t z0)
 **     dupq    z0\.s, z0\.s\[0\]
 **     ret
 */
-fixed_int32_t
+[[gnu::noipa]] fixed_int32_t
 f3 (fixed_int32_t z0)
 {
   return __builtin_shufflevector (z0, z0, 0, 0, 0, 0, 4, 4, 4, 4);
@@ -48,7 +48,7 @@ f3 (fixed_int32_t z0)
 **     dupq    z0\.s, z0\.s\[1\]
 **     ret
 */
-fixed_int32_t
+[[gnu::noipa]] fixed_int32_t
 f4 (fixed_int32_t z0)
 {
   return __builtin_shufflevector (z0, z0, 1, 1, 1, 1, 5, 5, 5, 5);
@@ -59,7 +59,7 @@ f4 (fixed_int32_t z0)
 **     dupq    z0\.s, z0\.s\[2\]
 **     ret
 */
-fixed_int32_t
+[[gnu::noipa]] fixed_int32_t
 f5 (fixed_int32_t z0)
 {
   return __builtin_shufflevector (z0, z0, 2, 2, 2, 2, 6, 6, 6, 6);
@@ -70,7 +70,7 @@ f5 (fixed_int32_t z0)
 **     dupq    z0\.s, z0\.s\[3\]
 **     ret
 */
-fixed_int32_t
+[[gnu::noipa]] fixed_int32_t
 f6 (fixed_int32_t z0)
 {
   return __builtin_shufflevector (z0, z0, 3, 3, 3, 3, 7, 7, 7, 7);
@@ -81,7 +81,7 @@ f6 (fixed_int32_t z0)
 **     dupq    z0\.h, z0\.h\[0\]
 **     ret
 */
-fixed_uint16_t
+[[gnu::noipa]] fixed_uint16_t
 f7 (fixed_uint16_t z0)
 {
   return __builtin_shufflevector (z0, z0,
@@ -95,7 +95,7 @@ f7 (fixed_uint16_t z0)
 **     dupq    z0\.h, z0\.h\[5\]
 **     ret
 */
-fixed_uint16_t
+[[gnu::noipa]] fixed_uint16_t
 f8 (fixed_uint16_t z0)
 {
   return __builtin_shufflevector (z0, z0,
@@ -108,7 +108,7 @@ f8 (fixed_uint16_t z0)
 **     dupq    z0\.h, z0\.h\[7\]
 **     ret
 */
-fixed_uint16_t
+[[gnu::noipa]] fixed_uint16_t
 f9 (fixed_uint16_t z0)
 {
   return __builtin_shufflevector (z0, z0,
@@ -121,7 +121,7 @@ f9 (fixed_uint16_t z0)
 **     dupq    z0\.b, z0\.b\[0\]
 **     ret
 */
-fixed_uint8_t
+[[gnu::noipa]] fixed_uint8_t
 f10 (fixed_uint8_t z0)
 {
   return __builtin_shufflevector (z0, z0,
@@ -136,7 +136,7 @@ f10 (fixed_uint8_t z0)
 **     dupq    z0\.b, z0\.b\[13\]
 **     ret
 */
-fixed_uint8_t
+[[gnu::noipa]] fixed_uint8_t
 f11 (fixed_uint8_t z0)
 {
   return __builtin_shufflevector (z0, z0,
@@ -151,7 +151,7 @@ f11 (fixed_uint8_t z0)
 **     dupq    z0\.b, z0\.b\[15\]
 **     ret
 */
-fixed_uint8_t
+[[gnu::noipa]] fixed_uint8_t
 f12 (fixed_uint8_t z0)
 {
   return __builtin_shufflevector (z0, z0,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/dupq_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve2/dupq_1_run.c
new file mode 100644 (file)
index 0000000..fd25034
--- /dev/null
@@ -0,0 +1,87 @@
+/* { dg-do run { target { aarch64_sve256_hw && aarch64_sve2p1_hw } } } */
+/* { dg-options "-O2 -msve-vector-bits=256" } */
+
+#include "dupq_1.c"
+
+#define TEST(A, B)                                                     \
+  do {                                                                 \
+    typeof(B) actual_ = (A);                                           \
+    if (__builtin_memcmp (&actual_, &(B), sizeof (actual_)) != 0)      \
+      __builtin_abort ();                                              \
+  } while (0)
+
+int
+main ()
+{
+  fixed_uint64_t a64 = { 0x1122, -1, 0x5566, -2 };
+  fixed_int32_t a32 = { 0x1122, -0x3344, 0x5566, -0x7788,
+                       0x99aa, -0xbbcc, 0xddee, -0xff00 };
+  fixed_uint16_t a16 = { 0x9a12, 0xbc34, 0xde56, 0xf078,
+                        0x00ff, 0x11ee, 0x22dd, 0x33cc,
+                        0x44bb, 0x55aa, 0x6699, 0x7788,
+                        0xfe01, 0xdc23, 0xba45, 0x9867 };
+  fixed_uint8_t a8 = { 0x01, 0x12, 0x23, 0x34, 0x45, 0x56, 0x67, 0x70,
+                      0x89, 0x9a, 0xab, 0xbc, 0xcd, 0xde, 0xef, 0xf8,
+                      0xfe, 0xed, 0xdc, 0xcb, 0xba, 0xa9, 0x98, 0x8f,
+                      0x76, 0x65, 0x54, 0x43, 0x32, 0x21, 0x10, 0x07 };
+
+  fixed_uint64_t expected1 = { 0x1122, 0x1122, 0x5566, 0x5566 };
+  TEST (f1 (a64), expected1);
+
+  fixed_uint64_t expected2 = { -1, -1, -2, -2 };
+  TEST (f2 (a64), expected2);
+
+  fixed_int32_t expected3 = { 0x1122, 0x1122, 0x1122, 0x1122,
+                              0x99aa, 0x99aa, 0x99aa, 0x99aa };
+  TEST (f3 (a32), expected3);
+
+  fixed_int32_t expected4 = { -0x3344, -0x3344, -0x3344, -0x3344,
+                             -0xbbcc, -0xbbcc, -0xbbcc, -0xbbcc };
+  TEST (f4 (a32), expected4);
+
+  fixed_int32_t expected5 = { 0x5566, 0x5566, 0x5566, 0x5566,
+                              0xddee, 0xddee, 0xddee, 0xddee };
+  TEST (f5 (a32), expected5);
+
+  fixed_int32_t expected6 = { -0x7788, -0x7788, -0x7788, -0x7788,
+                             -0xff00, -0xff00, -0xff00, -0xff00 };
+  TEST (f6 (a32), expected6);
+
+  fixed_uint16_t expected7 = { 0x9a12, 0x9a12, 0x9a12, 0x9a12,
+                              0x9a12, 0x9a12, 0x9a12, 0x9a12,
+                              0x44bb, 0x44bb, 0x44bb, 0x44bb,
+                              0x44bb, 0x44bb, 0x44bb, 0x44bb };
+  TEST (f7 (a16), expected7);
+
+  fixed_uint16_t expected8 = { 0x11ee, 0x11ee, 0x11ee, 0x11ee,
+                              0x11ee, 0x11ee, 0x11ee, 0x11ee,
+                              0xdc23, 0xdc23, 0xdc23, 0xdc23,
+                              0xdc23, 0xdc23, 0xdc23, 0xdc23 };
+  TEST (f8 (a16), expected8);
+
+  fixed_uint16_t expected9 = { 0x33cc, 0x33cc, 0x33cc, 0x33cc,
+                              0x33cc, 0x33cc, 0x33cc, 0x33cc,
+                              0x9867, 0x9867, 0x9867, 0x9867,
+                              0x9867, 0x9867, 0x9867, 0x9867 };
+  TEST (f9 (a16), expected9);
+
+  fixed_uint8_t expected10 = { 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+                              0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+                              0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe,
+                              0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe };
+  TEST (f10 (a8), expected10);
+
+  fixed_uint8_t expected11 = { 0xde, 0xde, 0xde, 0xde, 0xde, 0xde, 0xde, 0xde,
+                              0xde, 0xde, 0xde, 0xde, 0xde, 0xde, 0xde, 0xde,
+                              0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21,
+                              0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21 };
+  TEST (f11 (a8), expected11);
+
+  fixed_uint8_t expected12 = { 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
+                              0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
+                              0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+                              0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07 };
+  TEST (f12 (a8), expected12);
+
+  return 0;
+}
index 03c5fb143f7e2434c6e410e89585b6b7aa441992..be5ae71de83cf8590b7bd743e13802ff27e60975 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-options "-O2 -msve-vector-bits=256" } */
-/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
 
 #include <arm_sve.h>
 
@@ -15,7 +15,7 @@ typedef svfloat64_t fixed_float64_t __attribute__((arm_sve_vector_bits(256)));
 **     extq    z0\.b, z0\.b, z1\.b, #8
 **     ret
 */
-fixed_float64_t
+[[gnu::noipa]] fixed_float64_t
 f1 (fixed_float64_t z0, fixed_float64_t z1)
 {
   return __builtin_shufflevector (z0, z1, 1, 4, 3, 6);
@@ -26,7 +26,7 @@ f1 (fixed_float64_t z0, fixed_float64_t z1)
 **     extq    z0\.b, z0\.b, z1\.b, #4
 **     ret
 */
-fixed_uint32_t
+[[gnu::noipa]] fixed_uint32_t
 f2 (fixed_uint32_t z0, fixed_uint32_t z1)
 {
   return __builtin_shufflevector (z0, z1, 1, 2, 3, 8, 5, 6, 7, 12);
@@ -37,7 +37,7 @@ f2 (fixed_uint32_t z0, fixed_uint32_t z1)
 **     extq    z0\.b, z0\.b, z1\.b, #12
 **     ret
 */
-fixed_uint32_t
+[[gnu::noipa]] fixed_uint32_t
 f3 (fixed_uint32_t z0, fixed_uint32_t z1)
 {
   return __builtin_shufflevector (z0, z1, 3, 8, 9, 10, 7, 12, 13, 14);
@@ -48,7 +48,7 @@ f3 (fixed_uint32_t z0, fixed_uint32_t z1)
 **     extq    z0\.b, z0\.b, z1\.b, #2
 **     ret
 */
-fixed_float16_t
+[[gnu::noipa]] fixed_float16_t
 f4 (fixed_float16_t z0, fixed_float16_t z1)
 {
   return __builtin_shufflevector (z0, z1,
@@ -61,7 +61,7 @@ f4 (fixed_float16_t z0, fixed_float16_t z1)
 **     extq    z0\.b, z0\.b, z1\.b, #10
 **     ret
 */
-fixed_float16_t
+[[gnu::noipa]] fixed_float16_t
 f5 (fixed_float16_t z0, fixed_float16_t z1)
 {
   return __builtin_shufflevector (z0, z1,
@@ -74,7 +74,7 @@ f5 (fixed_float16_t z0, fixed_float16_t z1)
 **     extq    z0\.b, z0\.b, z1\.b, #14
 **     ret
 */
-fixed_float16_t
+[[gnu::noipa]] fixed_float16_t
 f6 (fixed_float16_t z0, fixed_float16_t z1)
 {
   return __builtin_shufflevector (z0, z1,
@@ -87,7 +87,7 @@ f6 (fixed_float16_t z0, fixed_float16_t z1)
 **     extq    z0\.b, z0\.b, z1\.b, #1
 **     ret
 */
-fixed_int8_t
+[[gnu::noipa]] fixed_int8_t
 f7 (fixed_int8_t z0, fixed_int8_t z1)
 {
   return __builtin_shufflevector (z0, z1,
@@ -102,7 +102,7 @@ f7 (fixed_int8_t z0, fixed_int8_t z1)
 **     extq    z0\.b, z0\.b, z1\.b, #11
 **     ret
 */
-fixed_int8_t
+[[gnu::noipa]] fixed_int8_t
 f8 (fixed_int8_t z0, fixed_int8_t z1)
 {
   return __builtin_shufflevector (z0, z1,
@@ -117,7 +117,7 @@ f8 (fixed_int8_t z0, fixed_int8_t z1)
 **     extq    z0\.b, z0\.b, z1\.b, #15
 **     ret
 */
-fixed_int8_t
+[[gnu::noipa]] fixed_int8_t
 f9 (fixed_int8_t z0, fixed_int8_t z1)
 {
   return __builtin_shufflevector (z0, z1,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/extq_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve2/extq_1_run.c
new file mode 100644 (file)
index 0000000..6b72c98
--- /dev/null
@@ -0,0 +1,73 @@
+/* { dg-do run { target { aarch64_sve256_hw && aarch64_sve2p1_hw } } } */
+/* { dg-options "-O2 -msve-vector-bits=256" } */
+
+#include "extq_1.c"
+
+#define TEST(A, B)                                                     \
+  do {                                                                 \
+    typeof(B) actual_ = (A);                                           \
+    if (__builtin_memcmp (&actual_, &(B), sizeof (actual_)) != 0)      \
+      __builtin_abort ();                                              \
+  } while (0)
+
+int
+main ()
+{
+  fixed_float64_t a64 = { 1.5, 3.75, -5.25, 9 };
+  fixed_float64_t b64 = { -2, 4.125, -6.375, 11.5 };
+  fixed_float64_t expected1 = { 3.75, -2, 9, -6.375 };
+  TEST (f1 (a64, b64), expected1);
+
+  fixed_uint32_t a32 = { 0x1122, -0x3344, 0x5566, -0x7788,
+                        0x99aa, -0xbbcc, 0xddee, -0xff00 };
+  fixed_uint32_t b32 = { 1 << 20, 1 << 21, 1 << 22, 1 << 23,
+                        5 << 6, 5 << 7, 5 << 8, 5 << 9 };
+  fixed_uint32_t expected2 = { -0x3344, 0x5566, -0x7788, 1 << 20,
+                              -0xbbcc, 0xddee, -0xff00, 5 << 6 };
+  fixed_uint32_t expected3 = { -0x7788, 1 << 20, 1 << 21, 1 << 22,
+                              -0xff00, 5 << 6, 5 << 7, 5 << 8 };
+  TEST (f2 (a32, b32), expected2);
+  TEST (f3 (a32, b32), expected3);
+
+  fixed_float16_t a16 = { 0.5, 0.75, 1, 1.25, 1.5, 1.75, 2, 2.25,
+                         2.5, 2.75, 3, 3.25, 3.5, 3.75, 4, 4.25 };
+  fixed_float16_t b16 = { -0.5, -0.75, -1, -1.25, -1.5, -1.75, -2, -2.25,
+                         -2.5, -2.75, -3, -3.25, -3.5, -3.75, -4, -4.25 };
+  fixed_float16_t expected4 = { 0.75, 1, 1.25, 1.5, 1.75, 2, 2.25, -0.5,
+                               2.75, 3, 3.25, 3.5, 3.75, 4, 4.25, -2.5 };
+  fixed_float16_t expected5 = { 1.75, 2, 2.25, -0.5, -0.75, -1, -1.25, -1.5,
+                               3.75, 4, 4.25, -2.5, -2.75, -3, -3.25, -3.5 };
+  fixed_float16_t expected6 = { 2.25, -0.5, -0.75, -1,
+                               -1.25, -1.5, -1.75, -2,
+                               4.25, -2.5, -2.75, -3,
+                               -3.25, -3.5, -3.75, -4 };
+  TEST (f4 (a16, b16), expected4);
+  TEST (f5 (a16, b16), expected5);
+  TEST (f6 (a16, b16), expected6);
+
+  fixed_int8_t a8 = { 0x01, 0x12, 0x23, 0x34, 0x45, 0x56, 0x67, 0x70,
+                     0x89, 0x9a, 0xab, 0xbc, 0xcd, 0xde, 0xef, 0xf8,
+                     0xfe, 0xed, 0xdc, 0xcb, 0xba, 0xa9, 0x98, 0x8f,
+                     0x76, 0x65, 0x54, 0x43, 0x32, 0x21, 0x10, 0x07 };
+  fixed_int8_t b8 = { 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88,
+                     0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, 0x00,
+                     0x13, 0x24, 0x35, 0x46, 0x57, 0x68, 0x79, 0x8a,
+                     0x9b, 0xac, 0xbd, 0xce, 0xdf, 0xe0, 0xf1, 0x02 };
+  fixed_int8_t expected7 = { 0x12, 0x23, 0x34, 0x45, 0x56, 0x67, 0x70, 0x89,
+                            0x9a, 0xab, 0xbc, 0xcd, 0xde, 0xef, 0xf8, 0x11,
+                            0xed, 0xdc, 0xcb, 0xba, 0xa9, 0x98, 0x8f, 0x76,
+                            0x65, 0x54, 0x43, 0x32, 0x21, 0x10, 0x07, 0x13 };
+  fixed_int8_t expected8 = { 0xbc, 0xcd, 0xde, 0xef, 0xf8, 0x11, 0x22, 0x33,
+                            0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb,
+                            0x43, 0x32, 0x21, 0x10, 0x07, 0x13, 0x24, 0x35,
+                            0x46, 0x57, 0x68, 0x79, 0x8a, 0x9b, 0xac, 0xbd };
+  fixed_int8_t expected9 = { 0xf8, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
+                            0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff,
+                            0x07, 0x13, 0x24, 0x35, 0x46, 0x57, 0x68, 0x79,
+                            0x8a, 0x9b, 0xac, 0xbd, 0xce, 0xdf, 0xe0, 0xf1 };
+  TEST (f7 (a8, b8), expected7);
+  TEST (f8 (a8, b8), expected8);
+  TEST (f9 (a8, b8), expected9);
+
+  return 0;
+}
index f923e9447ec3997710f5c8b1ec8216f2958d7dfd..587f67076b644c9f6a88834847b8e12c7bcb6f9f 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-options "-O2 -msve-vector-bits=256" } */
-/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
 
 #include <arm_sve.h>
 
@@ -15,7 +15,7 @@ typedef svint64_t fixed_int64_t __attribute__((arm_sve_vector_bits(256)));
 **     trn1    z0\.d, z0\.d, z1\.d
 **     ret
 */
-fixed_int64_t
+[[gnu::noipa]] fixed_int64_t
 f1 (fixed_int64_t z0, fixed_int64_t z1)
 {
   return __builtin_shufflevector (z0, z1, 0, 4, 2, 6);
@@ -26,7 +26,7 @@ f1 (fixed_int64_t z0, fixed_int64_t z1)
 **     trn2    z0\.d, z0\.d, z1\.d
 **     ret
 */
-fixed_int64_t
+[[gnu::noipa]] fixed_int64_t
 f2 (fixed_int64_t z0, fixed_int64_t z1)
 {
   return __builtin_shufflevector (z0, z1, 1, 5, 3, 7);
@@ -37,7 +37,7 @@ f2 (fixed_int64_t z0, fixed_int64_t z1)
 **     uzpq1   z0\.s, z0\.s, z1\.s
 **     ret
 */
-fixed_float32_t
+[[gnu::noipa]] fixed_float32_t
 f3 (fixed_float32_t z0, fixed_float32_t z1)
 {
   return __builtin_shufflevector (z0, z1, 0, 2, 8, 10, 4, 6, 12, 14);
@@ -48,7 +48,7 @@ f3 (fixed_float32_t z0, fixed_float32_t z1)
 **     uzpq2   z0\.s, z0\.s, z1\.s
 **     ret
 */
-fixed_float32_t
+[[gnu::noipa]] fixed_float32_t
 f4 (fixed_float32_t z0, fixed_float32_t z1)
 {
   return __builtin_shufflevector (z0, z1, 1, 3, 9, 11, 5, 7, 13, 15);
@@ -59,7 +59,7 @@ f4 (fixed_float32_t z0, fixed_float32_t z1)
 **     uzpq1   z0\.h, z0\.h, z1\.h
 **     ret
 */
-fixed_bfloat16_t
+[[gnu::noipa]] fixed_bfloat16_t
 f5 (fixed_bfloat16_t z0, fixed_bfloat16_t z1)
 {
   return __builtin_shufflevector (z0, z1,
@@ -72,7 +72,7 @@ f5 (fixed_bfloat16_t z0, fixed_bfloat16_t z1)
 **     uzpq2   z0\.h, z0\.h, z1\.h
 **     ret
 */
-fixed_bfloat16_t
+[[gnu::noipa]] fixed_bfloat16_t
 f6 (fixed_bfloat16_t z0, fixed_bfloat16_t z1)
 {
   return __builtin_shufflevector (z0, z1,
@@ -85,7 +85,7 @@ f6 (fixed_bfloat16_t z0, fixed_bfloat16_t z1)
 **     uzpq1   z0\.b, z0\.b, z1\.b
 **     ret
 */
-fixed_uint8_t
+[[gnu::noipa]] fixed_uint8_t
 f7 (fixed_uint8_t z0, fixed_uint8_t z1)
 {
   return __builtin_shufflevector (z0, z1,
@@ -100,7 +100,7 @@ f7 (fixed_uint8_t z0, fixed_uint8_t z1)
 **     uzpq2   z0\.b, z0\.b, z1\.b
 **     ret
 */
-fixed_uint8_t
+[[gnu::noipa]] fixed_uint8_t
 f8 (fixed_uint8_t z0, fixed_uint8_t z1)
 {
   return __builtin_shufflevector (z0, z1,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/uzpq_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve2/uzpq_1_run.c
new file mode 100644 (file)
index 0000000..9044cae
--- /dev/null
@@ -0,0 +1,78 @@
+/* { dg-do run { target { aarch64_sve256_hw && aarch64_sve2p1_hw } } } */
+/* { dg-options "-O2 -msve-vector-bits=256" } */
+
+#include "uzpq_1.c"
+
+typedef svuint16_t fixed_uint16_t __attribute__((arm_sve_vector_bits(256)));
+
+#define TEST(A, B)                                                     \
+  do {                                                                 \
+    typeof(A) actual_ = (A);                                           \
+    if (__builtin_memcmp (&actual_, &(B), sizeof (actual_)) != 0)      \
+      __builtin_abort ();                                              \
+  } while (0)
+
+int
+main ()
+{
+  fixed_int64_t a64 = { 0x1122LL << 31, -1LL << 47, 0x5566 << 15, -2 };
+  fixed_int64_t b64 = { 42, -0x3344LL << 19, 303, -0x7788LL << 27 };
+  fixed_int64_t expected1 = { 0x1122LL << 31, 42,
+                             0x5566 << 15, 303 };
+  fixed_int64_t expected2 = { -1LL << 47, -0x3344LL << 19,
+                             -2, -0x7788LL << 27 };
+  TEST (f1 (a64, b64), expected1);
+  TEST (f2 (a64, b64), expected2);
+
+  fixed_float32_t a32 = { 0.5, 0.75, 1, 1.25, 2.5, 2.75, 3, 3.25 };
+  fixed_float32_t b32 = { -0.5, -0.75, -1, -1.25, -2.5, -2.75, -3, -3.25 };
+  fixed_float32_t expected3 = { 0.5, 1, -0.5, -1,
+                               2.5, 3, -2.5, -3 };
+  fixed_float32_t expected4 = { 0.75, 1.25, -0.75, -1.25,
+                               2.75, 3.25, -2.75, -3.25 };
+  TEST (f3 (a32, b32), expected3);
+  TEST (f4 (a32, b32), expected4);
+
+  fixed_uint16_t a16_i = { 0x9a12, 0xbc34, 0xde56, 0xf078,
+                          0x00ff, 0x11ee, 0x22dd, 0x33cc,
+                          0x44bb, 0x55aa, 0x6699, 0x7788,
+                          0xfe01, 0xdc23, 0xba45, 0x9867 };
+  fixed_uint16_t b16_i = { 0x1010, 0x2020, 0x3030, 0x4040,
+                          0x5050, 0x6060, 0x7070, 0x8080,
+                          0x9090, 0xa0a0, 0xb0b0, 0xc0c0,
+                          0xd0d0, 0xe0e0, 0xf0f0, 0x0f0f };
+  fixed_uint16_t expected5 = { 0x9a12, 0xde56, 0x00ff, 0x22dd,
+                              0x1010, 0x3030, 0x5050, 0x7070,
+                              0x44bb, 0x6699, 0xfe01, 0xba45,
+                              0x9090, 0xb0b0, 0xd0d0, 0xf0f0 };
+  fixed_uint16_t expected6 = { 0xbc34, 0xf078, 0x11ee, 0x33cc,
+                              0x2020, 0x4040, 0x6060, 0x8080,
+                              0x55aa, 0x7788, 0xdc23, 0x9867,
+                              0xa0a0, 0xc0c0, 0xe0e0, 0x0f0f };
+  fixed_bfloat16_t a16, b16;
+  __builtin_memcpy (&a16, &a16_i, sizeof (a16));
+  __builtin_memcpy (&b16, &b16_i, sizeof (b16));
+  TEST (f5 (a16, b16), expected5);
+  TEST (f6 (a16, b16), expected6);
+
+  fixed_uint8_t a8 = { 0x01, 0x12, 0x23, 0x34, 0x45, 0x56, 0x67, 0x70,
+                      0x89, 0x9a, 0xab, 0xbc, 0xcd, 0xde, 0xef, 0xf8,
+                      0xfe, 0xed, 0xdc, 0xcb, 0xba, 0xa9, 0x98, 0x8f,
+                      0x76, 0x65, 0x54, 0x43, 0x32, 0x21, 0x10, 0x07 };
+  fixed_uint8_t b8 = { 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88,
+                      0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, 0x00,
+                      0x13, 0x24, 0x35, 0x46, 0x57, 0x68, 0x79, 0x8a,
+                      0x9b, 0xac, 0xbd, 0xce, 0xdf, 0xe0, 0xf1, 0x02 };
+  fixed_uint8_t expected7 = { 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
+                             0x11, 0x33, 0x55, 0x77, 0x99, 0xbb, 0xdd, 0xff,
+                             0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10,
+                             0x13, 0x35, 0x57, 0x79, 0x9b, 0xbd, 0xdf, 0xf1 };
+  fixed_uint8_t expected8 = { 0x12, 0x34, 0x56, 0x70, 0x9a, 0xbc, 0xde, 0xf8,
+                             0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0x00,
+                             0xed, 0xcb, 0xa9, 0x8f, 0x65, 0x43, 0x21, 0x07,
+                             0x24, 0x46, 0x68, 0x8a, 0xac, 0xce, 0xe0, 0x02 };
+  TEST (f7 (a8, b8), expected7);
+  TEST (f8 (a8, b8), expected8);
+
+  return 0;
+}
index fa420a959c723999492f8db3909db4a1d70de3eb..76fb4b4440b714262a1c5d68c18034d51c57d154 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-options "-O2 -msve-vector-bits=256" } */
-/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
 
 #include <arm_sve.h>
 
@@ -15,7 +15,7 @@ typedef svint64_t fixed_int64_t __attribute__((arm_sve_vector_bits(256)));
 **     trn1    z0\.d, z0\.d, z1\.d
 **     ret
 */
-fixed_int64_t
+[[gnu::noipa]] fixed_int64_t
 f1 (fixed_int64_t z0, fixed_int64_t z1)
 {
   return __builtin_shufflevector (z0, z1, 0, 4, 2, 6);
@@ -26,7 +26,7 @@ f1 (fixed_int64_t z0, fixed_int64_t z1)
 **     trn2    z0\.d, z0\.d, z1\.d
 **     ret
 */
-fixed_int64_t
+[[gnu::noipa]] fixed_int64_t
 f2 (fixed_int64_t z0, fixed_int64_t z1)
 {
   return __builtin_shufflevector (z0, z1, 1, 5, 3, 7);
@@ -37,7 +37,7 @@ f2 (fixed_int64_t z0, fixed_int64_t z1)
 **     zipq1   z0\.s, z0\.s, z1\.s
 **     ret
 */
-fixed_float32_t
+[[gnu::noipa]] fixed_float32_t
 f3 (fixed_float32_t z0, fixed_float32_t z1)
 {
   return __builtin_shufflevector (z0, z1, 0, 8, 1, 9, 4, 12, 5, 13);
@@ -48,7 +48,7 @@ f3 (fixed_float32_t z0, fixed_float32_t z1)
 **     zipq2   z0\.s, z0\.s, z1\.s
 **     ret
 */
-fixed_float32_t
+[[gnu::noipa]] fixed_float32_t
 f4 (fixed_float32_t z0, fixed_float32_t z1)
 {
   return __builtin_shufflevector (z0, z1, 2, 10, 3, 11, 6, 14, 7, 15);
@@ -59,7 +59,7 @@ f4 (fixed_float32_t z0, fixed_float32_t z1)
 **     zipq1   z0\.h, z0\.h, z1\.h
 **     ret
 */
-fixed_bfloat16_t
+[[gnu::noipa]] fixed_bfloat16_t
 f5 (fixed_bfloat16_t z0, fixed_bfloat16_t z1)
 {
   return __builtin_shufflevector (z0, z1,
@@ -72,7 +72,7 @@ f5 (fixed_bfloat16_t z0, fixed_bfloat16_t z1)
 **     zipq2   z0\.h, z0\.h, z1\.h
 **     ret
 */
-fixed_bfloat16_t
+[[gnu::noipa]] fixed_bfloat16_t
 f6 (fixed_bfloat16_t z0, fixed_bfloat16_t z1)
 {
   return __builtin_shufflevector (z0, z1,
@@ -85,7 +85,7 @@ f6 (fixed_bfloat16_t z0, fixed_bfloat16_t z1)
 **     zipq1   z0\.b, z0\.b, z1\.b
 **     ret
 */
-fixed_uint8_t
+[[gnu::noipa]] fixed_uint8_t
 f7 (fixed_uint8_t z0, fixed_uint8_t z1)
 {
   return __builtin_shufflevector (z0, z1,
@@ -100,7 +100,7 @@ f7 (fixed_uint8_t z0, fixed_uint8_t z1)
 **     zipq2   z0\.b, z0\.b, z1\.b
 **     ret
 */
-fixed_uint8_t
+[[gnu::noipa]] fixed_uint8_t
 f8 (fixed_uint8_t z0, fixed_uint8_t z1)
 {
   return __builtin_shufflevector (z0, z1,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/zipq_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve2/zipq_1_run.c
new file mode 100644 (file)
index 0000000..211f9d9
--- /dev/null
@@ -0,0 +1,78 @@
+/* { dg-do run { target { aarch64_sve256_hw && aarch64_sve2p1_hw } } } */
+/* { dg-options "-O2 -msve-vector-bits=256" } */
+
+#include "zipq_1.c"
+
+typedef svuint16_t fixed_uint16_t __attribute__((arm_sve_vector_bits(256)));
+
+#define TEST(A, B)                                                     \
+  do {                                                                 \
+    typeof(A) actual_ = (A);                                           \
+    if (__builtin_memcmp (&actual_, &(B), sizeof (actual_)) != 0)      \
+      __builtin_abort ();                                              \
+  } while (0)
+
+int
+main ()
+{
+  fixed_int64_t a64 = { 0x1122LL << 31, -1LL << 47, 0x5566 << 15, -2 };
+  fixed_int64_t b64 = { 42, -0x3344LL << 19, 303, -0x7788LL << 27 };
+  fixed_int64_t expected1 = { 0x1122LL << 31, 42,
+                             0x5566 << 15, 303 };
+  fixed_int64_t expected2 = { -1LL << 47, -0x3344LL << 19,
+                             -2, -0x7788LL << 27 };
+  TEST (f1 (a64, b64), expected1);
+  TEST (f2 (a64, b64), expected2);
+
+  fixed_float32_t a32 = { 0.5, 0.75, 1, 1.25, 2.5, 2.75, 3, 3.25 };
+  fixed_float32_t b32 = { -0.5, -0.75, -1, -1.25, -2.5, -2.75, -3, -3.25 };
+  fixed_float32_t expected3 = { 0.5, -0.5, 0.75, -0.75,
+                               2.5, -2.5, 2.75, -2.75 };
+  fixed_float32_t expected4 = { 1, -1, 1.25, -1.25,
+                               3, -3, 3.25, -3.25 };
+  TEST (f3 (a32, b32), expected3);
+  TEST (f4 (a32, b32), expected4);
+
+  fixed_uint16_t a16_i = { 0x9a12, 0xbc34, 0xde56, 0xf078,
+                          0x00ff, 0x11ee, 0x22dd, 0x33cc,
+                          0x44bb, 0x55aa, 0x6699, 0x7788,
+                          0xfe01, 0xdc23, 0xba45, 0x9867 };
+  fixed_uint16_t b16_i = { 0x1010, 0x2020, 0x3030, 0x4040,
+                          0x5050, 0x6060, 0x7070, 0x8080,
+                          0x9090, 0xa0a0, 0xb0b0, 0xc0c0,
+                          0xd0d0, 0xe0e0, 0xf0f0, 0x0f0f };
+  fixed_uint16_t expected5 = { 0x9a12, 0x1010, 0xbc34, 0x2020,
+                              0xde56, 0x3030, 0xf078, 0x4040,
+                              0x44bb, 0x9090, 0x55aa, 0xa0a0,
+                              0x6699, 0xb0b0, 0x7788, 0xc0c0 };
+  fixed_uint16_t expected6 = { 0x00ff, 0x5050, 0x11ee, 0x6060,
+                              0x22dd, 0x7070, 0x33cc, 0x8080,
+                              0xfe01, 0xd0d0, 0xdc23, 0xe0e0,
+                              0xba45, 0xf0f0, 0x9867, 0x0f0f };
+  fixed_bfloat16_t a16, b16;
+  __builtin_memcpy (&a16, &a16_i, sizeof (a16));
+  __builtin_memcpy (&b16, &b16_i, sizeof (b16));
+  TEST (f5 (a16, b16), expected5);
+  TEST (f6 (a16, b16), expected6);
+
+  fixed_uint8_t a8 = { 0x01, 0x12, 0x23, 0x34, 0x45, 0x56, 0x67, 0x70,
+                      0x89, 0x9a, 0xab, 0xbc, 0xcd, 0xde, 0xef, 0xf8,
+                      0xfe, 0xed, 0xdc, 0xcb, 0xba, 0xa9, 0x98, 0x8f,
+                      0x76, 0x65, 0x54, 0x43, 0x32, 0x21, 0x10, 0x07 };
+  fixed_uint8_t b8 = { 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88,
+                      0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, 0x00,
+                      0x13, 0x24, 0x35, 0x46, 0x57, 0x68, 0x79, 0x8a,
+                      0x9b, 0xac, 0xbd, 0xce, 0xdf, 0xe0, 0xf1, 0x02 };
+  fixed_uint8_t expected7 = { 0x01, 0x11, 0x12, 0x22, 0x23, 0x33, 0x34, 0x44,
+                             0x45, 0x55, 0x56, 0x66, 0x67, 0x77, 0x70, 0x88,
+                             0xfe, 0x13, 0xed, 0x24, 0xdc, 0x35, 0xcb, 0x46,
+                             0xba, 0x57, 0xa9, 0x68, 0x98, 0x79, 0x8f, 0x8a };
+  fixed_uint8_t expected8 = { 0x89, 0x99, 0x9a, 0xaa, 0xab, 0xbb, 0xbc, 0xcc,
+                             0xcd, 0xdd, 0xde, 0xee, 0xef, 0xff, 0xf8, 0x00,
+                             0x76, 0x9b, 0x65, 0xac, 0x54, 0xbd, 0x43, 0xce,
+                             0x32, 0xdf, 0x21, 0xe0, 0x10, 0xf1, 0x07, 0x02 };
+  TEST (f7 (a8, b8), expected7);
+  TEST (f8 (a8, b8), expected8);
+
+  return 0;
+}
index a62f459ad7ed88c782613936ca2d81b5ac449220..35dca396f4efab5799bf8284b82186a2b88ed179 100644 (file)
@@ -6417,6 +6417,23 @@ proc check_effective_target_aarch64_sve2_hw { } {
     }]
 }
 
+# Return true if this is an AArch64 target that can run SVE2.1 code.
+
+proc check_effective_target_aarch64_sve2p1_hw { } {
+    if { ![istarget aarch64*-*-*] } {
+       return 0
+    }
+    return [check_runtime aarch64_sve2p1_hw_available {
+       #pragma GCC target "+sve2p1"
+       int
+       main (void)
+       {
+         asm volatile ("dupq z0.b, z0.b[0]");
+         return 0;
+       }
+    }]
+}
+
 # Return true if this is an AArch64 target that can run SVE code and
 # if its SVE vectors have exactly BITS bits.