For the test case
#include <arm_sve.h>

int32_t foo (svint32_t x)
{
  svbool_t pg = svpfalse ();
  return svlastb_s32 (pg, x);
}
compiled with -O3 -mcpu=grace -msve-vector-bits=128, GCC produced:
foo:
	pfalse	p3.b
	lastb	w0, p3, z0.s
	ret
when it could use a Neon lane extract instead:
foo:
	umov	w0, v0.s[3]
	ret
Similar optimizations can be made for VLS with other vector widths.
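As an illustration (a hypothetical example, not taken from the patch
or its testsuite; the function name foo256 is made up): with
-msve-vector-bits=256, a .s vector has 8 lanes, so the same svlastb
call with an all-false predicate amounts to extracting lane 7 and can
likewise be handled by a constant-index extract instead of
pfalse+lastb.

#include <arm_sve.h>

/* Hypothetical analogue of the test case above, assuming
   -O3 -msve-vector-bits=256: svlastb with an all-false predicate
   returns the last element, here lane 7 of the .s vector.  */
int32_t foo256 (svint32_t x)
{
  svbool_t pg = svpfalse ();
  return svlastb_s32 (pg, x);
}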
We implemented this optimization by guarding the emission of
pfalse+lastb in the pattern vec_extract<mode><Vel> with
!val.is_constant (), so that pfalse+lastb is only emitted when the
index of the last element is not a compile-time constant.
Thus, for extraction of the last element with VLS, the patterns
*vec_extract<mode><Vel>_v128, *vec_extract<mode><Vel>_dup, or
*vec_extract<mode><Vel>_ext are used instead.
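For reference, a rough sketch of where the guard sits in the expander
body (simplified and paraphrased, not the verbatim patch; the exact
surrounding code in aarch64-sve.md may differ):

  poly_int64 val;
  if (poly_int_rtx_p (operands[2], &val)
      && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1)
      /* New guard: take this path only when the index of the last
         element is not a compile-time constant, i.e. for VLA code.  */
      && !val.is_constant ())
    {
      /* ... emit the PFALSE + LASTB sequence as before ...  */
    }
  /* Otherwise fall through so that the constant-index patterns
     (*vec_extract<mode><Vel>_v128, _dup, _ext) handle the extraction.  */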
We added tests for 128-bit VLS and adjusted the tests for the other vector
widths.
The patch was bootstrapped and tested on aarch64-linux-gnu, with no regressions.
OK for mainline?
Signed-off-by: Jennifer Schmitz <jschmitz@nvidia.com>
gcc/
	* config/aarch64/aarch64-sve.md (vec_extract<mode><Vel>):
	Prevent the emission of pfalse+lastb for VLS.