git.ipfire.org Git - thirdparty/gcc.git/commitdiff
aarch64: Use SVE for V2DImode integer min/max operations
author Kyrylo Tkachov <ktkachov@nvidia.com>
Thu, 4 Sep 2025 09:46:39 +0000 (02:46 -0700)
committer Kyrylo Tkachov <ktkachov@nvidia.com>
Fri, 5 Sep 2025 08:49:22 +0000 (10:49 +0200)
Unlike Advanced SIMD, SVE has instructions to perform smin, smax, umin, umax
on 64-bit elements.  Thus, we can use them with the fixed-width V2DImode
expander.  Most of the machinery is already there on the define_insn side,
supporting V2DImode operands of the SVE pattern.  We just need to wire up
the RTL emission to the v2di standard names for the TARGET_SVE case.

So for the smin case we now generate:
min_di:
        ldr     q30, [x0]
        ptrue   p7.b, all
        ldr     q31, [x1]
        smin    z30.d, p7/m, z30.d, z31.d
        str     q30, [x2]
        ret

min_imm_di:
        ldr     q31, [x0]
        smin    z31.d, z31.d, #5
        str     q31, [x2]
        ret

instead of the previous:
min_di:
        ldr     q30, [x0]
        ldr     q31, [x1]
        cmgt    v29.2d, v30.2d, v31.2d
        bsl     v29.16b, v31.16b, v30.16b
        str     q29, [x2]
        ret

min_imm_di:
        ldr     q31, [x0]
        mov     z30.d, #5
        cmgt    v29.2d, v30.2d, v31.2d
        bsl     v29.16b, v31.16b, v30.16b
        str     q29, [x2]
        ret

The register operand case is the same length, though the new ptrue can now be
shared and moved away.  But the immediate operand case is obviously better
as the SVE immediate form doesn't require a predicate operand.

Bootstrapped and tested on aarch64-none-linux-gnu.

Signed-off-by: Kyrylo Tkachov <ktkachov@nvidia.com>
gcc/

* config/aarch64/iterators.md (sve_di_suf): New mode attribute.
* config/aarch64/aarch64-sve.md (<optab><mode>3 SVE_INT_BINARY_MULTI):
Rename to...
(<optab><mode>3<sve_di_suf>): ... This.  Use SVE_I_SIMD_DI mode
iterator.
* config/aarch64/aarch64-simd.md (<su><maxmin>v2di3): Use the above
for TARGET_SVE.

gcc/testsuite/

* gcc.target/aarch64/sve/usminmax_di.c: New test.

gcc/config/aarch64/aarch64-simd.md
gcc/config/aarch64/aarch64-sve.md
gcc/config/aarch64/iterators.md
gcc/testsuite/gcc.target/aarch64/sve/usminmax_di.c [new file with mode: 0644]

index c111dc2c7f7cae0a7930a0552ace7b98d32f5630..14b9d5c78e3f61410e2f5821c538fefda79174d5 100644 (file)
   enum rtx_code cmp_operator;
   rtx cmp_fmt;
 
+  /* SVE has native D-forms of the MIN/MAX instructions.  */
+  if (TARGET_SVE)
+    {
+      emit_insn (gen_<su><maxmin>v2di3_as_sve (operands[0], operands[1],
+                                              operands[2]));
+      DONE;
+    }
+
   switch (<CODE>)
     {
     case UMIN:
index 51e2d7d7e8750a88f09e67c7767f976d1755ce09..1ebcffe88a32c4748eb3208113803fdb32984bb0 100644 (file)
 ;; -------------------------------------------------------------------------
 
 ;; Unpredicated integer binary operations that have an immediate form.
-(define_expand "<optab><mode>3"
-  [(set (match_operand:SVE_I 0 "register_operand")
-       (unspec:SVE_I
+(define_expand "<optab><mode>3<sve_di_suf>"
+  [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand")
+       (unspec:SVE_I_SIMD_DI
          [(match_dup 3)
-          (SVE_INT_BINARY_MULTI:SVE_I
-            (match_operand:SVE_I 1 "register_operand")
-            (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_operand"))]
+          (SVE_INT_BINARY_MULTI:SVE_I_SIMD_DI
+            (match_operand:SVE_I_SIMD_DI 1 "register_operand")
+            (match_operand:SVE_I_SIMD_DI 2 "aarch64_sve_<sve_imm_con>_operand"))]
          UNSPEC_PRED_X))]
   "TARGET_SVE"
   {
index 7a6ea0d8d067562b693af6fb9759d5edbedc31f9..451b00f55af5a21146a4505451a68bd2dc408e8e 100644 (file)
                           (VNx8SI "VNx8HI") (VNx16SI "VNx16QI")
                           (VNx8DI "VNx8HI")])
 
+;; Pattern-name suffix: "_as_sve" for V2DI, the Advanced SIMD mode that is
+;; expanded as SVE instructions; empty for the VNx* modes.
+(define_mode_attr sve_di_suf [(VNx16QI "") (VNx8HI "") (VNx4SI "") (VNx2DI "")
+                             (VNx8QI "") (VNx4QI "") (VNx2QI "") (VNx4HI "")
+                             (VNx2HI "") (VNx2SI "") (V2DI "_as_sve")])
+
 ;; Register suffix narrowed modes for VQN.
 (define_mode_attr Vntype [(V8HI "8b") (V4SI "4h")
                          (V2DI "2s")])
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/usminmax_di.c b/gcc/testsuite/gcc.target/aarch64/sve/usminmax_di.c
new file mode 100644 (file)
index 0000000..5405308
--- /dev/null
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 --param aarch64-autovec-preference=asimd-only" } */
+/* Scalar min/max helpers; the selected argument is evaluated twice.  */
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+/* Define min_S, max_S, min_imm_S and max_imm_S: N-element loops over type T.  */
+#define FUNC(T, N, S)   \
+void min_##S (T * __restrict__ a, T * __restrict__ b, T * __restrict__ c)  \
+{                                                                         \
+  int i;                                                                  \
+  for (i = 0; i < N; i++)                                                 \
+    c[i] = MIN (a[i], b[i]);                                              \
+}                                                                         \
+void max_##S (T * __restrict__ a, T * __restrict__ b, T * __restrict__ c)  \
+{                                                                         \
+  int i;                                                                  \
+  for (i = 0; i < N; i++)                                                 \
+    c[i] = MAX (a[i], b[i]);                                              \
+}                                                                         \
+void min_imm_##S (T * __restrict__ a, T * __restrict__ b, T * __restrict__ c) \
+{                                                                            \
+  int i;                                                                     \
+  for (i = 0; i < N; i++)                                                    \
+    c[i] = MIN (a[i], 5);                                                    \
+}                                                                            \
+void max_imm_##S (T * __restrict__ a, T * __restrict__ b, T * __restrict__ c) \
+{                                                                            \
+  int i;                                                                     \
+  for (i = 0; i < N; i++)                                                    \
+    c[i] = MAX (a[i], 8);                                                    \
+}
+/* Each FUNC use must give one predicated and one immediate .d form of min and max.  */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, z[0-9]+\.d, #8\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, z[0-9]+\.d, #5\n} 1 } } */
+FUNC (long long, 2, di)
+
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, z[0-9]+\.d, #8\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, z[0-9]+\.d, #5\n} 1 } } */
+FUNC (unsigned long long, 2, udi)
+