]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
aarch64: Add support for unpacked SVE FP conditional binary arithmetic
authorSpencer Abson <spencer.abson@arm.com>
Tue, 29 Jul 2025 16:37:26 +0000 (16:37 +0000)
committerSpencer Abson <spencer.abson@arm.com>
Tue, 29 Jul 2025 17:21:02 +0000 (17:21 +0000)
This patch extends the expander for conditional smax, smin, add, sub, mul,
min, max, and div to support partial SVE FP modes.

If exceptions from undefined vector elements must be suppressed, this
expansion converts the container-level predicate to an element-level one, and
ensures that these elements are inactive for the operation.  In practice, this
is a predicate AND with the existing mask and a container-size PTRUE.

gcc/ChangeLog:

* config/aarch64/aarch64-protos.h (aarch64_sve_emit_masked_fp_pred):
Declare.
* config/aarch64/aarch64-sve.md (and<mode>3):  Change this to...
(@and<mode>3): ...this, so that we can use gen_and3.
(@cond_<optab><mode>): Extend from SVE_FULL_F_B16B16 to SVE_F_B16B16,
use aarch64_predicate_operand.
(*cond_<optab><mode>_2_strict): Likewise.
(*cond_<optab><mode>_3_strict): Likewise.
(*cond_<optab><mode>_any_strict): Likwise.
(*cond_<optab><mode>_2_const_strict): Extend from SVE_FULL_F to SVE_F,
use aarch64_predicate_operand.
(*cond_<optab><mode>_any_const_strict): Likewise.
(*cond_sub<mode>_3_const_strict): Likwise.
(*cond_sub<mode>_const_strict): Likewise.
(*vcond_mask_<mode><vpred>): Use aarch64_predicate_operand, and update
the comment here.
* config/aarch64/aarch64.cc (aarch64_sve_emit_masked_fp_pred): New
function.  Helper to mask the predicate in conditional expanders.

gcc/testsuite/ChangeLog:

* g++.target/aarch64/sve/unpacked_cond_binary_bf16_2.C: New test.
* gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_2.c: Likewise.
* gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_2.c: Likewise.
* gcc.target/aarch64/sve/unpacked_cond_fadd_2.c: Likewise.
* gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c: Likewise.
* gcc.target/aarch64/sve/unpacked_cond_fmaxnm_2.c: Likewise.
* gcc.target/aarch64/sve/unpacked_cond_fminnm_2.c: Likewise.
* gcc.target/aarch64/sve/unpacked_cond_fmul_2.c: Likewise.
* gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c: Likewise.

12 files changed:
gcc/config/aarch64/aarch64-protos.h
gcc/config/aarch64/aarch64-sve.md
gcc/config/aarch64/aarch64.cc
gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_2.C [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c [new file with mode: 0644]

index e946e8da11dad05e4b07ae43ae60841ce2637c67..38c307cdc3a621c9b40d7315368523c796fc1cf5 100644 (file)
@@ -1031,6 +1031,7 @@ rtx aarch64_pfalse_reg (machine_mode);
 bool aarch64_sve_same_pred_for_ptest_p (rtx *, rtx *);
 rtx aarch64_sve_packed_pred (machine_mode);
 rtx aarch64_sve_fp_pred (machine_mode, rtx *);
+rtx aarch64_sve_emit_masked_fp_pred (machine_mode, rtx);
 void aarch64_emit_load_store_through_mode (rtx, rtx, machine_mode);
 bool aarch64_expand_maskloadstore (rtx *, machine_mode);
 void aarch64_emit_sve_pred_move (rtx, rtx, rtx);
index b252eef411c2606f291c5a978533c2b994c3b155..fe407f7e77ff7d7c170f9a49d9bfc42065b910af 100644 (file)
 
 ;; Predicated floating-point operations with merging.
 (define_expand "@cond_<optab><mode>"
-  [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
-       (unspec:SVE_FULL_F_B16B16
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+       (unspec:SVE_F_B16B16
          [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F_B16B16
+          (unspec:SVE_F_B16B16
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
-             (match_operand:SVE_FULL_F_B16B16 2 "<sve_pred_fp_rhs1_operand>")
-             (match_operand:SVE_FULL_F_B16B16 3 "<sve_pred_fp_rhs2_operand>")]
+             (match_operand:SVE_F_B16B16 2 "<sve_pred_fp_rhs1_operand>")
+             (match_operand:SVE_F_B16B16 3 "<sve_pred_fp_rhs2_operand>")]
             SVE_COND_FP_BINARY)
-          (match_operand:SVE_FULL_F_B16B16 4 "aarch64_simd_reg_or_zero")]
+          (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
   "TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
+  {
+    operands[1] = aarch64_sve_emit_masked_fp_pred (<MODE>mode, operands[1]);
+  }
 )
 
 ;; Predicated floating-point operations, merging with the first input.
 )
 
 (define_insn "*cond_<optab><mode>_2_strict"
-  [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
-       (unspec:SVE_FULL_F_B16B16
-         [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F_B16B16
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+       (unspec:SVE_F_B16B16
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+          (unspec:SVE_F_B16B16
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
-             (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
-             (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
+             (match_operand:SVE_F_B16B16 2 "register_operand")
+             (match_operand:SVE_F_B16B16 3 "register_operand")]
             SVE_COND_FP_BINARY)
           (match_dup 2)]
          UNSPEC_SEL))]
 )
 
 (define_insn "*cond_<optab><mode>_2_const_strict"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
-         [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+          (unspec:SVE_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
-             (match_operand:SVE_FULL_F 2 "register_operand")
-             (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
+             (match_operand:SVE_F 2 "register_operand")
+             (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")]
             SVE_COND_FP_BINARY_I1)
           (match_dup 2)]
          UNSPEC_SEL))]
 )
 
 (define_insn "*cond_<optab><mode>_3_strict"
-  [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
-       (unspec:SVE_FULL_F_B16B16
-         [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F_B16B16
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+       (unspec:SVE_F_B16B16
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+          (unspec:SVE_F_B16B16
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
-             (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
-             (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
+             (match_operand:SVE_F_B16B16 2 "register_operand")
+             (match_operand:SVE_F_B16B16 3 "register_operand")]
             SVE_COND_FP_BINARY)
           (match_dup 3)]
          UNSPEC_SEL))]
 )
 
 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
-  [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
-       (unspec:SVE_FULL_F_B16B16
-         [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F_B16B16
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+       (unspec:SVE_F_B16B16
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+          (unspec:SVE_F_B16B16
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
-             (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
-             (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
+             (match_operand:SVE_F_B16B16 2 "register_operand")
+             (match_operand:SVE_F_B16B16 3 "register_operand")]
             SVE_COND_FP_BINARY)
-          (match_operand:SVE_FULL_F_B16B16 4 "aarch64_simd_reg_or_zero")]
+          (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
   "TARGET_SVE
    && (<supports_bf16> || !<is_bf16>)
 )
 
 (define_insn_and_rewrite "*cond_<optab><mode>_any_const_strict"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
-         [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+          (unspec:SVE_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
-             (match_operand:SVE_FULL_F 2 "register_operand")
-             (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
+             (match_operand:SVE_F 2 "register_operand")
+             (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")]
             SVE_COND_FP_BINARY_I1)
-          (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+          (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
   {@ [ cons: =0 , 1   , 2 , 4   ]
 )
 
 (define_insn "*cond_add<mode>_2_const_strict"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
-         [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+          (unspec:SVE_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
-             (match_operand:SVE_FULL_F 2 "register_operand")
-             (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
+             (match_operand:SVE_F 2 "register_operand")
+             (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
             UNSPEC_COND_FADD)
           (match_dup 2)]
          UNSPEC_SEL))]
 )
 
 (define_insn_and_rewrite "*cond_add<mode>_any_const_strict"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
-         [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+          (unspec:SVE_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
-             (match_operand:SVE_FULL_F 2 "register_operand")
-             (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
+             (match_operand:SVE_F 2 "register_operand")
+             (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
             UNSPEC_COND_FADD)
-          (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+          (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
   {@ [ cons: =0 , 1   , 2 , 3   , 4   ]
 )
 
 (define_insn "*cond_sub<mode>_3_const_strict"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
-         [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+          (unspec:SVE_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
-             (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
-             (match_operand:SVE_FULL_F 3 "register_operand")]
+             (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate")
+             (match_operand:SVE_F 3 "register_operand")]
             UNSPEC_COND_FSUB)
           (match_dup 3)]
          UNSPEC_SEL))]
 )
 
 (define_insn_and_rewrite "*cond_sub<mode>_const_strict"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
-         [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+          (unspec:SVE_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
-             (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
-             (match_operand:SVE_FULL_F 3 "register_operand")]
+             (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate")
+             (match_operand:SVE_F 3 "register_operand")]
             UNSPEC_COND_FSUB)
-          (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+          (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
   "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])"
   {@ [ cons: =0 , 1   , 3 , 4   ]
 ;; Predicate AND.  We can reuse one of the inputs as the GP.
 ;; Doubling the second operand is the preferred implementation
 ;; of the MOV alias, so we use that instead of %1/z, %1, %2.
-(define_insn "and<mode>3"
+(define_insn "@and<mode>3"
   [(set (match_operand:PRED_ALL 0 "register_operand")
        (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand")
                      (match_operand:PRED_ALL 2 "register_operand")))]
 ;;
 ;; For unpacked vectors, it doesn't really matter whether SEL uses the
 ;; the container size or the element size.  If SEL used the container size,
-;; it would ignore undefined bits of the predicate but would copy the
-;; upper (undefined) bits of each container along with the defined bits.
-;; If SEL used the element size, it would use undefined bits of the predicate
-;; to select between undefined elements in each input vector.  Thus the only
-;; difference is whether the undefined bits in a container always come from
-;; the same input as the defined bits, or whether the choice can vary
-;; independently of the defined bits.
+;; it would would copy the upper (undefined) bits of each container along
+;; with the corresponding defined bits.  If SEL used the element size,
+;; it would use separate predicate bits to select between the undefined
+;; elements in each input vector; these seperate predicate bits might
+;; themselves be undefined, depending on the mode of the predicate.
+;;
+;; Thus the only difference is whether the undefined bits in a container
+;; always come from the same input as the defined bits, or whether the
+;; choice can vary independently of the defined bits.
 ;;
 ;; For the other instructions, using the element size is more natural,
 ;; so we do that for SEL as well.
+;;
 (define_insn "*vcond_mask_<mode><vpred>"
   [(set (match_operand:SVE_ALL 0 "register_operand")
        (unspec:SVE_ALL
-         [(match_operand:<VPRED> 3 "register_operand")
+         [(match_operand:<VPRED> 3 "aarch64_predicate_operand")
           (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm")
           (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
index a6d6bed97e8acde187e469cc818c3611f898d4c0..5502d0b48072089cab772726c1d3edb2af22e861 100644 (file)
@@ -3933,6 +3933,33 @@ aarch64_sve_fp_pred (machine_mode data_mode, rtx *strictness)
    return aarch64_ptrue_reg (aarch64_sve_pred_mode (data_mode));
 }
 
+/* PRED is a predicate that governs an operation on DATA_MODE.  If DATA_MODE
+   is a partial vector mode, and if exceptions must be suppressed for its
+   undefined elements, convert PRED from a container-level predicate to
+   an element-level predicate and ensure that the undefined elements
+   are inactive.  Make no changes otherwise.
+
+   Return the resultant predicate.  */
+rtx
+aarch64_sve_emit_masked_fp_pred (machine_mode data_mode, rtx pred)
+{
+  unsigned int vec_flags = aarch64_classify_vector_mode (data_mode);
+  if (flag_trapping_math && (vec_flags & VEC_PARTIAL))
+    {
+      /* Generate an element-level mask.  */
+      rtx mask = aarch64_sve_packed_pred (data_mode);
+      machine_mode pmode = GET_MODE (mask);
+
+      /* Apply the existing predicate.  */
+      rtx dst = gen_reg_rtx (pmode);
+      emit_insn (gen_and3 (pmode, dst, mask,
+                          gen_lowpart (pmode, pred)));
+      return dst;
+    }
+
+  return pred;
+}
+
 /* Emit a comparison CMP between OP0 and OP1, both of which have mode
    DATA_MODE, and return the result in a predicate of mode PRED_MODE.
    Use TARGET as the target register if nonnull and convenient.  */
diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_2.C b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_2.C
new file mode 100644 (file)
index 0000000..02880ef
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do compile }*/
+/* { dg-options "-O -ffinite-math-only -fno-signed-zeros -msve-vector-bits=2048 " } */
+
+#include "unpacked_cond_binary_bf16_1.C"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 15 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 15 } } */
+/* { dg-final { scan-assembler-times {\tand} 30 } } */
+
+/* { dg-final { scan-assembler-times {\tbfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tbfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tbfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tbfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tbfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+
+// There's no BFSUBR.
+/* { dg-final { scan-assembler-times {\tsel\t} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_2.c
new file mode 100644 (file)
index 0000000..f84ded5
--- /dev/null
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */
+
+#include "unpacked_cond_builtin_fmax_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */
+/* { dg-final { scan-assembler-times {\tand} 21 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_2.c
new file mode 100644 (file)
index 0000000..bceddf9
--- /dev/null
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */
+
+#include "unpacked_cond_builtin_fmin_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */
+/* { dg-final { scan-assembler-times {\tand} 21 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_2.c
new file mode 100644 (file)
index 0000000..e59864b
--- /dev/null
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */
+
+#include "unpacked_cond_fadd_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 11 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 22 } } */
+/* { dg-final { scan-assembler-times {\tand} 33 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 19 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 19 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 19 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 5 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 10 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c
new file mode 100644 (file)
index 0000000..1ca3dbf
--- /dev/null
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */
+
+#include "unpacked_cond_fdiv_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 6 } } */
+/* { dg-final { scan-assembler-times {\tand} 9 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 7 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 7 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 7 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_2.c
new file mode 100644 (file)
index 0000000..282f3ed
--- /dev/null
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-signed-zeros -ffinite-math-only" } */
+
+#include "unpacked_cond_fmaxnm_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */
+/* { dg-final { scan-assembler-times {\tand} 21 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_2.c
new file mode 100644 (file)
index 0000000..8226a6f
--- /dev/null
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-signed-zeros -ffinite-math-only" } */
+
+#include "unpacked_cond_fminnm_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */
+/* { dg-final { scan-assembler-times {\tand} 21 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_2.c
new file mode 100644 (file)
index 0000000..21713f5
--- /dev/null
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */
+
+#include "unpacked_cond_fmul_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 5 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 10 } } */
+/* { dg-final { scan-assembler-times {\tand} 15 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 10 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 10 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 10 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c
new file mode 100644 (file)
index 0000000..cd7a0e1
--- /dev/null
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */
+
+#include "unpacked_cond_fsubr_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */
+/* { dg-final { scan-assembler-times {\tand} 21 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */