(unspec:<VPRED>
[(match_operand:<VPRED> 1)
(const_int SVE_KNOWN_PTRUE)
- (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
- (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
+ (match_operand:SVE_F 2 "register_operand" "w, w")
+ (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
SVE_COND_FP_CMP_I0)
(match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
"TARGET_SVE"
(unspec:<VPRED>
[(match_operand:<VPRED> 1)
(const_int SVE_KNOWN_PTRUE)
- (match_operand:SVE_FULL_F 2 "register_operand" "w")
- (match_operand:SVE_FULL_F 3 "register_operand" "w")]
+ (match_operand:SVE_F 2 "register_operand" "w")
+ (match_operand:SVE_F 3 "register_operand" "w")]
UNSPEC_COND_FCMUO)
(match_operand:<VPRED> 4 "register_operand" "Upl")))]
"TARGET_SVE"
(unspec:<VPRED>
[(match_operand:<VPRED> 1)
(const_int SVE_KNOWN_PTRUE)
- (match_operand:SVE_FULL_F 2 "register_operand" "w")
- (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
+ (match_operand:SVE_F 2 "register_operand" "w")
+ (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "wDz")]
SVE_COND_FP_CMP_I0))
(match_operand:<VPRED> 4 "register_operand" "Upa"))
(match_dup:<VPRED> 1)))
(unspec:<VPRED>
[(match_operand:<VPRED> 1)
(const_int SVE_KNOWN_PTRUE)
- (match_operand:SVE_FULL_F 2 "register_operand" "w")
- (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
+ (match_operand:SVE_F 2 "register_operand" "w")
+ (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "wDz")]
SVE_COND_FP_CMP_I0))
(not:<VPRED>
(match_operand:<VPRED> 4 "register_operand" "Upa")))
}
)
+;; Same for unordered comparisons.
(define_insn_and_split "*fcmuo<mode>_bic_combine"
[(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
(and:<VPRED>
(unspec:<VPRED>
[(match_operand:<VPRED> 1)
(const_int SVE_KNOWN_PTRUE)
- (match_operand:SVE_FULL_F 2 "register_operand" "w")
- (match_operand:SVE_FULL_F 3 "register_operand" "w")]
+ (match_operand:SVE_F 2 "register_operand" "w")
+ (match_operand:SVE_F 3 "register_operand" "w")]
UNSPEC_COND_FCMUO))
(match_operand:<VPRED> 4 "register_operand" "Upa"))
(match_dup:<VPRED> 1)))
}
)
-;; Same for unordered comparisons.
(define_insn_and_split "*fcmuo<mode>_nor_combine"
[(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
(and:<VPRED>
(unspec:<VPRED>
[(match_operand:<VPRED> 1)
(const_int SVE_KNOWN_PTRUE)
- (match_operand:SVE_FULL_F 2 "register_operand" "w")
- (match_operand:SVE_FULL_F 3 "register_operand" "w")]
+ (match_operand:SVE_F 2 "register_operand" "w")
+ (match_operand:SVE_F 3 "register_operand" "w")]
UNSPEC_COND_FCMUO))
(not:<VPRED>
(match_operand:<VPRED> 4 "register_operand" "Upa")))
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 --param=aarch64-autovec-preference=sve-only -fno-trapping-math" } */
+
+#include "unpacked_fcm_1.c"
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 32 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 32 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 32 } } */
+
+/* Instead of a PTRUE-predicated AND of the loop mask with the comparison
+   result, the comparison itself should be predicated with the loop mask. */
+/* { dg-final { scan-assembler-not {\tand\t} } } */
+
+/* Similarly, for codes that are implemented via an inversion, prefer
+ NOT (predicated with the loop mask) over BIC+PTRUE. */
+/* { dg-final { scan-assembler-not {\tbic\t} } } */
+/* { dg-final { scan-assembler-times {\tnot\t} 15 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 --param=aarch64-autovec-preference=sve-only -fno-trapping-math" } */
+
+#include <stdint.h>
+
+/* Ensure that we still emit NOR here, rather than two NOTs.
+
+   TEST_FCM_NOR defines a function f_<TYPE0>_<TYPE1>_<CMP> that walks COUNT
+   elements.  out[i] is set to 3 when neither CMP (a[i], c[i]) nor
+   CMP (b[i], c[i]) is true; otherwise out[i] keeps its previous value.  */
+
+#define TEST_FCM_NOR(TYPE0, TYPE1, CMP, COUNT) \
+ void \
+ f_##TYPE0##_##TYPE1##_##CMP (TYPE0 *__restrict out, \
+ TYPE1 *__restrict a, \
+ TYPE1 *__restrict b, \
+ TYPE1 *__restrict c) \
+ { \
+ for (unsigned int i = 0; i < COUNT; i++) \
+ out[i] = !(CMP (a[i], c[i]) | CMP (b[i], c[i])) ? 3 : out[i]; \
+ }
+
+#define GT(A, B) ((A) > (B)) /* Function-like spelling of >, passed as CMP below.  */
+
+TEST_FCM_NOR (uint64_t, float, GT, 32) /* Greater-than variants.  */
+TEST_FCM_NOR (uint64_t, _Float16, GT, 32)
+TEST_FCM_NOR (uint32_t, _Float16, GT, 64) /* COUNT is 2048 / bits in TYPE0 in every instance (sve_width=2048).  */
+
+TEST_FCM_NOR (uint64_t, float, __builtin_isunordered, 32) /* Same type pairs, unordered test.  */
+TEST_FCM_NOR (uint64_t, _Float16, __builtin_isunordered, 32)
+TEST_FCM_NOR (uint32_t, _Float16, __builtin_isunordered, 64)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 6 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 6 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 6 } } */
+
+/* { dg-final { scan-assembler-not {\tbic\t} } } */
+/* { dg-final { scan-assembler-not {\tnot\t} } } */
+/* { dg-final { scan-assembler-times {\tnor\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b\n} 6 } } */