}
)
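+;; Unpredicated bitwise inverted select for Advanced SIMD modes, using the
+;; SVE2 NBSL instruction on the Z-register view of the operands.
+;; (op3 ? ~bsl_mov : ~bsl_dup) == (~(((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup))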
+(define_insn "*aarch64_sve2_nbsl_unpred<mode>"
+ [(set (match_operand:VDQ_I 0 "register_operand")
+ (not:VDQ_I
+ (xor:VDQ_I
+ (and:VDQ_I
+ (xor:VDQ_I
+ (match_operand:VDQ_I 1 "register_operand")
+ (match_operand:VDQ_I 2 "register_operand"))
+ (match_operand:VDQ_I 3 "register_operand"))
+ (match_dup BSL_DUP))))]
+ "TARGET_SVE2"
+ {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
+ [ w , <bsl_1st> , <bsl_2nd> , w ; * ] nbsl\t%Z0.d, %Z0.d, %Z<bsl_dup>.d, %Z3.d
+ [ ?&w , w , w , w ; yes ] movprfx\t%Z0, %Z<bsl_mov>\;nbsl\t%Z0.d, %Z0.d, %Z<bsl_dup>.d, %Z3.d
+ }
+)
+
;; Unpredicated bitwise select with inverted first operand.
;; (op3 ? ~bsl_mov : bsl_dup) == ((~(bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
(define_expand "@aarch64_sve2_bsl1n<mode>"
}
)
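+;; The Advanced SIMD version of the pattern above, using the SVE2 BSL1N
+;; instruction on the Z-register view of the operands.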
+(define_insn "*aarch64_sve2_bsl1n_unpred<mode>"
+ [(set (match_operand:VDQ_I 0 "register_operand")
+ (xor:VDQ_I
+ (and:VDQ_I
+ (not:VDQ_I
+ (xor:VDQ_I
+ (match_operand:VDQ_I 1 "register_operand")
+ (match_operand:VDQ_I 2 "register_operand")))
+ (match_operand:VDQ_I 3 "register_operand"))
+ (match_dup BSL_DUP)))]
+ "TARGET_SVE2"
+ {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
+ [ w , <bsl_1st> , <bsl_2nd> , w ; * ] bsl1n\t%Z0.d, %Z0.d, %Z<bsl_dup>.d, %Z3.d
+ [ ?&w , w , w , w ; yes ] movprfx\t%Z0, %Z<bsl_mov>\;bsl1n\t%Z0.d, %Z0.d, %Z<bsl_dup>.d, %Z3.d
+ }
+)
+
;; Unpredicated bitwise select with inverted second operand.
;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~op3 & ~bsl_dup))
(define_expand "@aarch64_sve2_bsl2n<mode>"
}
)
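+;; The Advanced SIMD version of the pattern above, using the SVE2 BSL2N
+;; instruction on the Z-register view of the operands.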
+(define_insn "*aarch64_sve2_bsl2n_unpred<mode>"
+ [(set (match_operand:VDQ_I 0 "register_operand")
+ (ior:VDQ_I
+ (and:VDQ_I
+ (match_operand:VDQ_I 1 "register_operand")
+ (match_operand:VDQ_I 2 "register_operand"))
+ (and:VDQ_I
+ (not:VDQ_I (match_operand:VDQ_I 3 "register_operand"))
+ (not:VDQ_I (match_dup BSL_DUP)))))]
+ "TARGET_SVE2"
+ {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
+ [ w , <bsl_1st> , <bsl_2nd> , w ; * ] bsl2n\t%Z0.d, %Z0.d, %Z3.d, %Z<bsl_dup>.d
+ [ ?&w , w , w , w ; yes ] movprfx\t%Z0, %Z<bsl_mov>\;bsl2n\t%Z0.d, %Z0.d, %Z3.d, %Z<bsl_dup>.d
+ }
+)
+
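+;; As above, but with the operands of the inner AND swapped: there is no
+;; canonical order for the two NOTs, so both forms have to be matched.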
+(define_insn "*aarch64_sve2_bsl2n_unpred<mode>"
+ [(set (match_operand:VDQ_I 0 "register_operand")
+ (ior:VDQ_I
+ (and:VDQ_I
+ (match_operand:VDQ_I 1 "register_operand")
+ (match_operand:VDQ_I 2 "register_operand"))
+ (and:VDQ_I
+ (not:VDQ_I (match_dup BSL_DUP))
+ (not:VDQ_I (match_operand:VDQ_I 3 "register_operand")))))]
+ "TARGET_SVE2"
+ {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
+ [ w , <bsl_1st> , <bsl_2nd> , w ; * ] bsl2n\t%Z0.d, %Z0.d, %Z3.d, %Z<bsl_dup>.d
+ [ ?&w , w , w , w ; yes ] movprfx\t%Z0, %Z<bsl_mov>\;bsl2n\t%Z0.d, %Z0.d, %Z3.d, %Z<bsl_dup>.d
+ }
+)
+
;; -------------------------------------------------------------------------
;; ---- [INT] Shift-and-accumulate operations
;; -------------------------------------------------------------------------
--- /dev/null
+/* { dg-options "-O2 -mcpu=neoverse-v2 --param aarch64-autovec-preference=asimd-only" } */
+
+#include <stdint.h>
+
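+/* Each macro below is the bitwise-select identity implemented by the
+   corresponding SVE2 instruction: NBSL inverts the result of the select,
+   BSL1N inverts the first input and BSL2N inverts the second input.  */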
+#define OPNBSL(x,y,z) (~(((x) & (z)) | ((y) & ~(z))))
+#define OPBSL1N(x,y,z) ((~(x) & (z)) | ((y) & ~(z)))
+#define OPBSL2N(x,y,z) (((x) & (z)) | (~(y) & ~(z)))
+
+#define N 1024
+
+#define TYPE(SIZE) int##SIZE##_t
+
+#define TEST(SIZE, OP, SUFFIX) \
+void __attribute__ ((noinline, noclone)) \
+f_##SIZE##_##SUFFIX \
+ (TYPE(SIZE) *restrict a, TYPE(SIZE) *restrict b, \
+ TYPE(SIZE) *restrict c, TYPE(SIZE) *restrict d) \
+{ \
+ for (int i = 0; i < N; i++) \
+ a[i] = OP (b[i], c[i], d[i]); \
+}
+
+#define TEST_ALL(SIZE) \
+ TEST(SIZE, OPNBSL, nbsl) \
+ TEST(SIZE, OPBSL1N, bsl1n) \
+ TEST(SIZE, OPBSL2N, bsl2n)
+
+TEST_ALL(8);
+TEST_ALL(16);
+TEST_ALL(32);
+TEST_ALL(64);
+
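+/* Each operation is vectorized once per element size, so every
+   instruction should appear exactly four times.  */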
+/* { dg-final { scan-assembler-times {\tnbsl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tbsl1n\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tbsl2n\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */