UNSPEC_KORTEST))]
"TARGET_AVX512F")
+;; Optimize cmp + setcc with mask register by kortest + setcc.
+(define_insn_and_split "*kortest_cmp<mode>_setcc"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm, qm")
+ (match_operator:QI 1 "bt_comparison_operator"
+ [(match_operand:SWI1248_AVX512BWDQ_64 2 "register_operand" "?k, <r>")
+ (const_int -1)]))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_AVX512BW"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ if (MASK_REGNO_P (REGNO (operands[2])))
+ {
+ emit_insn (gen_kortest<mode>_ccc (operands[2], operands[2]));
+ operands[4] = gen_rtx_REG (CCCmode, FLAGS_REG);
+ }
+ else
+ {
+ operands[4] = gen_rtx_REG (CCZmode, FLAGS_REG);
+ emit_insn (gen_rtx_SET (operands[4],
+ gen_rtx_COMPARE (CCZmode,
+ operands[2],
+ constm1_rtx)));
+ }
+ ix86_expand_setcc (operands[0],
+ GET_CODE (operands[1]),
+ operands[4],
+ const0_rtx);
+ DONE;
+})
+
+;; Optimize cmp + jcc with mask register by kortest + jcc.
+(define_insn_and_split "*kortest_cmp<mode>_jcc"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "bt_comparison_operator"
+ [(match_operand:SWI1248_AVX512BWDQ_64 1 "register_operand" "?k, <r>")
+ (const_int -1)])
+ (label_ref (match_operand 2))
+ (pc)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_AVX512BW"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ if (MASK_REGNO_P (REGNO (operands[1])))
+ {
+ emit_insn (gen_kortest<mode>_ccc (operands[1], operands[1]));
+ operands[4] = gen_rtx_REG (CCCmode, FLAGS_REG);
+ }
+ else
+ {
+ operands[4] = gen_rtx_REG (CCZmode, FLAGS_REG);
+ emit_insn (gen_rtx_SET (operands[4],
+ gen_rtx_COMPARE (CCZmode,
+ operands[1],
+ constm1_rtx)));
+ }
+ ix86_expand_branch (GET_CODE (operands[0]),
+ operands[4],
+ const0_rtx,
+ operands[2]);
+ DONE;
+})
+
(define_insn "kunpckhi"
[(set (match_operand:HI 0 "register_operand" "=k")
(ior:HI
--- /dev/null
+/* PR target/113609 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v4" } */
+/* { dg-final { scan-assembler-not "^cmp" } } */
+/* { dg-final { scan-assembler-not "\[ \\t\]+sete" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "\[ \\t\]+setne" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "\[ \\t\]+je" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "\[ \\t\]+jne" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+sete" 1 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+setne" 1 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+je" 1 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+jne" 2 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "kortest" 12 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "kortest" 17 { target { ! ia32 } } } } */
+
+#include <immintrin.h>
+
+unsigned int
+cmp_vector_sete_mask8(__m128i a, __m128i b)
+{
+ __mmask8 k = _mm_cmpeq_epi16_mask (a, b);
+ if (k == (__mmask8) -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_vector_sete_mask16(__m128i a, __m128i b)
+{
+ __mmask16 k = _mm_cmpeq_epi8_mask (a, b);
+ if (k == (__mmask16) -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_vector_sete_mask32(__m256i a, __m256i b)
+{
+ __mmask32 k = _mm256_cmpeq_epi8_mask (a, b);
+ if (k == (__mmask32) -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_vector_sete_mask64(__m512i a, __m512i b)
+{
+ __mmask64 k = _mm512_cmpeq_epi8_mask (a, b);
+ if (k == (__mmask64) -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_vector_setne_mask8(__m128i a, __m128i b)
+{
+ __mmask8 k = _mm_cmpeq_epi16_mask (a, b);
+ if (k != (__mmask8) -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_vector_setne_mask16(__m128i a, __m128i b)
+{
+ __mmask16 k = _mm_cmpeq_epi8_mask (a, b);
+ if (k != (__mmask16) -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_vector_setne_mask32(__m256i a, __m256i b)
+{
+ __mmask32 k = _mm256_cmpeq_epi8_mask (a, b);
+ if (k != (__mmask32) -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_vector_setne_mask64(__m512i a, __m512i b)
+{
+ __mmask64 k = _mm512_cmpeq_epi8_mask (a, b);
+ if (k != (__mmask64) -1)
+ return 1;
+ else
+ return 0;
+}
+
+__m128i
+cmp_vector_je_mask8(__m128i a, __m128i b) {
+ __mmask8 k = _mm_cmpeq_epi16_mask (a, b);
+ if (k == (__mmask8) -1) {
+ a[0] = a[0] + 1;
+ }
+ else {
+ a[0] = a[0] - 1;
+ }
+ return a;
+}
+
+__m128i
+cmp_vector_je_mask16(__m128i a, __m128i b) {
+ __mmask16 k = _mm_cmpeq_epi8_mask (a, b);
+ if (k == (__mmask16) -1) {
+ a[0] = a[0] + 1;
+ }
+ else {
+ a[0] = a[0] - 1;
+ }
+ return a;
+}
+
+__m256i
+cmp_vector_je_mask32(__m256i a, __m256i b) {
+ __mmask32 k = _mm256_cmpeq_epi8_mask (a, b);
+ if (k == (__mmask32) -1) {
+ a[0] = a[0] + 1;
+ }
+ else {
+ a[0] = a[0] - 1;
+ }
+ return a;
+}
+
+__m512i
+cmp_vector_je_mask64(__m512i a, __m512i b) {
+ __mmask64 k = _mm512_cmpeq_epi8_mask (a, b);
+ if (k == (__mmask64) -1) {
+ a[0] = a[0] + 1;
+ }
+ else {
+ a[0] = a[0] - 5;
+ }
+ return a;
+}
+
+__m128i
+cmp_vector_jne_mask8(__m128i a, __m128i b) {
+ __mmask8 k = _mm_cmpeq_epi16_mask (a, b);
+ if (k == (__mmask8) -1) {
+ a[0] = a[0] + 1;
+ }
+ a[0] = a[0] - 4;
+ return a;
+}
+
+__m128i
+cmp_vector_jne_mask16(__m128i a, __m128i b) {
+ __mmask16 k = _mm_cmpeq_epi8_mask (a, b);
+ if (k == (__mmask16) -1) {
+ a[0] = a[0] + 1;
+ }
+ a[0] = a[0] - 4;
+ return a;
+}
+
+__m256i
+cmp_vector_jne_mask32(__m256i a, __m256i b) {
+ __mmask32 k = _mm256_cmpeq_epi8_mask (a, b);
+ if (k == (__mmask32) -1) {
+ a[0] = a[0] + 1;
+ }
+ a[0] = a[0] - 4;
+ return a;
+}
+
+__m512i
+cmp_vector_jne_mask64(__m512i a, __m512i b) {
+ __mmask64 k = _mm512_cmpeq_epi8_mask (a, b);
+ if (k == (__mmask64) -1) {
+ a[0] = a[0] + 1;
+ }
+ a[0] = a[0] - 4;
+ return a;
+}
+
+__m512i
+mask_cmp_vector_jne_mask64(__m512i a, __m512i b) {
+ __mmask64 k = _mm512_mask_cmpeq_epi8_mask ((__mmask64)0xffffffefffffffff, a, b);
+ if (k == (__mmask64) -1) {
+ a[0] = a[0] + 1;
+ }
+ a[0] = a[0] - 4;
+ return a;
+}
--- /dev/null
+/* PR target/113609 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v4" } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+sete" 4 } } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+setne" 4 } } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+je" 4 } } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+jne" 4 } } */
+
+#include <immintrin.h>
+
+unsigned int
+cmp_pi8_setcc(char a)
+{
+ if (a == -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_pi16_setcc(short a)
+{
+ if (a == -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_pi32_setcc(int a)
+{
+ if (a == -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_pi64_setcc(long long a)
+{
+ if (a == -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_pi8_setne(char a)
+{
+ if (a != -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_pi16_setne(short a)
+{
+ if (a != -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_pi32_setne(int a)
+{
+ if (a != -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_pi64_setne(long long a)
+{
+ if (a != -1)
+ return 1;
+ else
+ return 0;
+}
+
+__m128i
+cmp_pi8_je(__m128i a, char b) {
+ if (b == -1) {
+ a[0] = a[0] + 1;
+ }
+ else {
+ a[0] = a[0] - 1;
+ }
+ return a;
+}
+
+__m128i
+cmp_pi16_je(__m128i a, short b) {
+ if (b == -1) {
+ a[0] = a[0] + 1;
+ }
+ else {
+ a[0] = a[0] - 1;
+ }
+ return a;
+}
+
+__m128i
+cmp_pi32_je(__m128i a, int b) {
+ if (b == -1) {
+ a[0] = a[0] + 1;
+ }
+ else {
+ a[0] = a[0] - 1;
+ }
+ return a;
+}
+
+__m128i
+cmp_pi64_je(__m128i a, long long b) {
+ if (b == -1) {
+ a[0] = a[0] + 1;
+ }
+ else {
+ a[0] = a[0] - 1;
+ }
+ return a;
+}
+
+__m128i
+cmp_pi8_jne(__m128i a, char b) {
+ if (b == -1) {
+ a[0] = a[0] + 1;
+ }
+ a[0] = a[0] - 4;
+ return a;
+}
+
+__m128i
+cmp_pi16_jne(__m128i a, short b) {
+ if (b == -1) {
+ a[0] = a[0] + 1;
+ }
+ a[0] = a[0] - 4;
+ return a;
+}
+
+__m128i
+cmp_pi32_jne(__m128i a, int b) {
+ if (b == -1) {
+ a[0] = a[0] + 1;
+ }
+ a[0] = a[0] - 4;
+ return a;
+}
+
+__m128i
+cmp_pi64_jne(__m128i a, long long b) {
+ if (b == -1) {
+ a[0] = a[0] + 1;
+ }
+ a[0] = a[0] - 4;
+ return a;
+}