machine_mode mode1 = insn_data[d->icode].operand[1].mode;
enum rtx_code comparison = d->comparison;
+ /* ptest reg, reg sets the carry flag. */
+ if (comparison == LTU
+ && (d->code == IX86_BUILTIN_PTESTC
+ || d->code == IX86_BUILTIN_PTESTC256)
+ && rtx_equal_p (op0, op1))
+ {
+ if (!target)
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const1_rtx);
+ return target;
+ }
+
if (VECTOR_MODE_P (mode0))
op0 = safe_vector_operand (op0, mode0);
if (VECTOR_MODE_P (mode1))
else if (XINT (x, 1) == UNSPEC_PTEST)
{
*total = cost->sse_op;
- if (XVECLEN (x, 0) == 2
- && GET_CODE (XVECEXP (x, 0, 0)) == AND)
+ rtx test_op0 = XVECEXP (x, 0, 0);
+ if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1)))
+ return false;
+ if (GET_CODE (test_op0) == AND)
{
- rtx andop = XVECEXP (x, 0, 0);
- *total += rtx_cost (XEXP (andop, 0), GET_MODE (andop),
- AND, opno, speed)
- + rtx_cost (XEXP (andop, 1), GET_MODE (andop),
- AND, opno, speed);
- return true;
+ rtx and_op0 = XEXP (test_op0, 0);
+ if (GET_CODE (and_op0) == NOT)
+ and_op0 = XEXP (and_op0, 0);
+ *total += rtx_cost (and_op0, GET_MODE (and_op0),
+ AND, 0, speed)
+ + rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0),
+ AND, 1, speed);
}
+ else
+ *total = rtx_cost (test_op0, GET_MODE (test_op0),
+ UNSPEC, 0, speed);
+ return true;
}
return false;
[(set (reg:CCZ FLAGS_REG)
(unspec:CCZ [(match_dup 0) (match_dup 1)] UNSPEC_PTEST))])
+;; ptest reg,reg sets the carry flag.
+(define_split
+ [(set (reg:CCC FLAGS_REG)
+ (unspec:CCC [(match_operand:V_AVX 0 "register_operand")
+ (match_operand:V_AVX 1 "register_operand")]
+ UNSPEC_PTEST))]
+ "TARGET_SSE4_1
+ && rtx_equal_p (operands[0], operands[1])"
+ [(set (reg:CCC FLAGS_REG)
+ (unspec:CCC [(const_int 0)] UNSPEC_STC))])
+
+;; Changing the CCmode of FLAGS_REG requires updating both def and use.
+;; pandn/ptestz/set{n?}e -> ptestc/set{n?}c
+(define_split
+ [(set (match_operand:SWI 0 "register_operand")
+ (match_operator:SWI 3 "bt_comparison_operator"
+ [(unspec:CCZ [
+ (and:V_AVX (not:V_AVX (match_operand:V_AVX 1 "register_operand"))
+ (match_operand:V_AVX 2 "register_operand"))
+ (and:V_AVX (not:V_AVX (match_dup 1)) (match_dup 2))]
+ UNSPEC_PTEST)
+ (const_int 0)]))]
+ "TARGET_SSE4_1"
+ [(set (reg:CCC FLAGS_REG)
+ (unspec:CCC [(match_dup 1) (match_dup 2)] UNSPEC_PTEST))
+ (set (match_dup 0)
+ (match_op_dup 3 [(reg:CCC FLAGS_REG) (const_int 0)]))])
+
+(define_split
+ [(set (strict_low_part (match_operand:QI 0 "register_operand"))
+ (match_operator:QI 3 "bt_comparison_operator"
+ [(unspec:CCZ [
+ (and:V_AVX (not:V_AVX (match_operand:V_AVX 1 "register_operand"))
+ (match_operand:V_AVX 2 "register_operand"))
+ (and:V_AVX (not:V_AVX (match_dup 1)) (match_dup 2))]
+ UNSPEC_PTEST)
+ (const_int 0)]))]
+ "TARGET_SSE4_1"
+ [(set (reg:CCC FLAGS_REG)
+ (unspec:CCC [(match_dup 1) (match_dup 2)] UNSPEC_PTEST))
+ (set (strict_low_part (match_dup 0))
+ (match_op_dup 3 [(reg:CCC FLAGS_REG) (const_int 0)]))])
+
+;; pandn/ptestz/j{n?}e -> ptestc/j{n?}c
+(define_split
+ [(set (pc)
+ (if_then_else
+ (match_operator 3 "bt_comparison_operator"
+ [(unspec:CCZ [
+ (and:V_AVX
+ (not:V_AVX (match_operand:V_AVX 1 "register_operand"))
+ (match_operand:V_AVX 2 "register_operand"))
+ (and:V_AVX (not:V_AVX (match_dup 1)) (match_dup 2))]
+ UNSPEC_PTEST)
+ (const_int 0)])
+ (match_operand 0)
+ (pc)))]
+ "TARGET_SSE4_1"
+ [(set (reg:CCC FLAGS_REG)
+ (unspec:CCC [(match_dup 1) (match_dup 2)] UNSPEC_PTEST))
+ (set (pc) (if_then_else (match_op_dup 3 [(reg:CCC FLAGS_REG) (const_int 0)])
+ (match_dup 0)
+ (pc)))])
+
(define_expand "nearbyint<mode>2"
[(set (match_operand:VFH 0 "register_operand")
(unspec:VFH
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+
+typedef long long __m256i __attribute__ ((__vector_size__ (32)));
+
+int foo (__m256i x, __m256i y)
+{
+ __m256i a = x & ~y;
+ return __builtin_ia32_ptestz256 (a, a);
+}
+
+int bar (__m256i x, __m256i y)
+{
+ __m256i a = ~x & y;
+ return __builtin_ia32_ptestz256 (a, a);
+}
+
+/* { dg-final { scan-assembler-times "vptest\[ \\t\]+%" 2 } } */
+/* { dg-final { scan-assembler-times "setc" 2 } } */
+/* { dg-final { scan-assembler-not "vpandn" } } */
+/* { dg-final { scan-assembler-not "sete" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+
+typedef long long __m256i __attribute__ ((__vector_size__ (32)));
+
+int foo (__m256i x, __m256i y)
+{
+ __m256i a = x & ~y;
+ return !__builtin_ia32_ptestz256 (a, a);
+}
+
+int bar (__m256i x, __m256i y)
+{
+ __m256i a = ~x & y;
+ return !__builtin_ia32_ptestz256 (a, a);
+}
+
+/* { dg-final { scan-assembler-times "vptest\[ \\t\]+%" 2} } */
+/* { dg-final { scan-assembler-times "setnc" 2 } } */
+/* { dg-final { scan-assembler-not "vpandn" } } */
+/* { dg-final { scan-assembler-not "setne" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+
+typedef long long __m256i __attribute__ ((__vector_size__ (32)));
+
+extern void ext (void);
+
+void foo (__m256i x, __m256i y)
+{
+ __m256i a = x & ~y;
+ if (__builtin_ia32_ptestz256 (a, a))
+ ext();
+}
+
+void bar (__m256i x, __m256i y)
+{
+ __m256i a = ~x & y;
+ if (__builtin_ia32_ptestz256 (a, a))
+ ext();
+}
+
+void foo2 (__m256i x, __m256i y)
+{
+ __m256i a = x & ~y;
+ if (__builtin_ia32_ptestz256 (a, a))
+ ext();
+}
+
+void bar2 (__m256i x, __m256i y)
+{
+ __m256i a = ~x & y;
+ if (__builtin_ia32_ptestz256 (a, a))
+ ext();
+}
+
+/* { dg-final { scan-assembler-times "ptest\[ \\t\]+%" 4 } } */
+/* { dg-final { scan-assembler-times "jn?c" 4 } } */
+/* { dg-final { scan-assembler-not "pandn" } } */
+/* { dg-final { scan-assembler-not "jne" } } */
+/* { dg-final { scan-assembler-not "je" } } */
return __builtin_ia32_ptestc256 (a, a);
}
-/* { dg-final { scan-assembler "vpand" } } */
+/* { dg-final { scan-assembler "movl\[ \\t]*\\\$1, %eax" } } */
return __builtin_ia32_ptestc128 (a, a);
}
-/* { dg-final { scan-assembler "pand" } } */
+/* { dg-final { scan-assembler "movl\[ \\t]*\\\$1, %eax" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4.1" } */
+
+typedef long long __m128i __attribute__ ((__vector_size__ (16)));
+
+int foo (__m128i x, __m128i y)
+{
+ __m128i a = x & ~y;
+ return __builtin_ia32_ptestz128 (a, a);
+}
+
+int bar (__m128i x, __m128i y)
+{
+ __m128i a = ~x & y;
+ return __builtin_ia32_ptestz128 (a, a);
+}
+
+/* { dg-final { scan-assembler-times "ptest\[ \\t\]+%" 2 } } */
+/* { dg-final { scan-assembler-times "setc" 2 } } */
+/* { dg-final { scan-assembler-not "pandn" } } */
+/* { dg-final { scan-assembler-not "sete" } } */
+
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4.1" } */
+
+typedef long long __m128i __attribute__ ((__vector_size__ (16)));
+
+int foo (__m128i x, __m128i y)
+{
+ __m128i a = x & ~y;
+ return !__builtin_ia32_ptestz128 (a, a);
+}
+
+int bar (__m128i x, __m128i y)
+{
+ __m128i a = ~x & y;
+ return !__builtin_ia32_ptestz128 (a, a);
+}
+
+/* { dg-final { scan-assembler-times "ptest\[ \\t\]+%" 2 } } */
+/* { dg-final { scan-assembler-times "setnc" 2 } } */
+/* { dg-final { scan-assembler-not "pandn" } } */
+/* { dg-final { scan-assembler-not "setne" } } */
+
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4.1" } */
+
+typedef long long __m128i __attribute__ ((__vector_size__ (16)));
+
+extern void ext (void);
+
+void foo (__m128i x, __m128i y)
+{
+ __m128i a = x & ~y;
+ if (__builtin_ia32_ptestz128 (a, a))
+ ext();
+}
+
+void bar (__m128i x, __m128i y)
+{
+ __m128i a = ~x & y;
+ if (__builtin_ia32_ptestz128 (a, a))
+ ext();
+}
+
+void foo2 (__m128i x, __m128i y)
+{
+ __m128i a = x & ~y;
+ if (__builtin_ia32_ptestz128 (a, a))
+ ext();
+}
+
+void bar2 (__m128i x, __m128i y)
+{
+ __m128i a = ~x & y;
+ if (__builtin_ia32_ptestz128 (a, a))
+ ext();
+}
+
+/* { dg-final { scan-assembler-times "ptest\[ \\t\]+%" 4 } } */
+/* { dg-final { scan-assembler-times "jn?c" 4 } } */
+/* { dg-final { scan-assembler-not "pandn" } } */
+/* { dg-final { scan-assembler-not "jne" } } */
+/* { dg-final { scan-assembler-not "je" } } */