;; Mapping of vector modes to VPTERNLOG suffix
(define_mode_attr ternlogsuffix
[(V8DI "q") (V4DI "q") (V2DI "q")
+ (V8DF "q") (V4DF "q") (V2DF "q")
(V16SI "d") (V8SI "d") (V4SI "d")
+ (V16SF "d") (V8SF "d") (V4SF "d")
(V32HI "d") (V16HI "d") (V8HI "d")
(V64QI "d") (V32QI "d") (V16QI "d")])
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn "*<avx512>_vternlog<mode>_all"
+ [(set (match_operand:V 0 "register_operand" "=v")
+ (unspec:V
+ [(match_operand:V 1 "register_operand" "0")
+ (match_operand:V 2 "register_operand" "v")
+ (match_operand:V 3 "nonimmediate_operand" "vm")
+ (match_operand:SI 4 "const_0_to_255_operand")]
+ UNSPEC_VTERNLOG))]
+ "TARGET_AVX512F"
+ "vpternlog<ternlogsuffix>\t{%4, %3, %2, %0|%0, %2, %3, %4}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+;; There must be lots of other combinations like
+;;
+;; (any_logic:V
+;; (any_logic:V op1 op2)
+;; (any_logic:V op1 op3))
+;;
+;; (any_logic:V
+;; (any_logic:V
+;; (any_logic:V op1, op2)
+;; op3)
+;; op1)
+;;
+;; and so on.
+
+(define_code_iterator any_logic1 [and ior xor])
+(define_code_iterator any_logic2 [and ior xor])
+(define_code_attr logic_op [(and "&") (ior "|") (xor "^")])
+
+(define_insn_and_split "*<avx512>_vpternlog<mode>_1"
+ [(set (match_operand:V 0 "register_operand")
+ (any_logic:V
+ (any_logic1:V
+ (match_operand:V 1 "reg_or_notreg_operand")
+ (match_operand:V 2 "reg_or_notreg_operand"))
+ (any_logic2:V
+ (match_operand:V 3 "reg_or_notreg_operand")
+ (match_operand:V 4 "reg_or_notreg_operand"))))]
+ "(<MODE_SIZE> == 64 || TARGET_AVX512VL)
+ && ix86_pre_reload_split ()
+ && (rtx_equal_p (STRIP_UNARY (operands[1]),
+ STRIP_UNARY (operands[4]))
+ || rtx_equal_p (STRIP_UNARY (operands[2]),
+ STRIP_UNARY (operands[4]))
+ || rtx_equal_p (STRIP_UNARY (operands[1]),
+ STRIP_UNARY (operands[3]))
+ || rtx_equal_p (STRIP_UNARY (operands[2]),
+ STRIP_UNARY (operands[3])))"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:V
+ [(match_dup 6)
+ (match_dup 2)
+ (match_dup 1)
+ (match_dup 5)]
+ UNSPEC_VTERNLOG))]
+{
+ /* VPTERNLOGD reg6, reg2, reg1, imm8. */
+ int reg6 = 0xF0;
+ int reg2 = 0xCC;
+ int reg1 = 0xAA;
+ int reg3 = 0;
+ int reg4 = 0;
+ int reg_mask, tmp1, tmp2;
+ if (rtx_equal_p (STRIP_UNARY (operands[1]),
+ STRIP_UNARY (operands[4])))
+ {
+ reg4 = reg1;
+ reg3 = reg6;
+ operands[6] = operands[3];
+ }
+ else if (rtx_equal_p (STRIP_UNARY (operands[2]),
+ STRIP_UNARY (operands[4])))
+ {
+ reg4 = reg2;
+ reg3 = reg6;
+ operands[6] = operands[3];
+ }
+ else if (rtx_equal_p (STRIP_UNARY (operands[1]),
+ STRIP_UNARY (operands[3])))
+ {
+ reg4 = reg6;
+ reg3 = reg1;
+ operands[6] = operands[4];
+ }
+ else
+ {
+ reg4 = reg6;
+ reg3 = reg2;
+ operands[6] = operands[4];
+ }
+
+ reg1 = UNARY_P (operands[1]) ? ~reg1 : reg1;
+ reg2 = UNARY_P (operands[2]) ? ~reg2 : reg2;
+ reg3 = UNARY_P (operands[3]) ? ~reg3 : reg3;
+ reg4 = UNARY_P (operands[4]) ? ~reg4 : reg4;
+
+ tmp1 = reg1 <any_logic1:logic_op> reg2;
+ tmp2 = reg3 <any_logic2:logic_op> reg4;
+ reg_mask = tmp1 <any_logic:logic_op> tmp2;
+ reg_mask &= 0xFF;
+
+ operands[1] = STRIP_UNARY (operands[1]);
+ operands[2] = STRIP_UNARY (operands[2]);
+ operands[6] = STRIP_UNARY (operands[6]);
+ operands[5] = GEN_INT (reg_mask);
+})
+
+(define_insn_and_split "*<avx512>_vpternlog<mode>_2"
+ [(set (match_operand:V 0 "register_operand")
+ (any_logic:V
+ (any_logic1:V
+ (any_logic2:V
+ (match_operand:V 1 "reg_or_notreg_operand")
+ (match_operand:V 2 "reg_or_notreg_operand"))
+ (match_operand:V 3 "reg_or_notreg_operand"))
+ (match_operand:V 4 "reg_or_notreg_operand")))]
+ "(<MODE_SIZE> == 64 || TARGET_AVX512VL)
+ && ix86_pre_reload_split ()
+ && (rtx_equal_p (STRIP_UNARY (operands[1]),
+ STRIP_UNARY (operands[4]))
+ || rtx_equal_p (STRIP_UNARY (operands[2]),
+ STRIP_UNARY (operands[4]))
+ || rtx_equal_p (STRIP_UNARY (operands[1]),
+ STRIP_UNARY (operands[3]))
+ || rtx_equal_p (STRIP_UNARY (operands[2]),
+ STRIP_UNARY (operands[3])))"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:V
+ [(match_dup 6)
+ (match_dup 2)
+ (match_dup 1)
+ (match_dup 5)]
+ UNSPEC_VTERNLOG))]
+{
+ /* VPTERNLOGD reg6, reg2, reg1, imm8. */
+ int reg6 = 0xF0;
+ int reg2 = 0xCC;
+ int reg1 = 0xAA;
+ int reg3 = 0;
+ int reg4 = 0;
+ int reg_mask, tmp1, tmp2;
+ if (rtx_equal_p (STRIP_UNARY (operands[1]),
+ STRIP_UNARY (operands[4])))
+ {
+ reg4 = reg1;
+ reg3 = reg6;
+ operands[6] = operands[3];
+ }
+ else if (rtx_equal_p (STRIP_UNARY (operands[2]),
+ STRIP_UNARY (operands[4])))
+ {
+ reg4 = reg2;
+ reg3 = reg6;
+ operands[6] = operands[3];
+ }
+ else if (rtx_equal_p (STRIP_UNARY (operands[1]),
+ STRIP_UNARY (operands[3])))
+ {
+ reg4 = reg6;
+ reg3 = reg1;
+ operands[6] = operands[4];
+ }
+ else
+ {
+ reg4 = reg6;
+ reg3 = reg2;
+ operands[6] = operands[4];
+ }
+
+ reg1 = UNARY_P (operands[1]) ? ~reg1 : reg1;
+ reg2 = UNARY_P (operands[2]) ? ~reg2 : reg2;
+ reg3 = UNARY_P (operands[3]) ? ~reg3 : reg3;
+ reg4 = UNARY_P (operands[4]) ? ~reg4 : reg4;
+
+ tmp1 = reg1 <any_logic2:logic_op> reg2;
+ tmp2 = tmp1 <any_logic1:logic_op> reg3;
+ reg_mask = tmp2 <any_logic:logic_op> reg4;
+ reg_mask &= 0xFF;
+
+ operands[1] = STRIP_UNARY (operands[1]);
+ operands[2] = STRIP_UNARY (operands[2]);
+ operands[6] = STRIP_UNARY (operands[6]);
+ operands[5] = GEN_INT (reg_mask);
+})
+
+(define_insn_and_split "*<avx512>_vpternlog<mode>_3"
+ [(set (match_operand:V 0 "register_operand")
+ (any_logic:V
+ (any_logic1:V
+ (match_operand:V 1 "reg_or_notreg_operand")
+ (match_operand:V 2 "reg_or_notreg_operand"))
+ (match_operand:V 3 "reg_or_notreg_operand")))]
+ "(<MODE_SIZE> == 64 || TARGET_AVX512VL)
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:V
+ [(match_dup 3)
+ (match_dup 2)
+ (match_dup 1)
+ (match_dup 4)]
+ UNSPEC_VTERNLOG))]
+{
+ /* VPTERNLOGD reg3, reg2, reg1, imm8. */
+ int reg3 = 0xF0;
+ int reg2 = 0xCC;
+ int reg1 = 0xAA;
+ int reg_mask, tmp1;
+
+ reg1 = UNARY_P (operands[1]) ? ~reg1 : reg1;
+ reg2 = UNARY_P (operands[2]) ? ~reg2 : reg2;
+ reg3 = UNARY_P (operands[3]) ? ~reg3 : reg3;
+
+ tmp1 = reg1 <any_logic1:logic_op> reg2;
+ reg_mask = tmp1 <any_logic:logic_op> reg3;
+ reg_mask &= 0xFF;
+
+ operands[1] = STRIP_UNARY (operands[1]);
+ operands[2] = STRIP_UNARY (operands[2]);
+ operands[3] = STRIP_UNARY (operands[3]);
+ operands[4] = GEN_INT (reg_mask);
+})
+
+
(define_insn "<avx512>_vternlog<mode>_mask"
[(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
(vec_merge:VI48_AVX512VL
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times "vpternlog" 6 } } */
+/* { dg-final { scan-assembler-not "vpxor" } } */
+/* { dg-final { scan-assembler-not "vpor" } } */
+/* { dg-final { scan-assembler-not "vpand" } } */
+
+#include<immintrin.h>
+__m256d
+__attribute__((noipa, target("avx512vl")))
+copysign2_pd(__m256d from, __m256d to) {
+ __m256i a = _mm256_castpd_si256(from);
+ __m256d avx_signbit = _mm256_castsi256_pd(_mm256_slli_epi64(_mm256_cmpeq_epi64(a, a), 63));
+ /* (avx_signbit & from) | (~avx_signbit & to) */
+ return _mm256_or_pd(_mm256_and_pd(avx_signbit, from), _mm256_andnot_pd(avx_signbit, to));
+}
+
+__m256i
+__attribute__((noipa, target("avx512vl")))
+foo (__m256i src1, __m256i src2, __m256i src3)
+{
+ return (src2 & ~src1) | (src3 & src1);
+}
+
+__m256i
+__attribute__ ((noipa, target("avx512vl")))
+foo1 (__m256i src1, __m256i src2, __m256i src3)
+{
+ return (src2 & src1) | (src3 & ~src1);
+}
+
+__m256i
+__attribute__ ((noipa, target("avx512vl")))
+foo2 (__m256i src1, __m256i src2, __m256i src3)
+{
+ return (src2 & src1) | (~src3 & src1);
+}
+
+__m256i
+__attribute__ ((noipa, target("avx512vl")))
+foo3 (__m256i src1, __m256i src2, __m256i src3)
+{
+ return (~src2 & src1) | (src3 & src1);
+}
+
+__m256i
+__attribute__ ((noipa, target("avx512vl")))
+foo4 (__m256i src1, __m256i src2, __m256i src3)
+{
+ return src3 & src2 ^ src1;
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx2 -mno-avx512f" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512VL
+
+#include "avx512f-helper.h"
+
+#include "pr101989-1.c"
+__m256d
+avx2_copysign2_pd (__m256d from, __m256d to) {
+ __m256i a = _mm256_castpd_si256(from);
+ __m256d avx_signbit = _mm256_castsi256_pd(_mm256_slli_epi64(_mm256_cmpeq_epi64(a, a), 63));
+ /* (avx_signbit & from) | (~avx_signbit & to) */
+ return _mm256_or_pd(_mm256_and_pd(avx_signbit, from), _mm256_andnot_pd(avx_signbit, to));
+}
+
+__m256i
+avx2_foo (__m256i src1, __m256i src2, __m256i src3)
+{
+ return (src2 & ~src1) | (src3 & src1);
+}
+
+__m256i
+avx2_foo1 (__m256i src1, __m256i src2, __m256i src3)
+{
+ return (src2 & src1) | (src3 & ~src1);
+}
+
+__m256i
+avx2_foo2 (__m256i src1, __m256i src2, __m256i src3)
+{
+ return (src2 & src1) | (~src3 & src1);
+}
+
+__m256i
+avx2_foo3 (__m256i src1, __m256i src2, __m256i src3)
+{
+ return (~src2 & src1) | (src3 & src1);
+}
+
+__m256i
+avx2_foo4 (__m256i src1, __m256i src2, __m256i src3)
+{
+ return src3 & src2 ^ src1;
+}
+
+
+void
+test_256 (void)
+{
+ union256i_q q1, q2, q3, res2, exp2;
+ union256d d1, d2, res1, exp1;
+ int i, sign = 1;
+
+ for (i = 0; i < 4; i++)
+ {
+ d1.a[i] = 12.34 * (i + 2000) * sign;
+ d2.a[i] = 56.78 * (i - 30) * sign;
+ q1.a[i] = 12 * (i + 2000) * sign;
+ q2.a[i] = 56 * (i - 30) * sign;
+ q3.a[i] = 90 * (i + 40) * sign;
+ res1.a[i] = DEFAULT_VALUE;
+ exp1.a[i] = DEFAULT_VALUE;
+ res2.a[i] = exp2.a[i] = -1;
+ sign = -sign;
+ }
+
+ exp1.x = avx2_copysign2_pd (d1.x, d2.x);
+ res1.x = copysign2_pd (d1.x, d2.x);
+ if (UNION_CHECK (256, d) (res1, exp1.a))
+ abort ();
+
+ exp2.x = avx2_foo1 (q1.x, q2.x, q3.x);
+ res2.x = foo1 (q1.x, q2.x, q3.x);
+ if (UNION_CHECK (256, i_q) (res2, exp2.a))
+ abort ();
+
+ exp2.x = avx2_foo2 (q1.x, q2.x, q3.x);
+ res2.x = foo2 (q1.x, q2.x, q3.x);
+ if (UNION_CHECK (256, i_q) (res2, exp2.a))
+ abort ();
+
+ exp2.x = avx2_foo3 (q1.x, q2.x, q3.x);
+ res2.x = foo3 (q1.x, q2.x, q3.x);
+ if (UNION_CHECK (256, i_q) (res2, exp2.a))
+ abort ();
+
+ exp2.x = avx2_foo4 (q1.x, q2.x, q3.x);
+ res2.x = foo4 (q1.x, q2.x, q3.x);
+ if (UNION_CHECK (256, i_q) (res2, exp2.a))
+ abort ();
+
+ exp2.x = avx2_foo (q1.x, q2.x, q3.x);
+ res2.x = foo (q1.x, q2.x, q3.x);
+ if (UNION_CHECK (256, i_q) (res2, exp2.a))
+ abort ();
+}
+
+static void
+test_128 ()
+{}