extern int avx_vpermilp_parallel (rtx par, machine_mode mode);
extern int avx_vperm2f128_parallel (rtx par, machine_mode mode);
+extern int vpternlog_redundant_operand_mask (rtx[]);
+extern void substitute_vpternlog_operands (rtx[]);
+
extern bool ix86_expand_strlen (rtx, rtx, rtx, rtx);
extern bool ix86_expand_set_or_cpymem (rtx, rtx, rtx, rtx, rtx, rtx,
rtx, rtx, rtx, rtx, bool);
return mask + 1;
}
\f
+/* Return a mask of VPTERNLOG operands that do not affect output. */
+
+int
+vpternlog_redundant_operand_mask (rtx *operands)
+{
+ int mask = 0;
+ int imm8 = XINT (operands[4], 0);
+
+ if (((imm8 >> 4) & 0x0F) == (imm8 & 0x0F))
+ mask |= 1;
+ if (((imm8 >> 2) & 0x33) == (imm8 & 0x33))
+ mask |= 2;
+ if (((imm8 >> 1) & 0x55) == (imm8 & 0x55))
+ mask |= 4;
+
+ return mask;
+}
+
+/* Eliminate false dependencies on operands that do not affect output
+ by substituting other operands of a VPTERNLOG. */
+
+void
+substitute_vpternlog_operands (rtx *operands)
+{
+ int mask = vpternlog_redundant_operand_mask (operands);
+
+ if (mask & 1) /* The first operand is redundant. */
+ operands[1] = operands[2];
+
+ if (mask & 2) /* The second operand is redundant. */
+ operands[2] = operands[1];
+
+ if (mask & 4) /* The third operand is redundant. */
+ operands[3] = operands[1];
+ else if (REG_P (operands[3]))
+ {
+ if (mask & 1)
+ operands[1] = operands[3];
+ if (mask & 2)
+ operands[2] = operands[3];
+ }
+}
+\f
/* Return a register priority for hard reg REGNO. */
static int
ix86_register_priority (int hard_regno)
(symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
(const_string "*")))])
+;; When VPTERNLOG happens to be invariant w.r.t first and second operands,
+;; and the third operand is memory, eliminate false dependencies by loading
+;; memory into the output operand first.
+(define_split
+ [(set (match_operand:V 0 "register_operand")
+ (unspec:V
+ [(match_operand:V 1 "register_operand")
+ (match_operand:V 2 "register_operand")
+ (match_operand:V 3 "memory_operand")
+ (match_operand:SI 4 "const_0_to_255_operand")]
+ UNSPEC_VTERNLOG))]
+ "!reload_completed && vpternlog_redundant_operand_mask (operands) == 3"
+ [(set (match_dup 0)
+ (match_dup 3))
+ (set (match_dup 0)
+ (unspec:V
+ [(match_dup 0)
+ (match_dup 0)
+ (match_dup 0)
+ (match_dup 4)]
+ UNSPEC_VTERNLOG))])
+
+;; Eliminate false dependencies when VPTERNLOG is invariant w.r.t any
+;; of input operands (except the case handled in the above split).
+(define_split
+ [(set (match_operand:V 0 "register_operand")
+ (unspec:V
+ [(match_operand:V 1 "register_operand")
+ (match_operand:V 2 "register_operand")
+ (match_operand:V 3 "nonimmediate_operand")
+ (match_operand:SI 4 "const_0_to_255_operand")]
+ UNSPEC_VTERNLOG))]
+ "!reload_completed && vpternlog_redundant_operand_mask (operands) != 0"
+ [(set (match_dup 0)
+ (unspec:V
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ (match_dup 4)]
+ UNSPEC_VTERNLOG))]
+ "substitute_vpternlog_operands (operands);")
+
;; There must be lots of other combinations like
;;
;; (any_logic:V
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vmovdqa" 4 } } */
+/* { dg-final { scan-assembler-times {vpternlog[^\n\r]*\(%rdx\)} 2 } } */
+
+#include <immintrin.h>
+
+__m512i f(__m512i* a, __m512i* b, __m512i* c)
+{
+ return _mm512_ternarylogic_epi64 (a[0], b[0], c[0], ~_MM_TERNLOG_B | ~_MM_TERNLOG_C);
+}
+
+__m512i g(__m512i* a, __m512i* b, __m512i* c)
+{
+ return _mm512_ternarylogic_epi64 (a[0], b[0], c[0], ~_MM_TERNLOG_A | ~_MM_TERNLOG_C);
+}
+
+__m512i h(__m512i* a, __m512i* b, __m512i* c)
+{
+ return _mm512_ternarylogic_epi64 (a[0], b[0], c[0], ~_MM_TERNLOG_A | ~_MM_TERNLOG_B);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vmovdqa" 1 } } */
+/* { dg-final { scan-assembler "vpternlog.*zmm0.*zmm0.*zmm0" } } */
+
+#include <immintrin.h>
+
+__m512i f(__m512i* a, __m512i* b, __m512i* c)
+{
+ return _mm512_ternarylogic_epi64 (a[0], b[0], c[0], ~_MM_TERNLOG_C);
+}
+