return false;
case UNSPEC:
- if (XINT (x, 1) == UNSPEC_TP)
- *total = 0;
- else if (XINT (x, 1) == UNSPEC_VTERNLOG)
+ switch (XINT (x, 1))
{
+ case UNSPEC_TP:
+ *total = 0;
+ break;
+
+ case UNSPEC_VTERNLOG:
*total = cost->sse_op;
- *total += rtx_cost (XVECEXP (x, 0, 0), mode, code, 0, speed);
- *total += rtx_cost (XVECEXP (x, 0, 1), mode, code, 1, speed);
- *total += rtx_cost (XVECEXP (x, 0, 2), mode, code, 2, speed);
+ if (!REG_P (XVECEXP (x, 0, 0)))
+ *total += rtx_cost (XVECEXP (x, 0, 0), mode, code, 0, speed);
+ if (!REG_P (XVECEXP (x, 0, 1)))
+ *total += rtx_cost (XVECEXP (x, 0, 1), mode, code, 1, speed);
+ if (!REG_P (XVECEXP (x, 0, 2)))
+ *total += rtx_cost (XVECEXP (x, 0, 2), mode, code, 2, speed);
return true;
- }
- else if (XINT (x, 1) == UNSPEC_PTEST)
- {
+
+ case UNSPEC_PTEST:
+ {
+ *total = cost->sse_op;
+ rtx test_op0 = XVECEXP (x, 0, 0);
+ if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1)))
+ return false;
+ if (GET_CODE (test_op0) == AND)
+ {
+ rtx and_op0 = XEXP (test_op0, 0);
+ if (GET_CODE (and_op0) == NOT)
+ and_op0 = XEXP (and_op0, 0);
+ *total += rtx_cost (and_op0, GET_MODE (and_op0),
+ AND, 0, speed)
+ + rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0),
+ AND, 1, speed);
+ }
+ else
+ *total = rtx_cost (test_op0, GET_MODE (test_op0),
+ UNSPEC, 0, speed);
+ }
+ return true;
+
+ case UNSPEC_BLENDV:
*total = cost->sse_op;
- rtx test_op0 = XVECEXP (x, 0, 0);
- if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1)))
- return false;
- if (GET_CODE (test_op0) == AND)
+ if (!REG_P (XVECEXP (x, 0, 0)))
+ *total += rtx_cost (XVECEXP (x, 0, 0), mode, code, 0, speed);
+ if (!REG_P (XVECEXP (x, 0, 1)))
+ *total += rtx_cost (XVECEXP (x, 0, 1), mode, code, 1, speed);
+ if (!REG_P (XVECEXP (x, 0, 2)))
{
- rtx and_op0 = XEXP (test_op0, 0);
- if (GET_CODE (and_op0) == NOT)
- and_op0 = XEXP (and_op0, 0);
- *total += rtx_cost (and_op0, GET_MODE (and_op0),
- AND, 0, speed)
- + rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0),
- AND, 1, speed);
+ rtx cond = XVECEXP (x, 0, 2);
+ if ((GET_CODE (cond) == LT || GET_CODE (cond) == GT)
+ && CONST_VECTOR_P (XEXP (cond, 1)))
+ {
+ /* avx2_blendvpd256_gt and friends. */
+ if (!REG_P (XEXP (cond, 0)))
+ *total += rtx_cost (XEXP (cond, 0), mode, code, 2, speed);
+ }
+ else
+ *total += rtx_cost (cond, mode, code, 2, speed);
}
- else
- *total = rtx_cost (test_op0, GET_MODE (test_op0),
- UNSPEC, 0, speed);
return true;
+
+ case UNSPEC_MOVMSK:
+ *total = cost->sse_op;
+ return true;
+
+ default:
+ break;
}
return false;
}
return false;
+ case EQ:
+ case GT:
+ case GTU:
+ case LT:
+ case LTU:
+ if (TARGET_SSE2
+ && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+ && GET_MODE_SIZE (mode) >= 8)
+ {
+ /* vpcmpeq */
+ *total = speed ? COSTS_N_INSNS (1) : COSTS_N_BYTES (4);
+ if (!REG_P (XEXP (x, 0)))
+ *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
+ if (!REG_P (XEXP (x, 1)))
+ *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
+ return true;
+ }
+ if (TARGET_XOP
+ && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+ && GET_MODE_SIZE (mode) <= 16)
+ {
+ /* vpcomeq */
+ *total = speed ? COSTS_N_INSNS (1) : COSTS_N_BYTES (6);
+ if (!REG_P (XEXP (x, 0)))
+ *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
+ if (!REG_P (XEXP (x, 1)))
+ *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
+ return true;
+ }
+ return false;
+
+ case NE:
+ case GE:
+ case GEU:
+ if (TARGET_XOP
+ && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+ && GET_MODE_SIZE (mode) <= 16)
+ {
+ /* vpcomneq */
+ *total = speed ? COSTS_N_INSNS (1) : COSTS_N_BYTES (6);
+ if (!REG_P (XEXP (x, 0)))
+ *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
+ if (!REG_P (XEXP (x, 1)))
+ *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
+ return true;
+ }
+ if (TARGET_SSE2
+ && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+ && GET_MODE_SIZE (mode) >= 8)
+ {
+ if (TARGET_AVX512F && GET_MODE_SIZE (mode) >= 16)
+ /* vpcmpeq + vpternlog */
+ *total = speed ? COSTS_N_INSNS (2) : COSTS_N_BYTES (11);
+ else
+ /* vpcmpeq + pxor + vpcmpeq */
+ *total = speed ? COSTS_N_INSNS (3) : COSTS_N_BYTES (12);
+ if (!REG_P (XEXP (x, 0)))
+ *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
+ if (!REG_P (XEXP (x, 1)))
+ *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
+ return true;
+ }
+ return false;
+
default:
return false;
}
gcc_assert (icode != CODE_FOR_nothing);
+ /* Find the comparison generating the mask OP0. */
+ tree cmp_op0 = NULL_TREE;
+ tree cmp_op1 = NULL_TREE;
+ enum tree_code cmp_code = TREE_CODE (op0);
+ if (TREE_CODE_CLASS (cmp_code) == tcc_comparison)
+ {
+ cmp_op0 = TREE_OPERAND (op0, 0);
+ cmp_op1 = TREE_OPERAND (op0, 1);
+ }
+ else if (cmp_code == SSA_NAME)
+ {
+ gimple *def_stmt = get_gimple_for_ssa_name (op0);
+ if (def_stmt && is_gimple_assign (def_stmt))
+ {
+ cmp_code = gimple_assign_rhs_code (def_stmt);
+ if (TREE_CODE_CLASS (cmp_code) == tcc_comparison)
+ {
+ cmp_op0 = gimple_assign_rhs1 (def_stmt);
+ cmp_op1 = gimple_assign_rhs2 (def_stmt);
+ }
+ }
+ }
+
+ /* Decide whether to invert comparison based on rtx_cost. */
+ if (cmp_op0)
+ {
+ enum tree_code rev_code;
+ tree op_type = TREE_TYPE (cmp_op0);
+ int unsignedp = TYPE_UNSIGNED (op_type);
+ rev_code = invert_tree_comparison (cmp_code, HONOR_NANS (op_type));
+
+ if (rev_code != ERROR_MARK)
+ {
+ tree cmp_type = TREE_TYPE (op0);
+ machine_mode cmp_mode = TYPE_MODE (cmp_type);
+ machine_mode op_mode = TYPE_MODE (op_type);
+ bool speed_p = optimize_insn_for_speed_p ();
+ rtx reg = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1);
+ enum rtx_code cmp_rtx_code = convert_tree_comp_to_rtx (cmp_code,
+ unsignedp);
+ rtx veccmp = gen_rtx_fmt_ee (cmp_rtx_code, cmp_mode, reg, reg);
+ int old_cost = rtx_cost (veccmp, cmp_mode, SET, 0, speed_p);
+ enum rtx_code rev_rtx_code = convert_tree_comp_to_rtx (rev_code,
+ unsignedp);
+ PUT_CODE (veccmp, rev_rtx_code);
+ int new_cost = rtx_cost (veccmp, cmp_mode, SET, 0, speed_p);
+ if (new_cost < old_cost)
+ {
+ op0 = fold_build2_loc (EXPR_LOCATION (op0), rev_code,
+ cmp_type, cmp_op0, cmp_op1);
+ std::swap (op1, op2);
+ }
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file,
+ ";; %sswapping operands of .VCOND_MASK\n",
+ new_cost >= old_cost ? "not " : "");
+ fprintf (dump_file,
+ ";; cost of original %s: %d\n",
+ GET_RTX_NAME (cmp_rtx_code), old_cost);
+ fprintf (dump_file,
+ ";; cost of replacement %s: %d\n",
+ GET_RTX_NAME (rev_rtx_code), new_cost);
+ }
+ }
+ }
+
mask = expand_normal (op0);
rtx_op1 = expand_normal (op1);
rtx_op2 = expand_normal (op2);