arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
op1 = expand_normal (arg0);
- if (!integer_zerop (arg0))
- op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
op2 = expand_normal (arg1);
if (!register_operand (op2, mode0))
}
op0 = gen_reg_rtx (mode0);
- if (integer_zerop (arg0))
+ if (op1 == const0_rtx)
{
/* If arg0 is 0, optimize right away into add or sub
instruction that sets CCCmode flags. */
else
{
/* Generate CF from input operand. */
- emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx));
+ if (!CONST_INT_P (op1))
+ {
+ op1 = convert_to_mode (QImode, op1, 1);
+ op1 = copy_to_mode_reg (QImode, op1);
+ emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx));
+ }
+ else
+ emit_insn (gen_x86_stc ());
/* Generate instruction that consumes CF. */
op1 = gen_rtx_REG (CCCmode, FLAGS_REG);
&& REGNO (XEXP (op1, 0)) == FLAGS_REG
&& XEXP (op1, 1) == const0_rtx)
return CCCmode;
+ /* Similarly for *x86_cmc pattern.
+ Match LTU of op0 (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
+ and op1 (geu:QI (reg:CCC FLAGS_REG) (const_int 0)).
+ It is sufficient to test that the operand modes are CCCmode. */
+ else if (code == LTU
+ && GET_CODE (op0) == NEG
+ && GET_CODE (XEXP (op0, 0)) == LTU
+ && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
+ && GET_CODE (op1) == GEU
+ && GET_MODE (XEXP (op1, 0)) == CCCmode)
+ return CCCmode;
else
return CCmode;
case GTU: /* CF=0 & ZF=0 */
*total = 0;
return true;
}
+ /* Match the *x86_cmc pattern:
+ (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
+ (geu:QI (reg:CCC FLAGS_REG) (const_int 0))) */
+ if (mode == CCCmode
+ && GET_CODE (op0) == NEG
+ && GET_CODE (XEXP (op0, 0)) == LTU
+ && REG_P (XEXP (XEXP (op0, 0), 0))
+ && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
+ && REGNO (XEXP (XEXP (op0, 0), 0)) == FLAGS_REG
+ && XEXP (XEXP (op0, 0), 1) == const0_rtx
+ && GET_CODE (op1) == GEU
+ && REG_P (XEXP (op1, 0))
+ && GET_MODE (XEXP (op1, 0)) == CCCmode
+ && REGNO (XEXP (op1, 0)) == FLAGS_REG
+ && XEXP (op1, 1) == const0_rtx)
+ {
+ /* This is *x86_cmc. */
+ if (!speed)
+ *total = COSTS_N_BYTES (1);
+ else if (TARGET_SLOW_STC)
+ *total = COSTS_N_INSNS (2);
+ else
+ *total = COSTS_N_INSNS (1);
+ return true;
+ }
if (SCALAR_INT_MODE_P (GET_MODE (op0))
&& GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
ix86_tune_features[X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD]
#define TARGET_DEST_FALSE_DEP_FOR_GLC \
ix86_tune_features[X86_TUNE_DEST_FALSE_DEP_FOR_GLC]
+#define TARGET_SLOW_STC ix86_tune_features[X86_TUNE_SLOW_STC]
/* Feature tests against the various architecture variations. */
enum ix86_arch_indices {
UNSPEC_INSN_FALSE_DEP
UNSPEC_SBB
UNSPEC_CC_NE
+ UNSPEC_STC
;; For SSE/MMX support:
UNSPEC_FIX_NOTRUNC
[(set_attr "type" "ssecomi")
(set_attr "prefix" "evex")
(set_attr "mode" "HF")])
+
+;; Set carry flag.
+(define_insn "x86_stc"
+ [(set (reg:CCC FLAGS_REG) (unspec:CCC [(const_int 0)] UNSPEC_STC))]
+ ""
+ "stc"
+ [(set_attr "length" "1")
+ (set_attr "length_immediate" "0")
+ (set_attr "modrm" "0")])
+
+;; On Pentium 4, set the carry flag using mov $1,%al;addb $-1,%al.
+(define_peephole2
+ [(match_scratch:QI 0 "r")
+ (set (reg:CCC FLAGS_REG) (unspec:CCC [(const_int 0)] UNSPEC_STC))]
+ "TARGET_SLOW_STC && !optimize_insn_for_size_p ()"
+ [(set (match_dup 0) (const_int 1))
+ (parallel
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC (plus:QI (match_dup 0) (const_int -1))
+ (match_dup 0)))
+ (set (match_dup 0) (plus:QI (match_dup 0) (const_int -1)))])])
+
+;; Complement carry flag.
+(define_insn "*x86_cmc"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
+ (geu:QI (reg:CCC FLAGS_REG) (const_int 0))))]
+ ""
+ "cmc"
+ [(set_attr "length" "1")
+ (set_attr "length_immediate" "0")
+ (set_attr "use_carry" "1")
+ (set_attr "modrm" "0")])
+
+;; On Pentium 4, cmc is replaced with setnc %al;addb $-1,%al.
+(define_peephole2
+ [(match_scratch:QI 0 "r")
+ (set (reg:CCC FLAGS_REG)
+ (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
+ (geu:QI (reg:CCC FLAGS_REG) (const_int 0))))]
+ "TARGET_SLOW_STC && !optimize_insn_for_size_p ()"
+ [(set (match_dup 0) (ne:QI (reg:CCC FLAGS_REG) (const_int 0)))
+ (parallel
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC (plus:QI (match_dup 0) (const_int -1))
+ (match_dup 0)))
+ (set (match_dup 0) (plus:QI (match_dup 0) (const_int -1)))])])
\f
;; Push/pop instructions.
"#"
"&& 1"
[(const_int 0)])
+
+;; Set the carry flag from the carry flag.
+(define_insn_and_split "*setccc"
+ [(set (reg:CCC FLAGS_REG)
+ (reg:CCC FLAGS_REG))]
+ "ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(const_int 0)])
+
+;; Set the carry flag from an LTU test of the flags register.
+(define_insn_and_split "*setcc_qi_negqi_ccc_1_<mode>"
+ [(set (reg:CCC FLAGS_REG)
+ (ltu:CCC (reg:CC_CCC FLAGS_REG) (const_int 0)))]
+ "ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(const_int 0)])
+
+;; Set the carry flag from an UNSPEC_CC_NE-wrapped LTU test of the flags.
+(define_insn_and_split "*setcc_qi_negqi_ccc_2_<mode>"
+ [(set (reg:CCC FLAGS_REG)
+ (unspec:CCC [(ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
+ (const_int 0)] UNSPEC_CC_NE))]
+ "ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(const_int 0)])
\f
;; Overflow setting add instructions
/* X86_TUNE_EMIT_VZEROUPPER: This enables vzeroupper instruction insertion
before a transfer of control flow out of the function. */
DEF_TUNE (X86_TUNE_EMIT_VZEROUPPER, "emit_vzeroupper", ~m_KNL)
+
+/* X86_TUNE_SLOW_STC: This disables use of stc, clc and cmc carry flag
+ modifications on architectures where these operations are slow. */
+DEF_TUNE (X86_TUNE_SLOW_STC, "slow_stc", m_PENT4)
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+unsigned int o1;
+unsigned int o2;
+
+unsigned int foo_xor (unsigned int a, unsigned int b,
+ unsigned int c, unsigned int d)
+{
+ unsigned int c1 = __builtin_ia32_addcarryx_u32 (1, a, b, &o1);
+ return __builtin_ia32_addcarryx_u32 (c1 ^ 1, c, d, &o2);
+}
+
+unsigned int foo_sub (unsigned int a, unsigned int b,
+ unsigned int c, unsigned int d)
+{
+ unsigned int c1 = __builtin_ia32_addcarryx_u32 (1, a, b, &o1);
+ return __builtin_ia32_addcarryx_u32 (1 - c1, c, d, &o2);
+}
+
+unsigned int foo_eqz (unsigned int a, unsigned int b,
+ unsigned int c, unsigned int d)
+{
+ unsigned int c1 = __builtin_ia32_addcarryx_u32 (1, a, b, &o1);
+ return __builtin_ia32_addcarryx_u32 (c1 == 0, c, d, &o2);
+}
+
+/* { dg-final { scan-assembler "cmc" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef unsigned int u32;
+
+unsigned int foo (unsigned int a, unsigned int b, unsigned int *c)
+{
+ return __builtin_ia32_addcarryx_u32 (1, a, b, c);
+}
+
+unsigned int bar (unsigned int b, unsigned int *c)
+{
+ return __builtin_ia32_addcarryx_u32 (1, 2, b, c);
+}
+
+unsigned int baz (unsigned int a, unsigned int *c)
+{
+ return __builtin_ia32_addcarryx_u32 (1, a, 3, c);
+}
+
+/* { dg-final { scan-assembler "stc" } } */