emit_insn (gen_ashr3 (low[0], low[0],
GEN_INT (count - half_width)));
}
+ else if (count == 1
+ && (TARGET_USE_RCR || optimize_size > 1))
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+ if (mode == DImode)
+ {
+ emit_insn (gen_ashrsi3_carry (high[0], high[0]));
+ emit_insn (gen_rcrsi2 (low[0], low[0]));
+ }
+ else
+ {
+ emit_insn (gen_ashrdi3_carry (high[0], high[0]));
+ emit_insn (gen_rcrdi2 (low[0], low[0]));
+ }
+ }
else
{
gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
emit_insn (gen_lshr3 (low[0], low[0],
GEN_INT (count - half_width)));
}
+ else if (count == 1
+ && (TARGET_USE_RCR || optimize_size > 1))
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+ if (mode == DImode)
+ {
+ emit_insn (gen_lshrsi3_carry (high[0], high[0]));
+ emit_insn (gen_rcrsi2 (low[0], low[0]));
+ }
+ else
+ {
+ emit_insn (gen_lshrdi3_carry (high[0], high[0]));
+ emit_insn (gen_rcrdi2 (low[0], low[0]));
+ }
+ }
else
{
gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
#define TARGET_DEST_FALSE_DEP_FOR_GLC \
ix86_tune_features[X86_TUNE_DEST_FALSE_DEP_FOR_GLC]
#define TARGET_SLOW_STC ix86_tune_features[X86_TUNE_SLOW_STC]
+#define TARGET_USE_RCR ix86_tune_features[X86_TUNE_USE_RCR]
/* Feature tests against the various architecture variations. */
enum ix86_arch_indices {
[(parallel [(set (strict_low_part (match_dup 0))
(bswap:HI (match_dup 0)))
(clobber (reg:CC FLAGS_REG))])])
+
+;; Rotations through carry flag
+(define_insn "rcrsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 1))
+ (ashift:SI (ltu:SI (reg:CCC FLAGS_REG) (const_int 0))
+ (const_int 31))))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "rcr{l}\t%0"
+ [(set_attr "type" "ishift1")
+ (set_attr "memory" "none")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "SI")])
+
+(define_insn "rcrdi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "0")
+ (const_int 1))
+ (ashift:DI (ltu:DI (reg:CCC FLAGS_REG) (const_int 0))
+ (const_int 63))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "rcr{q}\t%0"
+ [(set_attr "type" "ishift1")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "DI")])
+
+;; Versions of sar and shr that set the carry flag.
+(define_insn "<insn><mode>3_carry"
+ [(set (reg:CCC FLAGS_REG)
+ (unspec:CCC [(and:SWI48 (match_operand:SWI48 1 "register_operand" "0")
+ (const_int 1))
+ (const_int 0)] UNSPEC_CC_NE))
+ (set (match_operand:SWI48 0 "register_operand" "=r")
+ (any_shiftrt:SWI48 (match_dup 1) (const_int 1)))]
+ ""
+{
+ if (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ return "<shift>{<imodesuffix>}\t%0";
+ return "<shift>{<imodesuffix>}\t{1, %0|%0, 1}";
+}
+ [(set_attr "type" "ishift1")
+ (set (attr "length_immediate")
+ (if_then_else
+ (ior (match_test "TARGET_SHIFT1")
+ (match_test "optimize_function_for_size_p (cfun)"))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "<MODE>")])
\f
;; Bit set / bit test instructions
/* X86_TUNE_SLOW_STC: This disables use of stc, clc and cmc carry flag
modifications on architectures where theses operations are slow. */
DEF_TUNE (X86_TUNE_SLOW_STC, "slow_stc", m_PENT4)
+
+/* X86_TUNE_USE_RCR: Controls use of rcr 1 instruction instead of shrd. */
+DEF_TUNE (X86_TUNE_USE_RCR, "use_rcr", m_AMD_MULTIPLE)
--- /dev/null
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-Oz" } */
+unsigned __int128 foo(unsigned __int128 x) { return x >> 1; }
+__int128 bar(__int128 x) { return x >> 1; }
+/* { dg-final { scan-assembler-times "rcrq" 2 } } */
+/* { dg-final { scan-assembler-not "shrdq" } } */
--- /dev/null
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-Oz -mno-stv" } */
+unsigned long long foo(unsigned long long x) { return x >> 1; }
+long long bar(long long x) { return x >> 1; }
+/* { dg-final { scan-assembler-times "rcrl" 2 } } */
+/* { dg-final { scan-assembler-not "shrdl" } } */