(set_attr "znver1_decode" "vector")
(set_attr "mode" "DI")])
+; Compute 63 - clz (operand 1) in DImode into register operand 0 using a
+; single 64-bit bsr.  The flags register is only clobbered here; no flags
+; result is modelled as a set.  Enabled only for TARGET_64BIT when
+; TARGET_LZCNT is off.
+(define_insn "bsr_rex64_1"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (minus:DI (const_int 63)
+ (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_LZCNT && TARGET_64BIT"
+ "bsr{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "prefix_0f" "1")
+ (set_attr "znver1_decode" "vector")
+ (set_attr "mode" "DI")])
+
(define_insn "bsr"
[(set (reg:CCZ FLAGS_REG)
(compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm")
(set_attr "znver1_decode" "vector")
(set_attr "mode" "SI")])
+; Compute 31 - clz (operand 1) in SImode into register operand 0 using a
+; single 32-bit bsr.  The flags register is only clobbered here; no flags
+; result is modelled as a set.  Enabled only when TARGET_LZCNT is off.
+(define_insn "bsr_1"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (const_int 31)
+ (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_LZCNT"
+ "bsr{l}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "prefix_0f" "1")
+ (set_attr "znver1_decode" "vector")
+ (set_attr "mode" "SI")])
+
+; Zero-extending variant of the 32-bit bsr: the DImode destination receives
+; zero_extend (31 - clz (operand 1)), where the subtraction and clz are in
+; SImode.  The output template uses %k0, so the instruction is emitted with
+; the 32-bit name of the destination register.  Flags are clobbered.
+; Enabled only for TARGET_64BIT when TARGET_LZCNT is off.
+(define_insn "bsr_zext_1"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (minus:SI
+ (const_int 31)
+ (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_LZCNT && TARGET_64BIT"
+ "bsr{l}\t{%1, %k0|%k0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "prefix_0f" "1")
+ (set_attr "znver1_decode" "vector")
+ (set_attr "mode" "SI")])
+
+; The result of bsr is undefined for a zero input, and for any other input
+; it lies in the range 0 to 63, so sign-extends of it can be optimized away.
+; Matches, before reload, the DImode expression
+;   sign_extend (63 - (SImode low part of clz:DI (operand 1))) ^ 63
+; and always splits it ("&& 1") into two parallels:
+;   1. a flags-setting computation of 63 - clz:DI (operand 1) into a fresh
+;      DImode pseudo (operand 2), together with a CCZ compare of operand 1
+;      against zero;
+;   2. operand 0 = zero_extend (SImode low part of operand 2 ^ 63), with a
+;      flags clobber.
+; The sign_extend of the matched form is thus replaced by a zero_extend.
+; Operand 3 is the SImode lowpart subreg of operand 2.
+(define_insn_and_split "*bsr_rex64_2"
+ [(set (match_operand:DI 0 "register_operand")
+ (xor:DI
+ (sign_extend:DI
+ (minus:SI
+ (const_int 63)
+ (subreg:SI (clz:DI (match_operand:DI 1 "nonimmediate_operand"))
+ 0)))
+ (const_int 63)))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_LZCNT && TARGET_64BIT && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(parallel [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (match_dup 1) (const_int 0)))
+ (set (match_dup 2)
+ (minus:DI (const_int 63) (clz:DI (match_dup 1))))])
+ (parallel [(set (match_dup 0)
+ (zero_extend:DI (xor:SI (match_dup 3) (const_int 63))))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ operands[2] = gen_reg_rtx (DImode);
+ operands[3] = lowpart_subreg (SImode, operands[2], DImode);
+})
+
+; SImode counterpart of the 64-bit pattern: matches, before reload, the
+; DImode expression
+;   sign_extend ((31 - clz:SI (operand 1)) ^ 31)
+; and always splits it ("&& 1") into two parallels:
+;   1. a flags-setting computation of 31 - clz:SI (operand 1) into a fresh
+;      SImode pseudo (operand 2), together with a CCZ compare of operand 1
+;      against zero;
+;   2. operand 0 = zero_extend (operand 2 ^ 31), with a flags clobber.
+; The sign_extend of the matched form is thus replaced by a zero_extend.
+(define_insn_and_split "*bsr_2"
+ [(set (match_operand:DI 0 "register_operand")
+ (sign_extend:DI
+ (xor:SI
+ (minus:SI
+ (const_int 31)
+ (clz:SI (match_operand:SI 1 "nonimmediate_operand")))
+ (const_int 31))))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_LZCNT && TARGET_64BIT && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(parallel [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (match_dup 1) (const_int 0)))
+ (set (match_dup 2)
+ (minus:SI (const_int 31) (clz:SI (match_dup 1))))])
+ (parallel [(set (match_dup 0)
+ (zero_extend:DI (xor:SI (match_dup 2) (const_int 31))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[2] = gen_reg_rtx (SImode);")
+
+; Splitters to optimize C - __builtin_clzl (x) or C - __builtin_clz (x) for
+; a constant C, e.g. 64 - __builtin_clzl (x) or 32 - __builtin_clz (x).
+; Again, since CLZ is undefined at zero for !TARGET_LZCNT, its result is
+; guaranteed to be in the [0, 63] or [0, 31] range.
+; SImode result, DImode clz.  Matches
+;   C - ((63 - (SImode low part of clz:DI (operand 1))) ^ 63)
+; where C is the constant operand 2.  For a value v in [0, 63],
+; v ^ 63 == 63 - v, so the expression equals v + (C - 63) with
+; v = 63 - clz (operand 1).  The replacement computes v into a fresh DImode
+; pseudo (operand 3) and then adds the SImode constant C - 63 (operand 4)
+; to its SImode low part (operand 5).
+; When C == 63 the addend would be zero, so the preparation statements emit
+; bsr_rex64_1 into operand 3 and a plain move of operand 5 to operand 0,
+; and finish with DONE.
+(define_split
+ [(set (match_operand:SI 0 "register_operand")
+ (minus:SI
+ (match_operand:SI 2 "const_int_operand")
+ (xor:SI
+ (minus:SI (const_int 63)
+ (subreg:SI
+ (clz:DI (match_operand:DI 1 "nonimmediate_operand"))
+ 0))
+ (const_int 63))))]
+ "!TARGET_LZCNT && TARGET_64BIT && ix86_pre_reload_split ()"
+ [(set (match_dup 3)
+ (minus:DI (const_int 63) (clz:DI (match_dup 1))))
+ (set (match_dup 0)
+ (plus:SI (match_dup 5) (match_dup 4)))]
+{
+ operands[3] = gen_reg_rtx (DImode);
+ operands[5] = lowpart_subreg (SImode, operands[3], DImode);
+ if (INTVAL (operands[2]) == 63)
+ {
+ emit_insn (gen_bsr_rex64_1 (operands[3], operands[1]));
+ emit_move_insn (operands[0], operands[5]);
+ DONE;
+ }
+ operands[4] = gen_int_mode (UINTVAL (operands[2]) - 63, SImode);
+})
+
+; SImode result, SImode clz.  Matches
+;   C - ((31 - clz:SI (operand 1)) ^ 31)
+; where C is the constant operand 2.  For a value v in [0, 31],
+; v ^ 31 == 31 - v, so the expression equals v + (C - 31) with
+; v = 31 - clz (operand 1).  The replacement computes v into a fresh SImode
+; pseudo (operand 3) and then adds the SImode constant C - 31 (operand 4).
+; When C == 31 the addend would be zero, so the preparation statements emit
+; bsr_1 directly into operand 0 and finish with DONE.
+(define_split
+ [(set (match_operand:SI 0 "register_operand")
+ (minus:SI
+ (match_operand:SI 2 "const_int_operand")
+ (xor:SI
+ (minus:SI (const_int 31)
+ (clz:SI (match_operand:SI 1 "nonimmediate_operand")))
+ (const_int 31))))]
+ "!TARGET_LZCNT && ix86_pre_reload_split ()"
+ [(set (match_dup 3)
+ (minus:SI (const_int 31) (clz:SI (match_dup 1))))
+ (set (match_dup 0)
+ (plus:SI (match_dup 3) (match_dup 4)))]
+{
+ if (INTVAL (operands[2]) == 31)
+ {
+ emit_insn (gen_bsr_1 (operands[0], operands[1]));
+ DONE;
+ }
+ operands[3] = gen_reg_rtx (SImode);
+ operands[4] = gen_int_mode (UINTVAL (operands[2]) - 31, SImode);
+})
+
+; DImode result, DImode clz.  Matches
+;   C - (sign_extend (63 - (SImode low part of clz:DI (operand 1))) ^ 63)
+; where C is the constant operand 2.  For a value v in [0, 63],
+; v ^ 63 == 63 - v, so the expression equals v + (C - 63) with
+; v = 63 - clz (operand 1).  The replacement computes v into a fresh DImode
+; pseudo (operand 3) and then adds the constant C - 63 (operand 4).
+; The split condition additionally requires that C - 63 is unchanged by
+; truncation to SImode (trunc_int_for_mode), i.e. that it is representable
+; as a sign-extended 32-bit value -- presumably so that it is usable as an
+; immediate of a 64-bit add; confirm against the add patterns.
+; When C == 63 the addend would be zero, so the preparation statements emit
+; bsr_rex64_1 directly into operand 0 and finish with DONE.
+(define_split
+ [(set (match_operand:DI 0 "register_operand")
+ (minus:DI
+ (match_operand:DI 2 "const_int_operand")
+ (xor:DI
+ (sign_extend:DI
+ (minus:SI (const_int 63)
+ (subreg:SI
+ (clz:DI (match_operand:DI 1 "nonimmediate_operand"))
+ 0)))
+ (const_int 63))))]
+ "!TARGET_LZCNT
+ && TARGET_64BIT
+ && ix86_pre_reload_split ()
+ && ((unsigned HOST_WIDE_INT)
+ trunc_int_for_mode (UINTVAL (operands[2]) - 63, SImode)
+ == UINTVAL (operands[2]) - 63)"
+ [(set (match_dup 3)
+ (minus:DI (const_int 63) (clz:DI (match_dup 1))))
+ (set (match_dup 0)
+ (plus:DI (match_dup 3) (match_dup 4)))]
+{
+ if (INTVAL (operands[2]) == 63)
+ {
+ emit_insn (gen_bsr_rex64_1 (operands[0], operands[1]));
+ DONE;
+ }
+ operands[3] = gen_reg_rtx (DImode);
+ operands[4] = GEN_INT (UINTVAL (operands[2]) - 63);
+})
+
+; DImode result, SImode clz.  Matches
+;   C - sign_extend ((31 - clz:SI (operand 1)) ^ 31)
+; where C is the constant operand 2.  For a value v in [0, 31],
+; v ^ 31 == 31 - v, so the expression equals v + (C - 31) with
+; v = 31 - clz (operand 1).  The replacement computes zero_extend (v) into
+; a fresh DImode pseudo (operand 3) and then adds the constant C - 31
+; (operand 4).
+; The split condition additionally requires that C - 31 is unchanged by
+; truncation to SImode (trunc_int_for_mode), i.e. that it is representable
+; as a sign-extended 32-bit value -- presumably so that it is usable as an
+; immediate of a 64-bit add; confirm against the add patterns.
+; When C == 31 the addend would be zero, so the preparation statements emit
+; bsr_zext_1 directly into operand 0 and finish with DONE.
+(define_split
+ [(set (match_operand:DI 0 "register_operand")
+ (minus:DI
+ (match_operand:DI 2 "const_int_operand")
+ (sign_extend:DI
+ (xor:SI
+ (minus:SI (const_int 31)
+ (clz:SI (match_operand:SI 1 "nonimmediate_operand")))
+ (const_int 31)))))]
+ "!TARGET_LZCNT
+ && TARGET_64BIT
+ && ix86_pre_reload_split ()
+ && ((unsigned HOST_WIDE_INT)
+ trunc_int_for_mode (UINTVAL (operands[2]) - 31, SImode)
+ == UINTVAL (operands[2]) - 31)"
+ [(set (match_dup 3)
+ (zero_extend:DI (minus:SI (const_int 31) (clz:SI (match_dup 1)))))
+ (set (match_dup 0)
+ (plus:DI (match_dup 3) (match_dup 4)))]
+{
+ if (INTVAL (operands[2]) == 31)
+ {
+ emit_insn (gen_bsr_zext_1 (operands[0], operands[1]));
+ DONE;
+ }
+ operands[3] = gen_reg_rtx (DImode);
+ operands[4] = GEN_INT (UINTVAL (operands[2]) - 31);
+})
+
(define_expand "clz<mode>2"
[(parallel
[(set (reg:CCZ FLAGS_REG)
(compare:CCZ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
(const_int 0)))
- (set (match_operand:SWI48 0 "register_operand")
- (minus:SWI48
- (match_dup 2)
- (clz:SWI48 (match_dup 1))))])
+ (set (match_dup 3) (minus:SWI48
+ (match_dup 2)
+ (clz:SWI48 (match_dup 1))))])
(parallel
- [(set (match_dup 0) (xor:SWI48 (match_dup 0) (match_dup 2)))
+ [(set (match_operand:SWI48 0 "register_operand")
+ (xor:SWI48 (match_dup 3) (match_dup 2)))
(clobber (reg:CC FLAGS_REG))])]
""
{
DONE;
}
operands[2] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
+ operands[3] = gen_reg_rtx (<MODE>mode);
})
(define_insn_and_split "clz<mode>2_lzcnt"