;; For PREFETCHI support
UNSPECV_PREFETCHI
+
+ ;; For USER_MSR support
+ UNSPECV_URDMSR
+ UNSPECV_UWRMSR
])
;; Constants to represent rounding modes in the ROUND instruction
\f
;; Processor type.
(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,nehalem,
- atom,slm,glm,haswell,generic,lujiazui,amdfam10,bdver1,
+ atom,slm,glm,haswell,generic,lujiazui,yongfeng,amdfam10,bdver1,
bdver2,bdver3,bdver4,btver2,znver1,znver2,znver3,znver4"
(const (symbol_ref "ix86_schedule")))
(include "core2.md")
(include "haswell.md")
(include "lujiazui.md")
+(include "yongfeng.md")
\f
;; Operand and operator predicates and constraints
DONE;
})
+;; Compare-and-branch expander for XImode operands.  It is enabled only
+;; when TARGET_AVX512F and TARGET_EVEX512 are set and
+;; TARGET_PREFER_AVX256 is not.  Operand 0 must satisfy
+;; bt_comparison_operator and operand 3 is the branch target label.
+;; The preparation code hands the comparison code and operands 1-3 to
+;; ix86_expand_branch and always finishes with DONE.
+(define_expand "cbranchxi4"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:XI 1 "nonimmediate_operand")
+ (match_operand:XI 2 "nonimmediate_operand")))
+ (set (pc) (if_then_else
+ (match_operator 0 "bt_comparison_operator"
+ [(reg:CC FLAGS_REG) (const_int 0)])
+ (label_ref (match_operand 3))
+ (pc)))]
+ "TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256"
+{
+ ix86_expand_branch (GET_CODE (operands[0]),
+ operands[1], operands[2], operands[3]);
+ DONE;
+})
+
(define_expand "cstore<mode>4"
[(set (reg:CC FLAGS_REG)
(compare:CC (match_operand:SDWIM 2 "nonimmediate_operand")
[(set_attr "type" "imovx")
(set_attr "mode" "SI")])
+;; Split sign-extension of single least significant bit as and x,$1;neg x
+;; Matches a sign_extract of width 1 at position 0 from a register that
+;; is tied to the destination (constraint "0").  Both the insn condition
+;; and the split condition are empty and the output template is "#".
+;; The replacement is an AND with 1 followed by a NEG of the result,
+;; each carrying its own FLAGS_REG clobber.
+(define_insn_and_split "*extv<mode>_1_0"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (sign_extract:SWI48 (match_operand:SWI48 1 "register_operand" "0")
+ (const_int 1)
+ (const_int 0)))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "#"
+ ""
+ [(parallel [(set (match_dup 0) (and:SWI48 (match_dup 1) (const_int 1)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_dup 0) (neg:SWI48 (match_dup 0)))
+ (clobber (reg:CC FLAGS_REG))])])
+
(define_expand "extzv<mode>"
[(set (match_operand:SWI248 0 "register_operand")
(zero_extract:SWI248 (match_operand:SWI248 1 "register_operand")
[(parallel [(set (match_operand:<DWI> 0 "register_operand")
(mult:<DWI>
(any_extend:<DWI>
- (match_operand:DWIH 1 "nonimmediate_operand"))
+ (match_operand:DWIH 1 "register_operand"))
(any_extend:<DWI>
- (match_operand:DWIH 2 "register_operand"))))
+ (match_operand:DWIH 2 "nonimmediate_operand"))))
(clobber (reg:CC FLAGS_REG))])])
+;; QImode x QImode -> HImode widening multiply expander (both inputs go
+;; through the same any_extend code), enabled under TARGET_QIMODE_MATH.
+;; After this change operand 1 must be a register and operand 2 may be
+;; a register or memory; previously the predicates were the other way
+;; round.  The pattern clobbers FLAGS_REG.
(define_expand "<u>mulqihi3"
[(parallel [(set (match_operand:HI 0 "register_operand")
(mult:HI
(any_extend:HI
- (match_operand:QI 1 "nonimmediate_operand"))
+ (match_operand:QI 1 "register_operand"))
(any_extend:HI
- (match_operand:QI 2 "register_operand"))))
+ (match_operand:QI 2 "nonimmediate_operand"))))
(clobber (reg:CC FLAGS_REG))])]
"TARGET_QIMODE_MATH")
(define_insn "*bmi2_umul<mode><dwi>3_1"
[(set (match_operand:DWIH 0 "register_operand" "=r")
(mult:DWIH
- (match_operand:DWIH 2 "nonimmediate_operand" "%d")
+ (match_operand:DWIH 2 "register_operand" "%d")
(match_operand:DWIH 3 "nonimmediate_operand" "rm")))
(set (match_operand:DWIH 1 "register_operand" "=r")
- (truncate:DWIH
- (lshiftrt:<DWI>
- (mult:<DWI> (zero_extend:<DWI> (match_dup 2))
- (zero_extend:<DWI> (match_dup 3)))
- (match_operand:QI 4 "const_int_operand"))))]
- "TARGET_BMI2 && INTVAL (operands[4]) == <MODE_SIZE> * BITS_PER_UNIT
+ (umul_highpart:DWIH (match_dup 2) (match_dup 3)))]
+ "TARGET_BMI2
&& !(MEM_P (operands[2]) && MEM_P (operands[3]))"
"mulx\t{%3, %0, %1|%1, %0, %3}"
[(set_attr "type" "imulx")
[(set (match_operand:<DWI> 0 "register_operand" "=r,A")
(mult:<DWI>
(zero_extend:<DWI>
- (match_operand:DWIH 1 "nonimmediate_operand" "%d,0"))
+ (match_operand:DWIH 1 "register_operand" "%d,a"))
(zero_extend:<DWI>
(match_operand:DWIH 2 "nonimmediate_operand" "rm,rm"))))
(clobber (reg:CC FLAGS_REG))]
[(parallel [(set (match_dup 3)
(mult:DWIH (match_dup 1) (match_dup 2)))
(set (match_dup 4)
- (truncate:DWIH
- (lshiftrt:<DWI>
- (mult:<DWI> (zero_extend:<DWI> (match_dup 1))
- (zero_extend:<DWI> (match_dup 2)))
- (match_dup 5))))])]
+ (umul_highpart:DWIH (match_dup 1) (match_dup 2)))])]
{
split_double_mode (<DWI>mode, &operands[0], 1, &operands[3], &operands[4]);
[(set (match_operand:<DWI> 0 "register_operand" "=A")
(mult:<DWI>
(sign_extend:<DWI>
- (match_operand:DWIH 1 "nonimmediate_operand" "%0"))
+ (match_operand:DWIH 1 "register_operand" "%a"))
(sign_extend:<DWI>
(match_operand:DWIH 2 "nonimmediate_operand" "rm"))))
(clobber (reg:CC FLAGS_REG))]
[(set (match_operand:HI 0 "register_operand" "=a")
(mult:HI
(any_extend:HI
- (match_operand:QI 1 "nonimmediate_operand" "%0"))
+ (match_operand:QI 1 "register_operand" "%0"))
(any_extend:HI
(match_operand:QI 2 "nonimmediate_operand" "qm"))))
(clobber (reg:CC FLAGS_REG))]
(set_attr "bdver1_decode" "direct")
(set_attr "mode" "QI")])
+;; Widening multiplication peephole2s to tweak register allocation.
+;; mov imm,%rdx; mov %rdi,%rax; mulq %rdx -> mov imm,%rax; mulq %rdi
+;; Matched sequence:
+;;   operand 0 <- immediate operand 1
+;;   operand 2 <- operand 3
+;;   operand 4 <- zext (operand 2) * zext (operand 0), clobbering flags
+;; Replacement:
+;;   operand 2 <- immediate operand 1
+;;   operand 4 <- zext (operand 2) * zext (operand 3), clobbering flags
+;; The condition requires that operand 3 is not AX_REG, that operand 0
+;; differs from both operand 2 and operand 3, and that operand 0 either
+;; has the same register number as the product destination, is DX_REG,
+;; or satisfies peep2_reg_dead_p (3, ...) -- presumably so that no later
+;; code depends on the immediate having been loaded into operand 0.
+(define_peephole2
+ [(set (match_operand:DWIH 0 "general_reg_operand")
+ (match_operand:DWIH 1 "immediate_operand"))
+ (set (match_operand:DWIH 2 "general_reg_operand")
+ (match_operand:DWIH 3 "general_reg_operand"))
+ (parallel [(set (match_operand:<DWI> 4 "general_reg_operand")
+ (mult:<DWI> (zero_extend:<DWI> (match_dup 2))
+ (zero_extend:<DWI> (match_dup 0))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "REGNO (operands[3]) != AX_REG
+ && REGNO (operands[0]) != REGNO (operands[2])
+ && REGNO (operands[0]) != REGNO (operands[3])
+ && (REGNO (operands[0]) == REGNO (operands[4])
+ || REGNO (operands[0]) == DX_REG
+ || peep2_reg_dead_p (3, operands[0]))"
+ [(set (match_dup 2) (match_dup 1))
+ (parallel [(set (match_dup 4)
+ (mult:<DWI> (zero_extend:<DWI> (match_dup 2))
+ (zero_extend:<DWI> (match_dup 3))))
+ (clobber (reg:CC FLAGS_REG))])])
+
+;; mov imm,%rax; mov %rdi,%rdx; mulx %rax -> mov imm,%rdx; mulx %rdi
+;; Same idea for the two-output form (low part via mult, high part via
+;; umul_highpart, no flags clobber).  Matched sequence:
+;;   operand 0 <- immediate operand 1
+;;   operand 2 <- operand 3
+;;   operand 4 <- operand 2 * operand 0; operand 5 <- high part
+;; Replacement:
+;;   operand 2 <- immediate operand 1
+;;   operand 4 <- operand 2 * operand 3; operand 5 <- high part
+;; The condition requires that operand 3 is not DX_REG, that operand 0
+;; differs from both operand 2 and operand 3, and that operand 0 is one
+;; of the two result registers or satisfies peep2_reg_dead_p (3, ...).
+(define_peephole2
+ [(set (match_operand:DWIH 0 "general_reg_operand")
+ (match_operand:DWIH 1 "immediate_operand"))
+ (set (match_operand:DWIH 2 "general_reg_operand")
+ (match_operand:DWIH 3 "general_reg_operand"))
+ (parallel [(set (match_operand:DWIH 4 "general_reg_operand")
+ (mult:DWIH (match_dup 2) (match_dup 0)))
+ (set (match_operand:DWIH 5 "general_reg_operand")
+ (umul_highpart:DWIH (match_dup 2) (match_dup 0)))])]
+ "REGNO (operands[3]) != DX_REG
+ && REGNO (operands[0]) != REGNO (operands[2])
+ && REGNO (operands[0]) != REGNO (operands[3])
+ && (REGNO (operands[0]) == REGNO (operands[4])
+ || REGNO (operands[0]) == REGNO (operands[5])
+ || peep2_reg_dead_p (3, operands[0]))"
+ [(set (match_dup 2) (match_dup 1))
+ (parallel [(set (match_dup 4)
+ (mult:DWIH (match_dup 2) (match_dup 3)))
+ (set (match_dup 5)
+ (umul_highpart:DWIH (match_dup 2) (match_dup 3)))])])
+
;; Highpart multiplication patterns
(define_insn "<s>mul<mode>3_highpart"
[(set (match_operand:DWIH 0 "register_operand" "=d")
operands[3] = gen_int_mode (INTVAL (operands[3]), QImode);
})
+;; Narrow test instructions with immediate operands that test
+;; memory locations for zero. E.g. testl $0x00aa0000, mem can be
+;; converted to testb $0xaa, mem+2. Reject volatile locations and
+;; targets where reading (possibly unaligned) part of memory
+;; location after a large write to the same address causes
+;; store-to-load forwarding stall.
+(define_peephole2
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ
+ (and:SWI248 (match_operand:SWI248 0 "memory_operand")
+ (match_operand 1 "const_int_operand"))
+ (const_int 0)))]
+ "!TARGET_PARTIAL_MEMORY_READ_STALL && !MEM_VOLATILE_P (operands[0])"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (match_dup 2) (const_int 0)))]
+{
+ unsigned HOST_WIDE_INT ival = UINTVAL (operands[1]);
+ int first_nonzero_byte, bitsize;
+ rtx new_addr, new_const;
+ machine_mode new_mode;
+
+ /* Clear bits outside mode width. */
+ ival &= GET_MODE_MASK (<MODE>mode);
+
+ /* Reject a mask with no bits set inside the mode. The check is made
+ after masking so that the bit scans and the shift below never see
+ a zero value, even if the constant only had bits set outside the
+ mode width. */
+ if (ival == 0)
+ FAIL;
+
+ /* Byte offset of the lowest byte that contains a set bit. */
+ first_nonzero_byte = ctz_hwi (ival) / BITS_PER_UNIT;
+
+ /* Drop the all-zero low bytes so the mask starts at that byte. */
+ ival >>= first_nonzero_byte * BITS_PER_UNIT;
+
+ /* Number of significant bits left in the shifted mask. */
+ bitsize = sizeof (ival) * BITS_PER_UNIT - clz_hwi (ival);
+
+ /* Pick the narrowest integer mode that holds the shifted mask. */
+ if (bitsize <= GET_MODE_BITSIZE (QImode))
+ new_mode = QImode;
+ else if (bitsize <= GET_MODE_BITSIZE (HImode))
+ new_mode = HImode;
+ else if (bitsize <= GET_MODE_BITSIZE (SImode))
+ new_mode = SImode;
+ else
+ new_mode = DImode;
+
+ /* Nothing to gain unless the access actually becomes narrower. */
+ if (GET_MODE_SIZE (new_mode) >= GET_MODE_SIZE (<MODE>mode))
+ FAIL;
+
+ /* Build the narrower AND on the memory at the adjusted offset. */
+ new_addr = adjust_address (operands[0], new_mode, first_nonzero_byte);
+ new_const = gen_int_mode (ival, new_mode);
+
+ operands[2] = gen_rtx_AND (new_mode, new_addr, new_const);
+})
+
;; %%% This used to optimize known byte-wide and operations to memory,
;; and sometimes to QImode registers. If this is considered useful,
;; it should be done with splitters.
DONE;
})
+;; HFmode round expander taking register operands.  Enabled for
+;; TARGET_AVX512FP16 only when neither flag_trapping_math nor
+;; flag_rounding_math is set.  The expansion itself is done by
+;; ix86_expand_round_sse4 (destination, source).
+(define_expand "roundhf2"
+ [(match_operand:HF 0 "register_operand")
+ (match_operand:HF 1 "register_operand")]
+ "TARGET_AVX512FP16 && !flag_trapping_math && !flag_rounding_math"
+{
+ ix86_expand_round_sse4 (operands[0], operands[1]);
+ DONE;
+})
+
(define_expand "round<mode>2"
[(match_operand:X87MODEF 0 "register_operand")
(match_operand:X87MODEF 1 "nonimmediate_operand")]
[(set_attr "type" "fpspc")
(set_attr "mode" "<MODE>")])
+;; lround from HFmode to an SWI248 integer register.  Enabled for
+;; TARGET_AVX512FP16 only when neither flag_trapping_math nor
+;; flag_rounding_math is set.  The preparation code calls
+;; ix86_expand_lround (destination, source) and ends with DONE.
+(define_expand "lroundhf<mode>2"
+ [(set (match_operand:SWI248 0 "register_operand")
+ (unspec:SWI248 [(match_operand:HF 1 "nonimmediate_operand")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_AVX512FP16 && !flag_trapping_math && !flag_rounding_math"
+{
+ ix86_expand_lround (operands[0], operands[1]);
+ DONE;
+})
+
+;; lrint from HFmode to an SWI48 integer register under
+;; TARGET_AVX512FP16.  There is no preparation code: the expander
+;; produces the UNSPEC_FIX_NOTRUNC set shown in the template.
+(define_expand "lrinthf<mode>2"
+ [(set (match_operand:SWI48 0 "register_operand")
+ (unspec:SWI48 [(match_operand:HF 1 "nonimmediate_operand")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_AVX512FP16")
+
(define_expand "lrint<MODEF:mode><SWI48:mode>2"
[(set (match_operand:SWI48 0 "register_operand")
(unspec:SWI48 [(match_operand:MODEF 1 "nonimmediate_operand")]
&& (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
&& flag_unsafe_math_optimizations")
+;; HFmode variant of the l<rounding_insn> expanders (FIST_ROUNDING
+;; iterator), enabled under TARGET_AVX512FP16.  Done in two steps:
+;; round the HF input into a fresh HF pseudo with gen_sse4_1_roundhf2
+;; using ROUND_<ROUNDING> | ROUND_NO_EXC, then convert that pseudo to
+;; the integer destination with gen_fix_trunchf<mode>2.
+(define_expand "l<rounding_insn>hf<mode>2"
+ [(set (match_operand:SWI48 0 "nonimmediate_operand")
+ (unspec:SWI48 [(match_operand:HF 1 "register_operand")]
+ FIST_ROUNDING))]
+ "TARGET_AVX512FP16"
+{
+ rtx tmp = gen_reg_rtx (HFmode);
+ emit_insn (gen_sse4_1_roundhf2 (tmp, operands[1],
+ GEN_INT (ROUND_<ROUNDING> | ROUND_NO_EXC)));
+ emit_insn (gen_fix_trunchf<mode>2 (operands[0], tmp));
+ DONE;
+})
+
(define_expand "l<rounding_insn><MODEF:mode><SWI48:mode>2"
[(parallel [(set (match_operand:SWI48 0 "nonimmediate_operand")
(unspec:SWI48 [(match_operand:MODEF 1 "register_operand")]
(match_operand 1 "memory_operand")]
""
{
+ rtx scratch = gen_reg_rtx (word_mode);
+
emit_insn (gen_stack_protect_set_1
- (ptr_mode, operands[0], operands[1]));
+ (ptr_mode, word_mode, operands[0], operands[1], scratch));
DONE;
})
-(define_insn "@stack_protect_set_1_<mode>"
+;; Copies memory operand 1 to memory operand 0 (UNSPEC_SP_SET) through
+;; register operand 2 and leaves operand 2 set to zero.  Operand 2 is
+;; now an explicit earlyclobber SWI48 register operand instead of a
+;; PTR-mode match_scratch, so its mode is iterated independently of the
+;; PTR mode of the memory operands; the two moves therefore print it
+;; with the <PTR:k> modifier.  The final xor{l} on %k2 clears the
+;; register, and the pattern clobbers FLAGS_REG.
+(define_insn "@stack_protect_set_1_<PTR:mode>_<SWI48:mode>"
[(set (match_operand:PTR 0 "memory_operand" "=m")
(unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")]
UNSPEC_SP_SET))
- (set (match_scratch:PTR 2 "=&r") (const_int 0))
+ (set (match_operand:SWI48 2 "register_operand" "=&r") (const_int 0))
(clobber (reg:CC FLAGS_REG))]
""
{
- output_asm_insn ("mov{<imodesuffix>}\t{%1, %2|%2, %1}", operands);
- output_asm_insn ("mov{<imodesuffix>}\t{%2, %0|%0, %2}", operands);
+ output_asm_insn ("mov{<PTR:imodesuffix>}\t{%1, %<PTR:k>2|%<PTR:k>2, %1}",
+ operands);
+ output_asm_insn ("mov{<PTR:imodesuffix>}\t{%<PTR:k>2, %0|%0, %<PTR:k>2}",
+ operands);
return "xor{l}\t%k2, %k2";
}
[(set_attr "type" "multi")])
;; Patterns and peephole2s to optimize stack_protect_set_1_<mode>
-;; immediately followed by *mov{s,d}i_internal to the same register,
-;; where we can avoid the xor{l} above. We don't split this, so that
-;; scheduling or anything else doesn't separate the *stack_protect_set*
-;; pattern from the set of the register that overwrites the register
-;; with a new value.
-(define_insn "*stack_protect_set_2_<mode>"
+;; immediately followed by *mov{s,d}i_internal, where we can avoid
+;; the xor{l} above. We don't split this, so that scheduling or
+;; anything else doesn't separate the *stack_protect_set* pattern from
+;; the set of the register that overwrites the register with a new value.
+
+;; Matches a stack-protect set that zeroes word-mode register operand 2,
+;; followed by a flags-clobbering set of general register operand 3 to
+;; a const0_operand.  The replacement is a single stack-protect set
+;; whose zeroed register is operand 3 instead of operand 2.  It applies
+;; only when peep2_reg_dead_p (0, operands[3]) and
+;; peep2_reg_dead_p (1, operands[2]) both hold.
+(define_peephole2
+ [(parallel [(set (match_operand:PTR 0 "memory_operand")
+ (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
+ UNSPEC_SP_SET))
+ (set (match_operand:W 2 "general_reg_operand") (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_operand:SWI48 3 "general_reg_operand")
+ (match_operand:SWI48 4 "const0_operand"))
+ (clobber (reg:CC FLAGS_REG))])]
+ "peep2_reg_dead_p (0, operands[3])
+ && peep2_reg_dead_p (1, operands[2])"
+ [(parallel [(set (match_dup 0)
+ (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
+ (set (match_dup 3) (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])])
+
+(define_insn "*stack_protect_set_2_<mode>_si"
[(set (match_operand:PTR 0 "memory_operand" "=m")
(unspec:PTR [(match_operand:PTR 3 "memory_operand" "m")]
UNSPEC_SP_SET))
(set (match_operand:SI 1 "register_operand" "=&r")
- (match_operand:SI 2 "general_operand" "g"))
- (clobber (reg:CC FLAGS_REG))]
- "reload_completed
- && !reg_overlap_mentioned_p (operands[1], operands[2])"
+ (match_operand:SI 2 "general_operand" "g"))]
+ "reload_completed"
{
output_asm_insn ("mov{<imodesuffix>}\t{%3, %<k>1|%<k>1, %3}", operands);
output_asm_insn ("mov{<imodesuffix>}\t{%<k>1, %0|%0, %<k>1}", operands);
[(set_attr "type" "multi")
(set_attr "length" "24")])
-(define_peephole2
- [(parallel [(set (match_operand:PTR 0 "memory_operand")
- (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
- UNSPEC_SP_SET))
- (set (match_operand:PTR 2 "general_reg_operand") (const_int 0))
- (clobber (reg:CC FLAGS_REG))])
- (set (match_operand:SI 3 "general_reg_operand")
- (match_operand:SI 4))]
- "REGNO (operands[2]) == REGNO (operands[3])
- && general_operand (operands[4], SImode)
- && (general_reg_operand (operands[4], SImode)
- || memory_operand (operands[4], SImode)
- || immediate_operand (operands[4], SImode))
- && !reg_overlap_mentioned_p (operands[3], operands[4])"
- [(parallel [(set (match_dup 0)
- (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
- (set (match_dup 3) (match_dup 4))
- (clobber (reg:CC FLAGS_REG))])])
-
-(define_insn "*stack_protect_set_3"
- [(set (match_operand:DI 0 "memory_operand" "=m,m,m")
- (unspec:DI [(match_operand:DI 3 "memory_operand" "m,m,m")]
- UNSPEC_SP_SET))
- (set (match_operand:DI 1 "register_operand" "=&r,r,r")
- (match_operand:DI 2 "general_operand" "Z,rem,i"))
- (clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT
- && reload_completed
- && !reg_overlap_mentioned_p (operands[1], operands[2])"
+(define_insn "*stack_protect_set_2_<mode>_di"
+ [(set (match_operand:PTR 0 "memory_operand" "=m,m,m")
+ (unspec:PTR [(match_operand:PTR 3 "memory_operand" "m,m,m")]
+ UNSPEC_SP_SET))
+ (set (match_operand:DI 1 "register_operand" "=&r,&r,&r")
+ (match_operand:DI 2 "general_operand" "Z,rem,i"))]
+ "TARGET_64BIT && reload_completed"
{
- output_asm_insn ("mov{q}\t{%3, %1|%1, %3}", operands);
- output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", operands);
+ output_asm_insn ("mov{<imodesuffix>}\t{%3, %<k>1|%<k>1, %3}", operands);
+ output_asm_insn ("mov{<imodesuffix>}\t{%<k>1, %0|%0, %<k>1}", operands);
if (pic_32bit_operand (operands[2], DImode))
return "lea{q}\t{%E2, %1|%1, %E2}";
else if (which_alternative == 0)
(set_attr "length" "24")])
+;; Matches a stack-protect set that zeroes word-mode register operand 2,
+;; followed by a plain SWI48 move of general_operand 4 into general
+;; register operand 3.  The replacement is one parallel (without a
+;; FLAGS_REG clobber) that performs the stack-protect copy and sets
+;; operand 3 to operand 4.  It applies only when
+;; peep2_reg_dead_p (0, operands[3]) and
+;; peep2_reg_dead_p (1, operands[2]) both hold.  The removed version was
+;; DImode/TARGET_64BIT only and required the moved-to register to be the
+;; zeroed scratch itself.
(define_peephole2
- [(parallel [(set (match_operand:DI 0 "memory_operand")
- (unspec:DI [(match_operand:DI 1 "memory_operand")]
- UNSPEC_SP_SET))
- (set (match_operand:DI 2 "general_reg_operand") (const_int 0))
- (clobber (reg:CC FLAGS_REG))])
- (set (match_dup 2) (match_operand:DI 3))]
- "TARGET_64BIT
- && general_operand (operands[3], DImode)
- && (general_reg_operand (operands[3], DImode)
- || memory_operand (operands[3], DImode)
- || x86_64_zext_immediate_operand (operands[3], DImode)
- || x86_64_immediate_operand (operands[3], DImode)
- || (CONSTANT_P (operands[3])
- && (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[3]))))
- && !reg_overlap_mentioned_p (operands[2], operands[3])"
- [(parallel [(set (match_dup 0)
- (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
- (set (match_dup 2) (match_dup 3))
- (clobber (reg:CC FLAGS_REG))])])
+ [(parallel [(set (match_operand:PTR 0 "memory_operand")
+ (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
+ UNSPEC_SP_SET))
+ (set (match_operand:W 2 "general_reg_operand") (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_operand:SWI48 3 "general_reg_operand")
+ (match_operand:SWI48 4 "general_operand"))]
+ "peep2_reg_dead_p (0, operands[3])
+ && peep2_reg_dead_p (1, operands[2])"
+ [(parallel [(set (match_dup 0)
+ (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
+ (set (match_dup 3) (match_dup 4))])])
(define_expand "stack_protect_test"
[(match_operand 0 "memory_operand")
DONE;
})
+;; URDMSR instruction pattern, available only with TARGET_USER_MSR on
+;; TARGET_64BIT.  Modelled as an unspec_volatile (UNSPECV_URDMSR) that
+;; produces a DImode register result (operand 0) from operand 1, which
+;; may be a register or a constant accepted by
+;; x86_64_szext_nonmemory_operand (constraints "reZ") -- presumably the
+;; MSR selector; confirm against the ISA documentation.
+(define_insn "urdmsr"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec_volatile:DI
+ [(match_operand:DI 1 "x86_64_szext_nonmemory_operand" "reZ")]
+ UNSPECV_URDMSR))]
+ "TARGET_USER_MSR && TARGET_64BIT"
+ "urdmsr\t{%1, %0|%0, %1}"
+ [(set_attr "prefix" "vex")
+ (set_attr "type" "other")])
+
+;; UWRMSR instruction pattern, available only with TARGET_USER_MSR on
+;; TARGET_64BIT.  Modelled as an unspec_volatile (UNSPECV_UWRMSR) with
+;; no result.  Operand 0 may be a register or a constant accepted by
+;; x86_64_szext_nonmemory_operand (constraints "reZ"); operand 1 must be
+;; a DImode register.  Presumably operand 0 selects the MSR and operand
+;; 1 holds the value to write; confirm against the ISA documentation.
+(define_insn "uwrmsr"
+ [(unspec_volatile
+ [(match_operand:DI 0 "x86_64_szext_nonmemory_operand" "reZ")
+ (match_operand:DI 1 "register_operand" "r")]
+ UNSPECV_UWRMSR)]
+ "TARGET_USER_MSR && TARGET_64BIT"
+ "uwrmsr\t{%1, %0|%0, %1}"
+ [(set_attr "prefix" "vex")
+ (set_attr "type" "other")])
+
(include "mmx.md")
(include "sse.md")
(include "sync.md")