;; GCC machine description for IA-32 and x86-64.
-;; Copyright (C) 1988-2022 Free Software Foundation, Inc.
+;; Copyright (C) 1988-2023 Free Software Foundation, Inc.
;; Mostly by William Schelter.
;; x86_64 support added by Jan Hubicka
;;
UNSPEC_INSN_FALSE_DEP
UNSPEC_SBB
UNSPEC_CC_NE
+ UNSPEC_STC
;; For SSE/MMX support:
UNSPEC_FIX_NOTRUNC
UNSPEC_MASKMOV
UNSPEC_MOVCC_MASK
UNSPEC_MOVMSK
+ UNSPEC_INSERTPS
UNSPEC_BLENDV
UNSPEC_PSHUFB
UNSPEC_XOP_PERMUTE
UNSPEC_RSQRT
UNSPEC_PSADBW
+ ;; Different from generic us_truncate RTX
+ ;; as it does unsigned saturation of signed source.
+ UNSPEC_US_TRUNCATE
+
;; For AVX/AVX512F support
UNSPEC_SCALEF
UNSPEC_PCMP
;; For insn_callee_abi:
UNSPEC_CALLEE_ABI
+ ;; For PUSH2/POP2 support
+ UNSPEC_APXPUSH2
+ UNSPEC_APXPOP2_LOW
+ UNSPEC_APXPOP2_HIGH
])
(define_c_enum "unspecv" [
;; For PREFETCHI support
UNSPECV_PREFETCHI
+
+ ;; For USER_MSR support
+ UNSPECV_URDMSR
+ UNSPECV_UWRMSR
])
;; Constants to represent rounding modes in the ROUND instruction
(MASK5_REG 73)
(MASK6_REG 74)
(MASK7_REG 75)
- (FIRST_PSEUDO_REG 76)
+ (R16_REG 76)
+ (R17_REG 77)
+ (R18_REG 78)
+ (R19_REG 79)
+ (R20_REG 80)
+ (R21_REG 81)
+ (R22_REG 82)
+ (R23_REG 83)
+ (R24_REG 84)
+ (R25_REG 85)
+ (R26_REG 86)
+ (R27_REG 87)
+ (R28_REG 88)
+ (R29_REG 89)
+ (R30_REG 90)
+ (R31_REG 91)
+ (FIRST_PSEUDO_REG 92)
])
;; Insn callee abi index.
\f
;; Processor type.
(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,nehalem,
- atom,slm,glm,haswell,generic,lujiazui,amdfam10,bdver1,
+ atom,slm,glm,haswell,generic,lujiazui,yongfeng,amdfam10,bdver1,
bdver2,bdver3,bdver4,btver2,znver1,znver2,znver3,znver4"
(const (symbol_ref "ix86_schedule")))
(const_string "unknown")]
(const_string "integer")))
+;; Used to control the "enabled" attribute on a per-instruction basis.
+;; An insn selects one of the values below, typically per alternative;
+;; insns that do not set the attribute get the default, "base".
+(define_attr "isa" "base,x64,nox64,x64_sse2,x64_sse4,x64_sse4_noavx,
+ x64_avx,x64_avx512bw,x64_avx512dq,aes,
+ sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx,
+ avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,avx512f_512,
+ noavx512f,avx512bw,avx512bw_512,noavx512bw,avx512dq,
+ noavx512dq,fma_or_avx512vl,avx512vl,noavx512vl,avxvnni,
+ avx512vnnivl,avx512fp16,avxifma,avx512ifmavl,avxneconvert,
+ avx512bf16vl,vpclmulqdqvl,avx_noavx512f,avx_noavx512vl"
+ (const_string "base"))
+
;; The (bounding maximum) length of an instruction immediate.
(define_attr "length_immediate" ""
(cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave,
bitmanip,imulx,msklog,mskmov")
(const_int 0)
+ (ior (eq_attr "type" "sse4arg")
+ (eq_attr "isa" "fma4"))
+ (const_int 1)
(eq_attr "unit" "i387,sse,mmx")
(const_int 0)
(eq_attr "type" "alu,alu1,negnot,imovx,ishift,ishiftx,ishift1,
(const_int 0)))
;; There are also additional prefixes in 3DNOW, SSSE3.
-;; ssemuladd,sse4arg default to 0f24/0f25 and DREX byte,
-;; sseiadd1,ssecvt1 to 0f7a with no DREX byte.
;; 3DNOW has 0f0f prefix, SSSE3 and SSE4_{1,2} 0f38/0f3a.
+;; While generally inapplicable to VEX/XOP/EVEX encodings, "length_vex" uses
+;; the attribute evaluating to zero to know that VEX2 encoding may be usable.
(define_attr "prefix_extra" ""
- (cond [(eq_attr "type" "ssemuladd,sse4arg")
- (const_int 2)
- (eq_attr "type" "sseiadd1,ssecvt1")
+ (cond [(eq_attr "type" "ssemuladd,sse4arg,sseiadd1,ssecvt1")
(const_int 1)
]
(const_int 0)))
(const_string "vex")
(eq_attr "mode" "XI,V16SF,V8DF")
(const_string "evex")
+ (eq_attr "type" "ssemuladd")
+ (if_then_else (eq_attr "isa" "fma4")
+ (const_string "vex")
+ (const_string "maybe_evex"))
+ (eq_attr "type" "sse4arg")
+ (const_string "vex")
]
(const_string "orig")))
;; Define attribute to indicate unaligned ssemov insns
(define_attr "movu" "0,1" (const_string "0"))
-;; Used to control the "enabled" attribute on a per-instruction basis.
-(define_attr "isa" "base,x64,nox64,x64_sse2,x64_sse4,x64_sse4_noavx,
- x64_avx,x64_avx512bw,x64_avx512dq,
- sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx,
- avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
- avx512bw,noavx512bw,avx512dq,noavx512dq,fma_or_avx512vl,
- avx512vl,noavx512vl,avxvnni,avx512vnnivl,avx512fp16,avxifma,
- avx512ifmavl,avxneconvert,avx512bf16vl"
- (const_string "base"))
+;; Define attribute to indicate gpr32 insns.
+;; The value is "0" or "1"; insns that do not set it default to "1".
+;; NOTE(review): "0" presumably marks alternatives that must not use the
+;; extended GPRs (R16-R31) -- confirm against the users of this attribute.
+(define_attr "gpr32" "0, 1" (const_string "1"))
;; Define instruction set of MMX instructions
(define_attr "mmx_isa" "base,native,sse,sse_noavx,avx"
(symbol_ref "TARGET_64BIT && TARGET_AVX512BW")
(eq_attr "isa" "x64_avx512dq")
(symbol_ref "TARGET_64BIT && TARGET_AVX512DQ")
+ (eq_attr "isa" "aes") (symbol_ref "TARGET_AES")
(eq_attr "isa" "sse_noavx")
(symbol_ref "TARGET_SSE && !TARGET_AVX")
(eq_attr "isa" "sse2") (symbol_ref "TARGET_SSE2")
(eq_attr "isa" "sse4_noavx")
(symbol_ref "TARGET_SSE4_1 && !TARGET_AVX")
(eq_attr "isa" "avx") (symbol_ref "TARGET_AVX")
+ (eq_attr "isa" "avx_noavx512f")
+ (symbol_ref "TARGET_AVX && !TARGET_AVX512F")
(eq_attr "isa" "noavx") (symbol_ref "!TARGET_AVX")
(eq_attr "isa" "avx2") (symbol_ref "TARGET_AVX2")
(eq_attr "isa" "noavx2") (symbol_ref "!TARGET_AVX2")
(eq_attr "isa" "fma_or_avx512vl")
(symbol_ref "TARGET_FMA || TARGET_AVX512VL")
(eq_attr "isa" "avx512f") (symbol_ref "TARGET_AVX512F")
+ (eq_attr "isa" "avx512f_512")
+ (symbol_ref "TARGET_AVX512F && TARGET_EVEX512")
(eq_attr "isa" "noavx512f") (symbol_ref "!TARGET_AVX512F")
(eq_attr "isa" "avx512bw") (symbol_ref "TARGET_AVX512BW")
+ (eq_attr "isa" "avx512bw_512")
+ (symbol_ref "TARGET_AVX512BW && TARGET_EVEX512")
(eq_attr "isa" "noavx512bw") (symbol_ref "!TARGET_AVX512BW")
(eq_attr "isa" "avx512dq") (symbol_ref "TARGET_AVX512DQ")
(eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ")
(eq_attr "isa" "avxneconvert") (symbol_ref "TARGET_AVXNECONVERT")
(eq_attr "isa" "avx512bf16vl")
(symbol_ref "TARGET_AVX512BF16 && TARGET_AVX512VL")
+ (eq_attr "isa" "vpclmulqdqvl")
+ (symbol_ref "TARGET_VPCLMULQDQ && TARGET_AVX512VL")
(eq_attr "mmx_isa" "native")
(symbol_ref "!TARGET_MMX_WITH_SSE")
(set_attr "type" "multi")])
(define_code_iterator plusminus [plus minus])
+(define_code_iterator plusminusmult [plus minus mult])
(define_code_iterator plusminusmultdiv [plus minus mult div])
(define_code_iterator sat_plusminus [ss_plus us_plus ss_minus us_minus])
(define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI") (TI "OI")])
(define_mode_attr dwi [(QI "hi") (HI "si") (SI "di") (DI "ti") (TI "oi")])
+;; Half sized integer modes.
+;; HALF yields the upper-case mode name and half the lower-case one;
+;; only TI (-> DI/di) and DI (-> SI/si) are mapped.
+(define_mode_attr HALF [(TI "DI") (DI "SI")])
+(define_mode_attr half [(TI "di") (DI "si")])
+
;; LEA mode corresponding to an integer mode
(define_mode_attr LEAMODE [(QI "SI") (HI "SI") (SI "SI") (DI "DI")])
(include "bdver3.md")
(include "btver2.md")
(include "znver.md")
+(include "znver4.md")
(include "geode.md")
(include "atom.md")
(include "slm.md")
(include "core2.md")
(include "haswell.md")
(include "lujiazui.md")
+(include "yongfeng.md")
\f
;; Operand and operator predicates and constraints
(define_expand "cbranch<mode>4"
[(set (reg:CC FLAGS_REG)
- (compare:CC (match_operand:SDWIM 1 "nonimmediate_operand")
- (match_operand:SDWIM 2 "<general_operand>")))
+ (compare:CC (match_operand:SWIM1248x 1 "nonimmediate_operand")
+ (match_operand:SWIM1248x 2 "<general_operand>")))
(set (pc) (if_then_else
(match_operator 0 "ordered_comparison_operator"
[(reg:CC FLAGS_REG) (const_int 0)])
DONE;
})
+;; Compare-and-branch expander for TImode.  Operand 0 is the comparison
+;; operator (ix86_timode_comparison_operator), operands 1 and 2 are the
+;; values compared and operand 3 is the branch target label.  Enabled for
+;; TARGET_64BIT or TARGET_SSE4_1.  The RTL template is never emitted as
+;; is: the preparation statement hands everything to ix86_expand_branch
+;; and finishes with DONE.
+(define_expand "cbranchti4"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:TI 1 "nonimmediate_operand")
+ (match_operand:TI 2 "ix86_timode_comparison_operand")))
+ (set (pc) (if_then_else
+ (match_operator 0 "ix86_timode_comparison_operator"
+ [(reg:CC FLAGS_REG) (const_int 0)])
+ (label_ref (match_operand 3))
+ (pc)))]
+ "TARGET_64BIT || TARGET_SSE4_1"
+{
+ ix86_expand_branch (GET_CODE (operands[0]),
+ operands[1], operands[2], operands[3]);
+ DONE;
+})
+
(define_expand "cbranchoi4"
[(set (reg:CC FLAGS_REG)
(compare:CC (match_operand:OI 1 "nonimmediate_operand")
DONE;
})
+;; Compare-and-branch expander for XImode.  Operand 0 is the comparison
+;; operator (restricted to bt_comparison_operator), operands 1 and 2 are
+;; the values compared and operand 3 is the branch target label.  Requires
+;; TARGET_AVX512F and TARGET_EVEX512 and is disabled when
+;; TARGET_PREFER_AVX256 is set.  Expansion is delegated to
+;; ix86_expand_branch.
+(define_expand "cbranchxi4"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:XI 1 "nonimmediate_operand")
+ (match_operand:XI 2 "nonimmediate_operand")))
+ (set (pc) (if_then_else
+ (match_operator 0 "bt_comparison_operator"
+ [(reg:CC FLAGS_REG) (const_int 0)])
+ (label_ref (match_operand 3))
+ (pc)))]
+ "TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256"
+{
+ ix86_expand_branch (GET_CODE (operands[0]),
+ operands[1], operands[2], operands[3]);
+ DONE;
+})
+
(define_expand "cstore<mode>4"
[(set (reg:CC FLAGS_REG)
(compare:CC (match_operand:SDWIM 2 "nonimmediate_operand")
+;; QImode requires TARGET_AVX512DQ, HImode is unconditional, SImode
+;; requires TARGET_AVX512BW, and DImode requires TARGET_AVX512BW together
+;; with TARGET_EVEX512 and TARGET_64BIT.
(define_mode_iterator SWI1248_AVX512BWDQ_64
[(QI "TARGET_AVX512DQ") HI
- (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW && TARGET_64BIT")])
+ (SI "TARGET_AVX512BW")
+ (DI "TARGET_AVX512BW && TARGET_EVEX512 && TARGET_64BIT")])
(define_insn "*cmp<mode>_ccz_1"
[(set (reg FLAGS_REG)
[(set_attr "type" "icmp")
(set_attr "mode" "<MODE>")])
+;; Compare a QImode memory operand (operand 0, norex_memory_operand,
+;; constraint "Bn") with the 8-bit field at bit position 8 of operand 1.
+;; Operand 2 matches the extraction operator itself (extract_operator).
+;; Emitted as cmpb using the %h form of operand 1.  Only matches for
+;; TARGET_64BIT after reload, and only when CCmode satisfies the insn.
+(define_insn "*cmpqi_ext<mode>_1_mem_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (match_operand:QI 0 "norex_memory_operand" "Bn")
+ (subreg:QI
+ (match_operator:SWI248 2 "extract_operator"
+ [(match_operand 1 "int248_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)]) 0)))]
+ "TARGET_64BIT && reload_completed
+ && ix86_match_ccmode (insn, CCmode)"
+ "cmp{b}\t{%h1, %0|%0, %h1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "QI")])
+
+;; Compare a QImode operand 0 with the 8-bit field at bit position 8 of
+;; operand 1, emitted as cmpb with the %h form of operand 1.  The second
+;; alternative (memory operand 0) is restricted to isa "nox64".
(define_insn "*cmpqi_ext<mode>_1"
[(set (reg FLAGS_REG)
(compare
(match_operand:QI 0 "nonimmediate_operand" "QBc,m")
(subreg:QI
- (zero_extract:SWI248
- (match_operand:SWI248 1 "register_operand" "Q,Q")
- (const_int 8)
- (const_int 8)) 0)))]
+ (match_operator:SWI248 2 "extract_operator"
+ [(match_operand 1 "int248_register_operand" "Q,Q")
+ (const_int 8)
+ (const_int 8)]) 0)))]
"ix86_match_ccmode (insn, CCmode)"
"cmp{b}\t{%h1, %0|%0, %h1}"
[(set_attr "isa" "*,nox64")
(set_attr "type" "icmp")
(set_attr "mode" "QI")])
+;; Fold a QImode load from a norex_memory_operand (operand 1) into the
+;; following compare, where the loaded register (operand 0) is the first
+;; compare operand and the second is the 8-bit field at bit position 8 of
+;; operand 2.  The replacement compares the memory operand directly.
+;; Applies on 64-bit targets only, and only when the loaded register is
+;; dead after the compare.
+(define_peephole2
+ [(set (match_operand:QI 0 "register_operand")
+ (match_operand:QI 1 "norex_memory_operand"))
+ (set (match_operand 3 "flags_reg_operand")
+ (match_operator 4 "compare_operator"
+ [(match_dup 0)
+ (subreg:QI
+ (match_operator:SWI248 5 "extract_operator"
+ [(match_operand 2 "int248_register_operand")
+ (const_int 8)
+ (const_int 8)]) 0)]))]
+ "TARGET_64BIT
+ && peep2_reg_dead_p (2, operands[0])"
+ [(set (match_dup 3)
+ (match_op_dup 4
+ [(match_dup 1)
+ (subreg:QI
+ (match_op_dup 5
+ [(match_dup 2)
+ (const_int 8)
+ (const_int 8)]) 0)]))])
+
(define_insn "*cmpqi_ext<mode>_2"
[(set (reg FLAGS_REG)
(compare
(subreg:QI
- (zero_extract:SWI248
- (match_operand:SWI248 0 "register_operand" "Q")
- (const_int 8)
- (const_int 8)) 0)
+ (match_operator:SWI248 2 "extract_operator"
+ [(match_operand 0 "int248_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)]) 0)
(match_operand:QI 1 "const0_operand")))]
"ix86_match_ccmode (insn, CCNOmode)"
"test{b}\t%h0, %h0"
(const_int 8)) 0)
(match_operand:QI 1 "const_int_operand")))])
+;; Compare the 8-bit field at bit position 8 of operand 0 with a QImode
+;; memory operand (operand 1, norex_memory_operand, constraint "Bn").
+;; Operand 2 matches the extraction operator itself (extract_operator).
+;; Emitted as cmpb using the %h form of operand 0.  Only matches for
+;; TARGET_64BIT after reload, and only when CCmode satisfies the insn.
+(define_insn "*cmpqi_ext<mode>_3_mem_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (subreg:QI
+ (match_operator:SWI248 2 "extract_operator"
+ [(match_operand 0 "int248_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)]) 0)
+ (match_operand:QI 1 "norex_memory_operand" "Bn")))]
+ "TARGET_64BIT && reload_completed
+ && ix86_match_ccmode (insn, CCmode)"
+ "cmp{b}\t{%1, %h0|%h0, %1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "QI")])
+
(define_insn "*cmpqi_ext<mode>_3"
[(set (reg FLAGS_REG)
(compare
(subreg:QI
- (zero_extract:SWI248
- (match_operand:SWI248 0 "register_operand" "Q,Q")
- (const_int 8)
- (const_int 8)) 0)
+ (match_operator:SWI248 2 "extract_operator"
+ [(match_operand 0 "int248_register_operand" "Q,Q")
+ (const_int 8)
+ (const_int 8)]) 0)
(match_operand:QI 1 "general_operand" "QnBc,m")))]
"ix86_match_ccmode (insn, CCmode)"
"cmp{b}\t{%1, %h0|%h0, %1}"
(set_attr "type" "icmp")
(set_attr "mode" "QI")])
+;; Fold a QImode load from a norex_memory_operand (operand 1) into the
+;; following compare, where the first compare operand is the 8-bit field
+;; at bit position 8 of operand 2 and the second is the loaded register
+;; (operand 0).  The replacement compares against the memory operand
+;; directly.  Applies on 64-bit targets only, and only when the loaded
+;; register is dead after the compare.
+(define_peephole2
+ [(set (match_operand:QI 0 "register_operand")
+ (match_operand:QI 1 "norex_memory_operand"))
+ (set (match_operand 3 "flags_reg_operand")
+ (match_operator 4 "compare_operator"
+ [(subreg:QI
+ (match_operator:SWI248 5 "extract_operator"
+ [(match_operand 2 "int248_register_operand")
+ (const_int 8)
+ (const_int 8)]) 0)
+ (match_dup 0)]))]
+ "TARGET_64BIT
+ && peep2_reg_dead_p (2, operands[0])"
+ [(set (match_dup 3)
+ (match_op_dup 4
+ [(subreg:QI
+ (match_op_dup 5
+ [(match_dup 2)
+ (const_int 8)
+ (const_int 8)]) 0)
+ (match_dup 1)]))])
+
(define_insn "*cmpqi_ext<mode>_4"
[(set (reg FLAGS_REG)
(compare
(subreg:QI
- (zero_extract:SWI248
- (match_operand:SWI248 0 "register_operand" "Q")
- (const_int 8)
- (const_int 8)) 0)
+ (match_operator:SWI248 2 "extract_operator"
+ [(match_operand 0 "int248_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)]) 0)
(subreg:QI
- (zero_extract:SWI248
- (match_operand:SWI248 1 "register_operand" "Q")
- (const_int 8)
- (const_int 8)) 0)))]
+ (match_operator:SWI248 3 "extract_operator"
+ [(match_operand 1 "int248_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)]) 0)))]
"ix86_match_ccmode (insn, CCmode)"
"cmp{b}\t{%h1, %h0|%h0, %h1}"
[(set_attr "type" "icmp")
(const_int 0)])
(label_ref (match_operand 3))
(pc)))]
- ""
+ "TARGET_80387 || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH)"
{
rtx op1 = ix86_expand_fast_convert_bf_to_sf (operands[1]);
rtx op2 = ix86_expand_fast_convert_bf_to_sf (operands[2]);
(match_operator 1 "comparison_operator"
[(reg:CC FLAGS_REG)
(const_int 0)]))]
- ""
+ "TARGET_80387 || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH)"
{
rtx op1 = ix86_expand_fast_convert_bf_to_sf (operands[2]);
rtx op2 = ix86_expand_fast_convert_bf_to_sf (operands[3]);
[(set_attr "type" "ssecomi")
(set_attr "prefix" "evex")
(set_attr "mode" "HF")])
+
+;; Set carry flag.
+;; Represented as an UNSPEC_STC that sets FLAGS_REG in CCCmode and emitted
+;; as a bare "stc".  The attributes describe the encoding: one byte long,
+;; no immediate, no modrm byte.
+(define_insn "x86_stc"
+ [(set (reg:CCC FLAGS_REG) (unspec:CCC [(const_int 0)] UNSPEC_STC))]
+ ""
+ "stc"
+ [(set_attr "length" "1")
+ (set_attr "length_immediate" "0")
+ (set_attr "modrm" "0")])
+
+;; On Pentium 4, set the carry flag using mov $1,%al;addb $-1,%al.
+;; Applies when TARGET_SLOW_STC is set and the insn is not optimized for
+;; size, and needs a free QImode scratch register (operand 0).  The
+;; scratch is loaded with 1 and then has -1 added to it, so the byte
+;; addition wraps to zero and produces a carry.
+(define_peephole2
+ [(match_scratch:QI 0 "r")
+ (set (reg:CCC FLAGS_REG) (unspec:CCC [(const_int 0)] UNSPEC_STC))]
+ "TARGET_SLOW_STC && !optimize_insn_for_size_p ()"
+ [(set (match_dup 0) (const_int 1))
+ (parallel
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC (plus:QI (match_dup 0) (const_int -1))
+ (match_dup 0)))
+ (set (match_dup 0) (plus:QI (match_dup 0) (const_int -1)))])])
+
+;; Complement carry flag.
+;; The new CCCmode flags are expressed as a compare of the negated LTU
+;; test against the GEU test of the incoming flags.  Emitted as a bare
+;; "cmc": one byte long, no immediate, no modrm byte.  The insn reads the
+;; incoming carry, hence use_carry is "1".
+(define_insn "*x86_cmc"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
+ (geu:QI (reg:CCC FLAGS_REG) (const_int 0))))]
+ ""
+ "cmc"
+ [(set_attr "length" "1")
+ (set_attr "length_immediate" "0")
+ (set_attr "use_carry" "1")
+ (set_attr "modrm" "0")])
+
+;; On Pentium 4, cmc is replaced with setnc %al;addb $-1,%al.
+;; Applies when TARGET_SLOW_STC is set and the insn is not optimized for
+;; size, and needs a free QImode scratch register (operand 0).  With the
+;; scratch holding 1 when the carry was clear and 0 when it was set,
+;; adding -1 carries out only in the former case, which inverts the flag.
+(define_peephole2
+ [(match_scratch:QI 0 "r")
+ (set (reg:CCC FLAGS_REG)
+ (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
+ (geu:QI (reg:CCC FLAGS_REG) (const_int 0))))]
+ "TARGET_SLOW_STC && !optimize_insn_for_size_p ()"
+ [(set (match_dup 0) (ne:QI (reg:CCC FLAGS_REG) (const_int 0)))
+ (parallel
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC (plus:QI (match_dup 0) (const_int -1))
+ (match_dup 0)))
+ (set (match_dup 0) (plus:QI (match_dup 0) (const_int -1)))])])
\f
;; Push/pop instructions.
+;; XImode move expander.  Requires TARGET_AVX512F together with
+;; TARGET_EVEX512; the move itself is produced by
+;; ix86_expand_vector_move.
(define_expand "movxi"
[(set (match_operand:XI 0 "nonimmediate_operand")
(match_operand:XI 1 "general_operand"))]
- "TARGET_AVX512F"
+ "TARGET_AVX512F && TARGET_EVEX512"
"ix86_expand_vector_move (XImode, operands); DONE;")
(define_expand "movoi"
(define_insn "*movxi_internal_avx512f"
[(set (match_operand:XI 0 "nonimmediate_operand" "=v,v ,v ,m")
(match_operand:XI 1 "nonimmediate_or_sse_const_operand" " C,BC,vm,v"))]
- "TARGET_AVX512F
+ "TARGET_AVX512F && TARGET_EVEX512
&& (register_operand (operands[0], XImode)
|| register_operand (operands[1], XImode))"
{
(define_insn "*movdi_internal"
[(set (match_operand:DI 0 "nonimmediate_operand"
- "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r,?*y,?v,?v,?v,m ,m,?r ,?*Yd,?r,?v,?*y,?*x,*k,*k ,*r,*m,*k")
+ "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r,?*y,?Yv,?v,?v,m ,m,?r ,?*Yd,?r,?v,?*y,?*x,*k,*k ,*r,*m,*k")
(match_operand:DI 1 "general_operand"
- "riFo,riF,Z,rem,i,re,C ,*y,Bk ,*y,*y,r ,C ,?v,Bk,?v,v,*Yd,r ,?v,r ,*x ,*y ,*r,*kBk,*k,*k,CBC"))]
+ "riFo,riF,Z,rem,i,re,C ,*y,Bk ,*y,*y,r ,C ,?v,Bk,?v,v,*Yd,r ,?v,r ,*x ,*y ,*r,*kBk,*k,*k,CBC"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))
&& ix86_hardreg_mov_ok (operands[0], operands[1])"
{
(set (attr "mode")
(cond [(eq_attr "alternative" "2")
(const_string "SI")
- (eq_attr "alternative" "12,13")
+ (eq_attr "alternative" "12")
(cond [(match_test "TARGET_AVX")
(const_string "TI")
(ior (not (match_test "TARGET_SSE2"))
(const_string "V4SF")
]
(const_string "TI"))
+ (eq_attr "alternative" "13")
+ (cond [(match_test "TARGET_AVX512VL")
+ (const_string "TI")
+ (match_test "TARGET_AVX512F")
+ (const_string "DF")
+ (match_test "TARGET_AVX")
+ (const_string "TI")
+ (ior (not (match_test "TARGET_SSE2"))
+ (match_test "optimize_function_for_size_p (cfun)"))
+ (const_string "V4SF")
+ ]
+ (const_string "TI"))
(and (eq_attr "alternative" "14,15,16")
(not (match_test "TARGET_SSE2")))
(define_insn "*movsi_internal"
[(set (match_operand:SI 0 "nonimmediate_operand"
- "=r,m ,*y,*y,?*y,?m,?r,?*y,?v,?v,?v,m ,?r,?v,*k,*k ,*rm,*k")
+ "=r,m ,*y,*y,?*y,?m,?r,?*y,?Yv,?v,?v,m ,?r,?v,*k,*k ,*rm,*k")
(match_operand:SI 1 "general_operand"
- "g ,re,C ,*y,Bk ,*y,*y,r ,C ,?v,Bk,?v,?v,r ,*r,*kBk,*k ,CBC"))]
+ "g ,re,C ,*y,Bk ,*y,*y,r ,C ,?v,Bk,?v,?v,r ,*r,*kBk,*k ,CBC"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))
&& ix86_hardreg_mov_ok (operands[0], operands[1])"
{
(set (attr "mode")
(cond [(eq_attr "alternative" "2,3")
(const_string "DI")
- (eq_attr "alternative" "8,9")
+ (eq_attr "alternative" "8")
(cond [(match_test "TARGET_AVX")
(const_string "TI")
(ior (not (match_test "TARGET_SSE2"))
(const_string "V4SF")
]
(const_string "TI"))
+ (eq_attr "alternative" "9")
+ (cond [(match_test "TARGET_AVX512VL")
+ (const_string "TI")
+ (match_test "TARGET_AVX512F")
+ (const_string "SF")
+ (match_test "TARGET_AVX")
+ (const_string "TI")
+ (ior (not (match_test "TARGET_SSE2"))
+ (match_test "optimize_function_for_size_p (cfun)"))
+ (const_string "V4SF")
+ ]
+ (const_string "TI"))
(and (eq_attr "alternative" "10,11")
(not (match_test "TARGET_SSE2")))
(define_insn "*movhi_internal"
[(set (match_operand:HI 0 "nonimmediate_operand"
- "=r,r,r,m ,*k,*k ,r ,m ,*k ,?r,?*v,*v,*v,*v,m")
+ "=r,r,r,m ,*k,*k ,r ,m ,*k ,?r,?*v,*Yv,*v,*v,jm,m")
(match_operand:HI 1 "general_operand"
- "r ,n,m,rn,r ,*km,*k,*k,CBC,*v,r ,C ,*v,m ,*v"))]
+ "r ,n,m,rn,r ,*km,*k,*k,CBC,*v,r ,C ,*v,m ,*x,*v"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))
&& ix86_hardreg_mov_ok (operands[0], operands[1])"
{
(cond [(eq_attr "alternative" "9,10,11,12,13")
(const_string "sse2")
(eq_attr "alternative" "14")
- (const_string "sse4")
+ (const_string "sse4_noavx")
+ (eq_attr "alternative" "15")
+ (const_string "avx")
]
(const_string "*")))
+ (set (attr "gpr32")
+ (if_then_else (eq_attr "alternative" "14")
+ (const_string "0")
+ (const_string "1")))
(set (attr "type")
(cond [(eq_attr "alternative" "4,5,6,7")
(const_string "mskmov")
(eq_attr "alternative" "8")
(const_string "msklog")
- (eq_attr "alternative" "13,14")
+ (eq_attr "alternative" "13,14,15")
(if_then_else (match_test "TARGET_AVX512FP16")
(const_string "ssemov")
(const_string "sselog1"))
(set (attr "prefix")
(cond [(eq_attr "alternative" "4,5,6,7,8")
(const_string "vex")
- (eq_attr "alternative" "9,10,11,12,13,14")
+ (eq_attr "alternative" "9,10,11,12,13,14,15")
(const_string "maybe_evex")
]
(const_string "orig")))
(if_then_else (match_test "TARGET_AVX512FP16")
(const_string "HI")
(const_string "SI"))
- (eq_attr "alternative" "13,14")
+ (eq_attr "alternative" "13,14,15")
(if_then_else (match_test "TARGET_AVX512FP16")
(const_string "HI")
(const_string "TI"))
]
(const_string "TI"))
(eq_attr "alternative" "12")
- (cond [(match_test "TARGET_AVX512FP16")
+ (cond [(match_test "TARGET_AVX512VL")
+ (const_string "TI")
+ (match_test "TARGET_AVX512FP16")
(const_string "HF")
+ (match_test "TARGET_AVX512F")
+ (const_string "SF")
(match_test "TARGET_AVX")
(const_string "TI")
(ior (not (match_test "TARGET_SSE2"))
[(parallel [(set (match_dup 1) (match_dup 2))
(set (match_dup 2) (match_dup 1))])])
+;; Convert xchg with a REG_UNUSED note to a mov (variant #1).
+;; The matched parallel swaps operands 0 and 1.  When operand 0 is dead
+;; afterwards only the new value of operand 1 is needed, so a single move
+;; of operand 0 into operand 1 suffices.  The transformation is skipped
+;; when one of the registers is AX_REG, optimize_size is at least 2 and
+;; the insn is being optimized for size.
+(define_peephole2
+ [(parallel [(set (match_operand:SWI 0 "general_reg_operand")
+ (match_operand:SWI 1 "general_reg_operand"))
+ (set (match_dup 1) (match_dup 0))])]
+ "((REGNO (operands[0]) != AX_REG
+ && REGNO (operands[1]) != AX_REG)
+ || optimize_size < 2
+ || !optimize_insn_for_size_p ())
+ && peep2_reg_dead_p (1, operands[0])"
+ [(set (match_dup 1) (match_dup 0))])
+
+;; Convert xchg with a REG_UNUSED note to a mov (variant #2).
+;; The matched parallel swaps operands 0 and 1.  When operand 1 is dead
+;; afterwards only the new value of operand 0 is needed, so a single move
+;; of operand 1 into operand 0 suffices.  The transformation is skipped
+;; when one of the registers is AX_REG, optimize_size is at least 2 and
+;; the insn is being optimized for size.
+(define_peephole2
+ [(parallel [(set (match_operand:SWI 0 "general_reg_operand")
+ (match_operand:SWI 1 "general_reg_operand"))
+ (set (match_dup 1) (match_dup 0))])]
+ "((REGNO (operands[0]) != AX_REG
+ && REGNO (operands[1]) != AX_REG)
+ || optimize_size < 2
+ || !optimize_insn_for_size_p ())
+ && peep2_reg_dead_p (1, operands[1])"
+ [(set (match_dup 0) (match_dup 1))])
+
;; Convert moves to/from AX_REG into xchg with -Oz.
(define_peephole2
[(set (match_operand:SWI48 0 "general_reg_operand")
(define_expand "extv<mode>"
[(set (match_operand:SWI24 0 "register_operand")
(sign_extract:SWI24 (match_operand:SWI24 1 "register_operand")
- (match_operand:SI 2 "const_int_operand")
- (match_operand:SI 3 "const_int_operand")))]
+ (match_operand:QI 2 "const_int_operand")
+ (match_operand:QI 3 "const_int_operand")))]
""
{
/* Handle extractions from %ah et al. */
(define_insn "*extv<mode>"
[(set (match_operand:SWI24 0 "register_operand" "=R")
- (sign_extract:SWI24 (match_operand:SWI24 1 "register_operand" "Q")
+ (sign_extract:SWI24 (match_operand 1 "int248_register_operand" "Q")
(const_int 8)
(const_int 8)))]
""
[(set_attr "type" "imovx")
(set_attr "mode" "SI")])
+;; Split sign-extension of single least significant bit as and x,$1;neg x
+;; Operand 1 is tied to operand 0 by the "0" constraint and the pattern
+;; clobbers FLAGS_REG.  The insn always outputs "#" and its split
+;; condition is empty, so it is split unconditionally into an AND with 1
+;; followed by a NEG, each with its own flags clobber.
+(define_insn_and_split "*extv<mode>_1_0"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (sign_extract:SWI48 (match_operand:SWI48 1 "register_operand" "0")
+ (const_int 1)
+ (const_int 0)))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "#"
+ ""
+ [(parallel [(set (match_dup 0) (and:SWI48 (match_dup 1) (const_int 1)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_dup 0) (neg:SWI48 (match_dup 0)))
+ (clobber (reg:CC FLAGS_REG))])])
+
(define_expand "extzv<mode>"
[(set (match_operand:SWI248 0 "register_operand")
(zero_extract:SWI248 (match_operand:SWI248 1 "register_operand")
- (match_operand:SI 2 "const_int_operand")
- (match_operand:SI 3 "const_int_operand")))]
+ (match_operand:QI 2 "const_int_operand")
+ (match_operand:QI 3 "const_int_operand")))]
""
{
if (ix86_expand_pextr (operands))
operands[1] = copy_to_reg (operands[1]);
})
-(define_insn "*extzvqi_mem_rex64"
- [(set (match_operand:QI 0 "norex_memory_operand" "=Bn")
- (subreg:QI
- (zero_extract:SWI248
- (match_operand:SWI248 1 "register_operand" "Q")
- (const_int 8)
- (const_int 8)) 0))]
- "TARGET_64BIT && reload_completed"
- "mov{b}\t{%h1, %0|%0, %h1}"
- [(set_attr "type" "imov")
- (set_attr "mode" "QI")])
-
(define_insn "*extzv<mode>"
[(set (match_operand:SWI248 0 "register_operand" "=R")
- (zero_extract:SWI248 (match_operand:SWI248 1 "register_operand" "Q")
+ (zero_extract:SWI248 (match_operand 1 "int248_register_operand" "Q")
(const_int 8)
(const_int 8)))]
""
[(set_attr "type" "imovx")
(set_attr "mode" "SI")])
+;; Store the 8-bit field at bit position 8 of operand 1 into a QImode
+;; norex_memory_operand (operand 0, constraint "Bn").  Operand 2 matches
+;; the extraction operator itself (extract_operator).  Emitted as movb
+;; using the %h form of operand 1; only matches for TARGET_64BIT after
+;; reload.
+(define_insn "*extzvqi_mem_rex64"
+ [(set (match_operand:QI 0 "norex_memory_operand" "=Bn")
+ (subreg:QI
+ (match_operator:SWI248 2 "extract_operator"
+ [(match_operand 1 "int248_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)]) 0))]
+ "TARGET_64BIT && reload_completed"
+ "mov{b}\t{%h1, %0|%0, %h1}"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "QI")])
+
(define_insn "*extzvqi"
[(set (match_operand:QI 0 "nonimmediate_operand" "=QBc,?R,m")
(subreg:QI
- (zero_extract:SWI248
- (match_operand:SWI248 1 "register_operand" "Q,Q,Q")
- (const_int 8)
- (const_int 8)) 0))]
+ (match_operator:SWI248 2 "extract_operator"
+ [(match_operand 1 "int248_register_operand" "Q,Q,Q")
+ (const_int 8)
+ (const_int 8)]) 0))]
""
{
switch (get_attr_type (insn))
+;; Merge an extraction of the 8-bit field at bit position 8 of operand 1
+;; into a QImode register (operand 0) with a following store of that
+;; register to a norex_memory_operand (operand 2).  The replacement stores
+;; the extracted field straight to memory.  Applies on 64-bit targets
+;; only, and only when the intermediate register is dead after the store.
(define_peephole2
[(set (match_operand:QI 0 "register_operand")
(subreg:QI
- (zero_extract:SWI248 (match_operand:SWI248 1 "register_operand")
- (const_int 8)
- (const_int 8)) 0))
+ (match_operator:SWI248 3 "extract_operator"
+ [(match_operand 1 "int248_register_operand")
+ (const_int 8)
+ (const_int 8)]) 0))
(set (match_operand:QI 2 "norex_memory_operand") (match_dup 0))]
"TARGET_64BIT
&& peep2_reg_dead_p (2, operands[0])"
[(set (match_dup 2)
(subreg:QI
- (zero_extract:SWI248 (match_dup 1)
- (const_int 8)
- (const_int 8)) 0))])
+ (match_op_dup 3
+ [(match_dup 1)
+ (const_int 8)
+ (const_int 8)]) 0))])
(define_expand "insv<mode>"
[(set (zero_extract:SWI248 (match_operand:SWI248 0 "register_operand")
- (match_operand:SI 1 "const_int_operand")
- (match_operand:SI 2 "const_int_operand"))
+ (match_operand:QI 1 "const_int_operand")
+ (match_operand:QI 2 "const_int_operand"))
(match_operand:SWI248 3 "register_operand"))]
""
{
(define_insn "*insvqi_1_mem_rex64"
[(set (zero_extract:SWI248
- (match_operand:SWI248 0 "register_operand" "+Q")
+ (match_operand 0 "int248_register_operand" "+Q")
(const_int 8)
(const_int 8))
(subreg:SWI248
(define_insn "@insv<mode>_1"
[(set (zero_extract:SWI248
- (match_operand:SWI248 0 "register_operand" "+Q,Q")
+ (match_operand 0 "int248_register_operand" "+Q,Q")
(const_int 8)
(const_int 8))
(match_operand:SWI248 1 "general_operand" "QnBc,m"))]
(define_insn "*insvqi_1"
[(set (zero_extract:SWI248
- (match_operand:SWI248 0 "register_operand" "+Q,Q")
+ (match_operand 0 "int248_register_operand" "+Q,Q")
(const_int 8)
(const_int 8))
(subreg:SWI248
(define_peephole2
[(set (match_operand:QI 0 "register_operand")
(match_operand:QI 1 "norex_memory_operand"))
- (set (zero_extract:SWI248 (match_operand:SWI248 2 "register_operand")
+ (set (zero_extract:SWI248 (match_operand 2 "int248_register_operand")
(const_int 8)
(const_int 8))
(subreg:SWI248 (match_dup 0) 0))]
[(parallel [(set (match_operand:SWI48 0 "general_reg_operand")
(const_int 0))
(clobber (reg:CC FLAGS_REG))])
- (set (zero_extract:SWI248 (match_operand:SWI248 1 "general_reg_operand")
+ (set (zero_extract:SWI248 (match_operand 1 "int248_register_operand")
(const_int 8)
(const_int 8))
(const_int 0))]
(define_peephole2
[(set (match_operand:SWI48 0 "general_reg_operand")
(match_operand:SWI48 1 "const_int_operand"))
- (set (zero_extract:SWI248 (match_operand:SWI248 2 "general_reg_operand")
+ (set (zero_extract:SWI248 (match_operand 2 "int248_register_operand")
(const_int 8)
(const_int 8))
(match_operand:SWI248 3 "const_int_operand"))]
operands[4] = gen_int_mode (tmp, <SWI48:MODE>mode);
})
-
-(define_code_iterator any_extract [sign_extract zero_extract])
-
(define_insn "*insvqi_2"
[(set (zero_extract:SWI248
- (match_operand:SWI248 0 "register_operand" "+Q")
+ (match_operand 0 "int248_register_operand" "+Q")
(const_int 8)
(const_int 8))
- (any_extract:SWI248
- (match_operand:SWI248 1 "register_operand" "Q")
- (const_int 8)
- (const_int 8)))]
+ (match_operator:SWI248 2 "extract_operator"
+ [(match_operand 1 "int248_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)]))]
""
"mov{b}\t{%h1, %h0|%h0, %h1}"
[(set_attr "type" "imov")
(define_insn "*insvqi_3"
[(set (zero_extract:SWI248
- (match_operand:SWI248 0 "register_operand" "+Q")
+ (match_operand 0 "int248_register_operand" "+Q")
(const_int 8)
(const_int 8))
(any_shiftrt:SWI248
"mov{b}\t{%h1, %h0|%h0, %h1}"
[(set_attr "type" "imov")
(set_attr "mode" "QI")])
+
+;; Iterates over plus, ior and xor.  In the *insv*_highpart_1 and
+;; *insv*_lowpart_1 patterns it joins two operands with no set bits in
+;; common, for which the three codes give the same result.
+(define_code_iterator any_or_plus [plus ior xor])
+
+;; Replace the high 64 bits of a TImode value.  Matches
+;;   (operand 1 & operand 3) OP (zero_extend (operand 2) << 64)
+;; where OP is plus, ior or xor and operand 3 is a two-element
+;; CONST_WIDE_INT whose element 0 is -1 and element 1 is 0, i.e. a mask
+;; that keeps only the low 64 bits of operand 1.  64-bit targets only.
+;; After reload the insn is split by split_double_concat, which is given
+;; the DImode low part of operand 1 and operand 2.
+;; NOTE(review): presumably these are the low and high halves of the
+;; result, in that order -- confirm against split_double_concat.
+(define_insn_and_split "*insvti_highpart_1"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=ro,r,r,&r")
+ (any_or_plus:TI
+ (and:TI
+ (match_operand:TI 1 "nonimmediate_operand" "r,m,r,m")
+ (match_operand:TI 3 "const_scalar_int_operand" "n,n,n,n"))
+ (ashift:TI
+ (zero_extend:TI
+ (match_operand:DI 2 "nonimmediate_operand" "r,r,m,m"))
+ (const_int 64))))]
+ "TARGET_64BIT
+ && CONST_WIDE_INT_P (operands[3])
+ && CONST_WIDE_INT_NUNITS (operands[3]) == 2
+ && CONST_WIDE_INT_ELT (operands[3], 0) == -1
+ && CONST_WIDE_INT_ELT (operands[3], 1) == 0"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ operands[4] = gen_lowpart (DImode, operands[1]);
+ split_double_concat (TImode, operands[0], operands[4], operands[2]);
+ DONE;
+})
+
+;; Replace the low 64 bits of a TImode value.  Matches
+;;   (operand 1 & operand 3) OP zero_extend (operand 2)
+;; where OP is plus, ior or xor and operand 3 is a two-element
+;; CONST_WIDE_INT whose element 0 is 0 and element 1 is -1, i.e. a mask
+;; that keeps only the high 64 bits of operand 1.  64-bit targets only.
+;; After reload the insn is split by split_double_concat, which is given
+;; operand 2 and the DImode high part of operand 1.
+;; NOTE(review): presumably these are the low and high halves of the
+;; result, in that order -- confirm against split_double_concat.
+(define_insn_and_split "*insvti_lowpart_1"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=ro,r,r,&r")
+ (any_or_plus:TI
+ (and:TI
+ (match_operand:TI 1 "nonimmediate_operand" "r,m,r,m")
+ (match_operand:TI 3 "const_scalar_int_operand" "n,n,n,n"))
+ (zero_extend:TI
+ (match_operand:DI 2 "nonimmediate_operand" "r,r,m,m"))))]
+ "TARGET_64BIT
+ && CONST_WIDE_INT_P (operands[3])
+ && CONST_WIDE_INT_NUNITS (operands[3]) == 2
+ && CONST_WIDE_INT_ELT (operands[3], 0) == 0
+ && CONST_WIDE_INT_ELT (operands[3], 1) == -1"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ operands[4] = gen_highpart (DImode, operands[1]);
+ split_double_concat (TImode, operands[0], operands[2], operands[4]);
+ DONE;
+})
+
+;; Replace the low 32 bits of a DImode value on 32-bit targets.  Matches
+;;   (operand 1 & operand 3) OP zero_extend (operand 2)
+;; where OP is plus, ior or xor and operand 3 is the constant
+;; 0xffffffff00000000, i.e. a mask that keeps only the high 32 bits of
+;; operand 1.  After reload the insn is split by split_double_concat,
+;; which is given operand 2 and the SImode high part of operand 1.
+;; NOTE(review): presumably these are the low and high halves of the
+;; result, in that order -- confirm against split_double_concat.
+(define_insn_and_split "*insvdi_lowpart_1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=ro,r,r,&r")
+ (any_or_plus:DI
+ (and:DI
+ (match_operand:DI 1 "nonimmediate_operand" "r,m,r,m")
+ (match_operand:DI 3 "const_int_operand" "n,n,n,n"))
+ (zero_extend:DI
+ (match_operand:SI 2 "nonimmediate_operand" "r,r,m,m"))))]
+ "!TARGET_64BIT
+ && CONST_INT_P (operands[3])
+ && UINTVAL (operands[3]) == 0xffffffff00000000ll"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ operands[4] = gen_highpart (SImode, operands[1]);
+ split_double_concat (DImode, operands[0], operands[2], operands[4]);
+ DONE;
+})
\f
;; Floating point push instructions.
(set_attr "type" "push,multi")
(set_attr "mode" "SI,TI")])
+;; Push two DImode registers (operands 1 and 2) with a single push2.
+;; The pair is modelled as one TImode store to a push operand wrapped in
+;; UNSPEC_APXPUSH2.  Requires TARGET_APX_PUSH2POP2; the insn is marked as
+;; EVEX-prefixed.
+(define_insn "push2_di"
+ [(set (match_operand:TI 0 "push_operand" "=<")
+ (unspec:TI [(match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "register_operand" "r")]
+ UNSPEC_APXPUSH2))]
+ "TARGET_APX_PUSH2POP2"
+ "push2\t%1, %2"
+ [(set_attr "mode" "TI")
+ (set_attr "type" "multi")
+ (set_attr "prefix" "evex")])
+
+;; Pop two DImode registers (operands 0 and 2) with a single pop2.
+;; Operand 0 is set from UNSPEC_APXPOP2_LOW applied to a TImode pop
+;; operand (operand 1); operand 2 is set from UNSPEC_APXPOP2_HIGH, which
+;; takes only a dummy (const_int 0) argument.  Requires
+;; TARGET_APX_PUSH2POP2; the insn is marked as EVEX-prefixed.
+(define_insn "pop2_di"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:TI 1 "pop_operand" ">")]
+ UNSPEC_APXPOP2_LOW))
+ (set (match_operand:DI 2 "register_operand" "=r")
+ (unspec:DI [(const_int 0)] UNSPEC_APXPOP2_HIGH))])]
+ "TARGET_APX_PUSH2POP2"
+ "pop2\t%0, %2"
+ [(set_attr "mode" "TI")
+ (set_attr "prefix" "evex")])
+
(define_insn "*pushsf_rex64"
[(set (match_operand:SF 0 "push_operand" "=X,X,X")
(match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,v"))]
;; Possible store forwarding (partial memory) stall in alternatives 4, 6 and 7.
(define_insn "*movdf_internal"
[(set (match_operand:DF 0 "nonimmediate_operand"
- "=Yf*f,m ,Yf*f,?r ,!o,?*r ,!o,!o,?r,?m,?r,?r,v,v,v,m,*x,*x,*x,m ,r ,v,r ,o ,r ,m")
+ "=Yf*f,m ,Yf*f,?r ,!o,?*r ,!o,!o,?r,?m,?r,?r,Yv,v,v,m,*x,*x,*x,m ,?r,?v,r ,o ,r ,m")
(match_operand:DF 1 "general_operand"
- "Yf*fm,Yf*f,G ,roF,r ,*roF,*r,F ,rm,rC,C ,F ,C,v,m,v,C ,*x,m ,*x,v,r ,roF,rF,rmF,rC"))]
+ "Yf*fm,Yf*f,G ,roF,r ,*roF,*r,F ,rm,rC,C ,F ,C ,v,m,v,C ,*x,m ,*x, v, r,roF,rF,rmF,rC"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))
&& (lra_in_progress || reload_completed
|| !CONST_DOUBLE_P (operands[1])
/* movaps is one byte shorter for non-AVX targets. */
(eq_attr "alternative" "13,17")
- (cond [(match_test "TARGET_AVX")
+ (cond [(match_test "TARGET_AVX512VL")
+ (const_string "V2DF")
+ (match_test "TARGET_AVX512F")
(const_string "DF")
+ (match_test "TARGET_AVX")
+ (const_string "V2DF")
(ior (not (match_test "TARGET_SSE2"))
(match_test "optimize_function_for_size_p (cfun)"))
(const_string "V4SF")
(define_insn "*movsf_internal"
[(set (match_operand:SF 0 "nonimmediate_operand"
- "=Yf*f,m ,Yf*f,?r ,?m,v,v,v,m,?r,?v,!*y,!*y,!m,!r,!*y,r ,m")
+ "=Yf*f,m ,Yf*f,?r ,?m,Yv,v,v,m,?r,?v,!*y,!*y,!m,!r,!*y,r ,m")
(match_operand:SF 1 "general_operand"
- "Yf*fm,Yf*f,G ,rmF,rF,C,v,m,v,v ,r ,*y ,m ,*y,*y,r ,rmF,rF"))]
+ "Yf*fm,Yf*f,G ,rmF,rF,C ,v,m,v,v ,r ,*y ,m ,*y,*y,r ,rmF,rF"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))
&& (lra_in_progress || reload_completed
|| !CONST_DOUBLE_P (operands[1])
(eq_attr "alternative" "11")
(const_string "DI")
(eq_attr "alternative" "5")
- (cond [(and (match_test "TARGET_AVX512F")
+ (cond [(and (match_test "TARGET_AVX512F && TARGET_EVEX512")
(not (match_test "TARGET_PREFER_AVX256")))
(const_string "V16SF")
(match_test "TARGET_AVX")
better to maintain the whole registers in single format
to avoid problems on using packed logical operations. */
(eq_attr "alternative" "6")
- (cond [(ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+ (cond [(match_test "TARGET_AVX512VL")
+ (const_string "V4SF")
+ (match_test "TARGET_AVX512F")
+ (const_string "SF")
+ (ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
(match_test "TARGET_SSE_SPLIT_REGS"))
(const_string "V4SF")
]
(define_insn "*mov<mode>_internal"
[(set (match_operand:HFBF 0 "nonimmediate_operand"
- "=?r,?r,?r,?m,v,v,?r,m,?v,v")
+ "=?r,?r,?r,?m ,Yv,v,?r,jm,m,?v,v")
(match_operand:HFBF 1 "general_operand"
- "r ,F ,m ,r<hfbfconstf>,C,v, v,v,r ,m"))]
+ "r ,F ,m ,r<hfbfconstf>,C ,v, v,v ,v,r ,m"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))
&& (lra_in_progress
|| reload_completed
}
}
[(set (attr "isa")
- (cond [(eq_attr "alternative" "4,5,6,8,9")
+ (cond [(eq_attr "alternative" "4,5,6,9,10")
(const_string "sse2")
(eq_attr "alternative" "7")
- (const_string "sse4")
+ (const_string "sse4_noavx")
+ (eq_attr "alternative" "8")
+ (const_string "avx")
]
(const_string "*")))
+ (set (attr "gpr32")
+ (if_then_else (eq_attr "alternative" "8")
+ (const_string "0")
+ (const_string "1")))
(set (attr "type")
(cond [(eq_attr "alternative" "4")
(const_string "sselog1")
- (eq_attr "alternative" "5,6,8")
+ (eq_attr "alternative" "5,6,9")
(const_string "ssemov")
- (eq_attr "alternative" "7,9")
+ (eq_attr "alternative" "7,8,10")
(if_then_else
(match_test ("TARGET_AVX512FP16"))
(const_string "ssemov")
]
(const_string "imov")))
(set (attr "prefix")
- (cond [(eq_attr "alternative" "4,5,6,7,8,9")
+ (cond [(eq_attr "alternative" "4,5,6,7,8,9,10")
(const_string "maybe_vex")
]
(const_string "orig")))
(set (attr "mode")
(cond [(eq_attr "alternative" "4")
(const_string "V4SF")
- (eq_attr "alternative" "6,8")
+ (eq_attr "alternative" "6,9")
(if_then_else
(match_test "TARGET_AVX512FP16")
(const_string "HI")
(const_string "SI"))
- (eq_attr "alternative" "7,9")
+ (eq_attr "alternative" "7,8,10")
(if_then_else
(match_test "TARGET_AVX512FP16")
(const_string "HI")
(const_string "TI"))
(eq_attr "alternative" "5")
- (cond [(match_test "TARGET_AVX512FP16")
+ (cond [(match_test "TARGET_AVX512VL")
+ (const_string "V4SF")
+ (match_test "TARGET_AVX512FP16")
(const_string "HF")
+ (match_test "TARGET_AVX512F")
+ (const_string "SF")
+ (match_test "TARGET_AVX")
+ (const_string "V4SF")
(ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
(match_test "TARGET_SSE_SPLIT_REGS"))
(const_string "V4SF")
(eq_attr "alternative" "12")
(const_string "x64_avx512bw")
(eq_attr "alternative" "13")
- (const_string "avx512bw")
+ (const_string "avx512bw_512")
]
(const_string "*")))
(set (attr "mmx_isa")
"split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
(define_mode_attr kmov_isa
- [(QI "avx512dq") (HI "avx512f") (SI "avx512bw") (DI "avx512bw")])
+ [(QI "avx512dq") (HI "avx512f") (SI "avx512bw") (DI "avx512bw_512")])
(define_insn "zero_extend<mode>di2"
[(set (match_operand:DI 0 "register_operand" "=r,*r,*k")
"!TARGET_64BIT"
"#")
+;; Sign-extend a DImode register to TImode on 64-bit targets.  The output
+;; template is "#", so this insn is never printed as-is and has to be split
+;; into other insns.  FLAGS_REG is clobbered.  A DImode scratch register
+;; (early-clobber "&r") is requested only for the last alternative, whose
+;; destination constraint is "o"; the other alternatives use "X".
+(define_insn "extendditi2"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=*A,r,?r,?*o")
+ (sign_extend:TI (match_operand:DI 1 "register_operand" "0,0,r,r")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (match_scratch:DI 2 "=X,X,X,&r"))]
+ "TARGET_64BIT"
+ "#")
+
;; Split the memory case. If the source register doesn't die, it will stay
;; this way; if it does die, the following peephole2s take care of it.
(define_split
- [(set (match_operand:DI 0 "memory_operand")
- (sign_extend:DI (match_operand:SI 1 "register_operand")))
+ [(set (match_operand:<DWI> 0 "memory_operand")
+ (sign_extend:<DWI> (match_operand:DWIH 1 "register_operand")))
(clobber (reg:CC FLAGS_REG))
- (clobber (match_operand:SI 2 "register_operand"))]
+ (clobber (match_operand:DWIH 2 "register_operand"))]
"reload_completed"
[(const_int 0)]
{
- split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);
+ rtx bits = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
+
+ split_double_mode (<DWI>mode, &operands[0], 1, &operands[3], &operands[4]);
emit_move_insn (operands[3], operands[1]);
&& REGNO (operands[1]) == AX_REG
&& REGNO (operands[2]) == DX_REG)
{
- emit_insn (gen_ashrsi3_cvt (operands[2], operands[1], GEN_INT (31)));
+ emit_insn (gen_ashr<mode>3_cvt (operands[2], operands[1], bits));
}
else
{
emit_move_insn (operands[2], operands[1]);
- emit_insn (gen_ashrsi3_cvt (operands[2], operands[2], GEN_INT (31)));
+ emit_insn (gen_ashr<mode>3_cvt (operands[2], operands[2], bits));
}
emit_move_insn (operands[4], operands[2]);
DONE;
;; Peepholes for the case where the source register does die, after
;; being split with the above splitter.
+;; Window matched:
+;;   mem0 = reg1;  reg2 = reg1;  reg2 = reg2 >> op4 (arithmetic);  mem3 = reg2
+;; Replacement:
+;;   mem0 = reg1;  reg1 = reg1 >> op4 (arithmetic);  mem3 = reg1
+;; The condition requires reg1 and reg2 to be different registers, the
+;; shift count (operand 4) to equal the mode's bit width minus one, the
+;; peep2_reg_dead_p checks on operands 1 and 2 to succeed, and reg2 not to
+;; be mentioned in mem3.  The shift count is matched as an operand and
+;; tested in the condition so that the pattern covers every DWIH mode
+;; instead of only SImode with (const_int 31).
(define_peephole2
- [(set (match_operand:SI 0 "memory_operand")
- (match_operand:SI 1 "general_reg_operand"))
- (set (match_operand:SI 2 "general_reg_operand") (match_dup 1))
+ [(set (match_operand:DWIH 0 "memory_operand")
+ (match_operand:DWIH 1 "general_reg_operand"))
+ (set (match_operand:DWIH 2 "general_reg_operand") (match_dup 1))
(parallel [(set (match_dup 2)
- (ashiftrt:SI (match_dup 2) (const_int 31)))
+ (ashiftrt:DWIH (match_dup 2)
+ (match_operand 4 "const_int_operand")))
(clobber (reg:CC FLAGS_REG))])
- (set (match_operand:SI 3 "memory_operand") (match_dup 2))]
+ (set (match_operand:DWIH 3 "memory_operand") (match_dup 2))]
"REGNO (operands[1]) != REGNO (operands[2])
+ && INTVAL (operands[4]) == (<MODE_SIZE> * BITS_PER_UNIT - 1)
&& peep2_reg_dead_p (2, operands[1])
&& peep2_reg_dead_p (4, operands[2])
&& !reg_mentioned_p (operands[2], operands[3])"
[(set (match_dup 0) (match_dup 1))
- (parallel [(set (match_dup 1) (ashiftrt:SI (match_dup 1) (const_int 31)))
+ (parallel [(set (match_dup 1) (ashiftrt:DWIH (match_dup 1) (match_dup 4)))
(clobber (reg:CC FLAGS_REG))])
(set (match_dup 3) (match_dup 1))])
+;; Window matched:
+;;   mem0 = reg1;  reg2 = reg1 >> op4 (arithmetic);  mem3 = reg2
+;; Replacement:
+;;   mem0 = reg1;  reg1 = reg1 >> op4 (arithmetic);  mem3 = reg1
+;; Applied only when not optimizing the function for size (the condition's
+;; own comment notes that cltd is the shorter encoding), reg1 is AX, reg2
+;; is DX, the shift count (operand 4) equals the mode's bit width minus
+;; one, the peep2_reg_dead_p checks on operands 1 and 2 succeed, and reg2
+;; is not mentioned in mem3.
(define_peephole2
- [(set (match_operand:SI 0 "memory_operand")
- (match_operand:SI 1 "general_reg_operand"))
- (parallel [(set (match_operand:SI 2 "general_reg_operand")
- (ashiftrt:SI (match_dup 1) (const_int 31)))
+ [(set (match_operand:DWIH 0 "memory_operand")
+ (match_operand:DWIH 1 "general_reg_operand"))
+ (parallel [(set (match_operand:DWIH 2 "general_reg_operand")
+ (ashiftrt:DWIH (match_dup 1)
+ (match_operand 4 "const_int_operand")))
(clobber (reg:CC FLAGS_REG))])
- (set (match_operand:SI 3 "memory_operand") (match_dup 2))]
+ (set (match_operand:DWIH 3 "memory_operand") (match_dup 2))]
"/* cltd is shorter than sarl $31, %eax */
!optimize_function_for_size_p (cfun)
&& REGNO (operands[1]) == AX_REG
&& REGNO (operands[2]) == DX_REG
+ && INTVAL (operands[4]) == (<MODE_SIZE> * BITS_PER_UNIT - 1)
&& peep2_reg_dead_p (2, operands[1])
&& peep2_reg_dead_p (3, operands[2])
&& !reg_mentioned_p (operands[2], operands[3])"
[(set (match_dup 0) (match_dup 1))
- (parallel [(set (match_dup 1) (ashiftrt:SI (match_dup 1) (const_int 31)))
+ (parallel [(set (match_dup 1) (ashiftrt:DWIH (match_dup 1) (match_dup 4)))
(clobber (reg:CC FLAGS_REG))])
(set (match_dup 3) (match_dup 1))])
;; Extend to register case. Optimize case where source and destination
;; registers match and cases where we can use cltd.
+;; After reload, split a sign extension from a DWIH register into a
+;; double-word register.  Operand 0 is split into a low part (operands[3])
+;; and a high part (operands[4]).  The source is copied into the low part
+;; unless it already lives there, and the high part is produced by an
+;; arithmetic right shift by (bit width - 1) through gen_ashr<mode>3_cvt.
(define_split
- [(set (match_operand:DI 0 "register_operand")
- (sign_extend:DI (match_operand:SI 1 "register_operand")))
+ [(set (match_operand:<DWI> 0 "register_operand")
+ (sign_extend:<DWI> (match_operand:DWIH 1 "register_operand")))
(clobber (reg:CC FLAGS_REG))
- (clobber (match_scratch:SI 2))]
+ (clobber (match_scratch:DWIH 2))]
"reload_completed"
[(const_int 0)]
{
- split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);
+ /* Shift count that replicates the sign bit across the whole half-word. */
+ rtx bits = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
+
+ split_double_mode (<DWI>mode, &operands[0], 1, &operands[3], &operands[4]);
if (REGNO (operands[3]) != REGNO (operands[1]))
emit_move_insn (operands[3], operands[1]);
+ /* The low part now holds the source value.  If the low part is AX, use
+ it as the shift source so the AX/DX test below is made against it. */
+ rtx src = operands[1];
+ if (REGNO (operands[3]) == AX_REG)
+ src = operands[3];
+
/* Generate a cltd if possible and doing so is profitable. */
if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
- && REGNO (operands[3]) == AX_REG
+ && REGNO (src) == AX_REG
&& REGNO (operands[4]) == DX_REG)
{
- emit_insn (gen_ashrsi3_cvt (operands[4], operands[3], GEN_INT (31)));
+ emit_insn (gen_ashr<mode>3_cvt (operands[4], src, bits));
DONE;
}
if (REGNO (operands[4]) != REGNO (operands[1]))
emit_move_insn (operands[4], operands[1]);
- emit_insn (gen_ashrsi3_cvt (operands[4], operands[4], GEN_INT (31)));
+ emit_insn (gen_ashr<mode>3_cvt (operands[4], operands[4], bits));
DONE;
})
(if_then_else (eq_attr "prefix_0f" "0")
(const_string "0")
(const_string "1")))])
+
+;; Sign-extend to an SWI24 register the QImode subreg of a byte selected
+;; from operand 1 by an extract_operator with size 8 at position 8.  The
+;; source is printed with the %h modifier and the instruction emitted is
+;; movs{b<suffix>|x}.  The pattern has no enabling condition.
+(define_insn "*extendqi<SWI24:mode>_ext_1"
+ [(set (match_operand:SWI24 0 "register_operand" "=R")
+ (sign_extend:SWI24
+ (subreg:QI
+ (match_operator:SWI248 2 "extract_operator"
+ [(match_operand 1 "int248_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)]) 0)))]
+ ""
+ "movs{b<SWI24:imodesuffix>|x}\t{%h1, %0|%0, %h1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "<SWI24:MODE>")])
\f
;; Conversions between float and double.
&& optimize_insn_for_speed_p ()
&& reload_completed
&& (!EXT_REX_SSE_REG_P (operands[0])
- || TARGET_AVX512VL)"
+ || TARGET_AVX512VL || TARGET_EVEX512)"
[(set (match_dup 2)
(float_extend:V2DF
(vec_select:V2SF
;; Don't use float_extend since pslld doesn't raise
;; exceptions or turn a sNaN into a qNaN.
+;; BFmode to SFmode conversion expressed as UNSPEC_CVTBFSF and implemented
+;; as a left shift by 16 with pslld/vpslld.  Alternative 0 is the
+;; two-operand non-AVX form (input tied to output), alternative 1 the
+;; three-operand AVX form.  Alternative 2 prints both registers with the %g
+;; modifier and has mode XI; its "enabled" attribute restricts it to
+;; TARGET_AVX512F && TARGET_EVEX512 when neither TARGET_AVX512VL nor
+;; TARGET_PREFER_AVX256 is set.
+;; NOTE(review): %g presumably selects the 512-bit register name, which
+;; would match mode XI -- confirm against the operand-modifier table.
(define_insn "extendbfsf2_1"
- [(set (match_operand:SF 0 "register_operand" "=x,Yw")
+ [(set (match_operand:SF 0 "register_operand" "=x,Yv,v")
(unspec:SF
- [(match_operand:BF 1 "register_operand" " 0,Yw")]
+ [(match_operand:BF 1 "register_operand" " 0,Yv,v")]
UNSPEC_CVTBFSF))]
"TARGET_SSE2"
"@
pslld\t{$16, %0|%0, 16}
- vpslld\t{$16, %1, %0|%0, %1, 16}"
- [(set_attr "isa" "noavx,avx")
- (set_attr "type" "sseishft")
+ vpslld\t{$16, %1, %0|%0, %1, 16}
+ vpslld\t{$16, %g1, %g0|%g0, %g1, 16}"
+ [(set_attr "isa" "noavx,avx,*")
+ (set_attr "type" "sseishft1")
(set_attr "length_immediate" "1")
- (set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
- (set_attr "mode" "TI")
- (set_attr "memory" "none")])
+ (set_attr "prefix_data16" "1,*,*")
+ (set_attr "prefix" "orig,maybe_evex,evex")
+ (set_attr "mode" "TI,TI,XI")
+ (set_attr "memory" "none")
+ (set (attr "enabled")
+ (if_then_else (eq_attr "alternative" "2")
+ (symbol_ref "TARGET_AVX512F && TARGET_EVEX512
+ && !TARGET_AVX512VL && !TARGET_PREFER_AVX256")
+ (const_string "*")))])
(define_expand "extend<mode>xf2"
[(set (match_operand:XF 0 "nonimmediate_operand")
(clobber (reg:CC FLAGS_REG))])]
"split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[3]);")
+;; Double-word add in which one addend is assembled from two half-words:
+;; (zero_extend op2) shifted left by op3, combined through any_or_plus with
+;; (zero_extend op4).  The condition requires op3 to equal the half-word
+;; bit width.  The other addend, operand 1, is a double-word register tied
+;; to the (early-clobber) output.
+;; After reload the insn is split into
+;;   1. low (op0) = low (op1) + op4, also setting the carry in CCC mode;
+;;   2. high (op0) = carry + high (op1) + op2.
+;; split_double_mode is called on operands 0 and 1: their low parts
+;; replace operands[0]/[1] and their high parts go to operands[5]/[6].
+(define_insn_and_split "*add<dwi>3_doubleword_concat"
+ [(set (match_operand:<DWI> 0 "register_operand" "=&r")
+ (plus:<DWI>
+ (any_or_plus:<DWI>
+ (ashift:<DWI>
+ (zero_extend:<DWI>
+ (match_operand:DWIH 2 "nonimmediate_operand" "rm"))
+ (match_operand:QI 3 "const_int_operand"))
+ (zero_extend:<DWI>
+ (match_operand:DWIH 4 "nonimmediate_operand" "rm")))
+ (match_operand:<DWI> 1 "register_operand" "0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
+ "#"
+ "&& reload_completed"
+ [(parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (plus:DWIH (match_dup 1) (match_dup 4))
+ (match_dup 1)))
+ (set (match_dup 0)
+ (plus:DWIH (match_dup 1) (match_dup 4)))])
+ (parallel [(set (match_dup 5)
+ (plus:DWIH
+ (plus:DWIH
+ (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
+ (match_dup 6))
+ (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[5]);")
+
+;; Double-word add of a value assembled from two half-words -- (zero_extend
+;; op2) shifted left by op3, combined through any_or_plus with (zero_extend
+;; op4) -- and a zero-extended half-word, operand 1.  The condition requires
+;; op3 to equal the half-word bit width.
+;; After reload the insn is split into
+;;   1. low (op0) = op4;
+;;   2. high (op0) = op2;
+;;   3. low (op0) = low (op0) + op1, also setting the carry in CCC mode;
+;;   4. high (op0) = carry + high (op0) + 0.
+;; split_double_mode is called on operand 0 only: its low part replaces
+;; operands[0] and its high part goes to operands[5].
+(define_insn_and_split "*add<dwi>3_doubleword_concat_zext"
+ [(set (match_operand:<DWI> 0 "register_operand" "=&r")
+ (plus:<DWI>
+ (any_or_plus:<DWI>
+ (ashift:<DWI>
+ (zero_extend:<DWI>
+ (match_operand:DWIH 2 "nonimmediate_operand" "rm"))
+ (match_operand:QI 3 "const_int_operand"))
+ (zero_extend:<DWI>
+ (match_operand:DWIH 4 "nonimmediate_operand" "rm")))
+ (zero_extend:<DWI>
+ (match_operand:DWIH 1 "nonimmediate_operand" "rm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (match_dup 4))
+ (set (match_dup 5) (match_dup 2))
+ (parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (plus:DWIH (match_dup 0) (match_dup 1))
+ (match_dup 0)))
+ (set (match_dup 0)
+ (plus:DWIH (match_dup 0) (match_dup 1)))])
+ (parallel [(set (match_dup 5)
+ (plus:DWIH
+ (plus:DWIH
+ (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
+ (match_dup 5))
+ (const_int 0)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[5]);")
+
(define_insn "*add<mode>_1"
[(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r")
(plus:SWI48
(const_string "*")))
(set_attr "mode" "<MODE>")])
+;; QImode add whose first addend is the QImode subreg of a byte selected
+;; from operand 2 by an extract_operator with size 8 at position 8 (printed
+;; as %h2), and whose second addend, operand 1, is tied to the destination.
+;; Emits add{b}.  The memory-destination alternative is restricted to isa
+;; "nox64".  FLAGS_REG is clobbered.
+(define_insn "*addqi_ext<mode>_0"
+ [(set (match_operand:QI 0 "nonimm_x64constmem_operand" "=QBc,m")
+ (plus:QI
+ (subreg:QI
+ (match_operator:SWI248 3 "extract_operator"
+ [(match_operand 2 "int248_register_operand" "Q,Q")
+ (const_int 8)
+ (const_int 8)]) 0)
+ (match_operand:QI 1 "nonimm_x64constmem_operand" "0,0")))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "add{b}\t{%h2, %0|%0, %h2}"
+ [(set_attr "isa" "*,nox64")
+ (set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
(define_expand "addqi_ext_1"
[(parallel
[(set (zero_extract:HI (match_operand:HI 0 "register_operand")
(define_insn "*addqi_ext<mode>_1"
[(set (zero_extract:SWI248
- (match_operand:SWI248 0 "register_operand" "+Q,Q")
+ (match_operand 0 "int248_register_operand" "+Q,Q")
(const_int 8)
(const_int 8))
(subreg:SWI248
(plus:QI
(subreg:QI
- (zero_extract:SWI248
- (match_operand:SWI248 1 "register_operand" "0,0")
- (const_int 8)
- (const_int 8)) 0)
- (match_operand:QI 2 "general_operand" "QnBc,m")) 0))
+ (match_operator:SWI248 3 "extract_operator"
+ [(match_operand 1 "int248_register_operand" "0,0")
+ (const_int 8)
+ (const_int 8)]) 0)
+ (match_operand:QI 2 "general_x64constmem_operand" "QnBc,m")) 0))
(clobber (reg:CC FLAGS_REG))]
"/* FIXME: without this LRA can't reload this pattern, see PR82524. */
rtx_equal_p (operands[0], operands[1])"
(define_insn "*addqi_ext<mode>_2"
[(set (zero_extract:SWI248
- (match_operand:SWI248 0 "register_operand" "+Q")
+ (match_operand 0 "int248_register_operand" "+Q")
(const_int 8)
(const_int 8))
(subreg:SWI248
(plus:QI
(subreg:QI
- (zero_extract:SWI248
- (match_operand:SWI248 1 "register_operand" "%0")
- (const_int 8)
- (const_int 8)) 0)
+ (match_operator:SWI248 3 "extract_operator"
+ [(match_operand 1 "int248_register_operand" "%0")
+ (const_int 8)
+ (const_int 8)]) 0)
(subreg:QI
- (zero_extract:SWI248
- (match_operand:SWI248 2 "register_operand" "Q")
- (const_int 8)
- (const_int 8)) 0)) 0))
+ (match_operator:SWI248 4 "extract_operator"
+ [(match_operand 2 "int248_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)]) 0)) 0))
(clobber (reg:CC FLAGS_REG))]
"/* FIXME: without this LRA can't reload this pattern, see PR82524. */
rtx_equal_p (operands[0], operands[1])
(define_insn_and_split "*lea<mode>_general_1"
[(set (match_operand:SWI12 0 "register_operand" "=r")
(plus:SWI12
- (plus:SWI12 (match_operand:SWI12 1 "index_register_operand" "l")
+ (plus:SWI12 (match_operand:SWI12 1 "register_no_SP_operand" "l")
(match_operand:SWI12 2 "register_operand" "r"))
(match_operand:SWI12 3 "immediate_operand" "i")))]
"!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
(define_insn_and_split "*lea<mode>_general_2"
[(set (match_operand:SWI12 0 "register_operand" "=r")
(plus:SWI12
- (mult:SWI12 (match_operand:SWI12 1 "index_register_operand" "l")
+ (mult:SWI12 (match_operand:SWI12 1 "register_no_SP_operand" "l")
(match_operand 2 "const248_operand" "n"))
(match_operand:SWI12 3 "nonmemory_operand" "ri")))]
"!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
(define_insn_and_split "*lea<mode>_general_2b"
[(set (match_operand:SWI12 0 "register_operand" "=r")
(plus:SWI12
- (ashift:SWI12 (match_operand:SWI12 1 "index_register_operand" "l")
+ (ashift:SWI12 (match_operand:SWI12 1 "register_no_SP_operand" "l")
(match_operand 2 "const123_operand" "n"))
(match_operand:SWI12 3 "nonmemory_operand" "ri")))]
"!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
[(set (match_operand:SWI12 0 "register_operand" "=r")
(plus:SWI12
(plus:SWI12
- (mult:SWI12 (match_operand:SWI12 1 "index_register_operand" "l")
+ (mult:SWI12 (match_operand:SWI12 1 "register_no_SP_operand" "l")
(match_operand 2 "const248_operand" "n"))
(match_operand:SWI12 3 "register_operand" "r"))
(match_operand:SWI12 4 "immediate_operand" "i")))]
[(set (match_operand:SWI12 0 "register_operand" "=r")
(plus:SWI12
(plus:SWI12
- (ashift:SWI12 (match_operand:SWI12 1 "index_register_operand" "l")
+ (ashift:SWI12 (match_operand:SWI12 1 "register_no_SP_operand" "l")
(match_operand 2 "const123_operand" "n"))
(match_operand:SWI12 3 "register_operand" "r"))
(match_operand:SWI12 4 "immediate_operand" "i")))]
[(set (match_operand:SWI12 0 "register_operand" "=r")
(any_or:SWI12
(ashift:SWI12
- (match_operand:SWI12 1 "index_register_operand" "l")
+ (match_operand:SWI12 1 "register_no_SP_operand" "l")
(match_operand 2 "const_0_to_3_operand"))
(match_operand 3 "const_int_operand")))]
"(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
[(set (match_operand:SWI48 0 "register_operand" "=r")
(any_or:SWI48
(ashift:SWI48
- (match_operand:SWI48 1 "index_register_operand" "l")
+ (match_operand:SWI48 1 "register_no_SP_operand" "l")
(match_operand 2 "const_0_to_3_operand"))
(match_operand 3 "const_int_operand")))]
"(unsigned HOST_WIDE_INT) INTVAL (operands[3])
[(set_attr "type" "alu")
(set_attr "mode" "SI")])
+;; QImode subtract whose minuend, operand 1, is tied to the destination and
+;; whose subtrahend is the QImode subreg of a byte selected from operand 2
+;; by an extract_operator with size 8 at position 8 (printed as %h2).
+;; Emits sub{b}.  The memory-destination alternative is restricted to isa
+;; "nox64".  FLAGS_REG is clobbered.
+(define_insn "*subqi_ext<mode>_0"
+ [(set (match_operand:QI 0 "nonimm_x64constmem_operand" "=QBc,m")
+ (minus:QI
+ (match_operand:QI 1 "nonimm_x64constmem_operand" "0,0")
+ (subreg:QI
+ (match_operator:SWI248 3 "extract_operator"
+ [(match_operand 2 "int248_register_operand" "Q,Q")
+ (const_int 8)
+ (const_int 8)]) 0)))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "sub{b}\t{%h2, %0|%0, %h2}"
+ [(set_attr "isa" "*,nox64")
+ (set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
(define_insn "*subqi_ext<mode>_2"
[(set (zero_extract:SWI248
- (match_operand:SWI248 0 "register_operand" "+Q")
+ (match_operand 0 "int248_register_operand" "+Q")
(const_int 8)
(const_int 8))
(subreg:SWI248
(minus:QI
(subreg:QI
- (zero_extract:SWI248
- (match_operand:SWI248 1 "register_operand" "0")
- (const_int 8)
- (const_int 8)) 0)
+ (match_operator:SWI248 3 "extract_operator"
+ [(match_operand 1 "int248_register_operand" "0")
+ (const_int 8)
+ (const_int 8)]) 0)
(subreg:QI
- (zero_extract:SWI248
- (match_operand:SWI248 2 "register_operand" "Q")
- (const_int 8)
- (const_int 8)) 0)) 0))
+ (match_operator:SWI248 4 "extract_operator"
+ [(match_operand 2 "int248_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)]) 0)) 0))
(clobber (reg:CC FLAGS_REG))]
"/* FIXME: without this LRA can't reload this pattern, see PR82524. */
rtx_equal_p (operands[0], operands[1])"
[(set (reg:CC FLAGS_REG)
(compare:CC (match_dup 0) (match_dup 1)))])
+;; Window matched:
+;;   reg0 = mem1;
+;;   {flags = compare (reg0, mem2); reg0 = reg0 - mem2};
+;;   mem1 = reg0
+;; Replacement (subtract performed directly on memory):
+;;   reg0 = mem2;
+;;   {flags = compare (mem1, reg0); mem1 = mem1 - reg0}
+;; Requires TARGET_READ_MODIFY_WRITE or optimizing the insn for size, the
+;; peep2_reg_dead_p (3, ...) check on reg0, and reg0 not being mentioned in
+;; mem1 or mem2.
+(define_peephole2
+ [(set (match_operand:SWI 0 "general_reg_operand")
+ (match_operand:SWI 1 "memory_operand"))
+ (parallel [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_dup 0)
+ (match_operand:SWI 2 "memory_operand")))
+ (set (match_dup 0)
+ (minus:SWI (match_dup 0) (match_dup 2)))])
+ (set (match_dup 1) (match_dup 0))]
+ "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && peep2_reg_dead_p (3, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])"
+ [(set (match_dup 0) (match_dup 2))
+ (parallel [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_dup 1) (match_dup 0)))
+ (set (match_dup 1)
+ (minus:SWI (match_dup 1) (match_dup 0)))])])
+
;; decl %eax; cmpl $-1, %eax; jne .Lxx; can be optimized into
;; subl $1, %eax; jnc .Lxx;
(define_peephole2
(set_attr "pent_pair" "pu")
(set_attr "mode" "<MODE>")])
+;; Window matched (add with carry-in, flags clobbered):
+;;   reg0 = mem1;  reg0 = carry + reg0 + mem2;  mem1 = reg0
+;; Replacement (add performed directly on memory):
+;;   reg0 = mem2;  mem1 = carry + mem1 + reg0
+;; The carry term is the matched ix86_carry_flag_operator (operand 4) on
+;; the flags register (operand 3) and is reused unchanged.  Requires
+;; TARGET_READ_MODIFY_WRITE or optimizing the insn for size, the
+;; peep2_reg_dead_p (3, ...) check on reg0, and reg0 not being mentioned in
+;; mem1 or mem2.
+(define_peephole2
+ [(set (match_operand:SWI 0 "general_reg_operand")
+ (match_operand:SWI 1 "memory_operand"))
+ (parallel [(set (match_dup 0)
+ (plus:SWI
+ (plus:SWI
+ (match_operator:SWI 4 "ix86_carry_flag_operator"
+ [(match_operand 3 "flags_reg_operand")
+ (const_int 0)])
+ (match_dup 0))
+ (match_operand:SWI 2 "memory_operand")))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_dup 1) (match_dup 0))]
+ "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && peep2_reg_dead_p (3, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])"
+ [(set (match_dup 0) (match_dup 2))
+ (parallel [(set (match_dup 1)
+ (plus:SWI (plus:SWI (match_op_dup 4
+ [(match_dup 3) (const_int 0)])
+ (match_dup 1))
+ (match_dup 0)))
+ (clobber (reg:CC FLAGS_REG))])])
+
+;; Window matched (add with carry-in, result copied before the store):
+;;   reg0 = mem1;  reg0 = carry + reg0 + mem2;  reg5 = reg0;  mem1 = reg5
+;; Replacement (add performed directly on memory, copy dropped):
+;;   reg0 = mem2;  mem1 = carry + mem1 + reg0
+;; Requires TARGET_READ_MODIFY_WRITE or optimizing the insn for size, the
+;; peep2_reg_dead_p checks on reg0 (offset 3) and reg5 (offset 4), reg0 not
+;; being mentioned in mem1 or mem2, and reg5 not being mentioned in mem1.
+(define_peephole2
+ [(set (match_operand:SWI 0 "general_reg_operand")
+ (match_operand:SWI 1 "memory_operand"))
+ (parallel [(set (match_dup 0)
+ (plus:SWI
+ (plus:SWI
+ (match_operator:SWI 4 "ix86_carry_flag_operator"
+ [(match_operand 3 "flags_reg_operand")
+ (const_int 0)])
+ (match_dup 0))
+ (match_operand:SWI 2 "memory_operand")))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_operand:SWI 5 "general_reg_operand") (match_dup 0))
+ (set (match_dup 1) (match_dup 5))]
+ "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && peep2_reg_dead_p (3, operands[0])
+ && peep2_reg_dead_p (4, operands[5])
+ && !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])
+ && !reg_overlap_mentioned_p (operands[5], operands[1])"
+ [(set (match_dup 0) (match_dup 2))
+ (parallel [(set (match_dup 1)
+ (plus:SWI (plus:SWI (match_op_dup 4
+ [(match_dup 3) (const_int 0)])
+ (match_dup 1))
+ (match_dup 0)))
+ (clobber (reg:CC FLAGS_REG))])])
+
(define_insn "*add<mode>3_carry_0"
[(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
(plus:SWI
(set_attr "pent_pair" "pu")
(set_attr "mode" "<MODE>")])
+;; Window matched (add with carry-in that also produces a CCC result):
+;;   {CCC flags = compare (zext (carry + reg0 + mem1), zext (mem1) + carry);
+;;    reg0 = carry + reg0 + mem1};
+;;   mem1 = reg0
+;; Replacement (same operation with memory as the destination):
+;;   {CCC flags = compare (zext (carry + mem1 + reg0), zext (reg0) + carry);
+;;    mem1 = carry + mem1 + reg0}
+;; Operand 4 is the carry operator in SWI48 mode and operand 3 the one in
+;; the double-width mode; both read the flags register matched as operand
+;; 2.  Requires TARGET_READ_MODIFY_WRITE or optimizing the insn for size,
+;; the peep2_reg_dead_p (2, ...) check on reg0, and reg0 not being
+;; mentioned in mem1.
+(define_peephole2
+ [(parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI>
+ (plus:SWI48
+ (plus:SWI48
+ (match_operator:SWI48 4 "ix86_carry_flag_operator"
+ [(match_operand 2 "flags_reg_operand")
+ (const_int 0)])
+ (match_operand:SWI48 0 "general_reg_operand"))
+ (match_operand:SWI48 1 "memory_operand")))
+ (plus:<DWI>
+ (zero_extend:<DWI> (match_dup 1))
+ (match_operator:<DWI> 3 "ix86_carry_flag_operator"
+ [(match_dup 2) (const_int 0)]))))
+ (set (match_dup 0)
+ (plus:SWI48 (plus:SWI48 (match_op_dup 4
+ [(match_dup 2) (const_int 0)])
+ (match_dup 0))
+ (match_dup 1)))])
+ (set (match_dup 1) (match_dup 0))]
+ "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && peep2_reg_dead_p (2, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[1])"
+ [(parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI>
+ (plus:SWI48
+ (plus:SWI48
+ (match_op_dup 4
+ [(match_dup 2) (const_int 0)])
+ (match_dup 1))
+ (match_dup 0)))
+ (plus:<DWI>
+ (zero_extend:<DWI> (match_dup 0))
+ (match_op_dup 3
+ [(match_dup 2) (const_int 0)]))))
+ (set (match_dup 1)
+ (plus:SWI48 (plus:SWI48 (match_op_dup 4
+ [(match_dup 2) (const_int 0)])
+ (match_dup 1))
+ (match_dup 0)))])])
+
+;; Window matched (load, then add with carry-in producing a CCC result):
+;;   reg0 = mem1;
+;;   {CCC flags = compare (zext (carry + reg0 + mem2), zext (mem2) + carry);
+;;    reg0 = carry + reg0 + mem2};
+;;   mem1 = reg0
+;; Replacement (add performed directly on memory):
+;;   reg0 = mem2;
+;;   {CCC flags = compare (zext (carry + mem1 + reg0), zext (reg0) + carry);
+;;    mem1 = carry + mem1 + reg0}
+;; Operand 5 is the carry operator in SWI48 mode and operand 4 the one in
+;; the double-width mode; both read the flags register matched as operand
+;; 3.  Requires TARGET_READ_MODIFY_WRITE or optimizing the insn for size,
+;; the peep2_reg_dead_p (3, ...) check on reg0, and reg0 not being
+;; mentioned in mem1 or mem2.
+(define_peephole2
+ [(set (match_operand:SWI48 0 "general_reg_operand")
+ (match_operand:SWI48 1 "memory_operand"))
+ (parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI>
+ (plus:SWI48
+ (plus:SWI48
+ (match_operator:SWI48 5 "ix86_carry_flag_operator"
+ [(match_operand 3 "flags_reg_operand")
+ (const_int 0)])
+ (match_dup 0))
+ (match_operand:SWI48 2 "memory_operand")))
+ (plus:<DWI>
+ (zero_extend:<DWI> (match_dup 2))
+ (match_operator:<DWI> 4 "ix86_carry_flag_operator"
+ [(match_dup 3) (const_int 0)]))))
+ (set (match_dup 0)
+ (plus:SWI48 (plus:SWI48 (match_op_dup 5
+ [(match_dup 3) (const_int 0)])
+ (match_dup 0))
+ (match_dup 2)))])
+ (set (match_dup 1) (match_dup 0))]
+ "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && peep2_reg_dead_p (3, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])"
+ [(set (match_dup 0) (match_dup 2))
+ (parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI>
+ (plus:SWI48
+ (plus:SWI48
+ (match_op_dup 5
+ [(match_dup 3) (const_int 0)])
+ (match_dup 1))
+ (match_dup 0)))
+ (plus:<DWI>
+ (zero_extend:<DWI> (match_dup 0))
+ (match_op_dup 4
+ [(match_dup 3) (const_int 0)]))))
+ (set (match_dup 1)
+ (plus:SWI48 (plus:SWI48 (match_op_dup 5
+ [(match_dup 3) (const_int 0)])
+ (match_dup 1))
+ (match_dup 0)))])])
+
+;; Window matched (add with carry-in producing a CCC result, carry-out
+;; materialized before the store):
+;;   {CCC flags = compare (zext (carry + reg0 + mem1), zext (mem1) + carry);
+;;    reg0 = carry + reg0 + mem1};
+;;   reg5 = (ltu:QI flags 0);
+;;   reg6 = zero_extend (reg5);
+;;   mem1 = reg0
+;; Replacement (add performed directly on memory, carry-out kept):
+;;   {CCC flags = compare (zext (carry + mem1 + reg0), zext (reg0) + carry);
+;;    mem1 = carry + mem1 + reg0};
+;;   reg5 = (ltu:QI flags 0);
+;;   reg6 = zero_extend (reg5)
+;; Requires TARGET_READ_MODIFY_WRITE or optimizing the insn for size, the
+;; peep2_reg_dead_p (4, ...) check on reg0, and no overlap between reg0 and
+;; mem1, reg0 and reg5, reg5 and mem1, reg0 and reg6, or reg6 and mem1.
+(define_peephole2
+ [(parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI>
+ (plus:SWI48
+ (plus:SWI48
+ (match_operator:SWI48 4 "ix86_carry_flag_operator"
+ [(match_operand 2 "flags_reg_operand")
+ (const_int 0)])
+ (match_operand:SWI48 0 "general_reg_operand"))
+ (match_operand:SWI48 1 "memory_operand")))
+ (plus:<DWI>
+ (zero_extend:<DWI> (match_dup 1))
+ (match_operator:<DWI> 3 "ix86_carry_flag_operator"
+ [(match_dup 2) (const_int 0)]))))
+ (set (match_dup 0)
+ (plus:SWI48 (plus:SWI48 (match_op_dup 4
+ [(match_dup 2) (const_int 0)])
+ (match_dup 0))
+ (match_dup 1)))])
+ (set (match_operand:QI 5 "general_reg_operand")
+ (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
+ (set (match_operand:SWI48 6 "general_reg_operand")
+ (zero_extend:SWI48 (match_dup 5)))
+ (set (match_dup 1) (match_dup 0))]
+ "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && peep2_reg_dead_p (4, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[5])
+ && !reg_overlap_mentioned_p (operands[5], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[6])
+ && !reg_overlap_mentioned_p (operands[6], operands[1])"
+ [(parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI>
+ (plus:SWI48
+ (plus:SWI48
+ (match_op_dup 4
+ [(match_dup 2) (const_int 0)])
+ (match_dup 1))
+ (match_dup 0)))
+ (plus:<DWI>
+ (zero_extend:<DWI> (match_dup 0))
+ (match_op_dup 3
+ [(match_dup 2) (const_int 0)]))))
+ (set (match_dup 1)
+ (plus:SWI48 (plus:SWI48 (match_op_dup 4
+ [(match_dup 2) (const_int 0)])
+ (match_dup 1))
+ (match_dup 0)))])
+ (set (match_dup 5) (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
+ (set (match_dup 6) (zero_extend:SWI48 (match_dup 5)))])
+
(define_expand "addcarry<mode>_0"
[(parallel
[(set (reg:CCC FLAGS_REG)
(set_attr "pent_pair" "pu")
(set_attr "mode" "<MODE>")])
+;; Window matched (subtract with borrow-in, flags clobbered):
+;;   reg0 = mem1;  reg0 = reg0 - carry - mem2;  mem1 = reg0
+;; Replacement (subtract performed directly on memory):
+;;   reg0 = mem2;  mem1 = mem1 - carry - reg0
+;; The carry term is the matched ix86_carry_flag_operator (operand 4) on
+;; the flags register (operand 3) and is reused unchanged.  Requires
+;; TARGET_READ_MODIFY_WRITE or optimizing the insn for size, the
+;; peep2_reg_dead_p (3, ...) check on reg0, and reg0 not being mentioned in
+;; mem1 or mem2.
+(define_peephole2
+ [(set (match_operand:SWI 0 "general_reg_operand")
+ (match_operand:SWI 1 "memory_operand"))
+ (parallel [(set (match_dup 0)
+ (minus:SWI
+ (minus:SWI
+ (match_dup 0)
+ (match_operator:SWI 4 "ix86_carry_flag_operator"
+ [(match_operand 3 "flags_reg_operand")
+ (const_int 0)]))
+ (match_operand:SWI 2 "memory_operand")))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_dup 1) (match_dup 0))]
+ "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && peep2_reg_dead_p (3, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])"
+ [(set (match_dup 0) (match_dup 2))
+ (parallel [(set (match_dup 1)
+ (minus:SWI (minus:SWI (match_dup 1)
+ (match_op_dup 4
+ [(match_dup 3) (const_int 0)]))
+ (match_dup 0)))
+ (clobber (reg:CC FLAGS_REG))])])
+
+;; Window matched (subtract with borrow-in, result copied before the store):
+;;   reg0 = mem1;  reg0 = reg0 - carry - mem2;  reg5 = reg0;  mem1 = reg5
+;; Replacement (subtract performed directly on memory, copy dropped):
+;;   reg0 = mem2;  mem1 = mem1 - carry - reg0
+;; Requires TARGET_READ_MODIFY_WRITE or optimizing the insn for size, the
+;; peep2_reg_dead_p checks on reg0 (offset 3) and reg5 (offset 4), reg0 not
+;; being mentioned in mem1 or mem2, and reg5 not being mentioned in mem1.
+(define_peephole2
+ [(set (match_operand:SWI 0 "general_reg_operand")
+ (match_operand:SWI 1 "memory_operand"))
+ (parallel [(set (match_dup 0)
+ (minus:SWI
+ (minus:SWI
+ (match_dup 0)
+ (match_operator:SWI 4 "ix86_carry_flag_operator"
+ [(match_operand 3 "flags_reg_operand")
+ (const_int 0)]))
+ (match_operand:SWI 2 "memory_operand")))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_operand:SWI 5 "general_reg_operand") (match_dup 0))
+ (set (match_dup 1) (match_dup 5))]
+ "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && peep2_reg_dead_p (3, operands[0])
+ && peep2_reg_dead_p (4, operands[5])
+ && !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])
+ && !reg_overlap_mentioned_p (operands[5], operands[1])"
+ [(set (match_dup 0) (match_dup 2))
+ (parallel [(set (match_dup 1)
+ (minus:SWI (minus:SWI (match_dup 1)
+ (match_op_dup 4
+ [(match_dup 3) (const_int 0)]))
+ (match_dup 0)))
+ (clobber (reg:CC FLAGS_REG))])])
+
(define_insn "*sub<mode>3_carry_0"
[(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
(minus:SWI
[(set (reg:CCC FLAGS_REG)
(compare:CCC
(zero_extend:<DWI>
- (match_operand:SWI48 1 "nonimmediate_operand" "0"))
+ (match_operand:SWI48 1 "nonimmediate_operand" "0,0"))
(plus:<DWI>
(match_operator:<DWI> 4 "ix86_carry_flag_operator"
[(match_operand 3 "flags_reg_operand") (const_int 0)])
(zero_extend:<DWI>
- (match_operand:SWI48 2 "nonimmediate_operand" "rm")))))
- (set (match_operand:SWI48 0 "register_operand" "=r")
+ (match_operand:SWI48 2 "nonimmediate_operand" "r,rm")))))
+ (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
(minus:SWI48 (minus:SWI48
(match_dup 1)
(match_operator:SWI48 5 "ix86_carry_flag_operator"
(set_attr "pent_pair" "pu")
(set_attr "mode" "<MODE>")])
+;; Window matched (load, then subtract with borrow-in producing a CCC
+;; result):
+;;   reg0 = mem1;
+;;   {CCC flags = compare (zext (reg0), carry + zext (mem2));
+;;    reg0 = reg0 - carry - mem2};
+;;   mem1 = reg0
+;; Replacement (subtract performed directly on memory):
+;;   reg0 = mem2;
+;;   {CCC flags = compare (zext (mem1), carry + zext (reg0));
+;;    mem1 = mem1 - carry - reg0}
+;; Operand 4 is the carry operator in the double-width mode and operand 5
+;; the one in SWI48 mode; both read the flags register matched as operand
+;; 3.  Requires TARGET_READ_MODIFY_WRITE or optimizing the insn for size,
+;; the peep2_reg_dead_p (3, ...) check on reg0, and reg0 not being
+;; mentioned in mem1 or mem2.
+(define_peephole2
+ [(set (match_operand:SWI48 0 "general_reg_operand")
+ (match_operand:SWI48 1 "memory_operand"))
+ (parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI> (match_dup 0))
+ (plus:<DWI>
+ (match_operator:<DWI> 4 "ix86_carry_flag_operator"
+ [(match_operand 3 "flags_reg_operand") (const_int 0)])
+ (zero_extend:<DWI>
+ (match_operand:SWI48 2 "memory_operand")))))
+ (set (match_dup 0)
+ (minus:SWI48
+ (minus:SWI48
+ (match_dup 0)
+ (match_operator:SWI48 5 "ix86_carry_flag_operator"
+ [(match_dup 3) (const_int 0)]))
+ (match_dup 2)))])
+ (set (match_dup 1) (match_dup 0))]
+ "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && peep2_reg_dead_p (3, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])"
+ [(set (match_dup 0) (match_dup 2))
+ (parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI> (match_dup 1))
+ (plus:<DWI> (match_op_dup 4
+ [(match_dup 3) (const_int 0)])
+ (zero_extend:<DWI> (match_dup 0)))))
+ (set (match_dup 1)
+ (minus:SWI48 (minus:SWI48 (match_dup 1)
+ (match_op_dup 5
+ [(match_dup 3) (const_int 0)]))
+ (match_dup 0)))])])
+
+;; Subtract with borrow-in (CCC result) where both inputs were first
+;; loaded into registers, in either order, and the subtract itself is
+;; register-register:
+;;   reg6 = mem7;  reg8 = mem9;
+;;   {CCC flags = compare (zext (reg0), carry + zext (reg2));
+;;    reg0 = reg0 - carry - reg2};
+;;   mem1 = reg0
+;; The condition accepts two operand assignments:
+;;   - reg6 is reg0, mem7 is mem1 and reg8 is reg2, or
+;;   - reg8 is reg0, mem9 is mem1 and reg6 is reg2.
+;; Replacement (only the subtrahend is loaded; subtract done on memory):
+;;   reg0 = mem9;
+;;   {CCC flags = compare (zext (mem1), carry + zext (reg0));
+;;    mem1 = mem1 - carry - reg0}
+;; In the second assignment the preparation code overwrites operands[9]
+;; with operands[7], so (match_dup 9) always names the memory that reg2
+;; was loaded from.  Also requires TARGET_READ_MODIFY_WRITE or optimizing
+;; the insn for size, the peep2_reg_dead_p checks on reg0 (offset 4) and
+;; reg2 (offset 3), reg0 and reg2 not being mentioned in mem1, and reg6 not
+;; being mentioned in mem9.
+(define_peephole2
+ [(set (match_operand:SWI48 6 "general_reg_operand")
+ (match_operand:SWI48 7 "memory_operand"))
+ (set (match_operand:SWI48 8 "general_reg_operand")
+ (match_operand:SWI48 9 "memory_operand"))
+ (parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI>
+ (match_operand:SWI48 0 "general_reg_operand"))
+ (plus:<DWI>
+ (match_operator:<DWI> 4 "ix86_carry_flag_operator"
+ [(match_operand 3 "flags_reg_operand") (const_int 0)])
+ (zero_extend:<DWI>
+ (match_operand:SWI48 2 "general_reg_operand")))))
+ (set (match_dup 0)
+ (minus:SWI48
+ (minus:SWI48
+ (match_dup 0)
+ (match_operator:SWI48 5 "ix86_carry_flag_operator"
+ [(match_dup 3) (const_int 0)]))
+ (match_dup 2)))])
+ (set (match_operand:SWI48 1 "memory_operand") (match_dup 0))]
+ "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && peep2_reg_dead_p (4, operands[0])
+ && peep2_reg_dead_p (3, operands[2])
+ && !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[2], operands[1])
+ && !reg_overlap_mentioned_p (operands[6], operands[9])
+ && (rtx_equal_p (operands[6], operands[0])
+ ? (rtx_equal_p (operands[7], operands[1])
+ && rtx_equal_p (operands[8], operands[2]))
+ : (rtx_equal_p (operands[8], operands[0])
+ && rtx_equal_p (operands[9], operands[1])
+ && rtx_equal_p (operands[6], operands[2])))"
+ [(set (match_dup 0) (match_dup 9))
+ (parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI> (match_dup 1))
+ (plus:<DWI> (match_op_dup 4
+ [(match_dup 3) (const_int 0)])
+ (zero_extend:<DWI> (match_dup 0)))))
+ (set (match_dup 1)
+ (minus:SWI48 (minus:SWI48 (match_dup 1)
+ (match_op_dup 5
+ [(match_dup 3) (const_int 0)]))
+ (match_dup 0)))])]
+{
+ /* When the loads came in the other order, the subtrahend's memory is
+ operand 7; redirect operand 9 so the emitted load uses it. */
+ if (!rtx_equal_p (operands[6], operands[0]))
+ operands[9] = operands[7];
+})
+
+;; Variant of the load / load / subtract-with-borrow / store peephole
+;; in which the carry flag produced by the subtraction is also read:
+;; between the arithmetic parallel and the store, operand 10 (QImode)
+;; is set from an LTU test of the CCC flags and operand 11 is set to
+;; the zero-extension of operand 10.
+;; The condition is the one used without the flag read (loads accepted
+;; in either order, peep2_reg_dead_p for operand 0 at offset 6 and for
+;; operand 2 at offset 3), plus checks that operands 10 and 11 overlap
+;; neither operand 0 nor the address of memory operand 1.
+;; Replacement: load the subtrahend's memory into operand 0, apply the
+;; borrow-subtract to memory operand 1, then re-emit the flag read and
+;; its zero-extension unchanged.
+(define_peephole2
+ [(set (match_operand:SWI48 6 "general_reg_operand")
+ (match_operand:SWI48 7 "memory_operand"))
+ (set (match_operand:SWI48 8 "general_reg_operand")
+ (match_operand:SWI48 9 "memory_operand"))
+ (parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI>
+ (match_operand:SWI48 0 "general_reg_operand"))
+ (plus:<DWI>
+ (match_operator:<DWI> 4 "ix86_carry_flag_operator"
+ [(match_operand 3 "flags_reg_operand") (const_int 0)])
+ (zero_extend:<DWI>
+ (match_operand:SWI48 2 "general_reg_operand")))))
+ (set (match_dup 0)
+ (minus:SWI48
+ (minus:SWI48
+ (match_dup 0)
+ (match_operator:SWI48 5 "ix86_carry_flag_operator"
+ [(match_dup 3) (const_int 0)]))
+ (match_dup 2)))])
+ (set (match_operand:QI 10 "general_reg_operand")
+ (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
+ (set (match_operand:SWI48 11 "general_reg_operand")
+ (zero_extend:SWI48 (match_dup 10)))
+ (set (match_operand:SWI48 1 "memory_operand") (match_dup 0))]
+ "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && peep2_reg_dead_p (6, operands[0])
+ && peep2_reg_dead_p (3, operands[2])
+ && !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[2], operands[1])
+ && !reg_overlap_mentioned_p (operands[6], operands[9])
+ && !reg_overlap_mentioned_p (operands[0], operands[10])
+ && !reg_overlap_mentioned_p (operands[10], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[11])
+ && !reg_overlap_mentioned_p (operands[11], operands[1])
+ && (rtx_equal_p (operands[6], operands[0])
+ ? (rtx_equal_p (operands[7], operands[1])
+ && rtx_equal_p (operands[8], operands[2]))
+ : (rtx_equal_p (operands[8], operands[0])
+ && rtx_equal_p (operands[9], operands[1])
+ && rtx_equal_p (operands[6], operands[2])))"
+ [(set (match_dup 0) (match_dup 9))
+ (parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI> (match_dup 1))
+ (plus:<DWI> (match_op_dup 4
+ [(match_dup 3) (const_int 0)])
+ (zero_extend:<DWI> (match_dup 0)))))
+ (set (match_dup 1)
+ (minus:SWI48 (minus:SWI48 (match_dup 1)
+ (match_op_dup 5
+ [(match_dup 3) (const_int 0)]))
+ (match_dup 0)))])
+ (set (match_dup 10) (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
+ (set (match_dup 11) (zero_extend:SWI48 (match_dup 10)))]
+{
+ /* The replacement always loads from operand 9.  When the first load
+ did not target operand 0, the subtrahend's memory is operand 7, so
+ redirect operand 9 to it.  */
+ if (!rtx_equal_p (operands[6], operands[0]))
+ operands[9] = operands[7];
+})
+
(define_expand "subborrow<mode>_0"
[(parallel
[(set (reg:CC FLAGS_REG)
(minus:SWI48 (match_dup 1) (match_dup 2)))])]
"ix86_binary_operator_ok (MINUS, <MODE>mode, operands)")
+;; Expander for an unsigned addition with carry-in and carry-out.
+;; Operand 0 is the destination handed to the addcarry pattern,
+;; operand 1 receives the carry-out (the flags' LTU test, zero-extended
+;; from QImode), operands 2 and 3 are the inputs handed to the addcarry
+;; pattern, and operand 4 is the carry-in.  A constant-zero carry-in
+;; uses the addcarry<mode>_0 form; anything else is first materialized
+;; by ix86_expand_carry.
+(define_expand "uaddc<mode>5"
+  [(match_operand:SWI48 0 "register_operand")
+   (match_operand:SWI48 1 "register_operand")
+   (match_operand:SWI48 2 "register_operand")
+   (match_operand:SWI48 3 "register_operand")
+   (match_operand:SWI48 4 "nonmemory_operand")]
+  ""
+{
+  rtx flags = gen_rtx_REG (CCCmode, FLAGS_REG);
+
+  if (operands[4] != const0_rtx)
+    {
+      /* Put the incoming carry into the flags, then add with carry.  */
+      ix86_expand_carry (operands[4]);
+      rtx carry_wide = gen_rtx_LTU (<DWI>mode, flags, const0_rtx);
+      rtx carry_narrow = gen_rtx_LTU (<MODE>mode, flags, const0_rtx);
+      emit_insn (gen_addcarry<mode> (operands[0], operands[2], operands[3],
+                                     flags, carry_wide, carry_narrow));
+    }
+  else
+    /* No incoming carry: use the carry-less form.  */
+    emit_insn (gen_addcarry<mode>_0 (operands[0], operands[2], operands[3]));
+
+  /* Read the resulting carry into a QImode temporary and widen it
+     into operand 1.  */
+  rtx carry_out = gen_reg_rtx (QImode);
+  rtx carry_test = gen_rtx_LTU (QImode, flags, const0_rtx);
+  emit_insn (gen_rtx_SET (carry_out, carry_test));
+  emit_insn (gen_zero_extendqi<mode>2 (operands[1], carry_out));
+  DONE;
+})
+
+;; Expander for an unsigned subtraction with borrow-in and borrow-out.
+;; Operand 0 is the destination handed to the subborrow pattern,
+;; operand 1 receives the borrow-out (the flags' LTU test, zero-extended
+;; from QImode), operands 2 and 3 are the inputs handed to the subborrow
+;; pattern, and operand 4 is the borrow-in.  A constant-zero borrow-in
+;; uses subborrow<mode>_0 and reads the flags in CCmode; anything else
+;; is materialized by ix86_expand_carry and read back in CCCmode.
+(define_expand "usubc<mode>5"
+  [(match_operand:SWI48 0 "register_operand")
+   (match_operand:SWI48 1 "register_operand")
+   (match_operand:SWI48 2 "register_operand")
+   (match_operand:SWI48 3 "register_operand")
+   (match_operand:SWI48 4 "nonmemory_operand")]
+  ""
+{
+  rtx flags;
+
+  if (operands[4] != const0_rtx)
+    {
+      /* Put the incoming borrow into the flags, then subtract with
+         borrow.  */
+      flags = gen_rtx_REG (CCCmode, FLAGS_REG);
+      ix86_expand_carry (operands[4]);
+      rtx borrow_wide = gen_rtx_LTU (<DWI>mode, flags, const0_rtx);
+      rtx borrow_narrow = gen_rtx_LTU (<MODE>mode, flags, const0_rtx);
+      emit_insn (gen_subborrow<mode> (operands[0], operands[2], operands[3],
+                                      flags, borrow_wide, borrow_narrow));
+    }
+  else
+    {
+      /* No incoming borrow: use the borrow-less form, whose flags are
+         read in CCmode.  */
+      flags = gen_rtx_REG (CCmode, FLAGS_REG);
+      emit_insn (gen_subborrow<mode>_0 (operands[0], operands[2],
+                                        operands[3]));
+    }
+
+  /* Read the resulting borrow into a QImode temporary and widen it
+     into operand 1.  */
+  rtx borrow_out = gen_reg_rtx (QImode);
+  rtx borrow_test = gen_rtx_LTU (QImode, flags, const0_rtx);
+  emit_insn (gen_rtx_SET (borrow_out, borrow_test));
+  emit_insn (gen_zero_extendqi<mode>2 (operands[1], borrow_out));
+  DONE;
+})
+
(define_mode_iterator CC_CCC [CC CCC])
;; Pre-reload splitter to optimize
"ix86_pre_reload_split ()"
"#"
"&& 1"
- [(const_int 0)])
+ [(const_int 0)]
+ "emit_note (NOTE_INSN_DELETED); DONE;")
+
+;; Set the carry flag from the carry flag.  The pattern copies the CCC
+;; flags register onto itself, so whenever the insn is recognized
+;; (condition ix86_pre_reload_split ()) the split replaces it with just
+;; a NOTE_INSN_DELETED note and emits no instruction.
+(define_insn_and_split "*setccc"
+ [(set (reg:CCC FLAGS_REG)
+ (reg:CCC FLAGS_REG))]
+ "ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ "emit_note (NOTE_INSN_DELETED); DONE;")
+
+;; Set the carry flag from an LTU test of the flags register itself,
+;; read in either CC or CCC mode (CC_CCC iterator).  Whenever the insn
+;; is recognized (condition ix86_pre_reload_split ()) the split replaces
+;; it with just a NOTE_INSN_DELETED note and emits no instruction.
+(define_insn_and_split "*setcc_qi_negqi_ccc_1_<mode>"
+ [(set (reg:CCC FLAGS_REG)
+ (ltu:CCC (reg:CC_CCC FLAGS_REG) (const_int 0)))]
+ "ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ "emit_note (NOTE_INSN_DELETED); DONE;")
+
+;; Set the carry flag from an UNSPEC_CC_NE of two values: the QImode
+;; LTU test of the flags register (read in CC or CCC mode, CC_CCC
+;; iterator) and constant zero.  Whenever the insn is recognized
+;; (condition ix86_pre_reload_split ()) the split replaces it with just
+;; a NOTE_INSN_DELETED note and emits no instruction.
+(define_insn_and_split "*setcc_qi_negqi_ccc_2_<mode>"
+ [(set (reg:CCC FLAGS_REG)
+ (unspec:CCC [(ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
+ (const_int 0)] UNSPEC_CC_NE))]
+ "ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ "emit_note (NOTE_INSN_DELETED); DONE;")
\f
;; Overflow setting add instructions
[(set_attr "type" "alu")
(set_attr "mode" "<MODE>")])
-(define_insn "*add<mode>3_cc_overflow_1"
+(define_insn "@add<mode>3_cc_overflow_1"
[(set (reg:CCC FLAGS_REG)
(compare:CCC
(plus:SWI
(match_dup 1)))
(set (match_dup 1) (plus:SWI (match_dup 1) (match_dup 0)))])])
+;; Fold load / add-with-overflow-check / store-back into an add applied
+;; directly to the memory location.
+;; Matched sequence: operand 0 is loaded from memory operand 1; a
+;; parallel sets the CCC flags from comparing (operand 0 + memory
+;; operand 2) against operand 0 and stores the sum in operand 0; the
+;; sum is then stored back to operand 1.
+;; Replacement: load memory operand 2 into operand 0, then perform the
+;; same compare and add with memory operand 1 as both source and
+;; destination and operand 0 as the addend.
+;; The condition requires peep2_reg_dead_p for operand 0 at offset 3 and
+;; that operand 0 is mentioned in neither memory address.
+(define_peephole2
+ [(set (match_operand:SWI 0 "general_reg_operand")
+ (match_operand:SWI 1 "memory_operand"))
+ (parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (plus:SWI (match_dup 0)
+ (match_operand:SWI 2 "memory_operand"))
+ (match_dup 0)))
+ (set (match_dup 0) (plus:SWI (match_dup 0) (match_dup 2)))])
+ (set (match_dup 1) (match_dup 0))]
+ "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && peep2_reg_dead_p (3, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])"
+ [(set (match_dup 0) (match_dup 2))
+ (parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (plus:SWI (match_dup 1) (match_dup 0))
+ (match_dup 1)))
+ (set (match_dup 1) (plus:SWI (match_dup 1) (match_dup 0)))])])
+
(define_insn "*addsi3_zext_cc_overflow_1"
[(set (reg:CCC FLAGS_REG)
(compare:CCC
[(parallel [(set (match_operand:<DWI> 0 "register_operand")
(mult:<DWI>
(any_extend:<DWI>
- (match_operand:DWIH 1 "nonimmediate_operand"))
+ (match_operand:DWIH 1 "register_operand"))
(any_extend:<DWI>
- (match_operand:DWIH 2 "register_operand"))))
+ (match_operand:DWIH 2 "nonimmediate_operand"))))
(clobber (reg:CC FLAGS_REG))])])
(define_expand "<u>mulqihi3"
[(parallel [(set (match_operand:HI 0 "register_operand")
(mult:HI
(any_extend:HI
- (match_operand:QI 1 "nonimmediate_operand"))
+ (match_operand:QI 1 "register_operand"))
(any_extend:HI
- (match_operand:QI 2 "register_operand"))))
+ (match_operand:QI 2 "nonimmediate_operand"))))
(clobber (reg:CC FLAGS_REG))])]
"TARGET_QIMODE_MATH")
(define_insn "*bmi2_umul<mode><dwi>3_1"
[(set (match_operand:DWIH 0 "register_operand" "=r")
(mult:DWIH
- (match_operand:DWIH 2 "nonimmediate_operand" "%d")
+ (match_operand:DWIH 2 "register_operand" "%d")
(match_operand:DWIH 3 "nonimmediate_operand" "rm")))
(set (match_operand:DWIH 1 "register_operand" "=r")
- (truncate:DWIH
- (lshiftrt:<DWI>
- (mult:<DWI> (zero_extend:<DWI> (match_dup 2))
- (zero_extend:<DWI> (match_dup 3)))
- (match_operand:QI 4 "const_int_operand"))))]
- "TARGET_BMI2 && INTVAL (operands[4]) == <MODE_SIZE> * BITS_PER_UNIT
+ (umul_highpart:DWIH (match_dup 2) (match_dup 3)))]
+ "TARGET_BMI2
&& !(MEM_P (operands[2]) && MEM_P (operands[3]))"
"mulx\t{%3, %0, %1|%1, %0, %3}"
[(set_attr "type" "imulx")
[(set (match_operand:<DWI> 0 "register_operand" "=r,A")
(mult:<DWI>
(zero_extend:<DWI>
- (match_operand:DWIH 1 "nonimmediate_operand" "%d,0"))
+ (match_operand:DWIH 1 "register_operand" "%d,a"))
(zero_extend:<DWI>
(match_operand:DWIH 2 "nonimmediate_operand" "rm,rm"))))
(clobber (reg:CC FLAGS_REG))]
[(parallel [(set (match_dup 3)
(mult:DWIH (match_dup 1) (match_dup 2)))
(set (match_dup 4)
- (truncate:DWIH
- (lshiftrt:<DWI>
- (mult:<DWI> (zero_extend:<DWI> (match_dup 1))
- (zero_extend:<DWI> (match_dup 2)))
- (match_dup 5))))])]
+ (umul_highpart:DWIH (match_dup 1) (match_dup 2)))])]
{
split_double_mode (<DWI>mode, &operands[0], 1, &operands[3], &operands[4]);
[(set (match_operand:<DWI> 0 "register_operand" "=A")
(mult:<DWI>
(sign_extend:<DWI>
- (match_operand:DWIH 1 "nonimmediate_operand" "%0"))
+ (match_operand:DWIH 1 "register_operand" "%a"))
(sign_extend:<DWI>
(match_operand:DWIH 2 "nonimmediate_operand" "rm"))))
(clobber (reg:CC FLAGS_REG))]
[(set (match_operand:HI 0 "register_operand" "=a")
(mult:HI
(any_extend:HI
- (match_operand:QI 1 "nonimmediate_operand" "%0"))
+ (match_operand:QI 1 "register_operand" "%0"))
(any_extend:HI
(match_operand:QI 2 "nonimmediate_operand" "qm"))))
(clobber (reg:CC FLAGS_REG))]
(set_attr "bdver1_decode" "direct")
(set_attr "mode" "QI")])
+;; Widening multiplication peephole2s to tweak register allocation.
+;; mov imm,%rdx; mov %rdi,%rax; mulq %rdx -> mov imm,%rax; mulq %rdi
+;; Matched sequence: operand 0 is loaded with immediate operand 1,
+;; operand 2 is copied from register operand 3, and operand 4 receives
+;; the product of the zero-extended operands 2 and 0.
+;; Replacement: load the immediate into operand 2 instead and multiply
+;; by operand 3 directly, which removes the register copy.
+;; The condition requires that operand 3 is not AX, that operand 0 is
+;; a different register from operands 2 and 3, and that operand 0 is
+;; either the multiply destination, DX, or satisfies peep2_reg_dead_p
+;; at offset 3 (it is no longer written by the replacement).
+(define_peephole2
+ [(set (match_operand:DWIH 0 "general_reg_operand")
+ (match_operand:DWIH 1 "immediate_operand"))
+ (set (match_operand:DWIH 2 "general_reg_operand")
+ (match_operand:DWIH 3 "general_reg_operand"))
+ (parallel [(set (match_operand:<DWI> 4 "general_reg_operand")
+ (mult:<DWI> (zero_extend:<DWI> (match_dup 2))
+ (zero_extend:<DWI> (match_dup 0))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "REGNO (operands[3]) != AX_REG
+ && REGNO (operands[0]) != REGNO (operands[2])
+ && REGNO (operands[0]) != REGNO (operands[3])
+ && (REGNO (operands[0]) == REGNO (operands[4])
+ || REGNO (operands[0]) == DX_REG
+ || peep2_reg_dead_p (3, operands[0]))"
+ [(set (match_dup 2) (match_dup 1))
+ (parallel [(set (match_dup 4)
+ (mult:<DWI> (zero_extend:<DWI> (match_dup 2))
+ (zero_extend:<DWI> (match_dup 3))))
+ (clobber (reg:CC FLAGS_REG))])])
+
+;; mov imm,%rax; mov %rdi,%rdx; mulx %rax -> mov imm,%rdx; mulx %rdi
+;; Matched sequence: operand 0 is loaded with immediate operand 1,
+;; operand 2 is copied from register operand 3, and a parallel computes
+;; the low product (operand 4) and the unsigned high part (operand 5)
+;; of operand 2 times operand 0.
+;; Replacement: load the immediate into operand 2 instead and multiply
+;; by operand 3 directly, which removes the register copy.
+;; The condition requires that operand 3 is not DX, that operand 0 is
+;; a different register from operands 2 and 3, and that operand 0 is
+;; either one of the two destinations or satisfies peep2_reg_dead_p at
+;; offset 3 (it is no longer written by the replacement).
+(define_peephole2
+ [(set (match_operand:DWIH 0 "general_reg_operand")
+ (match_operand:DWIH 1 "immediate_operand"))
+ (set (match_operand:DWIH 2 "general_reg_operand")
+ (match_operand:DWIH 3 "general_reg_operand"))
+ (parallel [(set (match_operand:DWIH 4 "general_reg_operand")
+ (mult:DWIH (match_dup 2) (match_dup 0)))
+ (set (match_operand:DWIH 5 "general_reg_operand")
+ (umul_highpart:DWIH (match_dup 2) (match_dup 0)))])]
+ "REGNO (operands[3]) != DX_REG
+ && REGNO (operands[0]) != REGNO (operands[2])
+ && REGNO (operands[0]) != REGNO (operands[3])
+ && (REGNO (operands[0]) == REGNO (operands[4])
+ || REGNO (operands[0]) == REGNO (operands[5])
+ || peep2_reg_dead_p (3, operands[0]))"
+ [(set (match_dup 2) (match_dup 1))
+ (parallel [(set (match_dup 4)
+ (mult:DWIH (match_dup 2) (match_dup 3)))
+ (set (match_dup 5)
+ (umul_highpart:DWIH (match_dup 2) (match_dup 3)))])])
+
;; Highpart multiplication patterns
(define_insn "<s>mul<mode>3_highpart"
[(set (match_operand:DWIH 0 "register_operand" "=d")
(compare
(and:QI
(subreg:QI
- (zero_extract:SWI248
- (match_operand:SWI248 0 "register_operand" "Q,Q")
- (const_int 8)
- (const_int 8)) 0)
- (match_operand:QI 1 "general_operand" "QnBc,m"))
+ (match_operator:SWI248 2 "extract_operator"
+ [(match_operand 0 "int248_register_operand" "Q,Q")
+ (const_int 8)
+ (const_int 8)]) 0)
+ (match_operand:QI 1 "general_x64constmem_operand" "QnBc,m"))
(const_int 0)))]
"ix86_match_ccmode (insn, CCNOmode)"
"test{b}\t{%1, %h0|%h0, %1}"
(compare
(and:QI
(subreg:QI
- (zero_extract:SWI248
- (match_operand:SWI248 0 "register_operand" "Q")
- (const_int 8)
- (const_int 8)) 0)
+ (match_operator:SWI248 2 "extract_operator"
+ [(match_operand 0 "int248_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)]) 0)
(subreg:QI
- (zero_extract:SWI248
- (match_operand:SWI248 1 "register_operand" "Q")
- (const_int 8)
- (const_int 8)) 0))
+ (match_operator:SWI248 3 "extract_operator"
+ [(match_operand 1 "int248_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)]) 0))
(const_int 0)))]
"ix86_match_ccmode (insn, CCNOmode)"
"test{b}\t{%h1, %h0|%h0, %h1}"
(match_operator 1 "compare_operator"
[(zero_extract:SWI248
(match_operand 2 "int_nonimmediate_operand" "rm")
- (match_operand 3 "const_int_operand")
- (match_operand 4 "const_int_operand"))
+ (match_operand:QI 3 "const_int_operand")
+ (match_operand:QI 4 "const_int_operand"))
(const_int 0)]))]
"/* Ensure that resulting mask is zero or sign extended operand. */
INTVAL (operands[4]) >= 0
(match_op_dup 1
[(and:QI
(subreg:QI
- (zero_extract:SI (match_dup 2)
+ (zero_extract:HI (match_dup 2)
(const_int 8)
(const_int 8)) 0)
(match_dup 3))
(const_int 0)]))]
{
- operands[2] = gen_lowpart (SImode, operands[2]);
+ operands[2] = gen_lowpart (HImode, operands[2]);
operands[3] = gen_int_mode (INTVAL (operands[3]) >> 8, QImode);
})
operands[3] = gen_int_mode (INTVAL (operands[3]), QImode);
})
+;; Narrow test instructions with immediate operands that test
+;; memory locations for zero.  E.g. testl $0x00aa0000, mem can be
+;; converted to testb $0xaa, mem+2.  Reject volatile locations and
+;; targets where reading (possibly unaligned) part of memory
+;; location after a large write to the same address causes
+;; store-to-load forwarding stall.
+;; Operand 0 is the tested memory, operand 1 the constant mask; the
+;; preparation code builds operand 2, the narrowed AND, or FAILs when
+;; no narrower access mode covers all tested bits.
+(define_peephole2
+  [(set (reg:CCZ FLAGS_REG)
+        (compare:CCZ
+          (and:SWI248 (match_operand:SWI248 0 "memory_operand")
+                      (match_operand 1 "const_int_operand"))
+          (const_int 0)))]
+  "!TARGET_PARTIAL_MEMORY_READ_STALL && !MEM_VOLATILE_P (operands[0])"
+  [(set (reg:CCZ FLAGS_REG)
+        (compare:CCZ (match_dup 2) (const_int 0)))]
+{
+  unsigned HOST_WIDE_INT ival = UINTVAL (operands[1]);
+  int first_nonzero_byte, bitsize;
+  rtx new_addr, new_const;
+  machine_mode new_mode;
+
+  /* Clear bits outside mode width.  */
+  ival &= GET_MODE_MASK (<MODE>mode);
+
+  /* Give up when no bit inside the mode is tested.  This check must
+     come after the masking above: operand 1 is matched without a mode,
+     and a mask of zero would otherwise reach ctz_hwi (0) and a shift
+     by the full width of IVAL below.  */
+  if (ival == 0)
+    FAIL;
+
+  /* Byte offset of the lowest byte that has a tested bit.  */
+  first_nonzero_byte = ctz_hwi (ival) / BITS_PER_UNIT;
+
+  /* Drop the all-zero low bytes so the mask starts at that byte.  */
+  ival >>= first_nonzero_byte * BITS_PER_UNIT;
+
+  /* Number of bits needed to hold the remaining mask.  */
+  bitsize = sizeof (ival) * BITS_PER_UNIT - clz_hwi (ival);
+
+  /* Pick the narrowest integer mode that holds the mask.  */
+  if (bitsize <= GET_MODE_BITSIZE (QImode))
+    new_mode = QImode;
+  else if (bitsize <= GET_MODE_BITSIZE (HImode))
+    new_mode = HImode;
+  else if (bitsize <= GET_MODE_BITSIZE (SImode))
+    new_mode = SImode;
+  else
+    new_mode = DImode;
+
+  /* Only worthwhile when the access really becomes narrower.  */
+  if (GET_MODE_SIZE (new_mode) >= GET_MODE_SIZE (<MODE>mode))
+    FAIL;
+
+  /* Re-address the memory at the first tested byte in the new mode
+     and test it against the shifted mask.  */
+  new_addr = adjust_address (operands[0], new_mode, first_nonzero_byte);
+  new_const = gen_int_mode (ival, new_mode);
+
+  operands[2] = gen_rtx_AND (new_mode, new_addr, new_const);
+})
+
;; %%% This used to optimize known byte-wide and operations to memory,
;; and sometimes to QImode registers. If this is considered useful,
;; it should be done with splitters.
and{q}\t{%2, %0|%0, %2}
#
#"
- [(set_attr "isa" "x64,x64,x64,x64,avx512bw")
+ [(set_attr "isa" "x64,x64,x64,x64,avx512bw_512")
(set_attr "type" "alu,alu,alu,imovx,msklog")
(set_attr "length_immediate" "*,*,*,0,*")
(set (attr "prefix_rex")
"(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
&& reload_completed"
[(parallel
- [(set (zero_extract:SI (match_dup 0)
+ [(set (zero_extract:HI (match_dup 0)
(const_int 8)
(const_int 8))
- (subreg:SI
+ (subreg:HI
(xor:QI
(subreg:QI
- (zero_extract:SI (match_dup 0)
+ (zero_extract:HI (match_dup 0)
(const_int 8)
(const_int 8)) 0)
(subreg:QI
- (zero_extract:SI (match_dup 0)
+ (zero_extract:HI (match_dup 0)
(const_int 8)
(const_int 8)) 0)) 0))
(clobber (reg:CC FLAGS_REG))])]
- "operands[0] = gen_lowpart (SImode, operands[0]);")
+ "operands[0] = gen_lowpart (HImode, operands[0]);")
(define_insn "*anddi_2"
[(set (reg FLAGS_REG)
[(set_attr "type" "alu")
(set_attr "mode" "<MODE>")])
+;; QImode AND of operand 1 (tied to the destination, operand 0) with
+;; the 8-bit field at bit position 8 extracted from register operand 2.
+;; The output template prints operand 2 with the %h modifier.  The
+;; alternative with a memory destination is enabled only for the
+;; "nox64" isa.
+(define_insn "*andqi_ext<mode>_0"
+ [(set (match_operand:QI 0 "nonimm_x64constmem_operand" "=QBc,m")
+ (and:QI
+ (subreg:QI
+ (match_operator:SWI248 3 "extract_operator"
+ [(match_operand 2 "int248_register_operand" "Q,Q")
+ (const_int 8)
+ (const_int 8)]) 0)
+ (match_operand:QI 1 "nonimm_x64constmem_operand" "0,0")))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "and{b}\t{%h2, %0|%0, %h2}"
+ [(set_attr "isa" "*,nox64")
+ (set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
(define_expand "andqi_ext_1"
[(parallel
[(set (zero_extract:HI (match_operand:HI 0 "register_operand")
(define_insn "*andqi_ext<mode>_1"
[(set (zero_extract:SWI248
- (match_operand:SWI248 0 "register_operand" "+Q,Q")
+ (match_operand 0 "int248_register_operand" "+Q,Q")
(const_int 8)
(const_int 8))
(subreg:SWI248
(and:QI
(subreg:QI
- (zero_extract:SWI248
- (match_operand:SWI248 1 "register_operand" "0,0")
- (const_int 8)
- (const_int 8)) 0)
- (match_operand:QI 2 "general_operand" "QnBc,m")) 0))
+ (match_operator:SWI248 3 "extract_operator"
+ [(match_operand 1 "int248_register_operand" "0,0")
+ (const_int 8)
+ (const_int 8)]) 0)
+ (match_operand:QI 2 "general_x64constmem_operand" "QnBc,m")) 0))
(clobber (reg:CC FLAGS_REG))]
"/* FIXME: without this LRA can't reload this pattern, see PR82524. */
rtx_equal_p (operands[0], operands[1])"
(compare
(and:QI
(subreg:QI
- (zero_extract:SWI248
- (match_operand:SWI248 1 "register_operand" "0,0")
- (const_int 8)
- (const_int 8)) 0)
- (match_operand:QI 2 "general_operand" "QnBc,m"))
+ (match_operator:SWI248 3 "extract_operator"
+ [(match_operand 1 "int248_register_operand" "0,0")
+ (const_int 8)
+ (const_int 8)]) 0)
+ (match_operand:QI 2 "general_x64constmem_operand" "QnBc,m"))
(const_int 0)))
(set (zero_extract:SWI248
- (match_operand:SWI248 0 "register_operand" "+Q,Q")
+ (match_operand 0 "int248_register_operand" "+Q,Q")
(const_int 8)
(const_int 8))
(subreg:SWI248
(and:QI
(subreg:QI
- (zero_extract:SWI248
- (match_dup 1)
- (const_int 8)
- (const_int 8)) 0)
+ (match_op_dup 3
+ [(match_dup 1)
+ (const_int 8)
+ (const_int 8)]) 0)
(match_dup 2)) 0))]
"ix86_match_ccmode (insn, CCNOmode)
/* FIXME: without this LRA can't reload this pattern, see PR82524. */
(define_insn "*andqi_ext<mode>_2"
[(set (zero_extract:SWI248
- (match_operand:SWI248 0 "register_operand" "+Q")
+ (match_operand 0 "int248_register_operand" "+Q")
(const_int 8)
(const_int 8))
(subreg:SWI248
(and:QI
(subreg:QI
- (zero_extract:SWI248
- (match_operand:SWI248 1 "register_operand" "%0")
- (const_int 8)
- (const_int 8)) 0)
+ (match_operator:SWI248 3 "extract_operator"
+ [(match_operand 1 "int248_register_operand" "%0")
+ (const_int 8)
+ (const_int 8)]) 0)
(subreg:QI
- (zero_extract:SWI248
- (match_operand:SWI248 2 "register_operand" "Q")
- (const_int 8)
- (const_int 8)) 0)) 0))
+ (match_operator:SWI248 4 "extract_operator"
+ [(match_operand 2 "int248_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)]) 0)) 0))
(clobber (reg:CC FLAGS_REG))]
"/* FIXME: without this LRA can't reload this pattern, see PR82524. */
rtx_equal_p (operands[0], operands[1])
[(set_attr "type" "alu")
(set_attr "mode" "QI")])
+;; *andqi_ext<mode>_3 is defined via *<code>qi_ext<mode>_3 below.
+
;; Convert wide AND instructions with immediate operand to shorter QImode
;; equivalents when possible.
;; Don't do the splitting with memory operands, since it introduces risk
&& (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
&& !(~INTVAL (operands[2]) & ~(255 << 8))"
[(parallel
- [(set (zero_extract:SI (match_dup 0)
+ [(set (zero_extract:HI (match_dup 0)
(const_int 8)
(const_int 8))
- (subreg:SI
+ (subreg:HI
(and:QI
(subreg:QI
- (zero_extract:SI (match_dup 1)
+ (zero_extract:HI (match_dup 1)
(const_int 8)
(const_int 8)) 0)
(match_dup 2)) 0))
(clobber (reg:CC FLAGS_REG))])]
{
- operands[0] = gen_lowpart (SImode, operands[0]);
- operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[0] = gen_lowpart (HImode, operands[0]);
+ operands[1] = gen_lowpart (HImode, operands[1]);
operands[2] = gen_int_mode (INTVAL (operands[2]) >> 8, QImode);
})
(not:SWI48 (match_operand:SWI48 1 "register_operand" "r,r,k"))
(match_operand:SWI48 2 "nonimmediate_operand" "r,m,k")))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_BMI || TARGET_AVX512BW"
+ "TARGET_BMI
+ || (TARGET_AVX512BW && (<MODE>mode == SImode || TARGET_EVEX512))"
"@
andn\t{%2, %1, %0|%0, %1, %2}
andn\t{%2, %1, %0|%0, %1, %2}
#"
- [(set_attr "isa" "bmi,bmi,avx512bw")
+ [(set_attr "isa" "bmi,bmi,<kmov_isa>")
(set_attr "type" "bitmanip,bitmanip,msklog")
(set_attr "btver2_decode" "direct, double,*")
(set_attr "mode" "<MODE>")])
<logic>{<imodesuffix>}\t{%2, %0|%0, %2}
<logic>{<imodesuffix>}\t{%2, %0|%0, %2}
#"
- [(set (attr "isa")
- (cond [(eq_attr "alternative" "2")
- (if_then_else (eq_attr "mode" "SI,DI")
- (const_string "avx512bw")
- (const_string "avx512f"))
- ]
- (const_string "*")))
+ [(set_attr "isa" "*,*,<kmov_isa>")
(set_attr "type" "alu, alu, msklog")
(set_attr "mode" "<MODE>")])
DONE;
}
}
- [(set (attr "isa")
- (cond [(eq_attr "alternative" "2")
- (if_then_else (eq_attr "mode" "SI,DI")
- (const_string "avx512bw")
- (const_string "avx512f"))
- ]
- (const_string "*")))
+ [(set_attr "isa" "*,*,<kmov_isa>")
(set_attr "type" "alu, alu, msklog")
(set_attr "mode" "<MODE>")])
[(set_attr "type" "alu")
(set_attr "mode" "<MODE>")])
+;; QImode logical operation (any_or iterator) of operand 1 (tied to
+;; the destination, operand 0) with the 8-bit field at bit position 8
+;; extracted from register operand 2.  The output template prints
+;; operand 2 with the %h modifier.  The alternative with a memory
+;; destination is enabled only for the "nox64" isa.
+(define_insn "*<code>qi_ext<mode>_0"
+ [(set (match_operand:QI 0 "nonimm_x64constmem_operand" "=QBc,m")
+ (any_or:QI
+ (subreg:QI
+ (match_operator:SWI248 3 "extract_operator"
+ [(match_operand 2 "int248_register_operand" "Q,Q")
+ (const_int 8)
+ (const_int 8)]) 0)
+ (match_operand:QI 1 "nonimm_x64constmem_operand" "0,0")))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "<logic>{b}\t{%h2, %0|%0, %h2}"
+ [(set_attr "isa" "*,nox64")
+ (set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
(define_insn "*<code>qi_ext<mode>_1"
[(set (zero_extract:SWI248
- (match_operand:SWI248 0 "register_operand" "+Q,Q")
+ (match_operand 0 "int248_register_operand" "+Q,Q")
(const_int 8)
(const_int 8))
(subreg:SWI248
(any_or:QI
(subreg:QI
- (zero_extract:SWI248
- (match_operand:SWI248 1 "register_operand" "0,0")
- (const_int 8)
- (const_int 8)) 0)
- (match_operand:QI 2 "general_operand" "QnBc,m")) 0))
+ (match_operator:SWI248 3 "extract_operator"
+ [(match_operand 1 "int248_register_operand" "0,0")
+ (const_int 8)
+ (const_int 8)]) 0)
+ (match_operand:QI 2 "general_x64constmem_operand" "QnBc,m")) 0))
(clobber (reg:CC FLAGS_REG))]
"(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
/* FIXME: without this LRA can't reload this pattern, see PR82524. */
(define_insn "*<code>qi_ext<mode>_2"
[(set (zero_extract:SWI248
- (match_operand:SWI248 0 "register_operand" "+Q")
+ (match_operand 0 "int248_register_operand" "+Q")
(const_int 8)
(const_int 8))
(subreg:SWI248
(any_or:QI
(subreg:QI
- (zero_extract:SWI248
- (match_operand:SWI248 1 "register_operand" "%0")
- (const_int 8)
- (const_int 8)) 0)
+ (match_operator:SWI248 3 "extract_operator"
+ [(match_operand 1 "int248_register_operand" "%0")
+ (const_int 8)
+ (const_int 8)]) 0)
(subreg:QI
- (zero_extract:SWI248
- (match_operand:SWI248 2 "register_operand" "Q")
- (const_int 8)
- (const_int 8)) 0)) 0))
+ (match_operator:SWI248 4 "extract_operator"
+ [(match_operand 2 "int248_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)]) 0)) 0))
+ (clobber (reg:CC FLAGS_REG))]
+ "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ /* FIXME: without this LRA can't reload this pattern, see PR82524. */
+ && (rtx_equal_p (operands[0], operands[1])
+ || rtx_equal_p (operands[0], operands[2]))"
+ "<logic>{b}\t{%h2, %h0|%h0, %h2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
+(define_insn "*<code>qi_ext<mode>_3"
+ [(set (zero_extract:SWI248
+ (match_operand 0 "int248_register_operand" "+Q")
+ (const_int 8)
+ (const_int 8))
+ (zero_extract:SWI248
+ (any_logic:SWI248
+ (match_operand 1 "int248_register_operand" "%0")
+ (match_operand 2 "int248_register_operand" "Q"))
+ (const_int 8)
+ (const_int 8)))
(clobber (reg:CC FLAGS_REG))]
"(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
/* FIXME: without this LRA can't reload this pattern, see PR82524. */
&& (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
&& !(INTVAL (operands[2]) & ~(255 << 8))"
[(parallel
- [(set (zero_extract:SI (match_dup 0)
+ [(set (zero_extract:HI (match_dup 0)
(const_int 8)
(const_int 8))
- (subreg:SI
+ (subreg:HI
(any_or:QI
(subreg:QI
- (zero_extract:SI (match_dup 1)
+ (zero_extract:HI (match_dup 1)
(const_int 8)
(const_int 8)) 0)
(match_dup 2)) 0))
emit_note (NOTE_INSN_DELETED);
DONE;
}
- operands[0] = gen_lowpart (SImode, operands[0]);
- operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[0] = gen_lowpart (HImode, operands[0]);
+ operands[1] = gen_lowpart (HImode, operands[1]);
operands[2] = gen_int_mode (INTVAL (operands[2]) >> 8, QImode);
})
(compare
(xor:QI
(subreg:QI
- (zero_extract:SWI248
- (match_operand:SWI248 1 "register_operand" "0,0")
- (const_int 8)
- (const_int 8)) 0)
- (match_operand:QI 2 "general_operand" "QnBc,m"))
+ (match_operator:SWI248 3 "extract_operator"
+ [(match_operand 1 "int248_register_operand" "0,0")
+ (const_int 8)
+ (const_int 8)]) 0)
+ (match_operand:QI 2 "general_x64constmem_operand" "QnBc,m"))
(const_int 0)))
(set (zero_extract:SWI248
- (match_operand:SWI248 0 "register_operand" "+Q,Q")
+ (match_operand 0 "int248_register_operand" "+Q,Q")
(const_int 8)
(const_int 8))
(subreg:SWI248
(xor:QI
(subreg:QI
- (zero_extract:SWI248
- (match_dup 1)
- (const_int 8)
- (const_int 8)) 0)
+ (match_op_dup 3
+ [(match_dup 1)
+ (const_int 8)
+ (const_int 8)]) 0)
(match_dup 2)) 0))]
"ix86_match_ccmode (insn, CCNOmode)
/* FIXME: without this LRA can't reload this pattern, see PR82524. */
(set_attr "type" "alu")
(set_attr "mode" "QI")])
+;; Peephole2 rega = 0; rega op= regb into rega = regb.
+;; "op" is any code of the any_or_plus iterator.  Both matched insns
+;; clobber the flags register; the replacement is a plain set with no
+;; flags clobber.  The condition rejects an operand 1 that mentions
+;; operand 0, since its value would then depend on the zeroing insn
+;; that is being removed.
+(define_peephole2
+ [(parallel [(set (match_operand:SWI 0 "general_reg_operand")
+ (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_dup 0)
+ (any_or_plus:SWI (match_dup 0)
+ (match_operand:SWI 1 "<general_operand>")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "!reg_mentioned_p (operands[0], operands[1])"
+ [(set (match_dup 0) (match_dup 1))])
+
+;; Peephole2 dead instruction in rega = 0; rega op= rega.
+;; After the register has been zeroed, combining it with itself via
+;; any any_or_plus code leaves it unchanged, so only the zeroing insn
+;; (with its flags clobber) is kept.
+(define_peephole2
+ [(parallel [(set (match_operand:SWI 0 "general_reg_operand")
+ (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_dup 0)
+ (any_or_plus:SWI (match_dup 0) (match_dup 0)))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ [(parallel [(set (match_dup 0) (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])])
+
;; Split DST = (HI<<32)|LO early to minimize register usage.
-(define_code_iterator any_or_plus [plus ior xor])
(define_insn_and_split "*concat<mode><dwi>3_1"
- [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
+ [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
(any_or_plus:<DWI>
- (ashift:<DWI> (match_operand:<DWI> 1 "register_operand" "r")
- (match_operand:<DWI> 2 "const_int_operand"))
- (zero_extend:<DWI> (match_operand:DWIH 3 "register_operand" "r"))))]
+ (ashift:<DWI> (match_operand:<DWI> 1 "register_operand" "r,r")
+ (match_operand:QI 2 "const_int_operand"))
+ (zero_extend:<DWI>
+ (match_operand:DWIH 3 "nonimmediate_operand" "r,m"))))]
"INTVAL (operands[2]) == <MODE_SIZE> * BITS_PER_UNIT"
"#"
"&& reload_completed"
- [(clobber (const_int 0))]
+ [(const_int 0)]
{
split_double_concat (<DWI>mode, operands[0], operands[3],
gen_lowpart (<MODE>mode, operands[1]));
})
(define_insn_and_split "*concat<mode><dwi>3_2"
- [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
+ [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
(any_or_plus:<DWI>
- (zero_extend:<DWI> (match_operand:DWIH 1 "register_operand" "r"))
- (ashift:<DWI> (match_operand:<DWI> 2 "register_operand" "r")
- (match_operand:<DWI> 3 "const_int_operand"))))]
+ (zero_extend:<DWI>
+ (match_operand:DWIH 1 "nonimmediate_operand" "r,m"))
+ (ashift:<DWI> (match_operand:<DWI> 2 "register_operand" "r,r")
+ (match_operand:QI 3 "const_int_operand"))))]
"INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
"#"
"&& reload_completed"
- [(clobber (const_int 0))]
+ [(const_int 0)]
{
split_double_concat (<DWI>mode, operands[0], operands[1],
gen_lowpart (<MODE>mode, operands[2]));
})
(define_insn_and_split "*concat<mode><dwi>3_3"
- [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
+ [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,r,&r,x")
(any_or_plus:<DWI>
(ashift:<DWI>
- (zero_extend:<DWI> (match_operand:DWIH 1 "register_operand" "r"))
- (match_operand:<DWI> 2 "const_int_operand"))
- (zero_extend:<DWI> (match_operand:DWIH 3 "register_operand" "r"))))]
+ (zero_extend:<DWI>
+ (match_operand:DWIH 1 "nonimmediate_operand" "r,m,r,m,x"))
+ (match_operand:QI 2 "const_int_operand"))
+ (zero_extend:<DWI>
+ (match_operand:DWIH 3 "nonimmediate_operand" "r,r,m,m,0"))))]
"INTVAL (operands[2]) == <MODE_SIZE> * BITS_PER_UNIT"
"#"
"&& reload_completed"
- [(clobber (const_int 0))]
+ [(const_int 0)]
{
- split_double_concat (<DWI>mode, operands[0], operands[3], operands[1]);
+ if (SSE_REG_P (operands[0]))
+ {
+ rtx tmp = gen_rtx_REG (V2DImode, REGNO (operands[0]));
+ emit_insn (gen_vec_concatv2di (tmp, operands[3], operands[1]));
+ }
+ else
+ split_double_concat (<DWI>mode, operands[0], operands[3], operands[1]);
DONE;
-})
+}
+ [(set_attr "isa" "*,*,*,x64,x64")])
(define_insn_and_split "*concat<mode><dwi>3_4"
- [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
+ [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,r,&r")
(any_or_plus:<DWI>
- (zero_extend:<DWI> (match_operand:DWIH 1 "register_operand" "r"))
+ (zero_extend:<DWI>
+ (match_operand:DWIH 1 "nonimmediate_operand" "r,m,r,m"))
(ashift:<DWI>
- (zero_extend:<DWI> (match_operand:DWIH 2 "register_operand" "r"))
- (match_operand:<DWI> 3 "const_int_operand"))))]
+ (zero_extend:<DWI>
+ (match_operand:DWIH 2 "nonimmediate_operand" "r,r,m,m"))
+ (match_operand:QI 3 "const_int_operand"))))]
"INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
"#"
"&& reload_completed"
- [(clobber (const_int 0))]
+ [(const_int 0)]
{
split_double_concat (<DWI>mode, operands[0], operands[1], operands[2]);
DONE;
-})
+}
+ [(set_attr "isa" "*,*,*,x64")])
+
+;; Double-word concatenation of a shifted register with a constant:
+;; operand 0 = (operand 1 << operand 2) op operand 3, where "op" is
+;; any code of the any_or_plus iterator.
+;; The condition requires the shift count to equal
+;; <MODE_SIZE> * BITS_PER_UNIT / 2 and restricts the constant: for
+;; DImode a CONST_INT with no bits set above the SImode mask; otherwise
+;; a non-negative CONST_INT or a two-element CONST_WIDE_INT whose
+;; element 1 is zero.  Constants whose low element satisfies
+;; ix86_endbr_immediate_operand are rejected.
+;; Split after reload: the <HALF>mode subreg of the constant at offset 0
+;; and the <HALF>mode low part of operand 1 are handed to
+;; split_double_concat (presumably as low and high half of operand 0;
+;; confirm against that helper).
+(define_insn_and_split "*concat<half><mode>3_5"
+ [(set (match_operand:DWI 0 "nonimmediate_operand" "=r,o,o")
+ (any_or_plus:DWI
+ (ashift:DWI (match_operand:DWI 1 "register_operand" "r,r,r")
+ (match_operand:QI 2 "const_int_operand"))
+ (match_operand:DWI 3 "const_scalar_int_operand" "n,n,Wd")))]
+ "INTVAL (operands[2]) == <MODE_SIZE> * BITS_PER_UNIT / 2
+ && (<MODE>mode == DImode
+ ? CONST_INT_P (operands[3])
+ && (UINTVAL (operands[3]) & ~GET_MODE_MASK (SImode)) == 0
+ : CONST_INT_P (operands[3])
+ ? INTVAL (operands[3]) >= 0
+ : CONST_WIDE_INT_NUNITS (operands[3]) == 2
+ && CONST_WIDE_INT_ELT (operands[3], 1) == 0)
+ && !(CONST_INT_P (operands[3])
+ ? ix86_endbr_immediate_operand (operands[3], VOIDmode)
+ : ix86_endbr_immediate_operand (GEN_INT (CONST_WIDE_INT_ELT (operands[3],
+ 0)),
+ VOIDmode))"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ /* Narrow the constant to a <HALF>mode value (subreg at offset 0).  */
+ rtx op3 = simplify_subreg (<HALF>mode, operands[3], <MODE>mode, 0);
+ split_double_concat (<MODE>mode, operands[0], op3,
+ gen_lowpart (<HALF>mode, operands[1]));
+ DONE;
+}
+ [(set_attr "isa" "*,nox64,x64")])
+
+;; Double-word concatenation of a zero-extended, shifted half-width
+;; operand with a constant: operand 0 = (zero_extend (operand 1)
+;; << operand 2) op operand 3, where "op" is any code of the
+;; any_or_plus iterator.
+;; The condition requires the shift count to equal
+;; <MODE_SIZE> * BITS_PER_UNIT and restricts the constant: for a DImode
+;; result a CONST_INT with no bits set above the SImode mask; otherwise
+;; a non-negative CONST_INT or a two-element CONST_WIDE_INT whose
+;; element 1 is zero.  Constants whose low element satisfies
+;; ix86_endbr_immediate_operand are rejected.
+;; Split after reload: the <MODE>mode subreg of the constant at offset 0
+;; and operand 1 are handed to split_double_concat (presumably as low
+;; and high half of operand 0; confirm against that helper).
+(define_insn_and_split "*concat<mode><dwi>3_6"
+ [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o,o,r")
+ (any_or_plus:<DWI>
+ (ashift:<DWI>
+ (zero_extend:<DWI>
+ (match_operand:DWIH 1 "nonimmediate_operand" "r,r,r,m"))
+ (match_operand:QI 2 "const_int_operand"))
+ (match_operand:<DWI> 3 "const_scalar_int_operand" "n,n,Wd,n")))]
+ "INTVAL (operands[2]) == <MODE_SIZE> * BITS_PER_UNIT
+ && (<DWI>mode == DImode
+ ? CONST_INT_P (operands[3])
+ && (UINTVAL (operands[3]) & ~GET_MODE_MASK (SImode)) == 0
+ : CONST_INT_P (operands[3])
+ ? INTVAL (operands[3]) >= 0
+ : CONST_WIDE_INT_NUNITS (operands[3]) == 2
+ && CONST_WIDE_INT_ELT (operands[3], 1) == 0)
+ && !(CONST_INT_P (operands[3])
+ ? ix86_endbr_immediate_operand (operands[3], VOIDmode)
+ : ix86_endbr_immediate_operand (GEN_INT (CONST_WIDE_INT_ELT (operands[3],
+ 0)),
+ VOIDmode))"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ /* Narrow the constant to a <MODE>mode value (subreg at offset 0).  */
+ rtx op3 = simplify_subreg (<MODE>mode, operands[3], <DWI>mode, 0);
+ split_double_concat (<DWI>mode, operands[0], op3, operands[1]);
+ DONE;
+}
+ [(set_attr "isa" "*,nox64,x64,*")])
+
+;; Counterpart of the pattern above with the roles swapped: the zero-extended
+;; <MODE> operand 1 is not shifted, and the constant (operand 2) must have
+;; its lower half clear.  For DImode that is a CONST_INT with no bits set
+;; inside the SImode mask; otherwise a two-element CONST_WIDE_INT whose
+;; element 0 is zero.  The constant itself (DImode case) or its element 1
+;; must not be accepted by ix86_endbr_immediate_operand.
+;; Split after reload: the upper part of the constant (INTVAL >> 32 for
+;; DImode, element 1 otherwise) is rebuilt as a <MODE>mode constant and
+;; passed, after operand 1, to split_double_concat.
+;; Alternatives 2 and 3 (memory destination) are tagged isa nox64 and x64.
+(define_insn_and_split "*concat<mode><dwi>3_7"
+ [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o,o,r")
+ (any_or_plus:<DWI>
+ (zero_extend:<DWI>
+ (match_operand:DWIH 1 "nonimmediate_operand" "r,r,r,m"))
+ (match_operand:<DWI> 2 "const_scalar_int_operand" "n,n,Wd,n")))]
+ "<DWI>mode == DImode
+ ? CONST_INT_P (operands[2])
+ && (UINTVAL (operands[2]) & GET_MODE_MASK (SImode)) == 0
+ && !ix86_endbr_immediate_operand (operands[2], VOIDmode)
+ : CONST_WIDE_INT_P (operands[2])
+ && CONST_WIDE_INT_NUNITS (operands[2]) == 2
+ && CONST_WIDE_INT_ELT (operands[2], 0) == 0
+ && !ix86_endbr_immediate_operand (GEN_INT (CONST_WIDE_INT_ELT (operands[2],
+ 1)),
+ VOIDmode)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op2;
+ if (<DWI>mode == DImode)
+ op2 = gen_int_mode (INTVAL (operands[2]) >> 32, <MODE>mode);
+ else
+ op2 = gen_int_mode (CONST_WIDE_INT_ELT (operands[2], 1), <MODE>mode);
+ split_double_concat (<DWI>mode, operands[0], operands[1], op2);
+ DONE;
+}
+ [(set_attr "isa" "*,nox64,x64,*")])
\f
;; Negation instructions
(define_insn "*negqi_ext<mode>_2"
[(set (zero_extract:SWI248
- (match_operand:SWI248 0 "register_operand" "+Q")
+ (match_operand 0 "int248_register_operand" "+Q")
(const_int 8)
(const_int 8))
(subreg:SWI248
(neg:QI
(subreg:QI
- (zero_extract:SWI248
- (match_operand:SWI248 1 "register_operand" "0")
- (const_int 8)
- (const_int 8)) 0)) 0))
+ (match_operator:SWI248 2 "extract_operator"
+ [(match_operand 1 "int248_register_operand" "0")
+ (const_int 8)
+ (const_int 8)]) 0)) 0))
(clobber (reg:CC FLAGS_REG))]
"/* FIXME: without this LRA can't reload this pattern, see PR82524. */
rtx_equal_p (operands[0], operands[1])"
"@
not{<imodesuffix>}\t%0
#"
- [(set (attr "isa")
- (cond [(eq_attr "alternative" "1")
- (if_then_else (eq_attr "mode" "SI,DI")
- (const_string "avx512bw")
- (const_string "avx512f"))
- ]
- (const_string "*")))
+ [(set_attr "isa" "*,<kmov_isa>")
(set_attr "type" "negnot,msklog")
(set_attr "mode" "<MODE>")])
"@
not{l}\t%k0
#"
- [(set_attr "isa" "x64,avx512bw")
+ [(set_attr "isa" "x64,avx512bw_512")
(set_attr "type" "negnot,msklog")
(set_attr "mode" "SI,SI")])
(match_operand:<DWI> 1 "register_operand")
(subreg:QI
(and
- (match_operand 2 "register_operand" "c")
+ (match_operand 2 "int248_register_operand" "c")
(match_operand 3 "const_int_operand")) 0)))
(clobber (reg:CC FLAGS_REG))]
"((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0
|| ((INTVAL (operands[3]) & (2 * <MODE_SIZE> * BITS_PER_UNIT - 1))
== (2 * <MODE_SIZE> * BITS_PER_UNIT - 1)))
- && GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT
- && IN_RANGE (GET_MODE_SIZE (GET_MODE (operands[2])), 2,
- 4 << (TARGET_64BIT ? 1 : 0))
&& ix86_pre_reload_split ()"
"#"
"&& 1"
[(const_int 0)]
"ix86_split_ashl (operands, operands[3], <DWI>mode); DONE;")
+;; Double-word left shift of an extended (any_extend) <MODE> operand by a
+;; constant count in the range [<MODE> bits, 2 * <MODE> bits).
+;; Split after reload: split_double_mode breaks the destination into two
+;; <MODE> parts, stored in operands[0] and operands[3].  Operand 1 is copied
+;; into operands[3] unless it already is that rtx, operands[3] is shifted
+;; left by (count - <MODE> bits) when that is positive, and operands[0] is
+;; zeroed with ix86_expand_clear.
+;; NOTE(review): operands[0]/operands[3] are presumably the low/high halves
+;; respectively -- confirm against split_double_mode's signature.
+(define_insn_and_split "*ashl<dwi>3_doubleword_highpart"
+ [(set (match_operand:<DWI> 0 "register_operand" "=r")
+ (ashift:<DWI>
+ (any_extend:<DWI> (match_operand:DWIH 1 "nonimmediate_operand" "rm"))
+ (match_operand:QI 2 "const_int_operand")))
+ (clobber (reg:CC FLAGS_REG))]
+ "INTVAL (operands[2]) >= <MODE_SIZE> * BITS_PER_UNIT
+ && INTVAL (operands[2]) < <MODE_SIZE> * BITS_PER_UNIT * 2"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[3]);
+ int bits = INTVAL (operands[2]) - (<MODE_SIZE> * BITS_PER_UNIT);
+ if (!rtx_equal_p (operands[3], operands[1]))
+ emit_move_insn (operands[3], operands[1]);
+ if (bits > 0)
+ emit_insn (gen_ashl<mode>3 (operands[3], operands[3], GEN_INT (bits)));
+ ix86_expand_clear (operands[0]);
+ DONE;
+})
+
(define_insn "x86_64_shld"
[(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
(ior:DI (ashift:DI (match_dup 0)
(match_operand:SWI48 1 "nonimmediate_operand")
(subreg:QI
(and
- (match_operand 2 "register_operand" "c,r")
+ (match_operand 2 "int248_register_operand" "c,r")
(match_operand 3 "const_int_operand")) 0)))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)
&& (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
== GET_MODE_BITSIZE (<MODE>mode)-1
- && GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT
- && IN_RANGE (GET_MODE_SIZE (GET_MODE (operands[2])), 2,
- 4 << (TARGET_64BIT ? 1 : 0))
&& ix86_pre_reload_split ()"
"#"
"&& 1"
return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
- [(set_attr "isa" "*,*,bmi2,avx512bw")
+ [(set_attr "isa" "*,*,bmi2,<kmov_isa>")
(set (attr "type")
(cond [(eq_attr "alternative" "1")
(const_string "lea")
;; Convert ashift to the lea pattern to avoid flags dependency.
(define_split
- [(set (match_operand:SWI 0 "register_operand")
- (ashift:SWI (match_operand:SWI 1 "index_register_operand")
+ [(set (match_operand:SWI 0 "general_reg_operand")
+ (ashift:SWI (match_operand:SWI 1 "index_reg_operand")
(match_operand 2 "const_0_to_3_operand")))
(clobber (reg:CC FLAGS_REG))]
"reload_completed
;; Convert ashift to the lea pattern to avoid flags dependency.
(define_split
- [(set (match_operand:DI 0 "register_operand")
+ [(set (match_operand:DI 0 "general_reg_operand")
(zero_extend:DI
- (ashift:SI (match_operand:SI 1 "index_register_operand")
+ (ashift:SI (match_operand:SI 1 "index_reg_operand")
(match_operand 2 "const_0_to_3_operand"))))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT && reload_completed
(define_insn "*ashlqi_ext<mode>_2"
[(set (zero_extract:SWI248
- (match_operand:SWI248 0 "register_operand" "+Q")
+ (match_operand 0 "int248_register_operand" "+Q")
(const_int 8)
(const_int 8))
(subreg:SWI248
(ashift:QI
(subreg:QI
- (zero_extract:SWI248
- (match_operand:SWI248 1 "register_operand" "0")
- (const_int 8)
- (const_int 8)) 0)
+ (match_operator:SWI248 3 "extract_operator"
+ [(match_operand 1 "int248_register_operand" "0")
+ (const_int 8)
+ (const_int 8)]) 0)
(match_operand:QI 2 "nonmemory_operand" "cI")) 0))
(clobber (reg:CC FLAGS_REG))]
"/* FIXME: without this LRA can't reload this pattern, see PR82524. */
(match_operand:SWI48 1 "nonimmediate_operand")
(subreg:QI
(and
- (match_operand 2 "register_operand" "c,r")
+ (match_operand 2 "int248_register_operand" "c,r")
(match_operand 3 "const_int_operand")) 0)))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
&& (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
== GET_MODE_BITSIZE (<MODE>mode)-1
- && GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT
- && IN_RANGE (GET_MODE_SIZE (GET_MODE (operands[2])), 2,
- 4 << (TARGET_64BIT ? 1 : 0))
&& ix86_pre_reload_split ()"
"#"
"&& 1"
(match_operand:<DWI> 1 "register_operand")
(subreg:QI
(and
- (match_operand 2 "register_operand" "c")
+ (match_operand 2 "int248_register_operand" "c")
(match_operand 3 "const_int_operand")) 0)))
(clobber (reg:CC FLAGS_REG))]
"((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0
|| ((INTVAL (operands[3]) & (2 * <MODE_SIZE> * BITS_PER_UNIT - 1))
== (2 * <MODE_SIZE> * BITS_PER_UNIT - 1)))
- && GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT
- && IN_RANGE (GET_MODE_SIZE (GET_MODE (operands[2])), 2,
- 4 << (TARGET_64BIT ? 1 : 0))
&& ix86_pre_reload_split ()"
"#"
"&& 1"
return "shr{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
- [(set_attr "isa" "*,bmi2,avx512bw")
+ [(set_attr "isa" "*,bmi2,<kmov_isa>")
(set_attr "type" "ishift,ishiftx,msklog")
(set (attr "length_immediate")
(if_then_else
(define_insn "*<insn>qi_ext<mode>_2"
[(set (zero_extract:SWI248
- (match_operand:SWI248 0 "register_operand" "+Q")
+ (match_operand 0 "int248_register_operand" "+Q")
(const_int 8)
(const_int 8))
(subreg:SWI248
(any_shiftrt:QI
(subreg:QI
- (zero_extract:SWI248
- (match_operand:SWI248 1 "register_operand" "0")
- (const_int 8)
- (const_int 8)) 0)
+ (match_operator:SWI248 3 "extract_operator"
+ [(match_operand 1 "int248_register_operand" "0")
+ (const_int 8)
+ (const_int 8)]) 0)
(match_operand:QI 2 "nonmemory_operand" "cI")) 0))
(clobber (reg:CC FLAGS_REG))]
"/* FIXME: without this LRA can't reload this pattern, see PR82524. */
(const_string "0")
(const_string "*")))
(set_attr "mode" "QI")])
+
+;; Double-word (x << n) >> n, with an arithmetic right shift, where both
+;; constant counts are equal and smaller than the width of <MODE>.
+;; Operand 1 is tied to the destination ("0" constraint).
+;; Split after reload into a <MODE> left shift followed by a <MODE>
+;; arithmetic right shift of operands[4] by operand 2, each with its own
+;; flags clobber.  operands[4] is filled in by the split_double_mode call in
+;; the preparation statement, which also rewrites operands[0].
+;; NOTE(review): operands[4] is presumably the high half of the destination
+;; (per the pattern name) -- confirm against split_double_mode.
+(define_insn_and_split "*extend<dwi>2_doubleword_highpart"
+ [(set (match_operand:<DWI> 0 "register_operand" "=r")
+ (ashiftrt:<DWI>
+ (ashift:<DWI> (match_operand:<DWI> 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_int_operand"))
+ (match_operand:QI 3 "const_int_operand")))
+ (clobber (reg:CC FLAGS_REG))]
+ "INTVAL (operands[2]) == INTVAL (operands[3])
+ && UINTVAL (operands[2]) < <MODE_SIZE> * BITS_PER_UNIT"
+ "#"
+ "&& reload_completed"
+ [(parallel [(set (match_dup 4)
+ (ashift:DWIH (match_dup 4) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_dup 4)
+ (ashiftrt:DWIH (match_dup 4) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[4]);")
+
+;; V2DI form of the (x << n) >> n pattern (arithmetic right shift), enabled
+;; only for !TARGET_64BIT && TARGET_STV && TARGET_AVX512VL, with equal
+;; constant counts below 32.
+;; Split after reload into two plain sets with no flags clobber: operand 0
+;; receives operand 1 shifted left by operand 2, then operand 0 is shifted
+;; right arithmetically by the same count.
+(define_insn_and_split "*extendv2di2_highpart_stv"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
+ (ashiftrt:V2DI
+ (ashift:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "vm")
+ (match_operand:QI 2 "const_int_operand"))
+ (match_operand:QI 3 "const_int_operand")))]
+ "!TARGET_64BIT && TARGET_STV && TARGET_AVX512VL
+ && INTVAL (operands[2]) == INTVAL (operands[3])
+ && UINTVAL (operands[2]) < 32"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (ashift:V2DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0)
+ (ashiftrt:V2DI (match_dup 0) (match_dup 2)))])
\f
;; Rotate instructions
emit_insn (gen_ix86_<insn>ti3_doubleword
(operands[0], operands[1], operands[2]));
else if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 64)
- emit_insn (gen_<insn>64ti2_doubleword (operands[0], operands[1]));
+ {
+ operands[1] = force_reg (TImode, operands[1]);
+ emit_insn (gen_<insn>64ti2_doubleword (operands[0], operands[1]));
+ }
else
{
rtx amount = force_reg (QImode, operands[2]);
emit_insn (gen_ix86_<insn>di3_doubleword
(operands[0], operands[1], operands[2]));
else if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 32)
- emit_insn (gen_<insn>32di2_doubleword (operands[0], operands[1]));
+ {
+ operands[1] = force_reg (DImode, operands[1]);
+ emit_insn (gen_<insn>32di2_doubleword (operands[0], operands[1]));
+ }
else
FAIL;
(match_operand:SWI 1 "nonimmediate_operand")
(subreg:QI
(and
- (match_operand 2 "register_operand" "c")
+ (match_operand 2 "int248_register_operand" "c")
(match_operand 3 "const_int_operand")) 0)))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
&& (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
== GET_MODE_BITSIZE (<MODE>mode)-1
- && GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT
- && IN_RANGE (GET_MODE_SIZE (GET_MODE (operands[2])), 2,
- 4 << (TARGET_64BIT ? 1 : 0))
&& ix86_pre_reload_split ()"
"#"
"&& 1"
(match_operand:SWI 1 "const_int_operand")
(subreg:QI
(and
- (match_operand 2 "register_operand")
+ (match_operand 2 "int248_register_operand")
(match_operand 3 "const_int_operand")) 0)))]
"(INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode) - 1))
- == GET_MODE_BITSIZE (<MODE>mode) - 1
- && GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT
- && IN_RANGE (GET_MODE_SIZE (GET_MODE (operands[2])), 2,
- 4 << (TARGET_64BIT ? 1 : 0))"
+ == GET_MODE_BITSIZE (<MODE>mode) - 1"
[(set (match_dup 4) (match_dup 1))
(set (match_dup 0)
(any_rotate:SWI (match_dup 4)
})
(define_insn_and_split "<insn>32di2_doubleword"
- [(set (match_operand:DI 0 "register_operand" "=r,r,r")
- (any_rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0,r,o")
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (any_rotate:DI (match_operand:DI 1 "register_operand" "0,r")
(const_int 32)))]
"!TARGET_64BIT"
"#"
})
(define_insn_and_split "<insn>64ti2_doubleword"
- [(set (match_operand:TI 0 "register_operand" "=r,r,r")
- (any_rotate:TI (match_operand:TI 1 "nonimmediate_operand" "0,r,o")
+ [(set (match_operand:TI 0 "register_operand" "=r,r")
+ (any_rotate:TI (match_operand:TI 1 "register_operand" "0,r")
(const_int 64)))]
"TARGET_64BIT"
"#"
[(parallel [(set (strict_low_part (match_dup 0))
(bswap:HI (match_dup 0)))
(clobber (reg:CC FLAGS_REG))])])
+
+;; Rotations through carry flag
+;; SImode rotate-right-through-carry by one.  The RTL models the result as
+;; (operand 1 >> 1, logical) plus the carry condition (ltu on the CCC flags
+;; register) shifted into bit 31.  Operand 1 is tied to the destination and
+;; the flags register is clobbered.  Emits "rcr{l}" with the destination as
+;; its only explicit operand; length_immediate is therefore "0".
+(define_insn "rcrsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 1))
+ (ashift:SI (ltu:SI (reg:CCC FLAGS_REG) (const_int 0))
+ (const_int 31))))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "rcr{l}\t%0"
+ [(set_attr "type" "ishift1")
+ (set_attr "memory" "none")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "SI")])
+
+;; DImode rotate-right-through-carry by one, available only for
+;; TARGET_64BIT.  The result is modelled as (operand 1 >> 1, logical) plus
+;; the carry condition (ltu on the CCC flags register) shifted into bit 63.
+;; Operand 1 is tied to the destination and the flags register is clobbered.
+;; Emits "rcr{q}" with the destination as its only explicit operand.
+(define_insn "rcrdi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "0")
+ (const_int 1))
+ (ashift:DI (ltu:DI (reg:CCC FLAGS_REG) (const_int 0))
+ (const_int 63))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "rcr{q}\t%0"
+ [(set_attr "type" "ishift1")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "DI")])
+
+;; Versions of sar and shr that set the carry flag.
+;; Right shift by one (any_shiftrt) of a SWI48 register that also defines
+;; the CCC flags: the flags are set to UNSPEC_CC_NE of (operand 1 & 1) and 0,
+;; i.e. they describe the bit that is shifted out.  Operand 1 is tied to the
+;; destination.
+;; The one-operand encoding (no immediate) is printed when TARGET_SHIFT1 is
+;; set or the function is optimized for size; otherwise the count 1 is
+;; written explicitly.  The length_immediate attribute mirrors that choice.
+(define_insn "<insn><mode>3_carry"
+ [(set (reg:CCC FLAGS_REG)
+ (unspec:CCC [(and:SWI48 (match_operand:SWI48 1 "register_operand" "0")
+ (const_int 1))
+ (const_int 0)] UNSPEC_CC_NE))
+ (set (match_operand:SWI48 0 "register_operand" "=r")
+ (any_shiftrt:SWI48 (match_dup 1) (const_int 1)))]
+ ""
+{
+ if (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ return "<shift>{<imodesuffix>}\t%0";
+ return "<shift>{<imodesuffix>}\t{1, %0|%0, 1}";
+}
+ [(set_attr "type" "ishift1")
+ (set (attr "length_immediate")
+ (if_then_else
+ (ior (match_test "TARGET_SHIFT1")
+ (match_test "optimize_function_for_size_p (cfun)"))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "<MODE>")])
\f
;; Bit set / bit test instructions
(const_int 1)
(subreg:QI
(and
- (match_operand 1 "register_operand")
+ (match_operand 1 "int248_register_operand")
(match_operand 2 "const_int_operand")) 0))
(match_operand:SWI48 3 "register_operand")))
(clobber (reg:CC FLAGS_REG))]
"TARGET_USE_BT
&& (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
== GET_MODE_BITSIZE (<MODE>mode)-1
- && GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
- && IN_RANGE (GET_MODE_SIZE (GET_MODE (operands[1])), 2,
- 4 << (TARGET_64BIT ? 1 : 0))
&& ix86_pre_reload_split ()"
"#"
"&& 1"
(const_int -2)
(subreg:QI
(and
- (match_operand 1 "register_operand")
+ (match_operand 1 "int248_register_operand")
(match_operand 2 "const_int_operand")) 0))
(match_operand:SWI48 3 "register_operand")))
(clobber (reg:CC FLAGS_REG))]
"TARGET_USE_BT
&& (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
== GET_MODE_BITSIZE (<MODE>mode)-1
- && GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
- && IN_RANGE (GET_MODE_SIZE (GET_MODE (operands[1])), 2,
- 4 << (TARGET_64BIT ? 1 : 0))
&& ix86_pre_reload_split ()"
"#"
"&& 1"
[(set (zero_extract:HI
(match_operand:SWI12 0 "nonimmediate_operand")
(const_int 1)
- (zero_extend:SI (match_operand:QI 1 "register_operand")))
+ (match_operand:QI 1 "register_operand"))
(const_int 0))
(clobber (reg:CC FLAGS_REG))]
"TARGET_USE_BT && ix86_pre_reload_split ()"
[(set (zero_extract:HI
(match_operand:SWI12 0 "register_operand")
(const_int 1)
- (zero_extend:SI (match_operand:QI 1 "register_operand")))
+ (match_operand:QI 1 "register_operand"))
(const_int 0))
(clobber (reg:CC FLAGS_REG))]
"TARGET_USE_BT && ix86_pre_reload_split ()"
(define_insn "*btsq_imm"
[(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm")
(const_int 1)
- (match_operand 1 "const_0_to_63_operand"))
+ (match_operand:QI 1 "const_0_to_63_operand"))
(const_int 1))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
(define_insn "*btrq_imm"
[(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm")
(const_int 1)
- (match_operand 1 "const_0_to_63_operand"))
+ (match_operand:QI 1 "const_0_to_63_operand"))
(const_int 0))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
(define_insn "*btcq_imm"
[(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm")
(const_int 1)
- (match_operand 1 "const_0_to_63_operand"))
+ (match_operand:QI 1 "const_0_to_63_operand"))
(not:DI (zero_extract:DI (match_dup 0) (const_int 1) (match_dup 1))))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
(parallel [(set (zero_extract:DI
(match_operand:DI 0 "nonimmediate_operand")
(const_int 1)
- (match_operand 1 "const_0_to_63_operand"))
+ (match_operand:QI 1 "const_0_to_63_operand"))
(const_int 1))
(clobber (reg:CC FLAGS_REG))])]
"TARGET_64BIT && !TARGET_USE_BT"
(parallel [(set (zero_extract:DI
(match_operand:DI 0 "nonimmediate_operand")
(const_int 1)
- (match_operand 1 "const_0_to_63_operand"))
+ (match_operand:QI 1 "const_0_to_63_operand"))
(const_int 0))
(clobber (reg:CC FLAGS_REG))])]
"TARGET_64BIT && !TARGET_USE_BT"
(parallel [(set (zero_extract:DI
(match_operand:DI 0 "nonimmediate_operand")
(const_int 1)
- (match_operand 1 "const_0_to_63_operand"))
+ (match_operand:QI 1 "const_0_to_63_operand"))
(not:DI (zero_extract:DI
(match_dup 0) (const_int 1) (match_dup 1))))
(clobber (reg:CC FLAGS_REG))])]
(zero_extract:SWI48
(match_operand:SWI48 0 "nonimmediate_operand" "r,m")
(const_int 1)
- (match_operand:SI 1 "nonmemory_operand" "r<S>,<S>"))
+ (match_operand:QI 1 "nonmemory_operand" "q<S>,<S>"))
(const_int 0)))]
""
{
switch (get_attr_mode (insn))
{
case MODE_SI:
- return "bt{l}\t{%1, %k0|%k0, %1}";
+ return "bt{l}\t{%k1, %k0|%k0, %k1}";
case MODE_DI:
return "bt{q}\t{%q1, %0|%0, %q1}";
(const_string "SI")
(const_string "<MODE>")))])
+;; Bit test whose bit position is the QImode subreg of (and reg const).
+;; Matched before reload (ix86_pre_reload_split) when TARGET_USE_BT is set
+;; and the mask constant has every bit of GET_MODE_BITSIZE (<SWI48:MODE>) - 1
+;; set.  The split drops the AND: the same CCC compare of the one-bit
+;; zero_extract is emitted with operand 1 replaced by its QImode lowpart.
+(define_insn_and_split "*bt<SWI48:mode>_mask"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extract:SWI48
+ (match_operand:SWI48 0 "nonimmediate_operand" "r,m")
+ (const_int 1)
+ (subreg:QI
+ (and:SWI248
+ (match_operand:SWI248 1 "register_operand")
+ (match_operand 2 "const_int_operand")) 0))
+ (const_int 0)))]
+ "TARGET_USE_BT
+ && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<SWI48:MODE>mode)-1))
+ == GET_MODE_BITSIZE (<SWI48:MODE>mode)-1
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extract:SWI48 (match_dup 0) (const_int 1) (match_dup 1))
+ (const_int 0)))]
+ "operands[1] = gen_lowpart (QImode, operands[1]);")
+
(define_insn_and_split "*jcc_bt<mode>"
[(set (pc)
(if_then_else (match_operator 0 "bt_comparison_operator"
[(zero_extract:SWI48
(match_operand:SWI48 1 "nonimmediate_operand")
(const_int 1)
- (match_operand:SI 2 "nonmemory_operand"))
+ (match_operand:QI 2 "nonmemory_operand"))
(const_int 0)])
(label_ref (match_operand 3))
(pc)))
PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
})
-(define_insn_and_split "*jcc_bt<mode>_1"
- [(set (pc)
- (if_then_else (match_operator 0 "bt_comparison_operator"
- [(zero_extract:SWI48
- (match_operand:SWI48 1 "register_operand")
- (const_int 1)
- (zero_extend:SI
- (match_operand:QI 2 "register_operand")))
- (const_int 0)])
- (label_ref (match_operand 3))
- (pc)))
- (clobber (reg:CC FLAGS_REG))]
- "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
- && ix86_pre_reload_split ()"
- "#"
- "&& 1"
- [(set (reg:CCC FLAGS_REG)
- (compare:CCC
- (zero_extract:SWI48
- (match_dup 1)
- (const_int 1)
- (match_dup 2))
- (const_int 0)))
- (set (pc)
- (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
- (label_ref (match_dup 3))
- (pc)))]
-{
- operands[2] = lowpart_subreg (SImode, operands[2], QImode);
- operands[0] = shallow_copy_rtx (operands[0]);
- PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
-})
-
;; Avoid useless masking of bit offset operand.
(define_insn_and_split "*jcc_bt<mode>_mask"
[(set (pc)
[(zero_extract:SWI48
(match_operand:SWI48 1 "register_operand")
(const_int 1)
- (and:SI
- (match_operand:SI 2 "register_operand")
+ (and:QI
+ (match_operand:QI 2 "register_operand")
(match_operand 3 "const_int_operand")))])
(label_ref (match_operand 4))
(pc)))
PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
})
-(define_insn_and_split "*jcc_bt<mode>_mask_1"
+;; Avoid useless masking of bit offset operand.
+(define_insn_and_split "*jcc_bt<SWI48:mode>_mask_1"
[(set (pc)
- (if_then_else (match_operator 0 "bt_comparison_operator"
+ (if_then_else (match_operator 0 "bt_comparison_operator"
[(zero_extract:SWI48
(match_operand:SWI48 1 "register_operand")
(const_int 1)
- (zero_extend:SI
- (subreg:QI
- (and
- (match_operand 2 "register_operand")
- (match_operand 3 "const_int_operand")) 0)))])
+ (subreg:QI
+ (and:SWI248
+ (match_operand:SWI248 2 "register_operand")
+ (match_operand 3 "const_int_operand")) 0))])
(label_ref (match_operand 4))
(pc)))
(clobber (reg:CC FLAGS_REG))]
"(TARGET_USE_BT || optimize_function_for_size_p (cfun))
- && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
- == GET_MODE_BITSIZE (<MODE>mode)-1
- && GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT
- && IN_RANGE (GET_MODE_SIZE (GET_MODE (operands[2])), 2,
- 4 << (TARGET_64BIT ? 1 : 0))
+ && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<SWI48:MODE>mode)-1))
+ == GET_MODE_BITSIZE (<SWI48:MODE>mode)-1
&& ix86_pre_reload_split ()"
"#"
"&& 1"
(label_ref (match_dup 4))
(pc)))]
{
- operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
- operands[2] = gen_lowpart (SImode, operands[2]);
operands[0] = shallow_copy_rtx (operands[0]);
PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
+ operands[2] = gen_lowpart (QImode, operands[2]);
})
;; Help combine recognize bt followed by cmov
[(zero_extract:SWI48
(match_operand:SWI48 1 "register_operand")
(const_int 1)
- (zero_extend:SI (match_operand:QI 2 "register_operand")))
+ (match_operand:QI 2 "register_operand"))
(const_int 0)])
(match_operand:SWI248 3 "nonimmediate_operand")
(match_operand:SWI248 4 "nonimmediate_operand")))]
{
if (GET_CODE (operands[5]) == EQ)
std::swap (operands[3], operands[4]);
- operands[2] = lowpart_subreg (SImode, operands[2], QImode);
})
;; Help combine recognize bt followed by setc
(zero_extract:SWI48
(match_operand:SWI48 1 "register_operand")
(const_int 1)
- (zero_extend:SI (match_operand:QI 2 "register_operand"))))
+ (match_operand:QI 2 "register_operand")))
(clobber (reg:CC FLAGS_REG))]
"TARGET_USE_BT && ix86_pre_reload_split ()"
"#"
(zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
(const_int 0)))
(set (match_dup 0)
- (eq:QI (reg:CCC FLAGS_REG) (const_int 0)))]
-{
- operands[2] = lowpart_subreg (SImode, operands[2], QImode);
-})
+ (eq:QI (reg:CCC FLAGS_REG) (const_int 0)))])
;; Help combine recognize bt followed by setnc
(define_insn_and_split "*bt<mode>_setncqi"
(zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
(const_int 0)))
(set (match_dup 0)
- (ne:QI (reg:CCC FLAGS_REG) (const_int 0)))]
-{
- operands[2] = lowpart_subreg (SImode, operands[2], QImode);
-})
+ (ne:QI (reg:CCC FLAGS_REG) (const_int 0)))])
(define_insn_and_split "*bt<mode>_setnc<mode>"
[(set (match_operand:SWI48 0 "register_operand")
(set (match_dup 3)
(ne:QI (reg:CCC FLAGS_REG) (const_int 0)))
(set (match_dup 0) (zero_extend:SWI48 (match_dup 3)))]
+ "operands[3] = gen_reg_rtx (QImode);")
+
+;; Help combine recognize bt followed by setnc (PR target/110588)
+;; Matches a QImode result computed as (eq (zero_extract op1 1 op2) 0), i.e.
+;; "the selected bit of operand 1 is clear", with a flags clobber.
+;; Enabled before reload when TARGET_USE_BT is set.  Always split ("&& 1")
+;; into a CCC compare of the one-bit zero_extract followed by a set of
+;; operand 0 from (ne (CCC flags) 0).
+(define_insn_and_split "*bt<mode>_setncqi_2"
+ [(set (match_operand:QI 0 "register_operand")
+ (eq:QI
+ (zero_extract:SWI48
+ (match_operand:SWI48 1 "register_operand")
+ (const_int 1)
+ (match_operand:QI 2 "register_operand"))
+ (const_int 0)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_BT && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
+ (const_int 0)))
+ (set (match_dup 0)
+ (ne:QI (reg:CCC FLAGS_REG) (const_int 0)))])
+
+;; Help combine recognize bt followed by setc
+;; Extract of a single bit of operand 1 into a SWI48 register, where the bit
+;; position is the QImode subreg of (and op2 op3) and the mask op3 has every
+;; bit of GET_MODE_BITSIZE (<MODE>) - 1 set.  Enabled before reload when
+;; TARGET_USE_BT is set.
+;; The split emits a CCC compare of the zero_extract using the QImode lowpart
+;; of operand 2 (the AND is dropped), sets a fresh QImode pseudo
+;; (operands[3], which replaces the matched mask) from (eq (CCC flags) 0),
+;; and zero-extends that pseudo into operand 0.
+(define_insn_and_split "*bt<mode>_setc<mode>_mask"
+ [(set (match_operand:SWI48 0 "register_operand")
+ (zero_extract:SWI48
+ (match_operand:SWI48 1 "register_operand")
+ (const_int 1)
+ (subreg:QI
+ (and:SWI48
+ (match_operand:SWI48 2 "register_operand")
+ (match_operand 3 "const_int_operand")) 0)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_BT
+ && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+ == GET_MODE_BITSIZE (<MODE>mode)-1
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
+ (const_int 0)))
+ (set (match_dup 3)
+ (eq:QI (reg:CCC FLAGS_REG) (const_int 0)))
+ (set (match_dup 0) (zero_extend:SWI48 (match_dup 3)))]
{
- operands[2] = lowpart_subreg (SImode, operands[2], QImode);
+ operands[2] = gen_lowpart (QImode, operands[2]);
operands[3] = gen_reg_rtx (QImode);
})
\f
[(set (match_operand:MODEF 0 "register_operand" "=x,x")
(match_operator:MODEF 3 "sse_comparison_operator"
[(match_operand:MODEF 1 "register_operand" "0,x")
- (match_operand:MODEF 2 "nonimmediate_operand" "xm,xm")]))]
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm,xjm")]))]
"SSE_FLOAT_MODE_P (<MODE>mode)"
"@
cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "isa" "noavx,avx")
+ (set_attr "gpr32" "1,0")
(set_attr "type" "ssecmp")
(set_attr "length_immediate" "1")
(set_attr "prefix" "orig,vex")
(set_attr "type" "bitmanip")
(set_attr "mode" "<MODE>")])
-(define_insn_and_split "*clzsi2_lzcnt_zext"
+;; SImode count-leading-zeros whose result is used as a DImode value:
+;; matches (and:DI (subreg:DI (clz:SI op1) 0) 63) when TARGET_LZCNT and
+;; TARGET_64BIT are set, and prints "lzcnt{l}" writing the 32-bit (%k) view
+;; of the destination.
+;; After the epilogue has been generated, when TARGET_AVOID_FALSE_DEP_FOR_BMI
+;; is set, the function is optimized for speed and the destination is not
+;; mentioned in operand 1, the insn is split: ix86_expand_clear is called on
+;; the destination and the insn is re-emitted with an added
+;; UNSPEC_INSN_FALSE_DEP use of the destination.
+(define_insn_and_split "*clzsi2_lzcnt_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (and:DI
+ (subreg:DI
+ (clz:SI
+ (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
+ (const_int 63)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_LZCNT && TARGET_64BIT"
+ "lzcnt{l}\t{%1, %k0|%k0, %1}"
+ "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
+ && optimize_function_for_speed_p (cfun)
+ && !reg_mentioned_p (operands[0], operands[1])"
+ [(parallel
+ [(set (match_dup 0)
+ (and:DI (subreg:DI (clz:SI (match_dup 1)) 0) (const_int 63)))
+ (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
+ (clobber (reg:CC FLAGS_REG))])]
+ "ix86_expand_clear (operands[0]);"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "SI")])
+
+; False dependency happens when destination is only updated by tzcnt,
+; lzcnt or popcnt. There is no false dependency when destination is
+; also used in source.
+;; Same computation and "lzcnt{l}" output as the (and:DI (subreg:DI (clz:SI
+;; ...)) 63) pattern, plus an UNSPEC_INSN_FALSE_DEP use of a DImode register
+;; (operand 2) that is tied to the destination by its "0" constraint.
+;; NOTE(review): operand 1 uses the SWI48 iterator under clz:SI although the
+;; insn name carries no <mode>, and the condition is TARGET_LZCNT alone
+;; (no TARGET_64BIT) -- confirm both are intended.
+(define_insn "*clzsi2_lzcnt_zext_falsedep"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (and:DI
+ (subreg:DI
+ (clz:SI
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")) 0)
+ (const_int 63)))
+ (unspec [(match_operand:DI 2 "register_operand" "0")]
+ UNSPEC_INSN_FALSE_DEP)
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_LZCNT"
+ "lzcnt{l}\t{%1, %k0|%k0, %1}"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "SI")])
+
+(define_insn_and_split "*clzsi2_lzcnt_zext_2"
[(set (match_operand:DI 0 "register_operand" "=r")
- (and:DI
- (subreg:DI
- (clz:SI
- (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
- (const_int 63)))
+ (zero_extend:DI
+ (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
(clobber (reg:CC FLAGS_REG))]
"TARGET_LZCNT && TARGET_64BIT"
"lzcnt{l}\t{%1, %k0|%k0, %1}"
&& !reg_mentioned_p (operands[0], operands[1])"
[(parallel
[(set (match_dup 0)
- (and:DI (subreg:DI (clz:SI (match_dup 1)) 0) (const_int 63)))
+ (zero_extend:DI (clz:SI (match_dup 1))))
(unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
(clobber (reg:CC FLAGS_REG))])]
"ix86_expand_clear (operands[0]);"
; False dependency happens when destination is only updated by tzcnt,
; lzcnt or popcnt. There is no false dependency when destination is
; also used in source.
-(define_insn "*clzsi2_lzcnt_zext_falsedep"
+(define_insn "*clzsi2_lzcnt_zext_2_falsedep"
[(set (match_operand:DI 0 "register_operand" "=r")
- (and:DI
- (subreg:DI
- (clz:SI
- (match_operand:SWI48 1 "nonimmediate_operand" "rm")) 0)
- (const_int 63)))
+ (zero_extend:DI
+ (clz:SI (match_operand:SWI48 1 "nonimmediate_operand" "rm"))))
(unspec [(match_operand:DI 2 "register_operand" "0")]
UNSPEC_INSN_FALSE_DEP)
(clobber (reg:CC FLAGS_REG))]
[(parallel
[(set (match_operand:SWI48 0 "register_operand")
(if_then_else:SWI48
- (ne:QI (and:SWI48 (match_operand:SWI48 2 "register_operand")
- (const_int 255))
+ (ne:QI (match_operand:QI 2 "register_operand")
(const_int 0))
(zero_extract:SWI48
(match_operand:SWI48 1 "nonimmediate_operand")
- (umin:SWI48 (and:SWI48 (match_dup 2) (const_int 255))
- (match_dup 3))
+ (umin:QI (match_dup 2) (match_dup 3))
(const_int 0))
(const_int 0)))
(clobber (reg:CC FLAGS_REG))])]
"TARGET_BMI2"
- "operands[3] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);")
+{
+ operands[2] = gen_lowpart (QImode, operands[2]);
+ operands[3] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
+})
(define_insn "*bmi2_bzhi_<mode>3"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(if_then_else:SWI48
- (ne:QI (and:SWI48 (match_operand:SWI48 2 "register_operand" "r")
- (const_int 255))
+ (ne:QI (match_operand:QI 2 "register_operand" "q")
(const_int 0))
(zero_extract:SWI48
(match_operand:SWI48 1 "nonimmediate_operand" "rm")
- (umin:SWI48 (and:SWI48 (match_dup 2) (const_int 255))
- (match_operand:SWI48 3 "const_int_operand"))
- (const_int 0))
- (const_int 0)))
- (clobber (reg:CC FLAGS_REG))]
- "TARGET_BMI2 && INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
- "bzhi\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "bitmanip")
- (set_attr "prefix" "vex")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*bmi2_bzhi_<mode>3_1"
- [(set (match_operand:SWI48 0 "register_operand" "=r")
- (if_then_else:SWI48
- (ne:QI (match_operand:QI 2 "register_operand" "r") (const_int 0))
- (zero_extract:SWI48
- (match_operand:SWI48 1 "nonimmediate_operand" "rm")
- (umin:SWI48 (zero_extend:SWI48 (match_dup 2))
- (match_operand:SWI48 3 "const_int_operand"))
+ (umin:QI (match_dup 2)
+ (match_operand:QI 3 "const_int_operand"))
(const_int 0))
(const_int 0)))
(clobber (reg:CC FLAGS_REG))]
(ne:QI (match_operand:QI 2 "register_operand" "r") (const_int 0))
(zero_extract:SWI48
(match_operand:SWI48 1 "nonimmediate_operand" "rm")
- (umin:SWI48 (zero_extend:SWI48 (match_dup 2))
- (match_operand:SWI48 3 "const_int_operand"))
+ (umin:QI (match_dup 2)
+ (match_operand:QI 3 "const_int_operand"))
(const_int 0))
(const_int 0))
(const_int 0)))
(set_attr "prefix" "vex")
(set_attr "mode" "DI")])
+;; DImode AND of operand 1 with a zero-extended SImode mask of the form
+;; (1 << op2) - 1, where op2 is a QImode register.  Available for
+;; TARGET_64BIT && TARGET_BMI2 and printed as a single "bzhi" with every
+;; operand using the %q modifier.
+(define_insn "*bmi2_bzhi_zero_extendsidi_5"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (and:DI
+ (zero_extend:DI
+ (plus:SI
+ (ashift:SI (const_int 1)
+ (match_operand:QI 2 "register_operand" "r"))
+ (const_int -1)))
+ (match_operand:DI 1 "nonimmediate_operand" "rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_BMI2"
+ "bzhi\t{%q2, %q1, %q0|%q0, %q1, %q2}"
+ [(set_attr "type" "bitmanip")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "DI")])
+
(define_insn "bmi2_pdep_<mode>3"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")
[(set (match_operand:SWI48 0 "register_operand" "=r")
(zero_extract:SWI48
(match_operand:SWI48 1 "nonimmediate_operand" "rm")
- (match_operand 2 "const_0_to_255_operand")
- (match_operand 3 "const_0_to_255_operand")))
+ (match_operand:QI 2 "const_0_to_255_operand")
+ (match_operand:QI 3 "const_0_to_255_operand")))
(clobber (reg:CC FLAGS_REG))]
"TARGET_TBM"
{
(set_attr "type" "bitmanip")
(set_attr "mode" "SI")])
+;; SImode population count zero-extended to DImode, for TARGET_POPCNT &&
+;; TARGET_64BIT.  Prints "popcnt{l}" (plain "popcnt" when TARGET_MACHO is
+;; nonzero at build time) writing the 32-bit (%k) view of the destination.
+;; After the epilogue has been generated, when TARGET_AVOID_FALSE_DEP_FOR_BMI
+;; is set, the function is optimized for speed and the destination is not
+;; mentioned in operand 1, the insn is split: ix86_expand_clear is called on
+;; the destination and the insn is re-emitted with an added
+;; UNSPEC_INSN_FALSE_DEP use of the destination.
+(define_insn_and_split "*popcountsi2_zext_2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_POPCNT && TARGET_64BIT"
+{
+#if TARGET_MACHO
+ return "popcnt\t{%1, %k0|%k0, %1}";
+#else
+ return "popcnt{l}\t{%1, %k0|%k0, %1}";
+#endif
+}
+ "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
+ && optimize_function_for_speed_p (cfun)
+ && !reg_mentioned_p (operands[0], operands[1])"
+ [(parallel
+ [(set (match_dup 0)
+ (zero_extend:DI (popcount:SI (match_dup 1))))
+ (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
+ (clobber (reg:CC FLAGS_REG))])]
+ "ix86_expand_clear (operands[0]);"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "SI")])
+
+; False dependency happens when destination is only updated by tzcnt,
+; lzcnt or popcnt. There is no false dependency when destination is
+; also used in source.
+;; Zero-extended SImode popcount carrying an explicit
+;; UNSPEC_INSN_FALSE_DEP use.  Operand 2 is tied to the destination by
+;; its "0" constraint, so the unspec reads the same register that the
+;; popcnt writes.  The output template is the plain popcnt form (without
+;; the {l} suffix under TARGET_MACHO).
+(define_insn "*popcountsi2_zext_2_falsedep"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
+ (unspec [(match_operand:DI 2 "register_operand" "0")]
+ UNSPEC_INSN_FALSE_DEP)
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_POPCNT && TARGET_64BIT"
+{
+#if TARGET_MACHO
+ return "popcnt\t{%1, %k0|%k0, %1}";
+#else
+ return "popcnt{l}\t{%1, %k0|%k0, %1}";
+#endif
+}
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "SI")])
+
(define_insn_and_split "*popcounthi2_1"
[(set (match_operand:SI 0 "register_operand")
(popcount:SI
DONE;
})
+;; Pre-reload pattern for an HImode popcount whose result is
+;; zero-extended to SImode.  It is only valid while
+;; ix86_pre_reload_split () holds and is always split ("&& 1"): the
+;; popcount is emitted into a fresh HImode pseudo via gen_popcounthi2,
+;; and that pseudo is then zero-extended into operand 0 via
+;; gen_zero_extendhisi2.
+(define_insn_and_split "*popcounthi2_2"
+ [(set (match_operand:SI 0 "register_operand")
+ (zero_extend:SI
+ (popcount:HI (match_operand:HI 1 "nonimmediate_operand"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_POPCNT
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ rtx tmp = gen_reg_rtx (HImode);
+
+ emit_insn (gen_popcounthi2 (tmp, operands[1]));
+ emit_insn (gen_zero_extendhisi2 (operands[0], tmp));
+ DONE;
+})
+
(define_insn "popcounthi2"
[(set (match_operand:HI 0 "register_operand" "=r")
(popcount:HI
PUT_CODE (operands[5], GET_CODE (operands[5]) == EQ ? UNORDERED : ORDERED);
})
+;; Eliminate HImode popcount&1 using parity flag (variant 2)
+;;
+;; Matched sequence: an HImode popcount of operand 2 into operand 1, a
+;; CCZ compare of (operand 3 & 1) in QImode against zero, and a
+;; conditional jump on a bt_comparison_operator of the flags.  Operand 0
+;; is an HImode scratch register requested from class "Q".
+;;
+;; The condition requires operands 1 and 3 to share a register number,
+;; both peep2_reg_dead_p (2, ...) checks to hold for them, and
+;; peep2_regno_dead_p (3, FLAGS_REG) to hold.
+;;
+;; Replacement: operand 2 is copied into the scratch, the flags are set
+;; from UNSPEC_PARITY of the scratch (which is clobbered), and the jump
+;; is kept with its comparison code rewritten on a shallow copy of
+;; operand 4: EQ becomes UNORDERED, anything else becomes ORDERED.
+(define_peephole2
+ [(match_scratch:HI 0 "Q")
+ (parallel [(set (match_operand:HI 1 "register_operand")
+ (popcount:HI
+ (match_operand:HI 2 "nonimmediate_operand")))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (and:QI (match_operand:QI 3 "register_operand")
+ (const_int 1))
+ (const_int 0)))
+ (set (pc) (if_then_else (match_operator 4 "bt_comparison_operator"
+ [(reg:CCZ FLAGS_REG)
+ (const_int 0)])
+ (label_ref (match_operand 5))
+ (pc)))]
+ "REGNO (operands[1]) == REGNO (operands[3])
+ && peep2_reg_dead_p (2, operands[1])
+ && peep2_reg_dead_p (2, operands[3])
+ && peep2_regno_dead_p (3, FLAGS_REG)"
+ [(set (match_dup 0) (match_dup 2))
+ (parallel [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_dup 0)] UNSPEC_PARITY))
+ (clobber (match_dup 0))])
+ (set (pc) (if_then_else (match_op_dup 4 [(reg:CC FLAGS_REG)
+ (const_int 0)])
+ (label_ref (match_dup 5))
+ (pc)))]
+{
+ operands[4] = shallow_copy_rtx (operands[4]);
+ PUT_CODE (operands[4], GET_CODE (operands[4]) == EQ ? UNORDERED : ORDERED);
+})
+
\f
;; Thread-local storage patterns for ELF.
;;
(set_attr "mode" "HF")])
+;; SFmode UNSPEC_RCP emitted as (v)rcpss.  After this change the pattern
+;; has four alternatives: the former memory alternative is split into a
+;; legacy-encoded "rcpss" form ("m", isa noavx) and a "vrcpss" form
+;; ("ja", isa avx).  The "gpr32" attribute is 0 only for the last
+;; alternative.
+;; NOTE(review): "ja" and "gpr32" are defined outside this hunk;
+;; presumably they restrict the address to registers the VEX encoding
+;; can reach -- confirm against constraints.md.
+;; "preferred_for_speed" is true under TARGET_AVX; otherwise
+;; alternatives 1-3 are preferred only when
+;; !TARGET_SSE_PARTIAL_REG_DEPENDENCY.
(define_insn "*rcpsf2_sse"
- [(set (match_operand:SF 0 "register_operand" "=x,x,x")
- (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "0,x,m")]
+ [(set (match_operand:SF 0 "register_operand" "=x,x,x,x")
+ (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "0,x,m,ja")]
UNSPEC_RCP))]
"TARGET_SSE && TARGET_SSE_MATH"
"@
%vrcpss\t{%d1, %0|%0, %d1}
%vrcpss\t{%d1, %0|%0, %d1}
- %vrcpss\t{%1, %d0|%d0, %1}"
- [(set_attr "type" "sse")
+ rcpss\t{%1, %d0|%d0, %1}
+ vrcpss\t{%1, %d0|%d0, %1}"
+ [(set_attr "isa" "*,*,noavx,avx")
+ (set_attr "gpr32" "1,1,1,0")
+ (set_attr "type" "sse")
(set_attr "atom_sse_attr" "rcp")
(set_attr "btver2_sse_attr" "rcp")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SF")
- (set_attr "avx_partial_xmm_update" "false,false,true")
+ (set_attr "avx_partial_xmm_update" "false,false,true,true")
(set (attr "preferred_for_speed")
(cond [(match_test "TARGET_AVX")
(symbol_ref "true")
- (eq_attr "alternative" "1,2")
+ (eq_attr "alternative" "1,2,3")
(symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
]
(symbol_ref "true")))])
(set_attr "bdver1_decode" "direct")])
+;; SFmode UNSPEC_RSQRT emitted as (v)rsqrtss.  After this change the
+;; pattern has four alternatives: the former memory alternative is split
+;; into a legacy-encoded "rsqrtss" form ("m", isa noavx) and a
+;; "vrsqrtss" form ("ja", isa avx).  The "gpr32" attribute is 0 only for
+;; the last alternative.
+;; NOTE(review): "ja" and "gpr32" are defined outside this hunk;
+;; presumably they restrict the address to registers the VEX encoding
+;; can reach -- confirm against constraints.md.
+;; "preferred_for_speed" is true under TARGET_AVX; otherwise
+;; alternatives 1-3 are preferred only when
+;; !TARGET_SSE_PARTIAL_REG_DEPENDENCY.
(define_insn "*rsqrtsf2_sse"
- [(set (match_operand:SF 0 "register_operand" "=x,x,x")
- (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "0,x,m")]
+ [(set (match_operand:SF 0 "register_operand" "=x,x,x,x")
+ (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "0,x,m,ja")]
UNSPEC_RSQRT))]
"TARGET_SSE && TARGET_SSE_MATH"
"@
%vrsqrtss\t{%d1, %0|%0, %d1}
%vrsqrtss\t{%d1, %0|%0, %d1}
- %vrsqrtss\t{%1, %d0|%d0, %1}"
- [(set_attr "type" "sse")
+ rsqrtss\t{%1, %d0|%d0, %1}
+ vrsqrtss\t{%1, %d0|%d0, %1}"
+ [(set_attr "isa" "*,*,noavx,avx")
+ (set_attr "gpr32" "1,1,1,0")
+ (set_attr "type" "sse")
(set_attr "atom_sse_attr" "rcp")
(set_attr "btver2_sse_attr" "rcp")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SF")
- (set_attr "avx_partial_xmm_update" "false,false,true")
+ (set_attr "avx_partial_xmm_update" "false,false,true,true")
(set (attr "preferred_for_speed")
(cond [(match_test "TARGET_AVX")
(symbol_ref "true")
- (eq_attr "alternative" "1,2")
+ (eq_attr "alternative" "1,2,3")
(symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
]
(symbol_ref "true")))])
(set (reg:CCFP FPSR_REG)
(unspec:CCFP [(match_dup 2) (match_dup 3)]
UNSPEC_C2_FLAG))]
- "TARGET_USE_FANCY_MATH_387
- && flag_finite_math_only"
+ "TARGET_USE_FANCY_MATH_387"
"fprem"
[(set_attr "type" "fpspc")
(set_attr "znver1_decode" "vector")
[(use (match_operand:XF 0 "register_operand"))
(use (match_operand:XF 1 "general_operand"))
(use (match_operand:XF 2 "general_operand"))]
- "TARGET_USE_FANCY_MATH_387
- && flag_finite_math_only"
+ "TARGET_USE_FANCY_MATH_387"
{
rtx_code_label *label = gen_label_rtx ();
[(use (match_operand:MODEF 0 "register_operand"))
(use (match_operand:MODEF 1 "general_operand"))
(use (match_operand:MODEF 2 "general_operand"))]
- "TARGET_USE_FANCY_MATH_387
- && flag_finite_math_only"
+ "TARGET_USE_FANCY_MATH_387"
{
rtx (*gen_truncxf) (rtx, rtx);
(set (reg:CCFP FPSR_REG)
(unspec:CCFP [(match_dup 2) (match_dup 3)]
UNSPEC_C2_FLAG))]
- "TARGET_USE_FANCY_MATH_387
- && flag_finite_math_only"
+ "TARGET_USE_FANCY_MATH_387"
"fprem1"
[(set_attr "type" "fpspc")
(set_attr "znver1_decode" "vector")
[(use (match_operand:XF 0 "register_operand"))
(use (match_operand:XF 1 "general_operand"))
(use (match_operand:XF 2 "general_operand"))]
- "TARGET_USE_FANCY_MATH_387
- && flag_finite_math_only"
+ "TARGET_USE_FANCY_MATH_387"
{
rtx_code_label *label = gen_label_rtx ();
[(use (match_operand:MODEF 0 "register_operand"))
(use (match_operand:MODEF 1 "general_operand"))
(use (match_operand:MODEF 2 "general_operand"))]
- "TARGET_USE_FANCY_MATH_387
- && flag_finite_math_only"
+ "TARGET_USE_FANCY_MATH_387"
{
rtx (*gen_truncxf) (rtx, rtx);
(define_insn "sse4_1_round<mode>2"
[(set (match_operand:MODEFH 0 "register_operand" "=x,x,x,v,v")
(unspec:MODEFH
- [(match_operand:MODEFH 1 "nonimmediate_operand" "0,x,m,v,m")
+ [(match_operand:MODEFH 1 "nonimmediate_operand" "0,x,jm,v,m")
(match_operand:SI 2 "const_0_to_15_operand")]
UNSPEC_ROUND))]
"TARGET_SSE4_1"
vrndscale<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}"
[(set_attr "type" "ssecvt")
(set_attr "prefix_extra" "1,1,1,*,*")
- (set_attr "length_immediate" "*,*,*,1,1")
+ (set_attr "length_immediate" "1")
+ (set_attr "gpr32" "1,1,0,1,1")
(set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,evex,evex")
(set_attr "isa" "noavx512f,noavx512f,noavx512f,avx512f,avx512f")
(set_attr "avx_partial_xmm_update" "false,false,true,false,true")
DONE;
})
+;; HFmode round expander.  Enabled for TARGET_AVX512FP16 only when
+;; neither -ftrapping-math nor -frounding-math is in effect; the whole
+;; expansion is delegated to ix86_expand_round_sse4.
+(define_expand "roundhf2"
+ [(match_operand:HF 0 "register_operand")
+ (match_operand:HF 1 "register_operand")]
+ "TARGET_AVX512FP16 && !flag_trapping_math && !flag_rounding_math"
+{
+ ix86_expand_round_sse4 (operands[0], operands[1]);
+ DONE;
+})
+
(define_expand "round<mode>2"
[(match_operand:X87MODEF 0 "register_operand")
(match_operand:X87MODEF 1 "nonimmediate_operand")]
[(set_attr "type" "fpspc")
(set_attr "mode" "<MODE>")])
+;; lround from HFmode to each SWI248 integer mode.  Enabled for
+;; TARGET_AVX512FP16 only when neither -ftrapping-math nor
+;; -frounding-math is in effect.  The RTL template is not emitted as is:
+;; the preparation statement hands both operands to ix86_expand_lround
+;; and finishes with DONE.
+(define_expand "lroundhf<mode>2"
+ [(set (match_operand:SWI248 0 "register_operand")
+ (unspec:SWI248 [(match_operand:HF 1 "nonimmediate_operand")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_AVX512FP16 && !flag_trapping_math && !flag_rounding_math"
+{
+ ix86_expand_lround (operands[0], operands[1]);
+ DONE;
+})
+
+;; lrint from HFmode to each SWI48 integer mode.  There are no
+;; preparation statements, so the UNSPEC_FIX_NOTRUNC template below is
+;; emitted unchanged; the only requirement is TARGET_AVX512FP16.
+(define_expand "lrinthf<mode>2"
+ [(set (match_operand:SWI48 0 "register_operand")
+ (unspec:SWI48 [(match_operand:HF 1 "nonimmediate_operand")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_AVX512FP16")
+
(define_expand "lrint<MODEF:mode><SWI48:mode>2"
[(set (match_operand:SWI48 0 "register_operand")
(unspec:SWI48 [(match_operand:MODEF 1 "nonimmediate_operand")]
&& (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
&& flag_unsafe_math_optimizations")
+;; Expander for the FIST_ROUNDING conversions from HFmode to each SWI48
+;; integer mode under TARGET_AVX512FP16.  It works in two steps: operand
+;; 1 is first rounded into a fresh HFmode pseudo by gen_sse4_1_roundhf2
+;; with immediate ROUND_<ROUNDING> | ROUND_NO_EXC, and the rounded value
+;; is then converted into operand 0 by gen_fix_trunchf<mode>2.
+(define_expand "l<rounding_insn>hf<mode>2"
+ [(set (match_operand:SWI48 0 "nonimmediate_operand")
+ (unspec:SWI48 [(match_operand:HF 1 "register_operand")]
+ FIST_ROUNDING))]
+ "TARGET_AVX512FP16"
+{
+ rtx tmp = gen_reg_rtx (HFmode);
+ emit_insn (gen_sse4_1_roundhf2 (tmp, operands[1],
+ GEN_INT (ROUND_<ROUNDING> | ROUND_NO_EXC)));
+ emit_insn (gen_fix_trunchf<mode>2 (operands[0], tmp));
+ DONE;
+})
+
(define_expand "l<rounding_insn><MODEF:mode><SWI48:mode>2"
[(parallel [(set (match_operand:SWI48 0 "nonimmediate_operand")
(unspec:SWI48 [(match_operand:MODEF 1 "register_operand")]
})
+;; UNSPEC_MOVMSK of a DFmode register into an SImode register, emitted
+;; as (v)movmskpd.  After this change there are two alternatives
+;; selected by the "isa" attribute: "=r" for noavx and "=jr" for avx,
+;; and the "prefix" attribute becomes maybe_evex.
+;; NOTE(review): the "jr" constraint is defined outside this hunk --
+;; confirm its register class against constraints.md.
(define_insn "movmsk_df"
- [(set (match_operand:SI 0 "register_operand" "=r")
+ [(set (match_operand:SI 0 "register_operand" "=r,jr")
(unspec:SI
- [(match_operand:DF 1 "register_operand" "x")]
+ [(match_operand:DF 1 "register_operand" "x,x")]
UNSPEC_MOVMSK))]
"SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH"
"%vmovmskpd\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssemov")
- (set_attr "prefix" "maybe_vex")
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "ssemov")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "DF")])
;; Use movmskpd in SSE mode to avoid store forwarding stall
[(set_attr "type" "icmov")
(set_attr "mode" "SI")])
+;; SImode conditional move on a flags comparison whose result is
+;; zero-extended to DImode.  In alternative 0 operand 3 is tied to the
+;; destination and operand 2 is moved in using the %C1 condition; in
+;; alternative 1 operand 2 is tied and operand 3 is moved in using the
+;; %c1 condition.  The destination is printed with %k0.  The insn
+;; condition rejects the case where operands 2 and 3 are both MEMs.
+(define_insn "*movsicc_noc_zext_1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r")
+ (zero_extend:DI
+ (if_then_else:SI (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:SI 2 "nonimmediate_operand" "rm,0")
+ (match_operand:SI 3 "nonimmediate_operand" "0,rm"))))]
+ "TARGET_64BIT
+ && TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
+ "@
+ cmov%O2%C1\t{%2, %k0|%k0, %2}
+ cmov%O2%c1\t{%3, %k0|%k0, %3}"
+ [(set_attr "type" "icmov")
+ (set_attr "mode" "SI")])
+
+
;; Don't do conditional moves with memory inputs. This splitter helps
;; register starved x86_32 by forcing inputs into registers before reload.
(define_split
(match_dup 0)))]
{
operands[7] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (1)), 0, 0));
- operands[8] = replace_rtx (operands[5], operands[0], operands[1], true);
- operands[9] = replace_rtx (operands[6], operands[0], operands[1], true);
+ operands[8]
+ = ix86_replace_reg_with_reg (operands[5], operands[0], operands[1]);
+ operands[9]
+ = ix86_replace_reg_with_reg (operands[6], operands[0], operands[1]);
})
;; Eliminate a reg-reg mov by inverting the condition of a cmov (#2).
(match_dup 0)))]
{
operands[7] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (2)), 0, 0));
- operands[8] = replace_rtx (operands[5], operands[0], operands[1], true);
- operands[9] = replace_rtx (operands[6], operands[0], operands[1], true);
+ operands[8]
+ = ix86_replace_reg_with_reg (operands[5], operands[0], operands[1]);
+ operands[9]
+ = ix86_replace_reg_with_reg (operands[6], operands[0], operands[1]);
})
(define_insn "movhf_mask"
(match_operand:MODEF 3 "register_operand" "x")))]
"TARGET_XOP"
"vpcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}"
- [(set_attr "type" "sse4arg")])
+ [(set_attr "type" "sse4arg")
+ (set_attr "mode" "TI")])
;; These versions of the min/max patterns are intentionally ignorant of
;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
(set_attr "type" "sseadd")
(set_attr "mode" "<MODE>")])
+;; Operand order in min/max instructions matters for signed zeros and NaNs.
+;; Recognize, before reload, an UNSPEC_BLENDV of operands 1 and 2 whose
+;; selector is (lt operand3 operand4) with operand 3 equal to operand 1
+;; and operand 4 equal to operand 2.  It is always split ("&& 1") into
+;; UNSPEC_IEEE_MAX with the operands swapped: [operand 2, operand 1].
+(define_insn_and_split "*ieee_max<mode>3_1"
+ [(set (match_operand:MODEF 0 "register_operand")
+ (unspec:MODEF
+ [(match_operand:MODEF 1 "register_operand")
+ (match_operand:MODEF 2 "register_operand")
+ (lt:MODEF
+ (match_operand:MODEF 3 "register_operand")
+ (match_operand:MODEF 4 "register_operand"))]
+ UNSPEC_BLENDV))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && (rtx_equal_p (operands[1], operands[3])
+ && rtx_equal_p (operands[2], operands[4]))
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:MODEF
+ [(match_dup 2)
+ (match_dup 1)]
+ UNSPEC_IEEE_MAX))])
+
+;; Recognize, before reload, an UNSPEC_BLENDV of operands 1 and 2 whose
+;; selector is (lt operand3 operand4) with operand 3 equal to operand 2
+;; and operand 4 equal to operand 1 (the comparison is crossed relative
+;; to the blend inputs).  It is always split ("&& 1") into
+;; UNSPEC_IEEE_MIN with operand order [operand 2, operand 1].
+(define_insn_and_split "*ieee_min<mode>3_1"
+ [(set (match_operand:MODEF 0 "register_operand")
+ (unspec:MODEF
+ [(match_operand:MODEF 1 "register_operand")
+ (match_operand:MODEF 2 "register_operand")
+ (lt:MODEF
+ (match_operand:MODEF 3 "register_operand")
+ (match_operand:MODEF 4 "register_operand"))]
+ UNSPEC_BLENDV))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && (rtx_equal_p (operands[1], operands[4])
+ && rtx_equal_p (operands[2], operands[3]))
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:MODEF
+ [(match_dup 2)
+ (match_dup 1)]
+ UNSPEC_IEEE_MIN))])
+
;; Make two stack loads independent:
;; fld aa fld aa
;; fld %st(0) -> fld bb
(match_operator 1 "compare_operator"
[(and:QI
(subreg:QI
- (zero_extract:SWI248 (match_operand:SWI248 2 "QIreg_operand")
- (const_int 8)
- (const_int 8)) 0)
+ (match_operator:SWI248 4 "extract_operator"
+ [(match_operand 2 "int248_register_operand")
+ (const_int 8)
+ (const_int 8)]) 0)
(match_operand 3 "const_int_operand"))
(const_int 0)]))]
"! TARGET_PARTIAL_REG_STALL
(match_op_dup 1
[(and:QI
(subreg:QI
- (zero_extract:SWI248 (match_dup 2)
- (const_int 8)
- (const_int 8)) 0)
+ (match_op_dup 4 [(match_dup 2)
+ (const_int 8)
+ (const_int 8)]) 0)
(match_dup 3))
(const_int 0)]))
(set (zero_extract:SWI248 (match_dup 2)
(subreg:SWI248
(and:QI
(subreg:QI
- (zero_extract:SWI248 (match_dup 2)
- (const_int 8)
- (const_int 8)) 0)
+ (match_op_dup 4 [(match_dup 2)
+ (const_int 8)
+ (const_int 8)]) 0)
(match_dup 3)) 0))])])
;; Don't do logical operations with memory inputs.
(parallel [(set (match_dup 0)
(match_op_dup 3 [(match_dup 0) (match_dup 1)]))
(clobber (reg:CC FLAGS_REG))])]
- "operands[4] = replace_rtx (operands[2], operands[0], operands[1], true);")
+{
+ operands[4]
+ = ix86_replace_reg_with_reg (operands[2], operands[0], operands[1]);
+})
(define_peephole2
[(set (match_operand 0 "mmx_reg_operand")
(match_operand 1 "memory_operand")]
""
{
+ rtx scratch = gen_reg_rtx (word_mode);
+
emit_insn (gen_stack_protect_set_1
- (ptr_mode, operands[0], operands[1]));
+ (ptr_mode, word_mode, operands[0], operands[1], scratch));
DONE;
})
+;; Copy the PTR-mode value in memory operand 1 to memory operand 0
+;; (UNSPEC_SP_SET) through register operand 2, then zero that register
+;; with "xor{l} %k2, %k2".  After this change operand 2 is an explicit
+;; SWI48 register_operand (earlyclobber "=&r") instead of a PTR-mode
+;; match_scratch, so the pattern name carries both iterators and the
+;; two moves print operand 2 through the <PTR:k> modifier.
-(define_insn "@stack_protect_set_1_<mode>"
+(define_insn "@stack_protect_set_1_<PTR:mode>_<SWI48:mode>"
[(set (match_operand:PTR 0 "memory_operand" "=m")
(unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")]
UNSPEC_SP_SET))
- (set (match_scratch:PTR 2 "=&r") (const_int 0))
+ (set (match_operand:SWI48 2 "register_operand" "=&r") (const_int 0))
(clobber (reg:CC FLAGS_REG))]
""
{
- output_asm_insn ("mov{<imodesuffix>}\t{%1, %2|%2, %1}", operands);
- output_asm_insn ("mov{<imodesuffix>}\t{%2, %0|%0, %2}", operands);
+ output_asm_insn ("mov{<PTR:imodesuffix>}\t{%1, %<PTR:k>2|%<PTR:k>2, %1}",
+ operands);
+ output_asm_insn ("mov{<PTR:imodesuffix>}\t{%<PTR:k>2, %0|%0, %<PTR:k>2}",
+ operands);
return "xor{l}\t%k2, %k2";
}
[(set_attr "type" "multi")])
;; Patterns and peephole2s to optimize stack_protect_set_1_<mode>
-;; immediately followed by *mov{s,d}i_internal to the same register,
-;; where we can avoid the xor{l} above. We don't split this, so that
-;; scheduling or anything else doesn't separate the *stack_protect_set*
-;; pattern from the set of the register that overwrites the register
-;; with a new value.
-(define_insn "*stack_protect_set_2_<mode>"
+;; immediately followed by *mov{s,d}i_internal, where we can avoid
+;; the xor{l} above. We don't split this, so that scheduling or
+;; anything else doesn't separate the *stack_protect_set* pattern from
+;; the set of the register that overwrites the register with a new value.
+
+;; Fold a stack-protector set (which zeroes word-mode register operand
+;; 2) followed by a flags-clobbering load of constant zero into register
+;; operand 3.  Requires peep2_reg_dead_p (0, operands[3]) and
+;; peep2_reg_dead_p (1, operands[2]).  The replacement is a single
+;; stack-protector set that zeroes operand 3 instead of operand 2.
+(define_peephole2
+ [(parallel [(set (match_operand:PTR 0 "memory_operand")
+ (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
+ UNSPEC_SP_SET))
+ (set (match_operand:W 2 "general_reg_operand") (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_operand:SWI48 3 "general_reg_operand")
+ (match_operand:SWI48 4 "const0_operand"))
+ (clobber (reg:CC FLAGS_REG))])]
+ "peep2_reg_dead_p (0, operands[3])
+ && peep2_reg_dead_p (1, operands[2])"
+ [(parallel [(set (match_dup 0)
+ (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
+ (set (match_dup 3) (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])])
+
+(define_insn "*stack_protect_set_2_<mode>_si"
[(set (match_operand:PTR 0 "memory_operand" "=m")
(unspec:PTR [(match_operand:PTR 3 "memory_operand" "m")]
UNSPEC_SP_SET))
(set (match_operand:SI 1 "register_operand" "=&r")
- (match_operand:SI 2 "general_operand" "g"))
- (clobber (reg:CC FLAGS_REG))]
- "reload_completed
- && !reg_overlap_mentioned_p (operands[1], operands[2])"
+ (match_operand:SI 2 "general_operand" "g"))]
+ "reload_completed"
{
output_asm_insn ("mov{<imodesuffix>}\t{%3, %<k>1|%<k>1, %3}", operands);
output_asm_insn ("mov{<imodesuffix>}\t{%<k>1, %0|%0, %<k>1}", operands);
[(set_attr "type" "multi")
(set_attr "length" "24")])
-(define_peephole2
- [(parallel [(set (match_operand:PTR 0 "memory_operand")
- (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
- UNSPEC_SP_SET))
- (set (match_operand:PTR 2 "general_reg_operand") (const_int 0))
- (clobber (reg:CC FLAGS_REG))])
- (set (match_operand:SI 3 "general_reg_operand")
- (match_operand:SI 4))]
- "REGNO (operands[2]) == REGNO (operands[3])
- && general_operand (operands[4], SImode)
- && (general_reg_operand (operands[4], SImode)
- || memory_operand (operands[4], SImode)
- || immediate_operand (operands[4], SImode))
- && !reg_overlap_mentioned_p (operands[3], operands[4])"
- [(parallel [(set (match_dup 0)
- (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
- (set (match_dup 3) (match_dup 4))
- (clobber (reg:CC FLAGS_REG))])])
-
-(define_insn "*stack_protect_set_3"
- [(set (match_operand:DI 0 "memory_operand" "=m,m,m")
- (unspec:DI [(match_operand:DI 3 "memory_operand" "m,m,m")]
- UNSPEC_SP_SET))
- (set (match_operand:DI 1 "register_operand" "=&r,r,r")
- (match_operand:DI 2 "general_operand" "Z,rem,i"))
- (clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT
- && reload_completed
- && !reg_overlap_mentioned_p (operands[1], operands[2])"
+(define_insn "*stack_protect_set_2_<mode>_di"
+ [(set (match_operand:PTR 0 "memory_operand" "=m,m,m")
+ (unspec:PTR [(match_operand:PTR 3 "memory_operand" "m,m,m")]
+ UNSPEC_SP_SET))
+ (set (match_operand:DI 1 "register_operand" "=&r,&r,&r")
+ (match_operand:DI 2 "general_operand" "Z,rem,i"))]
+ "TARGET_64BIT && reload_completed"
{
- output_asm_insn ("mov{q}\t{%3, %1|%1, %3}", operands);
- output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", operands);
+ output_asm_insn ("mov{<imodesuffix>}\t{%3, %<k>1|%<k>1, %3}", operands);
+ output_asm_insn ("mov{<imodesuffix>}\t{%<k>1, %0|%0, %<k>1}", operands);
if (pic_32bit_operand (operands[2], DImode))
return "lea{q}\t{%E2, %1|%1, %E2}";
else if (which_alternative == 0)
(set_attr "length" "24")])
+;; After this change: fold a stack-protector set (which zeroes word-mode
+;; register operand 2) followed by a plain SWI48 move of general operand
+;; 4 into register operand 3.  Requires peep2_reg_dead_p (0,
+;; operands[3]) and peep2_reg_dead_p (1, operands[2]).  The replacement
+;; parallel performs the UNSPEC_SP_SET copy together with the move into
+;; operand 3 and has no FLAGS_REG clobber.
(define_peephole2
- [(parallel [(set (match_operand:DI 0 "memory_operand")
- (unspec:DI [(match_operand:DI 1 "memory_operand")]
- UNSPEC_SP_SET))
- (set (match_operand:DI 2 "general_reg_operand") (const_int 0))
- (clobber (reg:CC FLAGS_REG))])
- (set (match_dup 2) (match_operand:DI 3))]
- "TARGET_64BIT
- && general_operand (operands[3], DImode)
- && (general_reg_operand (operands[3], DImode)
- || memory_operand (operands[3], DImode)
- || x86_64_zext_immediate_operand (operands[3], DImode)
- || x86_64_immediate_operand (operands[3], DImode)
- || (CONSTANT_P (operands[3])
- && (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[3]))))
- && !reg_overlap_mentioned_p (operands[2], operands[3])"
- [(parallel [(set (match_dup 0)
- (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
- (set (match_dup 2) (match_dup 3))
- (clobber (reg:CC FLAGS_REG))])])
+ [(parallel [(set (match_operand:PTR 0 "memory_operand")
+ (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
+ UNSPEC_SP_SET))
+ (set (match_operand:W 2 "general_reg_operand") (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_operand:SWI48 3 "general_reg_operand")
+ (match_operand:SWI48 4 "general_operand"))]
+ "peep2_reg_dead_p (0, operands[3])
+ && peep2_reg_dead_p (1, operands[2])"
+ [(parallel [(set (match_dup 0)
+ (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
+ (set (match_dup 3) (match_dup 4))])])
(define_expand "stack_protect_test"
[(match_operand 0 "memory_operand")
(symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
+;; FXSAVE64 into BLK memory operand 0 (volatile UNSPECV_FXSAVE64);
+;; requires TARGET_64BIT && TARGET_FXSR.  After this change the memory
+;; constraint is "jm" and the "gpr32" attribute is set to 0.
+;; NOTE(review): "jm"/"gpr32" are defined outside this hunk; presumably
+;; they keep the extended GPRs out of the address -- confirm.
+;; The length is the default address length plus 4.
(define_insn "fxsave64"
- [(set (match_operand:BLK 0 "memory_operand" "=m")
+ [(set (match_operand:BLK 0 "memory_operand" "=jm")
(unspec_volatile:BLK [(const_int 0)] UNSPECV_FXSAVE64))]
"TARGET_64BIT && TARGET_FXSR"
"fxsave64\t%0"
[(set_attr "type" "other")
+ (set_attr "gpr32" "0")
(set_attr "memory" "store")
(set (attr "length")
(symbol_ref "ix86_attr_length_address_default (insn) + 4"))])
(symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
+;; FXRSTOR64 from BLK memory operand 0 (volatile UNSPECV_FXRSTOR64);
+;; requires TARGET_64BIT && TARGET_FXSR.  After this change the memory
+;; constraint is "jm" and the "gpr32" attribute is set to 0.
+;; NOTE(review): "jm"/"gpr32" are defined outside this hunk; presumably
+;; they keep the extended GPRs out of the address -- confirm.
+;; The length is the default address length plus 4.
(define_insn "fxrstor64"
- [(unspec_volatile [(match_operand:BLK 0 "memory_operand" "m")]
+ [(unspec_volatile [(match_operand:BLK 0 "memory_operand" "jm")]
UNSPECV_FXRSTOR64)]
"TARGET_64BIT && TARGET_FXSR"
"fxrstor64\t%0"
[(set_attr "type" "other")
+ (set_attr "gpr32" "0")
(set_attr "memory" "load")
(set (attr "length")
(symbol_ref "ix86_attr_length_address_default (insn) + 4"))])
(symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
(define_insn "<xsave>_rex64"
- [(set (match_operand:BLK 0 "memory_operand" "=m")
+ [(set (match_operand:BLK 0 "memory_operand" "=jm")
(unspec_volatile:BLK
[(match_operand:SI 1 "register_operand" "a")
(match_operand:SI 2 "register_operand" "d")]
"<xsave>\t%0"
[(set_attr "type" "other")
(set_attr "memory" "store")
+ (set_attr "gpr32" "0")
(set (attr "length")
(symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
(define_insn "<xsave>"
- [(set (match_operand:BLK 0 "memory_operand" "=m")
+ [(set (match_operand:BLK 0 "memory_operand" "=jm")
(unspec_volatile:BLK
[(match_operand:SI 1 "register_operand" "a")
(match_operand:SI 2 "register_operand" "d")]
"<xsave>\t%0"
[(set_attr "type" "other")
(set_attr "memory" "store")
+ (set_attr "gpr32" "0")
(set (attr "length")
(symbol_ref "ix86_attr_length_address_default (insn) + 4"))])
(define_insn "<xrstor>_rex64"
[(unspec_volatile:BLK
- [(match_operand:BLK 0 "memory_operand" "m")
+ [(match_operand:BLK 0 "memory_operand" "jm")
(match_operand:SI 1 "register_operand" "a")
(match_operand:SI 2 "register_operand" "d")]
ANY_XRSTOR)]
"<xrstor>\t%0"
[(set_attr "type" "other")
(set_attr "memory" "load")
+ (set_attr "gpr32" "0")
(set (attr "length")
(symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
(define_insn "<xrstor>64"
[(unspec_volatile:BLK
- [(match_operand:BLK 0 "memory_operand" "m")
+ [(match_operand:BLK 0 "memory_operand" "jm")
(match_operand:SI 1 "register_operand" "a")
(match_operand:SI 2 "register_operand" "d")]
ANY_XRSTOR64)]
"<xrstor>64\t%0"
[(set_attr "type" "other")
(set_attr "memory" "load")
+ (set_attr "gpr32" "0")
(set (attr "length")
(symbol_ref "ix86_attr_length_address_default (insn) + 4"))])
"TARGET_64BIT && TARGET_FSGSBASE"
"rd<fsgs>base\t%0"
[(set_attr "type" "other")
- (set_attr "prefix_extra" "2")])
+ (set_attr "prefix_0f" "1")
+ (set_attr "prefix_rep" "1")])
(define_insn "wr<fsgs>base<mode>"
[(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")]
"TARGET_64BIT && TARGET_FSGSBASE"
"wr<fsgs>base\t%0"
[(set_attr "type" "other")
- (set_attr "prefix_extra" "2")])
+ (set_attr "prefix_0f" "1")
+ (set_attr "prefix_rep" "1")])
(define_insn "ptwrite<mode>"
[(unspec_volatile [(match_operand:SWI48 0 "nonimmediate_operand" "rm")]
"TARGET_PTWRITE"
"ptwrite\t%0"
[(set_attr "type" "other")
- (set_attr "prefix_extra" "2")])
+ (set_attr "prefix_0f" "1")
+ (set_attr "prefix_rep" "1")])
(define_insn "@rdrand<mode>"
[(set (match_operand:SWI248 0 "register_operand" "=r")
"TARGET_RDRND"
"rdrand\t%0"
[(set_attr "type" "other")
- (set_attr "prefix_extra" "1")])
+ (set_attr "prefix_0f" "1")])
(define_insn "@rdseed<mode>"
[(set (match_operand:SWI248 0 "register_operand" "=r")
"TARGET_RDSEED"
"rdseed\t%0"
[(set_attr "type" "other")
- (set_attr "prefix_extra" "1")])
+ (set_attr "prefix_0f" "1")])
(define_expand "pause"
[(set (match_dup 0)
DONE;
})
+;; URDMSR: volatile UNSPECV_URDMSR of DImode operand 1 (a register or an
+;; immediate accepted by x86_64_szext_nonmemory_operand) with the result
+;; written to DImode register operand 0.  Requires TARGET_USER_MSR &&
+;; TARGET_64BIT.
+(define_insn "urdmsr"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec_volatile:DI
+ [(match_operand:DI 1 "x86_64_szext_nonmemory_operand" "reZ")]
+ UNSPECV_URDMSR))]
+ "TARGET_USER_MSR && TARGET_64BIT"
+ "urdmsr\t{%1, %0|%0, %1}"
+ [(set_attr "prefix" "vex")
+ (set_attr "type" "other")])
+
+;; UWRMSR: volatile UNSPECV_UWRMSR taking DImode operand 0 (a register
+;; or an immediate accepted by x86_64_szext_nonmemory_operand) and
+;; DImode register operand 1; the pattern sets no register.  Requires
+;; TARGET_USER_MSR && TARGET_64BIT.
+(define_insn "uwrmsr"
+ [(unspec_volatile
+ [(match_operand:DI 0 "x86_64_szext_nonmemory_operand" "reZ")
+ (match_operand:DI 1 "register_operand" "r")]
+ UNSPECV_UWRMSR)]
+ "TARGET_USER_MSR && TARGET_64BIT"
+ "uwrmsr\t{%1, %0|%0, %1}"
+ [(set_attr "prefix" "vex")
+ (set_attr "type" "other")])
+
(include "mmx.md")
(include "sse.md")
(include "sync.md")