]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/i386/i386.md
Fix ICE due to incorrect insn type.
[thirdparty/gcc.git] / gcc / config / i386 / i386.md
1 ;; GCC machine description for IA-32 and x86-64.
2 ;; Copyright (C) 1988-2022 Free Software Foundation, Inc.
3 ;; Mostly by William Schelter.
4 ;; x86_64 support added by Jan Hubicka
5 ;;
6 ;; This file is part of GCC.
7 ;;
8 ;; GCC is free software; you can redistribute it and/or modify
9 ;; it under the terms of the GNU General Public License as published by
10 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; any later version.
12 ;;
13 ;; GCC is distributed in the hope that it will be useful,
14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ;; GNU General Public License for more details.
17 ;;
18 ;; You should have received a copy of the GNU General Public License
19 ;; along with GCC; see the file COPYING3. If not see
20 ;; <http://www.gnu.org/licenses/>. */
21 ;;
22 ;; The original PO technology requires these to be ordered by speed,
23 ;; so that assigner will pick the fastest.
24 ;;
25 ;; See file "rtl.def" for documentation on define_insn, match_*, et al.
26 ;;
27 ;; The special asm out single letter directives following a '%' are:
28 ;; L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
29 ;; C -- print opcode suffix for set/cmov insn.
30 ;; c -- like C, but print reversed condition
31 ;; F,f -- likewise, but for floating-point.
32 ;; O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
33 ;; otherwise nothing
34 ;; R -- print the prefix for register names.
35 ;; z -- print the opcode suffix for the size of the current operand.
36 ;; Z -- likewise, with special suffixes for x87 instructions.
37 ;; * -- print a star (in certain assembler syntax)
38 ;; A -- print an absolute memory reference.
39 ;; E -- print address with DImode register names if TARGET_64BIT.
40 ;; w -- print the operand as if it's a "word" (HImode) even if it isn't.
41 ;; s -- print a shift double count, followed by the assembler's argument
42 ;; delimiter.
43 ;; b -- print the QImode name of the register for the indicated operand.
44 ;; %b0 would print %al if operands[0] is reg 0.
45 ;; w -- likewise, print the HImode name of the register.
46 ;; k -- likewise, print the SImode name of the register.
47 ;; q -- likewise, print the DImode name of the register.
48 ;; x -- likewise, print the V4SFmode name of the register.
49 ;; t -- likewise, print the V8SFmode name of the register.
50 ;; h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
51 ;; y -- print "st(0)" instead of "st" as a register.
52 ;; d -- print duplicated register operand for AVX instruction.
53 ;; D -- print condition for SSE cmp instruction.
54 ;; P -- if PIC, print an @PLT suffix.
55 ;; p -- print raw symbol name.
56 ;; X -- don't print any sort of PIC '@' suffix for a symbol.
57 ;; & -- print some in-use local-dynamic symbol name.
58 ;; H -- print a memory address offset by 8; used for sse high-parts
59 ;; K -- print HLE lock prefix
60 ;; Y -- print condition for XOP pcom* instruction.
61 ;; + -- print a branch hint as 'cs' or 'ds' prefix
62 ;; ; -- print a semicolon (after prefixes due to bug in older gas).
63 ;; ~ -- print "i" if TARGET_AVX2, "f" otherwise.
64 ;; ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
65 ;; ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
66
67 (define_c_enum "unspec" [
68 ;; Relocation specifiers
69 UNSPEC_GOT
70 UNSPEC_GOTOFF
71 UNSPEC_GOTPCREL
72 UNSPEC_GOTTPOFF
73 UNSPEC_TPOFF
74 UNSPEC_NTPOFF
75 UNSPEC_DTPOFF
76 UNSPEC_GOTNTPOFF
77 UNSPEC_INDNTPOFF
78 UNSPEC_PLTOFF
79 UNSPEC_MACHOPIC_OFFSET
80 UNSPEC_PCREL
81 UNSPEC_SIZEOF
82
83 ;; Prologue support
84 UNSPEC_STACK_ALLOC
85 UNSPEC_SET_GOT
86 UNSPEC_SET_RIP
87 UNSPEC_SET_GOT_OFFSET
88 UNSPEC_MEMORY_BLOCKAGE
89 UNSPEC_PROBE_STACK
90
91 ;; TLS support
92 UNSPEC_TP
93 UNSPEC_TLS_GD
94 UNSPEC_TLS_LD_BASE
95 UNSPEC_TLSDESC
96 UNSPEC_TLS_IE_SUN
97
98 ;; Other random patterns
99 UNSPEC_SCAS
100 UNSPEC_FNSTSW
101 UNSPEC_SAHF
102 UNSPEC_NOTRAP
103 UNSPEC_PARITY
104 UNSPEC_FSTCW
105 UNSPEC_REP
106 UNSPEC_LD_MPIC ; load_macho_picbase
107 UNSPEC_TRUNC_NOOP
108 UNSPEC_DIV_ALREADY_SPLIT
109 UNSPEC_PAUSE
110 UNSPEC_LEA_ADDR
111 UNSPEC_XBEGIN_ABORT
112 UNSPEC_STOS
113 UNSPEC_PEEPSIB
114 UNSPEC_INSN_FALSE_DEP
115 UNSPEC_SBB
116 UNSPEC_CC_NE
117
118 ;; For SSE/MMX support:
119 UNSPEC_FIX_NOTRUNC
120 UNSPEC_MASKMOV
121 UNSPEC_MOVCC_MASK
122 UNSPEC_MOVMSK
123 UNSPEC_BLENDV
124 UNSPEC_PSHUFB
125 UNSPEC_XOP_PERMUTE
126 UNSPEC_RCP
127 UNSPEC_RSQRT
128 UNSPEC_PSADBW
129
130 ;; For AVX/AVX512F support
131 UNSPEC_SCALEF
132 UNSPEC_PCMP
133 UNSPEC_CVTBFSF
134
135 ;; Generic math support
136 UNSPEC_IEEE_MIN ; not commutative
137 UNSPEC_IEEE_MAX ; not commutative
138
139 ;; x87 Floating point
140 UNSPEC_SIN
141 UNSPEC_COS
142 UNSPEC_FPATAN
143 UNSPEC_FYL2X
144 UNSPEC_FYL2XP1
145 UNSPEC_FRNDINT
146 UNSPEC_FIST
147 UNSPEC_F2XM1
148 UNSPEC_TAN
149 UNSPEC_FXAM
150
151 ;; x87 Rounding
152 UNSPEC_FRNDINT_ROUNDEVEN
153 UNSPEC_FRNDINT_FLOOR
154 UNSPEC_FRNDINT_CEIL
155 UNSPEC_FRNDINT_TRUNC
156 UNSPEC_FIST_FLOOR
157 UNSPEC_FIST_CEIL
158
159 ;; x87 Double output FP
160 UNSPEC_SINCOS_COS
161 UNSPEC_SINCOS_SIN
162 UNSPEC_XTRACT_FRACT
163 UNSPEC_XTRACT_EXP
164 UNSPEC_FSCALE_FRACT
165 UNSPEC_FSCALE_EXP
166 UNSPEC_FPREM_F
167 UNSPEC_FPREM_U
168 UNSPEC_FPREM1_F
169 UNSPEC_FPREM1_U
170
171 UNSPEC_C2_FLAG
172 UNSPEC_FXAM_MEM
173
174 ;; SSP patterns
175 UNSPEC_SP_SET
176 UNSPEC_SP_TEST
177
178 ;; For ROUND support
179 UNSPEC_ROUND
180
181 ;; For CRC32 support
182 UNSPEC_CRC32
183
184 ;; For LZCNT support
185 UNSPEC_LZCNT
186
187 ;; For BMI support
188 UNSPEC_TZCNT
189 UNSPEC_BEXTR
190
191 ;; For BMI2 support
192 UNSPEC_PDEP
193 UNSPEC_PEXT
194
195 ;; IRET support
196 UNSPEC_INTERRUPT_RETURN
197
198 ;; For MOVDIRI and MOVDIR64B support
199 UNSPEC_MOVDIRI
200 UNSPEC_MOVDIR64B
201
202 ;; For insn_callee_abi:
203 UNSPEC_CALLEE_ABI
204
205 ])
206
207 (define_c_enum "unspecv" [
208 UNSPECV_UD2
209 UNSPECV_BLOCKAGE
210 UNSPECV_STACK_PROBE
211 UNSPECV_PROBE_STACK_RANGE
212 UNSPECV_ALIGN
213 UNSPECV_PROLOGUE_USE
214 UNSPECV_SPLIT_STACK_RETURN
215 UNSPECV_CLD
216 UNSPECV_NOPS
217 UNSPECV_RDTSC
218 UNSPECV_RDTSCP
219 UNSPECV_RDPMC
220 UNSPECV_LLWP_INTRINSIC
221 UNSPECV_SLWP_INTRINSIC
222 UNSPECV_LWPVAL_INTRINSIC
223 UNSPECV_LWPINS_INTRINSIC
224 UNSPECV_RDFSBASE
225 UNSPECV_RDGSBASE
226 UNSPECV_WRFSBASE
227 UNSPECV_WRGSBASE
228 UNSPECV_FXSAVE
229 UNSPECV_FXRSTOR
230 UNSPECV_FXSAVE64
231 UNSPECV_FXRSTOR64
232 UNSPECV_XSAVE
233 UNSPECV_XRSTOR
234 UNSPECV_XSAVE64
235 UNSPECV_XRSTOR64
236 UNSPECV_XSAVEOPT
237 UNSPECV_XSAVEOPT64
238 UNSPECV_XSAVES
239 UNSPECV_XRSTORS
240 UNSPECV_XSAVES64
241 UNSPECV_XRSTORS64
242 UNSPECV_XSAVEC
243 UNSPECV_XSAVEC64
244 UNSPECV_XGETBV
245 UNSPECV_XSETBV
246 UNSPECV_WBINVD
247 UNSPECV_WBNOINVD
248
249 ;; For atomic compound assignments.
250 UNSPECV_FNSTENV
251 UNSPECV_FLDENV
252 UNSPECV_FNSTSW
253 UNSPECV_FNCLEX
254
255 ;; For RDRAND support
256 UNSPECV_RDRAND
257
258 ;; For RDSEED support
259 UNSPECV_RDSEED
260
261 ;; For RTM support
262 UNSPECV_XBEGIN
263 UNSPECV_XEND
264 UNSPECV_XABORT
265 UNSPECV_XTEST
266
267 UNSPECV_NLGR
268
269 ;; For CLWB support
270 UNSPECV_CLWB
271
272 ;; For CLFLUSHOPT support
273 UNSPECV_CLFLUSHOPT
274
275 ;; For MONITORX and MWAITX support
276 UNSPECV_MONITORX
277 UNSPECV_MWAITX
278
279 ;; For CLZERO support
280 UNSPECV_CLZERO
281
282 ;; For RDPKRU and WRPKRU support
283 UNSPECV_PKU
284
285 ;; For RDPID support
286 UNSPECV_RDPID
287
288 ;; For CET support
289 UNSPECV_NOP_ENDBR
290 UNSPECV_NOP_RDSSP
291 UNSPECV_INCSSP
292 UNSPECV_SAVEPREVSSP
293 UNSPECV_RSTORSSP
294 UNSPECV_WRSS
295 UNSPECV_WRUSS
296 UNSPECV_SETSSBSY
297 UNSPECV_CLRSSBSY
298
299 ;; For TSXLDTRK support
300 UNSPECV_XSUSLDTRK
301 UNSPECV_XRESLDTRK
302
303 ;; For WAITPKG support
304 UNSPECV_UMWAIT
305 UNSPECV_UMONITOR
306 UNSPECV_TPAUSE
307
308 ;; For UINTR support
309 UNSPECV_CLUI
310 UNSPECV_STUI
311 UNSPECV_TESTUI
312 UNSPECV_SENDUIPI
313
314 ;; For CLDEMOTE support
315 UNSPECV_CLDEMOTE
316
317 ;; For Speculation Barrier support
318 UNSPECV_SPECULATION_BARRIER
319
320 UNSPECV_PTWRITE
321
322 ;; For ENQCMD and ENQCMDS support
323 UNSPECV_ENQCMD
324 UNSPECV_ENQCMDS
325
326 ;; For SERIALIZE support
327 UNSPECV_SERIALIZE
328
329 ;; For patchable area support
330 UNSPECV_PATCHABLE_AREA
331
332 ;; For HRESET support
333 UNSPECV_HRESET
334
335 ;; For PREFETCHI support
336 UNSPECV_PREFETCHI
337 ])
338
339 ;; Constants to represent rounding modes in the ROUND instruction
340 (define_constants
341 [(ROUND_ROUNDEVEN 0x0)
342 (ROUND_FLOOR 0x1)
343 (ROUND_CEIL 0x2)
344 (ROUND_TRUNC 0x3)
345 (ROUND_MXCSR 0x4)
346 (ROUND_NO_EXC 0x8)
347 ])
348
349 ;; Constants to represent AVX512F embedded rounding
350 (define_constants
351 [(ROUND_NEAREST_INT 0)
352 (ROUND_NEG_INF 1)
353 (ROUND_POS_INF 2)
354 (ROUND_ZERO 3)
355 (NO_ROUND 4)
356 (ROUND_SAE 8)
357 ])
358
359 ;; Constants to represent pcomtrue/pcomfalse variants
360 (define_constants
361 [(PCOM_FALSE 0)
362 (PCOM_TRUE 1)
363 (COM_FALSE_S 2)
364 (COM_FALSE_P 3)
365 (COM_TRUE_S 4)
366 (COM_TRUE_P 5)
367 ])
368
369 ;; Constants used in the XOP pperm instruction
370 (define_constants
371 [(PPERM_SRC 0x00) /* copy source */
372 (PPERM_INVERT 0x20) /* invert source */
373 (PPERM_REVERSE 0x40) /* bit reverse source */
374 (PPERM_REV_INV 0x60) /* bit reverse & invert src */
375 (PPERM_ZERO 0x80) /* all 0's */
376 (PPERM_ONES 0xa0) /* all 1's */
377 (PPERM_SIGN 0xc0) /* propagate sign bit */
378 (PPERM_INV_SIGN 0xe0) /* invert & propagate sign */
379 (PPERM_SRC1 0x00) /* use first source byte */
380 (PPERM_SRC2 0x10) /* use second source byte */
381 ])
382
383 ;; Registers by name.
384 (define_constants
385 [(AX_REG 0)
386 (DX_REG 1)
387 (CX_REG 2)
388 (BX_REG 3)
389 (SI_REG 4)
390 (DI_REG 5)
391 (BP_REG 6)
392 (SP_REG 7)
393 (ST0_REG 8)
394 (ST1_REG 9)
395 (ST2_REG 10)
396 (ST3_REG 11)
397 (ST4_REG 12)
398 (ST5_REG 13)
399 (ST6_REG 14)
400 (ST7_REG 15)
401 (ARGP_REG 16)
402 (FLAGS_REG 17)
403 (FPSR_REG 18)
404 (FRAME_REG 19)
405 (XMM0_REG 20)
406 (XMM1_REG 21)
407 (XMM2_REG 22)
408 (XMM3_REG 23)
409 (XMM4_REG 24)
410 (XMM5_REG 25)
411 (XMM6_REG 26)
412 (XMM7_REG 27)
413 (MM0_REG 28)
414 (MM1_REG 29)
415 (MM2_REG 30)
416 (MM3_REG 31)
417 (MM4_REG 32)
418 (MM5_REG 33)
419 (MM6_REG 34)
420 (MM7_REG 35)
421 (R8_REG 36)
422 (R9_REG 37)
423 (R10_REG 38)
424 (R11_REG 39)
425 (R12_REG 40)
426 (R13_REG 41)
427 (R14_REG 42)
428 (R15_REG 43)
429 (XMM8_REG 44)
430 (XMM9_REG 45)
431 (XMM10_REG 46)
432 (XMM11_REG 47)
433 (XMM12_REG 48)
434 (XMM13_REG 49)
435 (XMM14_REG 50)
436 (XMM15_REG 51)
437 (XMM16_REG 52)
438 (XMM17_REG 53)
439 (XMM18_REG 54)
440 (XMM19_REG 55)
441 (XMM20_REG 56)
442 (XMM21_REG 57)
443 (XMM22_REG 58)
444 (XMM23_REG 59)
445 (XMM24_REG 60)
446 (XMM25_REG 61)
447 (XMM26_REG 62)
448 (XMM27_REG 63)
449 (XMM28_REG 64)
450 (XMM29_REG 65)
451 (XMM30_REG 66)
452 (XMM31_REG 67)
453 (MASK0_REG 68)
454 (MASK1_REG 69)
455 (MASK2_REG 70)
456 (MASK3_REG 71)
457 (MASK4_REG 72)
458 (MASK5_REG 73)
459 (MASK6_REG 74)
460 (MASK7_REG 75)
461 (FIRST_PSEUDO_REG 76)
462 ])
463
464 ;; Insn callee abi index.
465 (define_constants
466 [(ABI_DEFAULT 0)
467 (ABI_VZEROUPPER 1)
468 (ABI_UNKNOWN 2)])
469
470 ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
471 ;; from i386.cc.
472
473 ;; In C guard expressions, put expressions which may be compile-time
474 ;; constants first. This allows for better optimization. For
475 ;; example, write "TARGET_64BIT && reload_completed", not
476 ;; "reload_completed && TARGET_64BIT".
477
478 \f
479 ;; Processor type.
480 (define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,nehalem,
481 atom,slm,glm,haswell,generic,lujiazui,amdfam10,bdver1,
482 bdver2,bdver3,bdver4,btver2,znver1,znver2,znver3,znver4"
483 (const (symbol_ref "ix86_schedule")))
484
485 ;; A basic instruction type. Refinements due to arguments to be
486 ;; provided in other attributes.
487 (define_attr "type"
488 "other,multi,
489 alu,alu1,negnot,imov,imovx,lea,
490 incdec,ishift,ishiftx,ishift1,rotate,rotatex,rotate1,
491 imul,imulx,idiv,icmp,test,ibr,setcc,icmov,
492 push,pop,call,callv,leave,
493 str,bitmanip,
494 fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,
495 fxch,fistp,fisttp,frndint,
496 sse,ssemov,sseadd,sseadd1,sseiadd,sseiadd1,
497 ssemul,sseimul,ssediv,sselog,sselog1,
498 sseishft,sseishft1,ssecmp,ssecomi,
499 ssecvt,ssecvt1,sseicvt,sseins,
500 sseshuf,sseshuf1,ssemuladd,sse4arg,
501 lwp,mskmov,msklog,
502 mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
503 (const_string "other"))
504
505 ;; Main data type used by the insn
506 (define_attr "mode"
507 "unknown,none,QI,HI,SI,DI,TI,OI,XI,HF,BF,SF,DF,XF,TF,V32HF,V16HF,V8HF,
508 V16SF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,V8DF,V4HF,V4BF,V2HF,V2BF"
509 (const_string "unknown"))
510
511 ;; The CPU unit an insn uses, derived from its "type" attribute.
;; The floating-point (f*) types in the first list select "i387", the
;; sse* types plus mskmov select "sse", the mmx* types select "mmx" and
;; type "other" selects "unknown".  Any type not listed here falls
;; through to the default, "integer".
512 (define_attr "unit" "integer,i387,sse,mmx,unknown"
513 (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,
514 fxch,fistp,fisttp,frndint")
515 (const_string "i387")
516 (eq_attr "type" "sse,ssemov,sseadd,sseadd1,sseiadd,sseiadd1,
517 ssemul,sseimul,ssediv,sselog,sselog1,
518 sseishft,sseishft1,ssecmp,ssecomi,
519 ssecvt,ssecvt1,sseicvt,sseins,
520 sseshuf,sseshuf1,ssemuladd,sse4arg,mskmov")
521 (const_string "sse")
522 (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
523 (const_string "mmx")
524 (eq_attr "type" "other")
525 (const_string "unknown")]
526 (const_string "integer")))
527
528 ;; The (bounding maximum) length of an instruction immediate.
;; Clauses are tested in order.  An insn that matches none of them
;; reaches the default, which calls gcc_unreachable; such an insn must
;; either be given a matching clause here or set this attribute itself.
529 (define_attr "length_immediate" ""
530 (cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave,
531 bitmanip,imulx,msklog,mskmov")
532 (const_int 0)
533 (eq_attr "unit" "i387,sse,mmx")
534 (const_int 0)
;; For the next two groups the length is computed in C; the groups
;; differ only in the boolean passed as the second argument.
;; NOTE(review): presumably that flag says whether a short immediate
;; form exists -- confirm against ix86_attr_length_immediate_default.
535 (eq_attr "type" "alu,alu1,negnot,imovx,ishift,ishiftx,ishift1,
536 rotate,rotatex,rotate1,imul,icmp,push,pop")
537 (symbol_ref "ix86_attr_length_immediate_default (insn, true)")
538 (eq_attr "type" "imov,test")
539 (symbol_ref "ix86_attr_length_immediate_default (insn, false)")
;; call tests operand 0 and callv tests operand 1: 4 bytes when the
;; operand matches constant_call_address_operand, otherwise 0.
540 (eq_attr "type" "call")
541 (if_then_else (match_operand 0 "constant_call_address_operand")
542 (const_int 4)
543 (const_int 0))
544 (eq_attr "type" "callv")
545 (if_then_else (match_operand 1 "constant_call_address_operand")
546 (const_int 4)
547 (const_int 0))
548 ;; We don't know the size before shorten_branches. Expect
549 ;; the instruction to fit for better scheduling.
550 (eq_attr "type" "ibr")
551 (const_int 1)
552 ]
553 (symbol_ref "/* Update immediate_length and other attributes! */
554 gcc_unreachable (),1")))
555
556 ;; The (bounding maximum) length of an instruction address.
;; Zero for types str, other, multi and fxch, and for call/callv insns
;; whose operand 0 (call) or operand 1 (callv) matches
;; constant_call_address_operand.  Everything else is computed in C by
;; ix86_attr_length_address_default.
557 (define_attr "length_address" ""
558 (cond [(eq_attr "type" "str,other,multi,fxch")
559 (const_int 0)
560 (and (eq_attr "type" "call")
561 (match_operand 0 "constant_call_address_operand"))
562 (const_int 0)
563 (and (eq_attr "type" "callv")
564 (match_operand 1 "constant_call_address_operand"))
565 (const_int 0)
566 ]
567 (symbol_ref "ix86_attr_length_address_default (insn)")))
568
569 ;; Set when the length (data16) prefix is used.
;; Always 0 for types ssemuladd, sse4arg, sseiadd1 and ssecvt1.
;; Otherwise 1 for HImode insns and for insns of the sse unit whose
;; mode is V2DF or TI; 0 in all remaining cases.
570 (define_attr "prefix_data16" ""
571 (cond [(eq_attr "type" "ssemuladd,sse4arg,sseiadd1,ssecvt1")
572 (const_int 0)
573 (eq_attr "mode" "HI")
574 (const_int 1)
575 (and (eq_attr "unit" "sse") (eq_attr "mode" "V2DF,TI"))
576 (const_int 1)
577 ]
578 (const_int 0)))
579
580 ;; Set when string REP prefix is used.
581 (define_attr "prefix_rep" ""
582 (cond [(eq_attr "type" "ssemuladd,sse4arg,sseiadd1,ssecvt1")
583 (const_int 0)
584 (and (eq_attr "unit" "sse") (eq_attr "mode" "SF,DF"))
585 (const_int 1)
586 ]
587 (const_int 0)))
588
589 ;; Set when 0f opcode prefix is used.
590 (define_attr "prefix_0f" ""
591 (if_then_else
592 (ior (eq_attr "type" "imovx,setcc,icmov,bitmanip,msklog,mskmov")
593 (eq_attr "unit" "sse,mmx"))
594 (const_int 1)
595 (const_int 0)))
596
597 ;; Set when REX opcode prefix is used.
;; Always 0 when TARGET_64BIT is false.  Otherwise 1 for DImode insns
;; (except types push, pop, call, callv, leave, ibr and except the mmx
;; unit), for QImode insns where x86_extended_QIreg_mentioned_p holds,
;; for any insn where x86_extended_reg_mentioned_p holds, and for imovx
;; insns whose QImode operand 1 matches ext_QIreg_operand.
598 (define_attr "prefix_rex" ""
599 (cond [(not (match_test "TARGET_64BIT"))
600 (const_int 0)
601 (and (eq_attr "mode" "DI")
;; The leading "!" negates the whole type list, not just "push".
602 (and (eq_attr "type" "!push,pop,call,callv,leave,ibr")
603 (eq_attr "unit" "!mmx")))
604 (const_int 1)
605 (and (eq_attr "mode" "QI")
606 (match_test "x86_extended_QIreg_mentioned_p (insn)"))
607 (const_int 1)
608 (match_test "x86_extended_reg_mentioned_p (insn)")
609 (const_int 1)
610 (and (eq_attr "type" "imovx")
611 (match_operand:QI 1 "ext_QIreg_operand"))
612 (const_int 1)
613 ]
614 (const_int 0)))
615
616 ;; There are also additional prefixes in 3DNOW, SSSE3.
617 ;; ssemuladd,sse4arg default to 0f24/0f25 and DREX byte,
618 ;; sseiadd1,ssecvt1 to 0f7a with no DREX byte.
619 ;; 3DNOW has 0f0f prefix, SSSE3 and SSE4_{1,2} 0f38/0f3a.
620 (define_attr "prefix_extra" ""
621 (cond [(eq_attr "type" "ssemuladd,sse4arg")
622 (const_int 2)
623 (eq_attr "type" "sseiadd1,ssecvt1")
624 (const_int 1)
625 ]
626 (const_int 0)))
627
628 ;; Prefix used: original, VEX, maybe VEX, EVEX or maybe EVEX.
;; The default is derived from the "mode" attribute only: OI, V8SF and
;; V4DF give "vex"; XI, V16SF and V8DF give "evex"; anything else gives
;; "orig".  The default never yields "maybe_vex" or "maybe_evex", so
;; those values only arise when an insn sets the attribute explicitly.
629 (define_attr "prefix" "orig,vex,maybe_vex,evex,maybe_evex"
630 (cond [(eq_attr "mode" "OI,V8SF,V4DF")
631 (const_string "vex")
632 (eq_attr "mode" "XI,V16SF,V8DF")
633 (const_string "evex")
634 ]
635 (const_string "orig")))
636
637 ;; VEX W bit is used.
638 (define_attr "prefix_vex_w" "" (const_int 0))
639
640 ;; The length of VEX prefix
641 ;; Only instructions with 0f prefix can have 2 byte VEX prefix,
642 ;; 0f38/0f3a prefixes can't. In i386.md 0f3[8a] is
643 ;; still prefix_0f 1, with prefix_extra 1.
;; The value is computed in C by ix86_attr_length_vex_default.  Its
;; first boolean is true when prefix_0f is 1 and prefix_extra is 0; its
;; second boolean is true when prefix_vex_w is 1.
644 (define_attr "length_vex" ""
645 (if_then_else (and (eq_attr "prefix_0f" "1")
646 (eq_attr "prefix_extra" "0"))
647 (if_then_else (eq_attr "prefix_vex_w" "1")
648 (symbol_ref "ix86_attr_length_vex_default (insn, true, true)")
649 (symbol_ref "ix86_attr_length_vex_default (insn, true, false)"))
650 (if_then_else (eq_attr "prefix_vex_w" "1")
651 (symbol_ref "ix86_attr_length_vex_default (insn, false, true)")
652 (symbol_ref "ix86_attr_length_vex_default (insn, false, false)"))))
653
654 ;; 4-bytes evex prefix and 1 byte opcode.
655 (define_attr "length_evex" "" (const_int 5))
656
657 ;; Set when modrm byte is used.
;; The default is 1; the clauses below list the cases that yield 0 (or,
;; for the last clause, a computed value).  Clauses are tested in order.
658 (define_attr "modrm" ""
659 (cond [(eq_attr "type" "str,leave")
660 (const_int 0)
661 (eq_attr "unit" "i387")
662 (const_int 0)
;; incdec on an SImode or HImode register operand 1, only when
;; TARGET_64BIT is false.
663 (and (eq_attr "type" "incdec")
664 (and (not (match_test "TARGET_64BIT"))
665 (ior (match_operand:SI 1 "register_operand")
666 (match_operand:HI 1 "register_operand"))))
667 (const_int 0)
;; push/pop of anything that is not a memory operand.
668 (and (eq_attr "type" "push")
669 (not (match_operand 1 "memory_operand")))
670 (const_int 0)
671 (and (eq_attr "type" "pop")
672 (not (match_operand 0 "memory_operand")))
673 (const_int 0)
;; Non-DImode imov: immediate into a register, or a move between an
;; ax_reg_operand and a displacement-only memory operand (either way).
674 (and (eq_attr "type" "imov")
675 (and (not (eq_attr "mode" "DI"))
676 (ior (and (match_operand 0 "register_operand")
677 (match_operand 1 "immediate_operand"))
678 (ior (and (match_operand 0 "ax_reg_operand")
679 (match_operand 1 "memory_displacement_only_operand"))
680 (and (match_operand 0 "memory_displacement_only_operand")
681 (match_operand 1 "ax_reg_operand"))))))
682 (const_int 0)
683 (and (eq_attr "type" "call")
684 (match_operand 0 "constant_call_address_operand"))
685 (const_int 0)
686 (and (eq_attr "type" "callv")
687 (match_operand 1 "constant_call_address_operand"))
688 (const_int 0)
;; With operand 0 matching ax_reg_operand the result is 1 only while the
;; immediate length is at most 0 (QImode) or at most 1 (other modes);
;; a longer immediate yields 0.
689 (and (eq_attr "type" "alu,alu1,icmp,test")
690 (match_operand 0 "ax_reg_operand"))
691 (symbol_ref "(get_attr_length_immediate (insn) <= (get_attr_mode (insn) != MODE_QI))")
692 ]
693 (const_int 1)))
694
695 ;; The (bounding maximum) length of an instruction in bytes.
696 ;; ??? fistp and frndint are in fact fldcw/{fistp,frndint}/fldcw sequences.
697 ;; Later we may want to split them and compute proper length as for
698 ;; other insns.
;; Clause order matters: the EVEX clause is tested before the VEX
;; clause, so when TARGET_AVX512F is set a maybe_vex or maybe_evex insn
;; is sized with length_evex rather than length_vex.
699 (define_attr "length" ""
;; Fixed estimates for types whose real encoding is not modelled here.
700 (cond [(eq_attr "type" "other,multi,fistp,frndint")
701 (const_int 16)
702 (eq_attr "type" "fcmp")
703 (const_int 4)
;; Remaining i387 insns: 2 + prefix_data16 + length_address.
704 (eq_attr "unit" "i387")
705 (plus (const_int 2)
706 (plus (attr "prefix_data16")
707 (attr "length_address")))
;; EVEX: length_evex + length_immediate + modrm + length_address.
708 (ior (eq_attr "prefix" "evex")
709 (and (ior (eq_attr "prefix" "maybe_evex")
710 (eq_attr "prefix" "maybe_vex"))
711 (match_test "TARGET_AVX512F")))
712 (plus (attr "length_evex")
713 (plus (attr "length_immediate")
714 (plus (attr "modrm")
715 (attr "length_address"))))
;; VEX: length_vex + length_immediate + modrm + length_address.
716 (ior (eq_attr "prefix" "vex")
717 (and (ior (eq_attr "prefix" "maybe_vex")
718 (eq_attr "prefix" "maybe_evex"))
719 (match_test "TARGET_AVX")))
720 (plus (attr "length_vex")
721 (plus (attr "length_immediate")
722 (plus (attr "modrm")
723 (attr "length_address"))))]
;; Default: sum of all prefix attributes, modrm, immediate and address
;; lengths, plus a constant 1 (presumably the opcode byte -- confirm).
724 (plus (plus (attr "modrm")
725 (plus (attr "prefix_0f")
726 (plus (attr "prefix_rex")
727 (plus (attr "prefix_extra")
728 (const_int 1)))))
729 (plus (attr "prefix_rep")
730 (plus (attr "prefix_data16")
731 (plus (attr "length_immediate")
732 (attr "length_address")))))))
733
734 ;; The `memory' attribute is `none' if no memory is referenced, `load' or
735 ;; `store' if there is a simple memory reference therein, `both' if the
736 ;; insn both loads and stores, or `unknown' if the instruction is complex.
;; Clauses are tested in order: type-specific rules come first, then a
;; generic classification based on which operands match memory_operand.
737
738 (define_attr "memory" "none,load,store,both,unknown"
739 (cond [(eq_attr "type" "other,multi,str,lwp")
740 (const_string "unknown")
741 (eq_attr "type" "lea,fcmov,fpspc")
742 (const_string "none")
743 (eq_attr "type" "fistp,leave")
744 (const_string "both")
745 (eq_attr "type" "frndint")
746 (const_string "load")
;; push always stores and pop always loads; a memory operand on the
;; other side makes the insn `both'.
747 (eq_attr "type" "push")
748 (if_then_else (match_operand 1 "memory_operand")
749 (const_string "both")
750 (const_string "store"))
751 (eq_attr "type" "pop")
752 (if_then_else (match_operand 0 "memory_operand")
753 (const_string "both")
754 (const_string "load"))
755 (eq_attr "type" "setcc")
756 (if_then_else (match_operand 0 "memory_operand")
757 (const_string "store")
758 (const_string "none"))
;; Compare-like types: a memory operand 0 or 1 is only ever a load.
759 (eq_attr "type" "icmp,test,ssecmp,ssecomi,mmxcmp,fcmp")
760 (if_then_else (ior (match_operand 0 "memory_operand")
761 (match_operand 1 "memory_operand"))
762 (const_string "load")
763 (const_string "none"))
764 (eq_attr "type" "ibr")
765 (if_then_else (match_operand 0 "memory_operand")
766 (const_string "load")
767 (const_string "none"))
;; Calls through anything but a constant call address count as a load.
768 (eq_attr "type" "call")
769 (if_then_else (match_operand 0 "constant_call_address_operand")
770 (const_string "none")
771 (const_string "load"))
772 (eq_attr "type" "callv")
773 (if_then_else (match_operand 1 "constant_call_address_operand")
774 (const_string "none")
775 (const_string "load"))
;; For these types a memory operand 1 is classified as `both'.
776 (and (eq_attr "type" "alu1,negnot,ishift1,rotate1,sselog1,sseshuf1")
777 (match_operand 1 "memory_operand"))
778 (const_string "both")
;; Generic rules: operands 0 and 1 both in memory -> `both', only
;; operand 0 -> `store', only operand 1 -> `load'.
779 (and (match_operand 0 "memory_operand")
780 (match_operand 1 "memory_operand"))
781 (const_string "both")
782 (match_operand 0 "memory_operand")
783 (const_string "store")
784 (match_operand 1 "memory_operand")
785 (const_string "load")
;; A memory operand 2 is a load, except for the types in this negated
;; list (the leading "!" applies to the whole list).
786 (and (eq_attr "type"
787 "!alu1,negnot,ishift1,rotate1,
788 imov,imovx,icmp,test,bitmanip,
789 fmov,fcmp,fsgn,
790 sse,ssemov,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,
791 sselog1,sseshuf1,sseadd1,sseiadd1,sseishft1,
792 mmx,mmxmov,mmxcmp,mmxcvt,mskmov,msklog")
793 (match_operand 2 "memory_operand"))
794 (const_string "load")
;; Operand 3 is only examined for these three types.
795 (and (eq_attr "type" "icmov,ssemuladd,sse4arg")
796 (match_operand 3 "memory_operand"))
797 (const_string "load")
798 ]
799 (const_string "none")))
800
801 ;; Indicates if an instruction has both an immediate and a displacement.
;; "unknown" for types other and multi.  "true" when operand 0 matches
;; memory_displacement_operand and the immediate is operand 1 (types
;; icmp, test, imov, alu1, ishift1, rotate1) or operand 2 (types alu,
;; ishift, ishiftx, rotate, rotatex, imul, idiv).  Otherwise "false".
802
803 (define_attr "imm_disp" "false,true,unknown"
804 (cond [(eq_attr "type" "other,multi")
805 (const_string "unknown")
806 (and (eq_attr "type" "icmp,test,imov,alu1,ishift1,rotate1")
807 (and (match_operand 0 "memory_displacement_operand")
808 (match_operand 1 "immediate_operand")))
809 (const_string "true")
810 (and (eq_attr "type" "alu,ishift,ishiftx,rotate,rotatex,imul,idiv")
811 (and (match_operand 0 "memory_displacement_operand")
812 (match_operand 2 "immediate_operand")))
813 (const_string "true")
814 ]
815 (const_string "false")))
816
817 ;; Indicates if an FP operation has an integer source.
818
819 (define_attr "fp_int_src" "false,true"
820 (const_string "false"))
821
822 ;; Defines rounding mode of an FP operation.
823
824 (define_attr "i387_cw" "roundeven,floor,ceil,trunc,uninitialized,any"
825 (const_string "any"))
826
827 ;; Define attribute to indicate AVX insns with partial XMM register update.
828 (define_attr "avx_partial_xmm_update" "false,true"
829 (const_string "false"))
830
831 ;; Define attribute to classify add/sub insns that consume carry flag (CF)
832 (define_attr "use_carry" "0,1" (const_string "0"))
833
834 ;; Define attribute to indicate unaligned ssemov insns
835 (define_attr "movu" "0,1" (const_string "0"))
836
837 ;; Used to control the "enabled" attribute on a per-instruction basis.
838 (define_attr "isa" "base,x64,nox64,x64_sse2,x64_sse4,x64_sse4_noavx,
839 x64_avx,x64_avx512bw,x64_avx512dq,
840 sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx,
841 avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
842 avx512bw,noavx512bw,avx512dq,noavx512dq,fma_or_avx512vl,
843 avx512vl,noavx512vl,avxvnni,avx512vnnivl,avx512fp16,avxifma,
844 avx512ifmavl,avxneconvert,avx512bf16vl"
845 (const_string "base"))
846
847 ;; Define instruction set of MMX instructions
848 (define_attr "mmx_isa" "base,native,sse,sse_noavx,avx"
849 (const_string "base"))
850
;; Whether an insn is enabled, derived from its "isa" and "mmx_isa"
;; attributes.  Every non-base value of either attribute has one clause
;; below that maps it to the target test shown next to it.  The first
;; matching clause wins, so a non-base "isa" takes precedence over
;; "mmx_isa".  With both attributes at "base" the default of 1 applies.
851 (define_attr "enabled" ""
852 (cond [(eq_attr "isa" "x64") (symbol_ref "TARGET_64BIT")
853 (eq_attr "isa" "nox64") (symbol_ref "!TARGET_64BIT")
854 (eq_attr "isa" "x64_sse2")
855 (symbol_ref "TARGET_64BIT && TARGET_SSE2")
856 (eq_attr "isa" "x64_sse4")
857 (symbol_ref "TARGET_64BIT && TARGET_SSE4_1")
858 (eq_attr "isa" "x64_sse4_noavx")
859 (symbol_ref "TARGET_64BIT && TARGET_SSE4_1 && !TARGET_AVX")
860 (eq_attr "isa" "x64_avx")
861 (symbol_ref "TARGET_64BIT && TARGET_AVX")
862 (eq_attr "isa" "x64_avx512bw")
863 (symbol_ref "TARGET_64BIT && TARGET_AVX512BW")
864 (eq_attr "isa" "x64_avx512dq")
865 (symbol_ref "TARGET_64BIT && TARGET_AVX512DQ")
866 (eq_attr "isa" "sse_noavx")
867 (symbol_ref "TARGET_SSE && !TARGET_AVX")
868 (eq_attr "isa" "sse2") (symbol_ref "TARGET_SSE2")
869 (eq_attr "isa" "sse2_noavx")
870 (symbol_ref "TARGET_SSE2 && !TARGET_AVX")
871 (eq_attr "isa" "sse3") (symbol_ref "TARGET_SSE3")
872 (eq_attr "isa" "sse3_noavx")
873 (symbol_ref "TARGET_SSE3 && !TARGET_AVX")
874 (eq_attr "isa" "sse4") (symbol_ref "TARGET_SSE4_1")
875 (eq_attr "isa" "sse4_noavx")
876 (symbol_ref "TARGET_SSE4_1 && !TARGET_AVX")
877 (eq_attr "isa" "avx") (symbol_ref "TARGET_AVX")
878 (eq_attr "isa" "noavx") (symbol_ref "!TARGET_AVX")
879 (eq_attr "isa" "avx2") (symbol_ref "TARGET_AVX2")
880 (eq_attr "isa" "noavx2") (symbol_ref "!TARGET_AVX2")
881 (eq_attr "isa" "bmi") (symbol_ref "TARGET_BMI")
882 (eq_attr "isa" "bmi2") (symbol_ref "TARGET_BMI2")
883 (eq_attr "isa" "fma4") (symbol_ref "TARGET_FMA4")
884 (eq_attr "isa" "fma") (symbol_ref "TARGET_FMA")
885 (eq_attr "isa" "fma_or_avx512vl")
886 (symbol_ref "TARGET_FMA || TARGET_AVX512VL")
887 (eq_attr "isa" "avx512f") (symbol_ref "TARGET_AVX512F")
888 (eq_attr "isa" "noavx512f") (symbol_ref "!TARGET_AVX512F")
889 (eq_attr "isa" "avx512bw") (symbol_ref "TARGET_AVX512BW")
890 (eq_attr "isa" "noavx512bw") (symbol_ref "!TARGET_AVX512BW")
891 (eq_attr "isa" "avx512dq") (symbol_ref "TARGET_AVX512DQ")
892 (eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ")
893 (eq_attr "isa" "avx512vl") (symbol_ref "TARGET_AVX512VL")
894 (eq_attr "isa" "noavx512vl") (symbol_ref "!TARGET_AVX512VL")
895 (eq_attr "isa" "avxvnni") (symbol_ref "TARGET_AVXVNNI")
896 (eq_attr "isa" "avx512vnnivl")
897 (symbol_ref "TARGET_AVX512VNNI && TARGET_AVX512VL")
898 (eq_attr "isa" "avx512fp16")
899 (symbol_ref "TARGET_AVX512FP16")
900 (eq_attr "isa" "avxifma") (symbol_ref "TARGET_AVXIFMA")
901 (eq_attr "isa" "avx512ifmavl")
902 (symbol_ref "TARGET_AVX512IFMA && TARGET_AVX512VL")
903 (eq_attr "isa" "avxneconvert") (symbol_ref "TARGET_AVXNECONVERT")
904 (eq_attr "isa" "avx512bf16vl")
905 (symbol_ref "TARGET_AVX512BF16 && TARGET_AVX512VL")
906
;; The mmx_isa clauses are reached only when no isa clause above matched.
907 (eq_attr "mmx_isa" "native")
908 (symbol_ref "!TARGET_MMX_WITH_SSE")
909 (eq_attr "mmx_isa" "sse")
910 (symbol_ref "TARGET_MMX_WITH_SSE")
911 (eq_attr "mmx_isa" "sse_noavx")
912 (symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")
913 (eq_attr "mmx_isa" "avx")
914 (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
915 ]
916 (const_int 1)))
917
918 (define_attr "preferred_for_size" "" (const_int 1))
919 (define_attr "preferred_for_speed" "" (const_int 1))
920
921 ;; Describe a user's asm statement.
922 (define_asm_attributes
923 [(set_attr "length" "128")
924 (set_attr "type" "multi")])
925
926 (define_code_iterator plusminus [plus minus])
927 (define_code_iterator plusminusmultdiv [plus minus mult div])
928
929 (define_code_iterator sat_plusminus [ss_plus us_plus ss_minus us_minus])
930
931 ;; Base name for insn mnemonic.
932 (define_code_attr plusminus_mnemonic
933 [(plus "add") (ss_plus "adds") (us_plus "addus")
934 (minus "sub") (ss_minus "subs") (us_minus "subus")])
935
936 (define_code_iterator multdiv [mult div])
937
938 (define_code_attr multdiv_mnemonic
939 [(mult "mul") (div "div")])
940
941 ;; Mark commutative operators as such in constraints.
942 (define_code_attr comm [(plus "%") (ss_plus "%") (us_plus "%")
943 (minus "") (ss_minus "") (us_minus "")
944 (mult "%") (div "")])
945
946 ;; Mapping of max and min
947 (define_code_iterator maxmin [smax smin umax umin])
948
949 ;; Mapping of signed max and min
950 (define_code_iterator smaxmin [smax smin])
951
952 ;; Mapping of unsigned max and min
953 (define_code_iterator umaxmin [umax umin])
954
955 ;; Base name for integer and FP insn mnemonic
956 (define_code_attr maxmin_int [(smax "maxs") (smin "mins")
957 (umax "maxu") (umin "minu")])
958 (define_code_attr maxmin_float [(smax "max") (smin "min")])
959
960 (define_int_iterator IEEE_MAXMIN
961 [UNSPEC_IEEE_MAX
962 UNSPEC_IEEE_MIN])
963
964 (define_int_attr ieee_maxmin
965 [(UNSPEC_IEEE_MAX "max")
966 (UNSPEC_IEEE_MIN "min")])
967
968 ;; Mapping of logic operators
969 (define_code_iterator any_logic [and ior xor])
970 (define_code_iterator any_or [ior xor])
971 (define_code_iterator fpint_logic [and xor])
972
973 ;; Base name for insn mnemonic.
974 (define_code_attr logic [(and "and") (ior "or") (xor "xor")])
975
976 ;; Mapping of logic-shift operators
977 (define_code_iterator any_lshift [ashift lshiftrt])
978
979 ;; Mapping of shift-right operators
980 (define_code_iterator any_shiftrt [lshiftrt ashiftrt])
981
982 ;; Mapping of all shift operators
983 (define_code_iterator any_shift [ashift lshiftrt ashiftrt])
984
985 ;; Base name for insn mnemonic.
986 (define_code_attr shift [(ashift "sll") (lshiftrt "shr") (ashiftrt "sar")])
987 (define_code_attr vshift [(ashift "sll") (lshiftrt "srl") (ashiftrt "sra")])
988
989 ;; Mapping of rotate operators
990 (define_code_iterator any_rotate [rotate rotatert])
991
992 ;; Base name for insn mnemonic.
993 (define_code_attr rotate [(rotate "rol") (rotatert "ror")])
994
995 ;; Mapping of abs neg operators
996 (define_code_iterator absneg [abs neg])
997
998 ;; Mapping of abs neg operators to logic operation
999 (define_code_attr absneg_op [(abs "and") (neg "xor")])
1000
1001 ;; Base name for x87 insn mnemonic.
1002 (define_code_attr absneg_mnemonic [(abs "fabs") (neg "fchs")])
1003
1004 ;; Mapping of extend operators
1005 (define_code_iterator any_extend [sign_extend zero_extend])
1006
1007 ;; Mapping of highpart multiply operators
1008 (define_code_iterator any_mul_highpart [smul_highpart umul_highpart])
1009
1010 ;; Prefix for insn mnemonic.
1011 (define_code_attr sgnprefix [(sign_extend "i") (zero_extend "")
1012 (smul_highpart "i") (umul_highpart "")
1013 (div "i") (udiv "")])
1014 ;; Prefix for define_insn
1015 (define_code_attr s [(sign_extend "s") (zero_extend "u")
1016 (smul_highpart "s") (umul_highpart "u")])
1017 (define_code_attr u [(sign_extend "") (zero_extend "u")
1018 (div "") (udiv "u")])
1019 (define_code_attr u_bool [(sign_extend "false") (zero_extend "true")
1020 (div "false") (udiv "true")])
1021
1022 ;; Used in signed and unsigned truncations.
1023 (define_code_iterator any_truncate [ss_truncate truncate us_truncate])
1024 ;; Instruction suffix for truncations.
1025 (define_code_attr trunsuffix
1026 [(ss_truncate "s") (truncate "") (us_truncate "us")])
1027
1028 ;; Instruction suffix for SSE sign and zero extensions.
1029 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
1030
1031 ;; Used in signed and unsigned fix.
1032 (define_code_iterator any_fix [fix unsigned_fix])
;; Three per-code strings for fix/unsigned_fix: a short suffix (""/"u"),
;; a long suffix (""/"uns") and a prefix ("s"/"u").
1033 (define_code_attr fixsuffix [(fix "") (unsigned_fix "u")])
1034 (define_code_attr fixunssuffix [(fix "") (unsigned_fix "uns")])
1035 (define_code_attr fixprefix [(fix "s") (unsigned_fix "u")])
1036
1037 ;; Used in signed and unsigned float.
1038 (define_code_iterator any_float [float unsigned_float])
;; The same three strings for float/unsigned_float: short suffix
;; (""/"u"), long suffix (""/"uns") and prefix ("s"/"u").
1039 (define_code_attr floatsuffix [(float "") (unsigned_float "u")])
1040 (define_code_attr floatunssuffix [(float "") (unsigned_float "uns")])
1041 (define_code_attr floatprefix [(float "s") (unsigned_float "u")])
1042
1043 ;; Base name for expression
;; Note that several names differ from the rtx code itself: the shifts
;; map to "ashl"/"lshr"/"ashr", the rotates to "rotl"/"rotr" and
;; sign_extend to plain "extend".
1044 (define_code_attr insn
1045 [(plus "add") (ss_plus "ssadd") (us_plus "usadd")
1046 (minus "sub") (ss_minus "sssub") (us_minus "ussub")
1047 (sign_extend "extend") (zero_extend "zero_extend")
1048 (ashift "ashl") (lshiftrt "lshr") (ashiftrt "ashr")
1049 (rotate "rotl") (rotatert "rotr")
1050 (mult "mul") (div "div")])
1051
;; Naming used by the iterators below: the digits give the mode sizes in
;; bytes (1 = QI, 2 = HI, 4 = SI, 8 = DI).  The iterators whose name ends
;; in "x" list DImode unconditionally, whereas SWI, SWI248 and SWI48
;; include DImode only when TARGET_64BIT holds.
1052 ;; All integer modes.
1053 (define_mode_iterator SWI1248x [QI HI SI DI])
1054
1055 ;; All integer modes without QImode.
1056 (define_mode_iterator SWI248x [HI SI DI])
1057
1058 ;; All integer modes without QImode and HImode.
1059 (define_mode_iterator SWI48x [SI DI])
1060
1061 ;; All integer modes without SImode and DImode.
1062 (define_mode_iterator SWI12 [QI HI])
1063
1064 ;; All integer modes without DImode.
1065 (define_mode_iterator SWI124 [QI HI SI])
1066
1067 ;; All integer modes without QImode and DImode.
1068 (define_mode_iterator SWI24 [HI SI])
1069
1070 ;; Single word integer modes.
1071 (define_mode_iterator SWI [QI HI SI (DI "TARGET_64BIT")])
1072
1073 ;; Single word integer modes without QImode.
1074 (define_mode_iterator SWI248 [HI SI (DI "TARGET_64BIT")])
1075
1076 ;; Single word integer modes without QImode and HImode.
1077 (define_mode_iterator SWI48 [SI (DI "TARGET_64BIT")])
1078
;; In the five iterators below, QImode is included only when
;; TARGET_QIMODE_MATH holds and HImode only when TARGET_HIMODE_MATH
;; holds; SImode is always included.  They differ in whether DImode is
;; unconditional (SDWIM, SWIM1248x), conditional on TARGET_64BIT (SWIM,
;; SWIM248) or absent (SWIM124), and only SDWIM adds TImode.
1079 ;; All math-dependant single and double word integer modes.
1080 (define_mode_iterator SDWIM [(QI "TARGET_QIMODE_MATH")
1081 (HI "TARGET_HIMODE_MATH")
1082 SI DI (TI "TARGET_64BIT")])
1083
1084 ;; Math-dependant single word integer modes.
1085 (define_mode_iterator SWIM [(QI "TARGET_QIMODE_MATH")
1086 (HI "TARGET_HIMODE_MATH")
1087 SI (DI "TARGET_64BIT")])
1088
1089 ;; Math-dependant integer modes without DImode.
1090 (define_mode_iterator SWIM124 [(QI "TARGET_QIMODE_MATH")
1091 (HI "TARGET_HIMODE_MATH")
1092 SI])
1093
1094 ;; Math-dependant integer modes with DImode.
1095 (define_mode_iterator SWIM1248x
1096 [(QI "TARGET_QIMODE_MATH")
1097 (HI "TARGET_HIMODE_MATH")
1098 SI DI])
1099
1100 ;; Math-dependant single word integer modes without QImode.
1101 (define_mode_iterator SWIM248 [(HI "TARGET_HIMODE_MATH")
1102 SI (DI "TARGET_64BIT")])
1103
1104 ;; Double word integer modes.
;; The two conditions are complementary, so exactly one entry is enabled:
;; DImode when !TARGET_64BIT, TImode when TARGET_64BIT.
1105 (define_mode_iterator DWI [(DI "!TARGET_64BIT")
1106 (TI "TARGET_64BIT")])
1107
1108 ;; SWI and DWI together.
1109 (define_mode_iterator SWIDWI [QI HI SI DI (TI "TARGET_64BIT")])
1110
1111 ;; SWI48 and DWI together.
1112 (define_mode_iterator SWI48DWI [SI DI (TI "TARGET_64BIT")])
1113
1114 ;; GET_MODE_SIZE for selected modes. As GET_MODE_SIZE is not
1115 ;; compile time constant, it is faster to use <MODE_SIZE> than
1116 ;; GET_MODE_SIZE (<MODE>mode). For XFmode which depends on
1117 ;; command line options just use GET_MODE_SIZE macro.
;; Every value is a string holding the size in bytes, except the XF
;; entry, which expands to the GET_MODE_SIZE (XFmode) call itself.
1118 (define_mode_attr MODE_SIZE [(QI "1") (HI "2") (SI "4") (DI "8")
1119 (TI "16") (HF "2") (BF "2") (SF "4") (DF "8")
1120 (XF "GET_MODE_SIZE (XFmode)")
1121 (V16QI "16") (V32QI "32") (V64QI "64")
1122 (V8HI "16") (V16HI "32") (V32HI "64")
1123 (V4SI "16") (V8SI "32") (V16SI "64")
1124 (V2DI "16") (V4DI "32") (V8DI "64")
1125 (V1TI "16") (V2TI "32") (V4TI "64")
1126 (V2DF "16") (V4DF "32") (V8DF "64")
1127 (V4SF "16") (V8SF "32") (V16SF "64")
1128 (V8HF "16") (V16HF "32") (V32HF "64")
1129 (V4HF "8") (V2HF "4")
1130 (V8BF "16") (V16BF "32") (V32BF "64")
1131 (V4BF "8") (V2BF "4")])
1132
1133 ;; Double word integer modes as mode attribute.
;; DWI gives the mode twice as wide as the key mode in upper case; dwi
;; gives the same mapping in lower case.
1134 (define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI") (TI "OI")])
1135 (define_mode_attr dwi [(QI "hi") (HI "si") (SI "di") (DI "ti") (TI "oi")])
1136
1137 ;; LEA mode corresponding to an integer mode
;; QImode and HImode both map to SImode; SImode and DImode map to
;; themselves.
1138 (define_mode_attr LEAMODE [(QI "SI") (HI "SI") (SI "SI") (DI "DI")])
1139
1140 ;; Half mode for double word integer modes.
;; Exactly one entry is enabled: SImode when !TARGET_64BIT, DImode when
;; TARGET_64BIT.
1141 (define_mode_iterator DWIH [(SI "!TARGET_64BIT")
1142 (DI "TARGET_64BIT")])
1143
1144 ;; Instruction suffix for integer modes.
1145 (define_mode_attr imodesuffix [(QI "b") (HI "w") (SI "l") (DI "q")])
1146
1147 ;; Instruction suffix for masks.
;; Differs from imodesuffix only for SImode ("d" here, "l" there).
1148 (define_mode_attr mskmodesuffix [(QI "b") (HI "w") (SI "d") (DI "q")])
1149
1150 ;; Pointer size prefix for integer modes (Intel asm dialect)
1151 (define_mode_attr iptrsize [(QI "BYTE")
1152 (HI "WORD")
1153 (SI "DWORD")
1154 (DI "QWORD")])
1155
1156 ;; Register class for integer modes.
1157 (define_mode_attr r [(QI "q") (HI "r") (SI "r") (DI "r")])
1158
1159 ;; Immediate operand constraint for integer modes.
1160 (define_mode_attr i [(QI "n") (HI "n") (SI "e") (DI "e")])
1161
1162 ;; General operand constraint for word modes.
1163 (define_mode_attr g [(QI "qmn") (HI "rmn") (SI "rme") (DI "rme")])
1164
1165 ;; Memory operand constraint for word modes.
1166 (define_mode_attr m [(QI "m") (HI "m") (SI "BM") (DI "BM")])
1167
1168 ;; Immediate operand constraint for double integer modes.
1169 (define_mode_attr di [(SI "nF") (DI "Wd")])
1170
1171 ;; Immediate operand constraint for shifts.
1172 (define_mode_attr S [(QI "I") (HI "I") (SI "I") (DI "J") (TI "O")])
;; Variant of S that agrees with it for SImode and DImode but uses "Wb"
;; for QImode and "Ww" for HImode, and has no TImode entry.
;; NOTE(review): the meaning of the constraint letters is defined in
;; constraints.md, not here -- confirm there.
1173 (define_mode_attr KS [(QI "Wb") (HI "Ww") (SI "I") (DI "J")])
1174
1175 ;; Print register name in the specified mode.
1176 (define_mode_attr k [(QI "b") (HI "w") (SI "k") (DI "q")])
1177
;; The attributes below select a predicate name per integer mode.  In
;; each of them QImode and HImode use the generic predicate while SImode
;; and DImode use an x86_64_* variant.
1178 ;; General operand predicate for integer modes.
1179 (define_mode_attr general_operand
1180 [(QI "general_operand")
1181 (HI "general_operand")
1182 (SI "x86_64_general_operand")
1183 (DI "x86_64_general_operand")
1184 (TI "x86_64_general_operand")])
1185
1186 ;; General operand predicate for integer modes, where for TImode
1187 ;; we need both words of the operand to be general operands.
1188 (define_mode_attr general_hilo_operand
1189 [(QI "general_operand")
1190 (HI "general_operand")
1191 (SI "x86_64_general_operand")
1192 (DI "x86_64_general_operand")
1193 (TI "x86_64_hilo_general_operand")])
1194
1195 ;; General sign extend operand predicate for integer modes,
1196 ;; which disallows VOIDmode operands and thus it is suitable
1197 ;; for use inside sign_extend.
1198 (define_mode_attr general_sext_operand
1199 [(QI "sext_operand")
1200 (HI "sext_operand")
1201 (SI "x86_64_sext_operand")
1202 (DI "x86_64_sext_operand")])
1203
1204 ;; General sign/zero extend operand predicate for integer modes.
1205 (define_mode_attr general_szext_operand
1206 [(QI "general_operand")
1207 (HI "general_operand")
1208 (SI "x86_64_szext_general_operand")
1209 (DI "x86_64_szext_general_operand")
1210 (TI "x86_64_hilo_general_operand")])
1211
;; Nonmemory counterpart of general_szext_operand; it has no TImode
;; entry.
1212 (define_mode_attr nonmemory_szext_operand
1213 [(QI "nonmemory_operand")
1214 (HI "nonmemory_operand")
1215 (SI "x86_64_szext_nonmemory_operand")
1216 (DI "x86_64_szext_nonmemory_operand")])
1217
1218 ;; Immediate operand predicate for integer modes.
1219 (define_mode_attr immediate_operand
1220 [(QI "immediate_operand")
1221 (HI "immediate_operand")
1222 (SI "x86_64_immediate_operand")
1223 (DI "x86_64_immediate_operand")])
1224
1225 ;; Nonmemory operand predicate for integer modes.
1226 (define_mode_attr nonmemory_operand
1227 [(QI "nonmemory_operand")
1228 (HI "nonmemory_operand")
1229 (SI "x86_64_nonmemory_operand")
1230 (DI "x86_64_nonmemory_operand")])
1231
1232 ;; Operand predicate for shifts.
;; Only DImode and TImode deviate from nonimmediate_operand.
1233 (define_mode_attr shift_operand
1234 [(QI "nonimmediate_operand")
1235 (HI "nonimmediate_operand")
1236 (SI "nonimmediate_operand")
1237 (DI "shiftdi_operand")
1238 (TI "register_operand")])
1239
1240 ;; Operand predicate for shift argument.
;; QImode, HImode and SImode all use the 1..31 predicate; only DImode
;; uses the 1..63 one.
1241 (define_mode_attr shift_immediate_operand
1242 [(QI "const_1_to_31_operand")
1243 (HI "const_1_to_31_operand")
1244 (SI "const_1_to_31_operand")
1245 (DI "const_1_to_63_operand")])
1246
1247 ;; Input operand predicate for arithmetic left shifts.
1248 (define_mode_attr ashl_input_operand
1249 [(QI "nonimmediate_operand")
1250 (HI "nonimmediate_operand")
1251 (SI "nonimmediate_operand")
1252 (DI "ashldi_input_operand")
1253 (TI "reg_or_pm1_operand")])
1254
1255 ;; SSE and x87 SFmode and DFmode floating point modes
1256 (define_mode_iterator MODEF [SF DF])
1257
1258 ;; SSE floating point modes
;; HFmode takes part only when TARGET_AVX512FP16 holds.
1259 (define_mode_iterator MODEFH [(HF "TARGET_AVX512FP16") SF DF])
1260
1261 ;; All x87 floating point modes
1262 (define_mode_iterator X87MODEF [SF DF XF])
1263
1264 ;; All x87 floating point modes plus HFmode and BFmode
1265 (define_mode_iterator X87MODEFH [HF SF DF XF BF])
1266
1267 ;; All SSE floating point modes
1268 (define_mode_iterator SSEMODEF [HF SF DF TF])
;; Vector mode name for each SSEMODEF mode; TFmode maps to itself.
1269 (define_mode_attr ssevecmodef [(HF "V8HF") (SF "V4SF") (DF "V2DF") (TF "TF")])
1270
1271 ;; SSE instruction suffix for various modes
1272 (define_mode_attr ssemodesuffix
1273 [(HF "sh") (SF "ss") (DF "sd")
1274 (V32HF "ph") (V16SF "ps") (V8DF "pd")
1275 (V16HF "ph") (V16BF "bf") (V8SF "ps") (V4DF "pd")
1276 (V8HF "ph") (V8BF "bf") (V4SF "ps") (V2DF "pd")
1277 (V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")
1278 (V32QI "b") (V16HI "w") (V8SI "d") (V4DI "q")
1279 (V64QI "b") (V32HI "w") (V16SI "d") (V8DI "q")])
1280
1281 ;; SSE vector suffix for floating point modes
1282 (define_mode_attr ssevecmodesuffix [(SF "ps") (DF "pd")])
1283
1284 ;; SSE vector mode corresponding to a scalar mode
1285 (define_mode_attr ssevecmode
1286 [(QI "V16QI") (HI "V8HI") (SI "V4SI") (DI "V2DI") (HF "V8HF") (BF "V8BF") (SF "V4SF") (DF "V2DF")])
;; Lower-case spelling of ssevecmode.  Unlike ssevecmode it has no HF or
;; BF entry.
1287 (define_mode_attr ssevecmodelower
1288 [(QI "v16qi") (HI "v8hi") (SI "v4si") (DI "v2di") (SF "v4sf") (DF "v2df")])
1289
1290 ;; AVX512F vector mode corresponding to a scalar mode
1291 (define_mode_attr avx512fvecmode
1292 [(QI "V64QI") (HI "V32HI") (SI "V16SI") (DI "V8DI") (SF "V16SF") (DF "V8DF")])
1293
1294 ;; Instruction suffix for REX 64bit operators.
1295 (define_mode_attr rex64suffix [(SI "{l}") (DI "{q}")])
;; Companion of rex64suffix without the braces: empty for SImode, "q"
;; for DImode.
1296 (define_mode_attr rex64namesuffix [(SI "") (DI "q")])
1297
;; The three iterators below each list SImode and DImode, guarded by a
;; test of Pmode, word_mode and ptr_mode respectively against that mode.
1298 ;; This mode iterator allows :P to be used for patterns that operate on
1299 ;; pointer-sized quantities. Exactly one of the two alternatives will match.
1300 (define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
1301
1302 ;; This mode iterator allows :W to be used for patterns that operate on
1303 ;; word_mode sized quantities.
1304 (define_mode_iterator W
1305 [(SI "word_mode == SImode") (DI "word_mode == DImode")])
1306
1307 ;; This mode iterator allows :PTR to be used for patterns that operate on
1308 ;; ptr_mode sized quantities.
1309 (define_mode_iterator PTR
1310 [(SI "ptr_mode == SImode") (DI "ptr_mode == DImode")])
1311 \f
1312 ;; Scheduling descriptions
1313
1314 (include "pentium.md")
1315 (include "ppro.md")
1316 (include "k6.md")
1317 (include "athlon.md")
1318 (include "bdver1.md")
1319 (include "bdver3.md")
1320 (include "btver2.md")
1321 (include "znver.md")
1322 (include "geode.md")
1323 (include "atom.md")
1324 (include "slm.md")
1325 (include "glm.md")
1326 (include "core2.md")
1327 (include "haswell.md")
1328 (include "lujiazui.md")
1329
1330 \f
1331 ;; Operand and operator predicates and constraints
1332
1333 (include "predicates.md")
1334 (include "constraints.md")
1335
1336 \f
1337 ;; Compare and branch/compare and store instructions.
1338
;; Integer compare-and-branch expander, instantiated for every mode in
;; SDWIM.  Operand 0 is the comparison operator, operands 1 and 2 are the
;; values compared and operand 3 is the target label.  If both values are
;; memory references, operand 1 is copied into a register first; the
;; branch sequence itself is produced by ix86_expand_branch.
1339 (define_expand "cbranch<mode>4"
1340 [(set (reg:CC FLAGS_REG)
1341 (compare:CC (match_operand:SDWIM 1 "nonimmediate_operand")
1342 (match_operand:SDWIM 2 "<general_operand>")))
1343 (set (pc) (if_then_else
1344 (match_operator 0 "ordered_comparison_operator"
1345 [(reg:CC FLAGS_REG) (const_int 0)])
1346 (label_ref (match_operand 3))
1347 (pc)))]
1348 ""
1349 {
1350 if (MEM_P (operands[1]) && MEM_P (operands[2]))
1351 operands[1] = force_reg (<MODE>mode, operands[1]);
1352 ix86_expand_branch (GET_CODE (operands[0]),
1353 operands[1], operands[2], operands[3]);
1354 DONE;
1355 })
1356
;; OImode compare-and-branch, available only with TARGET_AVX.  Both
;; values must satisfy nonimmediate_operand and the comparison must
;; satisfy bt_comparison_operator.  No operand fix-up is done here; the
;; expansion is left entirely to ix86_expand_branch.
1357 (define_expand "cbranchoi4"
1358 [(set (reg:CC FLAGS_REG)
1359 (compare:CC (match_operand:OI 1 "nonimmediate_operand")
1360 (match_operand:OI 2 "nonimmediate_operand")))
1361 (set (pc) (if_then_else
1362 (match_operator 0 "bt_comparison_operator"
1363 [(reg:CC FLAGS_REG) (const_int 0)])
1364 (label_ref (match_operand 3))
1365 (pc)))]
1366 "TARGET_AVX"
1367 {
1368 ix86_expand_branch (GET_CODE (operands[0]),
1369 operands[1], operands[2], operands[3]);
1370 DONE;
1371 })
1372
;; Integer compare-and-store expander for every mode in SDWIM: operand 0
;; (a QImode register) receives the result of comparison operator 1
;; applied to operands 2 and 3.  For the double-word mode (TImode when
;; TARGET_64BIT, DImode otherwise) only EQ and NE are expanded and any
;; other comparison FAILs.  For the remaining modes a memory/memory
;; operand pair is fixed up by copying operand 2 into a register.  The
;; setcc sequence itself is produced by ix86_expand_setcc.
1373 (define_expand "cstore<mode>4"
1374 [(set (reg:CC FLAGS_REG)
1375 (compare:CC (match_operand:SDWIM 2 "nonimmediate_operand")
1376 (match_operand:SDWIM 3 "<general_operand>")))
1377 (set (match_operand:QI 0 "register_operand")
1378 (match_operator 1 "ordered_comparison_operator"
1379 [(reg:CC FLAGS_REG) (const_int 0)]))]
1380 ""
1381 {
1382 if (<MODE>mode == (TARGET_64BIT ? TImode : DImode))
1383 {
1384 if (GET_CODE (operands[1]) != EQ
1385 && GET_CODE (operands[1]) != NE)
1386 FAIL;
1387 }
1388 else if (MEM_P (operands[2]) && MEM_P (operands[3]))
1389 operands[2] = force_reg (<MODE>mode, operands[2]);
1390 ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
1391 operands[2], operands[3]);
1392 DONE;
1393 })
1394
;; Plain CCmode compare of two SWI48 operands, with no condition and no
;; preparation statements.  The "@" in the name makes the mode an
;; argument of the generator function, as in the
;; gen_cmp_1 (<MODE>mode, ...) call made by the *cmp<dwi>_doubleword
;; splitter in this file.
1395 (define_expand "@cmp<mode>_1"
1396 [(set (reg:CC FLAGS_REG)
1397 (compare:CC (match_operand:SWI48 0 "nonimmediate_operand")
1398 (match_operand:SWI48 1 "<general_operand>")))])
1399
;; Integer modes gated on AVX512 features: QImode needs TARGET_AVX512DQ,
;; HImode is unconditional, SImode needs TARGET_AVX512BW and DImode needs
;; TARGET_AVX512BW together with TARGET_64BIT.
1400 (define_mode_iterator SWI1248_AVX512BWDQ_64
1401 [(QI "TARGET_AVX512DQ") HI
1402 (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW && TARGET_64BIT")])
1403
;; Compare against zero when CCZmode is sufficient and TARGET_AVX512F
;; holds.  Three alternatives: "test" of the operand with itself for the
;; "<r>" alternative, "cmp" with the constant for the "?m<r>" alternative
;; and "kortest" of the operand with itself for the "$k" alternative
;; (type msklog, vex prefix).
1404 (define_insn "*cmp<mode>_ccz_1"
1405 [(set (reg FLAGS_REG)
1406 (compare (match_operand:SWI1248_AVX512BWDQ_64 0
1407 "nonimmediate_operand" "<r>,?m<r>,$k")
1408 (match_operand:SWI1248_AVX512BWDQ_64 1 "const0_operand")))]
1409 "TARGET_AVX512F && ix86_match_ccmode (insn, CCZmode)"
1410 "@
1411 test{<imodesuffix>}\t%0, %0
1412 cmp{<imodesuffix>}\t{%1, %0|%0, %1}
1413 kortest<mskmodesuffix>\t%0, %0"
1414 [(set_attr "type" "test,icmp,msklog")
1415 (set_attr "length_immediate" "0,1,*")
1416 (set_attr "prefix" "*,*,vex")
1417 (set_attr "mode" "<MODE>")])
1418
;; Compare of an SWI operand against zero, matched when
;; ix86_match_ccmode accepts CCNOmode: "test" of the operand with itself
;; for the first (register) alternative, "cmp" with the constant for the
;; second.
1419 (define_insn "*cmp<mode>_ccno_1"
1420 [(set (reg FLAGS_REG)
1421 (compare (match_operand:SWI 0 "nonimmediate_operand" "<r>,?m<r>")
1422 (match_operand:SWI 1 "const0_operand")))]
1423 "ix86_match_ccmode (insn, CCNOmode)"
1424 "@
1425 test{<imodesuffix>}\t%0, %0
1426 cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
1427 [(set_attr "type" "test,icmp")
1428 (set_attr "length_immediate" "0,1")
1429 (set_attr "mode" "<MODE>")])
1430
;; General compare of two SWI operands, matched when ix86_match_ccmode
;; accepts CCmode and emitted as a single "cmp".  In the first
;; alternative operand 0 may be a register or memory and operand 1 a
;; register or immediate; in the second operand 0 is a register and
;; operand 1 a register or memory.
1431 (define_insn "*cmp<mode>_1"
1432 [(set (reg FLAGS_REG)
1433 (compare (match_operand:SWI 0 "nonimmediate_operand" "<r>m,<r>")
1434 (match_operand:SWI 1 "<general_operand>" "<r><i>,<r><m>")))]
1435 "ix86_match_ccmode (insn, CCmode)"
1436 "cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
1437 [(set_attr "type" "icmp")
1438 (set_attr "mode" "<MODE>")])
1439
;; Matches the form where the flags are set from (minus op0 op1) compared
;; with zero, when ix86_match_ccmode accepts CCGOCmode.  It is emitted as
;; the same single "cmp" of operand 0 with operand 1.
1440 (define_insn "*cmp<mode>_minus_1"
1441 [(set (reg FLAGS_REG)
1442 (compare
1443 (minus:SWI (match_operand:SWI 0 "nonimmediate_operand" "<r>m,<r>")
1444 (match_operand:SWI 1 "<general_operand>" "<r><i>,<r><m>"))
1445 (const_int 0)))]
1446 "ix86_match_ccmode (insn, CCGOCmode)"
1447 "cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
1448 [(set_attr "type" "icmp")
1449 (set_attr "mode" "QI")])
1450
;; QImode compare whose second operand is the 8-bit field at bit position
;; 8 of operand 1 (a zero_extract taken as a QImode subreg), printed with
;; the %h modifier.  The alternative with operand 0 in memory is marked
;; "nox64".
1451 (define_insn "*cmpqi_ext<mode>_1"
1452 [(set (reg FLAGS_REG)
1453 (compare
1454 (match_operand:QI 0 "nonimmediate_operand" "QBc,m")
1455 (subreg:QI
1456 (zero_extract:SWI248
1457 (match_operand:SWI248 1 "register_operand" "Q,Q")
1458 (const_int 8)
1459 (const_int 8)) 0)))]
1460 "ix86_match_ccmode (insn, CCmode)"
1461 "cmp{b}\t{%h1, %0|%0, %h1}"
1462 [(set_attr "isa" "*,nox64")
1463 (set_attr "type" "icmp")
1464 (set_attr "mode" "QI")])
1465
;; Test of the 8-bit field at bit position 8 of operand 0 against zero,
;; matched when ix86_match_ccmode accepts CCNOmode; emitted as "test{b}"
;; of the %h form of the operand with itself.
1466 (define_insn "*cmpqi_ext<mode>_2"
1467 [(set (reg FLAGS_REG)
1468 (compare
1469 (subreg:QI
1470 (zero_extract:SWI248
1471 (match_operand:SWI248 0 "register_operand" "Q")
1472 (const_int 8)
1473 (const_int 8)) 0)
1474 (match_operand:QI 1 "const0_operand")))]
1475 "ix86_match_ccmode (insn, CCNOmode)"
1476 "test{b}\t%h0, %h0"
1477 [(set_attr "type" "test")
1478 (set_attr "length_immediate" "0")
1479 (set_attr "mode" "QI")])
1480
;; Named expander with no condition and no preparation code.  It produces
;; a CCmode compare of the 8-bit field at bit position 8 of an HImode
;; register with a constant integer.
1481 (define_expand "cmpqi_ext_3"
1482 [(set (reg:CC FLAGS_REG)
1483 (compare:CC
1484 (subreg:QI
1485 (zero_extract:HI
1486 (match_operand:HI 0 "register_operand")
1487 (const_int 8)
1488 (const_int 8)) 0)
1489 (match_operand:QI 1 "const_int_operand")))])
1490
;; Compare of the 8-bit field at bit position 8 of operand 0 (printed
;; with %h) with a general QImode operand.  The alternative with operand
;; 1 in memory is marked "nox64".
1491 (define_insn "*cmpqi_ext<mode>_3"
1492 [(set (reg FLAGS_REG)
1493 (compare
1494 (subreg:QI
1495 (zero_extract:SWI248
1496 (match_operand:SWI248 0 "register_operand" "Q,Q")
1497 (const_int 8)
1498 (const_int 8)) 0)
1499 (match_operand:QI 1 "general_operand" "QnBc,m")))]
1500 "ix86_match_ccmode (insn, CCmode)"
1501 "cmp{b}\t{%1, %h0|%h0, %1}"
1502 [(set_attr "isa" "*,nox64")
1503 (set_attr "type" "icmp")
1504 (set_attr "mode" "QI")])
1505
;; Compare of the 8-bit field at bit position 8 of operand 0 with the
;; 8-bit field at bit position 8 of operand 1; both are "Q" registers
;; printed with %h.
1506 (define_insn "*cmpqi_ext<mode>_4"
1507 [(set (reg FLAGS_REG)
1508 (compare
1509 (subreg:QI
1510 (zero_extract:SWI248
1511 (match_operand:SWI248 0 "register_operand" "Q")
1512 (const_int 8)
1513 (const_int 8)) 0)
1514 (subreg:QI
1515 (zero_extract:SWI248
1516 (match_operand:SWI248 1 "register_operand" "Q")
1517 (const_int 8)
1518 (const_int 8)) 0)))]
1519 "ix86_match_ccmode (insn, CCmode)"
1520 "cmp{b}\t{%h1, %h0|%h0, %h1}"
1521 [(set_attr "type" "icmp")
1522 (set_attr "mode" "QI")])
1523
;; Equality-only (CCZmode) compare of two double-word values, split
;; before reload (ix86_pre_reload_split) into word-sized operations.
;; <DWI> is the double-word mode of the operands and <MODE> the mode used
;; for the pieces.
;; split_double_mode breaks operands 0 and 1 into four pieces stored in
;; operands[0..1] and operands[2..3] (NOTE(review): presumably the low
;; and the high halves respectively -- confirm against
;; split_double_mode).  Pieces that are SUBREGs are copied into pseudos.
;; The preparation code then:
;;  - if both pieces of the second value are -1, ANDs the two pieces of
;;    the first value into operands[4], compares that with -1 through
;;    gen_cmp_1 and is DONE;
;;  - otherwise computes operands[4] from the first pair of pieces: a
;;    copy of one piece when the other is 0, its one's complement when
;;    the other is -1, else the XOR of the two (a constant that is not
;;    an x86_64_immediate_operand is forced into a register first);
;;  - computes operands[5] the same way from the second pair, except
;;    that when one piece is 0 the other piece is used directly rather
;;    than copied.
;; The split pattern finally ORs operands[4] and operands[5] into
;; operands[4] and sets the CCZ flags from that result.
1524 (define_insn_and_split "*cmp<dwi>_doubleword"
1525 [(set (reg:CCZ FLAGS_REG)
1526 (compare:CCZ (match_operand:<DWI> 0 "nonimmediate_operand")
1527 (match_operand:<DWI> 1 "general_operand")))]
1528 "ix86_pre_reload_split ()"
1529 "#"
1530 "&& 1"
1531 [(parallel [(set (reg:CCZ FLAGS_REG)
1532 (compare:CCZ (ior:DWIH (match_dup 4) (match_dup 5))
1533 (const_int 0)))
1534 (set (match_dup 4) (ior:DWIH (match_dup 4) (match_dup 5)))])]
1535 {
1536 split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);
1537 /* Placing the SUBREG pieces in pseudos helps reload. */
1538 for (int i = 0; i < 4; i++)
1539 if (SUBREG_P (operands[i]))
1540 operands[i] = force_reg (<MODE>mode, operands[i]);
1541
1542 operands[4] = gen_reg_rtx (<MODE>mode);
1543
1544 /* Special case comparisons against -1. */
1545 if (operands[1] == constm1_rtx && operands[3] == constm1_rtx)
1546 {
1547 emit_insn (gen_and<mode>3 (operands[4], operands[0], operands[2]));
1548 emit_insn (gen_cmp_1 (<MODE>mode, operands[4], constm1_rtx));
1549 DONE;
1550 }
1551
1552 if (operands[1] == const0_rtx)
1553 emit_move_insn (operands[4], operands[0]);
1554 else if (operands[0] == const0_rtx)
1555 emit_move_insn (operands[4], operands[1]);
1556 else if (operands[1] == constm1_rtx)
1557 emit_insn (gen_one_cmpl<mode>2 (operands[4], operands[0]));
1558 else if (operands[0] == constm1_rtx)
1559 emit_insn (gen_one_cmpl<mode>2 (operands[4], operands[1]));
1560 else
1561 {
1562 if (CONST_SCALAR_INT_P (operands[1])
1563 && !x86_64_immediate_operand (operands[1], <MODE>mode))
1564 operands[1] = force_reg (<MODE>mode, operands[1]);
1565 emit_insn (gen_xor<mode>3 (operands[4], operands[0], operands[1]));
1566 }
1567
1568 if (operands[3] == const0_rtx)
1569 operands[5] = operands[2];
1570 else if (operands[2] == const0_rtx)
1571 operands[5] = operands[3];
1572 else
1573 {
1574 operands[5] = gen_reg_rtx (<MODE>mode);
1575 if (operands[3] == constm1_rtx)
1576 emit_insn (gen_one_cmpl<mode>2 (operands[5], operands[2]));
1577 else if (operands[2] == constm1_rtx)
1578 emit_insn (gen_one_cmpl<mode>2 (operands[5], operands[3]));
1579 else
1580 {
1581 if (CONST_SCALAR_INT_P (operands[3])
1582 && !x86_64_immediate_operand (operands[3], <MODE>mode))
1583 operands[3] = force_reg (<MODE>mode, operands[3]);
1584 emit_insn (gen_xor<mode>3 (operands[5], operands[2], operands[3]));
1585 }
1586 }
1587 })
1588
1589 ;; These implement floating-point compares.
1590 ;; %%% See if we can get away with VOIDmode operands on the actual insns,
1591 ;; which would allow mix and match FP modes on the compares. Which is what
1592 ;; the old patterns did, but with many more of them.
1593
;; XFmode compare-and-branch, available with TARGET_80387.  Both values
;; must be nonmemory operands and the comparison must satisfy
;; ix86_fp_comparison_operator; the expansion is done by
;; ix86_expand_branch.
1594 (define_expand "cbranchxf4"
1595 [(set (reg:CC FLAGS_REG)
1596 (compare:CC (match_operand:XF 1 "nonmemory_operand")
1597 (match_operand:XF 2 "nonmemory_operand")))
1598 (set (pc) (if_then_else
1599 (match_operator 0 "ix86_fp_comparison_operator"
1600 [(reg:CC FLAGS_REG)
1601 (const_int 0)])
1602 (label_ref (match_operand 3))
1603 (pc)))]
1604 "TARGET_80387"
1605 {
1606 ix86_expand_branch (GET_CODE (operands[0]),
1607 operands[1], operands[2], operands[3]);
1608 DONE;
1609 })
1610
;; XFmode compare-and-store, available with TARGET_80387: QImode register
;; operand 0 receives the result of comparison operator 1 applied to the
;; nonmemory operands 2 and 3.  The expansion is done by
;; ix86_expand_setcc.
1611 (define_expand "cstorexf4"
1612 [(set (reg:CC FLAGS_REG)
1613 (compare:CC (match_operand:XF 2 "nonmemory_operand")
1614 (match_operand:XF 3 "nonmemory_operand")))
1615 (set (match_operand:QI 0 "register_operand")
1616 (match_operator 1 "ix86_fp_comparison_operator"
1617 [(reg:CC FLAGS_REG)
1618 (const_int 0)]))]
1619 "TARGET_80387"
1620 {
1621 ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
1622 operands[2], operands[3]);
1623 DONE;
1624 })
1625
;; HFmode compare-and-branch, available with TARGET_AVX512FP16.  The
;; values must satisfy cmp_fp_expander_operand and the comparison
;; ix86_fp_comparison_operator; the expansion is done by
;; ix86_expand_branch.
1626 (define_expand "cbranchhf4"
1627 [(set (reg:CC FLAGS_REG)
1628 (compare:CC (match_operand:HF 1 "cmp_fp_expander_operand")
1629 (match_operand:HF 2 "cmp_fp_expander_operand")))
1630 (set (pc) (if_then_else
1631 (match_operator 0 "ix86_fp_comparison_operator"
1632 [(reg:CC FLAGS_REG)
1633 (const_int 0)])
1634 (label_ref (match_operand 3))
1635 (pc)))]
1636 "TARGET_AVX512FP16"
1637 {
1638 ix86_expand_branch (GET_CODE (operands[0]),
1639 operands[1], operands[2], operands[3]);
1640 DONE;
1641 })
1642
;; SFmode/DFmode compare-and-branch.  Enabled when TARGET_80387 holds, or
;; when the mode is an SSE float mode and TARGET_SSE_MATH holds.  The
;; expansion is done by ix86_expand_branch.
1643 (define_expand "cbranch<mode>4"
1644 [(set (reg:CC FLAGS_REG)
1645 (compare:CC (match_operand:MODEF 1 "cmp_fp_expander_operand")
1646 (match_operand:MODEF 2 "cmp_fp_expander_operand")))
1647 (set (pc) (if_then_else
1648 (match_operator 0 "ix86_fp_comparison_operator"
1649 [(reg:CC FLAGS_REG)
1650 (const_int 0)])
1651 (label_ref (match_operand 3))
1652 (pc)))]
1653 "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
1654 {
1655 ix86_expand_branch (GET_CODE (operands[0]),
1656 operands[1], operands[2], operands[3]);
1657 DONE;
1658 })
1659
;; BFmode compare-and-branch, with no enabling condition.  Both values
;; are first converted with ix86_expand_fast_convert_bf_to_sf and the
;; comparison is then carried out in SFmode by do_compare_rtx_and_jump,
;; which is given the label in operand 3 and an even branch probability.
;; Any comparison_operator is accepted here, unlike the other FP
;; expanders, which require ix86_fp_comparison_operator.
1660 (define_expand "cbranchbf4"
1661 [(set (reg:CC FLAGS_REG)
1662 (compare:CC (match_operand:BF 1 "cmp_fp_expander_operand")
1663 (match_operand:BF 2 "cmp_fp_expander_operand")))
1664 (set (pc) (if_then_else
1665 (match_operator 0 "comparison_operator"
1666 [(reg:CC FLAGS_REG)
1667 (const_int 0)])
1668 (label_ref (match_operand 3))
1669 (pc)))]
1670 ""
1671 {
1672 rtx op1 = ix86_expand_fast_convert_bf_to_sf (operands[1]);
1673 rtx op2 = ix86_expand_fast_convert_bf_to_sf (operands[2]);
1674 do_compare_rtx_and_jump (op1, op2, GET_CODE (operands[0]), 0,
1675 SFmode, NULL_RTX, NULL,
1676 as_a <rtx_code_label *> (operands[3]),
1677 /* Unfortunately this isn't propagated. */
1678 profile_probability::even ());
1679 DONE;
1680 })
1681
;; HFmode compare-and-store, available with TARGET_AVX512FP16: QImode
;; register operand 0 receives the result of comparison operator 1
;; applied to operands 2 and 3.  The expansion is done by
;; ix86_expand_setcc.
1682 (define_expand "cstorehf4"
1683 [(set (reg:CC FLAGS_REG)
1684 (compare:CC (match_operand:HF 2 "cmp_fp_expander_operand")
1685 (match_operand:HF 3 "cmp_fp_expander_operand")))
1686 (set (match_operand:QI 0 "register_operand")
1687 (match_operator 1 "ix86_fp_comparison_operator"
1688 [(reg:CC FLAGS_REG)
1689 (const_int 0)]))]
1690 "TARGET_AVX512FP16"
1691 {
1692 ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
1693 operands[2], operands[3]);
1694 DONE;
1695 })
1696
;; BFmode compare-and-store, with no enabling condition.  Both values are
;; first converted with ix86_expand_fast_convert_bf_to_sf and the store
;; flag is then computed in SFmode by emit_store_flag_force.  If the
;; returned rtx is not operand 0 itself, it is moved into operand 0.
1697 (define_expand "cstorebf4"
1698 [(set (reg:CC FLAGS_REG)
1699 (compare:CC (match_operand:BF 2 "cmp_fp_expander_operand")
1700 (match_operand:BF 3 "cmp_fp_expander_operand")))
1701 (set (match_operand:QI 0 "register_operand")
1702 (match_operator 1 "comparison_operator"
1703 [(reg:CC FLAGS_REG)
1704 (const_int 0)]))]
1705 ""
1706 {
1707 rtx op1 = ix86_expand_fast_convert_bf_to_sf (operands[2]);
1708 rtx op2 = ix86_expand_fast_convert_bf_to_sf (operands[3]);
1709 rtx res = emit_store_flag_force (operands[0], GET_CODE (operands[1]),
1710 op1, op2, SFmode, 0, 1);
1711 if (!rtx_equal_p (res, operands[0]))
1712 emit_move_insn (operands[0], res);
1713 DONE;
1714 })
1715
;; SFmode/DFmode compare-and-store.  Enabled when TARGET_80387 holds, or
;; when the mode is an SSE float mode and TARGET_SSE_MATH holds.  The
;; expansion is done by ix86_expand_setcc.
1716 (define_expand "cstore<mode>4"
1717 [(set (reg:CC FLAGS_REG)
1718 (compare:CC (match_operand:MODEF 2 "cmp_fp_expander_operand")
1719 (match_operand:MODEF 3 "cmp_fp_expander_operand")))
1720 (set (match_operand:QI 0 "register_operand")
1721 (match_operator 1 "ix86_fp_comparison_operator"
1722 [(reg:CC FLAGS_REG)
1723 (const_int 0)]))]
1724 "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
1725 {
1726 ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
1727 operands[2], operands[3]);
1728 DONE;
1729 })
1730
;; Branch on an already-computed condition: operand 1 must satisfy
;; flags_reg_operand and operand 2 const0_operand.  The expansion is done
;; by ix86_expand_branch.
1731 (define_expand "cbranchcc4"
1732 [(set (pc) (if_then_else
1733 (match_operator 0 "comparison_operator"
1734 [(match_operand 1 "flags_reg_operand")
1735 (match_operand 2 "const0_operand")])
1736 (label_ref (match_operand 3))
1737 (pc)))]
1738 ""
1739 {
1740 ix86_expand_branch (GET_CODE (operands[0]),
1741 operands[1], operands[2], operands[3]);
1742 DONE;
1743 })
1744
;; Store the result of an already-computed condition into QImode register
;; operand 0: operand 2 must satisfy flags_reg_operand and operand 3
;; const0_operand.  The expansion is done by ix86_expand_setcc.
1745 (define_expand "cstorecc4"
1746 [(set (match_operand:QI 0 "register_operand")
1747 (match_operator 1 "comparison_operator"
1748 [(match_operand 2 "flags_reg_operand")
1749 (match_operand 3 "const0_operand")]))]
1750 ""
1751 {
1752 ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
1753 operands[2], operands[3]);
1754 DONE;
1755 })
1756
1757 ;; FP compares, step 1:
1758 ;; Set the FP condition codes and move fpsr to ax.
1759
1760 ;; We may not use "#" to split and emit these
1761 ;; due to reg-stack pops killing fpsr.
1762
;; XFmode x87 compare whose result is an HImode value in the "a"
;; register, expressed as UNSPEC_FNSTSW of a CCFP compare.  Operand 2 may
;; be an "f" register or a "C" constant (reg_or_0_operand).  The assembly
;; is produced by output_fp_compare with both flag arguments false.
1763 (define_insn "*cmpxf_i387"
1764 [(set (match_operand:HI 0 "register_operand" "=a")
1765 (unspec:HI
1766 [(compare:CCFP
1767 (match_operand:XF 1 "register_operand" "f")
1768 (match_operand:XF 2 "reg_or_0_operand" "fC"))]
1769 UNSPEC_FNSTSW))]
1770 "TARGET_80387"
1771 "* return output_fp_compare (insn, operands, false, false);"
1772 [(set_attr "type" "multi")
1773 (set_attr "unit" "i387")
1774 (set_attr "mode" "XF")])
1775
;; SFmode/DFmode counterpart of *cmpxf_i387.  Here operand 2 may also be
;; a memory operand (nonimm_or_0_operand, constraint "fmC").
1776 (define_insn "*cmp<mode>_i387"
1777 [(set (match_operand:HI 0 "register_operand" "=a")
1778 (unspec:HI
1779 [(compare:CCFP
1780 (match_operand:MODEF 1 "register_operand" "f")
1781 (match_operand:MODEF 2 "nonimm_or_0_operand" "fmC"))]
1782 UNSPEC_FNSTSW))]
1783 "TARGET_80387"
1784 "* return output_fp_compare (insn, operands, false, false);"
1785 [(set_attr "type" "multi")
1786 (set_attr "unit" "i387")
1787 (set_attr "mode" "<MODE>")])
1788
;; x87 compare of an FP register with an HImode or SImode integer in
;; memory that is converted by (float ...).  Enabled with TARGET_80387
;; when the matching TARGET_USE_<mode>MODE_FIOP holds or the function is
;; optimized for size.  The "fp_int_src" attribute is set to "true" and
;; the insn "mode" attribute is the integer mode.
1789 (define_insn "*cmp<X87MODEF:mode>_<SWI24:mode>_i387"
1790 [(set (match_operand:HI 0 "register_operand" "=a")
1791 (unspec:HI
1792 [(compare:CCFP
1793 (match_operand:X87MODEF 1 "register_operand" "f")
1794 (float:X87MODEF
1795 (match_operand:SWI24 2 "nonimmediate_operand" "m")))]
1796 UNSPEC_FNSTSW))]
1797 "TARGET_80387
1798 && (TARGET_USE_<SWI24:MODE>MODE_FIOP
1799 || optimize_function_for_size_p (cfun))"
1800 "* return output_fp_compare (insn, operands, false, false);"
1801 [(set_attr "type" "multi")
1802 (set_attr "unit" "i387")
1803 (set_attr "fp_int_src" "true")
1804 (set_attr "mode" "<SWI24:MODE>")])
1805
;; Variant of the fnstsw compare in which the CCFP compare is wrapped in
;; UNSPEC_NOTRAP.  Both operands are "f" registers, and the last argument
;; passed to output_fp_compare is true instead of false.
1806 (define_insn "*cmpu<mode>_i387"
1807 [(set (match_operand:HI 0 "register_operand" "=a")
1808 (unspec:HI
1809 [(unspec:CCFP
1810 [(compare:CCFP
1811 (match_operand:X87MODEF 1 "register_operand" "f")
1812 (match_operand:X87MODEF 2 "register_operand" "f"))]
1813 UNSPEC_NOTRAP)]
1814 UNSPEC_FNSTSW))]
1815 "TARGET_80387"
1816 "* return output_fp_compare (insn, operands, false, true);"
1817 [(set_attr "type" "multi")
1818 (set_attr "unit" "i387")
1819 (set_attr "mode" "<MODE>")])
1820
1821 ;; FP compares, step 2:
1822 ;; Get ax into flags, general case.
1823
;; Sets the flags register from an HImode value in the "a" register
;; (UNSPEC_SAHF); requires TARGET_SAHF.  The output is the "sahf"
;; mnemonic, except that when HAVE_AS_IX86_SAHF is not defined and
;; TARGET_64BIT holds the single opcode byte 0x9e is emitted through
;; ASM_BYTE instead.
1824 (define_insn "x86_sahf_1"
1825 [(set (reg:CC FLAGS_REG)
1826 (unspec:CC [(match_operand:HI 0 "register_operand" "a")]
1827 UNSPEC_SAHF))]
1828 "TARGET_SAHF"
1829 {
1830 #ifndef HAVE_AS_IX86_SAHF
1831 if (TARGET_64BIT)
1832 return ASM_BYTE "0x9e";
1833 else
1834 #endif
1835 return "sahf";
1836 }
1837 [(set_attr "length" "1")
1838 (set_attr "athlon_decode" "vector")
1839 (set_attr "amdfam10_decode" "direct")
1840 (set_attr "bdver1_decode" "direct")
1841 (set_attr "mode" "SI")])
1842
1843 ;; Pentium Pro can do both steps in one go.
1844 ;; (these instructions set flags directly)
1845
;; Substitution machinery for the patterns whose names contain <unord>.
;; The "unord_subst" substitution rewrites a set of a CCFP destination so
;; that its source is wrapped in (unspec:CCFP [...] UNSPEC_NOTRAP).
;; <unord> is "" for the original pattern and "u" for the substituted
;; one; <unordered> is "false" and "true" respectively.
1846 (define_subst_attr "unord" "unord_subst" "" "u")
1847 (define_subst_attr "unordered" "unord_subst" "false" "true")
1848
1849 (define_subst "unord_subst"
1850 [(set (match_operand:CCFP 0)
1851 (match_operand:CCFP 1))]
1852 ""
1853 [(set (match_dup 0)
1854 (unspec:CCFP
1855 [(match_dup 1)]
1856 UNSPEC_NOTRAP))])
1857
;; XFmode compare of two "f" registers that sets the flags register
;; directly (CCFP); requires TARGET_80387 and TARGET_CMOVE.
;; output_fp_compare is called with its third argument true and with
;; <unordered> as its fourth.
1858 (define_insn "*cmpi<unord>xf_i387"
1859 [(set (reg:CCFP FLAGS_REG)
1860 (compare:CCFP
1861 (match_operand:XF 0 "register_operand" "f")
1862 (match_operand:XF 1 "register_operand" "f")))]
1863 "TARGET_80387 && TARGET_CMOVE"
1864 "* return output_fp_compare (insn, operands, true, <unordered>);"
1865 [(set_attr "type" "fcmp")
1866 (set_attr "mode" "XF")
1867 (set_attr "athlon_decode" "vector")
1868 (set_attr "amdfam10_decode" "direct")
1869 (set_attr "bdver1_decode" "double")
1870 (set_attr "znver1_decode" "double")])
1871
;; SFmode/DFmode compare that sets the flags register directly (CCFP).
;; Alternative 0 uses "f" registers and output_fp_compare (type fcmp);
;; alternative 1 uses a "v" register against "vm" and emits
;; %v<unord>comi<suffix> (type ssecomi).
;; The "enabled" attribute selects the alternatives: when the mode is an
;; SSE float mode and TARGET_SSE_MATH holds, alternative 1 is always
;; enabled and alternative 0 only with TARGET_MIX_SSE_I387; otherwise
;; only alternative 0 is enabled.
;; "prefix_data16" is "*" for alternative 0, "1" for DFmode and "0"
;; otherwise.
1872 (define_insn "*cmpi<unord><MODEF:mode>"
1873 [(set (reg:CCFP FLAGS_REG)
1874 (compare:CCFP
1875 (match_operand:MODEF 0 "register_operand" "f,v")
1876 (match_operand:MODEF 1 "register_ssemem_operand" "f,vm")))]
1877 "(SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)
1878 || (TARGET_80387 && TARGET_CMOVE)"
1879 "@
1880 * return output_fp_compare (insn, operands, true, <unordered>);
1881 %v<unord>comi<MODEF:ssemodesuffix>\t{%1, %0|%0, %1}"
1882 [(set_attr "type" "fcmp,ssecomi")
1883 (set_attr "prefix" "orig,maybe_vex")
1884 (set_attr "mode" "<MODEF:MODE>")
1885 (set_attr "prefix_rep" "*,0")
1886 (set (attr "prefix_data16")
1887 (cond [(eq_attr "alternative" "0")
1888 (const_string "*")
1889 (eq_attr "mode" "DF")
1890 (const_string "1")
1891 ]
1892 (const_string "0")))
1893 (set_attr "athlon_decode" "vector")
1894 (set_attr "amdfam10_decode" "direct")
1895 (set_attr "bdver1_decode" "double")
1896 (set_attr "znver1_decode" "double")
1897 (set (attr "enabled")
1898 (if_then_else
1899 (match_test ("SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH"))
1900 (if_then_else
1901 (eq_attr "alternative" "0")
1902 (symbol_ref "TARGET_MIX_SSE_I387")
1903 (symbol_ref "true"))
1904 (if_then_else
1905 (eq_attr "alternative" "0")
1906 (symbol_ref "true")
1907 (symbol_ref "false"))))])
1908
;; HFmode compare that sets the flags register directly (CCFP); requires
;; TARGET_AVX512FP16.  Emits vcomish, or vucomish for the <unord>
;; variant, with the evex prefix attribute.
1909 (define_insn "*cmpi<unord>hf"
1910 [(set (reg:CCFP FLAGS_REG)
1911 (compare:CCFP
1912 (match_operand:HF 0 "register_operand" "v")
1913 (match_operand:HF 1 "nonimmediate_operand" "vm")))]
1914 "TARGET_AVX512FP16"
1915 "v<unord>comish\t{%1, %0|%0, %1}"
1916 [(set_attr "type" "ssecomi")
1917 (set_attr "prefix" "evex")
1918 (set_attr "mode" "HF")])
1919 \f
1920 ;; Push/pop instructions.
1921
;; Push of a V1TImode "v" register, enabled for TARGET_64BIT with
;; TARGET_STV.  It is always output as "#" and split after reload into
;; two sets: an adjustment of the stack pointer by
;; -PUSH_ROUNDING (GET_MODE_SIZE (V1TImode)), followed by a store of the
;; register to operand 0 with its address replaced by the stack pointer
;; (replace_equiv_address keeps the memory attributes).
1922 (define_insn_and_split "*pushv1ti2"
1923 [(set (match_operand:V1TI 0 "push_operand" "=<")
1924 (match_operand:V1TI 1 "register_operand" "v"))]
1925 "TARGET_64BIT && TARGET_STV"
1926 "#"
1927 "&& reload_completed"
1928 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
1929 (set (match_dup 0) (match_dup 1))]
1930 {
1931 operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (V1TImode)));
1932 /* Preserve memory attributes. */
1933 operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
1934 }
1935 [(set_attr "type" "multi")
1936 (set_attr "mode" "TI")])
1937
;; Push of a double-word integer (DWI iterator).  The output template is
;; "#", so the insn must be split before final output; it has no
;; enabling condition and is typed "multi".
1938 (define_insn "*push<mode>2"
1939 [(set (match_operand:DWI 0 "push_operand" "=<,<")
1940 (match_operand:DWI 1 "general_no_elim_operand" "riF*o,*v"))]
1941 ""
1942 "#"
1943 [(set_attr "type" "multi")
1944 (set_attr "mode" "<MODE>")])
1945
;; After reload, split a double-word push whose source satisfies
;; general_gr_operand.  The preparation statement hands the operands to
;; ix86_split_long_move and ends with DONE, so the (const_int 0)
;; replacement pattern is only a placeholder.
1946 (define_split
1947 [(set (match_operand:DWI 0 "push_operand")
1948 (match_operand:DWI 1 "general_gr_operand"))]
1949 "reload_completed"
1950 [(const_int 0)]
1951 "ix86_split_long_move (operands); DONE;")
1952
;; DImode push for TARGET_64BIT.
;;   alternative 0 (source "re*m"): a single push{q}, type "push";
;;   alternative 1 (source "*v"):   output as "#", type "multi";
;;   alternative 2 (source "n"):    output as "#", type "multi"; the
;;                                  destination is marked "!<".
1953 (define_insn "*pushdi2_rex64"
1954 [(set (match_operand:DI 0 "push_operand" "=<,<,!<")
1955 (match_operand:DI 1 "general_no_elim_operand" "re*m,*v,n"))]
1956 "TARGET_64BIT"
1957 "@
1958 push{q}\t%1
1959 #
1960 #"
1961 [(set_attr "type" "push,multi,multi")
1962 (set_attr "mode" "DI")])
1963
1964 ;; Convert impossible pushes of immediate to existing instructions.
1965 ;; First try to get scratch register and go through it. In case this
1966 ;; fails, push sign extended lower part first and then overwrite
1967 ;; upper part by 32bit move.
1968
;; Scratch-register variant: when a DImode "r" scratch is available, load
;; the immediate into it and push the scratch instead.  Applies on
;; TARGET_64BIT to immediates that are not symbolic_operand and do not
;; satisfy x86_64_immediate_operand.
1969 (define_peephole2
1970 [(match_scratch:DI 2 "r")
1971 (set (match_operand:DI 0 "push_operand")
1972 (match_operand:DI 1 "immediate_operand"))]
1973 "TARGET_64BIT
1974 && !symbolic_operand (operands[1], DImode)
1975 && !x86_64_immediate_operand (operands[1], DImode)"
1976 [(set (match_dup 2) (match_dup 1))
1977 (set (match_dup 0) (match_dup 2))])
1978
;; Fallback variant, applied once epilogue_completed is set, for the same
;; class of immediates as the peephole above.  split_double_mode breaks
;; operand 1 into two halves (operands[2] and operands[3]).  The first
;; emitted insn pushes the DImode lowpart of operands[2]; the second stores
;; operands[3] into an SImode MEM at stack pointer + 4.
1979 (define_split
1980 [(set (match_operand:DI 0 "push_operand")
1981 (match_operand:DI 1 "immediate_operand"))]
1982 "TARGET_64BIT && epilogue_completed
1983 && !symbolic_operand (operands[1], DImode)
1984 && !x86_64_immediate_operand (operands[1], DImode)"
1985 [(set (match_dup 0) (match_dup 1))
1986 (set (match_dup 2) (match_dup 3))]
1987 {
1988 split_double_mode (DImode, &operands[1], 1, &operands[2], &operands[3]);
1989
1990 operands[1] = gen_lowpart (DImode, operands[2]);
1991 operands[2] = gen_rtx_MEM (SImode,
1992 plus_constant (Pmode, stack_pointer_rtx, 4));
1993 })
1994
1995 ;; For TARGET_64BIT we always round up to 8 bytes.
;; SImode push on TARGET_64BIT.  Alternative 0 (source "re") is output as
;; push{q} with the operand printed via %q1, and the mode attribute is DI.
;; Alternative 1 (source "*v") is output as "#" with type "multi".
1996 (define_insn "*pushsi2_rex64"
1997 [(set (match_operand:SI 0 "push_operand" "=X,X")
1998 (match_operand:SI 1 "nonmemory_no_elim_operand" "re,*v"))]
1999 "TARGET_64BIT"
2000 "@
2001 push{q}\t%q1
2002 #"
2003 [(set_attr "type" "push,multi")
2004 (set_attr "mode" "DI")])
2005
;; SImode push for !TARGET_64BIT.  Alternative 0 (source "ri*m") is a
;; single push{l}; alternative 1 (source "*v") is output as "#" with type
;; "multi".
2006 (define_insn "*pushsi2"
2007 [(set (match_operand:SI 0 "push_operand" "=<,<")
2008 (match_operand:SI 1 "general_no_elim_operand" "ri*m,*v"))]
2009 "!TARGET_64BIT"
2010 "@
2011 push{l}\t%1
2012 #"
2013 [(set_attr "type" "push,multi")
2014 (set_attr "mode" "SI")])
2015
;; After reload with TARGET_SSE, split a push whose source satisfies
;; sse_reg_operand (SWI48DWI modes) into an explicit stack-pointer
;; adjustment by -PUSH_ROUNDING (GET_MODE_SIZE (mode)), followed by a store
;; through a destination MEM whose address is replaced by stack_pointer_rtx.
2016 (define_split
2017 [(set (match_operand:SWI48DWI 0 "push_operand")
2018 (match_operand:SWI48DWI 1 "sse_reg_operand"))]
2019 "TARGET_SSE && reload_completed"
2020 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
2021 (set (match_dup 0) (match_dup 1))]
2022 {
2023 operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (<SWI48DWI:MODE>mode)));
2024 /* Preserve memory attributes. */
2025 operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
2026 })
2027
2028 ;; emit_push_insn when it calls move_by_pieces requires an insn to
2029 ;; "push a byte/word". But actually we use push{l,q}, which has
2030 ;; the effect of rounding the amount pushed up to a word.
2031
;; Push of an SWI12-mode register or integer constant (constraint "rn").
;; The operand is always printed widened: push{q} with %q1 when
;; TARGET_64BIT, push{l} with %k1 otherwise.  The "mode" attribute follows
;; the same TARGET_64BIT test (DI or SI).
2032 (define_insn "*push<mode>2"
2033 [(set (match_operand:SWI12 0 "push_operand" "=X")
2034 (match_operand:SWI12 1 "nonmemory_no_elim_operand" "rn"))]
2035 ""
2036 "* return TARGET_64BIT ? \"push{q}\t%q1\" : \"push{l}\t%k1\";"
2037 [(set_attr "type" "push")
2038 (set (attr "mode")
2039 (if_then_else (match_test "TARGET_64BIT")
2040 (const_string "DI")
2041 (const_string "SI")))])
2042
;; W-mode push that additionally carries a (clobber (mem:BLK (scratch))).
;; NOTE(review): judging by the name this form is used for prologue pushes,
;; with the BLK scratch clobber presumably acting as a memory barrier --
;; confirm against the code that generates it.
2043 (define_insn "*push<mode>2_prologue"
2044 [(set (match_operand:W 0 "push_operand" "=<")
2045 (match_operand:W 1 "general_no_elim_operand" "r<i>*m"))
2046 (clobber (mem:BLK (scratch)))]
2047 ""
2048 "push{<imodesuffix>}\t%1"
2049 [(set_attr "type" "push")
2050 (set_attr "mode" "<MODE>")])
2051
;; Pop a W-mode value from the stack (pop_operand source, constraint ">")
;; into a register or memory destination.
2052 (define_insn "*pop<mode>1"
2053 [(set (match_operand:W 0 "nonimmediate_operand" "=r*m")
2054 (match_operand:W 1 "pop_operand" ">"))]
2055 ""
2056 "pop{<imodesuffix>}\t%0"
2057 [(set_attr "type" "pop")
2058 (set_attr "mode" "<MODE>")])
2059
;; Same pop as *pop<mode>1, but with an additional
;; (clobber (mem:BLK (scratch))) in the pattern.
;; NOTE(review): presumably the epilogue counterpart of
;; *push<mode>2_prologue -- confirm against the code that generates it.
2060 (define_insn "*pop<mode>1_epilogue"
2061 [(set (match_operand:W 0 "nonimmediate_operand" "=r*m")
2062 (match_operand:W 1 "pop_operand" ">"))
2063 (clobber (mem:BLK (scratch)))]
2064 ""
2065 "pop{<imodesuffix>}\t%0"
2066 [(set_attr "type" "pop")
2067 (set_attr "mode" "<MODE>")])
2068
;; Push the flags register (source must satisfy flags_reg_operand) onto
;; the stack in W mode; output as pushf with the mode's suffix.
2069 (define_insn "*pushfl<mode>2"
2070 [(set (match_operand:W 0 "push_operand" "=<")
2071 (match_operand:W 1 "flags_reg_operand"))]
2072 ""
2073 "pushf{<imodesuffix>}"
2074 [(set_attr "type" "push")
2075 (set_attr "mode" "<MODE>")])
2076
;; Pop a W-mode value from the stack into the flags register (destination
;; must satisfy flags_reg_operand); output as popf with the mode's suffix.
2077 (define_insn "*popfl<mode>1"
2078 [(set (match_operand:W 0 "flags_reg_operand")
2079 (match_operand:W 1 "pop_operand" ">"))]
2080 ""
2081 "popf{<imodesuffix>}"
2082 [(set_attr "type" "pop")
2083 (set_attr "mode" "<MODE>")])
2084
2085 \f
2086 ;; Reload patterns to support multi-word load/store
2087 ;; with non-offsettable address.
;; Store register operand 1 into memory operand 0 with the help of the
;; DImode scratch register operand 2 (earlyclobber, "=&r").  The expander
;; copies the address of operand 0 into the scratch, rewrites the MEM to
;; use the scratch as its address (replace_equiv_address_nv), and emits the
;; store.  TARGET_64BIT only.
2088 (define_expand "reload_noff_store"
2089 [(parallel [(match_operand 0 "memory_operand" "=m")
2090 (match_operand 1 "register_operand" "r")
2091 (match_operand:DI 2 "register_operand" "=&r")])]
2092 "TARGET_64BIT"
2093 {
2094 rtx mem = operands[0];
2095 rtx addr = XEXP (mem, 0);
2096
2097 emit_move_insn (operands[2], addr);
2098 mem = replace_equiv_address_nv (mem, operands[2]);
2099
2100 emit_insn (gen_rtx_SET (mem, operands[1]));
2101 DONE;
2102 })
2103
;; Load memory operand 1 into register operand 0 with the help of the
;; DImode scratch register operand 2.  The expander copies the address of
;; operand 1 into the scratch, rewrites the MEM to use the scratch as its
;; address (replace_equiv_address_nv), and emits the load.
;; TARGET_64BIT only.
2104 (define_expand "reload_noff_load"
2105 [(parallel [(match_operand 0 "register_operand" "=r")
2106 (match_operand 1 "memory_operand" "m")
2107 (match_operand:DI 2 "register_operand" "=r")])]
2108 "TARGET_64BIT"
2109 {
2110 rtx mem = operands[1];
2111 rtx addr = XEXP (mem, 0);
2112
2113 emit_move_insn (operands[2], addr);
2114 mem = replace_equiv_address_nv (mem, operands[2]);
2115
2116 emit_insn (gen_rtx_SET (operands[0], mem));
2117 DONE;
2118 })
2119
2120 ;; Move instructions.
2121
;; XImode move expander, available with TARGET_AVX512F.  All work is
;; delegated to ix86_expand_vector_move.
2122 (define_expand "movxi"
2123 [(set (match_operand:XI 0 "nonimmediate_operand")
2124 (match_operand:XI 1 "general_operand"))]
2125 "TARGET_AVX512F"
2126 "ix86_expand_vector_move (XImode, operands); DONE;")
2127
;; OImode move expander, available with TARGET_AVX.  All work is delegated
;; to ix86_expand_vector_move.
2128 (define_expand "movoi"
2129 [(set (match_operand:OI 0 "nonimmediate_operand")
2130 (match_operand:OI 1 "general_operand"))]
2131 "TARGET_AVX"
2132 "ix86_expand_vector_move (OImode, operands); DONE;")
2133
;; TImode move expander, available when TARGET_64BIT or TARGET_SSE.
;; On TARGET_64BIT the move goes through ix86_expand_move; otherwise
;; (SSE only) it goes through ix86_expand_vector_move.
2134 (define_expand "movti"
2135 [(set (match_operand:TI 0 "nonimmediate_operand")
2136 (match_operand:TI 1 "general_operand"))]
2137 "TARGET_64BIT || TARGET_SSE"
2138 {
2139 if (TARGET_64BIT)
2140 ix86_expand_move (TImode, operands);
2141 else
2142 ix86_expand_vector_move (TImode, operands);
2143 DONE;
2144 })
2145
2146 ;; This expands to what emit_move_complex would generate if we didn't
2147 ;; have a movti pattern. Having this avoids problems with reload on
2148 ;; 32-bit targets when SSE is present, but doesn't seem to be harmful
2149 ;; to have around all the time.
;; CDImode move expander, always enabled.  A push destination is handled
;; by emit_move_complex_push; every other move by emit_move_complex_parts.
2150 (define_expand "movcdi"
2151 [(set (match_operand:CDI 0 "nonimmediate_operand")
2152 (match_operand:CDI 1 "general_operand"))]
2153 ""
2154 {
2155 if (push_operand (operands[0], CDImode))
2156 emit_move_complex_push (CDImode, operands[0], operands[1]);
2157 else
2158 emit_move_complex_parts (operands[0], operands[1]);
2159 DONE;
2160 })
2161
;; Generic move expander for every mode in the SWI1248x iterator; always
;; enabled and delegated entirely to ix86_expand_move.
2162 (define_expand "mov<mode>"
2163 [(set (match_operand:SWI1248x 0 "nonimmediate_operand")
2164 (match_operand:SWI1248x 1 "general_operand"))]
2165 ""
2166 "ix86_expand_move (<MODE>mode, operands); DONE;")
2167
;; Load zero (const0_operand) into a general register by xor-ing it with
;; itself.  Valid only after reload and clobbers FLAGS_REG.  For both SWI48
;; modes the instruction is xor{l} with the operand printed via %k0, and
;; the mode attribute is SI.
2168 (define_insn "*mov<mode>_xor"
2169 [(set (match_operand:SWI48 0 "register_operand" "=r")
2170 (match_operand:SWI48 1 "const0_operand"))
2171 (clobber (reg:CC FLAGS_REG))]
2172 "reload_completed"
2173 "xor{l}\t%k0, %k0"
2174 [(set_attr "type" "alu1")
2175 (set_attr "mode" "SI")
2176 (set_attr "length_immediate" "0")])
2177
;; Store zero (const0_operand) to memory using an "and" with the constant.
;; Valid only after reload and clobbers FLAGS_REG.  length_immediate is 1.
2178 (define_insn "*mov<mode>_and"
2179 [(set (match_operand:SWI248 0 "memory_operand" "=m")
2180 (match_operand:SWI248 1 "const0_operand"))
2181 (clobber (reg:CC FLAGS_REG))]
2182 "reload_completed"
2183 "and{<imodesuffix>}\t{%1, %0|%0, %1}"
2184 [(set_attr "type" "alu1")
2185 (set_attr "mode" "<MODE>")
2186 (set_attr "length_immediate" "1")])
2187
;; Store -1 (constm1_operand) to a register or memory using an "or" with
;; the constant.  Valid only after reload and clobbers FLAGS_REG.
;; length_immediate is 1.
2188 (define_insn "*mov<mode>_or"
2189 [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm")
2190 (match_operand:SWI248 1 "constm1_operand"))
2191 (clobber (reg:CC FLAGS_REG))]
2192 "reload_completed"
2193 "or{<imodesuffix>}\t{%1, %0|%0, %1}"
2194 [(set_attr "type" "alu1")
2195 (set_attr "mode" "<MODE>")
2196 (set_attr "length_immediate" "1")])
2197
;; XImode move for TARGET_AVX512F; at least one operand must be a register.
;;   alternatives 0,1 (sources "C" / "BC"): type sselog1, output through
;;     standard_sse_constant_opcode;
;;   alternatives 2,3 ("v" <- "vm", "m" <- "v"): type ssemov, output through
;;     ix86_output_ssemov.
2198 (define_insn "*movxi_internal_avx512f"
2199 [(set (match_operand:XI 0 "nonimmediate_operand" "=v,v ,v ,m")
2200 (match_operand:XI 1 "nonimmediate_or_sse_const_operand" " C,BC,vm,v"))]
2201 "TARGET_AVX512F
2202 && (register_operand (operands[0], XImode)
2203 || register_operand (operands[1], XImode))"
2204 {
2205 switch (get_attr_type (insn))
2206 {
2207 case TYPE_SSELOG1:
2208 return standard_sse_constant_opcode (insn, operands);
2209
2210 case TYPE_SSEMOV:
2211 return ix86_output_ssemov (insn, operands);
2212
2213 default:
2214 gcc_unreachable ();
2215 }
2216 }
2217 [(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
2218 (set_attr "prefix" "evex")
2219 (set_attr "mode" "XI")])
2220
;; OImode move for TARGET_AVX; at least one operand must be a register.
;;   alternatives 0,1 (sources "C" / "BC"): type sselog1, output through
;;     standard_sse_constant_opcode; alternative 1 needs the "avx2" isa;
;;   alternatives 2,3 ("v" <- "vm", "m" <- "v"): type ssemov, output through
;;     ix86_output_ssemov.
2221 (define_insn "*movoi_internal_avx"
2222 [(set (match_operand:OI 0 "nonimmediate_operand" "=v,v ,v ,m")
2223 (match_operand:OI 1 "nonimmediate_or_sse_const_operand" " C,BC,vm,v"))]
2224 "TARGET_AVX
2225 && (register_operand (operands[0], OImode)
2226 || register_operand (operands[1], OImode))"
2227 {
2228 switch (get_attr_type (insn))
2229 {
2230 case TYPE_SSELOG1:
2231 return standard_sse_constant_opcode (insn, operands);
2232
2233 case TYPE_SSEMOV:
2234 return ix86_output_ssemov (insn, operands);
2235
2236 default:
2237 gcc_unreachable ();
2238 }
2239 }
2240 [(set_attr "isa" "*,avx2,*,*")
2241 (set_attr "type" "sselog1,sselog1,ssemov,ssemov")
2242 (set_attr "prefix" "vex")
2243 (set_attr "mode" "OI")])
2244
;; TImode move.  The insn condition accepts either a TARGET_64BIT move that
;; is not memory-to-memory, or a TARGET_SSE move of a
;; nonimmediate_or_sse_const_operand where at least one side is a register.
;;
;; Alternatives (destination <- source):
;;   0  !r  <- riFo     1  o   <- re      (type multi, output "#")
;;   2  v   <- C        3  v   <- BC      (type sselog1)
;;   4  v   <- vm       5  m   <- v       (type ssemov)
;;   6  ?r  <- Yd       7  ?Yd <- r       (type multi, output "#")
2245 (define_insn "*movti_internal"
2246 [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v,v ,v ,m,?r,?Yd")
2247 (match_operand:TI 1 "general_operand" "riFo,re,C,BC,vm,v,Yd,r"))]
2248 "(TARGET_64BIT
2249 && !(MEM_P (operands[0]) && MEM_P (operands[1])))
2250 || (TARGET_SSE
2251 && nonimmediate_or_sse_const_operand (operands[1], TImode)
2252 && (register_operand (operands[0], TImode)
2253 || register_operand (operands[1], TImode)))"
2254 {
2255 switch (get_attr_type (insn))
2256 {
2257 case TYPE_MULTI:
2258 return "#";
2259
2260 case TYPE_SSELOG1:
2261 return standard_sse_constant_opcode (insn, operands);
2262
2263 case TYPE_SSEMOV:
2264 return ix86_output_ssemov (insn, operands);
2265
2266 default:
2267 gcc_unreachable ();
2268 }
2269 }
;; The alternatives that involve general registers (0,1,6,7) are "x64"
;; only; the "BC" constant alternative (3) needs "sse2".
2270 [(set (attr "isa")
2271 (cond [(eq_attr "alternative" "0,1,6,7")
2272 (const_string "x64")
2273 (eq_attr "alternative" "3")
2274 (const_string "sse2")
2275 ]
2276 (const_string "*")))
2277 (set (attr "type")
2278 (cond [(eq_attr "alternative" "0,1,6,7")
2279 (const_string "multi")
2280 (eq_attr "alternative" "2,3")
2281 (const_string "sselog1")
2282 ]
2283 (const_string "ssemov")))
2284 (set (attr "prefix")
2285 (if_then_else (eq_attr "type" "sselog1,ssemov")
2286 (const_string "maybe_vex")
2287 (const_string "orig")))
;; Mode selection, first match wins: DI for alternatives 0,1; TI with
;; TARGET_AVX; V4SF without TARGET_SSE2 or when optimizing for size; V4SF
;; for the store alternative (5) under TARGET_SSE_TYPELESS_STORES; TI
;; otherwise.
2288 (set (attr "mode")
2289 (cond [(eq_attr "alternative" "0,1")
2290 (const_string "DI")
2291 (match_test "TARGET_AVX")
2292 (const_string "TI")
2293 (ior (not (match_test "TARGET_SSE2"))
2294 (match_test "optimize_function_for_size_p (cfun)"))
2295 (const_string "V4SF")
2296 (and (eq_attr "alternative" "5")
2297 (match_test "TARGET_SSE_TYPELESS_STORES"))
2298 (const_string "V4SF")
2299 ]
2300 (const_string "TI")))
2301 (set (attr "preferred_for_speed")
2302 (cond [(eq_attr "alternative" "6")
2303 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
2304 (eq_attr "alternative" "7")
2305 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
2306 ]
2307 (symbol_ref "true")))])
2308
;; After reload, for TARGET_64BIT && TARGET_SSE4_1: split a TImode move
;; from general registers into an SSE register.  The preparation code
;; first emits a move of the DImode low parts; the replacement pattern then
;; merges the DImode high part of the source (operands[3]) into the V2DI
;; view of the destination (operands[2]) with a vec_merge using mask 2.
2309 (define_split
2310 [(set (match_operand:TI 0 "sse_reg_operand")
2311 (match_operand:TI 1 "general_reg_operand"))]
2312 "TARGET_64BIT && TARGET_SSE4_1
2313 && reload_completed"
2314 [(set (match_dup 2)
2315 (vec_merge:V2DI
2316 (vec_duplicate:V2DI (match_dup 3))
2317 (match_dup 2)
2318 (const_int 2)))]
2319 {
2320 operands[2] = lowpart_subreg (V2DImode, operands[0], TImode);
2321 operands[3] = gen_highpart (DImode, operands[1]);
2322
2323 emit_move_insn (gen_lowpart (DImode, operands[0]),
2324 gen_lowpart (DImode, operands[1]));
2325 })
2326
;; DImode move, 28 alternatives (numbered 0-27 in constraint order).
;; Memory-to-memory moves are rejected and ix86_hardreg_mov_ok must accept
;; the operand pair.  The output code dispatches on the "type" attribute,
;; which is derived from the alternative number further down:
;;   0,1,17,18        multi   (output "#")
;;   6                mmx     (pxor)
;;   7-11             mmxmov  (movq, or movd for assemblers without
;;                             HAVE_AS_IX86_INTERUNIT_MOVQ when a general
;;                             register is involved)
;;   12               sselog1 (standard_sse_constant_opcode)
;;   13-16,19,20      ssemov  (ix86_output_ssemov)
;;   21,22            ssecvt  (movq2dq / movdq2q)
;;   23-26            mskmov  (kmovq)
;;   27               msklog  (kxorq for 0, kxnorq for -1)
;;   remaining        lea when operand 0 is a register and operand 1 is a
;;                    pic_32bit_operand, otherwise imov
2327 (define_insn "*movdi_internal"
2328 [(set (match_operand:DI 0 "nonimmediate_operand"
2329 "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r,?*y,?v,?v,?v,m ,m,?r ,?*Yd,?r,?v,?*y,?*x,*k,*k ,*r,*m,*k")
2330 (match_operand:DI 1 "general_operand"
2331 "riFo,riF,Z,rem,i,re,C ,*y,Bk ,*y,*y,r ,C ,?v,Bk,?v,v,*Yd,r ,?v,r ,*x ,*y ,*r,*kBk,*k,*k,CBC"))]
2332 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
2333 && ix86_hardreg_mov_ok (operands[0], operands[1])"
2334 {
2335 switch (get_attr_type (insn))
2336 {
2337 case TYPE_MSKMOV:
2338 return "kmovq\t{%1, %0|%0, %1}";
2339
2340 case TYPE_MSKLOG:
2341 if (operands[1] == const0_rtx)
2342 return "kxorq\t%0, %0, %0";
2343 else if (operands[1] == constm1_rtx)
2344 return "kxnorq\t%0, %0, %0";
2345 gcc_unreachable ();
2346
2347 case TYPE_MULTI:
2348 return "#";
2349
2350 case TYPE_MMX:
2351 return "pxor\t%0, %0";
2352
2353 case TYPE_MMXMOV:
2354 /* Handle broken assemblers that require movd instead of movq. */
2355 if (!HAVE_AS_IX86_INTERUNIT_MOVQ
2356 && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])))
2357 return "movd\t{%1, %0|%0, %1}";
2358 return "movq\t{%1, %0|%0, %1}";
2359
2360 case TYPE_SSELOG1:
2361 return standard_sse_constant_opcode (insn, operands);
2362
2363 case TYPE_SSEMOV:
2364 return ix86_output_ssemov (insn, operands);
2365
2366 case TYPE_SSECVT:
2367 if (SSE_REG_P (operands[0]))
2368 return "movq2dq\t{%1, %0|%0, %1}";
2369 else
2370 return "movdq2q\t{%1, %0|%0, %1}";
2371
2372 case TYPE_LEA:
2373 return "lea{q}\t{%E1, %0|%0, %E1}";
2374
2375 case TYPE_IMOV:
2376 gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1]));
2377 if (get_attr_mode (insn) == MODE_SI)
2378 return "mov{l}\t{%k1, %k0|%k0, %k1}";
2379 else if (which_alternative == 4)
2380 return "movabs{q}\t{%1, %0|%0, %1}";
2381 else if (ix86_use_lea_for_mov (insn, operands))
2382 return "lea{q}\t{%E1, %0|%0, %E1}";
2383 else
2384 return "mov{q}\t{%1, %0|%0, %1}";
2385
2386 default:
2387 gcc_unreachable ();
2388 }
2389 }
;; The "multi" alternatives (0,1,17,18) are restricted to "nox64".
2390 [(set (attr "isa")
2391 (cond [(eq_attr "alternative" "0,1,17,18")
2392 (const_string "nox64")
2393 (eq_attr "alternative" "2,3,4,5,10,11,23,25")
2394 (const_string "x64")
2395 (eq_attr "alternative" "19,20")
2396 (const_string "x64_sse2")
2397 (eq_attr "alternative" "21,22")
2398 (const_string "sse2")
2399 ]
2400 (const_string "*")))
2401 (set (attr "type")
2402 (cond [(eq_attr "alternative" "0,1,17,18")
2403 (const_string "multi")
2404 (eq_attr "alternative" "6")
2405 (const_string "mmx")
2406 (eq_attr "alternative" "7,8,9,10,11")
2407 (const_string "mmxmov")
2408 (eq_attr "alternative" "12")
2409 (const_string "sselog1")
2410 (eq_attr "alternative" "13,14,15,16,19,20")
2411 (const_string "ssemov")
2412 (eq_attr "alternative" "21,22")
2413 (const_string "ssecvt")
2414 (eq_attr "alternative" "23,24,25,26")
2415 (const_string "mskmov")
2416 (eq_attr "alternative" "27")
2417 (const_string "msklog")
2418 (and (match_operand 0 "register_operand")
2419 (match_operand 1 "pic_32bit_operand"))
2420 (const_string "lea")
2421 ]
2422 (const_string "imov")))
;; Alternative 4 ("r" <- "i") is the one output as movabs{q} when its type
;; is imov; it gets modrm 0 and an 8-byte immediate length.
2423 (set (attr "modrm")
2424 (if_then_else
2425 (and (eq_attr "alternative" "4") (eq_attr "type" "imov"))
2426 (const_string "0")
2427 (const_string "*")))
2428 (set (attr "length_immediate")
2429 (if_then_else
2430 (and (eq_attr "alternative" "4") (eq_attr "type" "imov"))
2431 (const_string "8")
2432 (const_string "*")))
2433 (set (attr "prefix_rex")
2434 (if_then_else
2435 (eq_attr "alternative" "10,11,19,20")
2436 (const_string "1")
2437 (const_string "*")))
2438 (set (attr "prefix")
2439 (if_then_else (eq_attr "type" "sselog1,ssemov")
2440 (const_string "maybe_vex")
2441 (const_string "orig")))
2442 (set (attr "prefix_data16")
2443 (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "DI"))
2444 (const_string "1")
2445 (const_string "*")))
;; Mode selection, first match wins: SI for alternative 2 (source "Z");
;; TI or V4SF for the SSE constant/register alternatives 12,13; V2SF for
;; the SSE load/store alternatives 14-16 without TARGET_SSE2; DI otherwise.
2446 (set (attr "mode")
2447 (cond [(eq_attr "alternative" "2")
2448 (const_string "SI")
2449 (eq_attr "alternative" "12,13")
2450 (cond [(match_test "TARGET_AVX")
2451 (const_string "TI")
2452 (ior (not (match_test "TARGET_SSE2"))
2453 (match_test "optimize_function_for_size_p (cfun)"))
2454 (const_string "V4SF")
2455 ]
2456 (const_string "TI"))
2457
2458 (and (eq_attr "alternative" "14,15,16")
2459 (not (match_test "TARGET_SSE2")))
2460 (const_string "V2SF")
2461 ]
2462 (const_string "DI")))
2463 (set (attr "preferred_for_speed")
2464 (cond [(eq_attr "alternative" "10,17,19")
2465 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
2466 (eq_attr "alternative" "11,18,20")
2467 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
2468 ]
2469 (symbol_ref "true")))
;; Alternatives 15 ("m" <- "?v") and 16 ("m" <- "v") differ only in the "?"
;; on the source.  With TARGET_STV && TARGET_SSE2 alternative 15 is
;; disabled and alternative 16 is enabled; otherwise alternative 16 is
;; disabled and alternative 15 keeps the default enabled value.
2470 (set (attr "enabled")
2471 (cond [(eq_attr "alternative" "15")
2472 (if_then_else
2473 (match_test "TARGET_STV && TARGET_SSE2")
2474 (symbol_ref "false")
2475 (const_string "*"))
2476 (eq_attr "alternative" "16")
2477 (if_then_else
2478 (match_test "TARGET_STV && TARGET_SSE2")
2479 (symbol_ref "true")
2480 (symbol_ref "false"))
2481 ]
2482 (const_string "*")))])
2483
;; After reload with TARGET_SSE4_1: split a <DWI>-mode move from an SSE
;; register into general registers.  The preparation code first emits a
;; move of the <MODE>-mode low parts; the replacement pattern then sets the
;; high part of the destination (operands[2]) to element 1 of the
;; <ssevecmode> view of the source (operands[3]) via vec_select.
2484 (define_split
2485 [(set (match_operand:<DWI> 0 "general_reg_operand")
2486 (match_operand:<DWI> 1 "sse_reg_operand"))]
2487 "TARGET_SSE4_1
2488 && reload_completed"
2489 [(set (match_dup 2)
2490 (vec_select:DWIH
2491 (match_dup 3)
2492 (parallel [(const_int 1)])))]
2493 {
2494 operands[2] = gen_highpart (<MODE>mode, operands[0]);
2495 operands[3] = lowpart_subreg (<ssevecmode>mode, operands[1], <DWI>mode);
2496
2497 emit_move_insn (gen_lowpart (<MODE>mode, operands[0]),
2498 gen_lowpart (<MODE>mode, operands[1]));
2499 })
2500
;; After reload, split a double-word (DWI) move whose destination satisfies
;; nonimmediate_gr_operand and whose source satisfies general_gr_operand.
;; The preparation statement calls ix86_split_long_move and ends with DONE,
;; so the (const_int 0) replacement pattern is only a placeholder.
2501 (define_split
2502 [(set (match_operand:DWI 0 "nonimmediate_gr_operand")
2503 (match_operand:DWI 1 "general_gr_operand"))]
2504 "reload_completed"
2505 [(const_int 0)]
2506 "ix86_split_long_move (operands); DONE;")
2507
;; After reload, for !TARGET_64BIT && TARGET_SSE4_1: split a DImode move
;; from general registers into an SSE register.  The preparation code
;; first emits a move of the SImode low parts; the replacement pattern then
;; merges the SImode high part of the source (operands[3]) into the V4SI
;; view of the destination (operands[2]) with a vec_merge using mask 2.
2508 (define_split
2509 [(set (match_operand:DI 0 "sse_reg_operand")
2510 (match_operand:DI 1 "general_reg_operand"))]
2511 "!TARGET_64BIT && TARGET_SSE4_1
2512 && reload_completed"
2513 [(set (match_dup 2)
2514 (vec_merge:V4SI
2515 (vec_duplicate:V4SI (match_dup 3))
2516 (match_dup 2)
2517 (const_int 2)))]
2518 {
2519 operands[2] = lowpart_subreg (V4SImode, operands[0], DImode);
2520 operands[3] = gen_highpart (SImode, operands[1]);
2521
2522 emit_move_insn (gen_lowpart (SImode, operands[0]),
2523 gen_lowpart (SImode, operands[1]));
2524 })
2525
2526 ;; movabsq $0x0012345678000000, %rax is longer
2527 ;; than movl $0x12345678, %eax; shlq $24, %rax.
;; Conditions: TARGET_64BIT, optimizing the insn for size, destination is
;; a LEGACY_INT_REG_P register, the constant satisfies neither
;; x86_64_immediate_operand nor x86_64_zext_immediate_operand, the constant
;; with its trailing zero bits shifted out has no bits set above bit 31,
;; and FLAGS_REG is dead (the shift clobbers it).
;; The preparation code replaces operands[1] by the shifted-down constant
;; and sets operands[2] to the shift count (QImode).
2528 (define_peephole2
2529 [(set (match_operand:DI 0 "register_operand")
2530 (match_operand:DI 1 "const_int_operand"))]
2531 "TARGET_64BIT
2532 && optimize_insn_for_size_p ()
2533 && LEGACY_INT_REG_P (operands[0])
2534 && !x86_64_immediate_operand (operands[1], DImode)
2535 && !x86_64_zext_immediate_operand (operands[1], DImode)
2536 && !((UINTVAL (operands[1]) >> ctz_hwi (UINTVAL (operands[1])))
2537 & ~(HOST_WIDE_INT) 0xffffffff)
2538 && peep2_regno_dead_p (0, FLAGS_REG)"
2539 [(set (match_dup 0) (match_dup 1))
2540 (parallel [(set (match_dup 0) (ashift:DI (match_dup 0) (match_dup 2)))
2541 (clobber (reg:CC FLAGS_REG))])]
2542 {
2543 int shift = ctz_hwi (UINTVAL (operands[1]));
2544 operands[1] = gen_int_mode (UINTVAL (operands[1]) >> shift, DImode);
2545 operands[2] = gen_int_mode (shift, QImode);
2546 })
2547
;; SImode move, 18 alternatives (numbered 0-17 in constraint order).
;; Memory-to-memory moves are rejected and ix86_hardreg_mov_ok must accept
;; the operand pair.  The output code dispatches on the "type" attribute,
;; which is derived from the alternative number further down:
;;   2           mmx     (pxor)
;;   3-7         mmxmov  (movq when the mode attribute is DI, movd when SI)
;;   8           sselog1 (standard_sse_constant_opcode)
;;   9-13        ssemov  (ix86_output_ssemov)
;;   14-16       mskmov  (kmovd)
;;   17          msklog  (kxord for 0, kxnord for -1)
;;   remaining   lea when operand 0 is a register and operand 1 is a
;;               pic_32bit_operand, otherwise imov (which may still be
;;               output as lea if ix86_use_lea_for_mov says so)
2548 (define_insn "*movsi_internal"
2549 [(set (match_operand:SI 0 "nonimmediate_operand"
2550 "=r,m ,*y,*y,?*y,?m,?r,?*y,?v,?v,?v,m ,?r,?v,*k,*k ,*rm,*k")
2551 (match_operand:SI 1 "general_operand"
2552 "g ,re,C ,*y,Bk ,*y,*y,r ,C ,?v,Bk,?v,?v,r ,*r,*kBk,*k ,CBC"))]
2553 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
2554 && ix86_hardreg_mov_ok (operands[0], operands[1])"
2555 {
2556 switch (get_attr_type (insn))
2557 {
2558 case TYPE_SSELOG1:
2559 return standard_sse_constant_opcode (insn, operands);
2560
2561 case TYPE_MSKMOV:
2562 return "kmovd\t{%1, %0|%0, %1}";
2563
2564 case TYPE_MSKLOG:
2565 if (operands[1] == const0_rtx)
2566 return "kxord\t%0, %0, %0";
2567 else if (operands[1] == constm1_rtx)
2568 return "kxnord\t%0, %0, %0";
2569 gcc_unreachable ();
2570
2571 case TYPE_SSEMOV:
2572 return ix86_output_ssemov (insn, operands);
2573
2574 case TYPE_MMX:
2575 return "pxor\t%0, %0";
2576
2577 case TYPE_MMXMOV:
2578 switch (get_attr_mode (insn))
2579 {
2580 case MODE_DI:
2581 return "movq\t{%1, %0|%0, %1}";
2582 case MODE_SI:
2583 return "movd\t{%1, %0|%0, %1}";
2584
2585 default:
2586 gcc_unreachable ();
2587 }
2588
2589 case TYPE_LEA:
2590 return "lea{l}\t{%E1, %0|%0, %E1}";
2591
2592 case TYPE_IMOV:
2593 gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1]));
2594 if (ix86_use_lea_for_mov (insn, operands))
2595 return "lea{l}\t{%E1, %0|%0, %E1}";
2596 else
2597 return "mov{l}\t{%1, %0|%0, %1}";
2598
2599 default:
2600 gcc_unreachable ();
2601 }
2602 }
;; Only the general <-> "v" register alternatives (12,13) carry an isa
;; restriction ("sse2").
2603 [(set (attr "isa")
2604 (cond [(eq_attr "alternative" "12,13")
2605 (const_string "sse2")
2606 ]
2607 (const_string "*")))
2608 (set (attr "type")
2609 (cond [(eq_attr "alternative" "2")
2610 (const_string "mmx")
2611 (eq_attr "alternative" "3,4,5,6,7")
2612 (const_string "mmxmov")
2613 (eq_attr "alternative" "8")
2614 (const_string "sselog1")
2615 (eq_attr "alternative" "9,10,11,12,13")
2616 (const_string "ssemov")
2617 (eq_attr "alternative" "14,15,16")
2618 (const_string "mskmov")
2619 (eq_attr "alternative" "17")
2620 (const_string "msklog")
2621 (and (match_operand 0 "register_operand")
2622 (match_operand 1 "pic_32bit_operand"))
2623 (const_string "lea")
2624 ]
2625 (const_string "imov")))
2626 (set (attr "prefix")
2627 (if_then_else (eq_attr "type" "sselog1,ssemov")
2628 (const_string "maybe_vex")
2629 (const_string "orig")))
2630 (set (attr "prefix_data16")
2631 (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "SI"))
2632 (const_string "1")
2633 (const_string "*")))
;; Mode selection, first match wins: DI for the MMX clear and MMX
;; register-to-register alternatives (2,3); TI or V4SF for the SSE
;; constant/register alternatives 8,9; SF for the SSE load/store
;; alternatives 10,11 without TARGET_SSE2; SI otherwise.
2634 (set (attr "mode")
2635 (cond [(eq_attr "alternative" "2,3")
2636 (const_string "DI")
2637 (eq_attr "alternative" "8,9")
2638 (cond [(match_test "TARGET_AVX")
2639 (const_string "TI")
2640 (ior (not (match_test "TARGET_SSE2"))
2641 (match_test "optimize_function_for_size_p (cfun)"))
2642 (const_string "V4SF")
2643 ]
2644 (const_string "TI"))
2645
2646 (and (eq_attr "alternative" "10,11")
2647 (not (match_test "TARGET_SSE2")))
2648 (const_string "SF")
2649 ]
2650 (const_string "SI")))
2651 (set (attr "preferred_for_speed")
2652 (cond [(eq_attr "alternative" "6,12")
2653 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
2654 (eq_attr "alternative" "7,13")
2655 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
2656 ]
2657 (symbol_ref "true")))])
2658
2659 ;; With -Oz, transform mov $imm,reg to the shorter push $imm; pop reg.
;; Conditions: optimizing for size with optimize_size > 1, a nonzero
;; constant in [-128, 127], ix86_red_zone_used is false, and the
;; destination is not the stack pointer.
;; The preparation code re-creates the destination register in word_mode
;; if necessary, makes operands[2] a word_mode MEM with a PRE_DEC
;; stack-pointer address (the push) and operands[3] a word_mode MEM with a
;; POST_INC stack-pointer address (the pop).
2660 (define_peephole2
2661 [(set (match_operand:SWI248 0 "general_reg_operand")
2662 (match_operand:SWI248 1 "const_int_operand"))]
2663 "optimize_insn_for_size_p () && optimize_size > 1
2664 && operands[1] != const0_rtx
2665 && IN_RANGE (INTVAL (operands[1]), -128, 127)
2666 && !ix86_red_zone_used
2667 && REGNO (operands[0]) != SP_REG"
2668 [(set (match_dup 2) (match_dup 1))
2669 (set (match_dup 0) (match_dup 3))]
2670 {
2671 if (GET_MODE (operands[0]) != word_mode)
2672 operands[0] = gen_rtx_REG (word_mode, REGNO (operands[0]));
2673
2674 operands[2] = gen_rtx_MEM (word_mode,
2675 gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx));
2676 operands[3] = gen_rtx_MEM (word_mode,
2677 gen_rtx_POST_INC (Pmode, stack_pointer_rtx));
2678 })
2679
2680 ;; With -Oz, transform mov $0,mem to the shorter and $0,mem.
2681 ;; Likewise, transform mov $-1,mem to the shorter or $-1,mem.
;; Conditions: the constant is exactly 0 or -1, optimizing for size with
;; optimize_size > 1, and FLAGS_REG is dead.  The replacement is the same
;; set wrapped in a parallel with a FLAGS_REG clobber, which is the shape
;; of the *mov<mode>_and / *mov<mode>_or patterns.
2682 (define_peephole2
2683 [(set (match_operand:SWI248 0 "memory_operand")
2684 (match_operand:SWI248 1 "const_int_operand"))]
2685 "(operands[1] == const0_rtx || operands[1] == constm1_rtx)
2686 && optimize_insn_for_size_p () && optimize_size > 1
2687 && peep2_regno_dead_p (0, FLAGS_REG)"
2688 [(parallel [(set (match_dup 0) (match_dup 1))
2689 (clobber (reg:CC FLAGS_REG))])])
2690
;; HImode move, 15 alternatives (numbered 0-14 in constraint order).
;; Memory-to-memory moves are rejected and ix86_hardreg_mov_ok must accept
;; the operand pair.  The output code dispatches on the "type" attribute:
;;   imovx    movz{wl|x} into the SImode view of the destination
;;   mskmov   kmovw; alternative 4 prints the source with %k, alternative 6
;;            prints the destination with %k, alternatives 5 and 7 print
;;            both operands unmodified
;;   ssemov   ix86_output_ssemov
;;   sselog1  standard_sse_constant_opcode for a "C" constant, otherwise
;;            pinsrw when the destination is an SSE register and pextrw
;;            when it is not
;;   msklog   kxorw for 0, kxnorw for -1
;;   default  mov{l} on the %k operands when the mode attribute is SI,
;;            mov{w} otherwise
2691 (define_insn "*movhi_internal"
2692 [(set (match_operand:HI 0 "nonimmediate_operand"
2693 "=r,r,r,m ,*k,*k ,r ,m ,*k ,?r,?*v,*v,*v,*v,m")
2694 (match_operand:HI 1 "general_operand"
2695 "r ,n,m,rn,r ,*km,*k,*k,CBC,*v,r ,C ,*v,m ,*v"))]
2696 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
2697 && ix86_hardreg_mov_ok (operands[0], operands[1])"
2698 {
2699 switch (get_attr_type (insn))
2700 {
2701 case TYPE_IMOVX:
2702 /* movzwl is faster than movw on p2 due to partial word stalls,
2703 though not as fast as an aligned movl. */
2704 return "movz{wl|x}\t{%1, %k0|%k0, %1}";
2705
2706 case TYPE_MSKMOV:
2707 switch (which_alternative)
2708 {
2709 case 4:
2710 return "kmovw\t{%k1, %0|%0, %k1}";
2711 case 6:
2712 return "kmovw\t{%1, %k0|%k0, %1}";
2713 case 5:
2714 case 7:
2715 return "kmovw\t{%1, %0|%0, %1}";
2716 default:
2717 gcc_unreachable ();
2718 }
2719
2720 case TYPE_SSEMOV:
2721 return ix86_output_ssemov (insn, operands);
2722
2723 case TYPE_SSELOG1:
2724 if (satisfies_constraint_C (operands[1]))
2725 return standard_sse_constant_opcode (insn, operands);
2726
2727 if (SSE_REG_P (operands[0]))
2728 return "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}";
2729 else
2730 return "%vpextrw\t{$0, %1, %0|%0, %1, 0}";
2731
2732 case TYPE_MSKLOG:
2733 if (operands[1] == const0_rtx)
2734 return "kxorw\t%0, %0, %0";
2735 else if (operands[1] == constm1_rtx)
2736 return "kxnorw\t%0, %0, %0";
2737 gcc_unreachable ();
2738
2739 default:
2740 if (get_attr_mode (insn) == MODE_SI)
2741 return "mov{l}\t{%k1, %k0|%k0, %k1}";
2742 else
2743 return "mov{w}\t{%1, %0|%0, %1}";
2744 }
2745 }
;; All "v" register alternatives (9-13) need "sse2"; the store from a "v"
;; register to memory (14) needs "sse4".
2746 [(set (attr "isa")
2747 (cond [(eq_attr "alternative" "9,10,11,12,13")
2748 (const_string "sse2")
2749 (eq_attr "alternative" "14")
2750 (const_string "sse4")
2751 ]
2752 (const_string "*")))
;; Type selection, first match wins.  The "v" register load/store
;; alternatives (13,14) are ssemov with TARGET_AVX512FP16 and sselog1
;; (pinsrw/pextrw) without it.  For the general-register alternatives the
;; choice between imov and imovx depends on size optimization,
;; TARGET_PARTIAL_REG_STALL, TARGET_HIMODE_MATH, operand alignment and
;; TARGET_MOVX.
2753 (set (attr "type")
2754 (cond [(eq_attr "alternative" "4,5,6,7")
2755 (const_string "mskmov")
2756 (eq_attr "alternative" "8")
2757 (const_string "msklog")
2758 (eq_attr "alternative" "13,14")
2759 (if_then_else (match_test "TARGET_AVX512FP16")
2760 (const_string "ssemov")
2761 (const_string "sselog1"))
2762 (eq_attr "alternative" "11")
2763 (const_string "sselog1")
2764 (eq_attr "alternative" "9,10,12")
2765 (const_string "ssemov")
2766 (match_test "optimize_function_for_size_p (cfun)")
2767 (const_string "imov")
2768 (and (eq_attr "alternative" "0")
2769 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
2770 (not (match_test "TARGET_HIMODE_MATH"))))
2771 (const_string "imov")
2772 (and (eq_attr "alternative" "1,2")
2773 (match_operand:HI 1 "aligned_operand"))
2774 (const_string "imov")
2775 (and (match_test "TARGET_MOVX")
2776 (eq_attr "alternative" "0,2"))
2777 (const_string "imovx")
2778 ]
2779 (const_string "imov")))
2780 (set (attr "prefix")
2781 (cond [(eq_attr "alternative" "4,5,6,7,8")
2782 (const_string "vex")
2783 (eq_attr "alternative" "9,10,11,12,13,14")
2784 (const_string "maybe_evex")
2785 ]
2786 (const_string "orig")))
;; Mode selection, first match wins.  The SSE alternatives pick their mode
;; from TARGET_AVX512FP16, TARGET_AVX, TARGET_SSE2 and size optimization.
;; The general-register alternatives use SI for imovx, for aligned
;; constant/memory sources (1,2) and for register-to-register moves (0)
;; unless both TARGET_PARTIAL_REG_STALL and TARGET_HIMODE_MATH are set;
;; everything else is HI.
2787 (set (attr "mode")
2788 (cond [(eq_attr "alternative" "9,10")
2789 (if_then_else (match_test "TARGET_AVX512FP16")
2790 (const_string "HI")
2791 (const_string "SI"))
2792 (eq_attr "alternative" "13,14")
2793 (if_then_else (match_test "TARGET_AVX512FP16")
2794 (const_string "HI")
2795 (const_string "TI"))
2796 (eq_attr "alternative" "11")
2797 (cond [(match_test "TARGET_AVX")
2798 (const_string "TI")
2799 (ior (not (match_test "TARGET_SSE2"))
2800 (match_test "optimize_function_for_size_p (cfun)"))
2801 (const_string "V4SF")
2802 ]
2803 (const_string "TI"))
2804 (eq_attr "alternative" "12")
2805 (cond [(match_test "TARGET_AVX512FP16")
2806 (const_string "HF")
2807 (match_test "TARGET_AVX")
2808 (const_string "TI")
2809 (ior (not (match_test "TARGET_SSE2"))
2810 (match_test "optimize_function_for_size_p (cfun)"))
2811 (const_string "V4SF")
2812 ]
2813 (const_string "TI"))
2814 (eq_attr "type" "imovx")
2815 (const_string "SI")
2816 (and (eq_attr "alternative" "1,2")
2817 (match_operand:HI 1 "aligned_operand"))
2818 (const_string "SI")
2819 (and (eq_attr "alternative" "0")
2820 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
2821 (not (match_test "TARGET_HIMODE_MATH"))))
2822 (const_string "SI")
2823 ]
2824 (const_string "HI")))
2825 (set (attr "preferred_for_speed")
2826 (cond [(eq_attr "alternative" "9")
2827 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
2828 (eq_attr "alternative" "10")
2829 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
2830 ]
2831 (symbol_ref "true")))])
2832
2833 ;; Situation is quite tricky about when to choose full sized (SImode) move
2834 ;; over QImode moves. For Q_REG -> Q_REG move we use full size only for
2835 ;; partial register dependency machines (such as AMD Athlon), where QImode
2836 ;; moves issue extra dependency and for partial register stalls machines
2837 ;; that don't use QImode patterns (and a QImode move causes a stall on the next
2838 ;; instruction).
2839 ;;
2840 ;; For loads of Q_REG to NONQ_REG we use full sized moves except for partial
2841 ;; register stall machines, where we use QImode instructions, since
2842 ;; partial register stall can be caused there. Then we use movzx.
2843
;; QImode move, 16 alternatives (numbered 0-15 in constraint order).
;; Memory-to-memory moves are rejected and ix86_hardreg_mov_ok must accept
;; the operand pair.  The output code dispatches on the "type" attribute:
;;   imovx    movz{bl|x} into the SImode view of the destination; the
;;            source is asserted to be a QI register or memory
;;   mskmov   kmov with a "w" suffix when the mode attribute is HI and "b"
;;            otherwise; alternative 9 prints the source with %k,
;;            alternative 11 prints the destination with %k, and the memory
;;            alternatives 12,13 assert TARGET_AVX512DQ
;;   msklog   kxorw/kxorb for 0 (chosen by the mode attribute), kxnorb for
;;            -1 (asserts TARGET_AVX512DQ)
;;   default  mov{l} on the %k operands when the mode attribute is SI,
;;            mov{b} otherwise
2844 (define_insn "*movqi_internal"
2845 [(set (match_operand:QI 0 "nonimmediate_operand"
2846 "=Q,R,r,q,q,r,r ,?r,m ,*k,*k,*r,*m,*k,*k,*k")
2847 (match_operand:QI 1 "general_operand"
2848 "Q ,R,r,n,m,q,rn, m,qn,*r,*k,*k,*k,*m,C,BC"))]
2849 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
2850 && ix86_hardreg_mov_ok (operands[0], operands[1])"
2851
2852 {
2853 char buf[128];
2854 const char *ops;
2855 const char *suffix;
2856
2857 switch (get_attr_type (insn))
2858 {
2859 case TYPE_IMOVX:
2860 gcc_assert (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1]));
2861 return "movz{bl|x}\t{%1, %k0|%k0, %1}";
2862
2863 case TYPE_MSKMOV:
2864 switch (which_alternative)
2865 {
2866 case 9:
2867 ops = "kmov%s\t{%%k1, %%0|%%0, %%k1}";
2868 break;
2869 case 11:
2870 ops = "kmov%s\t{%%1, %%k0|%%k0, %%1}";
2871 break;
2872 case 12:
2873 case 13:
2874 gcc_assert (TARGET_AVX512DQ);
2875 /* FALLTHRU */
2876 case 10:
2877 ops = "kmov%s\t{%%1, %%0|%%0, %%1}";
2878 break;
2879 default:
2880 gcc_unreachable ();
2881 }
2882
2883 suffix = (get_attr_mode (insn) == MODE_HI) ? "w" : "b";
2884
2885 snprintf (buf, sizeof (buf), ops, suffix);
2886 output_asm_insn (buf, operands);
2887 return "";
2888
2889 case TYPE_MSKLOG:
2890 if (operands[1] == const0_rtx)
2891 {
2892 if (get_attr_mode (insn) == MODE_HI)
2893 return "kxorw\t%0, %0, %0";
2894 else
2895 return "kxorb\t%0, %0, %0";
2896 }
2897 else if (operands[1] == constm1_rtx)
2898 {
2899 gcc_assert (TARGET_AVX512DQ);
2900 return "kxnorb\t%0, %0, %0";
2901 }
2902 gcc_unreachable ();
2903
2904 default:
2905 if (get_attr_mode (insn) == MODE_SI)
2906 return "mov{l}\t{%k1, %k0|%k0, %k1}";
2907 else
2908 return "mov{b}\t{%1, %0|%0, %1}";
2909 }
2910 }
;; Alternatives 1,2 are "x64" only.  The mask <-> memory moves (12,13) and
;; the "BC" constant load into a mask register (15) need "avx512dq", which
;; matches the TARGET_AVX512DQ assertions in the output code above.
2911 [(set (attr "isa")
2912 (cond [(eq_attr "alternative" "1,2")
2913 (const_string "x64")
2914 (eq_attr "alternative" "12,13,15")
2915 (const_string "avx512dq")
2916 ]
2917 (const_string "*")))
2918 (set (attr "type")
2919 (cond [(eq_attr "alternative" "9,10,11,12,13")
2920 (const_string "mskmov")
2921 (eq_attr "alternative" "14,15")
2922 (const_string "msklog")
2923 (and (eq_attr "alternative" "7")
2924 (not (match_operand:QI 1 "aligned_operand")))
2925 (const_string "imovx")
2926 (match_test "optimize_function_for_size_p (cfun)")
2927 (const_string "imov")
2928 (and (eq_attr "alternative" "5")
2929 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
2930 (not (match_test "TARGET_QIMODE_MATH"))))
2931 (const_string "imov")
2932 (eq_attr "alternative" "5,7")
2933 (const_string "imovx")
2934 (and (match_test "TARGET_MOVX")
2935 (eq_attr "alternative" "4"))
2936 (const_string "imovx")
2937 ]
2938 (const_string "imov")))
2939 (set (attr "prefix")
2940 (if_then_else (eq_attr "alternative" "9,10,11,12,13,14,15")
2941 (const_string "vex")
2942 (const_string "orig")))
;; Mode selection, first match wins.  Without TARGET_AVX512DQ the mask
;; alternatives 9,10,11,14 use HI, which makes the output code above pick
;; the "w" forms of kmov/kxor.
2943 (set (attr "mode")
2944 (cond [(eq_attr "alternative" "5,6,7")
2945 (const_string "SI")
2946 (eq_attr "alternative" "8")
2947 (const_string "QI")
2948 (and (eq_attr "alternative" "9,10,11,14")
2949 (not (match_test "TARGET_AVX512DQ")))
2950 (const_string "HI")
2951 (eq_attr "type" "imovx")
2952 (const_string "SI")
2953 ;; For -Os, 8-bit immediates are always shorter than 32-bit
2954 ;; ones.
2955 (and (eq_attr "type" "imov")
2956 (and (eq_attr "alternative" "3")
2957 (match_test "optimize_function_for_size_p (cfun)")))
2958 (const_string "QI")
2959 ;; For -Os, movl where one or both operands are NON_Q_REGS
2960 ;; and both are LEGACY_REGS is shorter than movb.
2961 ;; Otherwise movb and movl sizes are the same, so decide purely
2962 ;; based on speed factors.
2963 (and (eq_attr "type" "imov")
2964 (and (eq_attr "alternative" "1")
2965 (match_test "optimize_function_for_size_p (cfun)")))
2966 (const_string "SI")
2967 (and (eq_attr "type" "imov")
2968 (and (eq_attr "alternative" "0,1,2,3")
2969 (and (match_test "TARGET_PARTIAL_REG_DEPENDENCY")
2970 (not (match_test "TARGET_PARTIAL_REG_STALL")))))
2971 (const_string "SI")
2972 ;; Avoid partial register stalls when not using QImode arithmetic
2973 (and (eq_attr "type" "imov")
2974 (and (eq_attr "alternative" "0,1,2,3")
2975 (and (match_test "TARGET_PARTIAL_REG_STALL")
2976 (not (match_test "TARGET_QIMODE_MATH")))))
2977 (const_string "SI")
2978 ]
2979 (const_string "QI")))])
2980
2981 /* Reload dislikes loading 0/-1 directly into mask registers.
2982 Try to tidy things up here. */
;; Matches an immediate load into a general register (operand 0) followed
;; by a copy of that register into a mask register (operand 2), and
;; replaces the pair with a single set of the mask register from the
;; immediate.  It applies only when operand 0 is dead after the second
;; insn and the immediate is either 0, or -1 with a mode wider than one
;; byte or with TARGET_AVX512DQ.
2983 (define_peephole2
2984 [(set (match_operand:SWI 0 "general_reg_operand")
2985 (match_operand:SWI 1 "immediate_operand"))
2986 (set (match_operand:SWI 2 "mask_reg_operand")
2987 (match_dup 0))]
2988 "peep2_reg_dead_p (2, operands[0])
2989 && (const0_operand (operands[1], <MODE>mode)
2990 || (constm1_operand (operands[1], <MODE>mode)
2991 && (<MODE_SIZE> > 1 || TARGET_AVX512DQ)))"
2992 [(set (match_dup 2) (match_dup 1))])
2993
2994 ;; Stores and loads of ax to arbitrary constant address.
2995 ;; We fake a second form of instruction to force reload to load address
2996 ;; into register when rax is not available.
;; Store of operand 1 to the memory location whose address is operand 0.
;; Alternative 0 takes the address as an immediate with the source in the
;; "a" register and emits movabs; alternative 1 takes the address in a
;; register and emits an ordinary mov.  Only matched for TARGET_LP64 when
;; ix86_check_movabs accepts the insn.
2997 (define_insn "*movabs<mode>_1"
2998 [(set (mem:SWI1248x (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
2999 (match_operand:SWI1248x 1 "nonmemory_operand" "a,r<i>"))]
3000 "TARGET_LP64 && ix86_check_movabs (insn, 0)"
3001 {
3002 /* Recover the full memory rtx. */
/* Operand 0 only matched the address; replace it with the whole
   destination of the insn pattern before printing.  */
3003 operands[0] = SET_DEST (PATTERN (insn));
3004 switch (which_alternative)
3005 {
3006 case 0:
3007 return "movabs{<imodesuffix>}\t{%1, %P0|<iptrsize> PTR [%P0], %1}";
3008 case 1:
3009 return "mov{<imodesuffix>}\t{%1, %0|%0, %1}";
3010 default:
3011 gcc_unreachable ();
3012 }
3013 }
3014 [(set_attr "type" "imov")
3015 (set_attr "modrm" "0,*")
3016 (set_attr "length_address" "8,0")
3017 (set_attr "length_immediate" "0,*")
3018 (set_attr "memory" "store")
3019 (set_attr "mode" "<MODE>")])
3020
;; Load into operand 0 from the memory location whose address is
;; operand 1.  Alternative 0 loads into the "a" register from an immediate
;; address with movabs; alternative 1 takes the address in a register and
;; emits an ordinary mov.  Only matched for TARGET_LP64 when
;; ix86_check_movabs accepts the insn.
3021 (define_insn "*movabs<mode>_2"
3022 [(set (match_operand:SWI1248x 0 "register_operand" "=a,r")
3023 (mem:SWI1248x (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
3024 "TARGET_LP64 && ix86_check_movabs (insn, 1)"
3025 {
3026 /* Recover the full memory rtx. */
/* Operand 1 only matched the address; replace it with the whole
   source of the insn pattern before printing.  */
3027 operands[1] = SET_SRC (PATTERN (insn));
3028 switch (which_alternative)
3029 {
3030 case 0:
3031 return "movabs{<imodesuffix>}\t{%P1, %0|%0, <iptrsize> PTR [%P1]}";
3032 case 1:
3033 return "mov{<imodesuffix>}\t{%1, %0|%0, %1}";
3034 default:
3035 gcc_unreachable ();
3036 }
3037 }
3038 [(set_attr "type" "imov")
3039 (set_attr "modrm" "0,*")
3040 (set_attr "length_address" "8,0")
3041 (set_attr "length_immediate" "0")
3042 (set_attr "memory" "load")
3043 (set_attr "mode" "<MODE>")])
3044
;; Exchange of two SWI48 registers.  The pattern is a pair of sets in which
;; each operand receives the other, and it is emitted as a single xchg.
;; The condition string is empty, so the pattern is always available.
3045 (define_insn "swap<mode>"
3046 [(set (match_operand:SWI48 0 "register_operand" "+r")
3047 (match_operand:SWI48 1 "register_operand" "+r"))
3048 (set (match_dup 1)
3049 (match_dup 0))]
3050 ""
3051 "xchg{<imodesuffix>}\t%1, %0"
3052 [(set_attr "type" "imov")
3053 (set_attr "mode" "<MODE>")
3054 (set_attr "pent_pair" "np")
3055 (set_attr "athlon_decode" "vector")
3056 (set_attr "amdfam10_decode" "double")
3057 (set_attr "bdver1_decode" "double")])
3058
;; Exchange of two SWI12 registers.  Alternative 0 emits xchg with the
;; mode's own suffix; alternative 1 emits xchg{l} using the %k forms of
;; the operands and is given mode SI.  Alternative 0 is marked not
;; preferred for size; alternative 1 is not preferred for speed when
;; TARGET_PARTIAL_REG_STALL is set.
3059 (define_insn "*swap<mode>"
3060 [(set (match_operand:SWI12 0 "register_operand" "+<r>,r")
3061 (match_operand:SWI12 1 "register_operand" "+<r>,r"))
3062 (set (match_dup 1)
3063 (match_dup 0))]
3064 ""
3065 "@
3066 xchg{<imodesuffix>}\t%1, %0
3067 xchg{l}\t%k1, %k0"
3068 [(set_attr "type" "imov")
3069 (set_attr "mode" "<MODE>,SI")
3070 (set (attr "preferred_for_size")
3071 (cond [(eq_attr "alternative" "0")
3072 (symbol_ref "false")]
3073 (symbol_ref "true")))
3074 ;; Potential partial reg stall on alternative 1.
3075 (set (attr "preferred_for_speed")
3076 (cond [(eq_attr "alternative" "1")
3077 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
3078 (symbol_ref "true")))
3079 (set_attr "pent_pair" "np")
3080 (set_attr "athlon_decode" "vector")
3081 (set_attr "amdfam10_decode" "double")
3082 (set_attr "bdver1_decode" "double")])
3083
;; Recognizes a three-move swap through a temporary (op0 = op1; op1 = op2;
;; op2 = op0) and replaces it with one parallel exchange of operands 1
;; and 2.  Applies only when the temporary (operand 0) is dead after the
;; third insn and the insn is being optimized for size.
3084 (define_peephole2
3085 [(set (match_operand:SWI 0 "general_reg_operand")
3086 (match_operand:SWI 1 "general_reg_operand"))
3087 (set (match_dup 1)
3088 (match_operand:SWI 2 "general_reg_operand"))
3089 (set (match_dup 2) (match_dup 0))]
3090 "peep2_reg_dead_p (3, operands[0])
3091 && optimize_insn_for_size_p ()"
3092 [(parallel [(set (match_dup 1) (match_dup 2))
3093 (set (match_dup 2) (match_dup 1))])])
3094
3095 ;; Convert moves to/from AX_REG into xchg with -Oz.
;; The register-to-register move is rewritten as a parallel exchange of
;; the two operands.  Conditions: optimize_size > 1, exactly one of the
;; two registers is AX_REG, the insn is optimized for size, and the source
;; register (operand 1) is dead after the move -- the replacement also
;; writes operand 1.
3096 (define_peephole2
3097 [(set (match_operand:SWI48 0 "general_reg_operand")
3098 (match_operand:SWI48 1 "general_reg_operand"))]
3099 "optimize_size > 1
3100 && ((REGNO (operands[0]) == AX_REG)
3101 != (REGNO (operands[1]) == AX_REG))
3102 && optimize_insn_for_size_p ()
3103 && peep2_reg_dead_p (1, operands[1])"
3104 [(parallel [(set (match_dup 0) (match_dup 1))
3105 (set (match_dup 1) (match_dup 0))])])
3106
;; Expander for a strict_low_part store into an SWI12 register.
;; Operand 0 is asserted to be a SUBREG.  Expansion FAILs when
;; TARGET_PARTIAL_REG_STALL is set and the function is optimized for
;; speed, or when the mode of the SUBREG's inner register is not
;; VALID_INT_MODE_P; otherwise the pattern above is emitted as written.
3107 (define_expand "movstrict<mode>"
3108 [(set (strict_low_part (match_operand:SWI12 0 "register_operand"))
3109 (match_operand:SWI12 1 "general_operand"))]
3110 ""
3111 {
3112 gcc_assert (SUBREG_P (operands[0]));
3113 if ((TARGET_PARTIAL_REG_STALL && optimize_function_for_speed_p (cfun))
3114 || !VALID_INT_MODE_P (GET_MODE (SUBREG_REG (operands[0]))))
3115 FAIL;
3116 })
3117
;; strict_low_part move into an SWI12 register, emitted as a plain mov
;; with the mode's suffix.  Enabled when TARGET_PARTIAL_REG_STALL is off
;; or the function is optimized for size.
3118 (define_insn "*movstrict<mode>_1"
3119 [(set (strict_low_part
3120 (match_operand:SWI12 0 "register_operand" "+<r>"))
3121 (match_operand:SWI12 1 "general_operand" "<r>mn"))]
3122 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
3123 "mov{<imodesuffix>}\t{%1, %0|%0, %1}"
3124 [(set_attr "type" "imov")
3125 (set_attr "mode" "<MODE>")])
3126
;; strict_low_part store of zero into an SWI12 register, emitted as xor of
;; the register with itself.  The pattern clobbers FLAGS_REG and is only
;; matched once reload has completed.
3127 (define_insn "*movstrict<mode>_xor"
3128 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>"))
3129 (match_operand:SWI12 1 "const0_operand"))
3130 (clobber (reg:CC FLAGS_REG))]
3131 "reload_completed"
3132 "xor{<imodesuffix>}\t%0, %0"
3133 [(set_attr "type" "alu1")
3134 (set_attr "mode" "<MODE>")
3135 (set_attr "length_immediate" "0")])
3136
;; Expander for sign_extract on SWI24 registers.  Only the case where
;; operands 2 and 3 are both 8 is handled; anything else FAILs.  When
;; operand 1's register number is at most LAST_VIRTUAL_REGISTER and is
;; not QI_REGNO_P, the value is first copied into a fresh register.
3137 (define_expand "extv<mode>"
3138 [(set (match_operand:SWI24 0 "register_operand")
3139 (sign_extract:SWI24 (match_operand:SWI24 1 "register_operand")
3140 (match_operand:SI 2 "const_int_operand")
3141 (match_operand:SI 3 "const_int_operand")))]
3142 ""
3143 {
3144 /* Handle extractions from %ah et al. */
3145 if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8)
3146 FAIL;
3147
3148 unsigned int regno = reg_or_subregno (operands[1]);
3149
3150 /* Be careful to expand only with registers having upper parts. */
3151 if (regno <= LAST_VIRTUAL_REGISTER && !QI_REGNO_P (regno))
3152 operands[1] = copy_to_reg (operands[1]);
3153 })
3154
;; Sign-extracts the 8-bit field at bit position 8 of operand 1 into
;; operand 0.  Emitted as movs{bl|x} from the %h form of the source into
;; the %k form of the destination; the insn's mode attribute is SI.
3155 (define_insn "*extv<mode>"
3156 [(set (match_operand:SWI24 0 "register_operand" "=R")
3157 (sign_extract:SWI24 (match_operand:SWI24 1 "register_operand" "Q")
3158 (const_int 8)
3159 (const_int 8)))]
3160 ""
3161 "movs{bl|x}\t{%h1, %k0|%k0, %h1}"
3162 [(set_attr "type" "imovx")
3163 (set_attr "mode" "SI")])
3164
;; Expander for zero_extract on SWI248 registers.  ix86_expand_pextr is
;; tried first and, if it succeeds, expansion is DONE.  Otherwise only
;; the case where operands 2 and 3 are both 8 is handled (else FAIL), and
;; operand 1 is copied to a fresh register when its register number is at
;; most LAST_VIRTUAL_REGISTER and is not QI_REGNO_P.
3165 (define_expand "extzv<mode>"
3166 [(set (match_operand:SWI248 0 "register_operand")
3167 (zero_extract:SWI248 (match_operand:SWI248 1 "register_operand")
3168 (match_operand:SI 2 "const_int_operand")
3169 (match_operand:SI 3 "const_int_operand")))]
3170 ""
3171 {
3172 if (ix86_expand_pextr (operands))
3173 DONE;
3174
3175 /* Handle extractions from %ah et al. */
3176 if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8)
3177 FAIL;
3178
3179 unsigned int regno = reg_or_subregno (operands[1]);
3180
3181 /* Be careful to expand only with registers having upper parts. */
3182 if (regno <= LAST_VIRTUAL_REGISTER && !QI_REGNO_P (regno))
3183 operands[1] = copy_to_reg (operands[1]);
3184 })
3185
;; Stores the QImode subreg of an 8-bit zero_extract at bit position 8 of
;; operand 1 directly into a norex_memory_operand, using mov{b} from the
;; %h form of the source.  Matched only for TARGET_64BIT after reload.
3186 (define_insn "*extzvqi_mem_rex64"
3187 [(set (match_operand:QI 0 "norex_memory_operand" "=Bn")
3188 (subreg:QI
3189 (zero_extract:SWI248
3190 (match_operand:SWI248 1 "register_operand" "Q")
3191 (const_int 8)
3192 (const_int 8)) 0))]
3193 "TARGET_64BIT && reload_completed"
3194 "mov{b}\t{%h1, %0|%0, %h1}"
3195 [(set_attr "type" "imov")
3196 (set_attr "mode" "QI")])
3197
;; Zero-extracts the 8-bit field at bit position 8 of operand 1 into
;; operand 0.  Emitted as movz{bl|x} from the %h form of the source into
;; the %k form of the destination; the insn's mode attribute is SI.
3198 (define_insn "*extzv<mode>"
3199 [(set (match_operand:SWI248 0 "register_operand" "=R")
3200 (zero_extract:SWI248 (match_operand:SWI248 1 "register_operand" "Q")
3201 (const_int 8)
3202 (const_int 8)))]
3203 ""
3204 "movz{bl|x}\t{%h1, %k0|%k0, %h1}"
3205 [(set_attr "type" "imovx")
3206 (set_attr "mode" "SI")])
3207
;; QImode subreg of an 8-bit zero_extract at bit position 8 of operand 1,
;; stored into a register or memory.  The "type" attribute is imovx when
;; the destination is a register and either it is not a QIreg_operand or
;; TARGET_MOVX is set; in that case movz{bl|x} is emitted with mode SI.
;; Otherwise the type is imov and mov{b} is emitted with mode QI.  The
;; third (memory) alternative is restricted to nox64 by the isa attribute.
3208 (define_insn "*extzvqi"
3209 [(set (match_operand:QI 0 "nonimmediate_operand" "=QBc,?R,m")
3210 (subreg:QI
3211 (zero_extract:SWI248
3212 (match_operand:SWI248 1 "register_operand" "Q,Q,Q")
3213 (const_int 8)
3214 (const_int 8)) 0))]
3215 ""
3216 {
3217 switch (get_attr_type (insn))
3218 {
3219 case TYPE_IMOVX:
3220 return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
3221 default:
3222 return "mov{b}\t{%h1, %0|%0, %h1}";
3223 }
3224 }
3225 [(set_attr "isa" "*,*,nox64")
3226 (set (attr "type")
3227 (if_then_else (and (match_operand:QI 0 "register_operand")
3228 (ior (not (match_operand:QI 0 "QIreg_operand"))
3229 (match_test "TARGET_MOVX")))
3230 (const_string "imovx")
3231 (const_string "imov")))
3232 (set (attr "mode")
3233 (if_then_else (eq_attr "type" "imovx")
3234 (const_string "SI")
3235 (const_string "QI")))])
3236
;; Folds "extract the 8-bit field at bit 8 of operand 1 into QI register
;; operand 0, then store operand 0 to norex memory operand 2" into a
;; single store of the extracted field to operand 2.  Applies for
;; TARGET_64BIT when operand 0 is dead after the second insn.
3237 (define_peephole2
3238 [(set (match_operand:QI 0 "register_operand")
3239 (subreg:QI
3240 (zero_extract:SWI248 (match_operand:SWI248 1 "register_operand")
3241 (const_int 8)
3242 (const_int 8)) 0))
3243 (set (match_operand:QI 2 "norex_memory_operand") (match_dup 0))]
3244 "TARGET_64BIT
3245 && peep2_reg_dead_p (2, operands[0])"
3246 [(set (match_dup 2)
3247 (subreg:QI
3248 (zero_extract:SWI248 (match_dup 1)
3249 (const_int 8)
3250 (const_int 8)) 0))])
3251
;; Expander for bit-field insertion into an SWI248 register.
;; ix86_expand_pinsr is tried first and, if it succeeds, expansion is
;; DONE.  Otherwise only the case where operands 1 and 2 are both 8 is
;; handled (else FAIL).  The insertion is emitted through gen_insv_1 into
;; either operand 0 itself or, when operand 0's register number is at most
;; LAST_VIRTUAL_REGISTER and is not QI_REGNO_P, a copy of it that is moved
;; back to operand 0 afterwards.
3252 (define_expand "insv<mode>"
3253 [(set (zero_extract:SWI248 (match_operand:SWI248 0 "register_operand")
3254 (match_operand:SI 1 "const_int_operand")
3255 (match_operand:SI 2 "const_int_operand"))
3256 (match_operand:SWI248 3 "register_operand"))]
3257 ""
3258 {
3259 rtx dst;
3260
3261 if (ix86_expand_pinsr (operands))
3262 DONE;
3263
3264 /* Handle insertions to %ah et al. */
3265 if (INTVAL (operands[1]) != 8 || INTVAL (operands[2]) != 8)
3266 FAIL;
3267
3268 unsigned int regno = reg_or_subregno (operands[0]);
3269
3270 /* Be careful to expand only with registers having upper parts. */
3271 if (regno <= LAST_VIRTUAL_REGISTER && !QI_REGNO_P (regno))
3272 dst = copy_to_reg (operands[0]);
3273 else
3274 dst = operands[0];
3275
3276 emit_insn (gen_insv_1 (<MODE>mode, dst, operands[3]));
3277
3278 /* Fix up the destination if needed. */
3279 if (dst != operands[0])
3280 emit_move_insn (operands[0], dst);
3281
3282 DONE;
3283 })
3284
;; Inserts a QImode value loaded from a norex_memory_operand into the
;; 8-bit field at bit position 8 of operand 0, using mov{b} into the %h
;; form of the destination.  Matched only for TARGET_64BIT after reload.
3285 (define_insn "*insvqi_1_mem_rex64"
3286 [(set (zero_extract:SWI248
3287 (match_operand:SWI248 0 "register_operand" "+Q")
3288 (const_int 8)
3289 (const_int 8))
3290 (subreg:SWI248
3291 (match_operand:QI 1 "norex_memory_operand" "Bn") 0))]
3292 "TARGET_64BIT && reload_completed"
3293 "mov{b}\t{%1, %h0|%h0, %1}"
3294 [(set_attr "type" "imov")
3295 (set_attr "mode" "QI")])
3296
;; Inserts operand 1 into the 8-bit field at bit position 8 of operand 0,
;; emitted as mov{b} of the %b form of the source into the %h form of the
;; destination.  A CONST_INT source is first converted with gen_int_mode
;; to QImode.  The second (memory source) alternative is restricted to
;; nox64 by the isa attribute.  This is the pattern the insv<mode>
;; expander reaches through gen_insv_1.
3297 (define_insn "@insv<mode>_1"
3298 [(set (zero_extract:SWI248
3299 (match_operand:SWI248 0 "register_operand" "+Q,Q")
3300 (const_int 8)
3301 (const_int 8))
3302 (match_operand:SWI248 1 "general_operand" "QnBc,m"))]
3303 ""
3304 {
3305 if (CONST_INT_P (operands[1]))
3306 operands[1] = gen_int_mode (INTVAL (operands[1]), QImode);
3307 return "mov{b}\t{%b1, %h0|%h0, %b1}";
3308 }
3309 [(set_attr "isa" "*,nox64")
3310 (set_attr "type" "imov")
3311 (set_attr "mode" "QI")])
3312
;; Inserts a QImode operand (wrapped in an SWI248 subreg) into the 8-bit
;; field at bit position 8 of operand 0, emitted as mov{b} into the %h
;; form of the destination.  The second (memory source) alternative is
;; restricted to nox64 by the isa attribute.
3313 (define_insn "*insvqi_1"
3314 [(set (zero_extract:SWI248
3315 (match_operand:SWI248 0 "register_operand" "+Q,Q")
3316 (const_int 8)
3317 (const_int 8))
3318 (subreg:SWI248
3319 (match_operand:QI 1 "general_operand" "QnBc,m") 0))]
3320 ""
3321 "mov{b}\t{%1, %h0|%h0, %1}"
3322 [(set_attr "isa" "*,nox64")
3323 (set_attr "type" "imov")
3324 (set_attr "mode" "QI")])
3325
;; Folds "load QI register operand 0 from norex memory operand 1, then
;; insert operand 0 into the 8-bit field at bit 8 of operand 2" into a
;; single insertion taken directly from the memory operand.  Applies for
;; TARGET_64BIT when operand 0 is dead after the second insn.
3326 (define_peephole2
3327 [(set (match_operand:QI 0 "register_operand")
3328 (match_operand:QI 1 "norex_memory_operand"))
3329 (set (zero_extract:SWI248 (match_operand:SWI248 2 "register_operand")
3330 (const_int 8)
3331 (const_int 8))
3332 (subreg:SWI248 (match_dup 0) 0))]
3333 "TARGET_64BIT
3334 && peep2_reg_dead_p (2, operands[0])"
3335 [(set (zero_extract:SWI248 (match_dup 2)
3336 (const_int 8)
3337 (const_int 8))
3338 (subreg:SWI248 (match_dup 1) 0))])
3339
3340 ;; Eliminate redundant insv, e.g. xorl %eax,%eax; movb $0, %ah
;; The first insn sets a general register to zero (with a FLAGS_REG
;; clobber); the second stores zero into the 8-bit field at bit 8 of a
;; register.  When both refer to the same hard register number, only the
;; first insn is kept.
3341 (define_peephole2
3342 [(parallel [(set (match_operand:SWI48 0 "general_reg_operand")
3343 (const_int 0))
3344 (clobber (reg:CC FLAGS_REG))])
3345 (set (zero_extract:SWI248 (match_operand:SWI248 1 "general_reg_operand")
3346 (const_int 8)
3347 (const_int 8))
3348 (const_int 0))]
3349 "REGNO (operands[0]) == REGNO (operands[1])"
3350 [(parallel [(set (match_operand:SWI48 0 "general_reg_operand")
3351 (const_int 0))
3352 (clobber (reg:CC FLAGS_REG))])])
3353
3354 ;; Combine movl followed by movb.
;; A constant load into a general register followed by a constant
;; insertion into the 8-bit field at bit 8 of the same register (same
;; REGNO) is replaced by a single load of the merged constant, which is
;; computed into operand 4 below.
3355 (define_peephole2
3356 [(set (match_operand:SWI48 0 "general_reg_operand")
3357 (match_operand:SWI48 1 "const_int_operand"))
3358 (set (zero_extract:SWI248 (match_operand:SWI248 2 "general_reg_operand")
3359 (const_int 8)
3360 (const_int 8))
3361 (match_operand:SWI248 3 "const_int_operand"))]
3362 "REGNO (operands[0]) == REGNO (operands[2])"
3363 [(set (match_operand:SWI48 0 "general_reg_operand")
3364 (match_dup 4))]
3365 {
/* Clear bits 8..15 of the first constant, then place the low byte of
   the inserted constant there.  */
3366 HOST_WIDE_INT tmp = INTVAL (operands[1]) & ~(HOST_WIDE_INT)0xff00;
3367 tmp |= (INTVAL (operands[3]) & 0xff) << 8;
3368 operands[4] = gen_int_mode (tmp, <SWI48:MODE>mode);
3369 })
3370
3371
;; Code iterator covering both sign_extract and zero_extract.
3372 (define_code_iterator any_extract [sign_extract zero_extract])
3373
;; Copies the 8-bit field at bit position 8 of operand 1 into the 8-bit
;; field at bit position 8 of operand 0.  The source may be written as
;; either a sign_extract or a zero_extract; both are emitted as mov{b}
;; between the %h forms of the two registers.
3374 (define_insn "*insvqi_2"
3375 [(set (zero_extract:SWI248
3376 (match_operand:SWI248 0 "register_operand" "+Q")
3377 (const_int 8)
3378 (const_int 8))
3379 (any_extract:SWI248
3380 (match_operand:SWI248 1 "register_operand" "Q")
3381 (const_int 8)
3382 (const_int 8)))]
3383 ""
3384 "mov{b}\t{%h1, %h0|%h0, %h1}"
3385 [(set_attr "type" "imov")
3386 (set_attr "mode" "QI")])
3387
;; Same insertion as above, but with the source written as operand 1
;; shifted right by 8 (any_shiftrt) instead of as an extract.  Emitted as
;; mov{b} between the %h forms of the two registers.
3388 (define_insn "*insvqi_3"
3389 [(set (zero_extract:SWI248
3390 (match_operand:SWI248 0 "register_operand" "+Q")
3391 (const_int 8)
3392 (const_int 8))
3393 (any_shiftrt:SWI248
3394 (match_operand:SWI248 1 "register_operand" "Q")
3395 (const_int 8)))]
3396 ""
3397 "mov{b}\t{%h1, %h0|%h0, %h1}"
3398 [(set_attr "type" "imov")
3399 (set_attr "mode" "QI")])
3400 \f
3401 ;; Floating point push instructions.
3402
;; Push of a TFmode value, available for TARGET_64BIT or TARGET_SSE.
;; The output code always returns "#": the insn is never printed directly
;; and is expected to have been split earlier.  The second alternative is
;; restricted to x64 by the isa attribute.
3403 (define_insn "*pushtf"
3404 [(set (match_operand:TF 0 "push_operand" "=<,<")
3405 (match_operand:TF 1 "general_no_elim_operand" "v,*roC"))]
3406 "TARGET_64BIT || TARGET_SSE"
3407 {
3408 /* This insn should be already split before reg-stack. */
3409 return "#";
3410 }
3411 [(set_attr "isa" "*,x64")
3412 (set_attr "type" "multi")
3413 (set_attr "unit" "sse,*")
3414 (set_attr "mode" "TF,DI")])
3415
3416 ;; %%% Kill this when call knows how to work this out.
;; After reload, with TARGET_SSE, a TFmode push of an SSE register is
;; split into an explicit stack-pointer adjustment by -16 followed by a
;; store.  Operand 0 is rewritten so that its address is
;; stack_pointer_rtx.
3417 (define_split
3418 [(set (match_operand:TF 0 "push_operand")
3419 (match_operand:TF 1 "sse_reg_operand"))]
3420 "TARGET_SSE && reload_completed"
3421 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -16)))
3422 (set (match_dup 0) (match_dup 1))]
3423 {
3424 /* Preserve memory attributes. */
3425 operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
3426 })
3427
;; Push of an XFmode value.  The output code always returns "#": the insn
;; is never printed directly and is expected to have been split earlier.
;; Alternative 3 is nox64-only and alternative 4 x64-only (isa attribute).
;; Alternative 0 has mode XF; the others have mode DI for TARGET_64BIT and
;; SI otherwise.  Alternative 1 is marked not preferred for size.
3428 (define_insn "*pushxf"
3429 [(set (match_operand:XF 0 "push_operand" "=<,<,<,<,<")
3430 (match_operand:XF 1 "general_no_elim_operand" "f,r,*r,oF,oC"))]
3431 ""
3432 {
3433 /* This insn should be already split before reg-stack. */
3434 return "#";
3435 }
3436 [(set_attr "isa" "*,*,*,nox64,x64")
3437 (set_attr "type" "multi")
3438 (set_attr "unit" "i387,*,*,*,*")
3439 (set (attr "mode")
3440 (cond [(eq_attr "alternative" "1,2,3,4")
3441 (if_then_else (match_test "TARGET_64BIT")
3442 (const_string "DI")
3443 (const_string "SI"))
3444 ]
3445 (const_string "XF")))
3446 (set (attr "preferred_for_size")
3447 (cond [(eq_attr "alternative" "1")
3448 (symbol_ref "false")]
3449 (symbol_ref "true")))])
3450
3451 ;; %%% Kill this when call knows how to work this out.
;; After reload, an XFmode push of an fp_register_operand is split into an
;; explicit stack-pointer adjustment followed by a store.  The adjustment
;; (operand 2) is minus PUSH_ROUNDING of the XFmode size, and operand 0 is
;; rewritten so that its address is stack_pointer_rtx.
3452 (define_split
3453 [(set (match_operand:XF 0 "push_operand")
3454 (match_operand:XF 1 "fp_register_operand"))]
3455 "reload_completed"
3456 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
3457 (set (match_dup 0) (match_dup 1))]
3458 {
3459 operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (XFmode)));
3460 /* Preserve memory attributes. */
3461 operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
3462 })
3463
;; Push of a DFmode value.  The output code always returns "#": the insn
;; is never printed directly and is expected to have been split earlier.
;; Alternatives 1-3 are nox64-only, alternative 4 is x64-only and
;; alternative 5 requires sse2 (isa attribute).  Alternative 1 is marked
;; not preferred for size, and preferred for speed only with
;; TARGET_INTEGER_DFMODE_MOVES.
3464 (define_insn "*pushdf"
3465 [(set (match_operand:DF 0 "push_operand" "=<,<,<,<,<,<")
3466 (match_operand:DF 1 "general_no_elim_operand" "f,r,*r,oF,rmC,v"))]
3467 ""
3468 {
3469 /* This insn should be already split before reg-stack. */
3470 return "#";
3471 }
3472 [(set_attr "isa" "*,nox64,nox64,nox64,x64,sse2")
3473 (set_attr "type" "multi")
3474 (set_attr "unit" "i387,*,*,*,*,sse")
3475 (set_attr "mode" "DF,SI,SI,SI,DI,DF")
3476 (set (attr "preferred_for_size")
3477 (cond [(eq_attr "alternative" "1")
3478 (symbol_ref "false")]
3479 (symbol_ref "true")))
3480 (set (attr "preferred_for_speed")
3481 (cond [(eq_attr "alternative" "1")
3482 (symbol_ref "TARGET_INTEGER_DFMODE_MOVES")]
3483 (symbol_ref "true")))])
3484
3485 ;; %%% Kill this when call knows how to work this out.
;; After reload, a DFmode push of an any_fp_register_operand is split into
;; an explicit stack-pointer adjustment by -8 followed by a store.
;; Operand 0 is rewritten so that its address is stack_pointer_rtx.
3486 (define_split
3487 [(set (match_operand:DF 0 "push_operand")
3488 (match_operand:DF 1 "any_fp_register_operand"))]
3489 "reload_completed"
3490 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8)))
3491 (set (match_dup 0) (match_dup 1))]
3492 {
3493 /* Preserve memory attributes. */
3494 operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
3495 })
3496
;; Mode iterator over the two 16-bit floating-point modes HF and BF.
3497 (define_mode_iterator HFBF [HF BF])
3498
;; 64-bit push of an HF/BF value.  Alternative 0 (general register) is
;; emitted as push{q} of the %q form of the source.  Alternative 1 has
;; type "multi" and requires sse4 (isa attribute); the output code asserts
;; that it is never reached with that alternative.
3499 (define_insn "*push<mode>_rex64"
3500 [(set (match_operand:HFBF 0 "push_operand" "=X,X")
3501 (match_operand:HFBF 1 "nonmemory_no_elim_operand" "r,x"))]
3502 "TARGET_64BIT"
3503 {
3504 /* Anything else should be already split before reg-stack. */
3505 gcc_assert (which_alternative == 0);
3506 return "push{q}\t%q1";
3507 }
3508 [(set_attr "isa" "*,sse4")
3509 (set_attr "type" "push,multi")
3510 (set_attr "mode" "DI,TI")])
3511
;; 32-bit (!TARGET_64BIT) push of an HF/BF value.  Alternative 0 is
;; emitted as push{l} of the %k form of the source.  Alternative 1 has
;; type "multi" and requires sse4 (isa attribute); the output code asserts
;; that it is never reached with that alternative.
3512 (define_insn "*push<mode>"
3513 [(set (match_operand:HFBF 0 "push_operand" "=X,X")
3514 (match_operand:HFBF 1 "general_no_elim_operand" "rmF,x"))]
3515 "!TARGET_64BIT"
3516 {
3517 /* Anything else should be already split before reg-stack. */
3518 gcc_assert (which_alternative == 0);
3519 return "push{l}\t%k1";
3520 }
3521 [(set_attr "isa" "*,sse4")
3522 (set_attr "type" "push,multi")
3523 (set_attr "mode" "SI,TI")])
3524
;; 64-bit push of an SFmode value.  Only alternative 1 is printed
;; directly, as push{q} of the %q form of the source; alternatives 0 and 2
;; return "#" and are expected to be split.
3525 (define_insn "*pushsf_rex64"
3526 [(set (match_operand:SF 0 "push_operand" "=X,X,X")
3527 (match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,v"))]
3528 "TARGET_64BIT"
3529 {
3530 /* Anything else should be already split before reg-stack. */
3531 if (which_alternative != 1)
3532 return "#";
3533 return "push{q}\t%q1";
3534 }
3535 [(set_attr "type" "multi,push,multi")
3536 (set_attr "unit" "i387,*,*")
3537 (set_attr "mode" "SF,DI,SF")])
3538
;; 32-bit (!TARGET_64BIT) push of an SFmode value.  Only alternative 1 is
;; printed directly, as push{l}; alternatives 0 and 2 return "#" and are
;; expected to be split.
3539 (define_insn "*pushsf"
3540 [(set (match_operand:SF 0 "push_operand" "=<,<,<")
3541 (match_operand:SF 1 "general_no_elim_operand" "f,rmF,v"))]
3542 "!TARGET_64BIT"
3543 {
3544 /* Anything else should be already split before reg-stack. */
3545 if (which_alternative != 1)
3546 return "#";
3547 return "push{l}\t%1";
3548 }
3549 [(set_attr "type" "multi,push,multi")
3550 (set_attr "unit" "i387,*,*")
3551 (set_attr "mode" "SF,SI,SF")])
3552
;; Mode iterator over SF, HF and BF, used by the push split below.
3553 (define_mode_iterator MODESH [SF HF BF])
3554 ;; %%% Kill this when call knows how to work this out.
;; After reload, an SF/HF/BF push of an any_fp_register_operand is split
;; into an explicit stack-pointer adjustment (operand 2) followed by a
;; store, with operand 0 rewritten so that its address is
;; stack_pointer_rtx.  The adjustment is -4 when the push address is a
;; PRE_DEC (asserted to be !TARGET_64BIT); otherwise it is read out of the
;; address expression and asserted to be a CONST_INT.
3555 (define_split
3556 [(set (match_operand:MODESH 0 "push_operand")
3557 (match_operand:MODESH 1 "any_fp_register_operand"))]
3558 "reload_completed"
3559 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
3560 (set (match_dup 0) (match_dup 1))]
3561 {
3562 rtx op = XEXP (operands[0], 0);
3563 if (GET_CODE (op) == PRE_DEC)
3564 {
3565 gcc_assert (!TARGET_64BIT);
3566 op = GEN_INT (-4);
3567 }
3568 else
3569 {
/* NOTE(review): presumably the address is a PRE_MODIFY whose second
   operand is (plus sp const) -- confirm against push_operand.  */
3570 op = XEXP (XEXP (op, 1), 1);
3571 gcc_assert (CONST_INT_P (op));
3572 }
3573 operands[2] = op;
3574 /* Preserve memory attributes. */
3575 operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
3576 })
3577
;; After reload, an SFmode push whose source is a memory operand is
;; rewritten to push the value returned by find_constant_src instead,
;; provided that function returns non-null for the insn.  Note that the
;; condition passes `insn' while the preparation code passes `curr_insn'.
3578 (define_split
3579 [(set (match_operand:SF 0 "push_operand")
3580 (match_operand:SF 1 "memory_operand"))]
3581 "reload_completed
3582 && find_constant_src (insn)"
3583 [(set (match_dup 0) (match_dup 2))]
3584 "operands[2] = find_constant_src (curr_insn);")
3585
;; After reload, a push of a general_gr_operand whose mode is TFmode,
;; XFmode or DFmode is handed to ix86_split_long_move, which emits the
;; replacement insns itself (the pattern's own replacement template is a
;; dummy and expansion ends with DONE).
3586 (define_split
3587 [(set (match_operand 0 "push_operand")
3588 (match_operand 1 "general_gr_operand"))]
3589 "reload_completed
3590 && (GET_MODE (operands[0]) == TFmode
3591 || GET_MODE (operands[0]) == XFmode
3592 || GET_MODE (operands[0]) == DFmode)"
3593 [(const_int 0)]
3594 "ix86_split_long_move (operands); DONE;")
3595 \f
3596 ;; Floating point move instructions.
3597
;; TFmode move expander, available for TARGET_64BIT or TARGET_SSE.
;; All of the work is delegated to ix86_expand_move.
3598 (define_expand "movtf"
3599 [(set (match_operand:TF 0 "nonimmediate_operand")
3600 (match_operand:TF 1 "nonimmediate_operand"))]
3601 "TARGET_64BIT || TARGET_SSE"
3602 "ix86_expand_move (TFmode, operands); DONE;")
3603
;; Move expander for every mode in the X87MODEFH iterator.  It is
;; unconditional, and all of the work is delegated to ix86_expand_move.
3604 (define_expand "mov<mode>"
3605 [(set (match_operand:X87MODEFH 0 "nonimmediate_operand")
3606 (match_operand:X87MODEFH 1 "general_operand"))]
3607 ""
3608 "ix86_expand_move (<MODE>mode, operands); DONE;")
3609
;; TFmode move.  Memory-to-memory moves are rejected, and before LRA or
;; reload a CONST_DOUBLE source is accepted only in the cases listed in
;; the condition.  Output is chosen from the "type" attribute:
;; alternative 0 (sselog1) uses standard_sse_constant_opcode,
;; alternatives 1-2 (ssemov) use ix86_output_ssemov, and alternatives 3-4
;; (multi, x64 only) return "#" so that they are split.
3610 (define_insn "*movtf_internal"
3611 [(set (match_operand:TF 0 "nonimmediate_operand" "=v,v ,m,?*r ,!o")
3612 (match_operand:TF 1 "general_operand" "C ,vm,v,*roF,*rC"))]
3613 "(TARGET_64BIT || TARGET_SSE)
3614 && !(MEM_P (operands[0]) && MEM_P (operands[1]))
3615 && (lra_in_progress || reload_completed
3616 || !CONST_DOUBLE_P (operands[1])
3617 || (standard_sse_constant_p (operands[1], TFmode) == 1
3618 && !memory_operand (operands[0], TFmode))
3619 || (!TARGET_MEMORY_MISMATCH_STALL
3620 && memory_operand (operands[0], TFmode)))"
3621 {
3622 switch (get_attr_type (insn))
3623 {
3624 case TYPE_SSELOG1:
3625 return standard_sse_constant_opcode (insn, operands);
3626
3627 case TYPE_SSEMOV:
3628 return ix86_output_ssemov (insn, operands);
3629
3630 case TYPE_MULTI:
3631 return "#";
3632
3633 default:
3634 gcc_unreachable ();
3635 }
3636 }
3637 [(set_attr "isa" "*,*,*,x64,x64")
3638 (set_attr "type" "sselog1,ssemov,ssemov,multi,multi")
3639 (set (attr "prefix")
3640 (if_then_else (eq_attr "type" "sselog1,ssemov")
3641 (const_string "maybe_vex")
3642 (const_string "orig")))
3643 (set (attr "mode")
3644 (cond [(eq_attr "alternative" "3,4")
3645 (const_string "DI")
3646 (match_test "TARGET_AVX")
3647 (const_string "TI")
3648 (ior (not (match_test "TARGET_SSE2"))
3649 (match_test "optimize_function_for_size_p (cfun)"))
3650 (const_string "V4SF")
3651 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3652 (const_string "V4SF")
3653 (and (eq_attr "alternative" "2")
3654 (match_test "TARGET_SSE_TYPELESS_STORES"))
3655 (const_string "V4SF")
3656 ]
3657 (const_string "TI")))])
3658
;; After reload, a TFmode move between nonimmediate_gr_operand and
;; general_gr_operand is handed to ix86_split_long_move, which emits the
;; replacement insns itself.
3659 (define_split
3660 [(set (match_operand:TF 0 "nonimmediate_gr_operand")
3661 (match_operand:TF 1 "general_gr_operand"))]
3662 "reload_completed"
3663 [(const_int 0)]
3664 "ix86_split_long_move (operands); DONE;")
3665
3666 ;; Possible store forwarding (partial memory) stall
3667 ;; in alternatives 4, 6, 7 and 8.
;; XFmode move.  Memory-to-memory moves are rejected, and before LRA or
;; reload a CONST_DOUBLE source is accepted only in the cases listed in
;; the condition.  Alternatives 0-2 have type fmov: alternative 2 prints
;; standard_80387_constant_opcode, the others go through
;; output_387_reg_move.  Alternatives 3-11 have type multi and return "#"
;; so that they are split.  Per the "enabled" attribute, alternatives
;; 9-11 are available only without TARGET_HARD_XF_REGS and all other
;; alternatives only with it.
3668 (define_insn "*movxf_internal"
3669 [(set (match_operand:XF 0 "nonimmediate_operand"
3670 "=f,m,f,?r ,!o,?*r ,!o,!o,!o,r ,o ,o")
3671 (match_operand:XF 1 "general_operand"
3672 "fm,f,G,roF,r ,*roF,*r,F ,C ,roF,rF,rC"))]
3673 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
3674 && (lra_in_progress || reload_completed
3675 || !CONST_DOUBLE_P (operands[1])
3676 || ((optimize_function_for_size_p (cfun)
3677 || (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC))
3678 && standard_80387_constant_p (operands[1]) > 0
3679 && !memory_operand (operands[0], XFmode))
3680 || (!TARGET_MEMORY_MISMATCH_STALL
3681 && memory_operand (operands[0], XFmode))
3682 || !TARGET_HARD_XF_REGS)"
3683 {
3684 switch (get_attr_type (insn))
3685 {
3686 case TYPE_FMOV:
3687 if (which_alternative == 2)
3688 return standard_80387_constant_opcode (operands[1]);
3689 return output_387_reg_move (insn, operands);
3690
3691 case TYPE_MULTI:
3692 return "#";
3693
3694 default:
3695 gcc_unreachable ();
3696 }
3697 }
3698 [(set (attr "isa")
3699 (cond [(eq_attr "alternative" "7,10")
3700 (const_string "nox64")
3701 (eq_attr "alternative" "8,11")
3702 (const_string "x64")
3703 ]
3704 (const_string "*")))
3705 (set (attr "type")
3706 (cond [(eq_attr "alternative" "3,4,5,6,7,8,9,10,11")
3707 (const_string "multi")
3708 ]
3709 (const_string "fmov")))
3710 (set (attr "mode")
3711 (cond [(eq_attr "alternative" "3,4,5,6,7,8,9,10,11")
3712 (if_then_else (match_test "TARGET_64BIT")
3713 (const_string "DI")
3714 (const_string "SI"))
3715 ]
3716 (const_string "XF")))
3717 (set (attr "preferred_for_size")
3718 (cond [(eq_attr "alternative" "3,4")
3719 (symbol_ref "false")]
3720 (symbol_ref "true")))
3721 (set (attr "enabled")
3722 (cond [(eq_attr "alternative" "9,10,11")
3723 (if_then_else
3724 (match_test "TARGET_HARD_XF_REGS")
3725 (symbol_ref "false")
3726 (const_string "*"))
3727 (not (match_test "TARGET_HARD_XF_REGS"))
3728 (symbol_ref "false")
3729 ]
3730 (const_string "*")))])
3731
;; After reload, an XFmode move between nonimmediate_gr_operand and
;; general_gr_operand is handed to ix86_split_long_move, which emits the
;; replacement insns itself.
3732 (define_split
3733 [(set (match_operand:XF 0 "nonimmediate_gr_operand")
3734 (match_operand:XF 1 "general_gr_operand"))]
3735 "reload_completed"
3736 [(const_int 0)]
3737 "ix86_split_long_move (operands); DONE;")
3738
3739 ;; Possible store forwarding (partial memory) stall in alternatives 4, 6 and 7.
;; DFmode move with 26 alternatives.  Memory-to-memory moves are rejected,
;; and before LRA or reload a CONST_DOUBLE source is accepted only in the
;; cases listed in the condition.  Output is chosen from the "type"
;; attribute:
;;   fmov    (alternatives 0-2)          x87 move, or a standard x87
;;                                       constant for alternative 2;
;;   multi   (3-7, 22, 23; nox64)        returns "#" so that it is split;
;;   imov    (8-11, 24, 25; x64)         integer mov, movabs{q} for
;;                                       alternative 11, mov{l} when the
;;                                       mode attribute is SI;
;;   sselog1 (12, 16)                    standard_sse_constant_opcode;
;;   ssemov  (everything else)           ix86_output_ssemov.
;; Per the "enabled" attribute, alternatives 22-25 are available only
;; without TARGET_HARD_DF_REGS and all other alternatives only with it.
3740 (define_insn "*movdf_internal"
3741 [(set (match_operand:DF 0 "nonimmediate_operand"
3742 "=Yf*f,m ,Yf*f,?r ,!o,?*r ,!o,!o,?r,?m,?r,?r,v,v,v,m,*x,*x,*x,m ,r ,v,r ,o ,r ,m")
3743 (match_operand:DF 1 "general_operand"
3744 "Yf*fm,Yf*f,G ,roF,r ,*roF,*r,F ,rm,rC,C ,F ,C,v,m,v,C ,*x,m ,*x,v,r ,roF,rF,rmF,rC"))]
3745 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
3746 && (lra_in_progress || reload_completed
3747 || !CONST_DOUBLE_P (operands[1])
3748 || ((optimize_function_for_size_p (cfun)
3749 || (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC))
3750 && IS_STACK_MODE (DFmode)
3751 && standard_80387_constant_p (operands[1]) > 0
3752 && !memory_operand (operands[0], DFmode))
3753 || (TARGET_SSE2 && TARGET_SSE_MATH
3754 && standard_sse_constant_p (operands[1], DFmode) == 1
3755 && !memory_operand (operands[0], DFmode))
3756 || ((TARGET_64BIT || !TARGET_MEMORY_MISMATCH_STALL)
3757 && memory_operand (operands[0], DFmode))
3758 || !TARGET_HARD_DF_REGS)"
3759 {
3760 switch (get_attr_type (insn))
3761 {
3762 case TYPE_FMOV:
3763 if (which_alternative == 2)
3764 return standard_80387_constant_opcode (operands[1]);
3765 return output_387_reg_move (insn, operands);
3766
3767 case TYPE_MULTI:
3768 return "#";
3769
3770 case TYPE_IMOV:
3771 if (get_attr_mode (insn) == MODE_SI)
3772 return "mov{l}\t{%1, %k0|%k0, %1}";
3773 else if (which_alternative == 11)
3774 return "movabs{q}\t{%1, %0|%0, %1}";
3775 else
3776 return "mov{q}\t{%1, %0|%0, %1}";
3777
3778 case TYPE_SSELOG1:
3779 return standard_sse_constant_opcode (insn, operands);
3780
3781 case TYPE_SSEMOV:
3782 return ix86_output_ssemov (insn, operands);
3783
3784 default:
3785 gcc_unreachable ();
3786 }
3787 }
3788 [(set (attr "isa")
3789 (cond [(eq_attr "alternative" "3,4,5,6,7,22,23")
3790 (const_string "nox64")
3791 (eq_attr "alternative" "8,9,10,11,24,25")
3792 (const_string "x64")
3793 (eq_attr "alternative" "12,13,14,15")
3794 (const_string "sse2")
3795 (eq_attr "alternative" "20,21")
3796 (const_string "x64_sse2")
3797 ]
3798 (const_string "*")))
3799 (set (attr "type")
3800 (cond [(eq_attr "alternative" "0,1,2")
3801 (const_string "fmov")
3802 (eq_attr "alternative" "3,4,5,6,7,22,23")
3803 (const_string "multi")
3804 (eq_attr "alternative" "8,9,10,11,24,25")
3805 (const_string "imov")
3806 (eq_attr "alternative" "12,16")
3807 (const_string "sselog1")
3808 ]
3809 (const_string "ssemov")))
3810 (set (attr "modrm")
3811 (if_then_else (eq_attr "alternative" "11")
3812 (const_string "0")
3813 (const_string "*")))
3814 (set (attr "length_immediate")
3815 (if_then_else (eq_attr "alternative" "11")
3816 (const_string "8")
3817 (const_string "*")))
3818 (set (attr "prefix")
3819 (if_then_else (eq_attr "type" "sselog1,ssemov")
3820 (const_string "maybe_vex")
3821 (const_string "orig")))
3822 (set (attr "prefix_data16")
3823 (if_then_else
3824 (ior (and (eq_attr "type" "ssemov") (eq_attr "mode" "DI"))
3825 (eq_attr "mode" "V1DF"))
3826 (const_string "1")
3827 (const_string "*")))
3828 (set (attr "mode")
3829 (cond [(eq_attr "alternative" "3,4,5,6,7,10,22,23")
3830 (const_string "SI")
3831 (eq_attr "alternative" "8,9,11,20,21,24,25")
3832 (const_string "DI")
3833
3834 /* xorps is one byte shorter for non-AVX targets. */
3835 (eq_attr "alternative" "12,16")
3836 (cond [(match_test "TARGET_AVX")
3837 (const_string "V2DF")
3838 (ior (not (match_test "TARGET_SSE2"))
3839 (match_test "optimize_function_for_size_p (cfun)"))
3840 (const_string "V4SF")
3841 (match_test "TARGET_SSE_LOAD0_BY_PXOR")
3842 (const_string "TI")
3843 ]
3844 (const_string "V2DF"))
3845
3846 /* For architectures resolving dependencies on
3847 whole SSE registers use movapd to break dependency
3848 chains, otherwise use short move to avoid extra work. */
3849
3850 /* movaps is one byte shorter for non-AVX targets. */
3851 (eq_attr "alternative" "13,17")
3852 (cond [(match_test "TARGET_AVX")
3853 (const_string "DF")
3854 (ior (not (match_test "TARGET_SSE2"))
3855 (match_test "optimize_function_for_size_p (cfun)"))
3856 (const_string "V4SF")
3857 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3858 (const_string "V4SF")
3859 (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
3860 (const_string "V2DF")
3861 ]
3862 (const_string "DF"))
3863
3864 /* For architectures resolving dependencies on register
3865 parts we may avoid extra work to zero out upper part
3866 of register. */
3867 (eq_attr "alternative" "14,18")
3868 (cond [(not (match_test "TARGET_SSE2"))
3869 (const_string "V2SF")
3870 (match_test "TARGET_AVX")
3871 (const_string "DF")
3872 (match_test "TARGET_SSE_SPLIT_REGS")
3873 (const_string "V1DF")
3874 ]
3875 (const_string "DF"))
3876
3877 (and (eq_attr "alternative" "15,19")
3878 (not (match_test "TARGET_SSE2")))
3879 (const_string "V2SF")
3880 ]
3881 (const_string "DF")))
3882 (set (attr "preferred_for_size")
3883 (cond [(eq_attr "alternative" "3,4")
3884 (symbol_ref "false")]
3885 (symbol_ref "true")))
3886 (set (attr "preferred_for_speed")
3887 (cond [(eq_attr "alternative" "3,4")
3888 (symbol_ref "TARGET_INTEGER_DFMODE_MOVES")
3889 (eq_attr "alternative" "20")
3890 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
3891 (eq_attr "alternative" "21")
3892 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
3893 ]
3894 (symbol_ref "true")))
3895 (set (attr "enabled")
3896 (cond [(eq_attr "alternative" "22,23,24,25")
3897 (if_then_else
3898 (match_test "TARGET_HARD_DF_REGS")
3899 (symbol_ref "false")
3900 (const_string "*"))
3901 (not (match_test "TARGET_HARD_DF_REGS"))
3902 (symbol_ref "false")
3903 ]
3904 (const_string "*")))])
3905
/* After reload on !TARGET_64BIT, split a DFmode move whose operands satisfy
   the *_gr_operand predicates.  ix86_split_long_move is expected to emit the
   replacement insns itself: the replacement pattern is only a (const_int 0)
   placeholder and the preparation code ends with DONE.  */
3906 (define_split
3907 [(set (match_operand:DF 0 "nonimmediate_gr_operand")
3908 (match_operand:DF 1 "general_gr_operand"))]
3909 "!TARGET_64BIT && reload_completed"
3910 [(const_int 0)]
3911 "ix86_split_long_move (operands); DONE;")
3912
/* SFmode move with 18 constraint alternatives (numbered 0-17).  The "type"
   attribute below groups them, and the output code dispatches on that type:
     0,1,2       fmov
     3,4,16,17   imov
     5           sselog1
     11-15       mmxmov
     6-10        ssemov (the default of the "type" cond).
   The insn condition rejects memory-to-memory moves; before LRA/reload it
   accepts a CONST_DOUBLE source only in the cases listed in the condition.  */
3913 (define_insn "*movsf_internal"
3914 [(set (match_operand:SF 0 "nonimmediate_operand"
3915 "=Yf*f,m ,Yf*f,?r ,?m,v,v,v,m,?r,?v,!*y,!*y,!m,!r,!*y,r ,m")
3916 (match_operand:SF 1 "general_operand"
3917 "Yf*fm,Yf*f,G ,rmF,rF,C,v,m,v,v ,r ,*y ,m ,*y,*y,r ,rmF,rF"))]
3918 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
3919 && (lra_in_progress || reload_completed
3920 || !CONST_DOUBLE_P (operands[1])
3921 || ((optimize_function_for_size_p (cfun)
3922 || (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC))
3923 && IS_STACK_MODE (SFmode)
3924 && standard_80387_constant_p (operands[1]) > 0)
3925 || (TARGET_SSE && TARGET_SSE_MATH
3926 && standard_sse_constant_p (operands[1], SFmode) == 1)
3927 || memory_operand (operands[0], SFmode)
3928 || !TARGET_HARD_SF_REGS)"
3929 {
3930 switch (get_attr_type (insn))
3931 {
3932 case TYPE_FMOV:
/* Alternative 2 is the fmov alternative whose source constraint is "G";
   its opcode comes from standard_80387_constant_opcode.  */
3933 if (which_alternative == 2)
3934 return standard_80387_constant_opcode (operands[1]);
3935 return output_387_reg_move (insn, operands);
3936
3937 case TYPE_IMOV:
3938 return "mov{l}\t{%1, %0|%0, %1}";
3939
3940 case TYPE_SSELOG1:
3941 return standard_sse_constant_opcode (insn, operands);
3942
3943 case TYPE_SSEMOV:
3944 return ix86_output_ssemov (insn, operands);
3945
3946 case TYPE_MMXMOV:
/* Per the "mode" attribute below, alternative 11 is DImode (movq) and
   the other mmxmov alternatives (12-15) are SImode (movd).  */
3947 switch (get_attr_mode (insn))
3948 {
3949 case MODE_DI:
3950 return "movq\t{%1, %0|%0, %1}";
3951 case MODE_SI:
3952 return "movd\t{%1, %0|%0, %1}";
3953
3954 default:
3955 gcc_unreachable ();
3956 }
3957
3958 default:
3959 gcc_unreachable ();
3960 }
3961 }
3962 [(set (attr "isa")
3963 (cond [(eq_attr "alternative" "9,10")
3964 (const_string "sse2")
3965 ]
3966 (const_string "*")))
3967 (set (attr "type")
3968 (cond [(eq_attr "alternative" "0,1,2")
3969 (const_string "fmov")
3970 (eq_attr "alternative" "3,4,16,17")
3971 (const_string "imov")
3972 (eq_attr "alternative" "5")
3973 (const_string "sselog1")
3974 (eq_attr "alternative" "11,12,13,14,15")
3975 (const_string "mmxmov")
3976 ]
3977 (const_string "ssemov")))
3978 (set (attr "prefix")
3979 (if_then_else (eq_attr "type" "sselog1,ssemov")
3980 (const_string "maybe_vex")
3981 (const_string "orig")))
3982 (set (attr "prefix_data16")
3983 (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "SI"))
3984 (const_string "1")
3985 (const_string "*")))
3986 (set (attr "mode")
3987 (cond [(eq_attr "alternative" "3,4,9,10,12,13,14,15,16,17")
3988 (const_string "SI")
3989 (eq_attr "alternative" "11")
3990 (const_string "DI")
3991 (eq_attr "alternative" "5")
3992 (cond [(and (match_test "TARGET_AVX512F")
3993 (not (match_test "TARGET_PREFER_AVX256")))
3994 (const_string "V16SF")
3995 (match_test "TARGET_AVX")
3996 (const_string "V4SF")
3997 (ior (not (match_test "TARGET_SSE2"))
3998 (match_test "optimize_function_for_size_p (cfun)"))
3999 (const_string "V4SF")
4000 (match_test "TARGET_SSE_LOAD0_BY_PXOR")
4001 (const_string "TI")
4002 ]
4003 (const_string "V4SF"))
4004
4005 /* For architectures resolving dependencies on
4006 whole SSE registers use APS move to break dependency
4007 chains, otherwise use short move to avoid extra work.
4008
4009 Do the same for architectures resolving dependencies on
4010 the parts. While in DF mode it is better to always handle
4011 just register parts, the SF mode is different due to lack
4012 of instructions to load just part of the register. It is
4013 better to maintain the whole registers in single format
4014 to avoid problems on using packed logical operations. */
4015 (eq_attr "alternative" "6")
4016 (cond [(ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
4017 (match_test "TARGET_SSE_SPLIT_REGS"))
4018 (const_string "V4SF")
4019 ]
4020 (const_string "SF"))
4021 ]
4022 (const_string "SF")))
4023 (set (attr "preferred_for_speed")
4024 (cond [(eq_attr "alternative" "9,14")
4025 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
4026 (eq_attr "alternative" "10,15")
4027 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
4028 ]
4029 (symbol_ref "true")))
/* Alternatives 16 and 17 are switched off when TARGET_HARD_SF_REGS holds;
   every other alternative is switched off when it does not hold.  */
4030 (set (attr "enabled")
4031 (cond [(eq_attr "alternative" "16,17")
4032 (if_then_else
4033 (match_test "TARGET_HARD_SF_REGS")
4034 (symbol_ref "false")
4035 (const_string "*"))
4036 (not (match_test "TARGET_HARD_SF_REGS"))
4037 (symbol_ref "false")
4038 ]
4039 (const_string "*")))])
4040
/* Per-mode constraint suffix: "F" for HF, empty for BF.  It is appended to
   the "r" source constraint of alternative 3 in the HFBF move pattern.  */
4041 (define_mode_attr hfbfconstf
4042 [(HF "F") (BF "")])
4043
/* Move for the HFBF mode iterator, with 10 constraint alternatives (0-9).
   Per the "type" attribute below:
     4       sselog1
     5,6,8   ssemov
     7,9     ssemov with TARGET_AVX512FP16, otherwise sselog1
     0-3     imov or imovx, chosen by the remaining tests of the cond.
   Memory-to-memory moves are rejected by the insn condition.  */
4044 (define_insn "*mov<mode>_internal"
4045 [(set (match_operand:HFBF 0 "nonimmediate_operand"
4046 "=?r,?r,?r,?m,v,v,?r,m,?v,v")
4047 (match_operand:HFBF 1 "general_operand"
4048 "r ,F ,m ,r<hfbfconstf>,C,v, v,v,r ,m"))]
4049 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
4050 && (lra_in_progress
4051 || reload_completed
4052 || !CONST_DOUBLE_P (operands[1])
4053 || (TARGET_SSE2
4054 && standard_sse_constant_p (operands[1], <MODE>mode) == 1)
4055 || memory_operand (operands[0], <MODE>mode))"
4056 {
4057 switch (get_attr_type (insn))
4058 {
4059 case TYPE_IMOVX:
4060 /* movzwl is faster than movw on p2 due to partial word stalls,
4061 though not as fast as an aligned movl. */
4062 return "movz{wl|x}\t{%1, %k0|%k0, %1}";
4063
4064 case TYPE_SSEMOV:
4065 return ix86_output_ssemov (insn, operands);
4066
4067 case TYPE_SSELOG1:
/* sselog1 covers alternative 4 (source constraint "C") and, without
   TARGET_AVX512FP16, alternatives 7 (v -> m) and 9 (m -> v).  The latter
   two are emitted as pextrw or pinsrw depending on whether the destination
   is an SSE register.  */
4068 if (satisfies_constraint_C (operands[1]))
4069 return standard_sse_constant_opcode (insn, operands);
4070
4071 if (SSE_REG_P (operands[0]))
4072 return "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}";
4073 else
4074 return "%vpextrw\t{$0, %1, %0|%0, %1, 0}";
4075
4076 default:
/* Remaining type is imov: a 32-bit move when the "mode" attribute is SI,
   a 16-bit move otherwise.  */
4077 if (get_attr_mode (insn) == MODE_SI)
4078 return "mov{l}\t{%k1, %k0|%k0, %k1}";
4079 else
4080 return "mov{w}\t{%1, %0|%0, %1}";
4081 }
4082 }
4083 [(set (attr "isa")
4084 (cond [(eq_attr "alternative" "4,5,6,8,9")
4085 (const_string "sse2")
4086 (eq_attr "alternative" "7")
4087 (const_string "sse4")
4088 ]
4089 (const_string "*")))
4090 (set (attr "type")
4091 (cond [(eq_attr "alternative" "4")
4092 (const_string "sselog1")
4093 (eq_attr "alternative" "5,6,8")
4094 (const_string "ssemov")
4095 (eq_attr "alternative" "7,9")
4096 (if_then_else
4097 (match_test ("TARGET_AVX512FP16"))
4098 (const_string "ssemov")
4099 (const_string "sselog1"))
4100 (match_test "optimize_function_for_size_p (cfun)")
4101 (const_string "imov")
4102 (and (eq_attr "alternative" "0")
4103 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
4104 (not (match_test "TARGET_HIMODE_MATH"))))
4105 (const_string "imov")
4106 (and (eq_attr "alternative" "1,2")
4107 (match_operand:HI 1 "aligned_operand"))
4108 (const_string "imov")
4109 (and (match_test "TARGET_MOVX")
4110 (eq_attr "alternative" "0,2"))
4111 (const_string "imovx")
4112 ]
4113 (const_string "imov")))
4114 (set (attr "prefix")
4115 (cond [(eq_attr "alternative" "4,5,6,7,8,9")
4116 (const_string "maybe_vex")
4117 ]
4118 (const_string "orig")))
4119 (set (attr "mode")
4120 (cond [(eq_attr "alternative" "4")
4121 (const_string "V4SF")
4122 (eq_attr "alternative" "6,8")
4123 (if_then_else
4124 (match_test "TARGET_AVX512FP16")
4125 (const_string "HI")
4126 (const_string "SI"))
4127 (eq_attr "alternative" "7,9")
4128 (if_then_else
4129 (match_test "TARGET_AVX512FP16")
4130 (const_string "HI")
4131 (const_string "TI"))
4132 (eq_attr "alternative" "5")
4133 (cond [(match_test "TARGET_AVX512FP16")
4134 (const_string "HF")
4135 (ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
4136 (match_test "TARGET_SSE_SPLIT_REGS"))
4137 (const_string "V4SF")
4138 ]
4139 (const_string "SF"))
4140 (eq_attr "type" "imovx")
4141 (const_string "SI")
4142 (and (eq_attr "alternative" "1,2")
4143 (match_operand:HI 1 "aligned_operand"))
4144 (const_string "SI")
4145 (and (eq_attr "alternative" "0")
4146 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
4147 (not (match_test "TARGET_HIMODE_MATH"))))
4148 (const_string "SI")
4149 ]
4150 (const_string "HI")))
/* Alternative 1 (source constraint "F") is switched off for BFmode.  */
4151 (set (attr "enabled")
4152 (cond [(and (match_test "<MODE>mode == BFmode")
4153 (eq_attr "alternative" "1"))
4154 (symbol_ref "false")
4155 ]
4156 (const_string "*")))])
4157
/* After reload, for a TF/XF/DF/SF load from memory into an FP register that
   ix86_standard_x87sse_constant_load_p accepts, replace the memory source by
   the rtx find_constant_src returns for the insn (presumably the constant
   the memory refers to; both helpers are defined elsewhere).  */
4158 (define_split
4159 [(set (match_operand 0 "any_fp_register_operand")
4160 (match_operand 1 "memory_operand"))]
4161 "reload_completed
4162 && (GET_MODE (operands[0]) == TFmode
4163 || GET_MODE (operands[0]) == XFmode
4164 || GET_MODE (operands[0]) == DFmode
4165 || GET_MODE (operands[0]) == SFmode)
4166 && ix86_standard_x87sse_constant_load_p (insn, operands[0])"
4167 [(set (match_dup 0) (match_dup 2))]
4168 "operands[2] = find_constant_src (curr_insn);")
4169
/* Same replacement as the plain-load split, but for a float_extend of a
   memory operand into a TF/XF/DF FP register: the whole extended load is
   replaced by a move from the rtx returned by find_constant_src.  */
4170 (define_split
4171 [(set (match_operand 0 "any_fp_register_operand")
4172 (float_extend (match_operand 1 "memory_operand")))]
4173 "reload_completed
4174 && (GET_MODE (operands[0]) == TFmode
4175 || GET_MODE (operands[0]) == XFmode
4176 || GET_MODE (operands[0]) == DFmode)
4177 && ix86_standard_x87sse_constant_load_p (insn, operands[0])"
4178 [(set (match_dup 0) (match_dup 2))]
4179 "operands[2] = find_constant_src (curr_insn);")
4180
4181 ;; Split the load of -0.0 or -1.0 into fldz;fchs or fld1;fchs sequence
/* Applies after reload when standard_80387_constant_p classifies the
   immediate as 8 or 9.  The replacement loads a constant into operand 0 and
   then negates operand 0 in place.  */
4182 (define_split
4183 [(set (match_operand:X87MODEF 0 "fp_register_operand")
4184 (match_operand:X87MODEF 1 "immediate_operand"))]
4185 "reload_completed
4186 && (standard_80387_constant_p (operands[1]) == 8
4187 || standard_80387_constant_p (operands[1]) == 9)"
4188 [(set (match_dup 0)(match_dup 1))
4189 (set (match_dup 0)
4190 (neg:X87MODEF (match_dup 0)))]
4191 {
/* Pick the constant to load before the negation: zero when the original
   value is negative zero, one otherwise.  */
4192 if (real_isnegzero (CONST_DOUBLE_REAL_VALUE (operands[1])))
4193 operands[1] = CONST0_RTX (<MODE>mode);
4194 else
4195 operands[1] = CONST1_RTX (<MODE>mode);
4196 })
4197
/* Exchange two XFmode x87 registers (requires TARGET_80387).  The pattern is
   a pair of sets that swap operands 0 and 1; it is output as a single fxch
   naming whichever operand is not the stack top.  */
4198 (define_insn "*swapxf"
4199 [(set (match_operand:XF 0 "register_operand" "+f")
4200 (match_operand:XF 1 "register_operand" "+f"))
4201 (set (match_dup 1)
4202 (match_dup 0))]
4203 "TARGET_80387"
4204 {
4205 if (STACK_TOP_P (operands[0]))
4206 return "fxch\t%1";
4207 else
4208 return "fxch\t%0";
4209 }
4210 [(set_attr "type" "fxch")
4211 (set_attr "mode" "XF")])
4212 \f
4213
4214 ;; Zero extension instructions
4215
/* DI -> TI zero extension on TARGET_64BIT.  Always output as "#" and split
   after reload: split_double_mode breaks operand 0 into the two parts stored
   in operands[3] and operands[4]; operands[3] receives the DImode source and
   operands[4] is set to zero.  */
4216 (define_insn_and_split "zero_extendditi2"
4217 [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o")
4218 (zero_extend:TI (match_operand:DI 1 "nonimmediate_operand" "rm,r")))]
4219 "TARGET_64BIT"
4220 "#"
4221 "&& reload_completed"
4222 [(set (match_dup 3) (match_dup 1))
4223 (set (match_dup 4) (const_int 0))]
4224 "split_double_mode (TImode, &operands[0], 1, &operands[3], &operands[4]);")
4225
/* Named expander for SI -> DI zero extension.  It has no condition and no
   preparation code, so it only emits the pattern as written.  */
4226 (define_expand "zero_extendsidi2"
4227 [(set (match_operand:DI 0 "nonimmediate_operand")
4228 (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))])
4229
/* SI -> DI zero extension with 14 constraint alternatives (0-13).  Per the
   "type" attribute below:
     0,1,2,4   multi (output as "#")
     3         imovx (the default of the cond)
     5,6       mmxmov
     7         ssemov on TARGET_64BIT, otherwise multi
     8-11      ssemov
     12,13     mskmov.
   The "isa" attribute restricts 0-2 to nox64 and 3 to x64.  */
4230 (define_insn "*zero_extendsidi2"
4231 [(set (match_operand:DI 0 "nonimmediate_operand"
4232 "=r,?r,?o,r ,o,?*y,?!*y,$r,$v,$x,*x,*v,*r,*k")
4233 (zero_extend:DI
4234 (match_operand:SI 1 "x86_64_zext_operand"
4235 "0 ,rm,r ,rmWz,0,r ,m ,v ,r ,m ,*x,*v,*k,*km")))]
4236 ""
4237 {
4238 switch (get_attr_type (insn))
4239 {
4240 case TYPE_IMOVX:
/* Emitted as a 32-bit lea when ix86_use_lea_for_mov asks for it,
   otherwise as a 32-bit mov into the low part of the destination.  */
4241 if (ix86_use_lea_for_mov (insn, operands))
4242 return "lea{l}\t{%E1, %k0|%k0, %E1}";
4243 else
4244 return "mov{l}\t{%1, %k0|%k0, %1}";
4245
4246 case TYPE_MULTI:
4247 return "#";
4248
4249 case TYPE_MMXMOV:
4250 return "movd\t{%1, %0|%0, %1}";
4251
4252 case TYPE_SSEMOV:
/* SSE register to SSE register uses pmovzxdq; a different operand
   spelling is used when either register satisfies EXT_REX_SSE_REG_P.
   All remaining ssemov cases use movd.  */
4253 if (SSE_REG_P (operands[0]) && SSE_REG_P (operands[1]))
4254 {
4255 if (EXT_REX_SSE_REG_P (operands[0])
4256 || EXT_REX_SSE_REG_P (operands[1]))
4257 return "vpmovzxdq\t{%t1, %g0|%g0, %t1}";
4258 else
4259 return "%vpmovzxdq\t{%1, %0|%0, %1}";
4260 }
4261
4262 if (GENERAL_REG_P (operands[0]))
4263 return "%vmovd\t{%1, %k0|%k0, %1}";
4264
4265 return "%vmovd\t{%1, %0|%0, %1}";
4266
4267 case TYPE_MSKMOV:
4268 return "kmovd\t{%1, %k0|%k0, %1}";
4269
4270 default:
4271 gcc_unreachable ();
4272 }
4273 }
4274 [(set (attr "isa")
4275 (cond [(eq_attr "alternative" "0,1,2")
4276 (const_string "nox64")
4277 (eq_attr "alternative" "3")
4278 (const_string "x64")
4279 (eq_attr "alternative" "7,8,9")
4280 (const_string "sse2")
4281 (eq_attr "alternative" "10")
4282 (const_string "sse4")
4283 (eq_attr "alternative" "11")
4284 (const_string "avx512f")
4285 (eq_attr "alternative" "12")
4286 (const_string "x64_avx512bw")
4287 (eq_attr "alternative" "13")
4288 (const_string "avx512bw")
4289 ]
4290 (const_string "*")))
4291 (set (attr "mmx_isa")
4292 (if_then_else (eq_attr "alternative" "5,6")
4293 (const_string "native")
4294 (const_string "*")))
4295 (set (attr "type")
4296 (cond [(eq_attr "alternative" "0,1,2,4")
4297 (const_string "multi")
4298 (eq_attr "alternative" "5,6")
4299 (const_string "mmxmov")
4300 (eq_attr "alternative" "7")
4301 (if_then_else (match_test "TARGET_64BIT")
4302 (const_string "ssemov")
4303 (const_string "multi"))
4304 (eq_attr "alternative" "8,9,10,11")
4305 (const_string "ssemov")
4306 (eq_attr "alternative" "12,13")
4307 (const_string "mskmov")
4308 ]
4309 (const_string "imovx")))
4310 (set (attr "prefix_extra")
4311 (if_then_else (eq_attr "alternative" "10,11")
4312 (const_string "1")
4313 (const_string "*")))
4314 (set (attr "prefix")
4315 (if_then_else (eq_attr "type" "ssemov")
4316 (const_string "maybe_vex")
4317 (const_string "orig")))
4318 (set (attr "prefix_0f")
4319 (if_then_else (eq_attr "type" "imovx")
4320 (const_string "0")
4321 (const_string "*")))
4322 (set (attr "mode")
4323 (cond [(eq_attr "alternative" "5,6")
4324 (const_string "DI")
4325 (and (eq_attr "alternative" "7")
4326 (match_test "TARGET_64BIT"))
4327 (const_string "TI")
4328 (eq_attr "alternative" "8,10,11")
4329 (const_string "TI")
4330 ]
4331 (const_string "SI")))
4332 (set (attr "preferred_for_speed")
4333 (cond [(eq_attr "alternative" "7")
4334 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
4335 (eq_attr "alternative" "5,8")
4336 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
4337 ]
4338 (symbol_ref "true")))])
4339
/* Memory-to-memory SI -> DI zero extension after reload.  Only operands[4]
   (one part of the destination as split by split_double_mode) is written,
   with zero.  NOTE(review): the only memory-destination alternative of
   *zero_extendsidi2 that accepts a memory source ties it to operand 0
   ("o" <- "0"), so the other part presumably already holds the value --
   confirm.  */
4340 (define_split
4341 [(set (match_operand:DI 0 "memory_operand")
4342 (zero_extend:DI (match_operand:SI 1 "memory_operand")))]
4343 "reload_completed"
4344 [(set (match_dup 4) (const_int 0))]
4345 "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
4346
/* !TARGET_64BIT, after reload, source and destination general registers
   have the same REGNO: nothing has to be copied, so only operands[4] (the
   other part of the DImode destination) is set to zero.  */
4347 (define_split
4348 [(set (match_operand:DI 0 "general_reg_operand")
4349 (zero_extend:DI (match_operand:SI 1 "general_reg_operand")))]
4350 "!TARGET_64BIT && reload_completed
4351 && REGNO (operands[0]) == REGNO (operands[1])"
4352 [(set (match_dup 4) (const_int 0))]
4353 "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
4354
/* !TARGET_64BIT, after reload, excluding memory-to-memory: split the DImode
   destination with split_double_mode, copy the SImode source into
   operands[3] and set operands[4] to zero.  */
4355 (define_split
4356 [(set (match_operand:DI 0 "nonimmediate_gr_operand")
4357 (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))]
4358 "!TARGET_64BIT && reload_completed
4359 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4360 [(set (match_dup 3) (match_dup 1))
4361 (set (match_dup 4) (const_int 0))]
4362 "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
4363
/* Maps an integer mode to the "isa" attribute value used for the kmov
   alternatives of the zero_extend patterns that reference <kmov_isa>.  */
4364 (define_mode_attr kmov_isa
4365 [(QI "avx512dq") (HI "avx512f") (SI "avx512bw") (DI "avx512bw")])
4366
/* SWI12 -> DI zero extension on TARGET_64BIT.  Alternative 0 is a movz into
   the 32-bit view of the destination (%k0); alternatives 1 and 2 use kmov
   and are gated by <kmov_isa> through the "isa" attribute.  */
4367 (define_insn "zero_extend<mode>di2"
4368 [(set (match_operand:DI 0 "register_operand" "=r,*r,*k")
4369 (zero_extend:DI
4370 (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,*k,*km")))]
4371 "TARGET_64BIT"
4372 "@
4373 movz{<imodesuffix>l|x}\t{%1, %k0|%k0, %1}
4374 kmov<mskmodesuffix>\t{%1, %k0|%k0, %1}
4375 kmov<mskmodesuffix>\t{%1, %k0|%k0, %1}"
4376 [(set_attr "isa" "*,<kmov_isa>,<kmov_isa>")
4377 (set_attr "type" "imovx,mskmov,mskmov")
4378 (set_attr "mode" "SI,<MODE>,<MODE>")])
4379
/* Expander for SWI12 -> SI zero extension.  When
   TARGET_ZERO_EXTEND_WITH_AND is set and the function is optimized for
   speed, the source is forced into a register and the flag-clobbering
   zero_extend<mode>si2_and pattern is emitted instead; otherwise the plain
   pattern is emitted.  */
4380 (define_expand "zero_extend<mode>si2"
4381 [(set (match_operand:SI 0 "register_operand")
4382 (zero_extend:SI (match_operand:SWI12 1 "nonimmediate_operand")))]
4383 ""
4384 {
4385 if (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))
4386 {
4387 operands[1] = force_reg (<MODE>mode, operands[1]);
4388 emit_insn (gen_zero_extend<mode>si2_and (operands[0], operands[1]));
4389 DONE;
4390 }
4391 })
4392
/* SWI12 -> SI zero extension that clobbers the flags; always output as "#"
   and split after reload.  Two outcomes:
     - source is not a register, or is a different register than the
       destination: clear the destination, then store the source into its
       strict low part;
     - source and destination are the same register: AND the destination
       with the mode mask of <MODE>mode.  */
4393 (define_insn_and_split "zero_extend<mode>si2_and"
4394 [(set (match_operand:SI 0 "register_operand" "=r,?&<r>")
4395 (zero_extend:SI
4396 (match_operand:SWI12 1 "nonimmediate_operand" "0,<r>m")))
4397 (clobber (reg:CC FLAGS_REG))]
4398 "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
4399 "#"
4400 "&& reload_completed"
4401 [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 2)))
4402 (clobber (reg:CC FLAGS_REG))])]
4403 {
4404 if (!REG_P (operands[1])
4405 || REGNO (operands[0]) != REGNO (operands[1]))
4406 {
/* Clear-then-insert sequence; the assert documents that this path is
   not expected with TARGET_PARTIAL_REG_STALL.  */
4407 ix86_expand_clear (operands[0]);
4408
4409 gcc_assert (!TARGET_PARTIAL_REG_STALL);
4410 emit_insn (gen_rtx_SET
4411 (gen_rtx_STRICT_LOW_PART
4412 (VOIDmode, gen_lowpart (<MODE>mode, operands[0])),
4413 operands[1]));
4414 DONE;
4415 }
4416
/* Same register: fall through to the AND in the replacement pattern.  */
4417 operands[2] = GEN_INT (GET_MODE_MASK (<MODE>mode));
4418 }
4419 [(set_attr "type" "alu1")
4420 (set_attr "mode" "SI")])
4421
/* SWI12 -> SI zero extension used whenever the AND-based variant is not
   selected (the condition is the negation of that variant's condition).
   Alternative 0 is movz; alternatives 1 and 2 use kmov and are gated by
   <kmov_isa>.  */
4422 (define_insn "*zero_extend<mode>si2"
4423 [(set (match_operand:SI 0 "register_operand" "=r,*r,*k")
4424 (zero_extend:SI
4425 (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,*k,*km")))]
4426 "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))"
4427 "@
4428 movz{<imodesuffix>l|x}\t{%1, %0|%0, %1}
4429 kmov<mskmodesuffix>\t{%1, %0|%0, %1}
4430 kmov<mskmodesuffix>\t{%1, %0|%0, %1}"
4431 [(set_attr "isa" "*,<kmov_isa>,<kmov_isa>")
4432 (set_attr "type" "imovx,mskmov,mskmov")
4433 (set_attr "mode" "SI,<MODE>,<MODE>")])
4434
/* Expander for QI -> HI zero extension.  With TARGET_ZERO_EXTEND_WITH_AND
   when optimizing the function for speed, the source is forced into a
   register and zero_extendqihi2_and is emitted; otherwise the plain pattern
   is emitted.  */
4435 (define_expand "zero_extendqihi2"
4436 [(set (match_operand:HI 0 "register_operand")
4437 (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand")))]
4438 ""
4439 {
4440 if (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))
4441 {
4442 operands[1] = force_reg (QImode, operands[1]);
4443 emit_insn (gen_zero_extendqihi2_and (operands[0], operands[1]));
4444 DONE;
4445 }
4446 })
4447
/* QI -> HI zero extension that clobbers the flags; always output as "#" and
   split after reload.  If the source is not the destination register, the
   destination is cleared and the source stored into its QImode strict low
   part.  Otherwise operand 0 is rewritten as its SImode low part and the
   replacement pattern ANDs it with 255.  */
4448 (define_insn_and_split "zero_extendqihi2_and"
4449 [(set (match_operand:HI 0 "register_operand" "=r,?&q")
4450 (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "0,qm")))
4451 (clobber (reg:CC FLAGS_REG))]
4452 "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
4453 "#"
4454 "&& reload_completed"
4455 [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 255)))
4456 (clobber (reg:CC FLAGS_REG))])]
4457 {
4458 if (!REG_P (operands[1])
4459 || REGNO (operands[0]) != REGNO (operands[1]))
4460 {
4461 ix86_expand_clear (operands[0]);
4462
4463 gcc_assert (!TARGET_PARTIAL_REG_STALL);
4464 emit_insn (gen_rtx_SET
4465 (gen_rtx_STRICT_LOW_PART
4466 (VOIDmode, gen_lowpart (QImode, operands[0])),
4467 operands[1]));
4468 DONE;
4469 }
4470
/* The replacement pattern is an SImode AND, so view the HImode
   destination in SImode.  */
4471 operands[0] = gen_lowpart (SImode, operands[0]);
4472 }
4473 [(set_attr "type" "alu1")
4474 (set_attr "mode" "SI")])
4475
4476 ; zero extend to SImode to avoid partial register stalls
/* QI -> HI zero extension used when the AND-based variant is not selected.
   Alternative 0 is a movz into the 32-bit view of the destination (%k0);
   alternatives 1 and 2 use kmovb and require avx512dq per the "isa"
   attribute.  */
4477 (define_insn "*zero_extendqihi2"
4478 [(set (match_operand:HI 0 "register_operand" "=r,*r,*k")
4479 (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm,*k,*km")))]
4480 "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))"
4481 "@
4482 movz{bl|x}\t{%1, %k0|%k0, %1}
4483 kmovb\t{%1, %k0|%k0, %1}
4484 kmovb\t{%1, %0|%0, %1}"
4485 [(set_attr "isa" "*,avx512dq,avx512dq")
4486 (set_attr "type" "imovx,mskmov,mskmov")
4487 (set_attr "mode" "SI,QI,QI")])
4488
4489 ;; Transform xorl; mov[bw] (set strict_low_part) into movz[bw]l.
/* Matches a flag-clobbering clear of a SWI48 register followed by a store
   into the strict low part of the same register (same REGNO), and replaces
   the pair with one zero_extend of operand 2.  For SImode the rewrite is
   skipped when TARGET_ZERO_EXTEND_WITH_AND applies and the function is
   optimized for speed.  */
4490 (define_peephole2
4491 [(parallel [(set (match_operand:SWI48 0 "general_reg_operand")
4492 (const_int 0))
4493 (clobber (reg:CC FLAGS_REG))])
4494 (set (strict_low_part (match_operand:SWI12 1 "general_reg_operand"))
4495 (match_operand:SWI12 2 "nonimmediate_operand"))]
4496 "REGNO (operands[0]) == REGNO (operands[1])
4497 && (<SWI48:MODE>mode != SImode
4498 || !TARGET_ZERO_EXTEND_WITH_AND
4499 || !optimize_function_for_speed_p (cfun))"
4500 [(set (match_dup 0) (zero_extend:SWI48 (match_dup 2)))])
4501
4502 ;; Likewise, but preserving FLAGS_REG.
/* Variant in which the clearing insn is a plain set to zero with no
   FLAGS_REG clobber; the condition and the zero_extend replacement are the
   same as in the flag-clobbering form.  */
4503 (define_peephole2
4504 [(set (match_operand:SWI48 0 "general_reg_operand") (const_int 0))
4505 (set (strict_low_part (match_operand:SWI12 1 "general_reg_operand"))
4506 (match_operand:SWI12 2 "nonimmediate_operand"))]
4507 "REGNO (operands[0]) == REGNO (operands[1])
4508 && (<SWI48:MODE>mode != SImode
4509 || !TARGET_ZERO_EXTEND_WITH_AND
4510 || !optimize_function_for_speed_p (cfun))"
4511 [(set (match_dup 0) (zero_extend:SWI48 (match_dup 2)))])
4512 \f
4513 ;; Sign extension instructions
4514
/* Expander for SI -> DI sign extension.  On !TARGET_64BIT it emits
   extendsidi2_1 (which carries the flags and scratch clobbers); on
   TARGET_64BIT the plain pattern is emitted.  */
4515 (define_expand "extendsidi2"
4516 [(set (match_operand:DI 0 "register_operand")
4517 (sign_extend:DI (match_operand:SI 1 "register_operand")))]
4518 ""
4519 {
4520 if (!TARGET_64BIT)
4521 {
4522 emit_insn (gen_extendsidi2_1 (operands[0], operands[1]));
4523 DONE;
4524 }
4525 })
4526
/* SI -> DI sign extension on TARGET_64BIT.  Alternative 0 (destination "a",
   source tied to it) is output as cltq/cdqe and has modrm 0; alternative 1
   is a movs with modrm 1.  */
4527 (define_insn "*extendsidi2_rex64"
4528 [(set (match_operand:DI 0 "register_operand" "=*a,r")
4529 (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "*0,rm")))]
4530 "TARGET_64BIT"
4531 "@
4532 {cltq|cdqe}
4533 movs{lq|x}\t{%1, %0|%0, %1}"
4534 [(set_attr "type" "imovx")
4535 (set_attr "mode" "DI")
4536 (set_attr "prefix_0f" "0")
4537 (set_attr "modrm" "0,1")])
4538
/* SI -> DI sign extension on !TARGET_64BIT.  Always output as "#", so it
   relies on being split later.  It clobbers the flags and an SImode
   scratch; only alternative 3 (memory destination "o") asks for a real
   scratch register ("&r"), the others use "X".  */
4539 (define_insn "extendsidi2_1"
4540 [(set (match_operand:DI 0 "nonimmediate_operand" "=*A,r,?r,?*o")
4541 (sign_extend:DI (match_operand:SI 1 "register_operand" "0,0,r,r")))
4542 (clobber (reg:CC FLAGS_REG))
4543 (clobber (match_scratch:SI 2 "=X,X,X,&r"))]
4544 "!TARGET_64BIT"
4545 "#")
4546
4547 ;; Split the memory case. If the source register doesn't die, it will stay
4548 ;; this way, if it does die, following peephole2s take care of it.
/* After reload, sign-extend an SImode register into a DImode memory
   destination, using the SImode register in operand 2 as scratch.
   operands[3] and operands[4] are the two parts of the destination produced
   by split_double_mode.  */
4549 (define_split
4550 [(set (match_operand:DI 0 "memory_operand")
4551 (sign_extend:DI (match_operand:SI 1 "register_operand")))
4552 (clobber (reg:CC FLAGS_REG))
4553 (clobber (match_operand:SI 2 "register_operand"))]
4554 "reload_completed"
4555 [(const_int 0)]
4556 {
4557 split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);
4558
/* Store the source itself into the first part of the destination.  */
4559 emit_move_insn (operands[3], operands[1]);
4560
4561 /* Generate a cltd if possible and doing so is profitable. */
4562 if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
4563 && REGNO (operands[1]) == AX_REG
4564 && REGNO (operands[2]) == DX_REG)
4565 {
4566 emit_insn (gen_ashrsi3_cvt (operands[2], operands[1], GEN_INT (31)));
4567 }
4568 else
4569 {
/* Otherwise copy the source into the scratch and shift it right
   arithmetically by 31 in place.  */
4570 emit_move_insn (operands[2], operands[1]);
4571 emit_insn (gen_ashrsi3_cvt (operands[2], operands[2], GEN_INT (31)));
4572 }
/* The scratch now holds the sign replica; store it into the other part.  */
4573 emit_move_insn (operands[4], operands[2]);
4574 DONE;
4575 })
4576
4577 ;; Peepholes for the case where the source register does die, after
4578 ;; being split with the above splitter.
/* Matches: store reg1 to mem0; copy reg1 to reg2; arithmetic shift of reg2
   right by 31; store reg2 to mem3.  When reg1 and reg2 are distinct, both
   pass the peep2_reg_dead_p checks in the condition, and reg2 is not
   mentioned in mem3, the copy is dropped: reg1 is shifted in place and
   stored to mem3.  */
4579 (define_peephole2
4580 [(set (match_operand:SI 0 "memory_operand")
4581 (match_operand:SI 1 "general_reg_operand"))
4582 (set (match_operand:SI 2 "general_reg_operand") (match_dup 1))
4583 (parallel [(set (match_dup 2)
4584 (ashiftrt:SI (match_dup 2) (const_int 31)))
4585 (clobber (reg:CC FLAGS_REG))])
4586 (set (match_operand:SI 3 "memory_operand") (match_dup 2))]
4587 "REGNO (operands[1]) != REGNO (operands[2])
4588 && peep2_reg_dead_p (2, operands[1])
4589 && peep2_reg_dead_p (4, operands[2])
4590 && !reg_mentioned_p (operands[2], operands[3])"
4591 [(set (match_dup 0) (match_dup 1))
4592 (parallel [(set (match_dup 1) (ashiftrt:SI (match_dup 1) (const_int 31)))
4593 (clobber (reg:CC FLAGS_REG))])
4594 (set (match_dup 3) (match_dup 1))])
4595
/* Matches: store reg1 to mem0; reg2 = reg1 >> 31 (arithmetic); store reg2
   to mem3, with reg1 in AX_REG and reg2 in DX_REG.  When not optimizing the
   function for size and the dead-register checks in the condition pass,
   reg1 is shifted in place and stored to mem3, so reg2 is no longer
   written.  */
4596 (define_peephole2
4597 [(set (match_operand:SI 0 "memory_operand")
4598 (match_operand:SI 1 "general_reg_operand"))
4599 (parallel [(set (match_operand:SI 2 "general_reg_operand")
4600 (ashiftrt:SI (match_dup 1) (const_int 31)))
4601 (clobber (reg:CC FLAGS_REG))])
4602 (set (match_operand:SI 3 "memory_operand") (match_dup 2))]
4603 "/* cltd is shorter than sarl $31, %eax */
4604 !optimize_function_for_size_p (cfun)
4605 && REGNO (operands[1]) == AX_REG
4606 && REGNO (operands[2]) == DX_REG
4607 && peep2_reg_dead_p (2, operands[1])
4608 && peep2_reg_dead_p (3, operands[2])
4609 && !reg_mentioned_p (operands[2], operands[3])"
4610 [(set (match_dup 0) (match_dup 1))
4611 (parallel [(set (match_dup 1) (ashiftrt:SI (match_dup 1) (const_int 31)))
4612 (clobber (reg:CC FLAGS_REG))])
4613 (set (match_dup 3) (match_dup 1))])
4614
4615 ;; Extend to register case. Optimize case where source and destination
4616 ;; registers match and cases where we can use cltd.
/* After reload, sign-extend an SImode register into a DImode register.
   operands[3] and operands[4] are the two SImode parts of the destination
   produced by split_double_mode.  The scratch operand 2 is matched but not
   used by this code.  */
4617 (define_split
4618 [(set (match_operand:DI 0 "register_operand")
4619 (sign_extend:DI (match_operand:SI 1 "register_operand")))
4620 (clobber (reg:CC FLAGS_REG))
4621 (clobber (match_scratch:SI 2))]
4622 "reload_completed"
4623 [(const_int 0)]
4624 {
4625 split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);
4626
/* Skip the copy when the first part already is the source register.  */
4627 if (REGNO (operands[3]) != REGNO (operands[1]))
4628 emit_move_insn (operands[3], operands[1]);
4629
4630 /* Generate a cltd if possible and doing so is profitable. */
4631 if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
4632 && REGNO (operands[3]) == AX_REG
4633 && REGNO (operands[4]) == DX_REG)
4634 {
4635 emit_insn (gen_ashrsi3_cvt (operands[4], operands[3], GEN_INT (31)));
4636 DONE;
4637 }
4638
/* General case: get the source into the second part (unless it is
   already there) and shift it right arithmetically by 31 in place.  */
4639 if (REGNO (operands[4]) != REGNO (operands[1]))
4640 emit_move_insn (operands[4], operands[1]);
4641
4642 emit_insn (gen_ashrsi3_cvt (operands[4], operands[4], GEN_INT (31)));
4643 DONE;
4644 })
4645
/* SWI12 -> DI sign extension on TARGET_64BIT, output as a single movs with
   the mode-specific suffix.  */
4646 (define_insn "extend<mode>di2"
4647 [(set (match_operand:DI 0 "register_operand" "=r")
4648 (sign_extend:DI
4649 (match_operand:SWI12 1 "nonimmediate_operand" "<r>m")))]
4650 "TARGET_64BIT"
4651 "movs{<imodesuffix>q|x}\t{%1, %0|%0, %1}"
4652 [(set_attr "type" "imovx")
4653 (set_attr "mode" "DI")])
4654
/* HI -> SI sign extension.  The output form is selected through the
   "prefix_0f" attribute defined below: it is 0 only for alternative 0
   (destination "a", source tied to it) when the "cpu" attribute is not k6,
   which selects cwtl/cwde; in every other case movs is used.  The "modrm"
   and "znver1_decode" attributes are derived from the same value.  */
4655 (define_insn "extendhisi2"
4656 [(set (match_operand:SI 0 "register_operand" "=*a,r")
4657 (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "*0,rm")))]
4658 ""
4659 {
4660 switch (get_attr_prefix_0f (insn))
4661 {
4662 case 0:
4663 return "{cwtl|cwde}";
4664 default:
4665 return "movs{wl|x}\t{%1, %0|%0, %1}";
4666 }
4667 }
4668 [(set_attr "type" "imovx")
4669 (set_attr "mode" "SI")
4670 (set (attr "prefix_0f")
4671 ;; movsx is short decodable while cwtl is vector decoded.
4672 (if_then_else (and (eq_attr "cpu" "!k6")
4673 (eq_attr "alternative" "0"))
4674 (const_string "0")
4675 (const_string "1")))
4676 (set (attr "znver1_decode")
4677 (if_then_else (eq_attr "prefix_0f" "0")
4678 (const_string "double")
4679 (const_string "direct")))
4680 (set (attr "modrm")
4681 (if_then_else (eq_attr "prefix_0f" "0")
4682 (const_string "0")
4683 (const_string "1")))])
4684
/* HI -> SI sign extension whose result is zero-extended to DI, on
   TARGET_64BIT.  Output selection mirrors extendhisi2: cwtl/cwde when
   "prefix_0f" is 0, otherwise a movs into the 32-bit view of the
   destination (%k0).  */
4685 (define_insn "*extendhisi2_zext"
4686 [(set (match_operand:DI 0 "register_operand" "=*a,r")
4687 (zero_extend:DI
4688 (sign_extend:SI
4689 (match_operand:HI 1 "nonimmediate_operand" "*0,rm"))))]
4690 "TARGET_64BIT"
4691 {
4692 switch (get_attr_prefix_0f (insn))
4693 {
4694 case 0:
4695 return "{cwtl|cwde}";
4696 default:
4697 return "movs{wl|x}\t{%1, %k0|%k0, %1}";
4698 }
4699 }
4700 [(set_attr "type" "imovx")
4701 (set_attr "mode" "SI")
4702 (set (attr "prefix_0f")
4703 ;; movsx is short decodable while cwtl is vector decoded.
4704 (if_then_else (and (eq_attr "cpu" "!k6")
4705 (eq_attr "alternative" "0"))
4706 (const_string "0")
4707 (const_string "1")))
4708 (set (attr "modrm")
4709 (if_then_else (eq_attr "prefix_0f" "0")
4710 (const_string "0")
4711 (const_string "1")))])
4712
/* QI -> SI sign extension, always output as movs from a "q" register or
   memory into a general register.  */
4713 (define_insn "extendqisi2"
4714 [(set (match_operand:SI 0 "register_operand" "=r")
4715 (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
4716 ""
4717 "movs{bl|x}\t{%1, %0|%0, %1}"
4718 [(set_attr "type" "imovx")
4719 (set_attr "mode" "SI")])
4720
/* QI -> SI sign extension whose result is zero-extended to DI, on
   TARGET_64BIT; output as movs into the 32-bit view of the destination
   (%k0).  */
4721 (define_insn "*extendqisi2_zext"
4722 [(set (match_operand:DI 0 "register_operand" "=r")
4723 (zero_extend:DI
4724 (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm"))))]
4725 "TARGET_64BIT"
4726 "movs{bl|x}\t{%1, %k0|%k0, %1}"
4727 [(set_attr "type" "imovx")
4728 (set_attr "mode" "SI")])
4729
/* QI -> HI sign extension.  As in extendhisi2, the "prefix_0f" attribute
   chooses the form: 0 (alternative 0 and "cpu" not k6) gives cbtw/cbw,
   anything else gives movs.  */
4730 (define_insn "extendqihi2"
4731 [(set (match_operand:HI 0 "register_operand" "=*a,r")
4732 (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "*0,qm")))]
4733 ""
4734 {
4735 switch (get_attr_prefix_0f (insn))
4736 {
4737 case 0:
4738 return "{cbtw|cbw}";
4739 default:
4740 return "movs{bw|x}\t{%1, %0|%0, %1}";
4741 }
4742 }
4743 [(set_attr "type" "imovx")
4744 (set_attr "mode" "HI")
4745 (set (attr "prefix_0f")
4746 ;; movsx is short decodable while cwtl is vector decoded.
4747 (if_then_else (and (eq_attr "cpu" "!k6")
4748 (eq_attr "alternative" "0"))
4749 (const_string "0")
4750 (const_string "1")))
4751 (set (attr "modrm")
4752 (if_then_else (eq_attr "prefix_0f" "0")
4753 (const_string "0")
4754 (const_string "1")))])
4755 \f
4756 ;; Conversions between float and double.
4757
4758 ;; These are all no-ops in the model used for the 80387.
4759 ;; So just emit moves.
4760
4761 ;; %%% Kill these when call knows how to work out a DFmode push earlier.
/* After reload, turn a DFmode push of a float_extended SFmode FP register
   into an explicit stack-pointer decrement by 8 followed by a store of the
   extended value at the new stack top.  */
4762 (define_split
4763 [(set (match_operand:DF 0 "push_operand")
4764 (float_extend:DF (match_operand:SF 1 "fp_register_operand")))]
4765 "reload_completed"
4766 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8)))
4767 (set (mem:DF (reg:P SP_REG)) (float_extend:DF (match_dup 1)))])
4768
/* Same transformation for an XFmode push of a float_extended MODEF FP
   register; the stack adjustment (operand 2) is computed as
   -GET_MODE_SIZE (XFmode) rather than hard-coded.  */
4769 (define_split
4770 [(set (match_operand:XF 0 "push_operand")
4771 (float_extend:XF (match_operand:MODEF 1 "fp_register_operand")))]
4772 "reload_completed"
4773 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
4774 (set (mem:XF (reg:P SP_REG)) (float_extend:XF (match_dup 1)))]
4775 "operands[2] = GEN_INT (-GET_MODE_SIZE (XFmode));")
4776
/* Expander for SF -> DF extension, available with TARGET_80387 or with
   TARGET_SSE2 && TARGET_SSE_MATH.  A CONST_DOUBLE source is handled here:
   when x87 may be used for it and it is a standard 80387 constant, the
   constant is extended at compile time and moved directly; otherwise it is
   forced into the constant pool and the extension loads it from memory.  */
4777 (define_expand "extendsfdf2"
4778 [(set (match_operand:DF 0 "nonimm_ssenomem_operand")
4779 (float_extend:DF (match_operand:SF 1 "general_operand")))]
4780 "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
4781 {
4782 /* ??? Needed for compress_float_constant since all fp constants
4783 are TARGET_LEGITIMATE_CONSTANT_P. */
4784 if (CONST_DOUBLE_P (operands[1]))
4785 {
4786 if ((!TARGET_SSE2 || TARGET_MIX_SSE_I387)
4787 && standard_80387_constant_p (operands[1]) > 0)
4788 {
4789 operands[1] = simplify_const_unary_operation
4790 (FLOAT_EXTEND, DFmode, operands[1], SFmode);
4791 emit_move_insn_1 (operands[0], operands[1]);
4792 DONE;
4793 }
4794 operands[1] = validize_mem (force_const_mem (SFmode, operands[1]));
4795 }
4796 })
4797
/* SF -> DF extension.  Alternatives 0 and 1 are x87 moves (fmov) handled by
   output_387_reg_move; alternatives 2 (register source) and 3 (memory
   source) use cvtss2sd.  The "enabled" attribute at the end selects which
   group is usable.  */
4798 (define_insn "*extendsfdf2"
4799 [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v,v")
4800 (float_extend:DF
4801 (match_operand:SF 1 "nonimmediate_operand" "fm,f,v,m")))]
4802 "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
4803 {
4804 switch (which_alternative)
4805 {
4806 case 0:
4807 case 1:
4808 return output_387_reg_move (insn, operands);
4809
4810 case 2:
4811 return "%vcvtss2sd\t{%d1, %0|%0, %d1}";
4812 case 3:
4813 return "%vcvtss2sd\t{%1, %d0|%d0, %1}";
4814
4815 default:
4816 gcc_unreachable ();
4817 }
4818 }
4819 [(set_attr "type" "fmov,fmov,ssecvt,ssecvt")
4820 (set_attr "avx_partial_xmm_update" "false,false,false,true")
4821 (set_attr "prefix" "orig,orig,maybe_vex,maybe_vex")
4822 (set_attr "mode" "SF,XF,DF,DF")
/* With TARGET_SSE2 && TARGET_SSE_MATH the SSE alternatives (2,3) are
   enabled and the x87 ones (0,1) only if TARGET_MIX_SSE_I387; without it
   only the x87 alternatives are enabled.  */
4823 (set (attr "enabled")
4824 (if_then_else
4825 (match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
4826 (if_then_else
4827 (eq_attr "alternative" "0,1")
4828 (symbol_ref "TARGET_MIX_SSE_I387")
4829 (symbol_ref "true"))
4830 (if_then_else
4831 (eq_attr "alternative" "0,1")
4832 (symbol_ref "true")
4833 (symbol_ref "false"))))])
4834
4835 /* For converting SF(xmm2) to DF(xmm1), use the following code instead of
4836 cvtss2sd:
4837 unpcklps xmm2,xmm2 ; packed conversion might crash on signaling NaNs
4838 cvtps2pd xmm2,xmm1
4839 We do the conversion post reload to avoid producing 128bit spills
4840 that might lead to an ICE on 32bit targets. The sequence is unlikely
4841 to combine anyway. */
4842 (define_split
4843 [(set (match_operand:DF 0 "sse_reg_operand")
4844 (float_extend:DF
4845 (match_operand:SF 1 "nonimmediate_operand")))]
4846 "TARGET_USE_VECTOR_FP_CONVERTS
4847 && optimize_insn_for_speed_p ()
4848 && reload_completed
4849 && (!EXT_REX_SSE_REG_P (operands[0])
4850 || TARGET_AVX512VL)"
4851 [(set (match_dup 2)
4852 (float_extend:V2DF
4853 (vec_select:V2SF
4854 (match_dup 3)
4855 (parallel [(const_int 0) (const_int 1)]))))]
4856 {
/* operands[2] is the destination viewed as V2DF; operands[3] is the
   V4SF register the packed conversion reads, by default the destination
   itself.  */
4857 operands[2] = lowpart_subreg (V2DFmode, operands[0], DFmode);
4858 operands[3] = lowpart_subreg (V4SFmode, operands[0], DFmode);
4859 /* Use movss for loading from memory, unpcklps reg, reg for registers.
4860 Try to avoid move when unpacking can be done in source. */
4861 if (REG_P (operands[1]))
4862 {
4863 /* If it is unsafe to overwrite upper half of source, we need
4864 to move to destination and unpack there. */
4865 if (REGNO (operands[0]) != REGNO (operands[1])
4866 || (EXT_REX_SSE_REG_P (operands[1])
4867 && !TARGET_AVX512VL))
4868 {
4869 rtx tmp = lowpart_subreg (SFmode, operands[0], DFmode);
4870 emit_move_insn (tmp, operands[1]);
4871 }
4872 else
4873 operands[3] = lowpart_subreg (V4SFmode, operands[1], SFmode);
4874 /* FIXME: vec_interleave_lowv4sf for AVX512VL should allow
4875 =v, v, then vbroadcastss will be only needed for AVX512F without
4876 AVX512VL. */
4877 if (!EXT_REX_SSE_REGNO_P (REGNO (operands[3])))
4878 emit_insn (gen_vec_interleave_lowv4sf (operands[3], operands[3],
4879 operands[3]));
4880 else
4881 {
4882 rtx tmp = lowpart_subreg (V16SFmode, operands[3], V4SFmode);
4883 emit_insn (gen_avx512f_vec_dupv16sf_1 (tmp, tmp));
4884 }
4885 }
4886 else
/* Non-register source: insert it as element 0 of an otherwise zero V4SF
   vector in operands[3].  */
4887 emit_insn (gen_vec_setv4sf_0 (operands[3],
4888 CONST0_RTX (V4SFmode), operands[1]));
4889 })
4890
4891 ;; It's more profitable to split and then extend in the same register.
;; With TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS and when optimizing for speed,
;; replace an extend of an SFmode memory operand by (1) a plain SFmode load
;; into the SFmode low part of the destination SSE register (operand 2) and
;; (2) a register-to-register extend from that low part.
4892 (define_peephole2
4893 [(set (match_operand:DF 0 "sse_reg_operand")
4894 (float_extend:DF
4895 (match_operand:SF 1 "memory_operand")))]
4896 "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
4897 && optimize_insn_for_speed_p ()"
4898 [(set (match_dup 2) (match_dup 1))
4899 (set (match_dup 0) (float_extend:DF (match_dup 2)))]
4900 "operands[2] = lowpart_subreg (SFmode, operands[0], DFmode);")
4901
4902 ;; Break partial SSE register dependency stall. This splitter should split
4903 ;; late in the pass sequence (after the register rename pass), so that
4904 ;; allocated registers won't change anymore.
4905
;; Non-AVX only, after epilogue_completed: rewrite the scalar SF->DF extend
;; as a vec_merge into the V2DF view of the destination, and first emit a
;; move of the zero vector into that view (see the preparation code).  The
;; split is not taken when the source is the same register as the
;; destination.
;; NOTE(review): the inner "!TARGET_AVX" in the REGNO test repeats the first
;; conjunct of the condition and so is always true at that point.
4906 (define_split
4907 [(set (match_operand:DF 0 "sse_reg_operand")
4908 (float_extend:DF
4909 (match_operand:SF 1 "nonimmediate_operand")))]
4910 "!TARGET_AVX
4911 && TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY
4912 && epilogue_completed
4913 && optimize_function_for_speed_p (cfun)
4914 && (!REG_P (operands[1])
4915 || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1])))
4916 && (!EXT_REX_SSE_REG_P (operands[0])
4917 || TARGET_AVX512VL)"
4918 [(set (match_dup 0)
4919 (vec_merge:V2DF
4920 (vec_duplicate:V2DF
4921 (float_extend:DF
4922 (match_dup 1)))
4923 (match_dup 0)
4924 (const_int 1)))]
4925 {
4926 operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode);
4927 emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
4928 })
4929
;; HFmode -> SFmode extend expander.  With TARGET_AVX512FP16 the RTL
;; template is emitted unchanged.  Otherwise the preparation code places
;; operand 1 in element 0 of a zeroed V8HF temporary (gen_vec_setv8hf_0),
;; converts its V8HI view with gen_vcvtph2ps into a V4SF temporary, and
;; copies the SFmode low part of that result into operand 0.
4930 (define_expand "extendhfsf2"
4931 [(set (match_operand:SF 0 "register_operand")
4932 (float_extend:SF
4933 (match_operand:HF 1 "nonimmediate_operand")))]
4934 "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
4935 {
4936 if (!TARGET_AVX512FP16)
4937 {
4938 rtx res = gen_reg_rtx (V4SFmode);
4939 rtx tmp = gen_reg_rtx (V8HFmode);
4940 rtx zero = force_reg (V8HFmode, CONST0_RTX (V8HFmode));
4941
4942 emit_insn (gen_vec_setv8hf_0 (tmp, zero, operands[1]));
4943 emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp)));
4944 emit_move_insn (operands[0], gen_lowpart (SFmode, res));
4945 DONE;
4946 }
4947 })
4948
;; HFmode -> DFmode extend expander.  Only available with
;; TARGET_AVX512FP16; there is no preparation code, so the RTL template is
;; emitted as written.
4949 (define_expand "extendhfdf2"
4950 [(set (match_operand:DF 0 "register_operand")
4951 (float_extend:DF
4952 (match_operand:HF 1 "nonimmediate_operand")))]
4953 "TARGET_AVX512FP16")
4954
;; HFmode -> MODEF extend insn for TARGET_AVX512FP16, emitted as
;; vcvtsh2<ssemodesuffix>.  The output template names the destination
;; register (%0) twice: as the destination and as the first source operand.
;; (MODEF is an iterator defined elsewhere in the file; presumably SF and
;; DF -- confirm against its definition.)
4955 (define_insn "*extendhf<mode>2"
4956 [(set (match_operand:MODEF 0 "register_operand" "=v")
4957 (float_extend:MODEF
4958 (match_operand:HF 1 "nonimmediate_operand" "vm")))]
4959 "TARGET_AVX512FP16"
4960 "vcvtsh2<ssemodesuffix>\t{%1, %0, %0|%0, %0, %1}"
4961 [(set_attr "type" "ssecvt")
4962 (set_attr "prefix" "evex")
4963 (set_attr "mode" "<MODE>")])
4964
;; BFmode -> SFmode conversion expander.  The conversion is expressed as
;; UNSPEC_CVTBFSF rather than float_extend, and is only offered when
;; TARGET_SSE2 is set and NaNs need not be honored for BFmode.
4965 (define_expand "extendbfsf2"
4966 [(set (match_operand:SF 0 "register_operand")
4967 (unspec:SF
4968 [(match_operand:BF 1 "register_operand")]
4969 UNSPEC_CVTBFSF))]
4970 "TARGET_SSE2 && !HONOR_NANS (BFmode)")
4971
4972 ;; Don't use float_extend since pslld neither raises exceptions
4973 ;; nor turns an sNaN into a qNaN.
;; Implements UNSPEC_CVTBFSF as a left shift by 16 (pslld/vpslld $16).
;; Alternative 0 (isa "noavx") shifts in place: the input is tied to the
;; output register by the "0" constraint.  Alternative 1 (isa "avx") uses
;; the three-operand vpslld form with a separate source register.
4974 (define_insn "extendbfsf2_1"
4975 [(set (match_operand:SF 0 "register_operand" "=x,Yw")
4976 (unspec:SF
4977 [(match_operand:BF 1 "register_operand" " 0,Yw")]
4978 UNSPEC_CVTBFSF))]
4979 "TARGET_SSE2"
4980 "@
4981 pslld\t{$16, %0|%0, 16}
4982 vpslld\t{$16, %1, %0|%0, %1, 16}"
4983 [(set_attr "isa" "noavx,avx")
4984 (set_attr "type" "sseishft1")
4985 (set_attr "length_immediate" "1")
4986 (set_attr "prefix_data16" "1,*")
4987 (set_attr "prefix" "orig,vex")
4988 (set_attr "mode" "TI")
4989 (set_attr "memory" "none")])
4990
;; MODEF -> XFmode extend expander (x87 only).  A CONST_DOUBLE source is
;; handled in the preparation code: if standard_80387_constant_p accepts
;; it, the extension is folded with simplify_const_unary_operation and a
;; plain move is emitted; otherwise the constant is forced into memory and
;; the RTL template is emitted with that memory operand.
4991 (define_expand "extend<mode>xf2"
4992 [(set (match_operand:XF 0 "nonimmediate_operand")
4993 (float_extend:XF (match_operand:MODEF 1 "general_operand")))]
4994 "TARGET_80387"
4995 {
4996 /* ??? Needed for compress_float_constant since all fp constants
4997 are TARGET_LEGITIMATE_CONSTANT_P. */
4998 if (CONST_DOUBLE_P (operands[1]))
4999 {
5000 if (standard_80387_constant_p (operands[1]) > 0)
5001 {
5002 operands[1] = simplify_const_unary_operation
5003 (FLOAT_EXTEND, XFmode, operands[1], <MODE>mode);
5004 emit_move_insn_1 (operands[0], operands[1]);
5005 DONE;
5006 }
5007 operands[1] = validize_mem (force_const_mem (<MODE>mode, operands[1]));
5008 }
5009 })
5010
;; x87 MODEF -> XFmode extend, emitted through output_387_reg_move.
;; Alternative 0 targets an x87 register from a register or memory source;
;; alternative 1 stores an x87 register to memory.
5011 (define_insn "*extend<mode>xf2_i387"
5012 [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m")
5013 (float_extend:XF
5014 (match_operand:MODEF 1 "nonimmediate_operand" "fm,f")))]
5015 "TARGET_80387"
5016 "* return output_387_reg_move (insn, operands);"
5017 [(set_attr "type" "fmov")
5018 (set_attr "mode" "<MODE>,XF")])
5019
5020 ;; %%% This seems like bad news.
5021 ;; This cannot output into an f-reg because there is no way to be sure
5022 ;; of truncating in that case. Otherwise this is just like a simple move
5023 ;; insn. So we pretend we can output to a reg in order to get better
5024 ;; register preferencing, but we really use a stack slot.
5025
5026 ;; Conversion from DFmode to SFmode.
5027
;; DFmode -> SFmode float_truncate.  Alternatives 0 and 1 are x87 forms
;; emitted through output_387_reg_move (store to memory, or register to
;; register); alternatives 2 and 3 emit cvtsd2ss from a register or from
;; memory.  Per the "enabled" attribute:
;;  - with TARGET_SSE2 && TARGET_SSE_MATH the x87 alternatives additionally
;;    require TARGET_MIX_SSE_I387 and the SSE alternatives are on;
;;  - otherwise only the x87 alternatives are on;
;;  - in both cases alternative 1 (x87 register destination) also requires
;;    flag_unsafe_math_optimizations.
5028 (define_insn "truncdfsf2"
5029 [(set (match_operand:SF 0 "nonimm_ssenomem_operand" "=m,f,v,v")
5030 (float_truncate:SF
5031 (match_operand:DF 1 "register_ssemem_operand" "f,f,v,m")))]
5032 "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
5033 {
5034 switch (which_alternative)
5035 {
5036 case 0:
5037 case 1:
5038 return output_387_reg_move (insn, operands);
5039
5040 case 2:
5041 return "%vcvtsd2ss\t{%d1, %0|%0, %d1}";
5042 case 3:
5043 return "%vcvtsd2ss\t{%1, %d0|%d0, %1}";
5044
5045 default:
5046 gcc_unreachable ();
5047 }
5048 }
5049 [(set_attr "type" "fmov,fmov,ssecvt,ssecvt")
5050 (set_attr "avx_partial_xmm_update" "false,false,false,true")
5051 (set_attr "mode" "SF")
5052 (set (attr "enabled")
5053 (if_then_else
5054 (match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
5055 (cond [(eq_attr "alternative" "0")
5056 (symbol_ref "TARGET_MIX_SSE_I387")
5057 (eq_attr "alternative" "1")
5058 (symbol_ref "TARGET_MIX_SSE_I387
5059 && flag_unsafe_math_optimizations")
5060 ]
5061 (symbol_ref "true"))
5062 (cond [(eq_attr "alternative" "0")
5063 (symbol_ref "true")
5064 (eq_attr "alternative" "1")
5065 (symbol_ref "flag_unsafe_math_optimizations")
5066 ]
5067 (symbol_ref "false"))))])
5068
5069 /* For converting DF(xmm2) to SF(xmm1), use the following code instead of
5070 cvtsd2ss:
5071 unpcklpd xmm2,xmm2 ; packed conversion might crash on signaling NaNs
5072 cvtpd2ps xmm2,xmm1
5073 We do the conversion post reload to avoid producing 128bit spills
5074 that might lead to an ICE on 32bit targets. The sequence is unlikely
5075 to combine anyway. */
;; Post-reload split, taken when TARGET_USE_VECTOR_FP_CONVERTS is set and
;; the insn is optimized for speed.  The scalar DF->SF truncate becomes a
;; V2DF->V2SF float_truncate of operand 4 concatenated with a zero V2SF
;; (operand 3), written to the V4SF view of the destination (operand 2).
;; The preparation code first fills operand 4: a register source goes
;; through gen_vec_dupv2df (after an optional copy into the destination),
;; any other source through gen_vec_concatv2df with a DFmode zero.
5076 (define_split
5077 [(set (match_operand:SF 0 "sse_reg_operand")
5078 (float_truncate:SF
5079 (match_operand:DF 1 "nonimmediate_operand")))]
5080 "TARGET_USE_VECTOR_FP_CONVERTS
5081 && optimize_insn_for_speed_p ()
5082 && reload_completed
5083 && (!EXT_REX_SSE_REG_P (operands[0])
5084 || TARGET_AVX512VL)"
5085 [(set (match_dup 2)
5086 (vec_concat:V4SF
5087 (float_truncate:V2SF
5088 (match_dup 4))
5089 (match_dup 3)))]
5090 {
5091 operands[2] = lowpart_subreg (V4SFmode, operands[0], SFmode);
5092 operands[3] = CONST0_RTX (V2SFmode);
5093 operands[4] = lowpart_subreg (V2DFmode, operands[0], SFmode);
5094 /* Use movsd for loading from memory, unpcklpd for registers.
5095 Try to avoid move when unpacking can be done in source, or SSE3
5096 movddup is available. */
5097 if (REG_P (operands[1]))
5098 {
5099 if ((!TARGET_SSE3 && REGNO (operands[0]) != REGNO (operands[1]))
5100 || (EXT_REX_SSE_REG_P (operands[1]) && !TARGET_AVX512VL))
5101 {
5102 rtx tmp = lowpart_subreg (DFmode, operands[0], SFmode);
5103 emit_move_insn (tmp, operands[1]);
5104 operands[1] = tmp;
5105 }
5106 else if (!TARGET_SSE3)
5107 operands[4] = lowpart_subreg (V2DFmode, operands[1], DFmode);
5108 emit_insn (gen_vec_dupv2df (operands[4], operands[1]));
5109 }
5110 else
5111 emit_insn (gen_vec_concatv2df (operands[4], operands[1],
5112 CONST0_RTX (DFmode)));
5113 })
5114
5115 ;; It's more profitable to split and then truncate in the same register.
;; With TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS and when optimizing for speed,
;; replace a truncate of a DFmode memory operand by (1) a plain DFmode load
;; into the DFmode view of the destination SSE register (operand 2) and
;; (2) a register-to-register truncate from that view.
5116 (define_peephole2
5117 [(set (match_operand:SF 0 "sse_reg_operand")
5118 (float_truncate:SF
5119 (match_operand:DF 1 "memory_operand")))]
5120 "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
5121 && optimize_insn_for_speed_p ()"
5122 [(set (match_dup 2) (match_dup 1))
5123 (set (match_dup 0) (float_truncate:SF (match_dup 2)))]
5124 "operands[2] = lowpart_subreg (DFmode, operands[0], SFmode);")
5125
5126 ;; Break partial SSE register dependency stall. This splitter should split
5127 ;; late in the pass sequence (after the register rename pass), so that
5128 ;; allocated registers won't change anymore.
5129
;; Non-AVX only, after epilogue_completed: rewrite the scalar DF->SF
;; truncate as a vec_merge into the V4SF view of the destination, and first
;; emit a move of the zero vector into that view (see the preparation
;; code).  The split is not taken when the source is the same register as
;; the destination.
;; NOTE(review): the inner "!TARGET_AVX" in the REGNO test repeats the first
;; conjunct of the condition and so is always true at that point.
5130 (define_split
5131 [(set (match_operand:SF 0 "sse_reg_operand")
5132 (float_truncate:SF
5133 (match_operand:DF 1 "nonimmediate_operand")))]
5134 "!TARGET_AVX
5135 && TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY
5136 && epilogue_completed
5137 && optimize_function_for_speed_p (cfun)
5138 && (!REG_P (operands[1])
5139 || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1])))
5140 && (!EXT_REX_SSE_REG_P (operands[0])
5141 || TARGET_AVX512VL)"
5142 [(set (match_dup 0)
5143 (vec_merge:V4SF
5144 (vec_duplicate:V4SF
5145 (float_truncate:SF
5146 (match_dup 1)))
5147 (match_dup 0)
5148 (const_int 1)))]
5149 {
5150 operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode);
5151 emit_move_insn (operands[0], CONST0_RTX (V4SFmode));
5152 })
5153
5154 ;; Conversion from XFmode to {SF,DF}mode
5155
;; x87 XFmode -> MODEF truncate, emitted through output_387_reg_move.
;; Alternative 0 stores to memory and is always enabled; alternative 1
;; (x87 register destination) is enabled only with
;; flag_unsafe_math_optimizations.
5156 (define_insn "truncxf<mode>2"
5157 [(set (match_operand:MODEF 0 "nonimmediate_operand" "=m,f")
5158 (float_truncate:MODEF
5159 (match_operand:XF 1 "register_operand" "f,f")))]
5160 "TARGET_80387"
5161 "* return output_387_reg_move (insn, operands);"
5162 [(set_attr "type" "fmov")
5163 (set_attr "mode" "<MODE>")
5164 (set (attr "enabled")
5165 (cond [(eq_attr "alternative" "1")
5166 (symbol_ref "flag_unsafe_math_optimizations")
5167 ]
5168 (symbol_ref "true")))])
5169
5170 ;; Conversion from {SF,DF}mode to HFmode.
5171
;; SFmode -> HFmode truncate expander.  With TARGET_AVX512FP16 the RTL
;; template is emitted unchanged.  Otherwise the preparation code places
;; operand 1 in element 0 of a zeroed V4SF temporary (gen_vec_setv4sf_0),
;; converts it with gen_vcvtps2ph into the V8HI view of a V8HF temporary,
;; and copies the HFmode low part of that result into operand 0.
;; NOTE(review): the immediate GEN_INT (4) passed to gen_vcvtps2ph is
;; presumably the rounding-control field selecting MXCSR rounding --
;; confirm against the vcvtps2ph documentation.
5172 (define_expand "truncsfhf2"
5173 [(set (match_operand:HF 0 "register_operand")
5174 (float_truncate:HF
5175 (match_operand:SF 1 "nonimmediate_operand")))]
5176 "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
5177 {
5178 if (!TARGET_AVX512FP16)
5179 {
5180 rtx res = gen_reg_rtx (V8HFmode);
5181 rtx tmp = gen_reg_rtx (V4SFmode);
5182 rtx zero = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
5183
5184 emit_insn (gen_vec_setv4sf_0 (tmp, zero, operands[1]));
5185 emit_insn (gen_vcvtps2ph (gen_lowpart (V8HImode, res), tmp, GEN_INT (4)));
5186 emit_move_insn (operands[0], gen_lowpart (HFmode, res));
5187 DONE;
5188 }
5189 })
5190
;; DFmode -> HFmode truncate expander.  Only available with
;; TARGET_AVX512FP16; there is no preparation code, so the RTL template is
;; emitted as written.
5191 (define_expand "truncdfhf2"
5192 [(set (match_operand:HF 0 "register_operand")
5193 (float_truncate:HF
5194 (match_operand:DF 1 "nonimmediate_operand")))]
5195 "TARGET_AVX512FP16")
5196
;; MODEF -> HFmode truncate insn for TARGET_AVX512FP16, emitted as
;; vcvt<ssemodesuffix>2sh from a register or memory source.
5197 (define_insn "*trunc<mode>hf2"
5198 [(set (match_operand:HF 0 "register_operand" "=v")
5199 (float_truncate:HF
5200 (match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
5201 "TARGET_AVX512FP16"
5202 "vcvt<ssemodesuffix>2sh\t{%1, %d0|%d0, %1}"
5203 [(set_attr "type" "ssecvt")
5204 (set_attr "prefix" "evex")
5205 (set_attr "mode" "HF")])
5206
;; SFmode -> BFmode truncate via vcvtneps2bf16.  Only matched when NaNs
;; need not be honored for BFmode and flag_unsafe_math_optimizations is
;; set.  Alternative 0 (isa "avxneconvert") emits the instruction with an
;; explicit {vex} prefix; alternative 1 (isa "avx512bf16vl") emits it
;; without one and is marked as EVEX-prefixed.
5207 (define_insn "truncsfbf2"
5208 [(set (match_operand:BF 0 "register_operand" "=x, v")
5209 (float_truncate:BF
5210 (match_operand:SF 1 "register_operand" "x,v")))]
5211 "((TARGET_AVX512BF16 && TARGET_AVX512VL) || TARGET_AVXNECONVERT)
5212 && !HONOR_NANS (BFmode) && flag_unsafe_math_optimizations"
5213 "@
5214 %{vex%} vcvtneps2bf16\t{%1, %0|%0, %1}
5215 vcvtneps2bf16\t{%1, %0|%0, %1}"
5216 [(set_attr "isa" "avxneconvert,avx512bf16vl")
5217 (set_attr "prefix" "vex,evex")])
5218
5219 ;; Signed conversion to DImode.
5220
;; XFmode -> signed DImode conversion expander (x87 only).  With
;; TARGET_FISTTP it emits gen_fix_truncdi_i387_fisttp and stops; otherwise
;; the RTL template, including the FLAGS_REG clobber, is emitted as
;; written.
5221 (define_expand "fix_truncxfdi2"
5222 [(parallel [(set (match_operand:DI 0 "nonimmediate_operand")
5223 (fix:DI (match_operand:XF 1 "register_operand")))
5224 (clobber (reg:CC FLAGS_REG))])]
5225 "TARGET_80387"
5226 {
5227 if (TARGET_FISTTP)
5228 {
5229 emit_insn (gen_fix_truncdi_i387_fisttp (operands[0], operands[1]));
5230 DONE;
5231 }
5232 })
5233
;; MODEF -> signed DImode conversion expander.  The preparation code picks
;; the first applicable strategy:
;;  1. TARGET_FISTTP, unless 64-bit SSE math handles this mode: emit
;;     gen_fix_truncdi_i387_fisttp.
;;  2. 64-bit target with an SSE float mode: emit the _sse insn into a
;;     register (a fresh pseudo if operand 0 is not a register) and copy
;;     the result to operand 0 when needed.
;;  3. Otherwise fall through to the RTL template with its FLAGS_REG
;;     clobber.
5234 (define_expand "fix_trunc<mode>di2"
5235 [(parallel [(set (match_operand:DI 0 "nonimmediate_operand")
5236 (fix:DI (match_operand:MODEF 1 "register_operand")))
5237 (clobber (reg:CC FLAGS_REG))])]
5238 "TARGET_80387 || (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode))"
5239 {
5240 if (TARGET_FISTTP
5241 && !(TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
5242 {
5243 emit_insn (gen_fix_truncdi_i387_fisttp (operands[0], operands[1]));
5244 DONE;
5245 }
5246 if (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode))
5247 {
5248 rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (DImode);
5249 emit_insn (gen_fix_trunc<mode>di_sse (out, operands[1]));
5250 if (out != operands[0])
5251 emit_move_insn (operands[0], out);
5252 DONE;
5253 }
5254 })
5255
;; HFmode -> integer (SWI48 iterator) conversion for TARGET_AVX512FP16.
;; The any_fix code iterator generates one pattern per conversion code;
;; <fixsuffix> selects the matching vcvttsh2...si mnemonic.
5256 (define_insn "fix<fixunssuffix>_trunchf<mode>2"
5257 [(set (match_operand:SWI48 0 "register_operand" "=r")
5258 (any_fix:SWI48
5259 (match_operand:HF 1 "nonimmediate_operand" "vm")))]
5260 "TARGET_AVX512FP16"
5261 "vcvttsh2<fixsuffix>si\t{%1, %0|%0, %1}"
5262 [(set_attr "type" "sseicvt")
5263 (set_attr "prefix" "evex")
5264 (set_attr "mode" "<MODE>")])
5265
5266 ;; Signed conversion to SImode.
5267
;; XFmode -> signed SImode conversion expander (x87 only).  With
;; TARGET_FISTTP it emits gen_fix_truncsi_i387_fisttp and stops; otherwise
;; the RTL template, including the FLAGS_REG clobber, is emitted as
;; written.
5268 (define_expand "fix_truncxfsi2"
5269 [(parallel [(set (match_operand:SI 0 "nonimmediate_operand")
5270 (fix:SI (match_operand:XF 1 "register_operand")))
5271 (clobber (reg:CC FLAGS_REG))])]
5272 "TARGET_80387"
5273 {
5274 if (TARGET_FISTTP)
5275 {
5276 emit_insn (gen_fix_truncsi_i387_fisttp (operands[0], operands[1]));
5277 DONE;
5278 }
5279 })
5280
;; MODEF -> signed SImode conversion expander.  The preparation code picks
;; the first applicable strategy:
;;  1. TARGET_FISTTP, unless SSE math handles this mode: emit
;;     gen_fix_truncsi_i387_fisttp.
;;  2. SSE float mode: emit the _sse insn into a register (a fresh pseudo
;;     if operand 0 is not a register) and copy the result to operand 0
;;     when needed.
;;  3. Otherwise fall through to the RTL template with its FLAGS_REG
;;     clobber.
5281 (define_expand "fix_trunc<mode>si2"
5282 [(parallel [(set (match_operand:SI 0 "nonimmediate_operand")
5283 (fix:SI (match_operand:MODEF 1 "register_operand")))
5284 (clobber (reg:CC FLAGS_REG))])]
5285 "TARGET_80387 || SSE_FLOAT_MODE_P (<MODE>mode)"
5286 {
5287 if (TARGET_FISTTP
5288 && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
5289 {
5290 emit_insn (gen_fix_truncsi_i387_fisttp (operands[0], operands[1]));
5291 DONE;
5292 }
5293 if (SSE_FLOAT_MODE_P (<MODE>mode))
5294 {
5295 rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SImode);
5296 emit_insn (gen_fix_trunc<mode>si_sse (out, operands[1]));
5297 if (out != operands[0])
5298 emit_move_insn (operands[0], out);
5299 DONE;
5300 }
5301 })
5302
5303 ;; Signed conversion to HImode.
5304
;; X87MODEF -> signed HImode conversion expander.  It requires the x87 unit
;; and is rejected for SSE float modes unless TARGET_FISTTP is set and SSE
;; math is off.  With TARGET_FISTTP it emits gen_fix_trunchi_i387_fisttp;
;; otherwise the RTL template with its FLAGS_REG clobber is emitted.
5305 (define_expand "fix_trunc<mode>hi2"
5306 [(parallel [(set (match_operand:HI 0 "nonimmediate_operand")
5307 (fix:HI (match_operand:X87MODEF 1 "register_operand")))
5308 (clobber (reg:CC FLAGS_REG))])]
5309 "TARGET_80387
5310 && !(SSE_FLOAT_MODE_P (<MODE>mode) && (!TARGET_FISTTP || TARGET_SSE_MATH))"
5311 {
5312 if (TARGET_FISTTP)
5313 {
5314 emit_insn (gen_fix_trunchi_i387_fisttp (operands[0], operands[1]));
5315 DONE;
5316 }
5317 })
5318
5319 ;; Unsigned conversion to DImode
5320
;; MODEF -> unsigned DImode conversion, emitted as a single
;; vcvtt<ssemodesuffix>2usi.  Requires a 64-bit target, TARGET_AVX512F and
;; SSE math.
5321 (define_insn "fixuns_trunc<mode>di2"
5322 [(set (match_operand:DI 0 "register_operand" "=r")
5323 (unsigned_fix:DI
5324 (match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
5325 "TARGET_64BIT && TARGET_AVX512F && TARGET_SSE_MATH"
5326 "vcvtt<ssemodesuffix>2usi\t{%1, %0|%0, %1}"
5327 [(set_attr "type" "sseicvt")
5328 (set_attr "prefix" "evex")
5329 (set_attr "mode" "DI")])
5330
5331 ;; Unsigned conversion to SImode.
5332
;; MODEF -> unsigned SImode conversion expander.
;;  - With TARGET_AVX512F it emits the single-instruction _avx512f pattern.
;;  - Otherwise it FAILs when optimizing for size.
;;  - Otherwise it builds a vector constant holding 2^31 (real_ldexp of
;;    dconst1 by 31) in <ssevecmode>, forces it into a register as operand
;;    2, and emits the RTL template with its (use ...) and two scratch
;;    clobbers.
5333 (define_expand "fixuns_trunc<mode>si2"
5334 [(parallel
5335 [(set (match_operand:SI 0 "register_operand")
5336 (unsigned_fix:SI
5337 (match_operand:MODEF 1 "nonimmediate_operand")))
5338 (use (match_dup 2))
5339 (clobber (scratch:<ssevecmode>))
5340 (clobber (scratch:<ssevecmode>))])]
5341 "(!TARGET_64BIT || TARGET_AVX512F) && TARGET_SSE2 && TARGET_SSE_MATH"
5342 {
5343 machine_mode mode = <MODE>mode;
5344 machine_mode vecmode = <ssevecmode>mode;
5345 REAL_VALUE_TYPE TWO31r;
5346 rtx two31;
5347
5348 if (TARGET_AVX512F)
5349 {
5350 emit_insn (gen_fixuns_trunc<mode>si2_avx512f (operands[0], operands[1]));
5351 DONE;
5352 }
5353
5354 if (optimize_insn_for_size_p ())
5355 FAIL;
5356
5357 real_ldexp (&TWO31r, &dconst1, 31);
5358 two31 = const_double_from_real_value (TWO31r, mode);
5359 two31 = ix86_build_const_vector (vecmode, true, two31);
5360 operands[2] = force_reg (vecmode, two31);
5361 })
5362
;; MODEF -> unsigned SImode conversion, emitted as a single
;; vcvtt<ssemodesuffix>2usi.  Requires TARGET_AVX512F and SSE math.
5363 (define_insn "fixuns_trunc<mode>si2_avx512f"
5364 [(set (match_operand:SI 0 "register_operand" "=r")
5365 (unsigned_fix:SI
5366 (match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
5367 "TARGET_AVX512F && TARGET_SSE_MATH"
5368 "vcvtt<ssemodesuffix>2usi\t{%1, %0|%0, %1}"
5369 [(set_attr "type" "sseicvt")
5370 (set_attr "prefix" "evex")
5371 (set_attr "mode" "SI")])
5372
;; HFmode -> unsigned SImode conversion whose result is zero-extended to
;; DImode, matched on 64-bit targets with TARGET_AVX512FP16.  The output
;; template writes operand 0 through the %k modifier.
5373 (define_insn "*fixuns_trunchfsi2zext"
5374 [(set (match_operand:DI 0 "register_operand" "=r")
5375 (zero_extend:DI
5376 (unsigned_fix:SI
5377 (match_operand:HF 1 "nonimmediate_operand" "vm"))))]
5378 "TARGET_64BIT && TARGET_AVX512FP16"
5379 "vcvttsh2usi\t{%1, %k0|%k0, %1}"
5380 [(set_attr "type" "sseicvt")
5381 (set_attr "prefix" "evex")
5382 (set_attr "mode" "SI")])
5383
;; MODEF -> unsigned SImode conversion whose result is zero-extended to
;; DImode, matched on 64-bit targets with TARGET_AVX512F and SSE math.  The
;; output template writes operand 0 through the %k modifier.
5384 (define_insn "*fixuns_trunc<mode>si2_avx512f_zext"
5385 [(set (match_operand:DI 0 "register_operand" "=r")
5386 (zero_extend:DI
5387 (unsigned_fix:SI
5388 (match_operand:MODEF 1 "nonimmediate_operand" "vm"))))]
5389 "TARGET_64BIT && TARGET_AVX512F && TARGET_SSE_MATH"
5390 "vcvtt<ssemodesuffix>2usi\t{%1, %k0|%k0, %1}"
5391 [(set_attr "type" "sseicvt")
5392 (set_attr "prefix" "evex")
5393 (set_attr "mode" "SI")])
5394
;; 32-bit SSE2 form of the MODEF -> unsigned SImode conversion.  It carries
;; a (use ...) of a vector operand (operand 4) and two vector scratch
;; registers.  It has no assembler output of its own ("#"): after reload it
;; is split by calling ix86_split_convert_uns_si_sse on the operand array.
5395 (define_insn_and_split "*fixuns_trunc<mode>_1"
5396 [(set (match_operand:SI 0 "register_operand" "=&x,&x")
5397 (unsigned_fix:SI
5398 (match_operand:MODEF 3 "nonimmediate_operand" "xm,xm")))
5399 (use (match_operand:<ssevecmode> 4 "nonimmediate_operand" "m,x"))
5400 (clobber (match_scratch:<ssevecmode> 1 "=x,&x"))
5401 (clobber (match_scratch:<ssevecmode> 2 "=x,x"))]
5402 "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
5403 && optimize_function_for_speed_p (cfun)"
5404 "#"
5405 "&& reload_completed"
5406 [(const_int 0)]
5407 {
5408 ix86_split_convert_uns_si_sse (operands);
5409 DONE;
5410 })
5411
5412 ;; Unsigned conversion to HImode.
5413 ;; Without these patterns, we'll try the unsigned SI conversion which
5414 ;; is complex for SSE, rather than the signed SI conversion, which isn't.
5415
;; HFmode -> unsigned HImode conversion expander for TARGET_AVX512FP16.
;; It emits a signed fix into a fresh SImode pseudo (operand 2) and then
;; takes the HImode subreg at byte offset 0 of that pseudo as the result.
5416 (define_expand "fixuns_trunchfhi2"
5417 [(set (match_dup 2)
5418 (fix:SI (match_operand:HF 1 "nonimmediate_operand")))
5419 (set (match_operand:HI 0 "nonimmediate_operand")
5420 (subreg:HI (match_dup 2) 0))]
5421 "TARGET_AVX512FP16"
5422 "operands[2] = gen_reg_rtx (SImode);")
5423
;; MODEF -> unsigned HImode conversion expander for SSE math.  It emits a
;; signed fix into a fresh SImode pseudo (operand 2) and then takes the
;; HImode subreg at byte offset 0 of that pseudo as the result.
5424 (define_expand "fixuns_trunc<mode>hi2"
5425 [(set (match_dup 2)
5426 (fix:SI (match_operand:MODEF 1 "nonimmediate_operand")))
5427 (set (match_operand:HI 0 "nonimmediate_operand")
5428 (subreg:HI (match_dup 2) 0))]
5429 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
5430 "operands[2] = gen_reg_rtx (SImode);")
5431
5432 ;; When SSE is available, it is always faster to use it!
;; SSE MODEF -> signed SWI48 conversion, emitted as cvtt...2si with an
;; optional VEX prefix.  Alternative 0 takes a register source, alternative
;; 1 a memory source; the per-CPU *_decode attributes differ between them.
;; The "prefix_rex" attribute is "1" when the integer mode is DImode.
5433 (define_insn "fix_trunc<MODEF:mode><SWI48:mode>_sse"
5434 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5435 (fix:SWI48 (match_operand:MODEF 1 "nonimmediate_operand" "v,m")))]
5436 "SSE_FLOAT_MODE_P (<MODEF:MODE>mode)
5437 && (!TARGET_FISTTP || TARGET_SSE_MATH)"
5438 "%vcvtt<MODEF:ssemodesuffix>2si<SWI48:rex64suffix>\t{%1, %0|%0, %1}"
5439 [(set_attr "type" "sseicvt")
5440 (set_attr "prefix" "maybe_vex")
5441 (set (attr "prefix_rex")
5442 (if_then_else
5443 (match_test "<SWI48:MODE>mode == DImode")
5444 (const_string "1")
5445 (const_string "*")))
5446 (set_attr "mode" "<MODEF:MODE>")
5447 (set_attr "athlon_decode" "double,vector")
5448 (set_attr "amdfam10_decode" "double,double")
5449 (set_attr "bdver1_decode" "double,double")])
5450
5451 ;; Avoid vector decoded forms of the instruction.
;; With TARGET_AVOID_VECTOR_DECODE and when optimizing for speed, replace a
;; fix of a memory operand by a load into a scratch "x" register (operand
;; 2) followed by a register-source fix.
5452 (define_peephole2
5453 [(match_scratch:MODEF 2 "x")
5454 (set (match_operand:SWI48 0 "register_operand")
5455 (fix:SWI48 (match_operand:MODEF 1 "memory_operand")))]
5456 "TARGET_AVOID_VECTOR_DECODE
5457 && SSE_FLOAT_MODE_P (<MODEF:MODE>mode)
5458 && optimize_insn_for_speed_p ()"
5459 [(set (match_dup 2) (match_dup 1))
5460 (set (match_dup 0) (fix:SWI48 (match_dup 2)))])
5461
;; x87 float -> integer (SWI248x) conversion for TARGET_FISTTP, emitted
;; through output_fix_trunc with its third argument true.  The source mode
;; is not fixed in the pattern; the condition requires an x87 float mode.
;; The destination must be memory, and an XFmode x87 scratch is clobbered.
;; The pattern is rejected when SSE math would handle the conversion (SSE
;; float mode, and either a 64-bit target or a non-DImode destination).
5462 (define_insn "fix_trunc<mode>_i387_fisttp"
5463 [(set (match_operand:SWI248x 0 "nonimmediate_operand" "=m")
5464 (fix:SWI248x (match_operand 1 "register_operand" "f")))
5465 (clobber (match_scratch:XF 2 "=&f"))]
5466 "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
5467 && TARGET_FISTTP
5468 && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
5469 && (TARGET_64BIT || <MODE>mode != DImode))
5470 && TARGET_SSE_MATH)"
5471 "* return output_fix_trunc (insn, operands, true);"
5472 [(set_attr "type" "fisttp")
5473 (set_attr "mode" "<MODE>")])
5474
5475 ;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description
5476 ;; of the machinery. Please note the clobber of FLAGS_REG. In the i387 control
5477 ;; word calculation (inserted by LCM in the mode switching pass),
5478 ;; FLAGS_REG-clobbering insns can be used. Look at the
5479 ;; emit_i387_cw_initialization () function in i386.cc.
;; Non-FISTTP x87 float -> integer conversion, matched only while
;; ix86_pre_reload_split () holds and split unconditionally ("&& 1").
;; The split records that I387_TRUNC mode switching is needed, obtains two
;; HImode stack slots (SLOT_CW_STORED and SLOT_CW_TRUNC) and emits
;; gen_fix_trunc<mode>_i387 with them as operands 2 and 3.
5480 (define_insn_and_split "*fix_trunc<mode>_i387_1"
5481 [(set (match_operand:SWI248x 0 "nonimmediate_operand")
5482 (fix:SWI248x (match_operand 1 "register_operand")))
5483 (clobber (reg:CC FLAGS_REG))]
5484 "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
5485 && !TARGET_FISTTP
5486 && !(SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
5487 && (TARGET_64BIT || <MODE>mode != DImode))
5488 && ix86_pre_reload_split ()"
5489 "#"
5490 "&& 1"
5491 [(const_int 0)]
5492 {
5493 ix86_optimize_mode_switching[I387_TRUNC] = 1;
5494
5495 operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
5496 operands[3] = assign_386_stack_local (HImode, SLOT_CW_TRUNC);
5497
5498 emit_insn (gen_fix_trunc<mode>_i387 (operands[0], operands[1],
5499 operands[2], operands[3]));
5500 DONE;
5501 }
5502 [(set_attr "type" "fistp")
5503 (set_attr "i387_cw" "trunc")
5504 (set_attr "mode" "<MODE>")])
5505
;; Non-FISTTP x87 float -> DImode conversion, emitted through
;; output_fix_trunc with its third argument false.  Operands 2 and 3 are
;; HImode memory operands that are only "used", and an XFmode x87 scratch
;; is clobbered.  The pattern is rejected on 64-bit targets for SSE float
;; modes.
5506 (define_insn "fix_truncdi_i387"
5507 [(set (match_operand:DI 0 "nonimmediate_operand" "=m")
5508 (fix:DI (match_operand 1 "register_operand" "f")))
5509 (use (match_operand:HI 2 "memory_operand" "m"))
5510 (use (match_operand:HI 3 "memory_operand" "m"))
5511 (clobber (match_scratch:XF 4 "=&f"))]
5512 "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
5513 && !TARGET_FISTTP
5514 && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))"
5515 "* return output_fix_trunc (insn, operands, false);"
5516 [(set_attr "type" "fistp")
5517 (set_attr "i387_cw" "trunc")
5518 (set_attr "mode" "DI")])
5519
;; Non-FISTTP x87 float -> SWI24 integer conversion, emitted through
;; output_fix_trunc with its third argument false.  Operands 2 and 3 are
;; HImode memory operands that are only "used".  Unlike fix_truncdi_i387
;; there is no scratch register, and the pattern is rejected for every SSE
;; float mode.
5520 (define_insn "fix_trunc<mode>_i387"
5521 [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m")
5522 (fix:SWI24 (match_operand 1 "register_operand" "f")))
5523 (use (match_operand:HI 2 "memory_operand" "m"))
5524 (use (match_operand:HI 3 "memory_operand" "m"))]
5525 "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
5526 && !TARGET_FISTTP
5527 && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
5528 "* return output_fix_trunc (insn, operands, false);"
5529 [(set_attr "type" "fistp")
5530 (set_attr "i387_cw" "trunc")
5531 (set_attr "mode" "<MODE>")])
5532
;; Emits fnstcw into an HImode memory operand, modelled as UNSPEC_FSTCW.
;; The "length" attribute is computed as the default address length of the
;; insn plus 2.
5533 (define_insn "x86_fnstcw_1"
5534 [(set (match_operand:HI 0 "memory_operand" "=m")
5535 (unspec:HI [(const_int 0)] UNSPEC_FSTCW))]
5536 "TARGET_80387"
5537 "fnstcw\t%0"
5538 [(set (attr "length")
5539 (symbol_ref "ix86_attr_length_address_default (insn) + 2"))
5540 (set_attr "mode" "HI")
5541 (set_attr "unit" "i387")
5542 (set_attr "bdver1_decode" "vector")])
5543 \f
5544 ;; Conversion between fixed point and floating point.
5545
5546 ;; Even though we only accept memory inputs, the backend _really_
5547 ;; wants to be able to do this between registers. Thankfully, LRA
5548 ;; will fix this up for us during register allocation.
5549
;; HImode -> X87MODEF conversion, emitted as fild from a memory operand
;; (constraint "m").  It is matched when the x87 unit is present and either
;; SSE math does not handle the mode or TARGET_MIX_SSE_I387 is set.
5550 (define_insn "floathi<mode>2"
5551 [(set (match_operand:X87MODEF 0 "register_operand" "=f")
5552 (float:X87MODEF (match_operand:HI 1 "nonimmediate_operand" "m")))]
5553 "TARGET_80387
5554 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
5555 || TARGET_MIX_SSE_I387)"
5556 "fild%Z1\t%1"
5557 [(set_attr "type" "fmov")
5558 (set_attr "mode" "<MODE>")
5559 (set_attr "znver1_decode" "double")
5560 (set_attr "fp_int_src" "true")])
5561
;; SWI48x integer -> XFmode conversion on the x87 unit, emitted as fild
;; from a memory operand (constraint "m").
5562 (define_insn "float<SWI48x:mode>xf2"
5563 [(set (match_operand:XF 0 "register_operand" "=f")
5564 (float:XF (match_operand:SWI48x 1 "nonimmediate_operand" "m")))]
5565 "TARGET_80387"
5566 "fild%Z1\t%1"
5567 [(set_attr "type" "fmov")
5568 (set_attr "mode" "XF")
5569 (set_attr "znver1_decode" "double")
5570 (set_attr "fp_int_src" "true")])
5571
;; SWI48x integer -> MODEF conversion expander with no preparation code.
;; It is available either through the x87 unit (X87_ENABLE_FLOAT) or
;; through SSE math; the SSE route excludes DImode sources on non-64-bit
;; targets.
5572 (define_expand "float<SWI48x:mode><MODEF:mode>2"
5573 [(set (match_operand:MODEF 0 "register_operand")
5574 (float:MODEF (match_operand:SWI48x 1 "nonimmediate_operand")))]
5575 "(TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI48x:MODE>mode))
5576 || (SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
5577 && ((<SWI48x:MODE>mode != DImode) || TARGET_64BIT))")
5578
;; SWI48 integer -> MODEF conversion with three alternatives:
;;   0: x87 fild from memory;
;;   1: cvtsi2ss/sd from a general register;
;;   2: cvtsi2ss/sd from memory.
;; Per the "enabled" attribute, when SSE math handles the mode the SSE
;; alternatives are on and alternative 0 additionally needs
;; TARGET_MIX_SSE_I387 && X87_ENABLE_FLOAT; otherwise only alternative 0 is
;; on.  Alternative 1 is preferred for speed only with
;; TARGET_INTER_UNIT_CONVERSIONS.
5579 (define_insn "*float<SWI48:mode><MODEF:mode>2"
5580 [(set (match_operand:MODEF 0 "register_operand" "=f,v,v")
5581 (float:MODEF
5582 (match_operand:SWI48 1 "nonimmediate_operand" "m,r,m")))]
5583 "(TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI48:MODE>mode))
5584 || (SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)"
5585 "@
5586 fild%Z1\t%1
5587 %vcvtsi2<MODEF:ssemodesuffix><SWI48:rex64suffix>\t{%1, %d0|%d0, %1}
5588 %vcvtsi2<MODEF:ssemodesuffix><SWI48:rex64suffix>\t{%1, %d0|%d0, %1}"
5589 [(set_attr "type" "fmov,sseicvt,sseicvt")
5590 (set_attr "avx_partial_xmm_update" "false,true,true")
5591 (set_attr "prefix" "orig,maybe_vex,maybe_vex")
5592 (set_attr "mode" "<MODEF:MODE>")
5593 (set (attr "prefix_rex")
5594 (if_then_else
5595 (and (eq_attr "prefix" "maybe_vex")
5596 (match_test "<SWI48:MODE>mode == DImode"))
5597 (const_string "1")
5598 (const_string "*")))
5599 (set_attr "unit" "i387,*,*")
5600 (set_attr "athlon_decode" "*,double,direct")
5601 (set_attr "amdfam10_decode" "*,vector,double")
5602 (set_attr "bdver1_decode" "*,double,direct")
5603 (set_attr "znver1_decode" "double,*,*")
5604 (set_attr "fp_int_src" "true")
5605 (set (attr "enabled")
5606 (if_then_else
5607 (match_test ("SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH"))
5608 (if_then_else
5609 (eq_attr "alternative" "0")
5610 (symbol_ref "TARGET_MIX_SSE_I387
5611 && X87_ENABLE_FLOAT (<MODEF:MODE>mode,
5612 <SWI48:MODE>mode)")
5613 (symbol_ref "true"))
5614 (if_then_else
5615 (eq_attr "alternative" "0")
5616 (symbol_ref "true")
5617 (symbol_ref "false"))))
5618 (set (attr "preferred_for_speed")
5619 (cond [(eq_attr "alternative" "1")
5620 (symbol_ref "TARGET_INTER_UNIT_CONVERSIONS")]
5621 (symbol_ref "true")))])
5622
;; SWI48 integer -> HFmode conversion for TARGET_AVX512FP16.  The any_float
;; code iterator generates one pattern per conversion code; <floatsuffix>
;; selects the matching vcvt...si2sh mnemonic.  The source may be a general
;; register or memory.
5623 (define_insn "float<floatunssuffix><mode>hf2"
5624 [(set (match_operand:HF 0 "register_operand" "=v")
5625 (any_float:HF
5626 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))]
5627 "TARGET_AVX512FP16"
5628 "vcvt<floatsuffix>si2sh<rex64suffix>\t{%1, %d0|%d0, %1}"
5629 [(set_attr "type" "sseicvt")
5630 (set_attr "prefix" "evex")
5631 (set_attr "mode" "HF")])
5632
;; DImode -> MODEF conversion on non-64-bit targets through the x87 unit,
;; emitted as fild from a memory operand (constraint "m").
5633 (define_insn "*floatdi<MODEF:mode>2_i387"
5634 [(set (match_operand:MODEF 0 "register_operand" "=f")
5635 (float:MODEF (match_operand:DI 1 "nonimmediate_operand" "m")))]
5636 "!TARGET_64BIT
5637 && TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, DImode)"
5638 "fild%Z1\t%1"
5639 [(set_attr "type" "fmov")
5640 (set_attr "mode" "<MODEF:MODE>")
5641 (set_attr "znver1_decode" "double")
5642 (set_attr "fp_int_src" "true")])
5643
5644 ;; Try TARGET_USE_VECTOR_CONVERTS, but not so hard as to require extra memory
5645 ;; slots when !TARGET_INTER_UNIT_MOVES_TO_VEC disables the general_regs
5646 ;; alternative in sse2_loadld.
;; Post-reload split of SImode -> MODEF into a vector conversion.  The
;; integer is loaded into element 0 of the V4SI view of the destination
;; with gen_sse2_loadld (the remaining elements come from a zero vector);
;; then gen_floatv4siv4sf2 is emitted when <ssevecmode> is V4SF, and
;; gen_sse2_cvtdq2pd otherwise.
5647 (define_split
5648 [(set (match_operand:MODEF 0 "sse_reg_operand")
5649 (float:MODEF (match_operand:SI 1 "nonimmediate_operand")))]
5650 "TARGET_SSE2
5651 && TARGET_USE_VECTOR_CONVERTS
5652 && optimize_function_for_speed_p (cfun)
5653 && reload_completed
5654 && (MEM_P (operands[1]) || TARGET_INTER_UNIT_MOVES_TO_VEC)
5655 && (!EXT_REX_SSE_REG_P (operands[0])
5656 || TARGET_AVX512VL)"
5657 [(const_int 0)]
5658 {
5659 operands[3] = lowpart_subreg (<ssevecmode>mode, operands[0], <MODE>mode);
5660 operands[4] = lowpart_subreg (V4SImode, operands[0], <MODE>mode);
5661
5662 emit_insn (gen_sse2_loadld (operands[4],
5663 CONST0_RTX (V4SImode), operands[1]));
5664
5665 if (<ssevecmode>mode == V4SFmode)
5666 emit_insn (gen_floatv4siv4sf2 (operands[3], operands[4]));
5667 else
5668 emit_insn (gen_sse2_cvtdq2pd (operands[3], operands[4]));
5669 DONE;
5670 })
5671
5672 ;; Avoid store forwarding (partial memory) stall penalty
5673 ;; by passing DImode value through XMM registers.
5674
;; While pseudos can still be created, on non-64-bit SSE2 targets: replace
;; a DImode-register -> X87MODEF conversion by the *_i387_with_xmm pattern,
;; passing it a DImode stack slot obtained from assign_386_stack_local
;; (SLOT_FLOATxFDI_387).
5675 (define_split
5676 [(set (match_operand:X87MODEF 0 "register_operand")
5677 (float:X87MODEF
5678 (match_operand:DI 1 "register_operand")))]
5679 "!TARGET_64BIT && TARGET_INTER_UNIT_MOVES_TO_VEC
5680 && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
5681 && TARGET_SSE2 && optimize_function_for_speed_p (cfun)
5682 && can_create_pseudo_p ()"
5683 [(const_int 0)]
5684 {
5685 rtx s = assign_386_stack_local (DImode, SLOT_FLOATxFDI_387);
5686 emit_insn (gen_floatdi<mode>2_i387_with_xmm (operands[0], operands[1], s));
5687 DONE;
5688 })
5689
;; DImode register pair -> X87MODEF conversion that routes the value
;; through an XMM register.  It has no assembler output of its own ("#").
;; After reload the preparation code assembles the 64-bit value in scratch
;; operand 3: the low SImode half is loaded with gen_sse2_loadld, and the
;; high half is added either with gen_sse4_1_pinsrd (TARGET_SSE4_1) or by
;; loading it into scratch operand 4 and interleaving.  The split template
;; then stores the DImode low part of operand 3 to the memory operand 2 and
;; converts from that memory.  Alternative 0 (isa "sse4") does not need the
;; second scratch (constraint "X").
5690 (define_insn_and_split "floatdi<X87MODEF:mode>2_i387_with_xmm"
5691 [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
5692 (float:X87MODEF
5693 (match_operand:DI 1 "register_operand" "r,r")))
5694 (clobber (match_operand:DI 2 "memory_operand" "=m,m"))
5695 (clobber (match_scratch:V4SI 3 "=x,x"))
5696 (clobber (match_scratch:V4SI 4 "=X,x"))]
5697 "!TARGET_64BIT && TARGET_INTER_UNIT_MOVES_TO_VEC
5698 && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
5699 && TARGET_SSE2 && optimize_function_for_speed_p (cfun)"
5700 "#"
5701 "&& reload_completed"
5702 [(set (match_dup 2) (match_dup 3))
5703 (set (match_dup 0) (float:X87MODEF (match_dup 2)))]
5704 {
5705 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
5706 Assemble the 64-bit DImode value in an xmm register. */
5707 emit_insn (gen_sse2_loadld (operands[3], CONST0_RTX (V4SImode),
5708 gen_lowpart (SImode, operands[1])));
5709 if (TARGET_SSE4_1)
5710 emit_insn (gen_sse4_1_pinsrd (operands[3], operands[3],
5711 gen_highpart (SImode, operands[1]),
5712 GEN_INT (2)));
5713 else
5714 {
5715 emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode),
5716 gen_highpart (SImode, operands[1])));
5717 emit_insn (gen_vec_interleave_lowv4si (operands[3], operands[3],
5718 operands[4]));
5719 }
5720 operands[3] = gen_lowpart (DImode, operands[3]);
5721 }
5722 [(set_attr "isa" "sse4,*")
5723 (set_attr "type" "multi")
5724 (set_attr "mode" "<X87MODEF:MODE>")
5725 (set_attr "unit" "i387")
5726 (set_attr "fp_int_src" "true")])
5727
5728 ;; Break partial SSE register dependency stall. This splitter should split
5729 ;; late in the pass sequence (after register rename pass), so allocated
5730 ;; registers won't change anymore.
5731
;; Matches a scalar integer (SWI48) to float (MODEF) conversion whose
;; destination is an SSE register and rewrites it as a vec_merge, with
;; mask (const_int 1), of the converted value into the destination viewed
;; in <MODEF:ssevecmode>.  Before the replacement insn, the preparation
;; code emits a move of zero into that whole vector register.
;; Conditions: !TARGET_AVX, TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY,
;; epilogue_completed, optimizing for speed, and the destination is not
;; an EXT_REX_SSE register unless TARGET_AVX512VL.
5732 (define_split
5733 [(set (match_operand:MODEF 0 "sse_reg_operand")
5734 (float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))]
5735 "!TARGET_AVX
5736 && TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY
5737 && epilogue_completed
5738 && optimize_function_for_speed_p (cfun)
5739 && (!EXT_REX_SSE_REG_P (operands[0])
5740 || TARGET_AVX512VL)"
5741 [(set (match_dup 0)
5742 (vec_merge:<MODEF:ssevecmode>
5743 (vec_duplicate:<MODEF:ssevecmode>
5744 (float:MODEF
5745 (match_dup 1)))
5746 (match_dup 0)
5747 (const_int 1)))]
5748 {
5749 const machine_mode vmode = <MODEF:ssevecmode>mode;
5750
/* Re-express the scalar destination in the vector mode and clear the
whole register before the merging conversion is emitted.  */
5751 operands[0] = lowpart_subreg (vmode, operands[0], <MODEF:MODE>mode);
5752 emit_move_insn (operands[0], CONST0_RTX (vmode));
5753 })
5754
;; Unsigned SWI12-mode integer to MODEF float expander, enabled for
;; !TARGET_64BIT when the float mode is handled by SSE math.  The source
;; is first converted to SImode and then passed to the signed SImode
;; conversion expander.
5755 (define_expand "floatuns<SWI12:mode><MODEF:mode>2"
5756 [(set (match_operand:MODEF 0 "register_operand")
5757 (unsigned_float:MODEF
5758 (match_operand:SWI12 1 "nonimmediate_operand")))]
5759 "!TARGET_64BIT
5760 && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH"
5761 {
/* The final argument 1 requests an unsigned conversion to SImode.  */
5762 operands[1] = convert_to_mode (SImode, operands[1], 1);
5763 emit_insn (gen_floatsi<MODEF:mode>2 (operands[0], operands[1]));
5764 DONE;
5765 })
5766
;; Unsigned SWI48 integer (register or memory) to MODEF float conversion
;; using vcvtusi2<suffix>, available with TARGET_AVX512F && TARGET_SSE_MATH.
;; The output template names the destination register twice: once as the
;; destination and once as the extra source operand.
5767 (define_insn "*floatuns<SWI48:mode><MODEF:mode>2_avx512"
5768 [(set (match_operand:MODEF 0 "register_operand" "=v")
5769 (unsigned_float:MODEF
5770 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))]
5771 "TARGET_AVX512F && TARGET_SSE_MATH"
5772 "vcvtusi2<MODEF:ssemodesuffix><SWI48:rex64suffix>\t{%1, %0, %0|%0, %0, %1}"
5773 [(set_attr "type" "sseicvt")
5774 (set_attr "avx_partial_xmm_update" "true")
5775 (set_attr "prefix" "evex")
5776 (set_attr "mode" "<MODEF:MODE>")])
5777
5778 ;; Avoid store forwarding (partial memory) stall penalty by extending
5779 ;; SImode value to DImode through XMM register instead of pushing two
5780 ;; SImode values to stack. Also note that fild loads from memory only.
5781
;; Unsigned SImode to x87 float for !TARGET_64BIT.  Operand 2 is a DImode
;; memory slot and operand 3 a DImode SSE scratch; both are clobbered.
;; Output as "#" and split after reload into three sets: zero-extend the
;; source into the SSE scratch, store the scratch to the memory slot, and
;; do a (signed) float conversion of the DImode memory value.
5782 (define_insn_and_split "floatunssi<mode>2_i387_with_xmm"
5783 [(set (match_operand:X87MODEF 0 "register_operand" "=f")
5784 (unsigned_float:X87MODEF
5785 (match_operand:SI 1 "nonimmediate_operand" "rm")))
5786 (clobber (match_operand:DI 2 "memory_operand" "=m"))
5787 (clobber (match_scratch:DI 3 "=x"))]
5788 "!TARGET_64BIT
5789 && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
5790 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
5791 "#"
5792 "&& reload_completed"
5793 [(set (match_dup 3) (zero_extend:DI (match_dup 1)))
5794 (set (match_dup 2) (match_dup 3))
5795 (set (match_dup 0)
5796 (float:X87MODEF (match_dup 2)))]
5797 ""
5798 [(set_attr "type" "multi")
5799 (set_attr "mode" "<MODE>")])
5800
;; Expander for unsigned SImode to X87MODEF float conversion.  Enabled
;; either for the 32-bit x87 + SSE2 path (same condition as
;; floatunssi<mode>2_i387_with_xmm) or for SSE math when !TARGET_64BIT or
;; TARGET_AVX512F.  The preparation code picks one of three strategies;
;; see the comments inside.
5801 (define_expand "floatunssi<mode>2"
5802 [(set (match_operand:X87MODEF 0 "register_operand")
5803 (unsigned_float:X87MODEF
5804 (match_operand:SI 1 "nonimmediate_operand")))]
5805 "(!TARGET_64BIT
5806 && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
5807 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC)
5808 || ((!TARGET_64BIT || TARGET_AVX512F)
5809 && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
5810 {
/* The mode is not handled by SSE math: use the x87 pattern that goes
through an XMM register, giving it a DImode stack temporary.  */
5811 if (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
5812 {
5813 emit_insn (gen_floatunssi<mode>2_i387_with_xmm
5814 (operands[0], operands[1],
5815 assign_386_stack_local (DImode, SLOT_TEMP)));
5816 DONE;
5817 }
/* SSE math without AVX512F: expand through the helper routine.  With
AVX512F neither branch ends in DONE, so the unsigned_float template
above is emitted unchanged.  */
5818 if (!TARGET_AVX512F)
5819 {
5820 ix86_expand_convert_uns_si<mode>_sse (operands[0], operands[1]);
5821 DONE;
5822 }
5823 })
5824
;; Unsigned DImode to SFmode expander for TARGET_64BIT with SSE math.
;; Without AVX512F the conversion is expanded by x86_emit_floatuns; with
;; AVX512F the preparation code does nothing and the unsigned_float
;; template is emitted as is.
5825 (define_expand "floatunsdisf2"
5826 [(set (match_operand:SF 0 "register_operand")
5827 (unsigned_float:SF
5828 (match_operand:DI 1 "nonimmediate_operand")))]
5829 "TARGET_64BIT && TARGET_SSE && TARGET_SSE_MATH"
5830 {
5831 if (!TARGET_AVX512F)
5832 {
5833 x86_emit_floatuns (operands);
5834 DONE;
5835 }
5836 })
5837
;; Unsigned DImode to DFmode expander for SSE2 math.  Enabled when
;; (TARGET_64BIT && TARGET_AVX512F) or TARGET_KEEPS_VECTOR_ALIGNED_STACK.
;; Strategy, in order: !TARGET_64BIT uses ix86_expand_convert_uns_didf_sse;
;; 64-bit without AVX512F uses x86_emit_floatuns; 64-bit with AVX512F
;; falls through and emits the unsigned_float template as is.
5838 (define_expand "floatunsdidf2"
5839 [(set (match_operand:DF 0 "register_operand")
5840 (unsigned_float:DF
5841 (match_operand:DI 1 "nonimmediate_operand")))]
5842 "((TARGET_64BIT && TARGET_AVX512F)
5843 || TARGET_KEEPS_VECTOR_ALIGNED_STACK)
5844 && TARGET_SSE2 && TARGET_SSE_MATH"
5845 {
5846 if (!TARGET_64BIT)
5847 {
5848 ix86_expand_convert_uns_didf_sse (operands[0], operands[1]);
5849 DONE;
5850 }
5851 if (!TARGET_AVX512F)
5852 {
5853 x86_emit_floatuns (operands);
5854 DONE;
5855 }
5856 })
5857 \f
5858 ;; Load effective address instructions
5859
;; Load the address given by operand 1 into register operand 0.
;; When operand 1 satisfies SImode_address_operand the insn is output as
;; a 32-bit lea writing the SImode view of the destination (%k0), which is
;; asserted to happen only for TARGET_64BIT; the "mode" attribute is SI in
;; that case.  Otherwise the lea uses the pattern's own mode suffix.
5860 (define_insn "*lea<mode>"
5861 [(set (match_operand:SWI48 0 "register_operand" "=r")
5862 (match_operand:SWI48 1 "address_no_seg_operand" "Ts"))]
5863 "ix86_hardreg_mov_ok (operands[0], operands[1])"
5864 {
5865 if (SImode_address_operand (operands[1], VOIDmode))
5866 {
5867 gcc_assert (TARGET_64BIT);
5868 return "lea{l}\t{%E1, %k0|%k0, %E1}";
5869 }
5870 else
5871 return "lea{<imodesuffix>}\t{%E1, %0|%0, %E1}";
5872 }
5873 [(set_attr "type" "lea")
5874 (set (attr "mode")
5875 (if_then_else
5876 (match_operand 1 "SImode_address_operand")
5877 (const_string "SI")
5878 (const_string "<MODE>")))])
5879
;; Peephole that replaces an address load (same shape as *lea<mode>) by
;; the sequence produced by ix86_split_lea_for_addr.  It applies only
;; when the flags register is dead at this insn and
;; ix86_avoid_lea_for_addr accepts the insn.  All replacement insns are
;; emitted from the preparation code, hence the dummy (const_int 0)
;; template and the final DONE.
5880 (define_peephole2
5881 [(set (match_operand:SWI48 0 "register_operand")
5882 (match_operand:SWI48 1 "address_no_seg_operand"))]
5883 "ix86_hardreg_mov_ok (operands[0], operands[1])
5884 && peep2_regno_dead_p (0, FLAGS_REG)
5885 && ix86_avoid_lea_for_addr (peep2_next_insn (0), operands)"
5886 [(const_int 0)]
5887 {
5888 machine_mode mode = <MODE>mode;
5889
5890 /* Emit all operations in SImode for zero-extended addresses. */
5891 if (SImode_address_operand (operands[1], VOIDmode))
5892 mode = SImode;
5893
5894 ix86_split_lea_for_addr (peep2_next_insn (0), operands, mode);
5895
5896 /* Zero-extend return register to DImode for zero-extended addresses. */
5897 if (mode != <MODE>mode)
5898 emit_insn (gen_zero_extendsidi2 (operands[0],
5899 gen_lowpart (mode, operands[0])));
5900
5901 DONE;
5902 })
5903
5904 ;; ix86_split_lea_for_addr emits the shifts as MULT to prevent them from being
5905 ;; peephole2 optimized back into a lea. Split that into the shift during
5906 ;; the following split pass.
;; After reload, turn an in-place multiply of a general register by a
;; const1248_operand into the equivalent left shift.  The preparation
;; statement replaces the multiplier by its base-2 logarithm, which is
;; then used as the shift count.
5907 (define_split
5908 [(set (match_operand:SWI48 0 "general_reg_operand")
5909 (mult:SWI48 (match_dup 0) (match_operand:SWI48 1 "const1248_operand")))
5910 (clobber (reg:CC FLAGS_REG))]
5911 "reload_completed"
5912 [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))
5913 (clobber (reg:CC FLAGS_REG))])]
5914 "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
5915 \f
5916 ;; Add instructions
5917
;; Named addition expander for all SDWIM modes.  It is unconditional and
;; delegates the whole expansion to ix86_expand_binary_operator, so the
;; RTL template here only describes the operands.
5918 (define_expand "add<mode>3"
5919 [(set (match_operand:SDWIM 0 "nonimmediate_operand")
5920 (plus:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")
5921 (match_operand:SDWIM 2 "<general_hilo_operand>")))]
5922 ""
5923 "ix86_expand_binary_operator (PLUS, <MODE>mode, operands); DONE;")
5924
;; Double-word (<DWI>) addition, output as "#" and split after reload
;; into two DWIH-mode insns: an add of the low halves that also sets the
;; carry (CCC compare of the sum against operand 1), followed by an add
;; of the high halves plus the carry (the ltu term).
;; After split_double_mode, operands 0-2 are the low halves and operands
;; 3-5 the high halves of the original destination and sources.
5925 (define_insn_and_split "*add<dwi>3_doubleword"
5926 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
5927 (plus:<DWI>
5928 (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0")
5929 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o")))
5930 (clobber (reg:CC FLAGS_REG))]
5931 "ix86_binary_operator_ok (PLUS, <DWI>mode, operands)"
5932 "#"
5933 "&& reload_completed"
5934 [(parallel [(set (reg:CCC FLAGS_REG)
5935 (compare:CCC
5936 (plus:DWIH (match_dup 1) (match_dup 2))
5937 (match_dup 1)))
5938 (set (match_dup 0)
5939 (plus:DWIH (match_dup 1) (match_dup 2)))])
5940 (parallel [(set (match_dup 3)
5941 (plus:DWIH
5942 (plus:DWIH
5943 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
5944 (match_dup 4))
5945 (match_dup 5)))
5946 (clobber (reg:CC FLAGS_REG))])]
5947 {
5948 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
/* Special case: the low half of the addend is zero, so the two-insn
template is bypassed (DONE) and only the high halves are handled: a plain
add if the high addend is non-zero, else a move if source and destination
differ, else nothing but a deleted-insn note.  */
5949 if (operands[2] == const0_rtx)
5950 {
5951 if (operands[5] != const0_rtx)
5952 ix86_expand_binary_operator (PLUS, <MODE>mode, &operands[3]);
5953 else if (!rtx_equal_p (operands[3], operands[4]))
5954 emit_move_insn (operands[3], operands[4]);
5955 else
5956 emit_note (NOTE_INSN_DELETED);
5957 DONE;
5958 }
5959 })
5960
;; Double-word addition where one addend (operand 2) is a zero-extended
;; half-word (DWIH) value.  Output as "#" and split after reload into a
;; carry-setting add of operand 2 to the low half, followed by an add of
;; the carry and constant zero to the high half.  split_double_mode is
;; called for two operands only (destination and operand 1), producing
;; low halves in operands 0-1 and high halves in operands 3-4; operand 2
;; is already half-word sized.
5961 (define_insn_and_split "*add<dwi>3_doubleword_zext"
5962 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o")
5963 (plus:<DWI>
5964 (zero_extend:<DWI>
5965 (match_operand:DWIH 2 "nonimmediate_operand" "rm,r"))
5966 (match_operand:<DWI> 1 "nonimmediate_operand" "0,0")))
5967 (clobber (reg:CC FLAGS_REG))]
5968 "ix86_binary_operator_ok (UNKNOWN, <DWI>mode, operands)"
5969 "#"
5970 "&& reload_completed"
5971 [(parallel [(set (reg:CCC FLAGS_REG)
5972 (compare:CCC
5973 (plus:DWIH (match_dup 1) (match_dup 2))
5974 (match_dup 1)))
5975 (set (match_dup 0)
5976 (plus:DWIH (match_dup 1) (match_dup 2)))])
5977 (parallel [(set (match_dup 3)
5978 (plus:DWIH
5979 (plus:DWIH
5980 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
5981 (match_dup 4))
5982 (const_int 0)))
5983 (clobber (reg:CC FLAGS_REG))])]
5984 "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[3]);")
5985
;; SWI48 addition that clobbers the flags.
;; Alternatives 0 and 1 tie operand 1 to the destination.  Alternative 2
;; ties operand 2 to the destination instead, so the output code swaps
;; the two sources before printing.  Alternative 3 has no tie; it gets
;; type "lea" and is output as "#".
;; The "type" attribute selects the output: "incdec" when operand 2 is an
;; incdec_operand (inc for 1, dec for -1), otherwise "alu" (add, or sub
;; when x86_maybe_negate_const_int reports that it rewrote operand 2).
5986 (define_insn "*add<mode>_1"
5987 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r")
5988 (plus:SWI48
5989 (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r")
5990 (match_operand:SWI48 2 "x86_64_general_operand" "re,BM,0,le")))
5991 (clobber (reg:CC FLAGS_REG))]
5992 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
5993 {
5994 switch (get_attr_type (insn))
5995 {
5996 case TYPE_LEA:
5997 return "#";
5998
5999 case TYPE_INCDEC:
6000 gcc_assert (rtx_equal_p (operands[0], operands[1]));
6001 if (operands[2] == const1_rtx)
6002 return "inc{<imodesuffix>}\t%0";
6003 else
6004 {
6005 gcc_assert (operands[2] == constm1_rtx);
6006 return "dec{<imodesuffix>}\t%0";
6007 }
6008
6009 default:
6010 /* For most processors, ADD is faster than LEA. This alternative
6011 was added to use ADD as much as possible. */
6012 if (which_alternative == 2)
6013 std::swap (operands[1], operands[2]);
6014
6015 gcc_assert (rtx_equal_p (operands[0], operands[1]));
6016 if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
6017 return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
6018
6019 return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
6020 }
6021 }
6022 [(set (attr "type")
6023 (cond [(eq_attr "alternative" "3")
6024 (const_string "lea")
6025 (match_operand:SWI48 2 "incdec_operand")
6026 (const_string "incdec")
6027 ]
6028 (const_string "alu")))
6029 (set (attr "length_immediate")
6030 (if_then_else
6031 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6032 (const_string "1")
6033 (const_string "*")))
6034 (set_attr "mode" "<MODE>")])
6035
6036 ;; It may seem that nonimmediate operand is proper one for operand 1.
6037 ;; The addsi_1 pattern allows nonimmediate operand at that place and
6038 ;; we take care in ix86_binary_operator_ok to not allow two memory
6039 ;; operands so proper swapping will be done in reload. This allows
6040 ;; patterns constructed from addsi_1 to match.
6041
;; TARGET_64BIT only: SImode addition whose result is zero-extended into
;; a DImode register, clobbering the flags.  All instructions are printed
;; with the SImode view of the destination (%k0).
;; Alternative 0 ties operand 1 to the destination; alternative 1 ties
;; operand 2 instead, so the output code swaps the sources; alternative 2
;; has no tie, gets type "lea" and is output as "#".
6042 (define_insn "addsi_1_zext"
6043 [(set (match_operand:DI 0 "register_operand" "=r,r,r")
6044 (zero_extend:DI
6045 (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r,r")
6046 (match_operand:SI 2 "x86_64_general_operand" "rBMe,0,le"))))
6047 (clobber (reg:CC FLAGS_REG))]
6048 "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
6049 {
6050 switch (get_attr_type (insn))
6051 {
6052 case TYPE_LEA:
6053 return "#";
6054
6055 case TYPE_INCDEC:
6056 if (operands[2] == const1_rtx)
6057 return "inc{l}\t%k0";
6058 else
6059 {
6060 gcc_assert (operands[2] == constm1_rtx);
6061 return "dec{l}\t%k0";
6062 }
6063
6064 default:
6065 /* For most processors, ADD is faster than LEA. This alternative
6066 was added to use ADD as much as possible. */
6067 if (which_alternative == 1)
6068 std::swap (operands[1], operands[2]);
6069
6070 if (x86_maybe_negate_const_int (&operands[2], SImode))
6071 return "sub{l}\t{%2, %k0|%k0, %2}";
6072
6073 return "add{l}\t{%2, %k0|%k0, %2}";
6074 }
6075 }
6076 [(set (attr "type")
6077 (cond [(eq_attr "alternative" "2")
6078 (const_string "lea")
6079 (match_operand:SI 2 "incdec_operand")
6080 (const_string "incdec")
6081 ]
6082 (const_string "alu")))
6083 (set (attr "length_immediate")
6084 (if_then_else
6085 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6086 (const_string "1")
6087 (const_string "*")))
6088 (set_attr "mode" "SI")])
6089
;; HImode addition that clobbers the flags.  Same structure as
;; *add<mode>_1: alternatives 0 and 1 tie operand 1 to the destination,
;; alternative 2 ties operand 2 (sources are swapped on output), and
;; alternative 3 (constraint "Yp") gets type "lea", mode SI, and is
;; output as "#".
6090 (define_insn "*addhi_1"
6091 [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r,Yp")
6092 (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,Yp")
6093 (match_operand:HI 2 "general_operand" "rn,m,0,ln")))
6094 (clobber (reg:CC FLAGS_REG))]
6095 "ix86_binary_operator_ok (PLUS, HImode, operands)"
6096 {
6097 switch (get_attr_type (insn))
6098 {
6099 case TYPE_LEA:
6100 return "#";
6101
6102 case TYPE_INCDEC:
6103 gcc_assert (rtx_equal_p (operands[0], operands[1]));
6104 if (operands[2] == const1_rtx)
6105 return "inc{w}\t%0";
6106 else
6107 {
6108 gcc_assert (operands[2] == constm1_rtx);
6109 return "dec{w}\t%0";
6110 }
6111
6112 default:
6113 /* For most processors, ADD is faster than LEA. This alternative
6114 was added to use ADD as much as possible. */
6115 if (which_alternative == 2)
6116 std::swap (operands[1], operands[2]);
6117
6118 gcc_assert (rtx_equal_p (operands[0], operands[1]));
6119 if (x86_maybe_negate_const_int (&operands[2], HImode))
6120 return "sub{w}\t{%2, %0|%0, %2}";
6121
6122 return "add{w}\t{%2, %0|%0, %2}";
6123 }
6124 }
6125 [(set (attr "type")
6126 (cond [(eq_attr "alternative" "3")
6127 (const_string "lea")
6128 (match_operand:HI 2 "incdec_operand")
6129 (const_string "incdec")
6130 ]
6131 (const_string "alu")))
6132 (set (attr "length_immediate")
6133 (if_then_else
6134 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6135 (const_string "1")
6136 (const_string "*")))
6137 (set_attr "mode" "HI,HI,HI,SI")])
6138
;; QImode addition that clobbers the flags.
;; Alternatives 0-2 use "q" registers and have mode QI; alternatives 3
;; and 4 use any "r" register and have mode SI, in which case the
;; instruction is printed as a 32-bit operation on the SImode view of
;; the registers (the "widen" flag below).  Alternatives 2 and 4 tie
;; operand 2 to the destination, so the sources are swapped on output.
;; Alternative 5 gets type "lea" and is output as "#".
;; Alternatives 3 and 4 are preferred for speed only when
;; !TARGET_PARTIAL_REG_STALL.
6139 (define_insn "*addqi_1"
6140 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,q,r,r,Yp")
6141 (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,Yp")
6142 (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln")))
6143 (clobber (reg:CC FLAGS_REG))]
6144 "ix86_binary_operator_ok (PLUS, QImode, operands)"
6145 {
/* True when this alternative's "mode" attribute is not QI, i.e. the
operation is printed with the 32-bit forms.  */
6146 bool widen = (get_attr_mode (insn) != MODE_QI);
6147
6148 switch (get_attr_type (insn))
6149 {
6150 case TYPE_LEA:
6151 return "#";
6152
6153 case TYPE_INCDEC:
6154 gcc_assert (rtx_equal_p (operands[0], operands[1]));
6155 if (operands[2] == const1_rtx)
6156 return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
6157 else
6158 {
6159 gcc_assert (operands[2] == constm1_rtx);
6160 return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
6161 }
6162
6163 default:
6164 /* For most processors, ADD is faster than LEA. These alternatives
6165 were added to use ADD as much as possible. */
6166 if (which_alternative == 2 || which_alternative == 4)
6167 std::swap (operands[1], operands[2]);
6168
6169 gcc_assert (rtx_equal_p (operands[0], operands[1]));
6170 if (x86_maybe_negate_const_int (&operands[2], QImode))
6171 {
6172 if (widen)
6173 return "sub{l}\t{%2, %k0|%k0, %2}";
6174 else
6175 return "sub{b}\t{%2, %0|%0, %2}";
6176 }
6177 if (widen)
6178 return "add{l}\t{%k2, %k0|%k0, %k2}";
6179 else
6180 return "add{b}\t{%2, %0|%0, %2}";
6181 }
6182 }
6183 [(set (attr "type")
6184 (cond [(eq_attr "alternative" "5")
6185 (const_string "lea")
6186 (match_operand:QI 2 "incdec_operand")
6187 (const_string "incdec")
6188 ]
6189 (const_string "alu")))
6190 (set (attr "length_immediate")
6191 (if_then_else
6192 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6193 (const_string "1")
6194 (const_string "*")))
6195 (set_attr "mode" "QI,QI,QI,SI,SI,SI")
6196 ;; Potential partial reg stall on alternatives 3 and 4.
6197 (set (attr "preferred_for_speed")
6198 (cond [(eq_attr "alternative" "3,4")
6199 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
6200 (symbol_ref "true")))])
6201
6202 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; SWI12 addition into the low part of a register (strict_low_part),
;; clobbering the flags.  Enabled when !TARGET_PARTIAL_REG_STALL or when
;; optimizing for size.
;; Alternative 0 ties operand 1 to the destination and is output
;; directly as inc/dec/add/sub.  Alternative 1 (earlyclobber destination,
;; untied operand 1) is output as "#" and split after reload into a
;; strict_low_part move of operand 1 followed by an in-place add of
;; operand 2.
;; NOTE(review): this pattern iterates over SWI12 but passes QImode to
;; x86_maybe_negate_const_int and uses match_operand:QI in the "type"
;; attribute, whereas the sibling add patterns use <MODE> in both places.
;; Confirm whether that is intentional for the HImode instance.
6203 (define_insn_and_split "*add<mode>_1_slp"
6204 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
6205 (plus:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "%0,!<r>")
6206 (match_operand:SWI12 2 "general_operand" "<r>mn,<r>mn")))
6207 (clobber (reg:CC FLAGS_REG))]
6208 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
6209 {
/* Any alternative other than 0 is handled by the splitter.  */
6210 if (which_alternative)
6211 return "#";
6212
6213 switch (get_attr_type (insn))
6214 {
6215 case TYPE_INCDEC:
6216 if (operands[2] == const1_rtx)
6217 return "inc{<imodesuffix>}\t%0";
6218 else
6219 {
6220 gcc_assert (operands[2] == constm1_rtx);
6221 return "dec{<imodesuffix>}\t%0";
6222 }
6223
6224 default:
6225 if (x86_maybe_negate_const_int (&operands[2], QImode))
6226 return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
6227
6228 return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
6229 }
6230 }
6231 "&& reload_completed"
6232 [(set (strict_low_part (match_dup 0)) (match_dup 1))
6233 (parallel
6234 [(set (strict_low_part (match_dup 0))
6235 (plus:SWI12 (match_dup 0) (match_dup 2)))
6236 (clobber (reg:CC FLAGS_REG))])]
6237 ""
6238 [(set (attr "type")
6239 (if_then_else (match_operand:QI 2 "incdec_operand")
6240 (const_string "incdec")
6241 (const_string "alu")))
6242 (set_attr "mode" "<MODE>")])
6243
6244 ;; Split non destructive adds if we cannot use lea.
;; After reload, when ix86_avoid_lea_for_add accepts the insn, rewrite the
;; three-operand register add as a move of operand 1 into the destination
;; followed by an in-place add of operand 2 (with flags clobber).
6245 (define_split
6246 [(set (match_operand:SWI48 0 "register_operand")
6247 (plus:SWI48 (match_operand:SWI48 1 "register_operand")
6248 (match_operand:SWI48 2 "x86_64_nonmemory_operand")))
6249 (clobber (reg:CC FLAGS_REG))]
6250 "reload_completed && ix86_avoid_lea_for_add (insn, operands)"
6251 [(set (match_dup 0) (match_dup 1))
6252 (parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 2)))
6253 (clobber (reg:CC FLAGS_REG))])])
6254
6255 ;; Split non destructive adds if we cannot use lea.
;; Zero-extending variant of the split above (TARGET_64BIT only): move
;; operand 1 into the SImode low part of the DImode destination
;; (operand 3, created by the preparation statement), then do the
;; zero-extended SImode add in place.
6256 (define_split
6257 [(set (match_operand:DI 0 "register_operand")
6258 (zero_extend:DI
6259 (plus:SI (match_operand:SI 1 "register_operand")
6260 (match_operand:SI 2 "x86_64_nonmemory_operand"))))
6261 (clobber (reg:CC FLAGS_REG))]
6262 "TARGET_64BIT
6263 && reload_completed && ix86_avoid_lea_for_add (insn, operands)"
6264 [(set (match_dup 3) (match_dup 1))
6265 (parallel [(set (match_dup 0)
6266 (zero_extend:DI (plus:SI (match_dup 3) (match_dup 2))))
6267 (clobber (reg:CC FLAGS_REG))])]
6268 "operands[3] = gen_lowpart (SImode, operands[0]);")
6269
6270 ;; Convert add to the lea pattern to avoid flags dependency.
;; After reload, when ix86_lea_for_add_ok accepts the insn, replace the
;; flags-clobbering register add by a plain plus in <LEAMODE> with no
;; clobber.  If <LEAMODE> differs from the pattern mode, all three
;; operands are first converted to <LEAMODE> with gen_lowpart.
6271 (define_split
6272 [(set (match_operand:SWI 0 "register_operand")
6273 (plus:SWI (match_operand:SWI 1 "register_operand")
6274 (match_operand:SWI 2 "<nonmemory_operand>")))
6275 (clobber (reg:CC FLAGS_REG))]
6276 "reload_completed && ix86_lea_for_add_ok (insn, operands)"
6277 [(set (match_dup 0)
6278 (plus:<LEAMODE> (match_dup 1) (match_dup 2)))]
6279 {
6280 if (<MODE>mode != <LEAMODE>mode)
6281 {
6282 operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]);
6283 operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]);
6284 operands[2] = gen_lowpart (<LEAMODE>mode, operands[2]);
6285 }
6286 })
6287
6288 ;; Convert add to the lea pattern to avoid flags dependency.
;; Zero-extending variant of the split above (TARGET_64BIT only): the
;; replacement is the same zero-extended SImode plus, only without the
;; flags clobber.
6289 (define_split
6290 [(set (match_operand:DI 0 "register_operand")
6291 (zero_extend:DI
6292 (plus:SI (match_operand:SI 1 "register_operand")
6293 (match_operand:SI 2 "x86_64_nonmemory_operand"))))
6294 (clobber (reg:CC FLAGS_REG))]
6295 "TARGET_64BIT && reload_completed && ix86_lea_for_add_ok (insn, operands)"
6296 [(set (match_dup 0)
6297 (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))])
6298
;; Addition that both stores the sum in operand 0 and sets the flags
;; from comparing the sum with zero.  Requires the flags user to be
;; satisfied with CCGOCmode.  Alternative 2 ties operand 2 to the
;; destination, so the sources are swapped on output.
6299 (define_insn "*add<mode>_2"
6300 [(set (reg FLAGS_REG)
6301 (compare
6302 (plus:SWI
6303 (match_operand:SWI 1 "nonimmediate_operand" "%0,0,<r>")
6304 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,0"))
6305 (const_int 0)))
6306 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,<r>")
6307 (plus:SWI (match_dup 1) (match_dup 2)))]
6308 "ix86_match_ccmode (insn, CCGOCmode)
6309 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
6310 {
6311 switch (get_attr_type (insn))
6312 {
6313 case TYPE_INCDEC:
6314 if (operands[2] == const1_rtx)
6315 return "inc{<imodesuffix>}\t%0";
6316 else
6317 {
6318 gcc_assert (operands[2] == constm1_rtx);
6319 return "dec{<imodesuffix>}\t%0";
6320 }
6321
6322 default:
6323 if (which_alternative == 2)
6324 std::swap (operands[1], operands[2]);
6325
6326 gcc_assert (rtx_equal_p (operands[0], operands[1]));
6327 if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
6328 return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
6329
6330 return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
6331 }
6332 }
6333 [(set (attr "type")
6334 (if_then_else (match_operand:SWI 2 "incdec_operand")
6335 (const_string "incdec")
6336 (const_string "alu")))
6337 (set (attr "length_immediate")
6338 (if_then_else
6339 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6340 (const_string "1")
6341 (const_string "*")))
6342 (set_attr "mode" "<MODE>")])
6343
6344 ;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
;; TARGET_64BIT variant of *add<mode>_2 for SImode: the flags are set
;; from comparing the SImode sum with zero (CCGOCmode), and the sum is
;; stored zero-extended in a DImode register.  Instructions are printed
;; with the SImode view of the destination (%k0).  Alternative 1 ties
;; operand 2 to the destination, so the sources are swapped on output.
6345 (define_insn "*addsi_2_zext"
6346 [(set (reg FLAGS_REG)
6347 (compare
6348 (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r")
6349 (match_operand:SI 2 "x86_64_general_operand" "rBMe,0"))
6350 (const_int 0)))
6351 (set (match_operand:DI 0 "register_operand" "=r,r")
6352 (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
6353 "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
6354 && ix86_binary_operator_ok (PLUS, SImode, operands)"
6355 {
6356 switch (get_attr_type (insn))
6357 {
6358 case TYPE_INCDEC:
6359 if (operands[2] == const1_rtx)
6360 return "inc{l}\t%k0";
6361 else
6362 {
6363 gcc_assert (operands[2] == constm1_rtx);
6364 return "dec{l}\t%k0";
6365 }
6366
6367 default:
6368 if (which_alternative == 1)
6369 std::swap (operands[1], operands[2]);
6370
6371 if (x86_maybe_negate_const_int (&operands[2], SImode))
6372 return "sub{l}\t{%2, %k0|%k0, %2}";
6373
6374 return "add{l}\t{%2, %k0|%k0, %2}";
6375 }
6376 }
6377 [(set (attr "type")
6378 (if_then_else (match_operand:SI 2 "incdec_operand")
6379 (const_string "incdec")
6380 (const_string "alu")))
6381 (set (attr "length_immediate")
6382 (if_then_else
6383 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6384 (const_string "1")
6385 (const_string "*")))
6386 (set_attr "mode" "SI")])
6387
;; Flags-only pattern: compares the negation of operand 2 with operand 1
;; and is implemented with an add whose result lands in a scratch
;; register (operand 0) that is merely clobbered.  Requires the flags
;; user to be satisfied with CCZmode, and rejects two memory operands.
;; Alternative 1 ties operand 2 to the scratch, so the sources are
;; swapped on output.
6388 (define_insn "*add<mode>_3"
6389 [(set (reg FLAGS_REG)
6390 (compare
6391 (neg:SWI (match_operand:SWI 2 "<general_operand>" "<g>,0"))
6392 (match_operand:SWI 1 "nonimmediate_operand" "%0,<r>")))
6393 (clobber (match_scratch:SWI 0 "=<r>,<r>"))]
6394 "ix86_match_ccmode (insn, CCZmode)
6395 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
6396 {
6397 switch (get_attr_type (insn))
6398 {
6399 case TYPE_INCDEC:
6400 if (operands[2] == const1_rtx)
6401 return "inc{<imodesuffix>}\t%0";
6402 else
6403 {
6404 gcc_assert (operands[2] == constm1_rtx);
6405 return "dec{<imodesuffix>}\t%0";
6406 }
6407
6408 default:
6409 if (which_alternative == 1)
6410 std::swap (operands[1], operands[2]);
6411
6412 gcc_assert (rtx_equal_p (operands[0], operands[1]));
6413 if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
6414 return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
6415
6416 return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
6417 }
6418 }
6419 [(set (attr "type")
6420 (if_then_else (match_operand:SWI 2 "incdec_operand")
6421 (const_string "incdec")
6422 (const_string "alu")))
6423 (set (attr "length_immediate")
6424 (if_then_else
6425 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6426 (const_string "1")
6427 (const_string "*")))
6428 (set_attr "mode" "<MODE>")])
6429
6430 ;; See the comment for addsi_3_zext's sibling addsi_1_zext for why we use nonimmediate_operand.
;; TARGET_64BIT SImode pattern that sets the flags from comparing the
;; negation of operand 2 with operand 1 (CCZmode) and also stores the
;; zero-extended SImode sum in a DImode register.  Instructions are
;; printed with the SImode view of the destination (%k0).  Alternative 1
;; ties operand 2 to the destination, so the sources are swapped on
;; output.
6431 (define_insn "*addsi_3_zext"
6432 [(set (reg FLAGS_REG)
6433 (compare
6434 (neg:SI (match_operand:SI 2 "x86_64_general_operand" "rBMe,0"))
6435 (match_operand:SI 1 "nonimmediate_operand" "%0,r")))
6436 (set (match_operand:DI 0 "register_operand" "=r,r")
6437 (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
6438 "TARGET_64BIT && ix86_match_ccmode (insn, CCZmode)
6439 && ix86_binary_operator_ok (PLUS, SImode, operands)"
6440 {
6441 switch (get_attr_type (insn))
6442 {
6443 case TYPE_INCDEC:
6444 if (operands[2] == const1_rtx)
6445 return "inc{l}\t%k0";
6446 else
6447 {
6448 gcc_assert (operands[2] == constm1_rtx);
6449 return "dec{l}\t%k0";
6450 }
6451
6452 default:
6453 if (which_alternative == 1)
6454 std::swap (operands[1], operands[2]);
6455
6456 if (x86_maybe_negate_const_int (&operands[2], SImode))
6457 return "sub{l}\t{%2, %k0|%k0, %2}";
6458
6459 return "add{l}\t{%2, %k0|%k0, %2}";
6460 }
6461 }
6462 [(set (attr "type")
6463 (if_then_else (match_operand:SI 2 "incdec_operand")
6464 (const_string "incdec")
6465 (const_string "alu")))
6466 (set (attr "length_immediate")
6467 (if_then_else
6468 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6469 (const_string "1")
6470 (const_string "*")))
6471 (set_attr "mode" "SI")])
6472
6473 ; For comparisons against 1, -1 and 128, we may generate better code
6474 ; by converting cmp to add, inc or dec as done by peephole2. This pattern
6475 ; is matched then. We can't accept general immediate, because for
6476 ; case of overflows, the result is messed up.
6477 ; Also carry flag is reversed compared to cmp, so this conversion is valid
6478 ; only for comparisons not depending on it.
6479
;; TARGET_64BIT flags-only pattern: a DImode compare of operand 1 against
;; an immediate (operand 2), implemented arithmetically in a scratch
;; register tied to operand 1.  Requires CCGCmode to be sufficient.
;; Because the RTL is a compare rather than a plus, the mnemonics are
;; inverted relative to the other add patterns: comparing against -1
;; prints inc, against 1 prints dec, and otherwise sub is printed unless
;; x86_maybe_negate_const_int reports that it rewrote operand 2, in which
;; case add is printed.
6480 (define_insn "*adddi_4"
6481 [(set (reg FLAGS_REG)
6482 (compare
6483 (match_operand:DI 1 "nonimmediate_operand" "0")
6484 (match_operand:DI 2 "x86_64_immediate_operand" "e")))
6485 (clobber (match_scratch:DI 0 "=r"))]
6486 "TARGET_64BIT
6487 && ix86_match_ccmode (insn, CCGCmode)"
6488 {
6489 switch (get_attr_type (insn))
6490 {
6491 case TYPE_INCDEC:
6492 if (operands[2] == constm1_rtx)
6493 return "inc{q}\t%0";
6494 else
6495 {
6496 gcc_assert (operands[2] == const1_rtx);
6497 return "dec{q}\t%0";
6498 }
6499
6500 default:
6501 if (x86_maybe_negate_const_int (&operands[2], DImode))
6502 return "add{q}\t{%2, %0|%0, %2}";
6503
6504 return "sub{q}\t{%2, %0|%0, %2}";
6505 }
6506 }
6507 [(set (attr "type")
6508 (if_then_else (match_operand:DI 2 "incdec_operand")
6509 (const_string "incdec")
6510 (const_string "alu")))
6511 (set (attr "length_immediate")
6512 (if_then_else
6513 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6514 (const_string "1")
6515 (const_string "*")))
6516 (set_attr "mode" "DI")])
6517
6518 ; For comparisons against 1, -1 and 128, we may generate better code
6519 ; by converting cmp to add, inc or dec as done by peephole2. This pattern
6520 ; is matched then. We can't accept general immediate, because for
6521 ; case of overflows, the result is messed up.
6522 ; Also carry flag is reversed compared to cmp, so this conversion is valid
6523 ; only for comparisons not depending on it.
6524
;; SWI124 counterpart of *adddi_4: a flags-only compare of operand 1
;; against a const_int (operand 2), implemented arithmetically in a
;; scratch register tied to operand 1.  Requires CCGCmode to be
;; sufficient.  As in *adddi_4 the mnemonics are inverted relative to a
;; plus pattern: -1 prints inc, 1 prints dec, otherwise sub, or add when
;; x86_maybe_negate_const_int reports that it rewrote operand 2.
6525 (define_insn "*add<mode>_4"
6526 [(set (reg FLAGS_REG)
6527 (compare
6528 (match_operand:SWI124 1 "nonimmediate_operand" "0")
6529 (match_operand:SWI124 2 "const_int_operand")))
6530 (clobber (match_scratch:SWI124 0 "=<r>"))]
6531 "ix86_match_ccmode (insn, CCGCmode)"
6532 {
6533 switch (get_attr_type (insn))
6534 {
6535 case TYPE_INCDEC:
6536 if (operands[2] == constm1_rtx)
6537 return "inc{<imodesuffix>}\t%0";
6538 else
6539 {
6540 gcc_assert (operands[2] == const1_rtx);
6541 return "dec{<imodesuffix>}\t%0";
6542 }
6543
6544 default:
6545 if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
6546 return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
6547
6548 return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
6549 }
6550 }
6551 [(set (attr "type")
6552 (if_then_else (match_operand:<MODE> 2 "incdec_operand")
6553 (const_string "incdec")
6554 (const_string "alu")))
6555 (set (attr "length_immediate")
6556 (if_then_else
6557 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6558 (const_string "1")
6559 (const_string "*")))
6560 (set_attr "mode" "<MODE>")])
6561
;; Flags-only variant of *add<mode>_2: sets the flags from comparing the
;; sum of operands 1 and 2 with zero (CCGOCmode), while the sum itself
;; goes to a scratch register that is merely clobbered.  Rejects two
;; memory operands.  Alternative 1 ties operand 2 to the scratch, so the
;; sources are swapped on output.
6562 (define_insn "*add<mode>_5"
6563 [(set (reg FLAGS_REG)
6564 (compare
6565 (plus:SWI
6566 (match_operand:SWI 1 "nonimmediate_operand" "%0,<r>")
6567 (match_operand:SWI 2 "<general_operand>" "<g>,0"))
6568 (const_int 0)))
6569 (clobber (match_scratch:SWI 0 "=<r>,<r>"))]
6570 "ix86_match_ccmode (insn, CCGOCmode)
6571 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
6572 {
6573 switch (get_attr_type (insn))
6574 {
6575 case TYPE_INCDEC:
6576 if (operands[2] == const1_rtx)
6577 return "inc{<imodesuffix>}\t%0";
6578 else
6579 {
6580 gcc_assert (operands[2] == constm1_rtx);
6581 return "dec{<imodesuffix>}\t%0";
6582 }
6583
6584 default:
6585 if (which_alternative == 1)
6586 std::swap (operands[1], operands[2]);
6587
6588 gcc_assert (rtx_equal_p (operands[0], operands[1]));
6589 if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
6590 return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
6591
6592 return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
6593 }
6594 }
6595 [(set (attr "type")
6596 (if_then_else (match_operand:SWI 2 "incdec_operand")
6597 (const_string "incdec")
6598 (const_string "alu")))
6599 (set (attr "length_immediate")
6600 (if_then_else
6601 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6602 (const_string "1")
6603 (const_string "*")))
6604 (set_attr "mode" "<MODE>")])
6605
;; Expander that builds the RTL for adding a QImode constant (operand 2)
;; to the 8-bit field at bit position 8 of HImode register operand 1 and
;; storing the result into the same field of HImode register operand 0,
;; clobbering the flags.  It has no condition and no preparation code;
;; it only generates the parallel shown.
6606 (define_expand "addqi_ext_1"
6607 [(parallel
6608 [(set (zero_extract:HI (match_operand:HI 0 "register_operand")
6609 (const_int 8)
6610 (const_int 8))
6611 (subreg:HI
6612 (plus:QI
6613 (subreg:QI
6614 (zero_extract:HI (match_operand:HI 1 "register_operand")
6615 (const_int 8)
6616 (const_int 8)) 0)
6617 (match_operand:QI 2 "const_int_operand")) 0))
6618 (clobber (reg:CC FLAGS_REG))])])
6619
;; Adds QImode operand 2 to the 8-bit field at bit position 8 of a "Q"
;; register, in place, clobbering the flags.  The instruction is printed
;; as a byte operation on %h0.  Operand 1 must be the same rtx as
;; operand 0 (see the FIXME in the condition).  Alternative 1, which
;; takes a memory operand 2, is restricted by isa "nox64".
6620 (define_insn "*addqi_ext<mode>_1"
6621 [(set (zero_extract:SWI248
6622 (match_operand:SWI248 0 "register_operand" "+Q,Q")
6623 (const_int 8)
6624 (const_int 8))
6625 (subreg:SWI248
6626 (plus:QI
6627 (subreg:QI
6628 (zero_extract:SWI248
6629 (match_operand:SWI248 1 "register_operand" "0,0")
6630 (const_int 8)
6631 (const_int 8)) 0)
6632 (match_operand:QI 2 "general_operand" "QnBc,m")) 0))
6633 (clobber (reg:CC FLAGS_REG))]
6634 "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
6635 rtx_equal_p (operands[0], operands[1])"
6636 {
6637 switch (get_attr_type (insn))
6638 {
6639 case TYPE_INCDEC:
6640 if (operands[2] == const1_rtx)
6641 return "inc{b}\t%h0";
6642 else
6643 {
6644 gcc_assert (operands[2] == constm1_rtx);
6645 return "dec{b}\t%h0";
6646 }
6647
6648 default:
6649 return "add{b}\t{%2, %h0|%h0, %2}";
6650 }
6651 }
6652 [(set_attr "isa" "*,nox64")
6653 (set (attr "type")
6654 (if_then_else (match_operand:QI 2 "incdec_operand")
6655 (const_string "incdec")
6656 (const_string "alu")))
6657 (set_attr "mode" "QI")])
6658
;; Adds the 8-bit field at bit position 8 of register operand 2 to the
;; same field of register operand 0, in place, clobbering the flags.
;; Both registers use constraint "Q" and the instruction is printed as a
;; byte add on %h2 and %h0.  The condition requires operand 0 to be the
;; same rtx as operand 1 or operand 2 (see the FIXME).
6659 (define_insn "*addqi_ext<mode>_2"
6660 [(set (zero_extract:SWI248
6661 (match_operand:SWI248 0 "register_operand" "+Q")
6662 (const_int 8)
6663 (const_int 8))
6664 (subreg:SWI248
6665 (plus:QI
6666 (subreg:QI
6667 (zero_extract:SWI248
6668 (match_operand:SWI248 1 "register_operand" "%0")
6669 (const_int 8)
6670 (const_int 8)) 0)
6671 (subreg:QI
6672 (zero_extract:SWI248
6673 (match_operand:SWI248 2 "register_operand" "Q")
6674 (const_int 8)
6675 (const_int 8)) 0)) 0))
6676 (clobber (reg:CC FLAGS_REG))]
6677 "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
6678 rtx_equal_p (operands[0], operands[1])
6679 || rtx_equal_p (operands[0], operands[2])"
6680 "add{b}\t{%h2, %h0|%h0, %h2}"
6681 [(set_attr "type" "alu")
6682 (set_attr "mode" "QI")])
6683
6684 ;; Like DWI, but use POImode instead of OImode.
;; Maps each integer mode to the mode of twice its width; used by the
;; addv<mode>4 expander as the extended mode for the overflow check.
6685 (define_mode_attr DPWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI") (TI "POI")])
6686
6687 ;; Add with jump on overflow.
;; Expander for addition with an overflow branch.  It emits a parallel
;; that stores the sum of operands 1 and 2 in operand 0 and sets the CCO
;; flags from comparing the sum computed in the wider <DPWI> mode with
;; the sign-extended narrow sum, followed by a conditional jump on those
;; flags to the label in operand 3.
;; Operand 4, the second addend of the wide sum, is filled in by the
;; preparation code: operand 2 itself when it is a scalar integer
;; constant, otherwise a sign_extend of operand 2 to <DPWI> mode.
6688 (define_expand "addv<mode>4"
6689 [(parallel [(set (reg:CCO FLAGS_REG)
6690 (eq:CCO
6691 (plus:<DPWI>
6692 (sign_extend:<DPWI>
6693 (match_operand:SWIDWI 1 "nonimmediate_operand"))
6694 (match_dup 4))
6695 (sign_extend:<DPWI>
6696 (plus:SWIDWI (match_dup 1)
6697 (match_operand:SWIDWI 2
6698 "<general_hilo_operand>")))))
6699 (set (match_operand:SWIDWI 0 "register_operand")
6700 (plus:SWIDWI (match_dup 1) (match_dup 2)))])
6701 (set (pc) (if_then_else
6702 (eq (reg:CCO FLAGS_REG) (const_int 0))
6703 (label_ref (match_operand 3))
6704 (pc)))]
6705 ""
6706 {
6707 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
6708 if (CONST_SCALAR_INT_P (operands[2]))
6709 operands[4] = operands[2];
6710 else
6711 operands[4] = gen_rtx_SIGN_EXTEND (<DPWI>mode, operands[2]);
6712 })
6713
;; Overflow-checking add: the CCO flags are set from comparing the sum of
;; the sign-extended operands (in <DWI> mode) with the sign extension of
;; the SWI-mode sum, and operand 0 receives the SWI-mode sum.
;; Emitted as a plain add of operand 2 into operand 0.
6714 (define_insn "*addv<mode>4"
6715 [(set (reg:CCO FLAGS_REG)
6716 (eq:CCO (plus:<DWI>
6717 (sign_extend:<DWI>
6718 (match_operand:SWI 1 "nonimmediate_operand" "%0,0"))
6719 (sign_extend:<DWI>
6720 (match_operand:SWI 2 "<general_sext_operand>" "<r>We,m")))
6721 (sign_extend:<DWI>
6722 (plus:SWI (match_dup 1) (match_dup 2)))))
6723 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
6724 (plus:SWI (match_dup 1) (match_dup 2)))]
6725 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
6726 "add{<imodesuffix>}\t{%2, %0|%0, %2}"
6727 [(set_attr "type" "alu")
6728 (set_attr "mode" "<MODE>")])
6729
;; Overflow-checking add of an immediate.  Operand 2 is the immediate in
;; the operation mode and operand 3 is the constant that appears in the
;; <DWI>-mode sum; the insn condition requires operand 2 to be a CONST_INT
;; whose INTVAL equals that of operand 3.
;; length_immediate is 1 for values in [-128, 127], 4 when <MODE_SIZE> is 8,
;; and <MODE_SIZE> otherwise.
6730 (define_insn "addv<mode>4_1"
6731 [(set (reg:CCO FLAGS_REG)
6732 (eq:CCO (plus:<DWI>
6733 (sign_extend:<DWI>
6734 (match_operand:SWI 1 "nonimmediate_operand" "0"))
6735 (match_operand:<DWI> 3 "const_int_operand"))
6736 (sign_extend:<DWI>
6737 (plus:SWI
6738 (match_dup 1)
6739 (match_operand:SWI 2 "x86_64_immediate_operand" "<i>")))))
6740 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
6741 (plus:SWI (match_dup 1) (match_dup 2)))]
6742 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
6743 && CONST_INT_P (operands[2])
6744 && INTVAL (operands[2]) == INTVAL (operands[3])"
6745 "add{<imodesuffix>}\t{%2, %0|%0, %2}"
6746 [(set_attr "type" "alu")
6747 (set_attr "mode" "<MODE>")
6748 (set (attr "length_immediate")
6749 (cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
6750 (const_string "1")
6751 (match_test "<MODE_SIZE> == 8")
6752 (const_string "4")]
6753 (const_string "<MODE_SIZE>")))])
6754
6755 ;; Quad word integer modes as mode attribute.
;; Maps SI->TI and DI->POI.
6756 (define_mode_attr QPWI [(SI "TI") (DI "POI")])
6757
;; Overflow-checking add on <DWI>-mode operands.  The output template is
;; "#", so the insn is never emitted as-is: after reload it is split into
;; a DWIH-mode add on operands 0..2 that sets the CCC flags, followed by an
;; add that also adds the carry (ltu of the flags) on operands 3..5 and
;; sets the CCO flags.
6758 (define_insn_and_split "*addv<dwi>4_doubleword"
6759 [(set (reg:CCO FLAGS_REG)
6760 (eq:CCO
6761 (plus:<QPWI>
6762 (sign_extend:<QPWI>
6763 (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0"))
6764 (sign_extend:<QPWI>
6765 (match_operand:<DWI> 2 "nonimmediate_operand" "r,o")))
6766 (sign_extend:<QPWI>
6767 (plus:<DWI> (match_dup 1) (match_dup 2)))))
6768 (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
6769 (plus:<DWI> (match_dup 1) (match_dup 2)))]
6770 "ix86_binary_operator_ok (PLUS, <DWI>mode, operands)"
6771 "#"
6772 "&& reload_completed"
6773 [(parallel [(set (reg:CCC FLAGS_REG)
6774 (compare:CCC
6775 (plus:DWIH (match_dup 1) (match_dup 2))
6776 (match_dup 1)))
6777 (set (match_dup 0)
6778 (plus:DWIH (match_dup 1) (match_dup 2)))])
6779 (parallel [(set (reg:CCO FLAGS_REG)
6780 (eq:CCO
6781 (plus:<DWI>
6782 (plus:<DWI>
6783 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))
6784 (sign_extend:<DWI> (match_dup 4)))
6785 (sign_extend:<DWI> (match_dup 5)))
6786 (sign_extend:<DWI>
6787 (plus:DWIH
6788 (plus:DWIH
6789 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
6790 (match_dup 4))
6791 (match_dup 5)))))
6792 (set (match_dup 3)
6793 (plus:DWIH
6794 (plus:DWIH
6795 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
6796 (match_dup 4))
6797 (match_dup 5)))])]
6798 {
/* Splits the three <DWI> operands; judging by their use in the templates
   above, operands 0..2 presumably end up as the low halves and operands
   3..5 as the high halves.  */
6799 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
6800 })
6801
;; Overflow-checking <DWI>-mode add of a constant.  Operand 2 is the
;; constant in <DWI> mode and operand 3 the constant used in the
;; <QPWI>-mode sum; the insn condition requires operand 2 to be a constant
;; scalar integer that is rtx_equal_p to operand 3.  Always split ("#")
;; after reload into a DWIH add that sets CCC plus an add-with-carry on
;; operands 3..5 that sets CCO.  If operand 2 is const0_rtx after the
;; operands have been split, a single addv<mode>4_1 on operands 3..5 is
;; emitted instead.
6802 (define_insn_and_split "*addv<dwi>4_doubleword_1"
6803 [(set (reg:CCO FLAGS_REG)
6804 (eq:CCO
6805 (plus:<QPWI>
6806 (sign_extend:<QPWI>
6807 (match_operand:<DWI> 1 "nonimmediate_operand" "%0"))
6808 (match_operand:<QPWI> 3 "const_scalar_int_operand" "n"))
6809 (sign_extend:<QPWI>
6810 (plus:<DWI>
6811 (match_dup 1)
6812 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "<di>")))))
6813 (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
6814 (plus:<DWI> (match_dup 1) (match_dup 2)))]
6815 "ix86_binary_operator_ok (PLUS, <DWI>mode, operands)
6816 && CONST_SCALAR_INT_P (operands[2])
6817 && rtx_equal_p (operands[2], operands[3])"
6818 "#"
6819 "&& reload_completed"
6820 [(parallel [(set (reg:CCC FLAGS_REG)
6821 (compare:CCC
6822 (plus:DWIH (match_dup 1) (match_dup 2))
6823 (match_dup 1)))
6824 (set (match_dup 0)
6825 (plus:DWIH (match_dup 1) (match_dup 2)))])
6826 (parallel [(set (reg:CCO FLAGS_REG)
6827 (eq:CCO
6828 (plus:<DWI>
6829 (plus:<DWI>
6830 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))
6831 (sign_extend:<DWI> (match_dup 4)))
6832 (match_dup 5))
6833 (sign_extend:<DWI>
6834 (plus:DWIH
6835 (plus:DWIH
6836 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
6837 (match_dup 4))
6838 (match_dup 5)))))
6839 (set (match_dup 3)
6840 (plus:DWIH
6841 (plus:DWIH
6842 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
6843 (match_dup 4))
6844 (match_dup 5)))])]
6845 {
6846 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
/* With a zero operand 2 after the split there is no carry to propagate:
   emit only the overflow-checking add on operands 3..5 and skip the
   two-insn template above.  */
6847 if (operands[2] == const0_rtx)
6848 {
6849 emit_insn (gen_addv<mode>4_1 (operands[3], operands[4], operands[5],
6850 operands[5]));
6851 DONE;
6852 }
6853 })
6854
;; Overflow-checking add that also adds a carry operator: operand 0 is set
;; to (carry + operand 1) + operand 2 and the CCO flags are set from
;; comparing the <DWI>-mode sum of the sign-extended inputs with the
;; sign-extended SWI-mode sum.  Operators 4 and 5 are the carry test on
;; flags register operand 3 in <DWI> and SWI mode.  Emitted as adc.
6855 (define_insn "*addv<mode>4_overflow_1"
6856 [(set (reg:CCO FLAGS_REG)
6857 (eq:CCO
6858 (plus:<DWI>
6859 (plus:<DWI>
6860 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
6861 [(match_operand 3 "flags_reg_operand") (const_int 0)])
6862 (sign_extend:<DWI>
6863 (match_operand:SWI 1 "nonimmediate_operand" "%0,0")))
6864 (sign_extend:<DWI>
6865 (match_operand:SWI 2 "<general_sext_operand>" "rWe,m")))
6866 (sign_extend:<DWI>
6867 (plus:SWI
6868 (plus:SWI
6869 (match_operator:SWI 5 "ix86_carry_flag_operator"
6870 [(match_dup 3) (const_int 0)])
6871 (match_dup 1))
6872 (match_dup 2)))))
6873 (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r")
6874 (plus:SWI
6875 (plus:SWI
6876 (match_op_dup 5 [(match_dup 3) (const_int 0)])
6877 (match_dup 1))
6878 (match_dup 2)))]
6879 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
6880 "adc{<imodesuffix>}\t{%2, %0|%0, %2}"
6881 [(set_attr "type" "alu")
6882 (set_attr "mode" "<MODE>")])
6883
;; Immediate form of the overflow-checking add-with-carry.  Operand 2 is
;; the immediate in SWI mode and operand 6 the constant used in the
;; <DWI>-mode sum; the insn condition requires operand 2 to be a CONST_INT
;; with the same INTVAL as operand 6.  Emitted as adc.
;; length_immediate is 1 for values in [-128, 127] and 4 otherwise.
6884 (define_insn "*addv<mode>4_overflow_2"
6885 [(set (reg:CCO FLAGS_REG)
6886 (eq:CCO
6887 (plus:<DWI>
6888 (plus:<DWI>
6889 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
6890 [(match_operand 3 "flags_reg_operand") (const_int 0)])
6891 (sign_extend:<DWI>
6892 (match_operand:SWI 1 "nonimmediate_operand" "%0")))
6893 (match_operand:<DWI> 6 "const_int_operand" "n"))
6894 (sign_extend:<DWI>
6895 (plus:SWI
6896 (plus:SWI
6897 (match_operator:SWI 5 "ix86_carry_flag_operator"
6898 [(match_dup 3) (const_int 0)])
6899 (match_dup 1))
6900 (match_operand:SWI 2 "x86_64_immediate_operand" "e")))))
6901 (set (match_operand:SWI 0 "nonimmediate_operand" "=rm")
6902 (plus:SWI
6903 (plus:SWI
6904 (match_op_dup 5 [(match_dup 3) (const_int 0)])
6905 (match_dup 1))
6906 (match_dup 2)))]
6907 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
6908 && CONST_INT_P (operands[2])
6909 && INTVAL (operands[2]) == INTVAL (operands[6])"
6910 "adc{<imodesuffix>}\t{%2, %0|%0, %2}"
6911 [(set_attr "type" "alu")
6912 (set_attr "mode" "<MODE>")
6913 (set (attr "length_immediate")
6914 (if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
6915 (const_string "1")
6916 (const_string "4")))])
6917
;; Expander for an add followed by a conditional jump on the CCC flags:
;; the parallel sets the flags from comparing operand 1 + operand 2 with
;; operand 1 and stores the sum in operand 0; the jump to the label in
;; operand 3 is controlled by (ltu (reg:CCC FLAGS_REG) 0).  The operands
;; are first passed through ix86_fixup_binary_operands_no_copy.
6918 (define_expand "uaddv<mode>4"
6919 [(parallel [(set (reg:CCC FLAGS_REG)
6920 (compare:CCC
6921 (plus:SWIDWI
6922 (match_operand:SWIDWI 1 "nonimmediate_operand")
6923 (match_operand:SWIDWI 2 "<general_hilo_operand>"))
6924 (match_dup 1)))
6925 (set (match_operand:SWIDWI 0 "register_operand")
6926 (plus:SWIDWI (match_dup 1) (match_dup 2)))])
6927 (set (pc) (if_then_else
6928 (ltu (reg:CCC FLAGS_REG) (const_int 0))
6929 (label_ref (match_operand 3))
6930 (pc)))]
6931 ""
6932 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
6933
6934 ;; The lea patterns for modes less than 32 bits need to be matched by
6935 ;; several insns converted to real lea by splitters.
6936
;; SWI12 sum of an index register, a register and an immediate.  Output is
;; "#"; after reload the insn is rewritten as the same sum in SImode with
;; all four operands replaced by their SImode lowparts.  Available when
;; !TARGET_PARTIAL_REG_STALL or when optimizing the function for size.
6937 (define_insn_and_split "*lea<mode>_general_1"
6938 [(set (match_operand:SWI12 0 "register_operand" "=r")
6939 (plus:SWI12
6940 (plus:SWI12 (match_operand:SWI12 1 "index_register_operand" "l")
6941 (match_operand:SWI12 2 "register_operand" "r"))
6942 (match_operand:SWI12 3 "immediate_operand" "i")))]
6943 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
6944 "#"
6945 "&& reload_completed"
6946 [(set (match_dup 0)
6947 (plus:SI
6948 (plus:SI (match_dup 1) (match_dup 2))
6949 (match_dup 3)))]
6950 {
6951 operands[0] = gen_lowpart (SImode, operands[0]);
6952 operands[1] = gen_lowpart (SImode, operands[1]);
6953 operands[2] = gen_lowpart (SImode, operands[2]);
6954 operands[3] = gen_lowpart (SImode, operands[3]);
6955 }
6956 [(set_attr "type" "lea")
6957 (set_attr "mode" "SI")])
6958
;; SWI12 form of (index register * const248 constant) + operand 3.  Output
;; is "#"; after reload it is rewritten as the same expression in SImode.
;; Operands 0, 1 and 3 are replaced by their SImode lowparts; operand 2
;; (the multiplier constant) is left untouched.
6959 (define_insn_and_split "*lea<mode>_general_2"
6960 [(set (match_operand:SWI12 0 "register_operand" "=r")
6961 (plus:SWI12
6962 (mult:SWI12 (match_operand:SWI12 1 "index_register_operand" "l")
6963 (match_operand 2 "const248_operand" "n"))
6964 (match_operand:SWI12 3 "nonmemory_operand" "ri")))]
6965 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
6966 "#"
6967 "&& reload_completed"
6968 [(set (match_dup 0)
6969 (plus:SI
6970 (mult:SI (match_dup 1) (match_dup 2))
6971 (match_dup 3)))]
6972 {
6973 operands[0] = gen_lowpart (SImode, operands[0]);
6974 operands[1] = gen_lowpart (SImode, operands[1]);
6975 operands[3] = gen_lowpart (SImode, operands[3]);
6976 }
6977 [(set_attr "type" "lea")
6978 (set_attr "mode" "SI")])
6979
;; Same as the mult-based pattern of this family, but with the scaling
;; written as a left shift by a const123 constant:
;; (index register << operand 2) + operand 3 in SWI12 mode.  Split after
;; reload into the same expression in SImode; operands 0, 1 and 3 become
;; SImode lowparts and the shift count is left untouched.
6980 (define_insn_and_split "*lea<mode>_general_2b"
6981 [(set (match_operand:SWI12 0 "register_operand" "=r")
6982 (plus:SWI12
6983 (ashift:SWI12 (match_operand:SWI12 1 "index_register_operand" "l")
6984 (match_operand 2 "const123_operand" "n"))
6985 (match_operand:SWI12 3 "nonmemory_operand" "ri")))]
6986 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
6987 "#"
6988 "&& reload_completed"
6989 [(set (match_dup 0)
6990 (plus:SI
6991 (ashift:SI (match_dup 1) (match_dup 2))
6992 (match_dup 3)))]
6993 {
6994 operands[0] = gen_lowpart (SImode, operands[0]);
6995 operands[1] = gen_lowpart (SImode, operands[1]);
6996 operands[3] = gen_lowpart (SImode, operands[3]);
6997 }
6998 [(set_attr "type" "lea")
6999 (set_attr "mode" "SI")])
7000
;; SWI12 form of (index register * const248 constant) + register +
;; immediate.  Output is "#"; after reload it is rewritten as the same
;; expression in SImode with operands 0, 1, 3 and 4 replaced by their
;; SImode lowparts (the multiplier, operand 2, is left untouched).
7001 (define_insn_and_split "*lea<mode>_general_3"
7002 [(set (match_operand:SWI12 0 "register_operand" "=r")
7003 (plus:SWI12
7004 (plus:SWI12
7005 (mult:SWI12 (match_operand:SWI12 1 "index_register_operand" "l")
7006 (match_operand 2 "const248_operand" "n"))
7007 (match_operand:SWI12 3 "register_operand" "r"))
7008 (match_operand:SWI12 4 "immediate_operand" "i")))]
7009 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
7010 "#"
7011 "&& reload_completed"
7012 [(set (match_dup 0)
7013 (plus:SI
7014 (plus:SI
7015 (mult:SI (match_dup 1) (match_dup 2))
7016 (match_dup 3))
7017 (match_dup 4)))]
7018 {
7019 operands[0] = gen_lowpart (SImode, operands[0]);
7020 operands[1] = gen_lowpart (SImode, operands[1]);
7021 operands[3] = gen_lowpart (SImode, operands[3]);
7022 operands[4] = gen_lowpart (SImode, operands[4]);
7023 }
7024 [(set_attr "type" "lea")
7025 (set_attr "mode" "SI")])
7026
;; Shift-based variant of the three-term pattern:
;; (index register << const123 constant) + register + immediate in SWI12
;; mode.  Split after reload into the same expression in SImode with
;; operands 0, 1, 3 and 4 replaced by their SImode lowparts.
7027 (define_insn_and_split "*lea<mode>_general_3b"
7028 [(set (match_operand:SWI12 0 "register_operand" "=r")
7029 (plus:SWI12
7030 (plus:SWI12
7031 (ashift:SWI12 (match_operand:SWI12 1 "index_register_operand" "l")
7032 (match_operand 2 "const123_operand" "n"))
7033 (match_operand:SWI12 3 "register_operand" "r"))
7034 (match_operand:SWI12 4 "immediate_operand" "i")))]
7035 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
7036 "#"
7037 "&& reload_completed"
7038 [(set (match_dup 0)
7039 (plus:SI
7040 (plus:SI
7041 (ashift:SI (match_dup 1) (match_dup 2))
7042 (match_dup 3))
7043 (match_dup 4)))]
7044 {
7045 operands[0] = gen_lowpart (SImode, operands[0]);
7046 operands[1] = gen_lowpart (SImode, operands[1]);
7047 operands[3] = gen_lowpart (SImode, operands[3]);
7048 operands[4] = gen_lowpart (SImode, operands[4]);
7049 }
7050 [(set_attr "type" "lea")
7051 (set_attr "mode" "SI")])
7052
;; SWI12 any_or of (index register << operand 2) with a constant operand 3.
;; The insn condition requires operand 3, taken as unsigned, to be smaller
;; than 1 << operand 2, i.e. the constant only has bits below the shift
;; count.  After reload the insn is rewritten in SImode as
;; (operand 1 * (1 << operand 2)) + operand 3: operands 0 and 1 become
;; SImode lowparts and operand 2 is replaced by the multiplier.
7053 (define_insn_and_split "*lea<mode>_general_4"
7054 [(set (match_operand:SWI12 0 "register_operand" "=r")
7055 (any_or:SWI12
7056 (ashift:SWI12
7057 (match_operand:SWI12 1 "index_register_operand" "l")
7058 (match_operand 2 "const_0_to_3_operand"))
7059 (match_operand 3 "const_int_operand")))]
7060 "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
7061 && ((unsigned HOST_WIDE_INT) INTVAL (operands[3])
7062 < (HOST_WIDE_INT_1U << INTVAL (operands[2])))"
7063 "#"
7064 "&& reload_completed"
7065 [(set (match_dup 0)
7066 (plus:SI
7067 (mult:SI (match_dup 1) (match_dup 2))
7068 (match_dup 3)))]
7069 {
7070 operands[0] = gen_lowpart (SImode, operands[0]);
7071 operands[1] = gen_lowpart (SImode, operands[1]);
7072 operands[2] = GEN_INT (1 << INTVAL (operands[2]));
7073 }
7074 [(set_attr "type" "lea")
7075 (set_attr "mode" "SI")])
7076
;; SWI48 counterpart of the any_or/shift pattern: any_or of
;; (index register << operand 2) with a constant operand 3 that, taken as
;; unsigned, is smaller than 1 << operand 2.  After reload it is rewritten
;; in the same mode as (operand 1 * (1 << operand 2)) + operand 3; only
;; operand 2 is changed, from shift count to multiplier.
7077 (define_insn_and_split "*lea<mode>_general_4"
7078 [(set (match_operand:SWI48 0 "register_operand" "=r")
7079 (any_or:SWI48
7080 (ashift:SWI48
7081 (match_operand:SWI48 1 "index_register_operand" "l")
7082 (match_operand 2 "const_0_to_3_operand"))
7083 (match_operand 3 "const_int_operand")))]
7084 "(unsigned HOST_WIDE_INT) INTVAL (operands[3])
7085 < (HOST_WIDE_INT_1U << INTVAL (operands[2]))"
7086 "#"
7087 "&& reload_completed"
7088 [(set (match_dup 0)
7089 (plus:SWI48
7090 (mult:SWI48 (match_dup 1) (match_dup 2))
7091 (match_dup 3)))]
7092 "operands[2] = GEN_INT (1 << INTVAL (operands[2]));"
7093 [(set_attr "type" "lea")
7094 (set_attr "mode" "<MODE>")])
7095 \f
7096 ;; Subtract instructions
7097
;; Named subtract expander for the SDWIM modes: operand 0 = operand 1 -
;; operand 2.  The RTL template is not used for code generation; the
;; preparation code hands the operands to ix86_expand_binary_operator with
;; code MINUS and ends with DONE.
7098 (define_expand "sub<mode>3"
7099 [(set (match_operand:SDWIM 0 "nonimmediate_operand")
7100 (minus:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")
7101 (match_operand:SDWIM 2 "<general_hilo_operand>")))]
7102 ""
7103 "ix86_expand_binary_operator (MINUS, <MODE>mode, operands); DONE;")
7104
;; <DWI>-mode subtract, clobbering the flags.  Output is "#"; after reload
;; it is split into a DWIH subtract on operands 0..2 that also sets the CC
;; flags from comparing operand 1 with operand 2, followed by a DWIH
;; subtract on operands 3..5 that additionally subtracts the borrow
;; (ltu of the flags).  If operand 2 is const0_rtx after the operands have
;; been split, only a plain subtract on operands 3..5 is expanded.
7105 (define_insn_and_split "*sub<dwi>3_doubleword"
7106 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
7107 (minus:<DWI>
7108 (match_operand:<DWI> 1 "nonimmediate_operand" "0,0")
7109 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o")))
7110 (clobber (reg:CC FLAGS_REG))]
7111 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
7112 "#"
7113 "&& reload_completed"
7114 [(parallel [(set (reg:CC FLAGS_REG)
7115 (compare:CC (match_dup 1) (match_dup 2)))
7116 (set (match_dup 0)
7117 (minus:DWIH (match_dup 1) (match_dup 2)))])
7118 (parallel [(set (match_dup 3)
7119 (minus:DWIH
7120 (minus:DWIH
7121 (match_dup 4)
7122 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
7123 (match_dup 5)))
7124 (clobber (reg:CC FLAGS_REG))])]
7125 {
7126 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
7127 if (operands[2] == const0_rtx)
7128 {
7129 ix86_expand_binary_operator (MINUS, <MODE>mode, &operands[3]);
7130 DONE;
7131 }
7132 })
7133
;; <DWI>-mode subtract of a zero-extended DWIH operand 2.  Split after
;; reload into a DWIH subtract of operand 2 that sets the CC flags, then a
;; subtract on operands 3 and 4 that removes only the borrow (the second
;; subtrahend is const 0).  Only operands 0 and 1 are passed to
;; split_double_mode (count 2); operand 2 is already DWIH.
7134 (define_insn_and_split "*sub<dwi>3_doubleword_zext"
7135 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o")
7136 (minus:<DWI>
7137 (match_operand:<DWI> 1 "nonimmediate_operand" "0,0")
7138 (zero_extend:<DWI>
7139 (match_operand:DWIH 2 "nonimmediate_operand" "rm,r"))))
7140 (clobber (reg:CC FLAGS_REG))]
7141 "ix86_binary_operator_ok (UNKNOWN, <DWI>mode, operands)"
7142 "#"
7143 "&& reload_completed"
7144 [(parallel [(set (reg:CC FLAGS_REG)
7145 (compare:CC (match_dup 1) (match_dup 2)))
7146 (set (match_dup 0)
7147 (minus:DWIH (match_dup 1) (match_dup 2)))])
7148 (parallel [(set (match_dup 3)
7149 (minus:DWIH
7150 (minus:DWIH
7151 (match_dup 4)
7152 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
7153 (const_int 0)))
7154 (clobber (reg:CC FLAGS_REG))])]
7155 "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[3]);")
7156
;; Basic SWI-mode subtract: operand 0 = operand 1 - operand 2, with operand
;; 1 tied to operand 0 and the flags clobbered.  Emitted as a single sub.
7157 (define_insn "*sub<mode>_1"
7158 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
7159 (minus:SWI
7160 (match_operand:SWI 1 "nonimmediate_operand" "0,0")
7161 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>")))
7162 (clobber (reg:CC FLAGS_REG))]
7163 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
7164 "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
7165 [(set_attr "type" "alu")
7166 (set_attr "mode" "<MODE>")])
7167
;; SImode subtract whose result is zero-extended into a DImode register.
;; Only available for TARGET_64BIT.  Emitted as sub{l} with the destination
;; printed through the %k modifier.
7168 (define_insn "*subsi_1_zext"
7169 [(set (match_operand:DI 0 "register_operand" "=r")
7170 (zero_extend:DI
7171 (minus:SI (match_operand:SI 1 "register_operand" "0")
7172 (match_operand:SI 2 "x86_64_general_operand" "rBMe"))))
7173 (clobber (reg:CC FLAGS_REG))]
7174 "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)"
7175 "sub{l}\t{%2, %k0|%k0, %2}"
7176 [(set_attr "type" "alu")
7177 (set_attr "mode" "SI")])
7178
;; SWI12 subtract into the strict_low_part of operand 0.  Alternative 0
;; (operand 1 tied to operand 0) is emitted directly as sub.
7179 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Alternative 1 is emitted as "#" and split after reload into a copy of
;; operand 1 into the low part of operand 0 followed by the in-place
;; subtract of operand 2.
7180 (define_insn_and_split "*sub<mode>_1_slp"
7181 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
7182 (minus:SWI12 (match_operand:SWI12 1 "register_operand" "0,!<r>")
7183 (match_operand:SWI12 2 "general_operand" "<r>mn,<r>mn")))
7184 (clobber (reg:CC FLAGS_REG))]
7185 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
7186 "@
7187 sub{<imodesuffix>}\t{%2, %0|%0, %2}
7188 #"
7189 "&& reload_completed"
7190 [(set (strict_low_part (match_dup 0)) (match_dup 1))
7191 (parallel
7192 [(set (strict_low_part (match_dup 0))
7193 (minus:SWI12 (match_dup 0) (match_dup 2)))
7194 (clobber (reg:CC FLAGS_REG))])]
7195 ""
7196 [(set_attr "type" "alu")
7197 (set_attr "mode" "<MODE>")])
7198
;; Subtract that also sets the flags from comparing the difference with
;; zero.  Accepted only when ix86_match_ccmode (insn, CCGOCmode) holds.
;; Emitted as a single sub.
7199 (define_insn "*sub<mode>_2"
7200 [(set (reg FLAGS_REG)
7201 (compare
7202 (minus:SWI
7203 (match_operand:SWI 1 "nonimmediate_operand" "0,0")
7204 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>"))
7205 (const_int 0)))
7206 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
7207 (minus:SWI (match_dup 1) (match_dup 2)))]
7208 "ix86_match_ccmode (insn, CCGOCmode)
7209 && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
7210 "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
7211 [(set_attr "type" "alu")
7212 (set_attr "mode" "<MODE>")])
7213
;; Flag-setting SImode subtract (difference compared with zero) whose
;; result is zero-extended into a DImode register.  Requires TARGET_64BIT
;; and ix86_match_ccmode (insn, CCGOCmode).  Emitted as sub{l} on %k0.
7214 (define_insn "*subsi_2_zext"
7215 [(set (reg FLAGS_REG)
7216 (compare
7217 (minus:SI (match_operand:SI 1 "register_operand" "0")
7218 (match_operand:SI 2 "x86_64_general_operand" "rBMe"))
7219 (const_int 0)))
7220 (set (match_operand:DI 0 "register_operand" "=r")
7221 (zero_extend:DI
7222 (minus:SI (match_dup 1)
7223 (match_dup 2))))]
7224 "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
7225 && ix86_binary_operator_ok (MINUS, SImode, operands)"
7226 "sub{l}\t{%2, %k0|%k0, %2}"
7227 [(set_attr "type" "alu")
7228 (set_attr "mode" "SI")])
7229
;; QImode subtraction between 8-bit fields of registers: the
;; (zero_extract ... (const_int 8) (const_int 8)) field of operand 0 is set
;; to the same field of operand 1 minus that of operand 2.  Emitted as one
;; sub{b} with the %h operand modifier; FLAGS_REG is clobbered.  Unlike the
;; add counterpart, operand 1 is not marked commutative and the condition
;; only accepts operand 0 equal to operand 1.
7230 (define_insn "*subqi_ext<mode>_2"
7231 [(set (zero_extract:SWI248
7232 (match_operand:SWI248 0 "register_operand" "+Q")
7233 (const_int 8)
7234 (const_int 8))
7235 (subreg:SWI248
7236 (minus:QI
7237 (subreg:QI
7238 (zero_extract:SWI248
7239 (match_operand:SWI248 1 "register_operand" "0")
7240 (const_int 8)
7241 (const_int 8)) 0)
7242 (subreg:QI
7243 (zero_extract:SWI248
7244 (match_operand:SWI248 2 "register_operand" "Q")
7245 (const_int 8)
7246 (const_int 8)) 0)) 0))
7247 (clobber (reg:CC FLAGS_REG))]
7248 "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
7249 rtx_equal_p (operands[0], operands[1])"
7250 "sub{b}\t{%h2, %h0|%h0, %h2}"
7251 [(set_attr "type" "alu")
7252 (set_attr "mode" "QI")])
7253
7254 ;; Subtract with jump on overflow.
;; Expands to a parallel that sets the CCO flags and stores
;; operand 1 - operand 2 in operand 0, followed by a conditional jump to
;; the label in operand 3 controlled by (eq (reg:CCO FLAGS_REG) 0).
;; Operand 4 is set by the preparation code: operand 2 itself when it is a
;; constant scalar integer, otherwise operand 2 sign-extended to
;; <DPWI>mode.
7255 (define_expand "subv<mode>4"
7256 [(parallel [(set (reg:CCO FLAGS_REG)
7257 (eq:CCO
7258 (minus:<DPWI>
7259 (sign_extend:<DPWI>
7260 (match_operand:SWIDWI 1 "nonimmediate_operand"))
7261 (match_dup 4))
7262 (sign_extend:<DPWI>
7263 (minus:SWIDWI (match_dup 1)
7264 (match_operand:SWIDWI 2
7265 "<general_hilo_operand>")))))
7266 (set (match_operand:SWIDWI 0 "register_operand")
7267 (minus:SWIDWI (match_dup 1) (match_dup 2)))])
7268 (set (pc) (if_then_else
7269 (eq (reg:CCO FLAGS_REG) (const_int 0))
7270 (label_ref (match_operand 3))
7271 (pc)))]
7272 ""
7273 {
7274 ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);
7275 if (CONST_SCALAR_INT_P (operands[2]))
7276 operands[4] = operands[2];
7277 else
7278 operands[4] = gen_rtx_SIGN_EXTEND (<DPWI>mode, operands[2]);
7279 })
7280
;; Overflow-checking subtract: the CCO flags are set from comparing the
;; difference of the sign-extended operands (in <DWI> mode) with the sign
;; extension of the SWI-mode difference, and operand 0 receives the
;; SWI-mode difference.  Emitted as a plain sub.
7281 (define_insn "*subv<mode>4"
7282 [(set (reg:CCO FLAGS_REG)
7283 (eq:CCO (minus:<DWI>
7284 (sign_extend:<DWI>
7285 (match_operand:SWI 1 "nonimmediate_operand" "0,0"))
7286 (sign_extend:<DWI>
7287 (match_operand:SWI 2 "<general_sext_operand>" "<r>We,m")))
7288 (sign_extend:<DWI>
7289 (minus:SWI (match_dup 1) (match_dup 2)))))
7290 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
7291 (minus:SWI (match_dup 1) (match_dup 2)))]
7292 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
7293 "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
7294 [(set_attr "type" "alu")
7295 (set_attr "mode" "<MODE>")])
7296
;; Overflow-checking subtract of an immediate.  Operand 2 is the immediate
;; in the operation mode and operand 3 the constant used in the
;; <DWI>-mode difference; the insn condition requires operand 2 to be a
;; CONST_INT whose INTVAL equals that of operand 3.
;; length_immediate is 1 for values in [-128, 127], 4 when <MODE_SIZE> is 8,
;; and <MODE_SIZE> otherwise.
7297 (define_insn "subv<mode>4_1"
7298 [(set (reg:CCO FLAGS_REG)
7299 (eq:CCO (minus:<DWI>
7300 (sign_extend:<DWI>
7301 (match_operand:SWI 1 "nonimmediate_operand" "0"))
7302 (match_operand:<DWI> 3 "const_int_operand"))
7303 (sign_extend:<DWI>
7304 (minus:SWI
7305 (match_dup 1)
7306 (match_operand:SWI 2 "x86_64_immediate_operand" "<i>")))))
7307 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
7308 (minus:SWI (match_dup 1) (match_dup 2)))]
7309 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
7310 && CONST_INT_P (operands[2])
7311 && INTVAL (operands[2]) == INTVAL (operands[3])"
7312 "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
7313 [(set_attr "type" "alu")
7314 (set_attr "mode" "<MODE>")
7315 (set (attr "length_immediate")
7316 (cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
7317 (const_string "1")
7318 (match_test "<MODE_SIZE> == 8")
7319 (const_string "4")]
7320 (const_string "<MODE_SIZE>")))])
7321
;; Overflow-checking subtract on <DWI>-mode operands.  Output is "#"; after
;; reload it is split into a DWIH subtract on operands 0..2 that sets the
;; CC flags from comparing operand 1 with operand 2, followed by a
;; subtract on operands 3..5 that also subtracts the borrow (ltu of the
;; flags) and sets the CCO flags.
7322 (define_insn_and_split "*subv<dwi>4_doubleword"
7323 [(set (reg:CCO FLAGS_REG)
7324 (eq:CCO
7325 (minus:<QPWI>
7326 (sign_extend:<QPWI>
7327 (match_operand:<DWI> 1 "nonimmediate_operand" "0,0"))
7328 (sign_extend:<QPWI>
7329 (match_operand:<DWI> 2 "nonimmediate_operand" "r,o")))
7330 (sign_extend:<QPWI>
7331 (minus:<DWI> (match_dup 1) (match_dup 2)))))
7332 (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
7333 (minus:<DWI> (match_dup 1) (match_dup 2)))]
7334 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
7335 "#"
7336 "&& reload_completed"
7337 [(parallel [(set (reg:CC FLAGS_REG)
7338 (compare:CC (match_dup 1) (match_dup 2)))
7339 (set (match_dup 0)
7340 (minus:DWIH (match_dup 1) (match_dup 2)))])
7341 (parallel [(set (reg:CCO FLAGS_REG)
7342 (eq:CCO
7343 (minus:<DWI>
7344 (minus:<DWI>
7345 (sign_extend:<DWI> (match_dup 4))
7346 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0)))
7347 (sign_extend:<DWI> (match_dup 5)))
7348 (sign_extend:<DWI>
7349 (minus:DWIH
7350 (minus:DWIH
7351 (match_dup 4)
7352 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
7353 (match_dup 5)))))
7354 (set (match_dup 3)
7355 (minus:DWIH
7356 (minus:DWIH
7357 (match_dup 4)
7358 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
7359 (match_dup 5)))])]
7360 {
7361 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
7362 })
7363
;; Overflow-checking <DWI>-mode subtract of a constant.  Operand 2 is the
;; constant in <DWI> mode and operand 3 the constant used in the
;; <QPWI>-mode difference; the insn condition requires operand 2 to be a
;; constant scalar integer that is rtx_equal_p to operand 3.  Always split
;; ("#") after reload into a DWIH subtract that sets CC plus a
;; subtract-with-borrow on operands 3..5 that sets CCO.  If operand 2 is
;; const0_rtx after the operands have been split, a single subv<mode>4_1
;; on operands 3..5 is emitted instead.
7364 (define_insn_and_split "*subv<dwi>4_doubleword_1"
7365 [(set (reg:CCO FLAGS_REG)
7366 (eq:CCO
7367 (minus:<QPWI>
7368 (sign_extend:<QPWI>
7369 (match_operand:<DWI> 1 "nonimmediate_operand" "0"))
7370 (match_operand:<QPWI> 3 "const_scalar_int_operand"))
7371 (sign_extend:<QPWI>
7372 (minus:<DWI>
7373 (match_dup 1)
7374 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "<di>")))))
7375 (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
7376 (minus:<DWI> (match_dup 1) (match_dup 2)))]
7377 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
7378 && CONST_SCALAR_INT_P (operands[2])
7379 && rtx_equal_p (operands[2], operands[3])"
7380 "#"
7381 "&& reload_completed"
7382 [(parallel [(set (reg:CC FLAGS_REG)
7383 (compare:CC (match_dup 1) (match_dup 2)))
7384 (set (match_dup 0)
7385 (minus:DWIH (match_dup 1) (match_dup 2)))])
7386 (parallel [(set (reg:CCO FLAGS_REG)
7387 (eq:CCO
7388 (minus:<DWI>
7389 (minus:<DWI>
7390 (sign_extend:<DWI> (match_dup 4))
7391 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0)))
7392 (match_dup 5))
7393 (sign_extend:<DWI>
7394 (minus:DWIH
7395 (minus:DWIH
7396 (match_dup 4)
7397 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
7398 (match_dup 5)))))
7399 (set (match_dup 3)
7400 (minus:DWIH
7401 (minus:DWIH
7402 (match_dup 4)
7403 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
7404 (match_dup 5)))])]
7405 {
7406 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
7407 if (operands[2] == const0_rtx)
7408 {
7409 emit_insn (gen_subv<mode>4_1 (operands[3], operands[4], operands[5],
7410 operands[5]));
7411 DONE;
7412 }
7413 })
7414
;; Overflow-checking subtract with borrow: operand 0 is set to
;; (operand 1 - carry) - operand 2 and the CCO flags are set from
;; comparing the <DWI>-mode difference of the sign-extended inputs with
;; the sign-extended SWI-mode difference.  Operators 4 and 5 are the carry
;; test on flags register operand 3 in <DWI> and SWI mode.  Emitted as sbb.
;; Operand 1 is the minuend of a non-commutative minus, so its constraint
;; must not carry the '%' commutativity modifier: that would allow
;; operands 1 and 2 to be interchanged to satisfy constraints, which is
;; only valid for commutative operations.
7415 (define_insn "*subv<mode>4_overflow_1"
7416 [(set (reg:CCO FLAGS_REG)
7417 (eq:CCO
7418 (minus:<DWI>
7419 (minus:<DWI>
7420 (sign_extend:<DWI>
7421 (match_operand:SWI 1 "nonimmediate_operand" "0,0"))
7422 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
7423 [(match_operand 3 "flags_reg_operand") (const_int 0)]))
7424 (sign_extend:<DWI>
7425 (match_operand:SWI 2 "<general_sext_operand>" "rWe,m")))
7426 (sign_extend:<DWI>
7427 (minus:SWI
7428 (minus:SWI
7429 (match_dup 1)
7430 (match_operator:SWI 5 "ix86_carry_flag_operator"
7431 [(match_dup 3) (const_int 0)]))
7432 (match_dup 2)))))
7433 (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r")
7434 (minus:SWI
7435 (minus:SWI
7436 (match_dup 1)
7437 (match_op_dup 5 [(match_dup 3) (const_int 0)]))
7438 (match_dup 2)))]
7439 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
7440 "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
7441 [(set_attr "type" "alu")
7442 (set_attr "mode" "<MODE>")])
7443
;; Immediate form of the overflow-checking subtract with borrow.  Operand 2
;; is the immediate in SWI mode and operand 6 the constant used in the
;; <DWI>-mode difference; the insn condition requires operand 2 to be a
;; CONST_INT with the same INTVAL as operand 6.  Emitted as sbb.
;; length_immediate is 1 for values in [-128, 127] and 4 otherwise.
;; Operand 1 is the minuend of a non-commutative minus, so its constraint
;; must not carry the '%' commutativity modifier.
7444 (define_insn "*subv<mode>4_overflow_2"
7445 [(set (reg:CCO FLAGS_REG)
7446 (eq:CCO
7447 (minus:<DWI>
7448 (minus:<DWI>
7449 (sign_extend:<DWI>
7450 (match_operand:SWI 1 "nonimmediate_operand" "0"))
7451 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
7452 [(match_operand 3 "flags_reg_operand") (const_int 0)]))
7453 (match_operand:<DWI> 6 "const_int_operand" "n"))
7454 (sign_extend:<DWI>
7455 (minus:SWI
7456 (minus:SWI
7457 (match_dup 1)
7458 (match_operator:SWI 5 "ix86_carry_flag_operator"
7459 [(match_dup 3) (const_int 0)]))
7460 (match_operand:SWI 2 "x86_64_immediate_operand" "e")))))
7461 (set (match_operand:SWI 0 "nonimmediate_operand" "=rm")
7462 (minus:SWI
7463 (minus:SWI
7464 (match_dup 1)
7465 (match_op_dup 5 [(match_dup 3) (const_int 0)]))
7466 (match_dup 2)))]
7467 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
7468 && CONST_INT_P (operands[2])
7469 && INTVAL (operands[2]) == INTVAL (operands[6])"
7470 "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
7471 [(set_attr "type" "alu")
7472 (set_attr "mode" "<MODE>")
7473 (set (attr "length_immediate")
7474 (if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
7475 (const_string "1")
7476 (const_string "4")))])
7477
;; Expander for a subtract followed by a conditional jump on the flags:
;; the parallel sets the CC flags from comparing operand 1 with operand 2
;; and stores operand 1 - operand 2 in operand 0; the jump to the label in
;; operand 3 is controlled by (ltu (reg:CC FLAGS_REG) 0).  The operands are
;; first passed through ix86_fixup_binary_operands_no_copy.
7478 (define_expand "usubv<mode>4"
7479 [(parallel [(set (reg:CC FLAGS_REG)
7480 (compare:CC
7481 (match_operand:SWI 1 "nonimmediate_operand")
7482 (match_operand:SWI 2 "<general_operand>")))
7483 (set (match_operand:SWI 0 "register_operand")
7484 (minus:SWI (match_dup 1) (match_dup 2)))])
7485 (set (pc) (if_then_else
7486 (ltu (reg:CC FLAGS_REG) (const_int 0))
7487 (label_ref (match_operand 3))
7488 (pc)))]
7489 ""
7490 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
7491
;; Subtract that sets the flags from a direct comparison of operand 1 with
;; operand 2 (rather than of the difference with zero) and stores the
;; difference in operand 0.  Accepted only when
;; ix86_match_ccmode (insn, CCmode) holds.  Emitted as a single sub.
7492 (define_insn "*sub<mode>_3"
7493 [(set (reg FLAGS_REG)
7494 (compare (match_operand:SWI 1 "nonimmediate_operand" "0,0")
7495 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>")))
7496 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
7497 (minus:SWI (match_dup 1) (match_dup 2)))]
7498 "ix86_match_ccmode (insn, CCmode)
7499 && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
7500 "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
7501 [(set_attr "type" "alu")
7502 (set_attr "mode" "<MODE>")])
7503
;; When a flag-setting register subtract carries a REG_UNUSED note for its
;; destination register (operand 0), replace the compare+subtract parallel
;; by the compare alone.
7504 (define_peephole2
7505 [(parallel
7506 [(set (reg:CC FLAGS_REG)
7507 (compare:CC (match_operand:SWI 0 "general_reg_operand")
7508 (match_operand:SWI 1 "general_gr_operand")))
7509 (set (match_dup 0)
7510 (minus:SWI (match_dup 0) (match_dup 1)))])]
7511 "find_regno_note (peep2_next_insn (0), REG_UNUSED, REGNO (operands[0])) != 0"
7512 [(set (reg:CC FLAGS_REG)
7513 (compare:CC (match_dup 0) (match_dup 1)))])
7514
7515 ;; decl %eax; cmpl $-1, %eax; jne .Lxx; can be optimized into
7516 ;; subl $1, %eax; jnc .Lxx;
;; Matches a decrement (add of -1 that clobbers the flags), a CCZ compare
;; of the result with -1 and a conditional jump on that compare, provided
;; the flags are dead after the third insn.  The replacement is a
;; flag-setting subtract of 1 and a jump on operand 3, which the
;; preparation code builds on the CCmode flags register: GEU when the
;; original comparison code is NE, LTU otherwise.
7517 (define_peephole2
7518 [(parallel
7519 [(set (match_operand:SWI 0 "general_reg_operand")
7520 (plus:SWI (match_dup 0) (const_int -1)))
7521 (clobber (reg FLAGS_REG))])
7522 (set (reg:CCZ FLAGS_REG)
7523 (compare:CCZ (match_dup 0) (const_int -1)))
7524 (set (pc)
7525 (if_then_else (match_operator 1 "bt_comparison_operator"
7526 [(reg:CCZ FLAGS_REG) (const_int 0)])
7527 (match_operand 2)
7528 (pc)))]
7529 "peep2_regno_dead_p (3, FLAGS_REG)"
7530 [(parallel
7531 [(set (reg:CC FLAGS_REG)
7532 (compare:CC (match_dup 0) (const_int 1)))
7533 (set (match_dup 0)
7534 (minus:SWI (match_dup 0) (const_int 1)))])
7535 (set (pc)
7536 (if_then_else (match_dup 3)
7537 (match_dup 2)
7538 (pc)))]
7539 {
7540 rtx cc = gen_rtx_REG (CCmode, FLAGS_REG);
7541 operands[3] = gen_rtx_fmt_ee (GET_CODE (operands[1]) == NE
7542 ? GEU : LTU, VOIDmode, cc, const0_rtx);
7543 })
7544
7545 ;; Help combine use borrow flag to test for -1 after dec (add $-1).
;; Matches a conditional select between (operand 2 - 1) and operand 3,
;; where the condition compares operand 2 with zero.  Requires
;; TARGET_CMOVE.  Output is "#"; after reload it is split into a
;; flag-setting subtract of 1 into operand 0 followed by a conditional
;; move between operand 0 and operand 3 on operand 4, which is built on
;; the CCCmode flags register: GEU when the original comparison code is
;; NE, LTU otherwise.
7546 (define_insn_and_split "*dec_cmov<mode>"
7547 [(set (match_operand:SWI248 0 "register_operand" "=r")
7548 (if_then_else:SWI248
7549 (match_operator 1 "bt_comparison_operator"
7550 [(match_operand:SWI248 2 "register_operand" "0") (const_int 0)])
7551 (plus:SWI248 (match_dup 2) (const_int -1))
7552 (match_operand:SWI248 3 "nonimmediate_operand" "rm")))
7553 (clobber (reg:CC FLAGS_REG))]
7554 "TARGET_CMOVE"
7555 "#"
7556 "&& reload_completed"
7557 [(parallel [(set (reg:CC FLAGS_REG)
7558 (compare:CC (match_dup 2) (const_int 1)))
7559 (set (match_dup 0) (minus:SWI248 (match_dup 2) (const_int 1)))])
7560 (set (match_dup 0)
7561 (if_then_else:SWI248 (match_dup 4) (match_dup 0) (match_dup 3)))]
7562 {
7563 rtx cc = gen_rtx_REG (CCCmode, FLAGS_REG);
7564 operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[1]) == NE
7565 ? GEU : LTU, VOIDmode, cc, const0_rtx);
7566 })
7567
;; SImode subtract that sets the flags from a direct comparison of
;; operand 1 with operand 2 and zero-extends the difference into a DImode
;; register.  Requires TARGET_64BIT and ix86_match_ccmode (insn, CCmode).
;; The template prints the destination as %1, the SImode input that is
;; tied to operand 0 by its "0" constraint.
7568 (define_insn "*subsi_3_zext"
7569 [(set (reg FLAGS_REG)
7570 (compare (match_operand:SI 1 "register_operand" "0")
7571 (match_operand:SI 2 "x86_64_general_operand" "rBMe")))
7572 (set (match_operand:DI 0 "register_operand" "=r")
7573 (zero_extend:DI
7574 (minus:SI (match_dup 1)
7575 (match_dup 2))))]
7576 "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)
7577 && ix86_binary_operator_ok (MINUS, SImode, operands)"
7578 "sub{l}\t{%2, %1|%1, %2}"
7579 [(set_attr "type" "alu")
7580 (set_attr "mode" "SI")])
7581 \f
7582 ;; Add with carry and subtract with borrow
7583
;; Add with carry: operand 0 = (carry + operand 1) + operand 2, where the
;; carry is operator 4 (an ix86_carry_flag_operator on flags register
;; operand 3).  The flags are clobbered.  Emitted as adc; the insn is
;; tagged use_carry "1" and pent_pair "pu".
7584 (define_insn "@add<mode>3_carry"
7585 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
7586 (plus:SWI
7587 (plus:SWI
7588 (match_operator:SWI 4 "ix86_carry_flag_operator"
7589 [(match_operand 3 "flags_reg_operand") (const_int 0)])
7590 (match_operand:SWI 1 "nonimmediate_operand" "%0,0"))
7591 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>")))
7592 (clobber (reg:CC FLAGS_REG))]
7593 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
7594 "adc{<imodesuffix>}\t{%2, %0|%0, %2}"
7595 [(set_attr "type" "alu")
7596 (set_attr "use_carry" "1")
7597 (set_attr "pent_pair" "pu")
7598 (set_attr "mode" "<MODE>")])
7599
;; Add only the carry to operand 1: operand 0 = carry + operand 1, with no
;; second addend.  Emitted as adc with an immediate 0.  A memory
;; destination is accepted only when it is rtx_equal_p to operand 1.
7600 (define_insn "*add<mode>3_carry_0"
7601 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
7602 (plus:SWI
7603 (match_operator:SWI 2 "ix86_carry_flag_operator"
7604 [(reg FLAGS_REG) (const_int 0)])
7605 (match_operand:SWI 1 "nonimmediate_operand" "0")))
7606 (clobber (reg:CC FLAGS_REG))]
7607 "!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[1])"
7608 "adc{<imodesuffix>}\t{$0, %0|%0, 0}"
7609 [(set_attr "type" "alu")
7610 (set_attr "use_carry" "1")
7611 (set_attr "pent_pair" "pu")
7612 (set_attr "mode" "<MODE>")])
7613
;; Like the carry-only add above, but operator 2 is matched by
;; ix86_carry_flag_unset_operator instead of ix86_carry_flag_operator.
;; Emitted as sbb with an immediate -1, i.e. operand - (-1) - CF, which
;; adds 1 exactly when the carry flag is clear.  A memory destination is
;; accepted only when it is rtx_equal_p to operand 1.
7614 (define_insn "*add<mode>3_carry_0r"
7615 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
7616 (plus:SWI
7617 (match_operator:SWI 2 "ix86_carry_flag_unset_operator"
7618 [(reg FLAGS_REG) (const_int 0)])
7619 (match_operand:SWI 1 "nonimmediate_operand" "0")))
7620 (clobber (reg:CC FLAGS_REG))]
7621 "!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[1])"
7622 "sbb{<imodesuffix>}\t{$-1, %0|%0, -1}"
7623 [(set_attr "type" "alu")
7624 (set_attr "use_carry" "1")
7625 (set_attr "pent_pair" "pu")
7626 (set_attr "mode" "<MODE>")])
7627
;; 64-bit only: SImode add with carry whose result is zero-extended into
;; the DImode register operand 0.  Emitted as a 32-bit ADC; the template
;; prints operand 0 through %k0.
7628 (define_insn "*addsi3_carry_zext"
7629 [(set (match_operand:DI 0 "register_operand" "=r")
7630 (zero_extend:DI
7631 (plus:SI
7632 (plus:SI (match_operator:SI 3 "ix86_carry_flag_operator"
7633 [(reg FLAGS_REG) (const_int 0)])
7634 (match_operand:SI 1 "register_operand" "%0"))
7635 (match_operand:SI 2 "x86_64_general_operand" "rBMe"))))
7636 (clobber (reg:CC FLAGS_REG))]
7637 "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
7638 "adc{l}\t{%2, %k0|%k0, %2}"
7639 [(set_attr "type" "alu")
7640 (set_attr "use_carry" "1")
7641 (set_attr "pent_pair" "pu")
7642 (set_attr "mode" "SI")])
7643
;; 64-bit only: add just the carry to SImode operand 1 and zero-extend
;; the sum into DImode operand 0; emitted as a 32-bit ADC with
;; immediate 0.
7644 (define_insn "*addsi3_carry_zext_0"
7645 [(set (match_operand:DI 0 "register_operand" "=r")
7646 (zero_extend:DI
7647 (plus:SI (match_operator:SI 2 "ix86_carry_flag_operator"
7648 [(reg FLAGS_REG) (const_int 0)])
7649 (match_operand:SI 1 "register_operand" "0"))))
7650 (clobber (reg:CC FLAGS_REG))]
7651 "TARGET_64BIT"
7652 "adc{l}\t{$0, %k0|%k0, 0}"
7653 [(set_attr "type" "alu")
7654 (set_attr "use_carry" "1")
7655 (set_attr "pent_pair" "pu")
7656 (set_attr "mode" "SI")])
7657
;; 64-bit only: add the value of an ix86_carry_flag_unset_operator to
;; SImode operand 1 and zero-extend the sum into DImode operand 0.
;; Emitted as a 32-bit SBB with immediate -1 (op + 1 - CF).
7658 (define_insn "*addsi3_carry_zext_0r"
7659 [(set (match_operand:DI 0 "register_operand" "=r")
7660 (zero_extend:DI
7661 (plus:SI (match_operator:SI 2 "ix86_carry_flag_unset_operator"
7662 [(reg FLAGS_REG) (const_int 0)])
7663 (match_operand:SI 1 "register_operand" "0"))))
7664 (clobber (reg:CC FLAGS_REG))]
7665 "TARGET_64BIT"
7666 "sbb{l}\t{$-1, %k0|%k0, -1}"
7667 [(set_attr "type" "alu")
7668 (set_attr "use_carry" "1")
7669 (set_attr "pent_pair" "pu")
7670 (set_attr "mode" "SI")])
7671
7672 ;; There is no point in generating the ADCX instruction. ADC is shorter and faster.
7673
;; Add with carry-in and carry-out for SWI48 modes.  The first set writes
;; the CCC flags from a <DWI>-mode comparison of the zero-extended sum
;; (carry + operand 1 + operand 2) against (zero-extended operand 2 +
;; carry); the second set stores the sum in operand 0.  Operators 4 and 5
;; are the same carry test on flags operand 3, in <DWI> and <MODE>
;; respectively.  Emitted as a single ADC.
7674 (define_insn "addcarry<mode>"
7675 [(set (reg:CCC FLAGS_REG)
7676 (compare:CCC
7677 (zero_extend:<DWI>
7678 (plus:SWI48
7679 (plus:SWI48
7680 (match_operator:SWI48 5 "ix86_carry_flag_operator"
7681 [(match_operand 3 "flags_reg_operand") (const_int 0)])
7682 (match_operand:SWI48 1 "nonimmediate_operand" "%0,0"))
7683 (match_operand:SWI48 2 "nonimmediate_operand" "r,rm")))
7684 (plus:<DWI>
7685 (zero_extend:<DWI> (match_dup 2))
7686 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
7687 [(match_dup 3) (const_int 0)]))))
7688 (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
7689 (plus:SWI48 (plus:SWI48 (match_op_dup 5
7690 [(match_dup 3) (const_int 0)])
7691 (match_dup 1))
7692 (match_dup 2)))]
7693 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
7694 "adc{<imodesuffix>}\t{%2, %0|%0, %2}"
7695 [(set_attr "type" "alu")
7696 (set_attr "use_carry" "1")
7697 (set_attr "pent_pair" "pu")
7698 (set_attr "mode" "<MODE>")])
7699
;; Expander for the add-with-carry-out form that has no carry-in:
;; the CCC flags come from comparing (operand 1 + operand 2) against
;; operand 1, and operand 0 receives the sum, both in one parallel.
7700 (define_expand "addcarry<mode>_0"
7701 [(parallel
7702 [(set (reg:CCC FLAGS_REG)
7703 (compare:CCC
7704 (plus:SWI48
7705 (match_operand:SWI48 1 "nonimmediate_operand")
7706 (match_operand:SWI48 2 "x86_64_general_operand"))
7707 (match_dup 1)))
7708 (set (match_operand:SWI48 0 "nonimmediate_operand")
7709 (plus:SWI48 (match_dup 1) (match_dup 2)))])]
7710 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)")
7711
;; Immediate variant of the carry-in/carry-out add: operand 2 must be a
;; CONST_INT, and operand 6 is the separate <DWI>-mode constant used on
;; the comparison side.  The insn condition verifies that operand 6 is
;; operand 2 zero-extended: as a CONST_INT when <MODE> is SImode or
;; operand 2 is non-negative, otherwise as a two-element CONST_WIDE_INT
;; whose high element is 0.  length_immediate is 1 when operand 2 lies in
;; [-128, 127] and 4 otherwise.
7712 (define_insn "*addcarry<mode>_1"
7713 [(set (reg:CCC FLAGS_REG)
7714 (compare:CCC
7715 (zero_extend:<DWI>
7716 (plus:SWI48
7717 (plus:SWI48
7718 (match_operator:SWI48 5 "ix86_carry_flag_operator"
7719 [(match_operand 3 "flags_reg_operand") (const_int 0)])
7720 (match_operand:SWI48 1 "nonimmediate_operand" "%0"))
7721 (match_operand:SWI48 2 "x86_64_immediate_operand" "e")))
7722 (plus:<DWI>
7723 (match_operand:<DWI> 6 "const_scalar_int_operand")
7724 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
7725 [(match_dup 3) (const_int 0)]))))
7726 (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm")
7727 (plus:SWI48 (plus:SWI48 (match_op_dup 5
7728 [(match_dup 3) (const_int 0)])
7729 (match_dup 1))
7730 (match_dup 2)))]
7731 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
7732 && CONST_INT_P (operands[2])
7733 /* Check that operands[6] is operands[2] zero extended from
7734 <MODE>mode to <DWI>mode. */
7735 && ((<MODE>mode == SImode || INTVAL (operands[2]) >= 0)
7736 ? (CONST_INT_P (operands[6])
7737 && UINTVAL (operands[6]) == (UINTVAL (operands[2])
7738 & GET_MODE_MASK (<MODE>mode)))
7739 : (CONST_WIDE_INT_P (operands[6])
7740 && CONST_WIDE_INT_NUNITS (operands[6]) == 2
7741 && ((unsigned HOST_WIDE_INT) CONST_WIDE_INT_ELT (operands[6], 0)
7742 == UINTVAL (operands[2]))
7743 && CONST_WIDE_INT_ELT (operands[6], 1) == 0))"
7744 "adc{<imodesuffix>}\t{%2, %0|%0, %2}"
7745 [(set_attr "type" "alu")
7746 (set_attr "use_carry" "1")
7747 (set_attr "pent_pair" "pu")
7748 (set_attr "mode" "<MODE>")
7749 (set (attr "length_immediate")
7750 (if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
7751 (const_string "1")
7752 (const_string "4")))])
7753
;; Subtract with borrow for every SWI mode: operand 0 = operand 1
;; - (carry operator 4 applied to flags operand 3) - operand 2, emitted
;; as a single SBB.  Operand 1 is tied to operand 0; FLAGS_REG is
;; clobbered.
7754 (define_insn "@sub<mode>3_carry"
7755 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
7756 (minus:SWI
7757 (minus:SWI
7758 (match_operand:SWI 1 "nonimmediate_operand" "0,0")
7759 (match_operator:SWI 4 "ix86_carry_flag_operator"
7760 [(match_operand 3 "flags_reg_operand") (const_int 0)]))
7761 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>")))
7762 (clobber (reg:CC FLAGS_REG))]
7763 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
7764 "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
7765 [(set_attr "type" "alu")
7766 (set_attr "use_carry" "1")
7767 (set_attr "pent_pair" "pu")
7768 (set_attr "mode" "<MODE>")])
7769
;; Subtract only the borrow (operator 2 on FLAGS_REG) from operand 1;
;; emitted as SBB with an immediate 0.  A memory destination is accepted
;; only when it is the same rtx as operand 1.
7770 (define_insn "*sub<mode>3_carry_0"
7771 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
7772 (minus:SWI
7773 (match_operand:SWI 1 "nonimmediate_operand" "0")
7774 (match_operator:SWI 2 "ix86_carry_flag_operator"
7775 [(reg FLAGS_REG) (const_int 0)])))
7776 (clobber (reg:CC FLAGS_REG))]
7777 "!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[1])"
7778 "sbb{<imodesuffix>}\t{$0, %0|%0, 0}"
7779 [(set_attr "type" "alu")
7780 (set_attr "use_carry" "1")
7781 (set_attr "pent_pair" "pu")
7782 (set_attr "mode" "<MODE>")])
7783
;; Subtract the value of an ix86_carry_flag_unset_operator on FLAGS_REG
;; from operand 1.  Emitted as ADC with immediate -1, i.e. op + (-1) + CF
;; = op - (1 - CF).  NOTE(review): this presumes the predicate accepts
;; the "carry clear" comparison -- confirm in predicates.md.
7784 (define_insn "*sub<mode>3_carry_0r"
7785 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
7786 (minus:SWI
7787 (match_operand:SWI 1 "nonimmediate_operand" "0")
7788 (match_operator:SWI 2 "ix86_carry_flag_unset_operator"
7789 [(reg FLAGS_REG) (const_int 0)])))
7790 (clobber (reg:CC FLAGS_REG))]
7791 "!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[1])"
7792 "adc{<imodesuffix>}\t{$-1, %0|%0, -1}"
7793 [(set_attr "type" "alu")
7794 (set_attr "use_carry" "1")
7795 (set_attr "pent_pair" "pu")
7796 (set_attr "mode" "<MODE>")])
7797
;; 64-bit only: SImode subtract with borrow whose result is zero-extended
;; into the DImode register operand 0.  Emitted as a 32-bit SBB; the
;; template prints operand 0 through %k0.
7798 (define_insn "*subsi3_carry_zext"
7799 [(set (match_operand:DI 0 "register_operand" "=r")
7800 (zero_extend:DI
7801 (minus:SI
7802 (minus:SI
7803 (match_operand:SI 1 "register_operand" "0")
7804 (match_operator:SI 3 "ix86_carry_flag_operator"
7805 [(reg FLAGS_REG) (const_int 0)]))
7806 (match_operand:SI 2 "x86_64_general_operand" "rBMe"))))
7807 (clobber (reg:CC FLAGS_REG))]
7808 "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)"
7809 "sbb{l}\t{%2, %k0|%k0, %2}"
7810 [(set_attr "type" "alu")
7811 (set_attr "use_carry" "1")
7812 (set_attr "pent_pair" "pu")
7813 (set_attr "mode" "SI")])
7814
;; 64-bit only: subtract just the borrow from SImode operand 1 and
;; zero-extend the result into DImode operand 0; emitted as a 32-bit SBB
;; with immediate 0.
7815 (define_insn "*subsi3_carry_zext_0"
7816 [(set (match_operand:DI 0 "register_operand" "=r")
7817 (zero_extend:DI
7818 (minus:SI
7819 (match_operand:SI 1 "register_operand" "0")
7820 (match_operator:SI 2 "ix86_carry_flag_operator"
7821 [(reg FLAGS_REG) (const_int 0)]))))
7822 (clobber (reg:CC FLAGS_REG))]
7823 "TARGET_64BIT"
7824 "sbb{l}\t{$0, %k0|%k0, 0}"
7825 [(set_attr "type" "alu")
7826 (set_attr "use_carry" "1")
7827 (set_attr "pent_pair" "pu")
7828 (set_attr "mode" "SI")])
7829
;; 64-bit only: subtract the value of an ix86_carry_flag_unset_operator
;; from SImode operand 1 and zero-extend the result into DImode
;; operand 0.  Emitted as a 32-bit ADC with immediate -1 (op - 1 + CF).
7830 (define_insn "*subsi3_carry_zext_0r"
7831 [(set (match_operand:DI 0 "register_operand" "=r")
7832 (zero_extend:DI
7833 (minus:SI
7834 (match_operand:SI 1 "register_operand" "0")
7835 (match_operator:SI 2 "ix86_carry_flag_unset_operator"
7836 [(reg FLAGS_REG) (const_int 0)]))))
7837 (clobber (reg:CC FLAGS_REG))]
7838 "TARGET_64BIT"
7839 "adc{l}\t{$-1, %k0|%k0, -1}"
7840 [(set_attr "type" "alu")
7841 (set_attr "use_carry" "1")
7842 (set_attr "pent_pair" "pu")
7843 (set_attr "mode" "SI")])
7844
;; Flags-only subtract with borrow: sets the CCC flags from a <DWI>-mode
;; comparison of zero-extended operand 1 against (borrow + zero-extended
;; operand 2).  The arithmetic result is not kept: operand 0 is a
;; clobbered scratch register and operand 1 is tied to it.
7845 (define_insn "@sub<mode>3_carry_ccc"
7846 [(set (reg:CCC FLAGS_REG)
7847 (compare:CCC
7848 (zero_extend:<DWI> (match_operand:DWIH 1 "register_operand" "0"))
7849 (plus:<DWI>
7850 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))
7851 (zero_extend:<DWI>
7852 (match_operand:DWIH 2 "x86_64_sext_operand" "rmWe")))))
7853 (clobber (match_scratch:DWIH 0 "=r"))]
7854 ""
7855 "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
7856 [(set_attr "type" "alu")
7857 (set_attr "mode" "<MODE>")])
7858
;; Flags-only subtract with borrow against a <DWI>-mode constant
;; (operand 2, x86_64_dwzext_immediate_operand).  For output, the C
;; fragment takes the <MODE>-mode subreg of that constant at byte
;; offset 0 as operand 3 and prints it as the SBB source.  Operand 0 is
;; a clobbered scratch tied to operand 1.
7859 (define_insn "*sub<mode>3_carry_ccc_1"
7860 [(set (reg:CCC FLAGS_REG)
7861 (compare:CCC
7862 (zero_extend:<DWI> (match_operand:DWIH 1 "register_operand" "0"))
7863 (plus:<DWI>
7864 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))
7865 (match_operand:<DWI> 2 "x86_64_dwzext_immediate_operand" "Wf"))))
7866 (clobber (match_scratch:DWIH 0 "=r"))]
7867 ""
7868 {
7869 operands[3] = simplify_subreg (<MODE>mode, operands[2], <DWI>mode, 0);
7870 return "sbb{<imodesuffix>}\t{%3, %0|%0, %3}";
7871 }
7872 [(set_attr "type" "alu")
7873 (set_attr "mode" "<MODE>")])
7874
7875 ;; The sign flag is set from the
7876 ;; (compare (match_dup 1) (plus:DWIH (ltu:DWIH ...) (match_dup 2)))
7877 ;; result, the overflow flag likewise, but the overflow flag is also
7878 ;; set if the (plus:DWIH (ltu:DWIH ...) (match_dup 2)) overflows.
;; Flags-only SBB producing CCGZ flags.  The flag computation is
;; represented opaquely as UNSPEC_SBB over operand 1, operand 2 and the
;; incoming borrow; operand 0 is a clobbered scratch tied to operand 1.
7879 (define_insn "@sub<mode>3_carry_ccgz"
7880 [(set (reg:CCGZ FLAGS_REG)
7881 (unspec:CCGZ [(match_operand:DWIH 1 "register_operand" "0")
7882 (match_operand:DWIH 2 "x86_64_general_operand" "rBMe")
7883 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))]
7884 UNSPEC_SBB))
7885 (clobber (match_scratch:DWIH 0 "=r"))]
7886 ""
7887 "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
7888 [(set_attr "type" "alu")
7889 (set_attr "mode" "<MODE>")])
7890
;; Subtract with borrow-in and borrow-out for SWI48 modes.  The first set
;; writes the CCC flags from a <DWI>-mode comparison of zero-extended
;; operand 1 against (borrow + zero-extended operand 2); the second set
;; stores operand 1 - borrow - operand 2 in register operand 0.
;; Operators 4 and 5 test the same flags operand 3 in <DWI> and <MODE>
;; respectively.  Emitted as a single SBB.
7891 (define_insn "subborrow<mode>"
7892 [(set (reg:CCC FLAGS_REG)
7893 (compare:CCC
7894 (zero_extend:<DWI>
7895 (match_operand:SWI48 1 "nonimmediate_operand" "0"))
7896 (plus:<DWI>
7897 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
7898 [(match_operand 3 "flags_reg_operand") (const_int 0)])
7899 (zero_extend:<DWI>
7900 (match_operand:SWI48 2 "nonimmediate_operand" "rm")))))
7901 (set (match_operand:SWI48 0 "register_operand" "=r")
7902 (minus:SWI48 (minus:SWI48
7903 (match_dup 1)
7904 (match_operator:SWI48 5 "ix86_carry_flag_operator"
7905 [(match_dup 3) (const_int 0)]))
7906 (match_dup 2)))]
7907 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
7908 "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
7909 [(set_attr "type" "alu")
7910 (set_attr "use_carry" "1")
7911 (set_attr "pent_pair" "pu")
7912 (set_attr "mode" "<MODE>")])
7913
;; Expander for the subtract-with-borrow-out form that has no borrow-in:
;; one parallel that sets the CC flags from comparing operand 1 with
;; operand 2 and stores operand 1 - operand 2 in register operand 0.
7914 (define_expand "subborrow<mode>_0"
7915 [(parallel
7916 [(set (reg:CC FLAGS_REG)
7917 (compare:CC
7918 (match_operand:SWI48 1 "nonimmediate_operand")
7919 (match_operand:SWI48 2 "<general_operand>")))
7920 (set (match_operand:SWI48 0 "register_operand")
7921 (minus:SWI48 (match_dup 1) (match_dup 2)))])]
7922 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)")
7923
;; Mode iterator over the two flag modes CC and CCC.
7924 (define_mode_iterator CC_CCC [CC CCC])
7925
7926 ;; Pre-reload splitter to optimize
7927 ;; *setcc_qi followed by *addqi3_cconly_overflow_1 with the same QI
7928 ;; operand and no intervening flags modifications into nothing.
;; Matches only while ix86_pre_reload_split () holds and always splits
;; ("&& 1").  The replacement sequence is just (const_int 0), so no real
;; instruction is generated for the matched flags computation.
;; Instantiated once per CC_CCC mode of the source flags register.
7929 (define_insn_and_split "*setcc_qi_addqi3_cconly_overflow_1_<mode>"
7930 [(set (reg:CCC FLAGS_REG)
7931 (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
7932 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))))]
7933 "ix86_pre_reload_split ()"
7934 "#"
7935 "&& 1"
7936 [(const_int 0)])
7937 \f
7938 ;; Overflow setting add instructions
7939
;; Expander for a QImode add performed only for its carry: the CCC flags
;; come from comparing (operand 0 + operand 1) against operand 0, and the
;; sum itself goes to a clobbered QI scratch.  Rejected when both
;; operands are memory.
7940 (define_expand "addqi3_cconly_overflow"
7941 [(parallel
7942 [(set (reg:CCC FLAGS_REG)
7943 (compare:CCC
7944 (plus:QI
7945 (match_operand:QI 0 "nonimmediate_operand")
7946 (match_operand:QI 1 "general_operand"))
7947 (match_dup 0)))
7948 (clobber (scratch:QI))])]
7949 "!(MEM_P (operands[0]) && MEM_P (operands[1]))")
7950
;; Flags-only ADD: sets the CCC flags from comparing (operand 1 +
;; operand 2) against operand 1.  The sum lands in scratch register
;; operand 0, to which operand 1 is tied.  Not matched when both inputs
;; are memory.
7951 (define_insn "*add<mode>3_cconly_overflow_1"
7952 [(set (reg:CCC FLAGS_REG)
7953 (compare:CCC
7954 (plus:SWI
7955 (match_operand:SWI 1 "nonimmediate_operand" "%0")
7956 (match_operand:SWI 2 "<general_operand>" "<g>"))
7957 (match_dup 1)))
7958 (clobber (match_scratch:SWI 0 "=<r>"))]
7959 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
7960 "add{<imodesuffix>}\t{%2, %0|%0, %2}"
7961 [(set_attr "type" "alu")
7962 (set_attr "mode" "<MODE>")])
7963
;; ADD that both stores the sum in operand 0 and sets the CCC flags from
;; comparing the sum against operand 1.
7964 (define_insn "*add<mode>3_cc_overflow_1"
7965 [(set (reg:CCC FLAGS_REG)
7966 (compare:CCC
7967 (plus:SWI
7968 (match_operand:SWI 1 "nonimmediate_operand" "%0,0")
7969 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>"))
7970 (match_dup 1)))
7971 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
7972 (plus:SWI (match_dup 1) (match_dup 2)))]
7973 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
7974 "add{<imodesuffix>}\t{%2, %0|%0, %2}"
7975 [(set_attr "type" "alu")
7976 (set_attr "mode" "<MODE>")])
7977
;; Peephole: "reg = reg + mem" (setting CCC against reg) followed by
;; "mem = reg" is rewritten as one read-modify-write "mem = mem + reg"
;; whose CCC comparison is against mem.  Applies when
;; TARGET_READ_MODIFY_WRITE is set or the insn is optimized for size,
;; peep2_reg_dead_p (2, ...) holds for the register, and the register
;; is not mentioned in the memory operand.
7978 (define_peephole2
7979 [(parallel [(set (reg:CCC FLAGS_REG)
7980 (compare:CCC
7981 (plus:SWI (match_operand:SWI 0 "general_reg_operand")
7982 (match_operand:SWI 1 "memory_operand"))
7983 (match_dup 0)))
7984 (set (match_dup 0) (plus:SWI (match_dup 0) (match_dup 1)))])
7985 (set (match_dup 1) (match_dup 0))]
7986 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
7987 && peep2_reg_dead_p (2, operands[0])
7988 && !reg_overlap_mentioned_p (operands[0], operands[1])"
7989 [(parallel [(set (reg:CCC FLAGS_REG)
7990 (compare:CCC
7991 (plus:SWI (match_dup 1) (match_dup 0))
7992 (match_dup 1)))
7993 (set (match_dup 1) (plus:SWI (match_dup 1) (match_dup 0)))])])
7994
;; 64-bit only: SImode ADD that sets the CCC flags (sum compared against
;; operand 1) and stores the zero-extended sum in DImode register
;; operand 0; emitted as a 32-bit ADD on %k0.
7995 (define_insn "*addsi3_zext_cc_overflow_1"
7996 [(set (reg:CCC FLAGS_REG)
7997 (compare:CCC
7998 (plus:SI
7999 (match_operand:SI 1 "nonimmediate_operand" "%0")
8000 (match_operand:SI 2 "x86_64_general_operand" "rBMe"))
8001 (match_dup 1)))
8002 (set (match_operand:DI 0 "register_operand" "=r")
8003 (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
8004 "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
8005 "add{l}\t{%2, %k0|%k0, %2}"
8006 [(set_attr "type" "alu")
8007 (set_attr "mode" "SI")])
8008
;; Flags-only ADD whose CCC comparison is against operand 2 (the second
;; addend) rather than operand 1.  The sum lands in scratch register
;; operand 0; not matched when both inputs are memory.
8009 (define_insn "*add<mode>3_cconly_overflow_2"
8010 [(set (reg:CCC FLAGS_REG)
8011 (compare:CCC
8012 (plus:SWI
8013 (match_operand:SWI 1 "nonimmediate_operand" "%0")
8014 (match_operand:SWI 2 "<general_operand>" "<g>"))
8015 (match_dup 2)))
8016 (clobber (match_scratch:SWI 0 "=<r>"))]
8017 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
8018 "add{<imodesuffix>}\t{%2, %0|%0, %2}"
8019 [(set_attr "type" "alu")
8020 (set_attr "mode" "<MODE>")])
8021
;; ADD that stores the sum in operand 0 and sets the CCC flags from
;; comparing the sum against operand 2 (the second addend).
8022 (define_insn "*add<mode>3_cc_overflow_2"
8023 [(set (reg:CCC FLAGS_REG)
8024 (compare:CCC
8025 (plus:SWI
8026 (match_operand:SWI 1 "nonimmediate_operand" "%0,0")
8027 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>"))
8028 (match_dup 2)))
8029 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
8030 (plus:SWI (match_dup 1) (match_dup 2)))]
8031 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
8032 "add{<imodesuffix>}\t{%2, %0|%0, %2}"
8033 [(set_attr "type" "alu")
8034 (set_attr "mode" "<MODE>")])
8035
;; 64-bit only: SImode ADD that sets the CCC flags (sum compared against
;; operand 2) and stores the zero-extended sum in DImode register
;; operand 0; emitted as a 32-bit ADD on %k0.
8036 (define_insn "*addsi3_zext_cc_overflow_2"
8037 [(set (reg:CCC FLAGS_REG)
8038 (compare:CCC
8039 (plus:SI
8040 (match_operand:SI 1 "nonimmediate_operand" "%0")
8041 (match_operand:SI 2 "x86_64_general_operand" "rBMe"))
8042 (match_dup 2)))
8043 (set (match_operand:DI 0 "register_operand" "=r")
8044 (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
8045 "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
8046 "add{l}\t{%2, %k0|%k0, %2}"
8047 [(set_attr "type" "alu")
8048 (set_attr "mode" "SI")])
8049
;; Double-word ADD that stores the sum and sets the CCC flags (sum
;; compared against operand 1).  It is emitted as "#" and split after
;; reload into two word-sized insns: an ADD of operands 1 and 2 into
;; operand 0 that sets CCC, then an add-with-carry of operands 4 and 5
;; into operand 3 that sets CCC again.
;; The preparation code calls split_double_mode on the three operands,
;; writing results at operands[0..2] and operands[3..5] (presumably the
;; low and high halves respectively -- confirm against
;; split_double_mode).  If operands[2] is then const0_rtx, only
;; gen_addcarry<mode>_0 on operands 3, 4 and 5 is emitted.  Otherwise
;; operands[6] becomes operands[5] zero-extended to <DWI>mode, folded at
;; compile time when operands[5] is a CONST_INT.
8050 (define_insn_and_split "*add<dwi>3_doubleword_cc_overflow_1"
8051 [(set (reg:CCC FLAGS_REG)
8052 (compare:CCC
8053 (plus:<DWI>
8054 (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0")
8055 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o"))
8056 (match_dup 1)))
8057 (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
8058 (plus:<DWI> (match_dup 1) (match_dup 2)))]
8059 "ix86_binary_operator_ok (PLUS, <DWI>mode, operands)"
8060 "#"
8061 "&& reload_completed"
8062 [(parallel [(set (reg:CCC FLAGS_REG)
8063 (compare:CCC
8064 (plus:DWIH (match_dup 1) (match_dup 2))
8065 (match_dup 1)))
8066 (set (match_dup 0)
8067 (plus:DWIH (match_dup 1) (match_dup 2)))])
8068 (parallel [(set (reg:CCC FLAGS_REG)
8069 (compare:CCC
8070 (zero_extend:<DWI>
8071 (plus:DWIH
8072 (plus:DWIH
8073 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
8074 (match_dup 4))
8075 (match_dup 5)))
8076 (plus:<DWI>
8077 (match_dup 6)
8078 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0)))))
8079 (set (match_dup 3)
8080 (plus:DWIH
8081 (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
8082 (match_dup 4))
8083 (match_dup 5)))])]
8084 {
8085 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
8086 if (operands[2] == const0_rtx)
8087 {
8088 emit_insn (gen_addcarry<mode>_0 (operands[3], operands[4], operands[5]));
8089 DONE;
8090 }
8091 if (CONST_INT_P (operands[5]))
8092 operands[6] = simplify_unary_operation (ZERO_EXTEND, <DWI>mode,
8093 operands[5], <MODE>mode);
8094 else
8095 operands[6] = gen_rtx_ZERO_EXTEND (<DWI>mode, operands[5]);
8096 })
8097
8098 ;; x == 0 with zero flag test can be done also as x < 1U with carry flag
8099 ;; test, where the latter is preferable if we have some carry consuming
8100 ;; instruction.
8101 ;; For x != 0, we need to use x < 1U with negation of carry, i.e.
8102 ;; + (1 - CF).
;; (operand 3 == 0) + operand 1 + operand 2.  Split before reload into a
;; compare of operand 3 with 1 followed by an add-with-carry of operands
;; 1 and 2: after that compare the LTU (carry) test is operand 3 <u 1,
;; i.e. operand 3 == 0.
8103 (define_insn_and_split "*add<mode>3_eq"
8104 [(set (match_operand:SWI 0 "nonimmediate_operand")
8105 (plus:SWI
8106 (plus:SWI
8107 (eq:SWI (match_operand 3 "int_nonimmediate_operand") (const_int 0))
8108 (match_operand:SWI 1 "nonimmediate_operand"))
8109 (match_operand:SWI 2 "<general_operand>")))
8110 (clobber (reg:CC FLAGS_REG))]
8111 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
8112 && ix86_pre_reload_split ()"
8113 "#"
8114 "&& 1"
8115 [(set (reg:CC FLAGS_REG)
8116 (compare:CC (match_dup 3) (const_int 1)))
8117 (parallel [(set (match_dup 0)
8118 (plus:SWI
8119 (plus:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))
8120 (match_dup 1))
8121 (match_dup 2)))
8122 (clobber (reg:CC FLAGS_REG))])])
8123
;; (operand 3 != 0) + operand 1 + constant operand 2.  Split before
;; reload into a compare of operand 3 with 1 followed by a
;; subtract-with-borrow.  With CF = (operand 3 == 0) after the compare,
;; (1 - CF) + op1 + c = op1 - CF - ~c, so the preparation code replaces
;; operand 2 with its bitwise complement.  Operand 2 must be a
;; CONST_INT, and in DImode the value -0x80000000 is rejected.
8124 (define_insn_and_split "*add<mode>3_ne"
8125 [(set (match_operand:SWI 0 "nonimmediate_operand")
8126 (plus:SWI
8127 (plus:SWI
8128 (ne:SWI (match_operand 3 "int_nonimmediate_operand") (const_int 0))
8129 (match_operand:SWI 1 "nonimmediate_operand"))
8130 (match_operand:SWI 2 "<immediate_operand>")))
8131 (clobber (reg:CC FLAGS_REG))]
8132 "CONST_INT_P (operands[2])
8133 && (<MODE>mode != DImode
8134 || INTVAL (operands[2]) != HOST_WIDE_INT_C (-0x80000000))
8135 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
8136 && ix86_pre_reload_split ()"
8137 "#"
8138 "&& 1"
8139 [(set (reg:CC FLAGS_REG)
8140 (compare:CC (match_dup 3) (const_int 1)))
8141 (parallel [(set (match_dup 0)
8142 (minus:SWI
8143 (minus:SWI (match_dup 1)
8144 (ltu:SWI (reg:CC FLAGS_REG) (const_int 0)))
8145 (match_dup 2)))
8146 (clobber (reg:CC FLAGS_REG))])]
8147 {
8148 operands[2] = gen_int_mode (~INTVAL (operands[2]),
8149 <MODE>mode == DImode ? SImode : <MODE>mode);
8150 })
8151
;; (operand 2 == 0) + operand 1, with no second addend.  Split before
;; reload into a compare of operand 2 with 1 followed by carry +
;; operand 1.  The preparation code forces operand 1 into a register
;; when it is not a nonimmediate_operand.
8152 (define_insn_and_split "*add<mode>3_eq_0"
8153 [(set (match_operand:SWI 0 "nonimmediate_operand")
8154 (plus:SWI
8155 (eq:SWI (match_operand 2 "int_nonimmediate_operand") (const_int 0))
8156 (match_operand:SWI 1 "<general_operand>")))
8157 (clobber (reg:CC FLAGS_REG))]
8158 "ix86_unary_operator_ok (PLUS, <MODE>mode, operands)
8159 && ix86_pre_reload_split ()"
8160 "#"
8161 "&& 1"
8162 [(set (reg:CC FLAGS_REG)
8163 (compare:CC (match_dup 2) (const_int 1)))
8164 (parallel [(set (match_dup 0)
8165 (plus:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))
8166 (match_dup 1)))
8167 (clobber (reg:CC FLAGS_REG))])]
8168 {
8169 if (!nonimmediate_operand (operands[1], <MODE>mode))
8170 operands[1] = force_reg (<MODE>mode, operands[1]);
8171 })
8172
;; (operand 2 != 0) + operand 1.  Split before reload into a compare of
;; operand 2 with 1 followed by operand 1 - carry - (-1), which equals
;; operand 1 + (1 - carry).  The preparation code forces operand 1 into
;; a register when it is not a nonimmediate_operand.
8173 (define_insn_and_split "*add<mode>3_ne_0"
8174 [(set (match_operand:SWI 0 "nonimmediate_operand")
8175 (plus:SWI
8176 (ne:SWI (match_operand 2 "int_nonimmediate_operand") (const_int 0))
8177 (match_operand:SWI 1 "<general_operand>")))
8178 (clobber (reg:CC FLAGS_REG))]
8179 "ix86_unary_operator_ok (PLUS, <MODE>mode, operands)
8180 && ix86_pre_reload_split ()"
8181 "#"
8182 "&& 1"
8183 [(set (reg:CC FLAGS_REG)
8184 (compare:CC (match_dup 2) (const_int 1)))
8185 (parallel [(set (match_dup 0)
8186 (minus:SWI (minus:SWI
8187 (match_dup 1)
8188 (ltu:SWI (reg:CC FLAGS_REG) (const_int 0)))
8189 (const_int -1)))
8190 (clobber (reg:CC FLAGS_REG))])]
8191 {
8192 if (!nonimmediate_operand (operands[1], <MODE>mode))
8193 operands[1] = force_reg (<MODE>mode, operands[1]);
8194 })
8195
;; operand 1 - (operand 3 == 0) - operand 2.  Split before reload into a
;; compare of operand 3 with 1 followed by a subtract-with-borrow of
;; operand 2 from operand 1, where the borrow is the LTU result of that
;; compare.
8196 (define_insn_and_split "*sub<mode>3_eq"
8197 [(set (match_operand:SWI 0 "nonimmediate_operand")
8198 (minus:SWI
8199 (minus:SWI
8200 (match_operand:SWI 1 "nonimmediate_operand")
8201 (eq:SWI (match_operand 3 "int_nonimmediate_operand")
8202 (const_int 0)))
8203 (match_operand:SWI 2 "<general_operand>")))
8204 (clobber (reg:CC FLAGS_REG))]
8205 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
8206 && ix86_pre_reload_split ()"
8207 "#"
8208 "&& 1"
8209 [(set (reg:CC FLAGS_REG)
8210 (compare:CC (match_dup 3) (const_int 1)))
8211 (parallel [(set (match_dup 0)
8212 (minus:SWI
8213 (minus:SWI (match_dup 1)
8214 (ltu:SWI (reg:CC FLAGS_REG) (const_int 0)))
8215 (match_dup 2)))
8216 (clobber (reg:CC FLAGS_REG))])])
8217
;; operand 1 - (operand 3 != 0) + constant operand 2.  Split before
;; reload into a compare of operand 3 with 1 followed by an
;; add-with-carry.  With CF = (operand 3 == 0) after the compare,
;; op1 - (1 - CF) + c = CF + op1 + (c - 1), so the preparation code
;; replaces operand 2 with operand 2 - 1.  Operand 2 must be a
;; CONST_INT, and in DImode the value -0x80000000 is rejected.
8218 (define_insn_and_split "*sub<mode>3_ne"
8219 [(set (match_operand:SWI 0 "nonimmediate_operand")
8220 (plus:SWI
8221 (minus:SWI
8222 (match_operand:SWI 1 "nonimmediate_operand")
8223 (ne:SWI (match_operand 3 "int_nonimmediate_operand")
8224 (const_int 0)))
8225 (match_operand:SWI 2 "<immediate_operand>")))
8226 (clobber (reg:CC FLAGS_REG))]
8227 "CONST_INT_P (operands[2])
8228 && (<MODE>mode != DImode
8229 || INTVAL (operands[2]) != HOST_WIDE_INT_C (-0x80000000))
8230 && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
8231 && ix86_pre_reload_split ()"
8232 "#"
8233 "&& 1"
8234 [(set (reg:CC FLAGS_REG)
8235 (compare:CC (match_dup 3) (const_int 1)))
8236 (parallel [(set (match_dup 0)
8237 (plus:SWI
8238 (plus:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))
8239 (match_dup 1))
8240 (match_dup 2)))
8241 (clobber (reg:CC FLAGS_REG))])]
8242 {
8243 operands[2] = gen_int_mode (INTVAL (operands[2]) - 1,
8244 <MODE>mode == DImode ? SImode : <MODE>mode);
8245 })
8246
;; operand 1 - (operand 3 == 0) + constant operand 2.  Split before
;; reload into a compare of operand 3 with 1 followed by a
;; subtract-with-borrow: op1 - CF + c = op1 - CF - (-c), so the
;; preparation code negates operand 2.  Operand 2 must be a CONST_INT,
;; and in DImode the value -0x80000000 is rejected.
8247 (define_insn_and_split "*sub<mode>3_eq_1"
8248 [(set (match_operand:SWI 0 "nonimmediate_operand")
8249 (plus:SWI
8250 (minus:SWI
8251 (match_operand:SWI 1 "nonimmediate_operand")
8252 (eq:SWI (match_operand 3 "int_nonimmediate_operand")
8253 (const_int 0)))
8254 (match_operand:SWI 2 "<immediate_operand>")))
8255 (clobber (reg:CC FLAGS_REG))]
8256 "CONST_INT_P (operands[2])
8257 && (<MODE>mode != DImode
8258 || INTVAL (operands[2]) != HOST_WIDE_INT_C (-0x80000000))
8259 && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
8260 && ix86_pre_reload_split ()"
8261 "#"
8262 "&& 1"
8263 [(set (reg:CC FLAGS_REG)
8264 (compare:CC (match_dup 3) (const_int 1)))
8265 (parallel [(set (match_dup 0)
8266 (minus:SWI
8267 (minus:SWI (match_dup 1)
8268 (ltu:SWI (reg:CC FLAGS_REG) (const_int 0)))
8269 (match_dup 2)))
8270 (clobber (reg:CC FLAGS_REG))])]
8271 {
8272 operands[2] = gen_int_mode (-INTVAL (operands[2]),
8273 <MODE>mode == DImode ? SImode : <MODE>mode);
8274 })
8275
;; operand 1 - (operand 2 == 0).  Split before reload into a compare of
;; operand 2 with 1 followed by operand 1 - carry.  The preparation code
;; forces operand 1 into a register when it is not a
;; nonimmediate_operand.
8276 (define_insn_and_split "*sub<mode>3_eq_0"
8277 [(set (match_operand:SWI 0 "nonimmediate_operand")
8278 (minus:SWI
8279 (match_operand:SWI 1 "<general_operand>")
8280 (eq:SWI (match_operand 2 "int_nonimmediate_operand") (const_int 0))))
8281 (clobber (reg:CC FLAGS_REG))]
8282 "ix86_unary_operator_ok (MINUS, <MODE>mode, operands)
8283 && ix86_pre_reload_split ()"
8284 "#"
8285 "&& 1"
8286 [(set (reg:CC FLAGS_REG)
8287 (compare:CC (match_dup 2) (const_int 1)))
8288 (parallel [(set (match_dup 0)
8289 (minus:SWI (match_dup 1)
8290 (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))))
8291 (clobber (reg:CC FLAGS_REG))])]
8292 {
8293 if (!nonimmediate_operand (operands[1], <MODE>mode))
8294 operands[1] = force_reg (<MODE>mode, operands[1]);
8295 })
8296
;; operand 1 - (operand 2 != 0).  Split before reload into a compare of
;; operand 2 with 1 followed by carry + operand 1 + (-1), which equals
;; operand 1 - (1 - carry).  The preparation code forces operand 1 into
;; a register when it is not a nonimmediate_operand.
8297 (define_insn_and_split "*sub<mode>3_ne_0"
8298 [(set (match_operand:SWI 0 "nonimmediate_operand")
8299 (minus:SWI
8300 (match_operand:SWI 1 "<general_operand>")
8301 (ne:SWI (match_operand 2 "int_nonimmediate_operand") (const_int 0))))
8302 (clobber (reg:CC FLAGS_REG))]
8303 "ix86_unary_operator_ok (MINUS, <MODE>mode, operands)
8304 && ix86_pre_reload_split ()"
8305 "#"
8306 "&& 1"
8307 [(set (reg:CC FLAGS_REG)
8308 (compare:CC (match_dup 2) (const_int 1)))
8309 (parallel [(set (match_dup 0)
8310 (plus:SWI (plus:SWI
8311 (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))
8312 (match_dup 1))
8313 (const_int -1)))
8314 (clobber (reg:CC FLAGS_REG))])]
8315 {
8316 if (!nonimmediate_operand (operands[1], <MODE>mode))
8317 operands[1] = force_reg (<MODE>mode, operands[1]);
8318 })
8319
8320 ;; The patterns that match these are at the end of this file.
8321
;; XFmode expander, instantiated for each code of the plusminus
;; iterator.  All three operands are registers; enabled only with
;; TARGET_80387.
8322 (define_expand "<insn>xf3"
8323 [(set (match_operand:XF 0 "register_operand")
8324 (plusminus:XF
8325 (match_operand:XF 1 "register_operand")
8326 (match_operand:XF 2 "register_operand")))]
8327 "TARGET_80387")
8328
;; HFmode expander, instantiated for each code of the plusminus
;; iterator.  Operand 2 may be a register or memory; enabled only with
;; TARGET_AVX512FP16.
8329 (define_expand "<insn>hf3"
8330 [(set (match_operand:HF 0 "register_operand")
8331 (plusminus:HF
8332 (match_operand:HF 1 "register_operand")
8333 (match_operand:HF 2 "nonimmediate_operand")))]
8334 "TARGET_AVX512FP16")
8335
;; MODEF expander, instantiated for each code of the plusminus iterator.
;; Enabled when x87 arithmetic is allowed for the mode, or when the mode
;; is an SSE float mode and TARGET_SSE_MATH is set.
8336 (define_expand "<insn><mode>3"
8337 [(set (match_operand:MODEF 0 "register_operand")
8338 (plusminus:MODEF
8339 (match_operand:MODEF 1 "register_operand")
8340 (match_operand:MODEF 2 "nonimmediate_operand")))]
8341 "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode))
8342 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)")
8343 \f
8344 ;; Multiply instructions
8345
;; Multiply expander for the SWIM248 modes: operand 0 = operand 1 *
;; operand 2, with FLAGS_REG clobbered.  No enabling condition is given.
8346 (define_expand "mul<mode>3"
8347 [(parallel [(set (match_operand:SWIM248 0 "register_operand")
8348 (mult:SWIM248
8349 (match_operand:SWIM248 1 "register_operand")
8350 (match_operand:SWIM248 2 "<general_operand>")))
8351 (clobber (reg:CC FLAGS_REG))])])
8352
;; QImode multiply expander: operand 0 = operand 1 * operand 2, with
;; FLAGS_REG clobbered.  Enabled only with TARGET_QIMODE_MATH.
8353 (define_expand "mulqi3"
8354 [(parallel [(set (match_operand:QI 0 "register_operand")
8355 (mult:QI
8356 (match_operand:QI 1 "register_operand")
8357 (match_operand:QI 2 "nonimmediate_operand")))
8358 (clobber (reg:CC FLAGS_REG))])]
8359 "TARGET_QIMODE_MATH")
8360
8361 ;; On AMDFAM10
8362 ;; IMUL reg32/64, reg32/64, imm8 Direct
8363 ;; IMUL reg32/64, mem32/64, imm8 VectorPath
8364 ;; IMUL reg32/64, reg32/64, imm32 Direct
8365 ;; IMUL reg32/64, mem32/64, imm32 VectorPath
8366 ;; IMUL reg32/64, reg32/64 Direct
8367 ;; IMUL reg32/64, mem32/64 Direct
8368 ;;
8369 ;; On BDVER1, all above IMULs use DirectPath
8370 ;;
8371 ;; On AMDFAM10
8372 ;; IMUL reg16, reg16, imm8 VectorPath
8373 ;; IMUL reg16, mem16, imm8 VectorPath
8374 ;; IMUL reg16, reg16, imm16 VectorPath
8375 ;; IMUL reg16, mem16, imm16 VectorPath
8376 ;; IMUL reg16, reg16 Direct
8377 ;; IMUL reg16, mem16 Direct
8378 ;;
8379 ;; On BDVER1, all HI MULs use DoublePath
8380
;; IMUL for the SWIM248 modes, in three alternatives:
;;   0: three-operand form with a "K" immediate,
;;   1: three-operand form with a general <i> immediate,
;;   2: two-operand form with operand 1 tied to the destination.
;; Not matched when both inputs are memory.  The athlon_decode,
;; amdfam10_decode and bdver1_decode attributes are derived from the
;; alternative, from whether the mode is HImode and from whether
;; operand 1 is memory.
8381 (define_insn "*mul<mode>3_1"
8382 [(set (match_operand:SWIM248 0 "register_operand" "=r,r,r")
8383 (mult:SWIM248
8384 (match_operand:SWIM248 1 "nonimmediate_operand" "%rm,rm,0")
8385 (match_operand:SWIM248 2 "<general_operand>" "K,<i>,<m>r")))
8386 (clobber (reg:CC FLAGS_REG))]
8387 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
8388 "@
8389 imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
8390 imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
8391 imul{<imodesuffix>}\t{%2, %0|%0, %2}"
8392 [(set_attr "type" "imul")
8393 (set_attr "prefix_0f" "0,0,1")
8394 (set (attr "athlon_decode")
8395 (cond [(eq_attr "cpu" "athlon")
8396 (const_string "vector")
8397 (eq_attr "alternative" "1")
8398 (const_string "vector")
8399 (and (eq_attr "alternative" "2")
8400 (ior (match_test "<MODE>mode == HImode")
8401 (match_operand 1 "memory_operand")))
8402 (const_string "vector")]
8403 (const_string "direct")))
8404 (set (attr "amdfam10_decode")
8405 (cond [(and (eq_attr "alternative" "0,1")
8406 (ior (match_test "<MODE>mode == HImode")
8407 (match_operand 1 "memory_operand")))
8408 (const_string "vector")]
8409 (const_string "direct")))
8410 (set (attr "bdver1_decode")
8411 (if_then_else
8412 (match_test "<MODE>mode == HImode")
8413 (const_string "double")
8414 (const_string "direct")))
8415 (set_attr "mode" "<MODE>")])
8416
;; 64-bit only: SImode IMUL whose product is zero-extended into DImode
;; register operand 0 (printed as %k0).  It has the same three
;; alternatives as the plain multiply: "K" immediate, general immediate,
;; and the two-operand form with operand 1 tied.  Not matched when both
;; inputs are memory.
8417 (define_insn "*mulsi3_1_zext"
8418 [(set (match_operand:DI 0 "register_operand" "=r,r,r")
8419 (zero_extend:DI
8420 (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0")
8421 (match_operand:SI 2 "x86_64_general_operand" "K,e,BMr"))))
8422 (clobber (reg:CC FLAGS_REG))]
8423 "TARGET_64BIT
8424 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8425 "@
8426 imul{l}\t{%2, %1, %k0|%k0, %1, %2}
8427 imul{l}\t{%2, %1, %k0|%k0, %1, %2}
8428 imul{l}\t{%2, %k0|%k0, %2}"
8429 [(set_attr "type" "imul")
8430 (set_attr "prefix_0f" "0,0,1")
8431 (set (attr "athlon_decode")
8432 (cond [(eq_attr "cpu" "athlon")
8433 (const_string "vector")
8434 (eq_attr "alternative" "1")
8435 (const_string "vector")
8436 (and (eq_attr "alternative" "2")
8437 (match_operand 1 "memory_operand"))
8438 (const_string "vector")]
8439 (const_string "direct")))
8440 (set (attr "amdfam10_decode")
8441 (cond [(and (eq_attr "alternative" "0,1")
8442 (match_operand 1 "memory_operand"))
8443 (const_string "vector")]
8444 (const_string "direct")))
8445 (set_attr "bdver1_decode" "direct")
8446 (set_attr "mode" "SI")])
8447
8448 ;; On AMDFAM10 and BDVER1
8449 ;; MUL reg8 Direct
8450 ;; MUL mem8 Direct
8451
;; QImode multiply emitted as the one-operand MUL.  The destination is
;; constrained to "a" with operand 1 tied to it, so only operand 2
;; appears in the output template.  Requires TARGET_QIMODE_MATH and is
;; not matched when both inputs are memory.
8452 (define_insn "*mulqi3_1"
8453 [(set (match_operand:QI 0 "register_operand" "=a")
8454 (mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
8455 (match_operand:QI 2 "nonimmediate_operand" "qm")))
8456 (clobber (reg:CC FLAGS_REG))]
8457 "TARGET_QIMODE_MATH
8458 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8459 "mul{b}\t%2"
8460 [(set_attr "type" "imul")
8461 (set_attr "length_immediate" "0")
8462 (set (attr "athlon_decode")
8463 (if_then_else (eq_attr "cpu" "athlon")
8464 (const_string "vector")
8465 (const_string "direct")))
8466 (set_attr "amdfam10_decode" "direct")
8467 (set_attr "bdver1_decode" "direct")
8468 (set_attr "mode" "QI")])
8469
8470 ;; Multiply with jump on overflow.
;; Expands to two insns.  The first is a multiply that stores operand 1
;; * operand 2 in operand 0 and sets the CCO flags from an EQ between
;; the <DWI>-mode product of the widened inputs and the sign-extended
;; <MODE>-mode product.  The second is a conditional jump to label
;; operand 3 on (eq (reg:CCO FLAGS_REG) (const_int 0)).
;; Operand 4 is the widened second multiplicand: operand 2 itself when
;; it is a CONST_INT, otherwise (sign_extend:<DWI> operand 2).
8471 (define_expand "mulv<mode>4"
8472 [(parallel [(set (reg:CCO FLAGS_REG)
8473 (eq:CCO (mult:<DWI>
8474 (sign_extend:<DWI>
8475 (match_operand:SWI248 1 "register_operand"))
8476 (match_dup 4))
8477 (sign_extend:<DWI>
8478 (mult:SWI248 (match_dup 1)
8479 (match_operand:SWI248 2
8480 "<general_operand>")))))
8481 (set (match_operand:SWI248 0 "register_operand")
8482 (mult:SWI248 (match_dup 1) (match_dup 2)))])
8483 (set (pc) (if_then_else
8484 (eq (reg:CCO FLAGS_REG) (const_int 0))
8485 (label_ref (match_operand 3))
8486 (pc)))]
8487 ""
8488 {
8489 if (CONST_INT_P (operands[2]))
8490 operands[4] = operands[2];
8491 else
8492 operands[4] = gen_rtx_SIGN_EXTEND (<DWI>mode, operands[2]);
8493 })
8494
;; Overflow-checking IMUL for SWI48 modes in which both multiplicands
;; appear sign-extended to <DWI> on the comparison side.  It sets the
;; CCO flags and stores the product in operand 0.  Alternative 0 uses
;; the three-operand form and alternative 1 the two-operand form with
;; operand 1 tied to the destination.  Not matched when both inputs are
;; memory.
8495 (define_insn "*mulv<mode>4"
8496 [(set (reg:CCO FLAGS_REG)
8497 (eq:CCO (mult:<DWI>
8498 (sign_extend:<DWI>
8499 (match_operand:SWI48 1 "nonimmediate_operand" "%rm,0"))
8500 (sign_extend:<DWI>
8501 (match_operand:SWI48 2 "x86_64_sext_operand" "We,mr")))
8502 (sign_extend:<DWI>
8503 (mult:SWI48 (match_dup 1) (match_dup 2)))))
8504 (set (match_operand:SWI48 0 "register_operand" "=r,r")
8505 (mult:SWI48 (match_dup 1) (match_dup 2)))]
8506 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
8507 "@
8508 imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
8509 imul{<imodesuffix>}\t{%2, %0|%0, %2}"
8510 [(set_attr "type" "imul")
8511 (set_attr "prefix_0f" "0,1")
8512 (set (attr "athlon_decode")
8513 (cond [(eq_attr "cpu" "athlon")
8514 (const_string "vector")
8515 (eq_attr "alternative" "0")
8516 (const_string "vector")
8517 (and (eq_attr "alternative" "1")
8518 (match_operand 1 "memory_operand"))
8519 (const_string "vector")]
8520 (const_string "direct")))
8521 (set (attr "amdfam10_decode")
8522 (cond [(and (eq_attr "alternative" "1")
8523 (match_operand 1 "memory_operand"))
8524 (const_string "vector")]
8525 (const_string "direct")))
8526 (set_attr "bdver1_decode" "direct")
8527 (set_attr "mode" "<MODE>")])
8528
;; HImode flag-setting signed multiply: both factors sign-extended to SI,
;; CCO set from comparing that product with the sign-extended HI product.
;; Single alternative with operand 1 tied to the output; prints the
;; two-operand imul{w}.  Two memory inputs are rejected by the condition.
8529 (define_insn "*mulvhi4"
8530 [(set (reg:CCO FLAGS_REG)
8531 (eq:CCO (mult:SI
8532 (sign_extend:SI
8533 (match_operand:HI 1 "nonimmediate_operand" "%0"))
8534 (sign_extend:SI
8535 (match_operand:HI 2 "nonimmediate_operand" "mr")))
8536 (sign_extend:SI
8537 (mult:HI (match_dup 1) (match_dup 2)))))
8538 (set (match_operand:HI 0 "register_operand" "=r")
8539 (mult:HI (match_dup 1) (match_dup 2)))]
8540 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
8541 "imul{w}\t{%2, %0|%0, %2}"
8542 [(set_attr "type" "imul")
8543 (set_attr "prefix_0f" "1")
8544 (set_attr "athlon_decode" "vector")
8545 (set_attr "amdfam10_decode" "direct")
8546 (set_attr "bdver1_decode" "double")
8547 (set_attr "mode" "HI")])
8548
;; Flag-setting signed multiply by a constant on SWI248 modes.  Operand 2
;; is the immediate in the narrow mode and operand 3 is the <DWI>
;; const_int used inside the double-width product; the insn condition
;; requires operand 2 to be a CONST_INT whose INTVAL equals that of
;; operand 3.  Always prints the three-operand imul form.
;; Alternative 0 uses constraint "K" for both constants; alternative 1
;; uses "i" / "<i>".
8549 (define_insn "*mulv<mode>4_1"
8550 [(set (reg:CCO FLAGS_REG)
8551 (eq:CCO (mult:<DWI>
8552 (sign_extend:<DWI>
8553 (match_operand:SWI248 1 "nonimmediate_operand" "rm,rm"))
8554 (match_operand:<DWI> 3 "const_int_operand" "K,i"))
8555 (sign_extend:<DWI>
8556 (mult:SWI248 (match_dup 1)
8557 (match_operand:SWI248 2
8558 "<immediate_operand>" "K,<i>")))))
8559 (set (match_operand:SWI248 0 "register_operand" "=r,r")
8560 (mult:SWI248 (match_dup 1) (match_dup 2)))]
8561 "!(MEM_P (operands[1]) && MEM_P (operands[2]))
8562 && CONST_INT_P (operands[2])
8563 && INTVAL (operands[2]) == INTVAL (operands[3])"
8564 "imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
8565 [(set_attr "type" "imul")
;; prefix_0f is forced to "0" for HImode and left at its default ("*")
;; for the other modes.
8566 (set (attr "prefix_0f")
8567 (if_then_else
8568 (match_test "<MODE>mode == HImode")
8569 (const_string "0")
8570 (const_string "*")))
8571 (set (attr "athlon_decode")
8572 (cond [(eq_attr "cpu" "athlon")
8573 (const_string "vector")
8574 (eq_attr "alternative" "1")
8575 (const_string "vector")]
8576 (const_string "direct")))
8577 (set (attr "amdfam10_decode")
8578 (cond [(ior (match_test "<MODE>mode == HImode")
8579 (match_operand 1 "memory_operand"))
8580 (const_string "vector")]
8581 (const_string "direct")))
8582 (set (attr "bdver1_decode")
8583 (if_then_else
8584 (match_test "<MODE>mode == HImode")
8585 (const_string "double")
8586 (const_string "direct")))
8587 (set_attr "mode" "<MODE>")
;; length_immediate: 1 for alternative 0; for alternative 1 it is 4 when
;; <MODE_SIZE> is 8 and <MODE_SIZE> otherwise.
8588 (set (attr "length_immediate")
8589 (cond [(eq_attr "alternative" "0")
8590 (const_string "1")
8591 (match_test "<MODE_SIZE> == 8")
8592 (const_string "4")]
8593 (const_string "<MODE_SIZE>")))])
8594
;; Named expander for unsigned multiplication with overflow detection on
;; SWI248 modes.  The parallel sets CCO from comparing the product of the
;; zero-extended operands with the zero-extended narrow product, stores
;; the narrow product in operand 0 and clobbers a scratch; it is followed
;; by a conditional jump to the label in operand 3 on (eq CCO 0).
;; The preparation code forces operand 1 into a register when operands 1
;; and 2 are both memory references.
8595 (define_expand "umulv<mode>4"
8596 [(parallel [(set (reg:CCO FLAGS_REG)
8597 (eq:CCO (mult:<DWI>
8598 (zero_extend:<DWI>
8599 (match_operand:SWI248 1
8600 "nonimmediate_operand"))
8601 (zero_extend:<DWI>
8602 (match_operand:SWI248 2
8603 "nonimmediate_operand")))
8604 (zero_extend:<DWI>
8605 (mult:SWI248 (match_dup 1) (match_dup 2)))))
8606 (set (match_operand:SWI248 0 "register_operand")
8607 (mult:SWI248 (match_dup 1) (match_dup 2)))
8608 (clobber (scratch:SWI248))])
8609 (set (pc) (if_then_else
8610 (eq (reg:CCO FLAGS_REG) (const_int 0))
8611 (label_ref (match_operand 3))
8612 (pc)))]
8613 ""
8614 {
8615 if (MEM_P (operands[1]) && MEM_P (operands[2]))
8616 operands[1] = force_reg (<MODE>mode, operands[1]);
8617 })
8618
;; Insn matching the unsigned overflow-checking multiply on SWI248 modes.
;; Prints the one-operand mul on operand 2.  The result carries
;; constraint "=a", operand 1 is tied to it with "%0", and the clobbered
;; scratch (operand 3) carries "=d".  Two memory inputs are rejected.
8619 (define_insn "*umulv<mode>4"
8620 [(set (reg:CCO FLAGS_REG)
8621 (eq:CCO (mult:<DWI>
8622 (zero_extend:<DWI>
8623 (match_operand:SWI248 1 "nonimmediate_operand" "%0"))
8624 (zero_extend:<DWI>
8625 (match_operand:SWI248 2 "nonimmediate_operand" "rm")))
8626 (zero_extend:<DWI>
8627 (mult:SWI248 (match_dup 1) (match_dup 2)))))
8628 (set (match_operand:SWI248 0 "register_operand" "=a")
8629 (mult:SWI248 (match_dup 1) (match_dup 2)))
8630 (clobber (match_scratch:SWI248 3 "=d"))]
8631 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
8632 "mul{<imodesuffix>}\t%2"
8633 [(set_attr "type" "imul")
8634 (set_attr "length_immediate" "0")
8635 (set (attr "athlon_decode")
8636 (if_then_else (eq_attr "cpu" "athlon")
8637 (const_string "vector")
8638 (const_string "double")))
8639 (set_attr "amdfam10_decode" "double")
8640 (set_attr "bdver1_decode" "direct")
8641 (set_attr "mode" "<MODE>")])
8642
;; Named expander for QImode multiplication with overflow detection,
;; generated for each code of the any_extend iterator.  Enabled only
;; under TARGET_QIMODE_MATH.  The parallel sets CCO from comparing the HI
;; product of the extended operands with the extended QI product and
;; stores the QI product in operand 0; a conditional jump to the label in
;; operand 3 on (eq CCO 0) follows.
;; The preparation code forces operand 1 into a register when both inputs
;; are memory references.
8643 (define_expand "<u>mulvqi4"
8644 [(parallel [(set (reg:CCO FLAGS_REG)
8645 (eq:CCO (mult:HI
8646 (any_extend:HI
8647 (match_operand:QI 1 "nonimmediate_operand"))
8648 (any_extend:HI
8649 (match_operand:QI 2 "nonimmediate_operand")))
8650 (any_extend:HI
8651 (mult:QI (match_dup 1) (match_dup 2)))))
8652 (set (match_operand:QI 0 "register_operand")
8653 (mult:QI (match_dup 1) (match_dup 2)))])
8654 (set (pc) (if_then_else
8655 (eq (reg:CCO FLAGS_REG) (const_int 0))
8656 (label_ref (match_operand 3))
8657 (pc)))]
8658 "TARGET_QIMODE_MATH"
8659 {
8660 if (MEM_P (operands[1]) && MEM_P (operands[2]))
8661 operands[1] = force_reg (QImode, operands[1]);
8662 })
8663
;; Insn matching the QImode overflow-checking multiply for each
;; any_extend code.  Prints <sgnprefix>mul{b} on operand 2; the result has
;; constraint "=a" and operand 1 is tied to it.  Requires
;; TARGET_QIMODE_MATH and rejects two memory inputs.
8664 (define_insn "*<u>mulvqi4"
8665 [(set (reg:CCO FLAGS_REG)
8666 (eq:CCO (mult:HI
8667 (any_extend:HI
8668 (match_operand:QI 1 "nonimmediate_operand" "%0"))
8669 (any_extend:HI
8670 (match_operand:QI 2 "nonimmediate_operand" "qm")))
8671 (any_extend:HI
8672 (mult:QI (match_dup 1) (match_dup 2)))))
8673 (set (match_operand:QI 0 "register_operand" "=a")
8674 (mult:QI (match_dup 1) (match_dup 2)))]
8675 "TARGET_QIMODE_MATH
8676 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8677 "<sgnprefix>mul{b}\t%2"
8678 [(set_attr "type" "imul")
8679 (set_attr "length_immediate" "0")
8680 (set (attr "athlon_decode")
8681 (if_then_else (eq_attr "cpu" "athlon")
8682 (const_string "vector")
8683 (const_string "direct")))
8684 (set_attr "amdfam10_decode" "direct")
8685 (set_attr "bdver1_decode" "direct")
8686 (set_attr "mode" "QI")])
8687
;; Named expander for widening multiplication DWIH x DWIH -> <DWI>,
;; generated for each any_extend code.  It has no condition and no
;; preparation code: it only emits the extended multiply together with a
;; FLAGS_REG clobber.  Operand 2 must be a register here, operand 1 may
;; also be memory.
8688 (define_expand "<u>mul<mode><dwi>3"
8689 [(parallel [(set (match_operand:<DWI> 0 "register_operand")
8690 (mult:<DWI>
8691 (any_extend:<DWI>
8692 (match_operand:DWIH 1 "nonimmediate_operand"))
8693 (any_extend:<DWI>
8694 (match_operand:DWIH 2 "register_operand"))))
8695 (clobber (reg:CC FLAGS_REG))])])
8696
;; Named expander for widening multiplication QI x QI -> HI, generated
;; for each any_extend code and enabled only under TARGET_QIMODE_MATH.
;; Emits the extended multiply together with a FLAGS_REG clobber.
8697 (define_expand "<u>mulqihi3"
8698 [(parallel [(set (match_operand:HI 0 "register_operand")
8699 (mult:HI
8700 (any_extend:HI
8701 (match_operand:QI 1 "nonimmediate_operand"))
8702 (any_extend:HI
8703 (match_operand:QI 2 "register_operand"))))
8704 (clobber (reg:CC FLAGS_REG))])]
8705 "TARGET_QIMODE_MATH")
8706
;; BMI2 mulx pattern: two register outputs and no FLAGS_REG clobber.
;; Operand 0 receives the DWIH-mode product of operands 2 and 3.
;; Operand 1 receives the product of the zero-extended operands, shifted
;; right by operand 4 and truncated back to DWIH.
;; The condition requires TARGET_BMI2, requires operand 4 to equal
;; <MODE_SIZE> * BITS_PER_UNIT, and rejects two memory inputs.
;; Operand 2 carries constraint "%d"; it does not appear in the output
;; template, only operands 3, 0 and 1 are printed.
8707 (define_insn "*bmi2_umul<mode><dwi>3_1"
8708 [(set (match_operand:DWIH 0 "register_operand" "=r")
8709 (mult:DWIH
8710 (match_operand:DWIH 2 "nonimmediate_operand" "%d")
8711 (match_operand:DWIH 3 "nonimmediate_operand" "rm")))
8712 (set (match_operand:DWIH 1 "register_operand" "=r")
8713 (truncate:DWIH
8714 (lshiftrt:<DWI>
8715 (mult:<DWI> (zero_extend:<DWI> (match_dup 2))
8716 (zero_extend:<DWI> (match_dup 3)))
8717 (match_operand:QI 4 "const_int_operand"))))]
8718 "TARGET_BMI2 && INTVAL (operands[4]) == <MODE_SIZE> * BITS_PER_UNIT
8719 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
8720 "mulx\t{%3, %0, %1|%1, %0, %3}"
8721 [(set_attr "type" "imulx")
8722 (set_attr "prefix" "vex")
8723 (set_attr "mode" "<MODE>")])
8724
;; Unsigned widening multiply DWIH x DWIH -> <DWI>, two alternatives.
;; Alternative 0 is restricted to isa "bmi2", takes operand 1 under
;; constraint "%d" and has the output template "#", so it is not printed
;; directly but rewritten by a split.
;; Alternative 1 prints the one-operand mul on operand 2, with the result
;; under constraint "A" and operand 1 tied to it.
;; Attribute values marked "*" leave the attribute at its default for
;; alternative 0.
8725 (define_insn "*umul<mode><dwi>3_1"
8726 [(set (match_operand:<DWI> 0 "register_operand" "=r,A")
8727 (mult:<DWI>
8728 (zero_extend:<DWI>
8729 (match_operand:DWIH 1 "nonimmediate_operand" "%d,0"))
8730 (zero_extend:<DWI>
8731 (match_operand:DWIH 2 "nonimmediate_operand" "rm,rm"))))
8732 (clobber (reg:CC FLAGS_REG))]
8733 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
8734 "@
8735 #
8736 mul{<imodesuffix>}\t%2"
8737 [(set_attr "isa" "bmi2,*")
8738 (set_attr "type" "imulx,imul")
8739 (set_attr "length_immediate" "*,0")
8740 (set (attr "athlon_decode")
8741 (cond [(eq_attr "alternative" "1")
8742 (if_then_else (eq_attr "cpu" "athlon")
8743 (const_string "vector")
8744 (const_string "double"))]
8745 (const_string "*")))
8746 (set_attr "amdfam10_decode" "*,double")
8747 (set_attr "bdver1_decode" "*,direct")
8748 (set_attr "prefix" "vex,orig")
8749 (set_attr "mode" "<MODE>")])
8750
8751 ;; Convert mul to the mulx pattern to avoid flags dependency.
;; Split for the unsigned widening multiply, applied when TARGET_BMI2 is
;; set, reload has completed and operand 1 is in DX_REG.  The replacement
;; is a two-set parallel with no flags clobber: operand 3 gets the DWIH
;; product, operand 4 gets the zero-extended product shifted right by
;; operand 5 and truncated to DWIH.
;; The preparation code fills operands 3 and 4 from operand 0 via
;; split_double_mode (presumably its low and high halves -- confirm
;; against the helper) and sets operand 5 to <MODE_SIZE> * BITS_PER_UNIT.
8752 (define_split
8753 [(set (match_operand:<DWI> 0 "register_operand")
8754 (mult:<DWI>
8755 (zero_extend:<DWI>
8756 (match_operand:DWIH 1 "register_operand"))
8757 (zero_extend:<DWI>
8758 (match_operand:DWIH 2 "nonimmediate_operand"))))
8759 (clobber (reg:CC FLAGS_REG))]
8760 "TARGET_BMI2 && reload_completed
8761 && REGNO (operands[1]) == DX_REG"
8762 [(parallel [(set (match_dup 3)
8763 (mult:DWIH (match_dup 1) (match_dup 2)))
8764 (set (match_dup 4)
8765 (truncate:DWIH
8766 (lshiftrt:<DWI>
8767 (mult:<DWI> (zero_extend:<DWI> (match_dup 1))
8768 (zero_extend:<DWI> (match_dup 2)))
8769 (match_dup 5))))])]
8770 {
8771 split_double_mode (<DWI>mode, &operands[0], 1, &operands[3], &operands[4]);
8772
8773 operands[5] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
8774 })
8775
;; Signed widening multiply DWIH x DWIH -> <DWI>.  Prints the one-operand
;; imul on operand 2; the <DWI> result carries constraint "=A" and
;; operand 1 is tied to it with "%0".  Two memory inputs are rejected.
8776 (define_insn "*mul<mode><dwi>3_1"
8777 [(set (match_operand:<DWI> 0 "register_operand" "=A")
8778 (mult:<DWI>
8779 (sign_extend:<DWI>
8780 (match_operand:DWIH 1 "nonimmediate_operand" "%0"))
8781 (sign_extend:<DWI>
8782 (match_operand:DWIH 2 "nonimmediate_operand" "rm"))))
8783 (clobber (reg:CC FLAGS_REG))]
8784 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
8785 "imul{<imodesuffix>}\t%2"
8786 [(set_attr "type" "imul")
8787 (set_attr "length_immediate" "0")
8788 (set (attr "athlon_decode")
8789 (if_then_else (eq_attr "cpu" "athlon")
8790 (const_string "vector")
8791 (const_string "double")))
8792 (set_attr "amdfam10_decode" "double")
8793 (set_attr "bdver1_decode" "direct")
8794 (set_attr "mode" "<MODE>")])
8795
;; Widening multiply QI x QI -> HI for each any_extend code.  Prints
;; <sgnprefix>mul{b} on operand 2; the HI result carries constraint "=a"
;; and operand 1 is tied to it.  Requires TARGET_QIMODE_MATH and rejects
;; two memory inputs.
8796 (define_insn "*<u>mulqihi3_1"
8797 [(set (match_operand:HI 0 "register_operand" "=a")
8798 (mult:HI
8799 (any_extend:HI
8800 (match_operand:QI 1 "nonimmediate_operand" "%0"))
8801 (any_extend:HI
8802 (match_operand:QI 2 "nonimmediate_operand" "qm"))))
8803 (clobber (reg:CC FLAGS_REG))]
8804 "TARGET_QIMODE_MATH
8805 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8806 "<sgnprefix>mul{b}\t%2"
8807 [(set_attr "type" "imul")
8808 (set_attr "length_immediate" "0")
8809 (set (attr "athlon_decode")
8810 (if_then_else (eq_attr "cpu" "athlon")
8811 (const_string "vector")
8812 (const_string "direct")))
8813 (set_attr "amdfam10_decode" "direct")
8814 (set_attr "bdver1_decode" "direct")
8815 (set_attr "mode" "QI")])
8816
8817 ;; Highpart multiplication patterns
;; Named highpart multiply for DWIH modes, written with the
;; any_mul_highpart code iterator.  Operand 1 must be a register under
;; constraint "%a", the result carries "=d", and the clobbered scratch
;; (operand 3) is tied to operand 1 with "=1".  Prints the one-operand
;; <sgnprefix>mul on operand 2.  The insn condition is empty.
8818 (define_insn "<s>mul<mode>3_highpart"
8819 [(set (match_operand:DWIH 0 "register_operand" "=d")
8820 (any_mul_highpart:DWIH
8821 (match_operand:DWIH 1 "register_operand" "%a")
8822 (match_operand:DWIH 2 "nonimmediate_operand" "rm")))
8823 (clobber (match_scratch:DWIH 3 "=1"))
8824 (clobber (reg:CC FLAGS_REG))]
8825 ""
8826 "<sgnprefix>mul{<imodesuffix>}\t%2"
8827 [(set_attr "type" "imul")
8828 (set_attr "length_immediate" "0")
8829 (set (attr "athlon_decode")
8830 (if_then_else (eq_attr "cpu" "athlon")
8831 (const_string "vector")
8832 (const_string "double")))
8833 (set_attr "amdfam10_decode" "double")
8834 (set_attr "bdver1_decode" "direct")
8835 (set_attr "mode" "<MODE>")])
8836
;; SImode highpart multiply (any_mul_highpart form) whose result is
;; zero-extended into a DImode destination.  Only for TARGET_64BIT.
;; Same register constraints as the plain highpart pattern: operand 1
;; "%a", result "=d", scratch tied to operand 1.  Prints
;; <sgnprefix>mul{l} on operand 2.
8837 (define_insn "*<s>mulsi3_highpart_zext"
8838 [(set (match_operand:DI 0 "register_operand" "=d")
8839 (zero_extend:DI
8840 (any_mul_highpart:SI
8841 (match_operand:SI 1 "register_operand" "%a")
8842 (match_operand:SI 2 "nonimmediate_operand" "rm"))))
8843 (clobber (match_scratch:SI 3 "=1"))
8844 (clobber (reg:CC FLAGS_REG))]
8845 "TARGET_64BIT"
8846 "<sgnprefix>mul{l}\t%2"
8847 [(set_attr "type" "imul")
8848 (set_attr "length_immediate" "0")
8849 (set (attr "athlon_decode")
8850 (if_then_else (eq_attr "cpu" "athlon")
8851 (const_string "vector")
8852 (const_string "double")))
8853 (set_attr "amdfam10_decode" "double")
8854 (set_attr "bdver1_decode" "direct")
8855 (set_attr "mode" "SI")])
8856
;; DImode highpart multiply written out explicitly: both inputs extended
;; to TI with any_extend, multiplied, shifted right by 64 and truncated
;; to DI.  Requires TARGET_64BIT and rejects two memory inputs.
;; Operand 1 "%a", result "=d", scratch tied to operand 1.  Prints
;; <sgnprefix>mul{q} on operand 2.
8857 (define_insn "*<s>muldi3_highpart_1"
8858 [(set (match_operand:DI 0 "register_operand" "=d")
8859 (truncate:DI
8860 (lshiftrt:TI
8861 (mult:TI
8862 (any_extend:TI
8863 (match_operand:DI 1 "nonimmediate_operand" "%a"))
8864 (any_extend:TI
8865 (match_operand:DI 2 "nonimmediate_operand" "rm")))
8866 (const_int 64))))
8867 (clobber (match_scratch:DI 3 "=1"))
8868 (clobber (reg:CC FLAGS_REG))]
8869 "TARGET_64BIT
8870 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8871 "<sgnprefix>mul{q}\t%2"
8872 [(set_attr "type" "imul")
8873 (set_attr "length_immediate" "0")
8874 (set (attr "athlon_decode")
8875 (if_then_else (eq_attr "cpu" "athlon")
8876 (const_string "vector")
8877 (const_string "double")))
8878 (set_attr "amdfam10_decode" "double")
8879 (set_attr "bdver1_decode" "direct")
8880 (set_attr "mode" "DI")])
8881
;; SImode highpart multiply in the explicit extend/mult/lshiftrt 32/
;; truncate form, with the SI result zero-extended into a DImode
;; destination.  Requires TARGET_64BIT and rejects two memory inputs.
;; Prints <sgnprefix>mul{l} on operand 2.
;; NOTE(review): this pattern has the same name as the
;; any_mul_highpart-based zero-extending pattern defined earlier in this
;; file.  Names starting with '*' are presumably not used to generate
;; gen_* functions, so the clash should be harmless -- confirm.
8882 (define_insn "*<s>mulsi3_highpart_zext"
8883 [(set (match_operand:DI 0 "register_operand" "=d")
8884 (zero_extend:DI (truncate:SI
8885 (lshiftrt:DI
8886 (mult:DI (any_extend:DI
8887 (match_operand:SI 1 "nonimmediate_operand" "%a"))
8888 (any_extend:DI
8889 (match_operand:SI 2 "nonimmediate_operand" "rm")))
8890 (const_int 32)))))
8891 (clobber (match_scratch:SI 3 "=1"))
8892 (clobber (reg:CC FLAGS_REG))]
8893 "TARGET_64BIT
8894 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8895 "<sgnprefix>mul{l}\t%2"
8896 [(set_attr "type" "imul")
8897 (set_attr "length_immediate" "0")
8898 (set (attr "athlon_decode")
8899 (if_then_else (eq_attr "cpu" "athlon")
8900 (const_string "vector")
8901 (const_string "double")))
8902 (set_attr "amdfam10_decode" "double")
8903 (set_attr "bdver1_decode" "direct")
8904 (set_attr "mode" "SI")])
8905
;; SImode highpart multiply in the explicit form: both inputs extended to
;; DI with any_extend, multiplied, shifted right by 32 and truncated to
;; SI.  No target condition; only two memory inputs are rejected.
;; Operand 1 "%a", result "=d", scratch tied to operand 1.  Prints
;; <sgnprefix>mul{l} on operand 2.
8906 (define_insn "*<s>mulsi3_highpart_1"
8907 [(set (match_operand:SI 0 "register_operand" "=d")
8908 (truncate:SI
8909 (lshiftrt:DI
8910 (mult:DI
8911 (any_extend:DI
8912 (match_operand:SI 1 "nonimmediate_operand" "%a"))
8913 (any_extend:DI
8914 (match_operand:SI 2 "nonimmediate_operand" "rm")))
8915 (const_int 32))))
8916 (clobber (match_scratch:SI 3 "=1"))
8917 (clobber (reg:CC FLAGS_REG))]
8918 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
8919 "<sgnprefix>mul{l}\t%2"
8920 [(set_attr "type" "imul")
8921 (set_attr "length_immediate" "0")
8922 (set (attr "athlon_decode")
8923 (if_then_else (eq_attr "cpu" "athlon")
8924 (const_string "vector")
8925 (const_string "double")))
8926 (set_attr "amdfam10_decode" "double")
8927 (set_attr "bdver1_decode" "direct")
8928 (set_attr "mode" "SI")])
8929
8930 ;; Highpart multiplication peephole2s to tweak register allocation.
8931 ;; mov imm,%rdx; mov %rdi,%rax; imulq %rdx -> mov imm,%rax; imulq %rdi
;; Matched sequence (SWI48 modes):
;;   reg0 = immediate (operand 1)
;;   reg2 = reg3
;;   reg4 = highpart multiply of reg2 by reg0, clobbering reg2 and flags
;; Replacement:
;;   reg2 = immediate (operand 1)
;;   reg4 = highpart multiply of reg2 by reg3, clobbering reg2 and flags
;; so the register-to-register copy disappears.
;; Guards: reg3 is not AX_REG, reg0 differs from both reg2 and reg3, and
;; reg0 is either the multiply's destination (reg4) or
;; peep2_reg_dead_p (3, operands[0]) holds (presumably: reg0 is not
;; needed after the matched sequence -- confirm helper semantics).
8932 (define_peephole2
8933 [(set (match_operand:SWI48 0 "general_reg_operand")
8934 (match_operand:SWI48 1 "immediate_operand"))
8935 (set (match_operand:SWI48 2 "general_reg_operand")
8936 (match_operand:SWI48 3 "general_reg_operand"))
8937 (parallel [(set (match_operand:SWI48 4 "general_reg_operand")
8938 (any_mul_highpart:SWI48 (match_dup 2) (match_dup 0)))
8939 (clobber (match_dup 2))
8940 (clobber (reg:CC FLAGS_REG))])]
8941 "REGNO (operands[3]) != AX_REG
8942 && REGNO (operands[0]) != REGNO (operands[2])
8943 && REGNO (operands[0]) != REGNO (operands[3])
8944 && (REGNO (operands[0]) == REGNO (operands[4])
8945 || peep2_reg_dead_p (3, operands[0]))"
8946 [(set (match_dup 2) (match_dup 1))
8947 (parallel [(set (match_dup 4)
8948 (any_mul_highpart:SWI48 (match_dup 2) (match_dup 3)))
8949 (clobber (match_dup 2))
8950 (clobber (reg:CC FLAGS_REG))])])
8951
;; Same rewrite for the SImode highpart multiply whose result is
;; zero-extended into a DImode register (TARGET_64BIT only):
;;   reg0 = immediate; reg2 = reg3; reg4 = zext (highpart (reg2 * reg0))
;; becomes
;;   reg2 = immediate; reg4 = zext (highpart (reg2 * reg3)).
;; Guards: reg3 is not AX_REG; reg0, reg2 and reg3 are pairwise distinct;
;; and reg0 is either reg4 or peep2_reg_dead_p (3, operands[0]) holds.
;; NOTE(review): the reg2 != reg3 test is present here but not in the
;; SWI48 peephole2 defined just before this one -- confirm whether the
;; difference is intentional.
8952 (define_peephole2
8953 [(set (match_operand:SI 0 "general_reg_operand")
8954 (match_operand:SI 1 "immediate_operand"))
8955 (set (match_operand:SI 2 "general_reg_operand")
8956 (match_operand:SI 3 "general_reg_operand"))
8957 (parallel [(set (match_operand:DI 4 "general_reg_operand")
8958 (zero_extend:DI
8959 (any_mul_highpart:SI (match_dup 2) (match_dup 0))))
8960 (clobber (match_dup 2))
8961 (clobber (reg:CC FLAGS_REG))])]
8962 "TARGET_64BIT
8963 && REGNO (operands[3]) != AX_REG
8964 && REGNO (operands[0]) != REGNO (operands[2])
8965 && REGNO (operands[2]) != REGNO (operands[3])
8966 && REGNO (operands[0]) != REGNO (operands[3])
8967 && (REGNO (operands[0]) == REGNO (operands[4])
8968 || peep2_reg_dead_p (3, operands[0]))"
8969 [(set (match_dup 2) (match_dup 1))
8970 (parallel [(set (match_dup 4)
8971 (zero_extend:DI
8972 (any_mul_highpart:SI (match_dup 2) (match_dup 3))))
8973 (clobber (match_dup 2))
8974 (clobber (reg:CC FLAGS_REG))])])
8975
8976 ;; The patterns that match these are at the end of this file.
8977
;; XFmode multiply expander: all three operands are registers; enabled
;; under TARGET_80387.  No preparation code.
8978 (define_expand "mulxf3"
8979 [(set (match_operand:XF 0 "register_operand")
8980 (mult:XF (match_operand:XF 1 "register_operand")
8981 (match_operand:XF 2 "register_operand")))]
8982 "TARGET_80387")
8983
;; HFmode multiply expander: operand 2 may be a register or memory;
;; enabled under TARGET_AVX512FP16.  No preparation code.
8984 (define_expand "mulhf3"
8985 [(set (match_operand:HF 0 "register_operand")
8986 (mult:HF (match_operand:HF 1 "register_operand")
8987 (match_operand:HF 2 "nonimmediate_operand")))]
8988 "TARGET_AVX512FP16")
8989
;; MODEF multiply expander.  Enabled when x87 arithmetic is available
;; for the mode (TARGET_80387 && X87_ENABLE_ARITH) or when the mode is an
;; SSE float mode and TARGET_SSE_MATH is set.  No preparation code.
8990 (define_expand "mul<mode>3"
8991 [(set (match_operand:MODEF 0 "register_operand")
8992 (mult:MODEF (match_operand:MODEF 1 "register_operand")
8993 (match_operand:MODEF 2 "nonimmediate_operand")))]
8994 "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode))
8995 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)")
8996 \f
8997 ;; Divide instructions
8998
8999 ;; The patterns that match these are at the end of this file.
9000
;; XFmode divide expander: all three operands are registers; enabled
;; under TARGET_80387.  No preparation code.
9001 (define_expand "divxf3"
9002 [(set (match_operand:XF 0 "register_operand")
9003 (div:XF (match_operand:XF 1 "register_operand")
9004 (match_operand:XF 2 "register_operand")))]
9005 "TARGET_80387")
9006
9007 /* There is no more precision loss than Newton-Raphson approximation
9008 when using HFmode rcp/rsqrt, so do the transformation directly under
9009 TARGET_RECIP_DIV and fast-math. */
;; HFmode divide expander, enabled under TARGET_AVX512FP16.
;; When TARGET_RECIP_DIV is set, the insn is being optimized for speed,
;; and flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations all hold, the preparation code emits
;; gen_rcphf2 on operand 2 (forced into a register) into a fresh pseudo
;; and then gen_mulhf3 of operand 1 by that pseudo, and finishes with
;; DONE.  Otherwise the div:HF template above is emitted unchanged.
9010 (define_expand "divhf3"
9011 [(set (match_operand:HF 0 "register_operand")
9012 (div:HF (match_operand:HF 1 "register_operand")
9013 (match_operand:HF 2 "nonimmediate_operand")))]
9014 "TARGET_AVX512FP16"
9015 {
9016 if (TARGET_RECIP_DIV
9017 && optimize_insn_for_speed_p ()
9018 && flag_finite_math_only && !flag_trapping_math
9019 && flag_unsafe_math_optimizations)
9020 {
9021 rtx op = gen_reg_rtx (HFmode);
9022 operands[2] = force_reg (HFmode, operands[2]);
9023 emit_insn (gen_rcphf2 (op, operands[2]));
9024 emit_insn (gen_mulhf3 (operands[0], operands[1], op));
9025 DONE;
9026 }
9027 })
9028
;; MODEF divide expander.  Enabled when x87 arithmetic is available for
;; the mode or the mode is an SSE float mode with TARGET_SSE_MATH.
;; For SFmode only, when TARGET_SSE, TARGET_SSE_MATH and TARGET_RECIP_DIV
;; are set, the insn is being optimized for speed, and
;; flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations all hold, the expansion is delegated to
;; ix86_emit_swdivsf and ends with DONE.  Otherwise the div:MODEF
;; template is emitted unchanged.
9029 (define_expand "div<mode>3"
9030 [(set (match_operand:MODEF 0 "register_operand")
9031 (div:MODEF (match_operand:MODEF 1 "register_operand")
9032 (match_operand:MODEF 2 "nonimmediate_operand")))]
9033 "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode))
9034 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
9035 {
9036 if (<MODE>mode == SFmode
9037 && TARGET_SSE && TARGET_SSE_MATH
9038 && TARGET_RECIP_DIV
9039 && optimize_insn_for_speed_p ()
9040 && flag_finite_math_only && !flag_trapping_math
9041 && flag_unsafe_math_optimizations)
9042 {
9043 ix86_emit_swdivsf (operands[0], operands[1],
9044 operands[2], SFmode);
9045 DONE;
9046 }
9047 })
9048 \f
9049 ;; Divmod instructions.
9050
;; any_div iterates over the signed (div) and unsigned (udiv) division
;; codes; paired_mod maps each of them to the matching remainder code
;; (mod / umod) so one template can describe both halves of a divmod.
9051 (define_code_iterator any_div [div udiv])
9052 (define_code_attr paired_mod [(div "mod") (udiv "umod")])
9053
;; Named expander for combined division and remainder on SWIM248 modes,
;; generated for each any_div code.  Operand 0 receives the quotient and
;; operand 3 the remainder of operand 1 by operand 2; the parallel also
;; clobbers FLAGS_REG.  No condition and no preparation code.
9054 (define_expand "<u>divmod<mode>4"
9055 [(parallel [(set (match_operand:SWIM248 0 "register_operand")
9056 (any_div:SWIM248
9057 (match_operand:SWIM248 1 "register_operand")
9058 (match_operand:SWIM248 2 "nonimmediate_operand")))
9059 (set (match_operand:SWIM248 3 "register_operand")
9060 (<paired_mod>:SWIM248 (match_dup 1) (match_dup 2)))
9061 (clobber (reg:CC FLAGS_REG))])])
9062
9063 ;; Split with 8bit unsigned divide:
9064 ;; if (dividend and divisor are in [0-255])
9065 ;; use 8bit unsigned integer divide
9066 ;; else
9067 ;; use original integer divide
;; SWI48 quotient/remainder pair (operand 0 = quotient, operand 1 =
;; remainder) for each any_div code.  Applies only while new pseudos can
;; be created, with TARGET_USE_8BIT_IDIV and TARGET_QIMODE_MATH set, and
;; when the insn is not optimized for size.  The replacement template is
;; the dummy (const_int 0): all code is emitted by ix86_split_idivmod,
;; which receives <u_bool> to select the unsigned variant, and the
;; preparation code ends with DONE.
9068 (define_split
9069 [(set (match_operand:SWI48 0 "register_operand")
9070 (any_div:SWI48 (match_operand:SWI48 2 "register_operand")
9071 (match_operand:SWI48 3 "nonimmediate_operand")))
9072 (set (match_operand:SWI48 1 "register_operand")
9073 (<paired_mod>:SWI48 (match_dup 2) (match_dup 3)))
9074 (clobber (reg:CC FLAGS_REG))]
9075 "TARGET_USE_8BIT_IDIV
9076 && TARGET_QIMODE_MATH
9077 && can_create_pseudo_p ()
9078 && !optimize_insn_for_size_p ()"
9079 [(const_int 0)]
9080 "ix86_split_idivmod (<MODE>mode, operands, <u_bool>); DONE;")
9081
;; 8-bit divide split for the SImode form whose quotient is zero-extended
;; into a DImode operand 0 (remainder stays SImode in operand 1).
;; TARGET_64BIT only; otherwise the same conditions as the SWI48 split.
;; All code is emitted by ix86_split_idivmod called with SImode.
9082 (define_split
9083 [(set (match_operand:DI 0 "register_operand")
9084 (zero_extend:DI
9085 (any_div:SI (match_operand:SI 2 "register_operand")
9086 (match_operand:SI 3 "nonimmediate_operand"))))
9087 (set (match_operand:SI 1 "register_operand")
9088 (<paired_mod>:SI (match_dup 2) (match_dup 3)))
9089 (clobber (reg:CC FLAGS_REG))]
9090 "TARGET_64BIT
9091 && TARGET_USE_8BIT_IDIV
9092 && TARGET_QIMODE_MATH
9093 && can_create_pseudo_p ()
9094 && !optimize_insn_for_size_p ()"
9095 [(const_int 0)]
9096 "ix86_split_idivmod (SImode, operands, <u_bool>); DONE;")
9097
;; 8-bit divide split for the SImode form whose remainder is
;; zero-extended into a DImode operand 1 (quotient stays SImode in
;; operand 0).  Operand numbering is kept so that operand 0 is still the
;; quotient and operand 1 the remainder.  TARGET_64BIT only; all code is
;; emitted by ix86_split_idivmod called with SImode.
9098 (define_split
9099 [(set (match_operand:DI 1 "register_operand")
9100 (zero_extend:DI
9101 (<paired_mod>:SI (match_operand:SI 2 "register_operand")
9102 (match_operand:SI 3 "nonimmediate_operand"))))
9103 (set (match_operand:SI 0 "register_operand")
9104 (any_div:SI (match_dup 2) (match_dup 3)))
9105 (clobber (reg:CC FLAGS_REG))]
9106 "TARGET_64BIT
9107 && TARGET_USE_8BIT_IDIV
9108 && TARGET_QIMODE_MATH
9109 && can_create_pseudo_p ()
9110 && !optimize_insn_for_size_p ()"
9111 [(const_int 0)]
9112 "ix86_split_idivmod (SImode, operands, <u_bool>); DONE;")
9113
;; Signed SWI48 div/mod pair carrying an UNSPEC_DIV_ALREADY_SPLIT marker
;; (presumably used for divides already handled by the 8-bit divide
;; split -- confirm).  Quotient "=a", remainder "=&d", dividend tied to
;; the quotient register.
;; Output template "#": after reload the insn is split into
;;   1. an arithmetic right shift of operand 4 by operand 5 into
;;      operand 1, and
;;   2. the div/mod parallel with an added (use operand 1).
;; Operand 5 is the mode bit size minus one.  Operand 4 is operand 2
;; itself when optimizing the function for size or TARGET_USE_CLTD is
;; set; otherwise operand 2 is first copied into operand 1 and the shift
;; is done on operand 1.
9114 (define_insn_and_split "divmod<mode>4_1"
9115 [(set (match_operand:SWI48 0 "register_operand" "=a")
9116 (div:SWI48 (match_operand:SWI48 2 "register_operand" "0")
9117 (match_operand:SWI48 3 "nonimmediate_operand" "rm")))
9118 (set (match_operand:SWI48 1 "register_operand" "=&d")
9119 (mod:SWI48 (match_dup 2) (match_dup 3)))
9120 (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
9121 (clobber (reg:CC FLAGS_REG))]
9122 ""
9123 "#"
9124 "reload_completed"
9125 [(parallel [(set (match_dup 1)
9126 (ashiftrt:SWI48 (match_dup 4) (match_dup 5)))
9127 (clobber (reg:CC FLAGS_REG))])
9128 (parallel [(set (match_dup 0)
9129 (div:SWI48 (match_dup 2) (match_dup 3)))
9130 (set (match_dup 1)
9131 (mod:SWI48 (match_dup 2) (match_dup 3)))
9132 (use (match_dup 1))
9133 (clobber (reg:CC FLAGS_REG))])]
9134 {
9135 operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
9136
9137 if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
9138 operands[4] = operands[2];
9139 else
9140 {
9141 /* Avoid use of cltd in favor of a mov+shift. */
9142 emit_move_insn (operands[1], operands[2]);
9143 operands[4] = operands[1];
9144 }
9145 }
9146 [(set_attr "type" "multi")
9147 (set_attr "mode" "<MODE>")])
9148
;; Unsigned SWI48 div/mod pair carrying an UNSPEC_DIV_ALREADY_SPLIT
;; marker.  Quotient "=a", remainder "=&d", dividend tied to the
;; quotient register.
;; Output template "#": after reload the insn is split into a store of
;; constant 0 into operand 1 followed by the udiv/umod parallel with an
;; added (use operand 1).  No preparation code is needed.
9149 (define_insn_and_split "udivmod<mode>4_1"
9150 [(set (match_operand:SWI48 0 "register_operand" "=a")
9151 (udiv:SWI48 (match_operand:SWI48 2 "register_operand" "0")
9152 (match_operand:SWI48 3 "nonimmediate_operand" "rm")))
9153 (set (match_operand:SWI48 1 "register_operand" "=&d")
9154 (umod:SWI48 (match_dup 2) (match_dup 3)))
9155 (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
9156 (clobber (reg:CC FLAGS_REG))]
9157 ""
9158 "#"
9159 "reload_completed"
9160 [(set (match_dup 1) (const_int 0))
9161 (parallel [(set (match_dup 0)
9162 (udiv:SWI48 (match_dup 2) (match_dup 3)))
9163 (set (match_dup 1)
9164 (umod:SWI48 (match_dup 2) (match_dup 3)))
9165 (use (match_dup 1))
9166 (clobber (reg:CC FLAGS_REG))])]
9167 ""
9168 [(set_attr "type" "multi")
9169 (set_attr "mode" "<MODE>")])
9170
;; Signed SImode div/mod with the quotient zero-extended into a DImode
;; operand 0, carrying an UNSPEC_DIV_ALREADY_SPLIT marker.
;; TARGET_64BIT only.
;; Split after reload into an SImode arithmetic right shift of operand 4
;; by 31 (operand 5) into operand 1, followed by the zero-extending
;; div/mod parallel with an added (use operand 1).
;; Operand 4 is operand 2 itself when optimizing the function for size or
;; TARGET_USE_CLTD is set; otherwise operand 2 is first copied into
;; operand 1 and the shift is done on operand 1.
9171 (define_insn_and_split "divmodsi4_zext_1"
9172 [(set (match_operand:DI 0 "register_operand" "=a")
9173 (zero_extend:DI
9174 (div:SI (match_operand:SI 2 "register_operand" "0")
9175 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
9176 (set (match_operand:SI 1 "register_operand" "=&d")
9177 (mod:SI (match_dup 2) (match_dup 3)))
9178 (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
9179 (clobber (reg:CC FLAGS_REG))]
9180 "TARGET_64BIT"
9181 "#"
9182 "&& reload_completed"
9183 [(parallel [(set (match_dup 1)
9184 (ashiftrt:SI (match_dup 4) (match_dup 5)))
9185 (clobber (reg:CC FLAGS_REG))])
9186 (parallel [(set (match_dup 0)
9187 (zero_extend:DI (div:SI (match_dup 2) (match_dup 3))))
9188 (set (match_dup 1)
9189 (mod:SI (match_dup 2) (match_dup 3)))
9190 (use (match_dup 1))
9191 (clobber (reg:CC FLAGS_REG))])]
9192 {
9193 operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
9194
9195 if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
9196 operands[4] = operands[2];
9197 else
9198 {
9199 /* Avoid use of cltd in favor of a mov+shift. */
9200 emit_move_insn (operands[1], operands[2]);
9201 operands[4] = operands[1];
9202 }
9203 }
9204 [(set_attr "type" "multi")
9205 (set_attr "mode" "SI")])
9206
;; Unsigned SImode div/mod with the quotient zero-extended into a DImode
;; operand 0, carrying an UNSPEC_DIV_ALREADY_SPLIT marker.
;; TARGET_64BIT only.
;; Split after reload into a store of constant 0 into operand 1 followed
;; by the zero-extending udiv/umod parallel with an added
;; (use operand 1).  No preparation code is needed.
9207 (define_insn_and_split "udivmodsi4_zext_1"
9208 [(set (match_operand:DI 0 "register_operand" "=a")
9209 (zero_extend:DI
9210 (udiv:SI (match_operand:SI 2 "register_operand" "0")
9211 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
9212 (set (match_operand:SI 1 "register_operand" "=&d")
9213 (umod:SI (match_dup 2) (match_dup 3)))
9214 (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
9215 (clobber (reg:CC FLAGS_REG))]
9216 "TARGET_64BIT"
9217 "#"
9218 "&& reload_completed"
9219 [(set (match_dup 1) (const_int 0))
9220 (parallel [(set (match_dup 0)
9221 (zero_extend:DI (udiv:SI (match_dup 2) (match_dup 3))))
9222 (set (match_dup 1)
9223 (umod:SI (match_dup 2) (match_dup 3)))
9224 (use (match_dup 1))
9225 (clobber (reg:CC FLAGS_REG))])]
9226 ""
9227 [(set_attr "type" "multi")
9228 (set_attr "mode" "SI")])
9229
;; Signed SImode div/mod with the remainder zero-extended into a DImode
;; operand 1 (quotient stays SImode in operand 0), carrying an
;; UNSPEC_DIV_ALREADY_SPLIT marker.  TARGET_64BIT only.
;; Because operand 1 is DImode here, the split works on operand 6, the
;; SImode lowpart of operand 1: after reload it emits an SImode
;; arithmetic right shift of operand 4 by 31 (operand 5) into operand 6,
;; followed by the div/mod parallel with an added (use operand 6).
;; Operand 4 is operand 2 itself when optimizing the function for size or
;; TARGET_USE_CLTD is set; otherwise operand 2 is first copied into
;; operand 6 and the shift is done on operand 6.
9230 (define_insn_and_split "divmodsi4_zext_2"
9231 [(set (match_operand:DI 1 "register_operand" "=&d")
9232 (zero_extend:DI
9233 (mod:SI (match_operand:SI 2 "register_operand" "0")
9234 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
9235 (set (match_operand:SI 0 "register_operand" "=a")
9236 (div:SI (match_dup 2) (match_dup 3)))
9237 (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
9238 (clobber (reg:CC FLAGS_REG))]
9239 "TARGET_64BIT"
9240 "#"
9241 "&& reload_completed"
9242 [(parallel [(set (match_dup 6)
9243 (ashiftrt:SI (match_dup 4) (match_dup 5)))
9244 (clobber (reg:CC FLAGS_REG))])
9245 (parallel [(set (match_dup 1)
9246 (zero_extend:DI (mod:SI (match_dup 2) (match_dup 3))))
9247 (set (match_dup 0)
9248 (div:SI (match_dup 2) (match_dup 3)))
9249 (use (match_dup 6))
9250 (clobber (reg:CC FLAGS_REG))])]
9251 {
9252 operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
9253 operands[6] = gen_lowpart (SImode, operands[1]);
9254
9255 if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
9256 operands[4] = operands[2];
9257 else
9258 {
9259 /* Avoid use of cltd in favor of a mov+shift. */
9260 emit_move_insn (operands[6], operands[2]);
9261 operands[4] = operands[6];
9262 }
9263 }
9264 [(set_attr "type" "multi")
9265 (set_attr "mode" "SI")])
9266
;; Unsigned SImode div/mod with the remainder zero-extended into a DImode
;; operand 1 (quotient stays SImode in operand 0), carrying an
;; UNSPEC_DIV_ALREADY_SPLIT marker.  TARGET_64BIT only.
;; Split after reload into a store of constant 0 into operand 4, the
;; SImode lowpart of operand 1 computed in the preparation code,
;; followed by the udiv/umod parallel with an added (use operand 4).
9267 (define_insn_and_split "udivmodsi4_zext_2"
9268 [(set (match_operand:DI 1 "register_operand" "=&d")
9269 (zero_extend:DI
9270 (umod:SI (match_operand:SI 2 "register_operand" "0")
9271 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
9272 (set (match_operand:SI 0 "register_operand" "=a")
9273 (udiv:SI (match_dup 2) (match_dup 3)))
9274 (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
9275 (clobber (reg:CC FLAGS_REG))]
9276 "TARGET_64BIT"
9277 "#"
9278 "&& reload_completed"
9279 [(set (match_dup 4) (const_int 0))
9280 (parallel [(set (match_dup 1)
9281 (zero_extend:DI (umod:SI (match_dup 2) (match_dup 3))))
9282 (set (match_dup 0)
9283 (udiv:SI (match_dup 2) (match_dup 3)))
9284 (use (match_dup 4))
9285 (clobber (reg:CC FLAGS_REG))])]
9286 "operands[4] = gen_lowpart (SImode, operands[1]);"
9287 [(set_attr "type" "multi")
9288 (set_attr "mode" "SI")])
9289
;; Signed div/mod pair on SWIM248 modes without the ALREADY_SPLIT marker.
;; Quotient "=a", remainder "=&d", dividend tied to the quotient
;; register.
;; Output template "#": after reload the insn is split into an
;; arithmetic right shift of operand 4 by (bit size - 1) into operand 1,
;; followed by the div/mod parallel with an added (use operand 1).
;; Operand 4 is operand 2 itself only when the mode is not HImode and
;; either the function is optimized for size or TARGET_USE_CLTD is set.
;; In every other case, HImode always included, operand 2 is first
;; copied into operand 1 and the shift is done on operand 1.
9290 (define_insn_and_split "*divmod<mode>4"
9291 [(set (match_operand:SWIM248 0 "register_operand" "=a")
9292 (div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
9293 (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
9294 (set (match_operand:SWIM248 1 "register_operand" "=&d")
9295 (mod:SWIM248 (match_dup 2) (match_dup 3)))
9296 (clobber (reg:CC FLAGS_REG))]
9297 ""
9298 "#"
9299 "reload_completed"
9300 [(parallel [(set (match_dup 1)
9301 (ashiftrt:SWIM248 (match_dup 4) (match_dup 5)))
9302 (clobber (reg:CC FLAGS_REG))])
9303 (parallel [(set (match_dup 0)
9304 (div:SWIM248 (match_dup 2) (match_dup 3)))
9305 (set (match_dup 1)
9306 (mod:SWIM248 (match_dup 2) (match_dup 3)))
9307 (use (match_dup 1))
9308 (clobber (reg:CC FLAGS_REG))])]
9309 {
9310 operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
9311
9312 if (<MODE>mode != HImode
9313 && (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD))
9314 operands[4] = operands[2];
9315 else
9316 {
9317 /* Avoid use of cltd in favor of a mov+shift. */
9318 emit_move_insn (operands[1], operands[2]);
9319 operands[4] = operands[1];
9320 }
9321 }
9322 [(set_attr "type" "multi")
9323 (set_attr "mode" "<MODE>")])
9324
;; Unsigned div/mod pair on SWIM248 modes without the ALREADY_SPLIT
;; marker.  Quotient "=a", remainder "=&d", dividend tied to the quotient
;; register.
;; Output template "#": after reload the insn is split into a store of
;; constant 0 into operand 1 followed by the udiv/umod parallel with an
;; added (use operand 1).  No preparation code is needed.
9325 (define_insn_and_split "*udivmod<mode>4"
9326 [(set (match_operand:SWIM248 0 "register_operand" "=a")
9327 (udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
9328 (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
9329 (set (match_operand:SWIM248 1 "register_operand" "=&d")
9330 (umod:SWIM248 (match_dup 2) (match_dup 3)))
9331 (clobber (reg:CC FLAGS_REG))]
9332 ""
9333 "#"
9334 "reload_completed"
9335 [(set (match_dup 1) (const_int 0))
9336 (parallel [(set (match_dup 0)
9337 (udiv:SWIM248 (match_dup 2) (match_dup 3)))
9338 (set (match_dup 1)
9339 (umod:SWIM248 (match_dup 2) (match_dup 3)))
9340 (use (match_dup 1))
9341 (clobber (reg:CC FLAGS_REG))])]
9342 ""
9343 [(set_attr "type" "multi")
9344 (set_attr "mode" "<MODE>")])
9345
9346 ;; Optimize division or modulo by constant power of 2, if the constant
9347 ;; materializes only after expansion.
;; Unsigned SWI48 div/mod by a constant (operand 3).  The insn condition
;; accepts it only when exact_log2 of the constant lies in [1, 31].
;; After reload it is split into three insns, with v = exact_log2 of
;; operand 3:
;;   operand 1 = operand 2                  (copy the dividend)
;;   operand 0 = operand 2 >> v             (logical shift: quotient)
;;   operand 1 = operand 1 & ((1 << v) - 1) (mask: remainder)
;; The preparation code sets operand 4 to v and operand 5 to the mask.
;; Operand 2 is tied to operand 0 by the "0" constraint.
9348 (define_insn_and_split "*udivmod<mode>4_pow2"
9349 [(set (match_operand:SWI48 0 "register_operand" "=r")
9350 (udiv:SWI48 (match_operand:SWI48 2 "register_operand" "0")
9351 (match_operand:SWI48 3 "const_int_operand")))
9352 (set (match_operand:SWI48 1 "register_operand" "=r")
9353 (umod:SWI48 (match_dup 2) (match_dup 3)))
9354 (clobber (reg:CC FLAGS_REG))]
9355 "IN_RANGE (exact_log2 (UINTVAL (operands[3])), 1, 31)"
9356 "#"
9357 "&& reload_completed"
9358 [(set (match_dup 1) (match_dup 2))
9359 (parallel [(set (match_dup 0) (lshiftrt:<MODE> (match_dup 2) (match_dup 4)))
9360 (clobber (reg:CC FLAGS_REG))])
9361 (parallel [(set (match_dup 1) (and:<MODE> (match_dup 1) (match_dup 5)))
9362 (clobber (reg:CC FLAGS_REG))])]
9363 {
9364 int v = exact_log2 (UINTVAL (operands[3]));
9365 operands[4] = GEN_INT (v);
9366 operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1);
9367 }
9368 [(set_attr "type" "multi")
9369 (set_attr "mode" "<MODE>")])
9370
;; Signed SImode div/mod with the quotient zero-extended into a DImode
;; operand 0, without the ALREADY_SPLIT marker.  TARGET_64BIT only.
;; Split after reload into an SImode arithmetic right shift of operand 4
;; by 31 (operand 5) into operand 1, followed by the zero-extending
;; div/mod parallel with an added (use operand 1).
;; Operand 4 is operand 2 itself when optimizing the function for size or
;; TARGET_USE_CLTD is set; otherwise operand 2 is first copied into
;; operand 1 and the shift is done on operand 1.
9371 (define_insn_and_split "*divmodsi4_zext_1"
9372 [(set (match_operand:DI 0 "register_operand" "=a")
9373 (zero_extend:DI
9374 (div:SI (match_operand:SI 2 "register_operand" "0")
9375 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
9376 (set (match_operand:SI 1 "register_operand" "=&d")
9377 (mod:SI (match_dup 2) (match_dup 3)))
9378 (clobber (reg:CC FLAGS_REG))]
9379 "TARGET_64BIT"
9380 "#"
9381 "&& reload_completed"
9382 [(parallel [(set (match_dup 1)
9383 (ashiftrt:SI (match_dup 4) (match_dup 5)))
9384 (clobber (reg:CC FLAGS_REG))])
9385 (parallel [(set (match_dup 0)
9386 (zero_extend:DI (div:SI (match_dup 2) (match_dup 3))))
9387 (set (match_dup 1)
9388 (mod:SI (match_dup 2) (match_dup 3)))
9389 (use (match_dup 1))
9390 (clobber (reg:CC FLAGS_REG))])]
9391 {
9392 operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
9393
9394 if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
9395 operands[4] = operands[2];
9396 else
9397 {
9398 /* Avoid use of cltd in favor of a mov+shift. */
9399 emit_move_insn (operands[1], operands[2]);
9400 operands[4] = operands[1];
9401 }
9402 }
9403 [(set_attr "type" "multi")
9404 (set_attr "mode" "SI")])
9405
;; Unsigned SImode div/mod with the quotient zero-extended into a DImode
;; operand 0, without the ALREADY_SPLIT marker.  TARGET_64BIT only.
;; Split after reload into a store of constant 0 into operand 1 followed
;; by the zero-extending udiv/umod parallel with an added
;; (use operand 1).  No preparation code is needed.
9406 (define_insn_and_split "*udivmodsi4_zext_1"
9407 [(set (match_operand:DI 0 "register_operand" "=a")
9408 (zero_extend:DI
9409 (udiv:SI (match_operand:SI 2 "register_operand" "0")
9410 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
9411 (set (match_operand:SI 1 "register_operand" "=&d")
9412 (umod:SI (match_dup 2) (match_dup 3)))
9413 (clobber (reg:CC FLAGS_REG))]
9414 "TARGET_64BIT"
9415 "#"
9416 "&& reload_completed"
9417 [(set (match_dup 1) (const_int 0))
9418 (parallel [(set (match_dup 0)
9419 (zero_extend:DI (udiv:SI (match_dup 2) (match_dup 3))))
9420 (set (match_dup 1)
9421 (umod:SI (match_dup 2) (match_dup 3)))
9422 (use (match_dup 1))
9423 (clobber (reg:CC FLAGS_REG))])]
9424 ""
9425 [(set_attr "type" "multi")
9426 (set_attr "mode" "SI")])
9427
;; Unsigned SImode divmod by a constant power of two, with the quotient
;; zero-extended to DImode.  The insn condition requires TARGET_64BIT and
;; exact_log2 of the divisor to lie in [1, 31].  After reload it is split
;; into: copy operand 2 to operand 1, a logical right shift by log2 of the
;; divisor for the quotient, and an AND with (divisor - 1) for the
;; remainder.
9428 (define_insn_and_split "*udivmodsi4_pow2_zext_1"
9429 [(set (match_operand:DI 0 "register_operand" "=r")
9430 (zero_extend:DI
9431 (udiv:SI (match_operand:SI 2 "register_operand" "0")
9432 (match_operand:SI 3 "const_int_operand"))))
9433 (set (match_operand:SI 1 "register_operand" "=r")
9434 (umod:SI (match_dup 2) (match_dup 3)))
9435 (clobber (reg:CC FLAGS_REG))]
9436 "TARGET_64BIT
9437 && IN_RANGE (exact_log2 (UINTVAL (operands[3])), 1, 31)"
9438 "#"
9439 "&& reload_completed"
9440 [(set (match_dup 1) (match_dup 2))
9441 (parallel [(set (match_dup 0)
9442 (zero_extend:DI (lshiftrt:SI (match_dup 2) (match_dup 4))))
9443 (clobber (reg:CC FLAGS_REG))])
9444 (parallel [(set (match_dup 1) (and:SI (match_dup 1) (match_dup 5)))
9445 (clobber (reg:CC FLAGS_REG))])]
9446 {
/* operands[4] is the shift count, operands[5] the remainder mask.  */
9447 int v = exact_log2 (UINTVAL (operands[3]));
9448 operands[4] = GEN_INT (v);
9449 operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1);
9450 }
9451 [(set_attr "type" "multi")
9452 (set_attr "mode" "SI")])
9453
;; Signed SImode divmod where the remainder (operand 1) is the result that
;; is zero-extended to DImode; the quotient (operand 0) stays SImode.  Only
;; enabled for TARGET_64BIT.  After reload it is split into an arithmetic
;; right shift by 31 into the SImode low part of operand 1 (operand 6),
;; followed by the divide pattern with (use (match_dup 6)).
9454 (define_insn_and_split "*divmodsi4_zext_2"
9455 [(set (match_operand:DI 1 "register_operand" "=&d")
9456 (zero_extend:DI
9457 (mod:SI (match_operand:SI 2 "register_operand" "0")
9458 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
9459 (set (match_operand:SI 0 "register_operand" "=a")
9460 (div:SI (match_dup 2) (match_dup 3)))
9461 (clobber (reg:CC FLAGS_REG))]
9462 "TARGET_64BIT"
9463 "#"
9464 "&& reload_completed"
9465 [(parallel [(set (match_dup 6)
9466 (ashiftrt:SI (match_dup 4) (match_dup 5)))
9467 (clobber (reg:CC FLAGS_REG))])
9468 (parallel [(set (match_dup 1)
9469 (zero_extend:DI (mod:SI (match_dup 2) (match_dup 3))))
9470 (set (match_dup 0)
9471 (div:SI (match_dup 2) (match_dup 3)))
9472 (use (match_dup 6))
9473 (clobber (reg:CC FLAGS_REG))])]
9474 {
/* operands[5] is the shift count (SImode bit size - 1); operands[6] is
   the SImode view of the DImode remainder register.  */
9475 operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
9476 operands[6] = gen_lowpart (SImode, operands[1]);
9477
/* Shift operand 2 directly when optimizing for size or when
   TARGET_USE_CLTD is set; otherwise copy it into operand 6 first and
   shift that copy in place.  */
9478 if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
9479 operands[4] = operands[2];
9480 else
9481 {
9482 /* Avoid use of cltd in favor of a mov+shift. */
9483 emit_move_insn (operands[6], operands[2]);
9484 operands[4] = operands[6];
9485 }
9486 }
9487 [(set_attr "type" "multi")
9488 (set_attr "mode" "SI")])
9489
;; Unsigned SImode divmod where the remainder (operand 1) is zero-extended
;; to DImode and the quotient (operand 0) stays SImode.  Only enabled for
;; TARGET_64BIT.  After reload it is split into a move of zero into the
;; SImode low part of operand 1 (operand 4), followed by the divide
;; pattern with (use (match_dup 4)).
9490 (define_insn_and_split "*udivmodsi4_zext_2"
9491 [(set (match_operand:DI 1 "register_operand" "=&d")
9492 (zero_extend:DI
9493 (umod:SI (match_operand:SI 2 "register_operand" "0")
9494 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
9495 (set (match_operand:SI 0 "register_operand" "=a")
9496 (udiv:SI (match_dup 2) (match_dup 3)))
9497 (clobber (reg:CC FLAGS_REG))]
9498 "TARGET_64BIT"
9499 "#"
9500 "&& reload_completed"
9501 [(set (match_dup 4) (const_int 0))
9502 (parallel [(set (match_dup 1)
9503 (zero_extend:DI (umod:SI (match_dup 2) (match_dup 3))))
9504 (set (match_dup 0)
9505 (udiv:SI (match_dup 2) (match_dup 3)))
9506 (use (match_dup 4))
9507 (clobber (reg:CC FLAGS_REG))])]
9508 "operands[4] = gen_lowpart (SImode, operands[1]);"
9509 [(set_attr "type" "multi")
9510 (set_attr "mode" "SI")])
9511
;; Unsigned SImode divmod by a constant power of two, with the remainder
;; (operand 1) zero-extended to DImode and an SImode quotient (operand 0).
;; The insn condition requires TARGET_64BIT and exact_log2 of the divisor
;; to lie in [1, 31].  After reload it is split into a copy, a logical
;; right shift by log2 of the divisor, and an AND with (divisor - 1).
;; NOTE(review): operand 1 is declared DImode while operand 2 is SImode,
;; yet the split template uses (set (match_dup 1) (match_dup 2)) and
;; (and:SI (match_dup 1) ...) -- confirm the mode mismatch is intended.
9512 (define_insn_and_split "*udivmodsi4_pow2_zext_2"
9513 [(set (match_operand:DI 1 "register_operand" "=r")
9514 (zero_extend:DI
9515 (umod:SI (match_operand:SI 2 "register_operand" "0")
9516 (match_operand:SI 3 "const_int_operand"))))
9517 (set (match_operand:SI 0 "register_operand" "=r")
9518 (udiv:SI (match_dup 2) (match_dup 3)))
9519 (clobber (reg:CC FLAGS_REG))]
9520 "TARGET_64BIT
9521 && IN_RANGE (exact_log2 (UINTVAL (operands[3])), 1, 31)"
9522 "#"
9523 "&& reload_completed"
9524 [(set (match_dup 1) (match_dup 2))
9525 (parallel [(set (match_dup 0) (lshiftrt:SI (match_dup 2) (match_dup 4)))
9526 (clobber (reg:CC FLAGS_REG))])
9527 (parallel [(set (match_dup 1)
9528 (zero_extend:DI (and:SI (match_dup 1) (match_dup 5))))
9529 (clobber (reg:CC FLAGS_REG))])]
9530 {
/* operands[4] is the shift count, operands[5] the remainder mask.  */
9531 int v = exact_log2 (UINTVAL (operands[3]));
9532 operands[4] = GEN_INT (v);
9533 operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1);
9534 }
9535 [(set_attr "type" "multi")
9536 (set_attr "mode" "SI")])
9537
;; The divide instruction itself for the SWIM248 modes, iterated over
;; any_div.  Besides the quotient (operand 0) and remainder (operand 1)
;; sets, it carries a (use ...) of operand 4, which the "1" constraint
;; ties to the same register as operand 1.  Emits <sgnprefix>div with the
;; mode's size suffix on operand 3.
9538 (define_insn "*<u>divmod<mode>4_noext"
9539 [(set (match_operand:SWIM248 0 "register_operand" "=a")
9540 (any_div:SWIM248
9541 (match_operand:SWIM248 2 "register_operand" "0")
9542 (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
9543 (set (match_operand:SWIM248 1 "register_operand" "=d")
9544 (<paired_mod>:SWIM248 (match_dup 2) (match_dup 3)))
9545 (use (match_operand:SWIM248 4 "register_operand" "1"))
9546 (clobber (reg:CC FLAGS_REG))]
9547 ""
9548 "<sgnprefix>div{<imodesuffix>}\t%3"
9549 [(set_attr "type" "idiv")
9550 (set_attr "mode" "<MODE>")])
9551
;; SImode divide instruction whose quotient (operand 0) is zero-extended to
;; DImode; the remainder (operand 1) is SImode.  Only enabled for
;; TARGET_64BIT.  Operand 4 is a used register tied to operand 1 by the
;; "1" constraint.  Emits <sgnprefix>div{l} on operand 3.
9552 (define_insn "*<u>divmodsi4_noext_zext_1"
9553 [(set (match_operand:DI 0 "register_operand" "=a")
9554 (zero_extend:DI
9555 (any_div:SI (match_operand:SI 2 "register_operand" "0")
9556 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
9557 (set (match_operand:SI 1 "register_operand" "=d")
9558 (<paired_mod>:SI (match_dup 2) (match_dup 3)))
9559 (use (match_operand:SI 4 "register_operand" "1"))
9560 (clobber (reg:CC FLAGS_REG))]
9561 "TARGET_64BIT"
9562 "<sgnprefix>div{l}\t%3"
9563 [(set_attr "type" "idiv")
9564 (set_attr "mode" "SI")])
9565
;; SImode divide instruction whose remainder (operand 1) is zero-extended
;; to DImode; the quotient (operand 0) is SImode.  Only enabled for
;; TARGET_64BIT.  Operand 4 is a used SImode register tied to operand 1 by
;; the "1" constraint.  Emits <sgnprefix>div{l} on operand 3.
9566 (define_insn "*<u>divmodsi4_noext_zext_2"
9567 [(set (match_operand:DI 1 "register_operand" "=d")
9568 (zero_extend:DI
9569 (<paired_mod>:SI (match_operand:SI 2 "register_operand" "0")
9570 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
9571 (set (match_operand:SI 0 "register_operand" "=a")
9572 (any_div:SI (match_dup 2) (match_dup 3)))
9573 (use (match_operand:SI 4 "register_operand" "1"))
9574 (clobber (reg:CC FLAGS_REG))]
9575 "TARGET_64BIT"
9576 "<sgnprefix>div{l}\t%3"
9577 [(set_attr "type" "idiv")
9578 (set_attr "mode" "SI")])
9579
9580 ;; Avoid sign-extension (using cdq) for constant numerators.
;; Signed SImode divmod whose dividend (operand 2) is a CONST_INT.  Not
;; used when optimizing the function for size.  After reload it is split
;; into: load the constant into operand 0, load operand 1 with -1 or 0
;; according to the constant's sign, then the divide pattern with
;; (use (match_dup 1)).
9581 (define_insn_and_split "*divmodsi4_const"
9582 [(set (match_operand:SI 0 "register_operand" "=&a")
9583 (div:SI (match_operand:SI 2 "const_int_operand")
9584 (match_operand:SI 3 "nonimmediate_operand" "rm")))
9585 (set (match_operand:SI 1 "register_operand" "=&d")
9586 (mod:SI (match_dup 2) (match_dup 3)))
9587 (clobber (reg:CC FLAGS_REG))]
9588 "!optimize_function_for_size_p (cfun)"
9589 "#"
9590 "&& reload_completed"
9591 [(set (match_dup 0) (match_dup 2))
9592 (set (match_dup 1) (match_dup 4))
9593 (parallel [(set (match_dup 0)
9594 (div:SI (match_dup 0) (match_dup 3)))
9595 (set (match_dup 1)
9596 (mod:SI (match_dup 0) (match_dup 3)))
9597 (use (match_dup 1))
9598 (clobber (reg:CC FLAGS_REG))])]
9599 {
/* Value loaded into operand 1: all ones for a negative dividend,
   zero otherwise.  */
9600 operands[4] = INTVAL (operands[2]) < 0 ? constm1_rtx : const0_rtx;
9601 }
9602 [(set_attr "type" "multi")
9603 (set_attr "mode" "SI")])
9604
;; Expander for signed QImode divmod, available under TARGET_QIMODE_MATH.
;; Operand 0 receives the quotient and operand 3 the remainder of
;; operand 1 by operand 2.  The preparation code emits everything itself
;; and finishes with DONE: it sign-extends operand 1 to HImode, emits
;; divmodhiqi3, then moves the two bytes of the HImode result to the
;; outputs, attaching a REG_EQUAL note to each move.
9605 (define_expand "divmodqi4"
9606 [(parallel [(set (match_operand:QI 0 "register_operand")
9607 (div:QI
9608 (match_operand:QI 1 "register_operand")
9609 (match_operand:QI 2 "nonimmediate_operand")))
9610 (set (match_operand:QI 3 "register_operand")
9611 (mod:QI (match_dup 1) (match_dup 2)))
9612 (clobber (reg:CC FLAGS_REG))])]
9613 "TARGET_QIMODE_MATH"
9614 {
9615 rtx div, mod;
9616 rtx tmp0, tmp1;
9617
/* tmp0 holds the HImode divide result, tmp1 the extended dividend.  */
9618 tmp0 = gen_reg_rtx (HImode);
9619 tmp1 = gen_reg_rtx (HImode);
9620
9621 /* Extend operands[1] to HImode. Generate 8bit divide. Result is in AX. */
9622 emit_insn (gen_extendqihi2 (tmp1, operands[1]));
9623 emit_insn (gen_divmodhiqi3 (tmp0, tmp1, operands[2]));
9624
9625 /* Extract remainder from AH. */
9626 tmp1 = gen_rtx_ZERO_EXTRACT (HImode, tmp0, GEN_INT (8), GEN_INT (8));
9627 tmp1 = lowpart_subreg (QImode, tmp1, HImode);
9628 rtx_insn *insn = emit_move_insn (operands[3], tmp1);
9629
/* Record that the moved value equals operand 1 MOD operand 2.  */
9630 mod = gen_rtx_MOD (QImode, operands[1], operands[2]);
9631 set_unique_reg_note (insn, REG_EQUAL, mod);
9632
9633 /* Extract quotient from AL. */
9634 insn = emit_move_insn (operands[0], gen_lowpart (QImode, tmp0));
9635
/* Record that the moved value equals operand 1 DIV operand 2.  */
9636 div = gen_rtx_DIV (QImode, operands[1], operands[2]);
9637 set_unique_reg_note (insn, REG_EQUAL, div);
9638
9639 DONE;
9640 })
9641
;; Expander for unsigned QImode divmod, available under
;; TARGET_QIMODE_MATH.  Operand 0 receives the quotient and operand 3 the
;; remainder of operand 1 by operand 2.  The preparation code emits
;; everything itself and finishes with DONE: it zero-extends operand 1 to
;; HImode, emits udivmodhiqi3, then moves the two bytes of the HImode
;; result to the outputs, attaching a REG_EQUAL note to each move.
9642 (define_expand "udivmodqi4"
9643 [(parallel [(set (match_operand:QI 0 "register_operand")
9644 (udiv:QI
9645 (match_operand:QI 1 "register_operand")
9646 (match_operand:QI 2 "nonimmediate_operand")))
9647 (set (match_operand:QI 3 "register_operand")
9648 (umod:QI (match_dup 1) (match_dup 2)))
9649 (clobber (reg:CC FLAGS_REG))])]
9650 "TARGET_QIMODE_MATH"
9651 {
9652 rtx div, mod;
9653 rtx tmp0, tmp1;
9654
/* tmp0 holds the HImode divide result, tmp1 the extended dividend.  */
9655 tmp0 = gen_reg_rtx (HImode);
9656 tmp1 = gen_reg_rtx (HImode);
9657
9658 /* Extend operands[1] to HImode. Generate 8bit divide. Result is in AX. */
9659 emit_insn (gen_zero_extendqihi2 (tmp1, operands[1]));
9660 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, operands[2]));
9661
9662 /* Extract remainder from AH. */
9663 tmp1 = gen_rtx_ZERO_EXTRACT (HImode, tmp0, GEN_INT (8), GEN_INT (8));
9664 tmp1 = lowpart_subreg (QImode, tmp1, HImode);
9665 rtx_insn *insn = emit_move_insn (operands[3], tmp1);
9666
/* Record that the moved value equals operand 1 UMOD operand 2.  */
9667 mod = gen_rtx_UMOD (QImode, operands[1], operands[2]);
9668 set_unique_reg_note (insn, REG_EQUAL, mod);
9669
9670 /* Extract quotient from AL. */
9671 insn = emit_move_insn (operands[0], gen_lowpart (QImode, tmp0));
9672
/* Record that the moved value equals operand 1 UDIV operand 2.  */
9673 div = gen_rtx_UDIV (QImode, operands[1], operands[2]);
9674 set_unique_reg_note (insn, REG_EQUAL, div);
9675
9676 DONE;
9677 })
9678
9679 ;; Divide AX by r/m8, with result stored in
9680 ;; AL <- Quotient
9681 ;; AH <- Remainder
9682 ;; Change div/mod to HImode and extend the second argument to HImode
9683 ;; so that mode of div/mod matches with mode of arguments. Otherwise
9684 ;; combine may fail.
;; 8-bit divide producing one HImode result: the truncated remainder is
;; shifted left by 8 and IORed with the truncated quotient.  The divisor
;; (operand 2) is QImode, extended to HImode via any_extend.  Available
;; under TARGET_QIMODE_MATH; emits <sgnprefix>div{b} on operand 2.
;; NOTE(review): the RTL uses the mod:HI and div:HI codes for both variants
;; generated by the <u> iterator; only the extension code differs --
;; confirm this is intended for the unsigned variant.
9685 (define_insn "<u>divmodhiqi3"
9686 [(set (match_operand:HI 0 "register_operand" "=a")
9687 (ior:HI
9688 (ashift:HI
9689 (zero_extend:HI
9690 (truncate:QI
9691 (mod:HI (match_operand:HI 1 "register_operand" "0")
9692 (any_extend:HI
9693 (match_operand:QI 2 "nonimmediate_operand" "qm")))))
9694 (const_int 8))
9695 (zero_extend:HI
9696 (truncate:QI
9697 (div:HI (match_dup 1) (any_extend:HI (match_dup 2)))))))
9698 (clobber (reg:CC FLAGS_REG))]
9699 "TARGET_QIMODE_MATH"
9700 "<sgnprefix>div{b}\t%2"
9701 [(set_attr "type" "idiv")
9702 (set_attr "mode" "QI")])
9703
9704 ;; We cannot use div/idiv for double division, because it causes
9705 ;; "division by zero" on the overflow and that's not what we expect
9706 ;; from truncate. Because true (non truncating) double division is
9707 ;; never generated, we can't create this insn anyway.
9708 ;
9709 ;(define_insn ""
9710 ; [(set (match_operand:SI 0 "register_operand" "=a")
9711 ; (truncate:SI
9712 ; (udiv:DI (match_operand:DI 1 "register_operand" "A")
9713 ; (zero_extend:DI
9714 ; (match_operand:SI 2 "nonimmediate_operand" "rm")))))
9715 ; (set (match_operand:SI 3 "register_operand" "=d")
9716 ; (truncate:SI
9717 ; (umod:DI (match_dup 1) (zero_extend:DI (match_dup 2)))))
9718 ; (clobber (reg:CC FLAGS_REG))]
9719 ; ""
9720 ; "div{l}\t{%2, %0|%0, %2}"
9721 ; [(set_attr "type" "idiv")])
9722 \f
9723 ;;- Logical AND instructions
9724
9725 ;; On Pentium, "test imm, reg" is pairable only with eax, ax, and al.
9726 ;; Note that this excludes ah.
9727
;; Expander (SWI48 modes) that sets the flags register in CCNOmode from
;; comparing (operand 0 AND operand 1) against zero.  It has no condition
;; and no preparation code.
9728 (define_expand "@test<mode>_ccno_1"
9729 [(set (reg:CCNO FLAGS_REG)
9730 (compare:CCNO
9731 (and:SWI48
9732 (match_operand:SWI48 0 "nonimmediate_operand")
9733 (match_operand:SWI48 1 "<nonmemory_szext_operand>"))
9734 (const_int 0)))])
9735
;; Expander that sets the flags register in CCZmode from comparing the
;; QImode value (operand 0 AND operand 1) against zero.  It has no
;; condition and no preparation code.
9736 (define_expand "testqi_ccz_1"
9737 [(set (reg:CCZ FLAGS_REG)
9738 (compare:CCZ
9739 (and:QI
9740 (match_operand:QI 0 "nonimmediate_operand")
9741 (match_operand:QI 1 "nonmemory_operand"))
9742 (const_int 0)))])
9743
;; DImode AND compared against zero, setting only the flags.  Only for
;; TARGET_64BIT.  Alternative 0 (operand 1 matching constraint "Z") is
;; emitted as test{l} on the %k forms of both operands; alternative 1 is
;; emitted as test{q}.  The insn condition chooses between CCZmode and
;; CCNOmode as explained in its embedded comment.
9744 (define_insn "*testdi_1"
9745 [(set (reg FLAGS_REG)
9746 (compare
9747 (and:DI
9748 (match_operand:DI 0 "nonimmediate_operand" "%r,rm")
9749 (match_operand:DI 1 "x86_64_szext_nonmemory_operand" "Z,re"))
9750 (const_int 0)))]
9751 "TARGET_64BIT
9752 && ix86_match_ccmode
9753 (insn,
9754 /* If we are going to emit testl instead of testq, and the operands[1]
9755 constant might have the SImode sign bit set, make sure the sign
9756 flag isn't tested, because the instruction will set the sign flag
9757 based on bit 31 rather than bit 63. If it isn't CONST_INT,
9758 conservatively assume it might have bit 31 set. */
9759 (satisfies_constraint_Z (operands[1])
9760 && (!CONST_INT_P (operands[1])
9761 || val_signbit_known_set_p (SImode, INTVAL (operands[1]))))
9762 ? CCZmode : CCNOmode)"
9763 "@
9764 test{l}\t{%k1, %k0|%k0, %k1}
9765 test{q}\t{%1, %0|%0, %1}"
9766 [(set_attr "type" "test")
9767 (set_attr "mode" "SI,DI")])
9768
;; QImode AND compared against zero, setting only the flags.  The required
;; flags mode is CCNOmode when operand 1 is a non-negative CONST_INT and
;; CCZmode otherwise.  When the insn's "mode" attribute is SI the output
;; is test{l} on the %k form of operand 0; otherwise test{b}.  The
;; attribute is SI for alternative 2, or when optimizing for size with
;; operand 0 matching ext_QIreg_operand and operand 1 matching
;; const_0_to_127_operand.
9769 (define_insn "*testqi_1_maybe_si"
9770 [(set (reg FLAGS_REG)
9771 (compare
9772 (and:QI
9773 (match_operand:QI 0 "nonimmediate_operand" "%qm,qm,r")
9774 (match_operand:QI 1 "nonmemory_operand" "q,n,n"))
9775 (const_int 0)))]
9776 "ix86_match_ccmode (insn,
9777 CONST_INT_P (operands[1])
9778 && INTVAL (operands[1]) >= 0 ? CCNOmode : CCZmode)"
9779 {
9780 if (get_attr_mode (insn) == MODE_SI)
9781 {
/* For the 32-bit form, keep only the low 8 bits of a negative
   constant.  */
9782 if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) < 0)
9783 operands[1] = GEN_INT (INTVAL (operands[1]) & 0xff);
9784 return "test{l}\t{%1, %k0|%k0, %1}";
9785 }
9786 return "test{b}\t{%1, %0|%0, %1}";
9787 }
9788 [(set_attr "type" "test")
9789 (set (attr "mode")
9790 (cond [(eq_attr "alternative" "2")
9791 (const_string "SI")
9792 (and (match_test "optimize_insn_for_size_p ()")
9793 (and (match_operand 0 "ext_QIreg_operand")
9794 (match_operand 1 "const_0_to_127_operand")))
9795 (const_string "SI")
9796 ]
9797 (const_string "QI")))
9798 (set_attr "pent_pair" "uv,np,np")])
9799
;; Generic test instruction for the SWI124 modes: AND of operands 0 and 1
;; compared against zero, setting only the flags.  Matches when the flags
;; use is compatible with CCNOmode.  All three alternatives emit the same
;; test template with the mode's size suffix.
9800 (define_insn "*test<mode>_1"
9801 [(set (reg FLAGS_REG)
9802 (compare
9803 (and:SWI124
9804 (match_operand:SWI124 0 "nonimmediate_operand" "%<r>m,*a,<r>m")
9805 (match_operand:SWI124 1 "<nonmemory_szext_operand>" "<r>,<i>,<i>"))
9806 (const_int 0)))]
9807 "ix86_match_ccmode (insn, CCNOmode)"
9808 "test{<imodesuffix>}\t{%1, %0|%0, %1}"
9809 [(set_attr "type" "test")
9810 (set_attr "mode" "<MODE>")
9811 (set_attr "pent_pair" "uv,uv,np")])
9812
;; Expander that sets the flags register in CCNOmode from comparing
;; against zero the AND of a constant (operand 1) with the QImode subreg
;; of an 8-bit zero_extract at bit position 8 of HImode register
;; operand 0.  It has no condition and no preparation code.
9813 (define_expand "testqi_ext_1_ccno"
9814 [(set (reg:CCNO FLAGS_REG)
9815 (compare:CCNO
9816 (and:QI
9817 (subreg:QI
9818 (zero_extract:HI
9819 (match_operand:HI 0 "register_operand")
9820 (const_int 8)
9821 (const_int 8)) 0)
9822 (match_operand:QI 1 "const_int_operand"))
9823 (const_int 0)))])
9824
;; test{b} of the byte extracted at bit position 8 (width 8) of a "Q"
;; class register (printed with the %h modifier) against a general QImode
;; operand.  Matches when the flags use is compatible with CCNOmode.  The
;; second alternative, which takes operand 1 from memory, is marked
;; "nox64" in the "isa" attribute.
9825 (define_insn "*testqi_ext<mode>_1"
9826 [(set (reg FLAGS_REG)
9827 (compare
9828 (and:QI
9829 (subreg:QI
9830 (zero_extract:SWI248
9831 (match_operand:SWI248 0 "register_operand" "Q,Q")
9832 (const_int 8)
9833 (const_int 8)) 0)
9834 (match_operand:QI 1 "general_operand" "QnBc,m"))
9835 (const_int 0)))]
9836 "ix86_match_ccmode (insn, CCNOmode)"
9837 "test{b}\t{%1, %h0|%h0, %1}"
9838 [(set_attr "isa" "*,nox64")
9839 (set_attr "type" "test")
9840 (set_attr "mode" "QI")])
9841
;; test{b} between two bytes, each extracted at bit position 8 (width 8)
;; of a "Q" class register; both operands are printed with the %h
;; modifier.  Matches when the flags use is compatible with CCNOmode.
9842 (define_insn "*testqi_ext<mode>_2"
9843 [(set (reg FLAGS_REG)
9844 (compare
9845 (and:QI
9846 (subreg:QI
9847 (zero_extract:SWI248
9848 (match_operand:SWI248 0 "register_operand" "Q")
9849 (const_int 8)
9850 (const_int 8)) 0)
9851 (subreg:QI
9852 (zero_extract:SWI248
9853 (match_operand:SWI248 1 "register_operand" "Q")
9854 (const_int 8)
9855 (const_int 8)) 0))
9856 (const_int 0)))]
9857 "ix86_match_ccmode (insn, CCNOmode)"
9858 "test{b}\t{%h1, %h0|%h0, %h1}"
9859 [(set_attr "type" "test")
9860 (set_attr "mode" "QI")])
9861
9862 ;; Provide a *testti instruction that STV can implement using ptest.
9863 ;; This pattern splits into *andti3_doubleword and *cmpti_doubleword.
;; TImode AND compared against zero in CCZmode.  Matches only for
;; TARGET_64BIT while ix86_pre_reload_split () holds, and is split
;; unconditionally ("&& 1") into a TImode AND into a fresh pseudo followed
;; by a CCZmode compare of that pseudo with zero.
9864 (define_insn_and_split "*testti_doubleword"
9865 [(set (reg:CCZ FLAGS_REG)
9866 (compare:CCZ
9867 (and:TI (match_operand:TI 0 "register_operand")
9868 (match_operand:TI 1 "general_operand"))
9869 (const_int 0)))]
9870 "TARGET_64BIT
9871 && ix86_pre_reload_split ()"
9872 "#"
9873 "&& 1"
9874 [(parallel [(set (match_dup 2) (and:TI (match_dup 0) (match_dup 1)))
9875 (clobber (reg:CC FLAGS_REG))])
9876 (set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 2) (const_int 0)))]
9877 {
/* Fresh pseudo for the AND result; operand 1 is forced into a
   register when it does not satisfy x86_64_hilo_general_operand.  */
9878 operands[2] = gen_reg_rtx (TImode);
9879 if (!x86_64_hilo_general_operand (operands[1], TImode))
9880 operands[1] = force_reg (TImode, operands[1]);
9881 })
9882
9883 ;; Combine likes to form bit extractions for some tests. Humor it.
;; Compare of a zero_extract (operand 2, length operand 3, position
;; operand 4) against zero.  It is split unconditionally ("&& 1") into a
;; compare of (AND value mask) against zero, where the preparation code
;; picks the value's mode and builds a mask of LEN bits starting at POS.
9884 (define_insn_and_split "*testqi_ext_3"
9885 [(set (match_operand 0 "flags_reg_operand")
9886 (match_operator 1 "compare_operator"
9887 [(zero_extract:SWI248
9888 (match_operand 2 "int_nonimmediate_operand" "rm")
9889 (match_operand 3 "const_int_operand")
9890 (match_operand 4 "const_int_operand"))
9891 (const_int 0)]))]
9892 "/* Ensure that resulting mask is zero or sign extended operand. */
9893 INTVAL (operands[4]) >= 0
9894 && ((INTVAL (operands[3]) > 0
9895 && INTVAL (operands[3]) + INTVAL (operands[4]) <= 32)
9896 || (<MODE>mode == DImode
9897 && INTVAL (operands[3]) > 32
9898 && INTVAL (operands[3]) + INTVAL (operands[4]) == 64))
9899 && ix86_match_ccmode (insn,
9900 /* If zero_extract mode precision is the same
9901 as len, the SF of the zero_extract
9902 comparison will be the most significant
9903 extracted bit, but this could be matched
9904 after splitting only for pos 0 len all bits
9905 trivial extractions. Require CCZmode. */
9906 (GET_MODE_PRECISION (<MODE>mode)
9907 == INTVAL (operands[3]))
9908 /* Otherwise, require CCZmode if we'd use a mask
9909 with the most significant bit set and can't
9910 widen it to wider mode. *testdi_1 also
9911 requires CCZmode if the mask has bit
9912 31 set and all bits above it clear. */
9913 || (INTVAL (operands[3]) + INTVAL (operands[4])
9914 >= 32)
9915 /* We can't widen also if val is not a REG. */
9916 || (INTVAL (operands[3]) + INTVAL (operands[4])
9917 == GET_MODE_PRECISION (GET_MODE (operands[2]))
9918 && !register_operand (operands[2],
9919 GET_MODE (operands[2])))
9920 /* And we shouldn't widen if
9921 TARGET_PARTIAL_REG_STALL. */
9922 || (TARGET_PARTIAL_REG_STALL
9923 && (INTVAL (operands[3]) + INTVAL (operands[4])
9924 >= (paradoxical_subreg_p (operands[2])
9925 && (GET_MODE_CLASS
9926 (GET_MODE (SUBREG_REG (operands[2])))
9927 == MODE_INT)
9928 ? GET_MODE_PRECISION
9929 (GET_MODE (SUBREG_REG (operands[2])))
9930 : GET_MODE_PRECISION
9931 (GET_MODE (operands[2])))))
9932 ? CCZmode : CCNOmode)"
9933 "#"
9934 "&& 1"
9935 [(set (match_dup 0) (match_op_dup 1 [(match_dup 2) (const_int 0)]))]
9936 {
/* VAL and MODE start as the extracted operand and its mode and may be
   narrowed or widened by the steps below.  */
9937 rtx val = operands[2];
9938 HOST_WIDE_INT len = INTVAL (operands[3]);
9939 HOST_WIDE_INT pos = INTVAL (operands[4]);
9940 machine_mode mode = GET_MODE (val);
9941
9942 if (SUBREG_P (val))
9943 {
9944 machine_mode submode = GET_MODE (SUBREG_REG (val));
9945
9946 /* Narrow paradoxical subregs to prevent partial register stalls. */
9947 if (GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (submode)
9948 && GET_MODE_CLASS (submode) == MODE_INT
9949 && (GET_MODE (operands[0]) == CCZmode
9950 || pos + len < GET_MODE_PRECISION (submode)
9951 || REG_P (SUBREG_REG (val))))
9952 {
9953 val = SUBREG_REG (val);
9954 mode = submode;
9955 }
9956 }
9957
9958 /* Small HImode tests can be converted to QImode. */
9959 if (pos + len <= 8
9960 && register_operand (val, HImode))
9961 {
9962 rtx nval = gen_lowpart (QImode, val);
9963 if (!MEM_P (nval)
9964 || GET_MODE (operands[0]) == CCZmode
9965 || pos + len < 8)
9966 {
9967 val = nval;
9968 mode = QImode;
9969 }
9970 }
9971
9972 gcc_assert (pos + len <= GET_MODE_PRECISION (mode));
9973
9974 /* If the mask is going to have the sign bit set in the mode
9975 we want to do the comparison in and user isn't interested just
9976 in the zero flag, then we must widen the target mode. */
9977 if (pos + len == GET_MODE_PRECISION (mode)
9978 && GET_MODE (operands[0]) != CCZmode)
9979 {
9980 gcc_assert (pos + len < 32 && !MEM_P (val));
9981 mode = SImode;
9982 val = gen_lowpart (mode, val);
9983 }
9984
/* Mask of LEN bits starting at bit POS, in the precision of MODE.  */
9985 wide_int mask
9986 = wi::shifted_mask (pos, len, false, GET_MODE_PRECISION (mode));
9987
/* The split compares (VAL AND mask) against zero.  */
9988 operands[2] = gen_rtx_AND (mode, val, immed_wide_int_const (mask, mode));
9989 })
9990
9991 ;; Split and;cmp (as optimized by combine) into not;test
9992 ;; Except when TARGET_BMI provides andn (*andn_<mode>_ccno).
;; CCZmode compare against zero of ((NOT operand 0) AND operand 1) for the
;; SWI modes.  Matches while ix86_pre_reload_split () holds, unless
;; TARGET_BMI is set and operand 1 is a register.  Split unconditionally
;; into a NOT into a fresh pseudo followed by an AND-with-zero compare
;; using that pseudo.
9993 (define_insn_and_split "*test<mode>_not"
9994 [(set (reg:CCZ FLAGS_REG)
9995 (compare:CCZ
9996 (and:SWI
9997 (not:SWI (match_operand:SWI 0 "register_operand"))
9998 (match_operand:SWI 1 "<nonmemory_szext_operand>"))
9999 (const_int 0)))]
10000 "ix86_pre_reload_split ()
10001 && (!TARGET_BMI || !REG_P (operands[1]))"
10002 "#"
10003 "&& 1"
10004 [(set (match_dup 2) (not:SWI (match_dup 0)))
10005 (set (reg:CCZ FLAGS_REG)
10006 (compare:CCZ (and:SWI (match_dup 2) (match_dup 1))
10007 (const_int 0)))]
10008 "operands[2] = gen_reg_rtx (<MODE>mode);")
10009
10010 ;; Split and;cmp (as optimized by combine) into andn;cmp $0
;; Double-word (DWI modes) version: CCZmode compare against zero of
;; ((NOT operand 0) AND operand 1).  Matches while
;; ix86_pre_reload_split () holds.  Split unconditionally into an
;; and-not into a fresh pseudo (with a flags clobber) followed by a
;; compare of that pseudo with zero.
10011 (define_insn_and_split "*test<mode>_not_doubleword"
10012 [(set (reg:CCZ FLAGS_REG)
10013 (compare:CCZ
10014 (and:DWI
10015 (not:DWI (match_operand:DWI 0 "nonimmediate_operand"))
10016 (match_operand:DWI 1 "nonimmediate_operand"))
10017 (const_int 0)))]
10018 "ix86_pre_reload_split ()"
10019 "#"
10020 "&& 1"
10021 [(parallel
10022 [(set (match_dup 2) (and:DWI (not:DWI (match_dup 0)) (match_dup 1)))
10023 (clobber (reg:CC FLAGS_REG))])
10024 (set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 2) (const_int 0)))]
10025 {
/* The inverted operand is forced into a register; operand 2 is a fresh
   pseudo for the and-not result.  */
10026 operands[0] = force_reg (<MODE>mode, operands[0]);
10027 operands[2] = gen_reg_rtx (<MODE>mode);
10028 })
10029
10030 ;; Convert HImode/SImode test instructions with immediate to QImode ones.
10031 ;; i386 cannot encode test with an 8-bit sign-extended immediate, so
10032 ;; this is a relatively important trick.
10033 ;; Do the conversion only post-reload to avoid limiting the register class
10034 ;; to QI regs.
;; After reload, rewrite a non-QImode test of a QIreg_operand against a
;; constant whose set bits all lie in bits 8..15 (bits 8..14 when only
;; CCNOmode matches) as a QImode test of the register's second byte,
;; expressed as an 8-bit zero_extract at position 8.
10035 (define_split
10036 [(set (match_operand 0 "flags_reg_operand")
10037 (match_operator 1 "compare_operator"
10038 [(and (match_operand 2 "QIreg_operand")
10039 (match_operand 3 "const_int_operand"))
10040 (const_int 0)]))]
10041 "reload_completed
10042 && GET_MODE (operands[2]) != QImode
10043 && ((ix86_match_ccmode (insn, CCZmode)
10044 && !(INTVAL (operands[3]) & ~(255 << 8)))
10045 || (ix86_match_ccmode (insn, CCNOmode)
10046 && !(INTVAL (operands[3]) & ~(127 << 8))))"
10047 [(set (match_dup 0)
10048 (match_op_dup 1
10049 [(and:QI
10050 (subreg:QI
10051 (zero_extract:SI (match_dup 2)
10052 (const_int 8)
10053 (const_int 8)) 0)
10054 (match_dup 3))
10055 (const_int 0)]))]
10056 {
/* View the register in SImode for the zero_extract and shift the mask
   down by 8 so it applies to the extracted byte.  */
10057 operands[2] = gen_lowpart (SImode, operands[2]);
10058 operands[3] = gen_int_mode (INTVAL (operands[3]) >> 8, QImode);
10059 })
10060
;; After reload, rewrite a non-QImode test against a constant whose set
;; bits all lie in bits 0..7 (bits 0..6 when only CCNOmode matches) as a
;; QImode test of the operand's low part.  Register operands must satisfy
;; ANY_QI_REG_P; non-register operands are accepted as they are.
10061 (define_split
10062 [(set (match_operand 0 "flags_reg_operand")
10063 (match_operator 1 "compare_operator"
10064 [(and (match_operand 2 "nonimmediate_operand")
10065 (match_operand 3 "const_int_operand"))
10066 (const_int 0)]))]
10067 "reload_completed
10068 && GET_MODE (operands[2]) != QImode
10069 && (!REG_P (operands[2]) || ANY_QI_REG_P (operands[2]))
10070 && ((ix86_match_ccmode (insn, CCZmode)
10071 && !(INTVAL (operands[3]) & ~255))
10072 || (ix86_match_ccmode (insn, CCNOmode)
10073 && !(INTVAL (operands[3]) & ~127)))"
10074 [(set (match_dup 0)
10075 (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3))
10076 (const_int 0)]))]
10077 {
/* Narrow both the tested operand and the mask to QImode.  */
10078 operands[2] = gen_lowpart (QImode, operands[2]);
10079 operands[3] = gen_int_mode (INTVAL (operands[3]), QImode);
10080 })
10081
10082 ;; %%% This used to optimize known byte-wide and operations to memory,
10083 ;; and sometimes to QImode registers. If this is considered useful,
10084 ;; it should be done with splitters.
10085
;; Expander for AND in the SDWIM modes.  The preparation code emits
;; everything itself and finishes with DONE.  For modes wider than a word,
;; operand 2 is forced into a register unless it satisfies
;; x86_64_hilo_general_operand.  For word-sized or narrower modes, an AND
;; of a register destination with a constant equal to the SImode, HImode
;; or QImode mask is emitted as an unsigned extension of the low part of
;; operand 1, except when TARGET_ZERO_EXTEND_WITH_AND is set and the
;; function is optimized for speed.  Everything else goes through
;; ix86_expand_binary_operator.
10086 (define_expand "and<mode>3"
10087 [(set (match_operand:SDWIM 0 "nonimmediate_operand")
10088 (and:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")
10089 (match_operand:SDWIM 2 "<general_szext_operand>")))]
10090 ""
10091 {
/* MODE stays <MODE>mode unless the constant turns out to be a
   narrower mode's mask.  */
10092 machine_mode mode = <MODE>mode;
10093
10094 if (GET_MODE_SIZE (<MODE>mode) > UNITS_PER_WORD
10095 && !x86_64_hilo_general_operand (operands[2], <MODE>mode))
10096 operands[2] = force_reg (<MODE>mode, operands[2]);
10097
10098 if (GET_MODE_SIZE (<MODE>mode) <= UNITS_PER_WORD
10099 && const_int_operand (operands[2], <MODE>mode)
10100 && register_operand (operands[0], <MODE>mode)
10101 && !(TARGET_ZERO_EXTEND_WITH_AND
10102 && optimize_function_for_speed_p (cfun)))
10103 {
10104 unsigned HOST_WIDE_INT ival = UINTVAL (operands[2]);
10105
10106 if (ival == GET_MODE_MASK (SImode))
10107 mode = SImode;
10108 else if (ival == GET_MODE_MASK (HImode))
10109 mode = HImode;
10110 else if (ival == GET_MODE_MASK (QImode))
10111 mode = QImode;
10112 }
10113
/* A narrower MODE means the AND is an extension from MODE (the final
   argument 1 requests the unsigned form).  */
10114 if (mode != <MODE>mode)
10115 emit_insn (gen_extend_insn
10116 (operands[0], gen_lowpart (mode, operands[1]),
10117 <MODE>mode, mode, 1));
10118 else
10119 ix86_expand_binary_operator (AND, <MODE>mode, operands);
10120
10121 DONE;
10122 })
10123
;; Double-word AND, split after reload into two half-width operations.
;; split_double_mode places one set of halves in operands[0..2] and the
;; other in operands[3..5] (presumably low and high -- confirm against
;; split_double_mode).  For each half: a constant 0 becomes a move of
;; zero, a constant -1 emits nothing, and anything else becomes a
;; half-width AND.  If both halves are AND with -1, a NOTE_INSN_DELETED
;; is emitted instead.
10124 (define_insn_and_split "*and<dwi>3_doubleword"
10125 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
10126 (and:<DWI>
10127 (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0")
10128 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o")))
10129 (clobber (reg:CC FLAGS_REG))]
10130 "ix86_binary_operator_ok (AND, <DWI>mode, operands)"
10131 "#"
10132 "&& reload_completed"
10133 [(const_int:DWIH 0)]
10134 {
/* Set when the first half needed no insn at all.  */
10135 bool emit_insn_deleted_note_p = false;
10136
10137 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
10138
/* First half: operands[0..2].  */
10139 if (operands[2] == const0_rtx)
10140 emit_move_insn (operands[0], const0_rtx);
10141 else if (operands[2] == constm1_rtx)
10142 emit_insn_deleted_note_p = true;
10143 else
10144 ix86_expand_binary_operator (AND, <MODE>mode, &operands[0]);
10145
/* Second half: operands[3..5].  */
10146 if (operands[5] == const0_rtx)
10147 emit_move_insn (operands[3], const0_rtx);
10148 else if (operands[5] == constm1_rtx)
10149 {
10150 if (emit_insn_deleted_note_p)
10151 emit_note (NOTE_INSN_DELETED);
10152 }
10153 else
10154 ix86_expand_binary_operator (AND, <MODE>mode, &operands[3]);
10155
10156 DONE;
10157 })
10158
;; DImode AND for TARGET_64BIT with five alternatives: and{l} on the %k
;; operands when operand 2 matches "Z"; and{q} with a register/immediate
;; source; and{q} with a memory source; and two alternatives that output
;; "#" -- one typed imovx for an "L" constant and one typed msklog on "k"
;; registers (isa avx512bw).  The prefix_rex attribute is "1" for the
;; imovx alternative when the constant is 0xff and operand 1 matches
;; ext_QIreg_operand.
10159 (define_insn "*anddi_1"
10160 [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r,?k")
10161 (and:DI
10162 (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm,k")
10163 (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,m,L,k")))
10164 (clobber (reg:CC FLAGS_REG))]
10165 "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)"
10166 "@
10167 and{l}\t{%k2, %k0|%k0, %k2}
10168 and{q}\t{%2, %0|%0, %2}
10169 and{q}\t{%2, %0|%0, %2}
10170 #
10171 #"
10172 [(set_attr "isa" "x64,x64,x64,x64,avx512bw")
10173 (set_attr "type" "alu,alu,alu,imovx,msklog")
10174 (set_attr "length_immediate" "*,*,*,0,*")
10175 (set (attr "prefix_rex")
10176 (if_then_else
10177 (and (eq_attr "type" "imovx")
10178 (and (match_test "INTVAL (operands[2]) == 0xff")
10179 (match_operand 1 "ext_QIreg_operand")))
10180 (const_string "1")
10181 (const_string "*")))
10182 (set_attr "mode" "SI,DI,DI,SI,DI")])
10183
;; DImode AND with a constant whose complement is a single bit at a
;; position in [31, 63].  Requires TARGET_64BIT and TARGET_USE_BT.  After
;; reload it is split into a store of zero to the one-bit zero_extract of
;; operand 0 at that position; operand 3 is the bit position.
10184 (define_insn_and_split "*anddi_1_btr"
10185 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
10186 (and:DI
10187 (match_operand:DI 1 "nonimmediate_operand" "%0")
10188 (match_operand:DI 2 "const_int_operand" "n")))
10189 (clobber (reg:CC FLAGS_REG))]
10190 "TARGET_64BIT && TARGET_USE_BT
10191 && ix86_binary_operator_ok (AND, DImode, operands)
10192 && IN_RANGE (exact_log2 (~INTVAL (operands[2])), 31, 63)"
10193 "#"
10194 "&& reload_completed"
10195 [(parallel [(set (zero_extract:DI (match_dup 0)
10196 (const_int 1)
10197 (match_dup 3))
10198 (const_int 0))
10199 (clobber (reg:CC FLAGS_REG))])]
10200 "operands[3] = GEN_INT (exact_log2 (~INTVAL (operands[2])));"
10201 [(set_attr "type" "alu1")
10202 (set_attr "prefix_0f" "1")
10203 (set_attr "znver1_decode" "double")
10204 (set_attr "mode" "DI")])
10205
10206 ;; Turn *anddi_1 into *andsi_1_zext if possible.
;; For TARGET_64BIT: a DImode AND of (subreg:DI of an SImode register)
;; with an x86_64_zext_immediate_operand becomes a zero_extend:DI of the
;; SImode AND.  The preparation code rewrites operand 2 into SImode.
10207 (define_split
10208 [(set (match_operand:DI 0 "register_operand")
10209 (and:DI (subreg:DI (match_operand:SI 1 "register_operand") 0)
10210 (match_operand:DI 2 "x86_64_zext_immediate_operand")))
10211 (clobber (reg:CC FLAGS_REG))]
10212 "TARGET_64BIT"
10213 [(parallel [(set (match_dup 0)
10214 (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))
10215 (clobber (reg:CC FLAGS_REG))])]
10216 {
/* Symbol and label references: change the mode on a shallow copy so
   the original rtx is left untouched.  */
10217 if (GET_CODE (operands[2]) == SYMBOL_REF
10218 || GET_CODE (operands[2]) == LABEL_REF)
10219 {
10220 operands[2] = shallow_copy_rtx (operands[2]);
10221 PUT_MODE (operands[2], SImode);
10222 }
10223 else if (GET_CODE (operands[2]) == CONST)
10224 {
10225 /* (const:DI (plus:DI (symbol_ref:DI ("...")) (const_int N))) */
/* Copy the expression, then retag the CONST, its inner operand and
   that operand's first operand as SImode.  */
10226 operands[2] = copy_rtx (operands[2]);
10227 PUT_MODE (operands[2], SImode);
10228 PUT_MODE (XEXP (operands[2], 0), SImode);
10229 PUT_MODE (XEXP (XEXP (operands[2], 0), 0), SImode);
10230 }
10231 else
10232 operands[2] = gen_lowpart (SImode, operands[2]);
10233 })
10234
10235 ;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
;; SImode AND whose result is zero-extended into a DImode register.
;; Requires TARGET_64BIT.  Emits and{l} with the destination printed in
;; its %k form.
10236 (define_insn "*andsi_1_zext"
10237 [(set (match_operand:DI 0 "register_operand" "=r")
10238 (zero_extend:DI
10239 (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
10240 (match_operand:SI 2 "x86_64_general_operand" "rBMe"))))
10241 (clobber (reg:CC FLAGS_REG))]
10242 "TARGET_64BIT && ix86_binary_operator_ok (AND, SImode, operands)"
10243 "and{l}\t{%2, %k0|%k0, %2}"
10244 [(set_attr "type" "alu")
10245 (set_attr "mode" "SI")])
10246
;; AND for the SWI24 modes with four alternatives: two ordinary and
;; forms, one that outputs "#" and is typed imovx (operand 2 an "L"
;; constant), and one that outputs "#" and is typed msklog on "k"
;; registers.  The msklog alternative's isa is avx512bw when the "mode"
;; attribute is SI and avx512f otherwise.  The prefix_rex attribute is
;; "1" for the imovx alternative when the constant is 0xff and operand 1
;; matches ext_QIreg_operand.
10247 (define_insn "*and<mode>_1"
10248 [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,r,Ya,?k")
10249 (and:SWI24 (match_operand:SWI24 1 "nonimmediate_operand" "%0,0,qm,k")
10250 (match_operand:SWI24 2 "<general_operand>" "r<i>,<m>,L,k")))
10251 (clobber (reg:CC FLAGS_REG))]
10252 "ix86_binary_operator_ok (AND, <MODE>mode, operands)"
10253 "@
10254 and{<imodesuffix>}\t{%2, %0|%0, %2}
10255 and{<imodesuffix>}\t{%2, %0|%0, %2}
10256 #
10257 #"
10258 [(set (attr "isa")
10259 (cond [(eq_attr "alternative" "3")
10260 (if_then_else (eq_attr "mode" "SI")
10261 (const_string "avx512bw")
10262 (const_string "avx512f"))
10263 ]
10264 (const_string "*")))
10265 (set_attr "type" "alu,alu,imovx,msklog")
10266 (set_attr "length_immediate" "*,*,0,*")
10267 (set (attr "prefix_rex")
10268 (if_then_else
10269 (and (eq_attr "type" "imovx")
10270 (and (match_test "INTVAL (operands[2]) == 0xff")
10271 (match_operand 1 "ext_QIreg_operand")))
10272 (const_string "1")
10273 (const_string "*")))
10274 (set_attr "mode" "<MODE>,<MODE>,SI,<MODE>")])
10275
;; QImode AND with four alternatives: two and{b} forms, an and{l} form on
;; the %k operands (alternative 2, "mode" attribute SI), and a "#"
;; alternative typed msklog on "k" registers whose "mode" attribute is HI
;; when TARGET_AVX512DQ is not set.  Alternative 2 is preferred for speed
;; only when TARGET_PARTIAL_REG_STALL is not set.
10276 (define_insn "*andqi_1"
10277 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,?k")
10278 (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k")
10279 (match_operand:QI 2 "general_operand" "qn,m,rn,k")))
10280 (clobber (reg:CC FLAGS_REG))]
10281 "ix86_binary_operator_ok (AND, QImode, operands)"
10282 "@
10283 and{b}\t{%2, %0|%0, %2}
10284 and{b}\t{%2, %0|%0, %2}
10285 and{l}\t{%k2, %k0|%k0, %k2}
10286 #"
10287 [(set_attr "type" "alu,alu,alu,msklog")
10288 (set (attr "mode")
10289 (cond [(eq_attr "alternative" "2")
10290 (const_string "SI")
10291 (and (eq_attr "alternative" "3")
10292 (match_test "!TARGET_AVX512DQ"))
10293 (const_string "HI")
10294 ]
10295 (const_string "QI")))
10296 ;; Potential partial reg stall on alternative 2.
10297 (set (attr "preferred_for_speed")
10298 (cond [(eq_attr "alternative" "2")
10299 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
10300 (symbol_ref "true")))])
10301
10302 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; AND into the strict_low_part of a register for the SWI12 modes.
;; Enabled when TARGET_PARTIAL_REG_STALL is not set or when optimizing
;; the function for size.  Alternative 0 emits the and directly;
;; alternative 1 outputs "#" and is split after reload into a
;; strict_low_part move of operand 1 into operand 0 followed by the
;; in-place AND with operand 2.
10303 (define_insn_and_split "*and<mode>_1_slp"
10304 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
10305 (and:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "%0,!<r>")
10306 (match_operand:SWI12 2 "general_operand" "<r>mn,<r>mn")))
10307 (clobber (reg:CC FLAGS_REG))]
10308 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
10309 "@
10310 and{<imodesuffix>}\t{%2, %0|%0, %2}
10311 #"
10312 "&& reload_completed"
10313 [(set (strict_low_part (match_dup 0)) (match_dup 1))
10314 (parallel
10315 [(set (strict_low_part (match_dup 0))
10316 (and:SWI12 (match_dup 0) (match_dup 2)))
10317 (clobber (reg:CC FLAGS_REG))])]
10318 ""
10319 [(set_attr "type" "alu")
10320 (set_attr "mode" "<MODE>")])
10321
;; After reload, replace an AND of operand 1 with a constant by a zero
;; extension into operand 0.  Applies only when operand 1 is not a register
;; or is a different register than operand 0.
;; The C body accepts only constants equal to the mode mask of SImode,
;; HImode or QImode and hits gcc_unreachable for anything else.
;; NOTE(review): presumably the only AND insns that reach this split with
;; such operands are the zero-extending alternatives -- confirm against the
;; AND insn patterns.
10322 (define_split
10323 [(set (match_operand:SWI248 0 "register_operand")
10324 (and:SWI248 (match_operand:SWI248 1 "nonimmediate_operand")
10325 (match_operand:SWI248 2 "const_int_operand")))
10326 (clobber (reg:CC FLAGS_REG))]
10327 "reload_completed
10328 && (!REG_P (operands[1])
10329 || REGNO (operands[0]) != REGNO (operands[1]))"
10330 [(const_int 0)]
10331 {
10332 unsigned HOST_WIDE_INT ival = UINTVAL (operands[2]);
10333 machine_mode mode;
10334
/* Pick the source mode whose mask equals the AND constant.  */
10335 if (ival == GET_MODE_MASK (SImode))
10336 mode = SImode;
10337 else if (ival == GET_MODE_MASK (HImode))
10338 mode = HImode;
10339 else if (ival == GET_MODE_MASK (QImode))
10340 mode = QImode;
10341 else
10342 gcc_unreachable ();
10343
10344 /* Zero extend to SImode to avoid partial register stalls. */
10345 if (<MODE_SIZE> < GET_MODE_SIZE (SImode))
10346 operands[0] = gen_lowpart (SImode, operands[0]);
10347
/* Emit the extension from MODE to the (possibly widened) destination
   mode; the final argument 1 requests an unsigned extension.  */
10348 emit_insn (gen_extend_insn
10349 (operands[0], gen_lowpart (mode, operands[1]),
10350 GET_MODE (operands[0]), mode, 1));
10351 DONE;
10352 })
10353
;; Rewrite "reg &= -65536" (clear the low 16 bits of a SWI48 register) as a
;; store of zero into the HImode low part of that register.
;; Enabled when (TARGET_FAST_PREFIX && !TARGET_PARTIAL_REG_STALL) or when
;; optimizing the function for size.  The replacement carries no flags
;; clobber.
10354 (define_split
10355 [(set (match_operand:SWI48 0 "register_operand")
10356 (and:SWI48 (match_dup 0)
10357 (const_int -65536)))
10358 (clobber (reg:CC FLAGS_REG))]
10359 "(TARGET_FAST_PREFIX && !TARGET_PARTIAL_REG_STALL)
10360 || optimize_function_for_size_p (cfun)"
10361 [(set (strict_low_part (match_dup 1)) (const_int 0))]
10362 "operands[1] = gen_lowpart (HImode, operands[0]);")
10363
;; After reload, rewrite "reg &= -256" (clear the low 8 bits) as a store of
;; zero into the QImode low part of the register.  The register must
;; satisfy any_QIreg_operand.  Enabled when !TARGET_PARTIAL_REG_STALL or
;; when optimizing the function for size.
10364 (define_split
10365 [(set (match_operand:SWI248 0 "any_QIreg_operand")
10366 (and:SWI248 (match_dup 0)
10367 (const_int -256)))
10368 (clobber (reg:CC FLAGS_REG))]
10369 "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
10370 && reload_completed"
10371 [(set (strict_low_part (match_dup 1)) (const_int 0))]
10372 "operands[1] = gen_lowpart (QImode, operands[0]);")
10373
;; After reload, rewrite "reg &= -65281" (clear bits 8..15 only) as an XOR
;; of the 8-bit field at bit offset 8 with itself, expressed on the SImode
;; view of the register.  The register must satisfy QIreg_operand.
;; Enabled when !TARGET_PARTIAL_REG_STALL or when optimizing the function
;; for size.
10374 (define_split
10375 [(set (match_operand:SWI248 0 "QIreg_operand")
10376 (and:SWI248 (match_dup 0)
10377 (const_int -65281)))
10378 (clobber (reg:CC FLAGS_REG))]
10379 "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
10380 && reload_completed"
10381 [(parallel
10382 [(set (zero_extract:SI (match_dup 0)
10383 (const_int 8)
10384 (const_int 8))
10385 (subreg:SI
10386 (xor:QI
10387 (subreg:QI
10388 (zero_extract:SI (match_dup 0)
10389 (const_int 8)
10390 (const_int 8)) 0)
10391 (subreg:QI
10392 (zero_extract:SI (match_dup 0)
10393 (const_int 8)
10394 (const_int 8)) 0)) 0))
10395 (clobber (reg:CC FLAGS_REG))])]
10396 "operands[0] = gen_lowpart (SImode, operands[0]);")
10397
;; DImode AND that sets the flags register from a comparison of the result
;; with zero and also stores the result.  64-bit targets only.
;; Alternative 0 ("Z" constraint on operand 2) is emitted as "and{l}" with
;; the %k operand modifier; alternatives 1 and 2 are emitted as "and{q}".
;; The required flags mode is CCZmode when the andl form may be used with
;; the SImode sign bit possibly set, and CCNOmode otherwise (see the
;; comment inside the condition).
10398 (define_insn "*anddi_2"
10399 [(set (reg FLAGS_REG)
10400 (compare
10401 (and:DI
10402 (match_operand:DI 1 "nonimmediate_operand" "%0,0,0")
10403 (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,m"))
10404 (const_int 0)))
10405 (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r")
10406 (and:DI (match_dup 1) (match_dup 2)))]
10407 "TARGET_64BIT
10408 && ix86_match_ccmode
10409 (insn,
10410 /* If we are going to emit andl instead of andq, and the operands[2]
10411 constant might have the SImode sign bit set, make sure the sign
10412 flag isn't tested, because the instruction will set the sign flag
10413 based on bit 31 rather than bit 63. If it isn't CONST_INT,
10414 conservatively assume it might have bit 31 set. */
10415 (satisfies_constraint_Z (operands[2])
10416 && (!CONST_INT_P (operands[2])
10417 || val_signbit_known_set_p (SImode, INTVAL (operands[2]))))
10418 ? CCZmode : CCNOmode)
10419 && ix86_binary_operator_ok (AND, DImode, operands)"
10420 "@
10421 and{l}\t{%k2, %k0|%k0, %k2}
10422 and{q}\t{%2, %0|%0, %2}
10423 and{q}\t{%2, %0|%0, %2}"
10424 [(set_attr "type" "alu")
10425 (set_attr "mode" "SI,DI,DI")])
10426
;; SImode AND that sets the flags register (compare with zero, CCNOmode)
;; and stores the zero-extended result in a DImode register.  64-bit
;; targets only; emitted as "and{l}" with the %k modifier on operand 0.
10427 ;; See comment for addsi_1_zext why we do use nonimmediate_operand
10428 (define_insn "*andsi_2_zext"
10429 [(set (reg FLAGS_REG)
10430 (compare (and:SI
10431 (match_operand:SI 1 "nonimmediate_operand" "%0")
10432 (match_operand:SI 2 "x86_64_general_operand" "rBMe"))
10433 (const_int 0)))
10434 (set (match_operand:DI 0 "register_operand" "=r")
10435 (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))]
10436 "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
10437 && ix86_binary_operator_ok (AND, SImode, operands)"
10438 "and{l}\t{%2, %k0|%k0, %2}"
10439 [(set_attr "type" "alu")
10440 (set_attr "mode" "SI")])
10441
;; QImode AND that sets the flags register and stores the result, and that
;; may be output as an SImode "and{l}".
;; The flags mode required is CCNOmode when operand 2 is a non-negative
;; CONST_INT, and CCZmode otherwise.
;; The "mode" attribute is SI for alternative 2, and also when optimizing
;; the insn for size with operand 0 matching ext_QIreg_operand and operand 2
;; matching const_0_to_127_operand; otherwise it is QI.
;; When the SI form is output with a negative constant, the constant is
;; first masked with 0xff.
10442 (define_insn "*andqi_2_maybe_si"
10443 [(set (reg FLAGS_REG)
10444 (compare (and:QI
10445 (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
10446 (match_operand:QI 2 "general_operand" "qn,m,n"))
10447 (const_int 0)))
10448 (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r")
10449 (and:QI (match_dup 1) (match_dup 2)))]
10450 "ix86_binary_operator_ok (AND, QImode, operands)
10451 && ix86_match_ccmode (insn,
10452 CONST_INT_P (operands[2])
10453 && INTVAL (operands[2]) >= 0 ? CCNOmode : CCZmode)"
10454 {
10455 if (get_attr_mode (insn) == MODE_SI)
10456 {
10457 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0)
10458 operands[2] = GEN_INT (INTVAL (operands[2]) & 0xff);
10459 return "and{l}\t{%2, %k0|%k0, %2}";
10460 }
10461 return "and{b}\t{%2, %0|%0, %2}";
10462 }
10463 [(set_attr "type" "alu")
10464 (set (attr "mode")
10465 (cond [(eq_attr "alternative" "2")
10466 (const_string "SI")
10467 (and (match_test "optimize_insn_for_size_p ()")
10468 (and (match_operand 0 "ext_QIreg_operand")
10469 (match_operand 2 "const_0_to_127_operand")))
10470 (const_string "SI")
10471 ]
10472 (const_string "QI")))
10473 ;; Potential partial reg stall on alternative 2.
10474 (set (attr "preferred_for_speed")
10475 (cond [(eq_attr "alternative" "2")
10476 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
10477 (symbol_ref "true")))])
10478
;; AND for the SWI124 modes that sets the flags register (compare of the
;; result with zero, CCNOmode) and also stores the result in operand 0.
;; Operand 1 is tied to operand 0 in both alternatives.
10479 (define_insn "*and<mode>_2"
10480 [(set (reg FLAGS_REG)
10481 (compare (and:SWI124
10482 (match_operand:SWI124 1 "nonimmediate_operand" "%0,0")
10483 (match_operand:SWI124 2 "<general_operand>" "<r><i>,<m>"))
10484 (const_int 0)))
10485 (set (match_operand:SWI124 0 "nonimmediate_operand" "=<r>m,<r>")
10486 (and:SWI124 (match_dup 1) (match_dup 2)))]
10487 "ix86_match_ccmode (insn, CCNOmode)
10488 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
10489 "and{<imodesuffix>}\t{%2, %0|%0, %2}"
10490 [(set_attr "type" "alu")
10491 (set_attr "mode" "<MODE>")])
10492
;; Expander: AND the 8-bit field at bit offset 8 of HImode register
;; operand 1 with CONST_INT operand 2 and store the result into the same
;; field of HImode register operand 0, clobbering the flags register.
10493 (define_expand "andqi_ext_1"
10494 [(parallel
10495 [(set (zero_extract:HI (match_operand:HI 0 "register_operand")
10496 (const_int 8)
10497 (const_int 8))
10498 (subreg:HI
10499 (and:QI
10500 (subreg:QI
10501 (zero_extract:HI (match_operand:HI 1 "register_operand")
10502 (const_int 8)
10503 (const_int 8)) 0)
10504 (match_operand:QI 2 "const_int_operand")) 0))
10505 (clobber (reg:CC FLAGS_REG))])])
10506
;; AND of the 8-bit field at bit offset 8 of a "Q" register with QImode
;; operand 2, written back to the same field; clobbers the flags register.
;; Emitted as "and{b}" with the %h modifier on operand 0.
;; Operands 0 and 1 must be the same rtx (see the FIXME in the condition).
;; Alternative 1 (memory operand 2) has isa "nox64".
10507 (define_insn "*andqi_ext<mode>_1"
10508 [(set (zero_extract:SWI248
10509 (match_operand:SWI248 0 "register_operand" "+Q,Q")
10510 (const_int 8)
10511 (const_int 8))
10512 (subreg:SWI248
10513 (and:QI
10514 (subreg:QI
10515 (zero_extract:SWI248
10516 (match_operand:SWI248 1 "register_operand" "0,0")
10517 (const_int 8)
10518 (const_int 8)) 0)
10519 (match_operand:QI 2 "general_operand" "QnBc,m")) 0))
10520 (clobber (reg:CC FLAGS_REG))]
10521 "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
10522 rtx_equal_p (operands[0], operands[1])"
10523 "and{b}\t{%2, %h0|%h0, %2}"
10524 [(set_attr "isa" "*,nox64")
10525 (set_attr "type" "alu")
10526 (set_attr "mode" "QI")])
10527
;; Flag-setting form of the bit-8..15 field AND: the flags register is set
;; from a comparison of the QImode AND result with zero (CCNOmode), and the
;; result is written back to the 8-bit field at bit offset 8 of operand 0.
;; Operands 0 and 1 must be the same rtx.
10528 ;; Generated by peephole translating test to and. This shows up
10529 ;; often in fp comparisons.
10530 (define_insn "*andqi_ext<mode>_1_cc"
10531 [(set (reg FLAGS_REG)
10532 (compare
10533 (and:QI
10534 (subreg:QI
10535 (zero_extract:SWI248
10536 (match_operand:SWI248 1 "register_operand" "0,0")
10537 (const_int 8)
10538 (const_int 8)) 0)
10539 (match_operand:QI 2 "general_operand" "QnBc,m"))
10540 (const_int 0)))
10541 (set (zero_extract:SWI248
10542 (match_operand:SWI248 0 "register_operand" "+Q,Q")
10543 (const_int 8)
10544 (const_int 8))
10545 (subreg:SWI248
10546 (and:QI
10547 (subreg:QI
10548 (zero_extract:SWI248
10549 (match_dup 1)
10550 (const_int 8)
10551 (const_int 8)) 0)
10552 (match_dup 2)) 0))]
10553 "ix86_match_ccmode (insn, CCNOmode)
10554 /* FIXME: without this LRA can't reload this pattern, see PR82524. */
10555 && rtx_equal_p (operands[0], operands[1])"
10556 "and{b}\t{%2, %h0|%h0, %2}"
10557 [(set_attr "isa" "*,nox64")
10558 (set_attr "type" "alu")
10559 (set_attr "mode" "QI")])
10560
;; AND of two 8-bit fields at bit offset 8 (from registers operand 1 and
;; operand 2), written to the same field of operand 0; clobbers the flags
;; register.  Emitted as "and{b}" with the %h modifier on both operands.
;; Operand 0 must be the same rtx as operand 1 or operand 2.
10561 (define_insn "*andqi_ext<mode>_2"
10562 [(set (zero_extract:SWI248
10563 (match_operand:SWI248 0 "register_operand" "+Q")
10564 (const_int 8)
10565 (const_int 8))
10566 (subreg:SWI248
10567 (and:QI
10568 (subreg:QI
10569 (zero_extract:SWI248
10570 (match_operand:SWI248 1 "register_operand" "%0")
10571 (const_int 8)
10572 (const_int 8)) 0)
10573 (subreg:QI
10574 (zero_extract:SWI248
10575 (match_operand:SWI248 2 "register_operand" "Q")
10576 (const_int 8)
10577 (const_int 8)) 0)) 0))
10578 (clobber (reg:CC FLAGS_REG))]
10579 "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
10580 rtx_equal_p (operands[0], operands[1])
10581 || rtx_equal_p (operands[0], operands[2])"
10582 "and{b}\t{%h2, %h0|%h0, %h2}"
10583 [(set_attr "type" "alu")
10584 (set_attr "mode" "QI")])
10585
10586 ;; Convert wide AND instructions with immediate operand to shorter QImode
10587 ;; equivalents when possible.
10588 ;; Don't do the splitting with memory operands, since it introduces risk
10589 ;; of memory mismatch stalls. We may want to do the splitting for optimizing
10590 ;; for size, but that can (should?) be handled by generic code instead.
;;
;; This split handles constants whose bits outside positions 8..15 are all
;; set, so the AND can only change bits 8..15.  After reload it becomes a
;; QImode AND on the 8-bit field at bit offset 8 of the SImode views of the
;; registers, with the constant shifted right by 8 and truncated to QImode.
10591 (define_split
10592 [(set (match_operand:SWI248 0 "QIreg_operand")
10593 (and:SWI248 (match_operand:SWI248 1 "register_operand")
10594 (match_operand:SWI248 2 "const_int_operand")))
10595 (clobber (reg:CC FLAGS_REG))]
10596 "reload_completed
10597 && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
10598 && !(~INTVAL (operands[2]) & ~(255 << 8))"
10599 [(parallel
10600 [(set (zero_extract:SI (match_dup 0)
10601 (const_int 8)
10602 (const_int 8))
10603 (subreg:SI
10604 (and:QI
10605 (subreg:QI
10606 (zero_extract:SI (match_dup 1)
10607 (const_int 8)
10608 (const_int 8)) 0)
10609 (match_dup 2)) 0))
10610 (clobber (reg:CC FLAGS_REG))])]
10611 {
10612 operands[0] = gen_lowpart (SImode, operands[0]);
10613 operands[1] = gen_lowpart (SImode, operands[1]);
10614 operands[2] = gen_int_mode (INTVAL (operands[2]) >> 8, QImode);
10615 })
10616
;; After reload, narrow a wide AND with a constant to a QImode AND on the
;; low part of the destination (strict_low_part).  The condition requires
;; every bit of the constant above bit 7 to be set and bit 7 itself to be
;; clear.
10617 ;; Since AND can be encoded with sign extended immediate, this is only
10618 ;; profitable when 7th bit is not set.
10619 (define_split
10620 [(set (match_operand:SWI248 0 "any_QIreg_operand")
10621 (and:SWI248 (match_operand:SWI248 1 "general_operand")
10622 (match_operand:SWI248 2 "const_int_operand")))
10623 (clobber (reg:CC FLAGS_REG))]
10624 "reload_completed
10625 && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
10626 && !(~INTVAL (operands[2]) & ~255)
10627 && !(INTVAL (operands[2]) & 128)"
10628 [(parallel [(set (strict_low_part (match_dup 0))
10629 (and:QI (match_dup 1)
10630 (match_dup 2)))
10631 (clobber (reg:CC FLAGS_REG))])]
10632 {
10633 operands[0] = gen_lowpart (QImode, operands[0]);
10634 operands[1] = gen_lowpart (QImode, operands[1]);
10635 operands[2] = gen_int_mode (INTVAL (operands[2]), QImode);
10636 })
10637
;; Double-word AND-NOT (~operand 1 & operand 2) when TARGET_BMI.
;; Always output as "#"; after reload it is split into two DWIH-mode
;; AND-NOT insns.  split_double_mode rewrites operands[0..2] and fills
;; operands[3..5] with the two halves of the original three operands.
10638 (define_insn_and_split "*andn<dwi>3_doubleword_bmi"
10639 [(set (match_operand:<DWI> 0 "register_operand" "=&r,r,r")
10640 (and:<DWI>
10641 (not:<DWI> (match_operand:<DWI> 1 "register_operand" "r,0,r"))
10642 (match_operand:<DWI> 2 "nonimmediate_operand" "ro,ro,0")))
10643 (clobber (reg:CC FLAGS_REG))]
10644 "TARGET_BMI"
10645 "#"
10646 "&& reload_completed"
10647 [(parallel [(set (match_dup 0)
10648 (and:DWIH (not:DWIH (match_dup 1)) (match_dup 2)))
10649 (clobber (reg:CC FLAGS_REG))])
10650 (parallel [(set (match_dup 3)
10651 (and:DWIH (not:DWIH (match_dup 4)) (match_dup 5)))
10652 (clobber (reg:CC FLAGS_REG))])]
10653 "split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);")
10654
;; Double-word AND-NOT when BMI is not available.  Matches only while
;; ix86_pre_reload_split () holds and is split unconditionally ("&& 1")
;; into a NOT of operand 1 into a fresh pseudo (operands[3]) followed by a
;; plain AND of that pseudo with operand 2.
10655 (define_insn_and_split "*andn<mode>3_doubleword"
10656 [(set (match_operand:DWI 0 "register_operand")
10657 (and:DWI
10658 (not:DWI (match_operand:DWI 1 "register_operand"))
10659 (match_operand:DWI 2 "nonimmediate_operand")))
10660 (clobber (reg:CC FLAGS_REG))]
10661 "!TARGET_BMI
10662 && ix86_pre_reload_split ()"
10663 "#"
10664 "&& 1"
10665 [(set (match_dup 3) (not:DWI (match_dup 1)))
10666 (parallel [(set (match_dup 0)
10667 (and:DWI (match_dup 3) (match_dup 2)))
10668 (clobber (reg:CC FLAGS_REG))])]
10669 "operands[3] = gen_reg_rtx (<MODE>mode);")
10670
;; AND-NOT (~operand 1 & operand 2) for the SWI48 modes, clobbering the
;; flags register.
;;   alternatives 0, 1 - "andn" with a register or memory operand 2
;;                       (isa bmi, type bitmanip);
;;   alternative 2     - mask-register form ("k" constraints, isa avx512bw,
;;                       type msklog), output as "#".
10671 (define_insn "*andn<mode>_1"
10672 [(set (match_operand:SWI48 0 "register_operand" "=r,r,?k")
10673 (and:SWI48
10674 (not:SWI48 (match_operand:SWI48 1 "register_operand" "r,r,k"))
10675 (match_operand:SWI48 2 "nonimmediate_operand" "r,m,k")))
10676 (clobber (reg:CC FLAGS_REG))]
10677 "TARGET_BMI || TARGET_AVX512BW"
10678 "@
10679 andn\t{%2, %1, %0|%0, %1, %2}
10680 andn\t{%2, %1, %0|%0, %1, %2}
10681 #"
10682 [(set_attr "isa" "bmi,bmi,avx512bw")
10683 (set_attr "type" "bitmanip,bitmanip,msklog")
10684 (set_attr "btver2_decode" "direct, double,*")
10685 (set_attr "mode" "<MODE>")])
10686
;; AND-NOT (~operand 1 & operand 2) for the SWI12 modes with register
;; operands only, clobbering the flags register.
;;   alternative 0 - "andn" with the %k modifier on all three operands
;;                   (isa bmi, mode attribute SI);
;;   alternative 1 - mask-register form ("k" constraints, isa avx512f,
;;                   type msklog), output as "#"; its mode attribute is HI
;;                   when !TARGET_AVX512DQ and <MODE> otherwise.
10687 (define_insn "*andn<mode>_1"
10688 [(set (match_operand:SWI12 0 "register_operand" "=r,?k")
10689 (and:SWI12
10690 (not:SWI12 (match_operand:SWI12 1 "register_operand" "r,k"))
10691 (match_operand:SWI12 2 "register_operand" "r,k")))
10692 (clobber (reg:CC FLAGS_REG))]
10693 "TARGET_BMI || TARGET_AVX512BW"
10694 "@
10695 andn\t{%k2, %k1, %k0|%k0, %k1, %k2}
10696 #"
10697 [(set_attr "isa" "bmi,avx512f")
10698 (set_attr "type" "bitmanip,msklog")
10699 (set_attr "btver2_decode" "direct,*")
10700 (set (attr "mode")
10701 (cond [(eq_attr "alternative" "0")
10702 (const_string "SI")
10703 (and (eq_attr "alternative" "1")
10704 (match_test "!TARGET_AVX512DQ"))
10705 (const_string "HI")
10706 ]
10707 (const_string "<MODE>")))])
10708
;; Flag-setting AND-NOT for the SWI48 modes: the flags register is set from
;; a comparison of (~operand 1 & operand 2) with zero (CCNOmode), and the
;; result itself goes to a scratch register (operand 0).  Requires
;; TARGET_BMI.
10709 (define_insn "*andn_<mode>_ccno"
10710 [(set (reg FLAGS_REG)
10711 (compare
10712 (and:SWI48
10713 (not:SWI48 (match_operand:SWI48 1 "register_operand" "r,r"))
10714 (match_operand:SWI48 2 "nonimmediate_operand" "r,m"))
10715 (const_int 0)))
10716 (clobber (match_scratch:SWI48 0 "=r,r"))]
10717 "TARGET_BMI && ix86_match_ccmode (insn, CCNOmode)"
10718 "andn\t{%2, %1, %0|%0, %1, %2}"
10719 [(set_attr "type" "bitmanip")
10720 (set_attr "btver2_decode" "direct, double")
10721 (set_attr "mode" "<MODE>")])
10722
10723 ;; Split *andnsi_1 after reload with -Oz when not;and is shorter.
;; Applies when optimizing the insn for size with optimize_size > 1,
;; operands 0 and 1 are the same hard register, operand 0 is a legacy
;; integer register, operand 2 is not a REX integer register, and operand 2
;; does not overlap operand 0.  The result is a NOT of operand 0 in place
;; followed by an AND with operand 2.
10724 (define_split
10725 [(set (match_operand:SI 0 "register_operand")
10726 (and:SI (not:SI (match_operand:SI 1 "register_operand"))
10727 (match_operand:SI 2 "nonimmediate_operand")))
10728 (clobber (reg:CC FLAGS_REG))]
10729 "reload_completed
10730 && optimize_insn_for_size_p () && optimize_size > 1
10731 && REGNO (operands[0]) == REGNO (operands[1])
10732 && LEGACY_INT_REG_P (operands[0])
10733 && !REX_INT_REG_P (operands[2])
10734 && !reg_overlap_mentioned_p (operands[0], operands[2])"
10735 [(set (match_dup 0) (not:SI (match_dup 1)))
10736 (parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 2)))
10737 (clobber (reg:CC FLAGS_REG))])])
10738
10739 ;; Split *andn_si_ccno with -Oz when not;test is shorter.
;; The matched insn sets the flags from (~operand 2 & operand 3) compared
;; with zero and clobbers operand 2 itself.  After reload, when optimizing
;; for size with optimize_size > 1, operand 2 is a legacy integer register,
;; operand 3 is not a REX integer register and the two do not overlap, it
;; becomes an in-place NOT of operand 2 followed by a flag-setting compare
;; of (operand 3 & operand 2) with zero using the original comparison
;; operator.
10740 (define_split
10741 [(set (match_operand 0 "flags_reg_operand")
10742 (match_operator 1 "compare_operator"
10743 [(and:SI (not:SI (match_operand:SI 2 "general_reg_operand"))
10744 (match_operand:SI 3 "nonimmediate_operand"))
10745 (const_int 0)]))
10746 (clobber (match_dup 2))]
10747 "reload_completed
10748 && optimize_insn_for_size_p () && optimize_size > 1
10749 && LEGACY_INT_REG_P (operands[2])
10750 && !REX_INT_REG_P (operands[3])
10751 && !reg_overlap_mentioned_p (operands[2], operands[3])"
10752 [(set (match_dup 2) (not:SI (match_dup 2)))
10753 (set (match_dup 0) (match_op_dup 1
10754 [(and:SI (match_dup 3) (match_dup 2))
10755 (const_int 0)]))])
10756
10757 ;; Variant 1 of 4: Split ((A | B) ^ A) ^ C as (B & ~A) ^ C.
;; Here A is operand 1 (register), B is operand 2 and C is operand 3.
;; Requires TARGET_BMI.  The AND-NOT result goes into a newly created
;; pseudo (operands[4]) which is then XORed with C into operand 0.
10758 (define_split
10759 [(set (match_operand:SWI48 0 "register_operand")
10760 (xor:SWI48
10761 (xor:SWI48
10762 (ior:SWI48 (match_operand:SWI48 1 "register_operand")
10763 (match_operand:SWI48 2 "nonimmediate_operand"))
10764 (match_dup 1))
10765 (match_operand:SWI48 3 "nonimmediate_operand")))
10766 (clobber (reg:CC FLAGS_REG))]
10767 "TARGET_BMI"
10768 [(parallel
10769 [(set (match_dup 4) (and:SWI48 (not:SWI48 (match_dup 1)) (match_dup 2)))
10770 (clobber (reg:CC FLAGS_REG))])
10771 (parallel
10772 [(set (match_dup 0) (xor:SWI48 (match_dup 4) (match_dup 3)))
10773 (clobber (reg:CC FLAGS_REG))])]
10774 "operands[4] = gen_reg_rtx (<MODE>mode);")
10775
10776 ;; Variant 2 of 4: Split ((A | B) ^ B) ^ C as (A & ~B) ^ C.
;; Here A is operand 1, B is operand 2 (both registers) and C is operand 3.
;; Requires TARGET_BMI.  The AND-NOT result goes into a newly created
;; pseudo (operands[4]) which is then XORed with C into operand 0.
10777 (define_split
10778 [(set (match_operand:SWI48 0 "register_operand")
10779 (xor:SWI48
10780 (xor:SWI48
10781 (ior:SWI48 (match_operand:SWI48 1 "register_operand")
10782 (match_operand:SWI48 2 "register_operand"))
10783 (match_dup 2))
10784 (match_operand:SWI48 3 "nonimmediate_operand")))
10785 (clobber (reg:CC FLAGS_REG))]
10786 "TARGET_BMI"
10787 [(parallel
10788 [(set (match_dup 4) (and:SWI48 (not:SWI48 (match_dup 2)) (match_dup 1)))
10789 (clobber (reg:CC FLAGS_REG))])
10790 (parallel
10791 [(set (match_dup 0) (xor:SWI48 (match_dup 4) (match_dup 3)))
10792 (clobber (reg:CC FLAGS_REG))])]
10793 "operands[4] = gen_reg_rtx (<MODE>mode);")
10794
10795 ;; Variant 3 of 4: Split ((A | B) ^ C) ^ A as (B & ~A) ^ C.
;; Here A is operand 1 (register), B is operand 2 and C is operand 3.
;; Requires TARGET_BMI.  The AND-NOT result goes into a newly created
;; pseudo (operands[4]) which is then XORed with C into operand 0.
10796 (define_split
10797 [(set (match_operand:SWI48 0 "register_operand")
10798 (xor:SWI48
10799 (xor:SWI48
10800 (ior:SWI48 (match_operand:SWI48 1 "register_operand")
10801 (match_operand:SWI48 2 "nonimmediate_operand"))
10802 (match_operand:SWI48 3 "nonimmediate_operand"))
10803 (match_dup 1)))
10804 (clobber (reg:CC FLAGS_REG))]
10805 "TARGET_BMI"
10806 [(parallel
10807 [(set (match_dup 4) (and:SWI48 (not:SWI48 (match_dup 1)) (match_dup 2)))
10808 (clobber (reg:CC FLAGS_REG))])
10809 (parallel
10810 [(set (match_dup 0) (xor:SWI48 (match_dup 4) (match_dup 3)))
10811 (clobber (reg:CC FLAGS_REG))])]
10812 "operands[4] = gen_reg_rtx (<MODE>mode);")
10813
10814 ;; Variant 4 of 4: Split ((A | B) ^ C) ^ B as (A & ~B) ^ C.
;; Here A is operand 1, B is operand 2 (both registers) and C is operand 3.
;; Requires TARGET_BMI.  The AND-NOT result goes into a newly created
;; pseudo (operands[4]) which is then XORed with C into operand 0.
10815 (define_split
10816 [(set (match_operand:SWI48 0 "register_operand")
10817 (xor:SWI48
10818 (xor:SWI48
10819 (ior:SWI48 (match_operand:SWI48 1 "register_operand")
10820 (match_operand:SWI48 2 "register_operand"))
10821 (match_operand:SWI48 3 "nonimmediate_operand"))
10822 (match_dup 2)))
10823 (clobber (reg:CC FLAGS_REG))]
10824 "TARGET_BMI"
10825 [(parallel
10826 [(set (match_dup 4) (and:SWI48 (not:SWI48 (match_dup 2)) (match_dup 1)))
10827 (clobber (reg:CC FLAGS_REG))])
10828 (parallel
10829 [(set (match_dup 0) (xor:SWI48 (match_dup 4) (match_dup 3)))
10830 (clobber (reg:CC FLAGS_REG))])]
10831 "operands[4] = gen_reg_rtx (<MODE>mode);")
10832 \f
10833 ;; Logical inclusive and exclusive OR instructions
10834
10835 ;; %%% This used to optimize known byte-wide and operations to memory.
10836 ;; If this is considered useful, it should be done with splitters.
10837
;; Named expander for the any_or codes over the SDWIM modes.
;; For modes wider than a word, operand 2 is forced into a register unless
;; it satisfies x86_64_hilo_general_operand.  The actual expansion is
;; delegated to ix86_expand_binary_operator.
10838 (define_expand "<code><mode>3"
10839 [(set (match_operand:SDWIM 0 "nonimmediate_operand")
10840 (any_or:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")
10841 (match_operand:SDWIM 2 "<general_operand>")))]
10842 ""
10843 {
10844 if (GET_MODE_SIZE (<MODE>mode) > UNITS_PER_WORD
10845 && !x86_64_hilo_general_operand (operands[2], <MODE>mode))
10846 operands[2] = force_reg (<MODE>mode, operands[2]);
10847
10848 ix86_expand_binary_operator (<CODE>, <MODE>mode, operands);
10849 DONE;
10850 })
10851
;; Double-word any_or operation.  Always output as "#" and split after
;; reload into per-half operations.  split_double_mode places one half of
;; the three operands in operands[0..2] and the other in operands[3..5].
;; For each half, a source of const0_rtx emits nothing, a source of
;; constm1_rtx emits a move of -1 (IOR) or a NOT (otherwise), and any other
;; source emits the ordinary binary operation.
10852 (define_insn_and_split "*<code><dwi>3_doubleword"
10853 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
10854 (any_or:<DWI>
10855 (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0")
10856 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o")))
10857 (clobber (reg:CC FLAGS_REG))]
10858 "ix86_binary_operator_ok (<CODE>, <DWI>mode, operands)"
10859 "#"
10860 "&& reload_completed"
10861 [(const_int:DWIH 0)]
10862 {
10863 /* This insn may disappear completely when operands[2] == const0_rtx
10864 and operands[0] == operands[1], which requires a NOTE_INSN_DELETED. */
10865 bool emit_insn_deleted_note_p = false;
10866
10867 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
10868
/* First half: operands[0..2].  */
10869 if (operands[2] == const0_rtx)
10870 emit_insn_deleted_note_p = true;
10871 else if (operands[2] == constm1_rtx)
10872 {
10873 if (<CODE> == IOR)
10874 emit_move_insn (operands[0], constm1_rtx);
10875 else
10876 ix86_expand_unary_operator (NOT, <MODE>mode, &operands[0]);
10877 }
10878 else
10879 ix86_expand_binary_operator (<CODE>, <MODE>mode, &operands[0]);
10880
/* Second half: operands[3..5].  The deleted-insn note is emitted only
   when neither half produced an insn.  */
10881 if (operands[5] == const0_rtx)
10882 {
10883 if (emit_insn_deleted_note_p)
10884 emit_note (NOTE_INSN_DELETED);
10885 }
10886 else if (operands[5] == constm1_rtx)
10887 {
10888 if (<CODE> == IOR)
10889 emit_move_insn (operands[3], constm1_rtx);
10890 else
10891 ix86_expand_unary_operator (NOT, <MODE>mode, &operands[3]);
10892 }
10893 else
10894 ix86_expand_binary_operator (<CODE>, <MODE>mode, &operands[3]);
10895
10896 DONE;
10897 })
10898
;; any_or operation for the SWI248 modes, clobbering the flags register.
;;   alternatives 0, 1 - the ordinary <logic> instruction;
;;   alternative 2     - mask-register form ("k" constraints, type msklog),
;;                       output as "#".
;; The "isa" attribute of alternative 2 is avx512bw when the mode attribute
;; is SI or DI, and avx512f otherwise.
10899 (define_insn "*<code><mode>_1"
10900 [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,?k")
10901 (any_or:SWI248
10902 (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,k")
10903 (match_operand:SWI248 2 "<general_operand>" "r<i>,<m>,k")))
10904 (clobber (reg:CC FLAGS_REG))]
10905 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10906 "@
10907 <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
10908 <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
10909 #"
10910 [(set (attr "isa")
10911 (cond [(eq_attr "alternative" "2")
10912 (if_then_else (eq_attr "mode" "SI,DI")
10913 (const_string "avx512bw")
10914 (const_string "avx512f"))
10915 ]
10916 (const_string "*")))
10917 (set_attr "type" "alu, alu, msklog")
10918 (set_attr "mode" "<MODE>")])
10919
;; NOT of an XOR for the SWI248 modes, clobbering the flags register.
;; Always output as "#".  After reload it is split into an XOR followed by
;; a NOT of the destination, except that a mask-register destination
;; (MASK_REG_P) is expanded with gen_kxnor<mode> instead.
10920 (define_insn_and_split "*notxor<mode>_1"
10921 [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,?k")
10922 (not:SWI248
10923 (xor:SWI248
10924 (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,k")
10925 (match_operand:SWI248 2 "<general_operand>" "r<i>,<m>,k"))))
10926 (clobber (reg:CC FLAGS_REG))]
10927 "ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
10928 "#"
10929 "&& reload_completed"
10930 [(parallel
10931 [(set (match_dup 0)
10932 (xor:SWI248 (match_dup 1) (match_dup 2)))
10933 (clobber (reg:CC FLAGS_REG))])
10934 (set (match_dup 0)
10935 (not:SWI248 (match_dup 0)))]
10936 {
10937 if (MASK_REG_P (operands[0]))
10938 {
10939 emit_insn (gen_kxnor<mode> (operands[0], operands[1], operands[2]));
10940 DONE;
10941 }
10942 }
10943 [(set (attr "isa")
10944 (cond [(eq_attr "alternative" "2")
10945 (if_then_else (eq_attr "mode" "SI,DI")
10946 (const_string "avx512bw")
10947 (const_string "avx512f"))
10948 ]
10949 (const_string "*")))
10950 (set_attr "type" "alu, alu, msklog")
10951 (set_attr "mode" "<MODE>")])
10952
;; DImode IOR with a constant that has exactly one bit set, at a position
;; from 31 to 63 (checked with exact_log2).  Requires TARGET_64BIT and
;; TARGET_USE_BT.  Always output as "#"; after reload it is split into
;; setting a one-bit zero_extract of operand 0 at that position to 1.
;; operands[3] holds the bit position.
10953 (define_insn_and_split "*iordi_1_bts"
10954 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
10955 (ior:DI
10956 (match_operand:DI 1 "nonimmediate_operand" "%0")
10957 (match_operand:DI 2 "const_int_operand" "n")))
10958 (clobber (reg:CC FLAGS_REG))]
10959 "TARGET_64BIT && TARGET_USE_BT
10960 && ix86_binary_operator_ok (IOR, DImode, operands)
10961 && IN_RANGE (exact_log2 (INTVAL (operands[2])), 31, 63)"
10962 "#"
10963 "&& reload_completed"
10964 [(parallel [(set (zero_extract:DI (match_dup 0)
10965 (const_int 1)
10966 (match_dup 3))
10967 (const_int 1))
10968 (clobber (reg:CC FLAGS_REG))])]
10969 "operands[3] = GEN_INT (exact_log2 (INTVAL (operands[2])));"
10970 [(set_attr "type" "alu1")
10971 (set_attr "prefix_0f" "1")
10972 (set_attr "znver1_decode" "double")
10973 (set_attr "mode" "DI")])
10974
;; DImode XOR with a constant that has exactly one bit set, at a position
;; from 31 to 63 (checked with exact_log2).  Requires TARGET_64BIT and
;; TARGET_USE_BT.  Always output as "#"; after reload it is split into
;; replacing a one-bit zero_extract of operand 0 at that position with its
;; complement.  operands[3] holds the bit position.
10975 (define_insn_and_split "*xordi_1_btc"
10976 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
10977 (xor:DI
10978 (match_operand:DI 1 "nonimmediate_operand" "%0")
10979 (match_operand:DI 2 "const_int_operand" "n")))
10980 (clobber (reg:CC FLAGS_REG))]
10981 "TARGET_64BIT && TARGET_USE_BT
10982 && ix86_binary_operator_ok (XOR, DImode, operands)
10983 && IN_RANGE (exact_log2 (INTVAL (operands[2])), 31, 63)"
10984 "#"
10985 "&& reload_completed"
10986 [(parallel [(set (zero_extract:DI (match_dup 0)
10987 (const_int 1)
10988 (match_dup 3))
10989 (not:DI (zero_extract:DI (match_dup 0)
10990 (const_int 1)
10991 (match_dup 3))))
10992 (clobber (reg:CC FLAGS_REG))])]
10993 "operands[3] = GEN_INT (exact_log2 (INTVAL (operands[2])));"
10994 [(set_attr "type" "alu1")
10995 (set_attr "prefix_0f" "1")
10996 (set_attr "znver1_decode" "double")
10997 (set_attr "mode" "DI")])
10998
10999 ;; Optimize a ^ ((a ^ b) & mask) to (~mask & a) | (b & mask)
;; Here a is operand 1, b is operand 2 and mask is operand 3.
;; Requires TARGET_BMI and matches only while ix86_pre_reload_split ()
;; holds; it is split unconditionally ("&& 1") into three insns:
;;   operands[4] = ~mask & a
;;   operands[5] = mask & b
;;   operand 0   = operands[4] | operands[5]
;; Operands 1 and 3 are forced into registers first, and operands[4] and
;; operands[5] are newly created pseudos.
11000 (define_insn_and_split "*xor2andn"
11001 [(set (match_operand:SWI248 0 "register_operand")
11002 (xor:SWI248
11003 (and:SWI248
11004 (xor:SWI248
11005 (match_operand:SWI248 1 "nonimmediate_operand")
11006 (match_operand:SWI248 2 "nonimmediate_operand"))
11007 (match_operand:SWI248 3 "nonimmediate_operand"))
11008 (match_dup 1)))
11009 (clobber (reg:CC FLAGS_REG))]
11010 "TARGET_BMI && ix86_pre_reload_split ()"
11011 "#"
11012 "&& 1"
11013 [(parallel [(set (match_dup 4)
11014 (and:SWI248
11015 (not:SWI248
11016 (match_dup 3))
11017 (match_dup 1)))
11018 (clobber (reg:CC FLAGS_REG))])
11019 (parallel [(set (match_dup 5)
11020 (and:SWI248
11021 (match_dup 3)
11022 (match_dup 2)))
11023 (clobber (reg:CC FLAGS_REG))])
11024 (parallel [(set (match_dup 0)
11025 (ior:SWI248
11026 (match_dup 4)
11027 (match_dup 5)))
11028 (clobber (reg:CC FLAGS_REG))])]
11029 {
11030 operands[1] = force_reg (<MODE>mode, operands[1]);
11031 operands[3] = force_reg (<MODE>mode, operands[3]);
11032 operands[4] = gen_reg_rtx (<MODE>mode);
11033 operands[5] = gen_reg_rtx (<MODE>mode);
11034 })
11035
;; SImode any_or operation whose result is zero-extended into a DImode
;; register, clobbering the flags register.  64-bit targets only; emitted
;; as the "l"-suffixed <logic> instruction with the %k modifier on
;; operand 0.
11036 ;; See comment for addsi_1_zext why we do use nonimmediate_operand
11037 (define_insn "*<code>si_1_zext"
11038 [(set (match_operand:DI 0 "register_operand" "=r")
11039 (zero_extend:DI
11040 (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
11041 (match_operand:SI 2 "x86_64_general_operand" "rBMe"))))
11042 (clobber (reg:CC FLAGS_REG))]
11043 "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
11044 "<logic>{l}\t{%2, %k0|%k0, %2}"
11045 [(set_attr "type" "alu")
11046 (set_attr "mode" "SI")])
11047
;; DImode any_or of a zero-extended SImode register with an immediate that
;; satisfies x86_64_zext_immediate_operand ("Z" constraint), clobbering the
;; flags register.  64-bit targets only; emitted as the "l"-suffixed
;; <logic> instruction with the %k modifier on operand 0.
11048 (define_insn "*<code>si_1_zext_imm"
11049 [(set (match_operand:DI 0 "register_operand" "=r")
11050 (any_or:DI
11051 (zero_extend:DI (match_operand:SI 1 "register_operand" "%0"))
11052 (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z")))
11053 (clobber (reg:CC FLAGS_REG))]
11054 "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
11055 "<logic>{l}\t{%2, %k0|%k0, %2}"
11056 [(set_attr "type" "alu")
11057 (set_attr "mode" "SI")])
11058
;; QImode any_or operation, clobbering the flags register.
;;   alternatives 0, 1 - byte-sized <logic>{b};
;;   alternative 2     - <logic>{l} using the %k operand modifier on both
;;                       operands (mode attribute SI);
;;   alternative 3     - mask-register form ("k" constraints, isa avx512f,
;;                       type msklog), output as "#".
;; The "mode" attribute is SI for alternative 2, HI for alternative 3 when
;; !TARGET_AVX512DQ, and QI otherwise.
11059 (define_insn "*<code>qi_1"
11060 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,?k")
11061 (any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k")
11062 (match_operand:QI 2 "general_operand" "qn,m,rn,k")))
11063 (clobber (reg:CC FLAGS_REG))]
11064 "ix86_binary_operator_ok (<CODE>, QImode, operands)"
11065 "@
11066 <logic>{b}\t{%2, %0|%0, %2}
11067 <logic>{b}\t{%2, %0|%0, %2}
11068 <logic>{l}\t{%k2, %k0|%k0, %k2}
11069 #"
11070 [(set_attr "isa" "*,*,*,avx512f")
11071 (set_attr "type" "alu,alu,alu,msklog")
11072 (set (attr "mode")
11073 (cond [(eq_attr "alternative" "2")
11074 (const_string "SI")
11075 (and (eq_attr "alternative" "3")
11076 (match_test "!TARGET_AVX512DQ"))
11077 (const_string "HI")
11078 ]
11079 (const_string "QI")))
11080 ;; Potential partial reg stall on alternative 2.
11081 (set (attr "preferred_for_speed")
11082 (cond [(eq_attr "alternative" "2")
11083 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
11084 (symbol_ref "true")))])
11085
;; QImode NOT of an XOR, clobbering the flags register.  Always output as
;; "#".  After reload it is split into an XOR followed by a NOT of the
;; destination, except that a destination matching mask_reg_operand is
;; expanded with gen_kxnorqi instead.
;; Attributes mirror *<code>qi_1: alternative 3 is the avx512f mask-register
;; form, and the mode attribute is SI for alternative 2, HI for
;; alternative 3 when !TARGET_AVX512DQ, and QI otherwise.
11086 (define_insn_and_split "*notxorqi_1"
11087 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,?k")
11088 (not:QI
11089 (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k")
11090 (match_operand:QI 2 "general_operand" "qn,m,rn,k"))))
11091 (clobber (reg:CC FLAGS_REG))]
11092 "ix86_binary_operator_ok (XOR, QImode, operands)"
11093 "#"
11094 "&& reload_completed"
11095 [(parallel
11096 [(set (match_dup 0)
11097 (xor:QI (match_dup 1) (match_dup 2)))
11098 (clobber (reg:CC FLAGS_REG))])
11099 (set (match_dup 0)
11100 (not:QI (match_dup 0)))]
11101 {
11102 if (mask_reg_operand (operands[0], QImode))
11103 {
11104 emit_insn (gen_kxnorqi (operands[0], operands[1], operands[2]));
11105 DONE;
11106 }
11107 }
11108 [(set_attr "isa" "*,*,*,avx512f")
11109 (set_attr "type" "alu,alu,alu,msklog")
11110 (set (attr "mode")
11111 (cond [(eq_attr "alternative" "2")
11112 (const_string "SI")
11113 (and (eq_attr "alternative" "3")
11114 (match_test "!TARGET_AVX512DQ"))
11115 (const_string "HI")
11116 ]
11117 (const_string "QI")))
11118 ;; Potential partial reg stall on alternative 2.
11119 (set (attr "preferred_for_speed")
11120 (cond [(eq_attr "alternative" "2")
11121 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
11122 (symbol_ref "true")))])
11123
;; any_or operation into the low part of a register (strict_low_part
;; destination) for the SWI12 modes, clobbering the flags register.
;; Alternative 0 ties operand 1 to operand 0 and emits the instruction
;; directly.
11124 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Alternative 1 (earlyclobber destination, operand 1 in another register)
;; is output as "#" and split after reload into a strict_low_part copy of
;; operand 1 into operand 0 followed by the tied operation.
;; Enabled when !TARGET_PARTIAL_REG_STALL or when optimizing the function
;; for size.
11125 (define_insn_and_split "*<code><mode>_1_slp"
11126 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
11127 (any_or:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "%0,!<r>")
11128 (match_operand:SWI12 2 "general_operand" "<r>mn,<r>mn")))
11129 (clobber (reg:CC FLAGS_REG))]
11130 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
11131 "@
11132 <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
11133 #"
11134 "&& reload_completed"
11135 [(set (strict_low_part (match_dup 0)) (match_dup 1))
11136 (parallel
11137 [(set (strict_low_part (match_dup 0))
11138 (any_or:SWI12 (match_dup 0) (match_dup 2)))
11139 (clobber (reg:CC FLAGS_REG))])]
11140 ""
11141 [(set_attr "type" "alu")
11142 (set_attr "mode" "<MODE>")])
11143
11144 ;; convert (sign_extend:WIDE (any_logic:NARROW (memory, immediate)))
11145 ;; to (any_logic:WIDE (sign_extend (memory)), (sign_extend (immediate))).
11146 ;; This eliminates sign extension after logic operation.
11147
;; QImode source case: a sign extension to a SWI248 mode of an any_logic
;; operation on a QImode memory operand and a CONST_INT is rewritten as a
;; sign-extending load into a new pseudo (operands[3]) followed by the same
;; logic operation in the wide mode.  The split has no extra condition.
11148 (define_split
11149 [(set (match_operand:SWI248 0 "register_operand")
11150 (sign_extend:SWI248
11151 (any_logic:QI (match_operand:QI 1 "memory_operand")
11152 (match_operand:QI 2 "const_int_operand"))))]
11153 ""
11154 [(set (match_dup 3) (sign_extend:SWI248 (match_dup 1)))
11155 (set (match_dup 0) (any_logic:SWI248 (match_dup 3) (match_dup 2)))]
11156 "operands[3] = gen_reg_rtx (<MODE>mode);")
11157
;; HImode source case: a sign extension to a SWI48 mode of an any_logic
;; operation on an HImode memory operand and a CONST_INT is rewritten as a
;; sign-extending load into a new pseudo (operands[3]) followed by the same
;; logic operation in the wide mode.  The split has no extra condition.
11158 (define_split
11159 [(set (match_operand:SWI48 0 "register_operand")
11160 (sign_extend:SWI48
11161 (any_logic:HI (match_operand:HI 1 "memory_operand")
11162 (match_operand:HI 2 "const_int_operand"))))]
11163 ""
11164 [(set (match_dup 3) (sign_extend:SWI48 (match_dup 1)))
11165 (set (match_dup 0) (any_logic:SWI48 (match_dup 3) (match_dup 2)))]
11166 "operands[3] = gen_reg_rtx (<MODE>mode);")
11167
;; SImode source case (64-bit targets only): a sign extension to DImode of
;; an any_logic operation on an SImode memory operand and a CONST_INT is
;; rewritten as a sign-extending load into a new DImode pseudo
;; (operands[3]) followed by the same logic operation in DImode.
11168 (define_split
11169 [(set (match_operand:DI 0 "register_operand")
11170 (sign_extend:DI
11171 (any_logic:SI (match_operand:SI 1 "memory_operand")
11172 (match_operand:SI 2 "const_int_operand"))))]
11173 "TARGET_64BIT"
11174 [(set (match_dup 3) (sign_extend:DI (match_dup 1)))
11175 (set (match_dup 0) (any_logic:DI (match_dup 3) (match_dup 2)))]
11176 "operands[3] = gen_reg_rtx (DImode);")
11177
;; any_or operation for the SWI modes that sets the flags register (compare
;; of the result with zero, CCNOmode) and also stores the result in
;; operand 0.  Operand 1 is tied to operand 0 in both alternatives.
11178 (define_insn "*<code><mode>_2"
11179 [(set (reg FLAGS_REG)
11180 (compare (any_or:SWI
11181 (match_operand:SWI 1 "nonimmediate_operand" "%0,0")
11182 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>"))
11183 (const_int 0)))
11184 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
11185 (any_or:SWI (match_dup 1) (match_dup 2)))]
11186 "ix86_match_ccmode (insn, CCNOmode)
11187 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11188 "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
11189 [(set_attr "type" "alu")
11190 (set_attr "mode" "<MODE>")])
11191
;; Flag-setting SImode any_or operation whose result is stored
;; zero-extended in a DImode register (TARGET_64BIT only).  The template
;; prints operand 0 with the %k modifier and uses the {l} suffix.
11192 ;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
11193 ;; ??? Special case for immediate operand is missing - it is tricky.
11194 (define_insn "*<code>si_2_zext"
11195 [(set (reg FLAGS_REG)
11196 (compare (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
11197 (match_operand:SI 2 "x86_64_general_operand" "rBMe"))
11198 (const_int 0)))
11199 (set (match_operand:DI 0 "register_operand" "=r")
11200 (zero_extend:DI (any_or:SI (match_dup 1) (match_dup 2))))]
11201 "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
11202 && ix86_binary_operator_ok (<CODE>, SImode, operands)"
11203 "<logic>{l}\t{%2, %k0|%k0, %2}"
11204 [(set_attr "type" "alu")
11205 (set_attr "mode" "SI")])
11206
;; Immediate form of the flag-setting, zero-extending SImode any_or
;; operation: operand 2 must satisfy x86_64_zext_immediate_operand
;; (constraint "Z"), and the DImode result is written as the operation
;; applied to the zero-extended operand 1 and that immediate.
11207 (define_insn "*<code>si_2_zext_imm"
11208 [(set (reg FLAGS_REG)
11209 (compare (any_or:SI
11210 (match_operand:SI 1 "nonimmediate_operand" "%0")
11211 (match_operand:SI 2 "x86_64_zext_immediate_operand" "Z"))
11212 (const_int 0)))
11213 (set (match_operand:DI 0 "register_operand" "=r")
11214 (any_or:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
11215 "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
11216 && ix86_binary_operator_ok (<CODE>, SImode, operands)"
11217 "<logic>{l}\t{%2, %k0|%k0, %2}"
11218 [(set_attr "type" "alu")
11219 (set_attr "mode" "SI")])
11220
;; Flags-only form: FLAGS_REG is set from comparing the any_or result
;; with zero, while the result itself only lands in a clobbered scratch
;; register (operand 0).  Rejects the case where operands 1 and 2 are
;; both memory.
11221 (define_insn "*<code><mode>_3"
11222 [(set (reg FLAGS_REG)
11223 (compare (any_or:SWI
11224 (match_operand:SWI 1 "nonimmediate_operand" "%0")
11225 (match_operand:SWI 2 "<general_operand>" "<g>"))
11226 (const_int 0)))
11227 (clobber (match_scratch:SWI 0 "=<r>"))]
11228 "ix86_match_ccmode (insn, CCNOmode)
11229 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11230 "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
11231 [(set_attr "type" "alu")
11232 (set_attr "mode" "<MODE>")])
11233
;; QImode any_or operation on the 8-bit field at bit position 8 of
;; operand 0 (zero_extract with size 8, position 8), combined with a
;; QImode operand 2.  The template prints operand 0 with the %h modifier.
;; The second alternative (memory operand 2) carries isa "nox64".
;; Operands 0 and 1 must be the same rtx (see the FIXME in the condition).
11234 (define_insn "*<code>qi_ext<mode>_1"
11235 [(set (zero_extract:SWI248
11236 (match_operand:SWI248 0 "register_operand" "+Q,Q")
11237 (const_int 8)
11238 (const_int 8))
11239 (subreg:SWI248
11240 (any_or:QI
11241 (subreg:QI
11242 (zero_extract:SWI248
11243 (match_operand:SWI248 1 "register_operand" "0,0")
11244 (const_int 8)
11245 (const_int 8)) 0)
11246 (match_operand:QI 2 "general_operand" "QnBc,m")) 0))
11247 (clobber (reg:CC FLAGS_REG))]
11248 "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
11249 /* FIXME: without this LRA can't reload this pattern, see PR82524. */
11250 && rtx_equal_p (operands[0], operands[1])"
11251 "<logic>{b}\t{%2, %h0|%h0, %2}"
11252 [(set_attr "isa" "*,nox64")
11253 (set_attr "type" "alu")
11254 (set_attr "mode" "QI")])
11255
;; QImode any_or operation where both inputs are the 8-bit fields at bit
;; position 8 of operands 1 and 2, and the result is written to the same
;; field of operand 0.  Both operands are printed with the %h modifier.
;; Operand 0 must be the same rtx as operand 1 or operand 2 (see the
;; FIXME in the condition).
11256 (define_insn "*<code>qi_ext<mode>_2"
11257 [(set (zero_extract:SWI248
11258 (match_operand:SWI248 0 "register_operand" "+Q")
11259 (const_int 8)
11260 (const_int 8))
11261 (subreg:SWI248
11262 (any_or:QI
11263 (subreg:QI
11264 (zero_extract:SWI248
11265 (match_operand:SWI248 1 "register_operand" "%0")
11266 (const_int 8)
11267 (const_int 8)) 0)
11268 (subreg:QI
11269 (zero_extract:SWI248
11270 (match_operand:SWI248 2 "register_operand" "Q")
11271 (const_int 8)
11272 (const_int 8)) 0)) 0))
11273 (clobber (reg:CC FLAGS_REG))]
11274 "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
11275 /* FIXME: without this LRA can't reload this pattern, see PR82524. */
11276 && (rtx_equal_p (operands[0], operands[1])
11277 || rtx_equal_p (operands[0], operands[2]))"
11278 "<logic>{b}\t{%h2, %h0|%h0, %h2}"
11279 [(set_attr "type" "alu")
11280 (set_attr "mode" "QI")])
11281
11282 ;; Convert wide OR instructions with immediate operand to shorter QImode
11283 ;; equivalents when possible.
11284 ;; Don't do the splitting with memory operands, since it introduces risk
11285 ;; of memory mismatch stalls. We may want to do the splitting for optimizing
11286 ;; for size, but that can (should?) be handled by generic code instead.
;; After reload, when the immediate has no bits set outside positions
;; 8..15 (INTVAL & ~(255 << 8) is zero), rewrite the operation as a
;; QImode operation on the 8-bit field at bit position 8, using the
;; SImode low parts of the registers and the immediate shifted right by 8.
;; A zero immediate is handled separately: a plain move is emitted, or a
;; deleted-insn note when source and destination are already equal.
11287 (define_split
11288 [(set (match_operand:SWI248 0 "QIreg_operand")
11289 (any_or:SWI248 (match_operand:SWI248 1 "register_operand")
11290 (match_operand:SWI248 2 "const_int_operand")))
11291 (clobber (reg:CC FLAGS_REG))]
11292 "reload_completed
11293 && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
11294 && !(INTVAL (operands[2]) & ~(255 << 8))"
11295 [(parallel
11296 [(set (zero_extract:SI (match_dup 0)
11297 (const_int 8)
11298 (const_int 8))
11299 (subreg:SI
11300 (any_or:QI
11301 (subreg:QI
11302 (zero_extract:SI (match_dup 1)
11303 (const_int 8)
11304 (const_int 8)) 0)
11305 (match_dup 2)) 0))
11306 (clobber (reg:CC FLAGS_REG))])]
11307 {
11308 /* Handle the case where INTVAL (operands[2]) == 0. */
11309 if (operands[2] == const0_rtx)
11310 {
11311 if (!rtx_equal_p (operands[0], operands[1]))
11312 emit_move_insn (operands[0], operands[1]);
11313 else
11314 emit_note (NOTE_INSN_DELETED);
11315 DONE;
11316 }
11317 operands[0] = gen_lowpart (SImode, operands[0]);
11318 operands[1] = gen_lowpart (SImode, operands[1]);
11319 operands[2] = gen_int_mode (INTVAL (operands[2]) >> 8, QImode);
11320 })
11321
11322 ;; Since OR can be encoded with sign extended immediate, this is only
11323 ;; profitable when 7th bit is set.
;; After reload, when the immediate has no bits set above bit 7 and has
;; bit 7 set, rewrite the operation as a QImode strict_low_part operation
;; on the QImode low parts of operands 0 and 1, with the immediate
;; converted to QImode.
11324 (define_split
11325 [(set (match_operand:SWI248 0 "any_QIreg_operand")
11326 (any_or:SWI248 (match_operand:SWI248 1 "general_operand")
11327 (match_operand:SWI248 2 "const_int_operand")))
11328 (clobber (reg:CC FLAGS_REG))]
11329 "reload_completed
11330 && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
11331 && !(INTVAL (operands[2]) & ~255)
11332 && (INTVAL (operands[2]) & 128)"
11333 [(parallel [(set (strict_low_part (match_dup 0))
11334 (any_or:QI (match_dup 1)
11335 (match_dup 2)))
11336 (clobber (reg:CC FLAGS_REG))])]
11337 {
11338 operands[0] = gen_lowpart (QImode, operands[0]);
11339 operands[1] = gen_lowpart (QImode, operands[1]);
11340 operands[2] = gen_int_mode (INTVAL (operands[2]), QImode);
11341 })
11342
;; Expander for an XOR of the 8-bit field at bit position 8 of an HImode
;; register (operand 1) with a QImode constant (operand 2).  The result is
;; stored in the same field of operand 0, and FLAGS_REG is set in CCNOmode
;; from comparing the XOR result with zero.
11343 (define_expand "xorqi_ext_1_cc"
11344 [(parallel
11345 [(set (reg:CCNO FLAGS_REG)
11346 (compare:CCNO
11347 (xor:QI
11348 (subreg:QI
11349 (zero_extract:HI (match_operand:HI 1 "register_operand")
11350 (const_int 8)
11351 (const_int 8)) 0)
11352 (match_operand:QI 2 "const_int_operand"))
11353 (const_int 0)))
11354 (set (zero_extract:HI (match_operand:HI 0 "register_operand")
11355 (const_int 8)
11356 (const_int 8))
11357 (subreg:HI
11358 (xor:QI
11359 (subreg:QI
11360 (zero_extract:HI (match_dup 1)
11361 (const_int 8)
11362 (const_int 8)) 0)
11363 (match_dup 2)) 0))])])
11364
;; Flag-setting XOR of the 8-bit field at bit position 8 of operand 1 with
;; a QImode operand 2; the result is written back to the same field of
;; operand 0.  Requires ix86_match_ccmode to accept CCNOmode and operands
;; 0 and 1 to be the same rtx.  The second alternative (memory operand 2)
;; carries isa "nox64".
11365 (define_insn "*xorqi_ext<mode>_1_cc"
11366 [(set (reg FLAGS_REG)
11367 (compare
11368 (xor:QI
11369 (subreg:QI
11370 (zero_extract:SWI248
11371 (match_operand:SWI248 1 "register_operand" "0,0")
11372 (const_int 8)
11373 (const_int 8)) 0)
11374 (match_operand:QI 2 "general_operand" "QnBc,m"))
11375 (const_int 0)))
11376 (set (zero_extract:SWI248
11377 (match_operand:SWI248 0 "register_operand" "+Q,Q")
11378 (const_int 8)
11379 (const_int 8))
11380 (subreg:SWI248
11381 (xor:QI
11382 (subreg:QI
11383 (zero_extract:SWI248
11384 (match_dup 1)
11385 (const_int 8)
11386 (const_int 8)) 0)
11387 (match_dup 2)) 0))]
11388 "ix86_match_ccmode (insn, CCNOmode)
11389 /* FIXME: without this LRA can't reload this pattern, see PR82524. */
11390 && rtx_equal_p (operands[0], operands[1])"
11391 "xor{b}\t{%2, %h0|%h0, %2}"
11392 [(set_attr "isa" "*,nox64")
11393 (set_attr "type" "alu")
11394 (set_attr "mode" "QI")])
11395
11396 ;; Split DST = (HI<<32)|LO early to minimize register usage.
;; Codes accepted by the *concat patterns: PLUS, IOR and XOR.  In those
;; patterns one input is shifted left by the half-word width and the other
;; is a zero-extended half word, so the inputs have no set bits in common
;; and all three codes produce the same value.
11397 (define_code_iterator any_or_plus [plus ior xor])
;; Double-word value built from (operand 1 << half-word bits) combined
;; with a zero-extended half-word operand 3.  Matches only when the shift
;; count equals <MODE_SIZE> * BITS_PER_UNIT.  After reload the work is
;; delegated to split_double_concat, which receives operand 3 and the
;; <MODE>mode low part of operand 1.
11398 (define_insn_and_split "*concat<mode><dwi>3_1"
11399 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
11400 (any_or_plus:<DWI>
11401 (ashift:<DWI> (match_operand:<DWI> 1 "register_operand" "r,r")
11402 (match_operand:<DWI> 2 "const_int_operand"))
11403 (zero_extend:<DWI>
11404 (match_operand:DWIH 3 "nonimmediate_operand" "r,m"))))]
11405 "INTVAL (operands[2]) == <MODE_SIZE> * BITS_PER_UNIT"
11406 "#"
11407 "&& reload_completed"
11408 [(clobber (const_int 0))]
11409 {
11410 split_double_concat (<DWI>mode, operands[0], operands[3],
11411 gen_lowpart (<MODE>mode, operands[1]));
11412 DONE;
11413 })
11414
;; Same concatenation with the inputs in the other order: zero-extended
;; half-word operand 1 combined with (operand 2 << half-word bits).
;; After reload split_double_concat receives operand 1 and the <MODE>mode
;; low part of operand 2.
11415 (define_insn_and_split "*concat<mode><dwi>3_2"
11416 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
11417 (any_or_plus:<DWI>
11418 (zero_extend:<DWI>
11419 (match_operand:DWIH 1 "nonimmediate_operand" "r,m"))
11420 (ashift:<DWI> (match_operand:<DWI> 2 "register_operand" "r,r")
11421 (match_operand:<DWI> 3 "const_int_operand"))))]
11422 "INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
11423 "#"
11424 "&& reload_completed"
11425 [(clobber (const_int 0))]
11426 {
11427 split_double_concat (<DWI>mode, operands[0], operands[1],
11428 gen_lowpart (<MODE>mode, operands[2]));
11429 DONE;
11430 })
11431
;; Concatenation where both inputs are zero-extended half words: operand 1
;; is shifted left by the half-word width and combined with operand 3.
;; The last alternative (both inputs in memory) uses an earlyclobber
;; destination.  After reload split_double_concat receives operand 3 and
;; operand 1.
11432 (define_insn_and_split "*concat<mode><dwi>3_3"
11433 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,r,&r")
11434 (any_or_plus:<DWI>
11435 (ashift:<DWI>
11436 (zero_extend:<DWI>
11437 (match_operand:DWIH 1 "nonimmediate_operand" "r,m,r,m"))
11438 (match_operand:<DWI> 2 "const_int_operand"))
11439 (zero_extend:<DWI>
11440 (match_operand:DWIH 3 "nonimmediate_operand" "r,r,m,m"))))]
11441 "INTVAL (operands[2]) == <MODE_SIZE> * BITS_PER_UNIT"
11442 "#"
11443 "&& reload_completed"
11444 [(clobber (const_int 0))]
11445 {
11446 split_double_concat (<DWI>mode, operands[0], operands[3], operands[1]);
11447 DONE;
11448 })
11449
;; Concatenation of two zero-extended half words with the inputs in the
;; other order: operand 1 is combined with operand 2 shifted left by the
;; half-word width.  After reload split_double_concat receives operand 1
;; and operand 2.
11450 (define_insn_and_split "*concat<mode><dwi>3_4"
11451 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,r,&r")
11452 (any_or_plus:<DWI>
11453 (zero_extend:<DWI>
11454 (match_operand:DWIH 1 "nonimmediate_operand" "r,m,r,m"))
11455 (ashift:<DWI>
11456 (zero_extend:<DWI>
11457 (match_operand:DWIH 2 "nonimmediate_operand" "r,r,m,m"))
11458 (match_operand:<DWI> 3 "const_int_operand"))))]
11459 "INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
11460 "#"
11461 "&& reload_completed"
11462 [(clobber (const_int 0))]
11463 {
11464 split_double_concat (<DWI>mode, operands[0], operands[1], operands[2]);
11465 DONE;
11466 })
11467 \f
11468 ;; Negation instructions
11469
;; Expander for integer negation in the SDWIM modes.  All of the work is
;; delegated to ix86_expand_unary_operator with code NEG.
11470 (define_expand "neg<mode>2"
11471 [(set (match_operand:SDWIM 0 "nonimmediate_operand")
11472 (neg:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")))]
11473 ""
11474 "ix86_expand_unary_operator (NEG, <MODE>mode, operands); DONE;")
11475
;; Double-word negation, split after reload into three half-word insns:
;;   1. negate operands 0/1 while setting FLAGS_REG in CCCmode,
;;   2. add the carry (ltu of the flags) and constant 0 to operand 2,
;;   3. negate operand 2.
;; split_double_mode rewrites the operand array so that operands 0-1 and
;; 2-3 hold the two halves; presumably 0-1 are the low halves and 2-3 the
;; high halves -- confirm against split_double_mode.
11476 (define_insn_and_split "*neg<dwi>2_doubleword"
11477 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
11478 (neg:<DWI> (match_operand:<DWI> 1 "nonimmediate_operand" "0")))
11479 (clobber (reg:CC FLAGS_REG))]
11480 "ix86_unary_operator_ok (NEG, <DWI>mode, operands)"
11481 "#"
11482 "&& reload_completed"
11483 [(parallel
11484 [(set (reg:CCC FLAGS_REG)
11485 (unspec:CCC [(match_dup 1) (const_int 0)] UNSPEC_CC_NE))
11486 (set (match_dup 0) (neg:DWIH (match_dup 1)))])
11487 (parallel
11488 [(set (match_dup 2)
11489 (plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
11490 (match_dup 3))
11491 (const_int 0)))
11492 (clobber (reg:CC FLAGS_REG))])
11493 (parallel
11494 [(set (match_dup 2)
11495 (neg:DWIH (match_dup 2)))
11496 (clobber (reg:CC FLAGS_REG))])]
11497 "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);")
11498
11499 ;; Convert:
11500 ;; mov %esi, %edx
11501 ;; negl %eax
11502 ;; adcl $0, %edx
11503 ;; negl %edx
11504 ;; to:
11505 ;; xorl %edx, %edx
11506 ;; negl %eax
11507 ;; sbbl %esi, %edx
11508
;; Matches: move operand 1 into operand 0; flag-setting neg of operand 2;
;; add-with-carry of 0 into operand 0; neg of operand 0.
;; Replacement: operand 0 is cleared first (ix86_expand_clear in the
;; preparation statement), then the flag-setting neg of operand 2, then
;; operand 0 = operand 0 - carry - operand 1.
;; Requires operands 0 and 2 to be different registers and neither of
;; them to be mentioned in operand 1.
11509 (define_peephole2
11510 [(set (match_operand:SWI48 0 "general_reg_operand")
11511 (match_operand:SWI48 1 "nonimmediate_gr_operand"))
11512 (parallel
11513 [(set (reg:CCC FLAGS_REG)
11514 (unspec:CCC [(match_operand:SWI48 2 "general_reg_operand")
11515 (const_int 0)] UNSPEC_CC_NE))
11516 (set (match_dup 2) (neg:SWI48 (match_dup 2)))])
11517 (parallel
11518 [(set (match_dup 0)
11519 (plus:SWI48 (plus:SWI48
11520 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0))
11521 (match_dup 0))
11522 (const_int 0)))
11523 (clobber (reg:CC FLAGS_REG))])
11524 (parallel
11525 [(set (match_dup 0)
11526 (neg:SWI48 (match_dup 0)))
11527 (clobber (reg:CC FLAGS_REG))])]
11528 "REGNO (operands[0]) != REGNO (operands[2])
11529 && !reg_mentioned_p (operands[0], operands[1])
11530 && !reg_mentioned_p (operands[2], operands[1])"
11531 [(parallel
11532 [(set (reg:CCC FLAGS_REG)
11533 (unspec:CCC [(match_dup 2) (const_int 0)] UNSPEC_CC_NE))
11534 (set (match_dup 2) (neg:SWI48 (match_dup 2)))])
11535 (parallel
11536 [(set (match_dup 0)
11537 (minus:SWI48 (minus:SWI48
11538 (match_dup 0)
11539 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0)))
11540 (match_dup 1)))
11541 (clobber (reg:CC FLAGS_REG))])]
11542 "ix86_expand_clear (operands[0]);")
11543
11544 ;; Convert:
11545 ;; xorl %edx, %edx
11546 ;; negl %eax
11547 ;; adcl $0, %edx
11548 ;; negl %edx
11549 ;; to:
11550 ;; negl %eax
11551 ;; sbbl %edx, %edx // *x86_mov<mode>cc_0_m1
11552
;; Matches: clear of operand 0; flag-setting neg of operand 1;
;; add-with-carry of 0 into operand 0; neg of operand 0.
;; Replacement: the flag-setting neg of operand 1 followed by a single
;; insn that sets operand 0 to -1 when the carry condition (ltu) holds and
;; to 0 otherwise.  Requires operands 0 and 1 to be different registers.
11553 (define_peephole2
11554 [(parallel
11555 [(set (match_operand:SWI48 0 "general_reg_operand") (const_int 0))
11556 (clobber (reg:CC FLAGS_REG))])
11557 (parallel
11558 [(set (reg:CCC FLAGS_REG)
11559 (unspec:CCC [(match_operand:SWI48 1 "general_reg_operand")
11560 (const_int 0)] UNSPEC_CC_NE))
11561 (set (match_dup 1) (neg:SWI48 (match_dup 1)))])
11562 (parallel
11563 [(set (match_dup 0)
11564 (plus:SWI48 (plus:SWI48
11565 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0))
11566 (match_dup 0))
11567 (const_int 0)))
11568 (clobber (reg:CC FLAGS_REG))])
11569 (parallel
11570 [(set (match_dup 0)
11571 (neg:SWI48 (match_dup 0)))
11572 (clobber (reg:CC FLAGS_REG))])]
11573 "REGNO (operands[0]) != REGNO (operands[1])"
11574 [(parallel
11575 [(set (reg:CCC FLAGS_REG)
11576 (unspec:CCC [(match_dup 1) (const_int 0)] UNSPEC_CC_NE))
11577 (set (match_dup 1) (neg:SWI48 (match_dup 1)))])
11578 (parallel
11579 [(set (match_dup 0)
11580 (if_then_else:SWI48 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0))
11581 (const_int -1)
11582 (const_int 0)))
11583 (clobber (reg:CC FLAGS_REG))])])
11584
;; Plain in-place negation (operand 1 is tied to operand 0) that clobbers
;; FLAGS_REG.
11585 (define_insn "*neg<mode>_1"
11586 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
11587 (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")))
11588 (clobber (reg:CC FLAGS_REG))]
11589 "ix86_unary_operator_ok (NEG, <MODE>mode, operands)"
11590 "neg{<imodesuffix>}\t%0"
11591 [(set_attr "type" "negnot")
11592 (set_attr "mode" "<MODE>")])
11593
;; SImode negation whose result is zero-extended into a DImode register
;; (TARGET_64BIT only).  The template prints operand 0 with the %k
;; modifier and uses the {l} suffix.
11594 (define_insn "*negsi_1_zext"
11595 [(set (match_operand:DI 0 "register_operand" "=r")
11596 (zero_extend:DI
11597 (neg:SI (match_operand:SI 1 "register_operand" "0"))))
11598 (clobber (reg:CC FLAGS_REG))]
11599 "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands)"
11600 "neg{l}\t%k0"
11601 [(set_attr "type" "negnot")
11602 (set_attr "mode" "SI")])
11603
11604 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Negation that writes only the low part (strict_low_part) of operand 0.
;; Alternative 0 (operand 1 tied to operand 0) emits the neg directly.
;; Alternative 1 outputs "#" and is split after reload into a
;; strict_low_part move of operand 1 into operand 0 followed by the
;; in-place negation.
11605 (define_insn_and_split "*neg<mode>_1_slp"
11606 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
11607 (neg:SWI12 (match_operand:SWI12 1 "register_operand" "0,!<r>")))
11608 (clobber (reg:CC FLAGS_REG))]
11609 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
11610 "@
11611 neg{<imodesuffix>}\t%0
11612 #"
11613 "&& reload_completed"
11614 [(set (strict_low_part (match_dup 0)) (match_dup 1))
11615 (parallel
11616 [(set (strict_low_part (match_dup 0))
11617 (neg:SWI12 (match_dup 0)))
11618 (clobber (reg:CC FLAGS_REG))])]
11619 ""
11620 [(set_attr "type" "negnot")
11621 (set_attr "mode" "<MODE>")])
11622
;; Negation that stores its result in operand 0 and also sets FLAGS_REG
;; from comparing the negated value with zero.  Only matches when
;; ix86_match_ccmode accepts CCGOCmode for the insn.
11623 (define_insn "*neg<mode>_2"
11624 [(set (reg FLAGS_REG)
11625 (compare
11626 (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0"))
11627 (const_int 0)))
11628 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
11629 (neg:SWI (match_dup 1)))]
11630 "ix86_match_ccmode (insn, CCGOCmode)
11631 && ix86_unary_operator_ok (NEG, <MODE>mode, operands)"
11632 "neg{<imodesuffix>}\t%0"
11633 [(set_attr "type" "negnot")
11634 (set_attr "mode" "<MODE>")])
11635
;; Flag-setting SImode negation whose result is zero-extended into a
;; DImode register (TARGET_64BIT only).  FLAGS_REG is set from comparing
;; the SImode negated value with zero, under CCGOCmode.
11636 (define_insn "*negsi_2_zext"
11637 [(set (reg FLAGS_REG)
11638 (compare
11639 (neg:SI (match_operand:SI 1 "register_operand" "0"))
11640 (const_int 0)))
11641 (set (match_operand:DI 0 "register_operand" "=r")
11642 (zero_extend:DI
11643 (neg:SI (match_dup 1))))]
11644 "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
11645 && ix86_unary_operator_ok (NEG, SImode, operands)"
11646 "neg{l}\t%k0"
11647 [(set_attr "type" "negnot")
11648 (set_attr "mode" "SI")])
11649
;; Negation that stores its result in operand 0 and sets FLAGS_REG in
;; CCCmode, described as UNSPEC_CC_NE of the input and zero.
;; NOTE(review): presumably this models the carry flag being set exactly
;; when the input is non-zero -- confirm against the UNSPEC_CC_NE users.
11650 (define_insn "*neg<mode>_ccc_1"
11651 [(set (reg:CCC FLAGS_REG)
11652 (unspec:CCC
11653 [(match_operand:SWI 1 "nonimmediate_operand" "0")
11654 (const_int 0)] UNSPEC_CC_NE))
11655 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
11656 (neg:SWI (match_dup 1)))]
11657 ""
11658 "neg{<imodesuffix>}\t%0"
11659 [(set_attr "type" "negnot")
11660 (set_attr "mode" "<MODE>")])
11661
;; Flags-only form of the CCCmode negation: FLAGS_REG is the only real
;; output, and the negated value lands in a clobbered scratch register
;; (operand 0) that operand 1 is tied to.
11662 (define_insn "*neg<mode>_ccc_2"
11663 [(set (reg:CCC FLAGS_REG)
11664 (unspec:CCC
11665 [(match_operand:SWI 1 "nonimmediate_operand" "0")
11666 (const_int 0)] UNSPEC_CC_NE))
11667 (clobber (match_scratch:SWI 0 "=<r>"))]
11668 ""
11669 "neg{<imodesuffix>}\t%0"
11670 [(set_attr "type" "negnot")
11671 (set_attr "mode" "<MODE>")])
11672
;; Named expander (SWI48 modes, register operands only) that generates the
;; parallel of a CCCmode flag set via UNSPEC_CC_NE and the negation of
;; operand 1 into operand 0.
11673 (define_expand "x86_neg<mode>_ccc"
11674 [(parallel
11675 [(set (reg:CCC FLAGS_REG)
11676 (unspec:CCC [(match_operand:SWI48 1 "register_operand")
11677 (const_int 0)] UNSPEC_CC_NE))
11678 (set (match_operand:SWI48 0 "register_operand")
11679 (neg:SWI48 (match_dup 1)))])])
11680
;; QImode negation of the 8-bit field at bit position 8 of operand 1,
;; written back to the same field of operand 0.  Operands 0 and 1 must be
;; the same rtx (see the FIXME in the condition).  The template prints
;; operand 0 with the %h modifier.
11681 (define_insn "*negqi_ext<mode>_2"
11682 [(set (zero_extract:SWI248
11683 (match_operand:SWI248 0 "register_operand" "+Q")
11684 (const_int 8)
11685 (const_int 8))
11686 (subreg:SWI248
11687 (neg:QI
11688 (subreg:QI
11689 (zero_extract:SWI248
11690 (match_operand:SWI248 1 "register_operand" "0")
11691 (const_int 8)
11692 (const_int 8)) 0)) 0))
11693 (clobber (reg:CC FLAGS_REG))]
11694 "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
11695 rtx_equal_p (operands[0], operands[1])"
11696 "neg{b}\t%h0"
11697 [(set_attr "type" "negnot")
11698 (set_attr "mode" "QI")])
11699
11700 ;; Negate with jump on overflow.
;; Operand 0 receives the negation of operand 1; operand 2 is the label
;; used by the conditional jump.  The flags are set in CCOmode from an
;; UNSPEC_CC_NE of operand 1 and operand 3, where the preparation
;; statement sets operand 3 to the constant with only the top bit of
;; <MODE>mode set.
11701 (define_expand "negv<mode>3"
11702 [(parallel [(set (reg:CCO FLAGS_REG)
11703 (unspec:CCO
11704 [(match_operand:SWI 1 "register_operand")
11705 (match_dup 3)] UNSPEC_CC_NE))
11706 (set (match_operand:SWI 0 "register_operand")
11707 (neg:SWI (match_dup 1)))])
11708 (set (pc) (if_then_else
11709 (eq (reg:CCO FLAGS_REG) (const_int 0))
11710 (label_ref (match_operand 2))
11711 (pc)))]
11712 ""
11713 {
11714 operands[3]
11715 = gen_int_mode (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (<MODE>mode) - 1),
11716 <MODE>mode);
11717 })
11718
;; Insn matching the flag-setting half of negv<mode>3: a negation that
;; sets FLAGS_REG in CCOmode.  Operand 2 must be the sign-bit constant of
;; <MODE>mode (checked with mode_signbit_p).
11719 (define_insn "*negv<mode>3"
11720 [(set (reg:CCO FLAGS_REG)
11721 (unspec:CCO [(match_operand:SWI 1 "nonimmediate_operand" "0")
11722 (match_operand:SWI 2 "const_int_operand")]
11723 UNSPEC_CC_NE))
11724 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
11725 (neg:SWI (match_dup 1)))]
11726 "ix86_unary_operator_ok (NEG, <MODE>mode, operands)
11727 && mode_signbit_p (<MODE>mode, operands[2])"
11728 "neg{<imodesuffix>}\t%0"
11729 [(set_attr "type" "negnot")
11730 (set_attr "mode" "<MODE>")])
11731
11732 ;; Optimize *negsi_1 followed by *cmpsi_ccno_1 (PR target/91384)
;; Matches: copy operand 1 into operand 0; negate operand 0 (clobbering
;; the flags); compare operand 1 with zero in CCZmode.
;; Replacement: the copy, followed by a single negation of operand 0 that
;; also sets FLAGS_REG in CCZmode from comparing the negated value with
;; zero.  The negated value is zero exactly when the original value is.
11733 (define_peephole2
11734 [(set (match_operand:SWI 0 "general_reg_operand")
11735 (match_operand:SWI 1 "general_reg_operand"))
11736 (parallel [(set (match_dup 0) (neg:SWI (match_dup 0)))
11737 (clobber (reg:CC FLAGS_REG))])
11738 (set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 1) (const_int 0)))]
11739 ""
11740 [(set (match_dup 0) (match_dup 1))
11741 (parallel [(set (reg:CCZ FLAGS_REG)
11742 (compare:CCZ (neg:SWI (match_dup 0)) (const_int 0)))
11743 (set (match_dup 0) (neg:SWI (match_dup 0)))])])
11744
11745 ;; Special expand pattern to handle integer mode abs
11746
;; Integer abs expander, available with TARGET_CMOVE (and, for QImode,
;; only without TARGET_PARTIAL_REG_STALL).
;; With TARGET_EXPAND_ABS the value is computed right here as an
;; arithmetic shift right by W-1, an XOR and a subtraction (formula in the
;; C comment below), and the expander finishes with DONE.  Otherwise the
;; preparation statement does nothing and the abs pattern with a flags
;; clobber is emitted unchanged.
11747 (define_expand "abs<mode>2"
11748 [(parallel
11749 [(set (match_operand:SDWIM 0 "register_operand")
11750 (abs:SDWIM
11751 (match_operand:SDWIM 1 "general_operand")))
11752 (clobber (reg:CC FLAGS_REG))])]
11753 "TARGET_CMOVE
11754 && (<MODE>mode != QImode || !TARGET_PARTIAL_REG_STALL)"
11755 {
11756 if (TARGET_EXPAND_ABS)
11757 {
11758 machine_mode mode = <MODE>mode;
11759 operands[1] = force_reg (mode, operands[1]);
11760
11761 /* Generate rtx abs using:
11762 abs (x) = (((signed) x >> (W-1)) ^ x) - ((signed) x >> (W-1)) */
11763
11764 rtx shift_amount = gen_int_mode (GET_MODE_PRECISION (mode) - 1, QImode);
11765 rtx shift_dst = expand_simple_binop (mode, ASHIFTRT, operands[1],
11766 shift_amount, NULL_RTX,
11767 0, OPTAB_DIRECT);
11768 rtx xor_dst = expand_simple_binop (mode, XOR, shift_dst, operands[1],
11769 operands[0], 0, OPTAB_DIRECT);
11770 rtx minus_dst = expand_simple_binop (mode, MINUS, xor_dst, shift_dst,
11771 operands[0], 0, OPTAB_DIRECT);
11772 if (!rtx_equal_p (minus_dst, operands[0]))
11773 emit_move_insn (operands[0], minus_dst);
11774 DONE;
11775 }
11776 })
11777
;; Double-word abs, matched only while ix86_pre_reload_split () holds and
;; always split ("&& 1").  The split forces the input into a register,
;; allocates a double-word pseudo for the negated value, and emits:
;;   1. flag-setting (CCC) negation of one half into the new pseudo,
;;   2. add of the carry and 0 to the other half,
;;   3. negation of that half, setting the flags in CCGOCmode,
;;   4. two if_then_else sets that pick the negated halves when the "ge"
;;      condition on the flags holds and the original halves otherwise.
;; split_double_mode is called with three double-word operands, so after
;; it operands 0-2 and 3-5 hold the halves; presumably 0-2 are the low
;; halves and 3-5 the high halves -- confirm against split_double_mode.
11778 (define_insn_and_split "*abs<dwi>2_doubleword"
11779 [(set (match_operand:<DWI> 0 "register_operand")
11780 (abs:<DWI>
11781 (match_operand:<DWI> 1 "general_operand")))
11782 (clobber (reg:CC FLAGS_REG))]
11783 "TARGET_CMOVE
11784 && ix86_pre_reload_split ()"
11785 "#"
11786 "&& 1"
11787 [(parallel
11788 [(set (reg:CCC FLAGS_REG)
11789 (unspec:CCC [(match_dup 1) (const_int 0)] UNSPEC_CC_NE))
11790 (set (match_dup 2) (neg:DWIH (match_dup 1)))])
11791 (parallel
11792 [(set (match_dup 5)
11793 (plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
11794 (match_dup 4))
11795 (const_int 0)))
11796 (clobber (reg:CC FLAGS_REG))])
11797 (parallel
11798 [(set (reg:CCGOC FLAGS_REG)
11799 (compare:CCGOC
11800 (neg:DWIH (match_dup 5))
11801 (const_int 0)))
11802 (set (match_dup 5)
11803 (neg:DWIH (match_dup 5)))])
11804 (set (match_dup 0)
11805 (if_then_else:DWIH
11806 (ge (reg:CCGOC FLAGS_REG) (const_int 0))
11807 (match_dup 2)
11808 (match_dup 1)))
11809 (set (match_dup 3)
11810 (if_then_else:DWIH
11811 (ge (reg:CCGOC FLAGS_REG) (const_int 0))
11812 (match_dup 5)
11813 (match_dup 4)))]
11814 {
11815 operands[1] = force_reg (<DWI>mode, operands[1]);
11816 operands[2] = gen_reg_rtx (<DWI>mode);
11817
11818 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
11819 })
11820
;; Double-word negated abs, i.e. (neg (abs x)).  Split exactly like the
;; double-word abs pattern (negate into a fresh pseudo with carry
;; propagation, then select per half), except that the two if_then_else
;; sets test "lt" instead of "ge" on the CCGOCmode flags.
11821 (define_insn_and_split "*nabs<dwi>2_doubleword"
11822 [(set (match_operand:<DWI> 0 "register_operand")
11823 (neg:<DWI>
11824 (abs:<DWI>
11825 (match_operand:<DWI> 1 "general_operand"))))
11826 (clobber (reg:CC FLAGS_REG))]
11827 "TARGET_CMOVE
11828 && ix86_pre_reload_split ()"
11829 "#"
11830 "&& 1"
11831 [(parallel
11832 [(set (reg:CCC FLAGS_REG)
11833 (unspec:CCC [(match_dup 1) (const_int 0)] UNSPEC_CC_NE))
11834 (set (match_dup 2) (neg:DWIH (match_dup 1)))])
11835 (parallel
11836 [(set (match_dup 5)
11837 (plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
11838 (match_dup 4))
11839 (const_int 0)))
11840 (clobber (reg:CC FLAGS_REG))])
11841 (parallel
11842 [(set (reg:CCGOC FLAGS_REG)
11843 (compare:CCGOC
11844 (neg:DWIH (match_dup 5))
11845 (const_int 0)))
11846 (set (match_dup 5)
11847 (neg:DWIH (match_dup 5)))])
11848 (set (match_dup 0)
11849 (if_then_else:DWIH
11850 (lt (reg:CCGOC FLAGS_REG) (const_int 0))
11851 (match_dup 2)
11852 (match_dup 1)))
11853 (set (match_dup 3)
11854 (if_then_else:DWIH
11855 (lt (reg:CCGOC FLAGS_REG) (const_int 0))
11856 (match_dup 5)
11857 (match_dup 4)))]
11858 {
11859 operands[1] = force_reg (<DWI>mode, operands[1]);
11860 operands[2] = gen_reg_rtx (<DWI>mode);
11861
11862 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
11863 })
11864
;; Single-word abs, matched only while ix86_pre_reload_split () holds and
;; always split.  The split forces the input into a register, negates it
;; into a fresh pseudo (operand 2) while setting the flags in CCGOCmode,
;; then selects the negated value when the "ge" condition holds and the
;; original value otherwise.
11865 (define_insn_and_split "*abs<mode>2_1"
11866 [(set (match_operand:SWI 0 "register_operand")
11867 (abs:SWI
11868 (match_operand:SWI 1 "general_operand")))
11869 (clobber (reg:CC FLAGS_REG))]
11870 "TARGET_CMOVE
11871 && (<MODE>mode != QImode || !TARGET_PARTIAL_REG_STALL)
11872 && ix86_pre_reload_split ()"
11873 "#"
11874 "&& 1"
11875 [(parallel
11876 [(set (reg:CCGOC FLAGS_REG)
11877 (compare:CCGOC
11878 (neg:SWI (match_dup 1))
11879 (const_int 0)))
11880 (set (match_dup 2)
11881 (neg:SWI (match_dup 1)))])
11882 (set (match_dup 0)
11883 (if_then_else:SWI
11884 (ge (reg:CCGOC FLAGS_REG) (const_int 0))
11885 (match_dup 2)
11886 (match_dup 1)))]
11887 {
11888 operands[1] = force_reg (<MODE>mode, operands[1]);
11889 operands[2] = gen_reg_rtx (<MODE>mode);
11890 })
11891
;; Single-word negated abs, i.e. (neg (abs x)).  Split like the
;; single-word abs pattern, except that the final if_then_else selects the
;; negated value when the "lt" condition holds on the CCGOCmode flags.
11892 (define_insn_and_split "*nabs<mode>2_1"
11893 [(set (match_operand:SWI 0 "register_operand")
11894 (neg:SWI
11895 (abs:SWI
11896 (match_operand:SWI 1 "general_operand"))))
11897 (clobber (reg:CC FLAGS_REG))]
11898 "TARGET_CMOVE
11899 && (<MODE>mode != QImode || !TARGET_PARTIAL_REG_STALL)
11900 && ix86_pre_reload_split ()"
11901 "#"
11902 "&& 1"
11903 [(parallel
11904 [(set (reg:CCGOC FLAGS_REG)
11905 (compare:CCGOC
11906 (neg:SWI (match_dup 1))
11907 (const_int 0)))
11908 (set (match_dup 2)
11909 (neg:SWI (match_dup 1)))])
11910 (set (match_dup 0)
11911 (if_then_else:SWI
11912 (lt (reg:CCGOC FLAGS_REG) (const_int 0))
11913 (match_dup 2)
11914 (match_dup 1)))]
11915 {
11916 operands[1] = force_reg (<MODE>mode, operands[1]);
11917 operands[2] = gen_reg_rtx (<MODE>mode);
11918 })
11919
;; TFmode abs/neg expander (TARGET_SSE); all of the work is delegated to
;; ix86_expand_fp_absneg_operator.
11920 (define_expand "<code>tf2"
11921 [(set (match_operand:TF 0 "register_operand")
11922 (absneg:TF (match_operand:TF 1 "register_operand")))]
11923 "TARGET_SSE"
11924 "ix86_expand_fp_absneg_operator (<CODE>, TFmode, operands); DONE;")
11925
;; TFmode abs/neg with an extra "use" operand 2.  After reload it is split
;; into a single <absneg_op> of operands 1 and 2.  Before emitting, the
;; two sources are swapped when:
;;   - with AVX, operand 1 is in memory;
;;   - without AVX, operand 2 matches the destination.
;; NOTE(review): operand 2 is presumably the sign-bit mask constant --
;; confirm against ix86_expand_fp_absneg_operator.
11926 (define_insn_and_split "*<code>tf2_1"
11927 [(set (match_operand:TF 0 "register_operand" "=x,x,Yv,Yv")
11928 (absneg:TF
11929 (match_operand:TF 1 "vector_operand" "0,xBm,Yv,m")))
11930 (use (match_operand:TF 2 "vector_operand" "xBm,0,Yvm,Yv"))]
11931 "TARGET_SSE"
11932 "#"
11933 "&& reload_completed"
11934 [(set (match_dup 0)
11935 (<absneg_op>:TF (match_dup 1) (match_dup 2)))]
11936 {
11937 if (TARGET_AVX)
11938 {
11939 if (MEM_P (operands[1]))
11940 std::swap (operands[1], operands[2]);
11941 }
11942 else
11943 {
11944 if (operands_match_p (operands[0], operands[2]))
11945 std::swap (operands[1], operands[2]);
11946 }
11947 }
11948 [(set_attr "isa" "noavx,noavx,avx,avx")])
11949
;; TFmode negated abs, i.e. (neg (abs x)), with an extra "use" operand 2.
;; After reload it is split into an IOR of operands 1 and 2, with the same
;; source-swapping rules as *<code>tf2_1 (memory operand 1 with AVX;
;; operand 2 matching the destination without AVX).
11950 (define_insn_and_split "*nabstf2_1"
11951 [(set (match_operand:TF 0 "register_operand" "=x,x,Yv,Yv")
11952 (neg:TF
11953 (abs:TF
11954 (match_operand:TF 1 "vector_operand" "0,xBm,Yv,m"))))
11955 (use (match_operand:TF 2 "vector_operand" "xBm,0,Yvm,Yv"))]
11956 "TARGET_SSE"
11957 "#"
11958 "&& reload_completed"
11959 [(set (match_dup 0)
11960 (ior:TF (match_dup 1) (match_dup 2)))]
11961 {
11962 if (TARGET_AVX)
11963 {
11964 if (MEM_P (operands[1]))
11965 std::swap (operands[1], operands[2]);
11966 }
11967 else
11968 {
11969 if (operands_match_p (operands[0], operands[2]))
11970 std::swap (operands[1], operands[2]);
11971 }
11972 }
11973 [(set_attr "isa" "noavx,noavx,avx,avx")])
11974
;; HFmode abs/neg expander (TARGET_AVX512FP16); all of the work is
;; delegated to ix86_expand_fp_absneg_operator.
11975 (define_expand "<code>hf2"
11976 [(set (match_operand:HF 0 "register_operand")
11977 (absneg:HF (match_operand:HF 1 "register_operand")))]
11978 "TARGET_AVX512FP16"
11979 "ix86_expand_fp_absneg_operator (<CODE>, HFmode, operands); DONE;")
11980
;; abs/neg expander for the X87MODEF modes, available with TARGET_80387 or
;; when the mode is an SSE float mode and TARGET_SSE_MATH is set.  All of
;; the work is delegated to ix86_expand_fp_absneg_operator.
11981 (define_expand "<code><mode>2"
11982 [(set (match_operand:X87MODEF 0 "register_operand")
11983 (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand")))]
11984 "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
11985 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
11986
11987 ;; Changing of sign for FP values is doable using integer unit too.
;; x87-only abs/neg placeholder (not used when the mode is handled by SSE
;; math).  It accepts either an x87 register ("f") or, discouraged, a
;; general register ("!r"), clobbers the flags, and always outputs "#",
;; so it has to be split before final output.
11988 (define_insn "*<code><mode>2_i387_1"
11989 [(set (match_operand:X87MODEF 0 "register_operand" "=f,!r")
11990 (absneg:X87MODEF
11991 (match_operand:X87MODEF 1 "register_operand" "0,0")))
11992 (clobber (reg:CC FLAGS_REG))]
11993 "TARGET_80387 && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
11994 "#")
11995
;; After reload, when both operands are FP registers, drop the flags
;; clobber and keep just the abs/neg set.
11996 (define_split
11997 [(set (match_operand:X87MODEF 0 "fp_register_operand")
11998 (absneg:X87MODEF (match_operand:X87MODEF 1 "fp_register_operand")))
11999 (clobber (reg:CC FLAGS_REG))]
12000 "TARGET_80387 && reload_completed"
12001 [(set (match_dup 0) (absneg:X87MODEF (match_dup 1)))])
12002
;; After reload, when both operands are general registers, hand the
;; operation to ix86_split_fp_absneg_operator, which emits the replacement
;; insns itself (the split ends with DONE).
12003 (define_split
12004 [(set (match_operand:X87MODEF 0 "general_reg_operand")
12005 (absneg:X87MODEF (match_operand:X87MODEF 1 "general_reg_operand")))
12006 (clobber (reg:CC FLAGS_REG))]
12007 "TARGET_80387 && reload_completed"
12008 [(const_int 0)]
12009 "ix86_split_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
12010
;; HFmode abs/neg with an extra V8HFmode "use" operand 2
;; (TARGET_AVX512FP16).  After reload it is split into a V8HFmode
;; <absneg_op> of operands 1 and 2, with operands 0 and 1 rewritten as
;; V8HFmode lowpart subregs of the HFmode registers.
12011 (define_insn_and_split "*<code>hf2_1"
12012 [(set (match_operand:HF 0 "register_operand" "=Yv")
12013 (absneg:HF
12014 (match_operand:HF 1 "register_operand" "Yv")))
12015 (use (match_operand:V8HF 2 "vector_operand" "Yvm"))
12016 (clobber (reg:CC FLAGS_REG))]
12017 "TARGET_AVX512FP16"
12018 "#"
12019 "&& reload_completed"
12020 [(set (match_dup 0)
12021 (<absneg_op>:V8HF (match_dup 1) (match_dup 2)))]
12022 {
12023 operands[0] = lowpart_subreg (V8HFmode, operands[0], HFmode);
12024 operands[1] = lowpart_subreg (V8HFmode, operands[1], HFmode);
12025 })
12026
;; SFmode/DFmode abs/neg placeholder with an extra vector "use" operand 2
;; and a flags clobber; always outputs "#".
;; Alternatives 0-2 use SSE register constraints (x / Yv), alternative 3
;; an x87 register (f) and alternative 4 a general register (!r).
;; The "enabled" attribute works as follows:
;;   - when the mode uses SSE math, alternatives 3 and 4 are enabled only
;;     with TARGET_MIX_SSE_I387 and the others keep the default ("*");
;;   - otherwise only alternatives 3 and 4 are enabled.
12027 (define_insn "*<code><mode>2_1"
12028 [(set (match_operand:MODEF 0 "register_operand" "=x,x,Yv,f,!r")
12029 (absneg:MODEF
12030 (match_operand:MODEF 1 "register_operand" "0,x,Yv,0,0")))
12031 (use (match_operand:<ssevecmode> 2 "vector_operand" "xBm,0,Yvm,X,X"))
12032 (clobber (reg:CC FLAGS_REG))]
12033 "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
12034 "#"
12035 [(set_attr "isa" "noavx,noavx,avx,*,*")
12036 (set (attr "enabled")
12037 (if_then_else
12038 (match_test ("SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"))
12039 (if_then_else
12040 (eq_attr "alternative" "3,4")
12041 (symbol_ref "TARGET_MIX_SSE_I387")
12042 (const_string "*"))
12043 (if_then_else
12044 (eq_attr "alternative" "3,4")
12045 (symbol_ref "true")
12046 (symbol_ref "false"))))])
12047
;; After reload, when both operands are SSE registers and the mode uses
;; SSE math, replace the abs/neg with a vector-mode <absneg_op> of
;; operands 1 and 2.  Operands 0 and 1 become vector-mode lowpart subregs.
;; Without AVX, the two sources are swapped when operand 2 matches the
;; destination.
12048 (define_split
12049 [(set (match_operand:MODEF 0 "sse_reg_operand")
12050 (absneg:MODEF
12051 (match_operand:MODEF 1 "sse_reg_operand")))
12052 (use (match_operand:<ssevecmodef> 2 "vector_operand"))
12053 (clobber (reg:CC FLAGS_REG))]
12054 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
12055 && reload_completed"
12056 [(set (match_dup 0)
12057 (<absneg_op>:<ssevecmodef> (match_dup 1) (match_dup 2)))]
12058 {
12059 machine_mode mode = <MODE>mode;
12060 machine_mode vmode = <ssevecmodef>mode;
12061
12062 operands[0] = lowpart_subreg (vmode, operands[0], mode);
12063 operands[1] = lowpart_subreg (vmode, operands[1], mode);
12064
12065 if (!TARGET_AVX && operands_match_p (operands[0], operands[2]))
12066 std::swap (operands[1], operands[2]);
12067 })
12068
;; After reload, when both operands are FP registers, drop the "use" of
;; operand 2 and the flags clobber and keep just the abs/neg set.
12069 (define_split
12070 [(set (match_operand:MODEF 0 "fp_register_operand")
12071 (absneg:MODEF (match_operand:MODEF 1 "fp_register_operand")))
12072 (use (match_operand 2))
12073 (clobber (reg:CC FLAGS_REG))]
12074 "TARGET_80387 && reload_completed"
12075 [(set (match_dup 0) (absneg:MODEF (match_dup 1)))])
12076
;; After reload, when both operands are general registers, hand the
;; operation to ix86_split_fp_absneg_operator, which emits the replacement
;; insns itself (the split ends with DONE).
12077 (define_split
12078 [(set (match_operand:MODEF 0 "general_reg_operand")
12079 (absneg:MODEF (match_operand:MODEF 1 "general_reg_operand")))
12080 (use (match_operand 2))
12081 (clobber (reg:CC FLAGS_REG))]
12082 "TARGET_80387 && reload_completed"
12083 [(const_int 0)]
12084 "ix86_split_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
12085
;; SFmode/DFmode negated abs, i.e. (neg (abs x)), for SSE math, with an
;; extra vector "use" operand 2.  After reload it is split into a
;; vector-mode IOR of operands 1 and 2.  Operands 0 and 1 become
;; vector-mode lowpart subregs.  Without AVX, the two sources are swapped
;; when operand 2 matches the destination.
12086 (define_insn_and_split "*nabs<mode>2_1"
12087 [(set (match_operand:MODEF 0 "register_operand" "=x,x,Yv")
12088 (neg:MODEF
12089 (abs:MODEF
12090 (match_operand:MODEF 1 "register_operand" "0,x,Yv"))))
12091 (use (match_operand:<ssevecmode> 2 "vector_operand" "xBm,0,Yvm"))]
12092 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
12093 "#"
12094 "&& reload_completed"
12095 [(set (match_dup 0)
12096 (ior:<ssevecmodef> (match_dup 1) (match_dup 2)))]
12097 {
12098 machine_mode mode = <MODE>mode;
12099 machine_mode vmode = <ssevecmodef>mode;
12100
12101 operands[0] = lowpart_subreg (vmode, operands[0], mode);
12102 operands[1] = lowpart_subreg (vmode, operands[1], mode);
12103
12104 if (!TARGET_AVX && operands_match_p (operands[0], operands[2]))
12105 std::swap (operands[1], operands[2]);
12106 }
12107 [(set_attr "isa" "noavx,noavx,avx")])
12108
12109 ;; Conditionalize these after reload. If they match before reload, we
12110 ;; lose the clobber and ability to use integer instructions.
12111
;; abs/neg on an "f"-class register, performed in place (the input is tied
;; to the output by the "0" constraint).  Only recognized once reload has
;; completed.
12112 (define_insn "*<code><mode>2_i387"
12113 [(set (match_operand:X87MODEF 0 "register_operand" "=f")
12114 (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand" "0")))]
12115 "TARGET_80387 && reload_completed"
12116 "<absneg_mnemonic>"
12117 [(set_attr "type" "fsgn")
12118 (set_attr "mode" "<MODE>")])
12119
12120 ;; Copysign instructions
12121
;; Expander for copysign.  Enabled for SSE float math modes, for TFmode when
;; TARGET_SSE is set, and for HFmode when TARGET_AVX512FP16 is set.  All of
;; the work is done by ix86_expand_copysign.
12122 (define_expand "copysign<mode>3"
12123 [(match_operand:SSEMODEF 0 "register_operand")
12124 (match_operand:SSEMODEF 1 "nonmemory_operand")
12125 (match_operand:SSEMODEF 2 "register_operand")]
12126 "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
12127 || (TARGET_SSE && (<MODE>mode == TFmode))
12128 || (TARGET_AVX512FP16 && (<MODE>mode ==HFmode))"
12129 "ix86_expand_copysign (operands); DONE;")
12130
;; Expander for xorsign.  When operands 1 and 2 are the same rtx the result
;; is emitted as abs of operand 1; otherwise ix86_expand_xorsign generates
;; the sequence.
12131 (define_expand "xorsign<mode>3"
12132 [(match_operand:MODEFH 0 "register_operand")
12133 (match_operand:MODEFH 1 "register_operand")
12134 (match_operand:MODEFH 2 "register_operand")]
12135 "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
12136 || <MODE>mode == HFmode"
12137 {
12138 if (rtx_equal_p (operands[1], operands[2]))
12139 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
12140 else
12141 ix86_expand_xorsign (operands);
12142 DONE;
12143 })
12144 \f
12145 ;; One's complement instructions
12146
;; Expander for bitwise NOT on all SDWIM modes; unconditionally enabled and
;; implemented by ix86_expand_unary_operator.
12147 (define_expand "one_cmpl<mode>2"
12148 [(set (match_operand:SDWIM 0 "nonimmediate_operand")
12149 (not:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")))]
12150 ""
12151 "ix86_expand_unary_operator (NOT, <MODE>mode, operands); DONE;")
12152
;; Double-word NOT performed in place.  Output as "#" and split after reload
;; into two word-mode NOTs.  split_double_mode rewrites operands 0-1 and
;; fills operands 2-3 (presumably low halves in 0/1 and high halves in 2/3
;; -- TODO confirm against split_double_mode).
12153 (define_insn_and_split "*one_cmpl<dwi>2_doubleword"
12154 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
12155 (not:<DWI> (match_operand:<DWI> 1 "nonimmediate_operand" "0")))]
12156 "ix86_unary_operator_ok (NOT, <DWI>mode, operands)"
12157 "#"
12158 "&& reload_completed"
12159 [(set (match_dup 0)
12160 (not:DWIH (match_dup 1)))
12161 (set (match_dup 2)
12162 (not:DWIH (match_dup 3)))]
12163 "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);")
12164
;; NOT for HI/SI/DI-sized integers (SWI248).
;; Alternative 0: in-place "not" on a general register or memory.
;; Alternative 1: "k" register operands; output as "#", so it relies on a
;; splitter defined elsewhere.  Its "isa" is avx512bw for SI/DI and avx512f
;; otherwise.
12165 (define_insn "*one_cmpl<mode>2_1"
12166 [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,?k")
12167 (not:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "0,k")))]
12168 "ix86_unary_operator_ok (NOT, <MODE>mode, operands)"
12169 "@
12170 not{<imodesuffix>}\t%0
12171 #"
12172 [(set (attr "isa")
12173 (cond [(eq_attr "alternative" "1")
12174 (if_then_else (eq_attr "mode" "SI,DI")
12175 (const_string "avx512bw")
12176 (const_string "avx512f"))
12177 ]
12178 (const_string "*")))
12179 (set_attr "type" "negnot,msklog")
12180 (set_attr "mode" "<MODE>")])
12181
;; SImode NOT whose result is zero-extended into a DImode register (64-bit
;; targets only).  Alternative 0 emits a 32-bit "not" on %k0; alternative 1
;; ("k" registers, isa avx512bw) is output as "#".
12182 (define_insn "*one_cmplsi2_1_zext"
12183 [(set (match_operand:DI 0 "register_operand" "=r,?k")
12184 (zero_extend:DI
12185 (not:SI (match_operand:SI 1 "register_operand" "0,k"))))]
12186 "TARGET_64BIT && ix86_unary_operator_ok (NOT, SImode, operands)"
12187 "@
12188 not{l}\t%k0
12189 #"
12190 [(set_attr "isa" "x64,avx512bw")
12191 (set_attr "type" "negnot,msklog")
12192 (set_attr "mode" "SI,SI")])
12193
;; QImode NOT.
;; Alternative 0: byte "not" in place on a "q" register or memory.
;; Alternative 1: 32-bit "not" on the containing register (%k0); reported as
;; mode SI and disfavoured for speed when TARGET_PARTIAL_REG_STALL.
;; Alternative 2: "k" register operands (isa avx512f), output as "#"; its
;; mode attribute is HI unless TARGET_AVX512DQ, in which case it is QI.
12194 (define_insn "*one_cmplqi2_1"
12195 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,?k")
12196 (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,k")))]
12197 "ix86_unary_operator_ok (NOT, QImode, operands)"
12198 "@
12199 not{b}\t%0
12200 not{l}\t%k0
12201 #"
12202 [(set_attr "isa" "*,*,avx512f")
12203 (set_attr "type" "negnot,negnot,msklog")
12204 (set (attr "mode")
12205 (cond [(eq_attr "alternative" "1")
12206 (const_string "SI")
12207 (and (eq_attr "alternative" "2")
12208 (match_test "!TARGET_AVX512DQ"))
12209 (const_string "HI")
12210 ]
12211 (const_string "QI")))
12212 ;; Potential partial reg stall on alternative 1.
12213 (set (attr "preferred_for_speed")
12214 (cond [(eq_attr "alternative" "1")
12215 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
12216 (symbol_ref "true")))])
12217
12218 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; NOT into the strict_low_part of a register (QI/HI).  Enabled unless
;; TARGET_PARTIAL_REG_STALL, or always when optimizing for size.
;; Alternative 0 operates in place.  Alternative 1 (source in a different
;; register) is output as "#" and split after reload into a copy of the
;; source into the low part followed by an in-place NOT.
12219 (define_insn_and_split "*one_cmpl<mode>_1_slp"
12220 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
12221 (not:SWI12 (match_operand:SWI12 1 "register_operand" "0,!<r>")))]
12222 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
12223 "@
12224 not{<imodesuffix>}\t%0
12225 #"
12226 "&& reload_completed"
12227 [(set (strict_low_part (match_dup 0)) (match_dup 1))
12228 (set (strict_low_part (match_dup 0))
12229 (not:SWI12 (match_dup 0)))]
12230 ""
12231 [(set_attr "type" "negnot")
12232 (set_attr "mode" "<MODE>")])
12233
;; NOT that both stores its result and sets the flags from comparing that
;; result with zero (CCNOmode).  It has no direct assembly form: the output
;; is always "#", so it must be rewritten by a splitter before final output.
12234 (define_insn "*one_cmpl<mode>2_2"
12235 [(set (reg FLAGS_REG)
12236 (compare (not:SWI (match_operand:SWI 1 "nonimmediate_operand" "0"))
12237 (const_int 0)))
12238 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
12239 (not:SWI (match_dup 1)))]
12240 "ix86_match_ccmode (insn, CCNOmode)
12241 && ix86_unary_operator_ok (NOT, <MODE>mode, operands)"
12242 "#"
12243 [(set_attr "type" "alu1")
12244 (set_attr "mode" "<MODE>")])
12245
;; Rewrites a flag-setting NOT as XOR with -1, keeping the same compare
;; operator (operand 2) and the same destination.
;; NOTE(review): presumably needed because the x86 "not" instruction leaves
;; the flags untouched while "xor" sets them -- confirm against the ISA
;; reference.
12246 (define_split
12247 [(set (match_operand 0 "flags_reg_operand")
12248 (match_operator 2 "compare_operator"
12249 [(not:SWI (match_operand:SWI 3 "nonimmediate_operand"))
12250 (const_int 0)]))
12251 (set (match_operand:SWI 1 "nonimmediate_operand")
12252 (not:SWI (match_dup 3)))]
12253 "ix86_match_ccmode (insn, CCNOmode)"
12254 [(parallel [(set (match_dup 0)
12255 (match_op_dup 2 [(xor:SWI (match_dup 3) (const_int -1))
12256 (const_int 0)]))
12257 (set (match_dup 1)
12258 (xor:SWI (match_dup 3) (const_int -1)))])])
12259
;; Flag-setting SImode NOT whose stored result is zero-extended to DImode
;; (64-bit targets only).  Always output as "#", so it must be rewritten by
;; a splitter before final output.
12260 (define_insn "*one_cmplsi2_2_zext"
12261 [(set (reg FLAGS_REG)
12262 (compare (not:SI (match_operand:SI 1 "register_operand" "0"))
12263 (const_int 0)))
12264 (set (match_operand:DI 0 "register_operand" "=r")
12265 (zero_extend:DI (not:SI (match_dup 1))))]
12266 "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
12267 && ix86_unary_operator_ok (NOT, SImode, operands)"
12268 "#"
12269 [(set_attr "type" "alu1")
12270 (set_attr "mode" "SI")])
12271
;; Zero-extending counterpart of the NOT-to-XOR rewrite: a flag-setting
;; SImode NOT with a DImode zero-extended result becomes XOR with -1 under
;; the same compare operator (operand 2).
12272 (define_split
12273 [(set (match_operand 0 "flags_reg_operand")
12274 (match_operator 2 "compare_operator"
12275 [(not:SI (match_operand:SI 3 "register_operand"))
12276 (const_int 0)]))
12277 (set (match_operand:DI 1 "register_operand")
12278 (zero_extend:DI (not:SI (match_dup 3))))]
12279 "ix86_match_ccmode (insn, CCNOmode)"
12280 [(parallel [(set (match_dup 0)
12281 (match_op_dup 2 [(xor:SI (match_dup 3) (const_int -1))
12282 (const_int 0)]))
12283 (set (match_dup 1)
12284 (zero_extend:DI (xor:SI (match_dup 3) (const_int -1))))])])
12285 \f
12286 ;; Shift instructions
12287
12288 ;; DImode shifts are implemented using the i386 "shift double" opcode,
12289 ;; which is written as "sh[lr]d[lw] imm,reg,reg/mem". If the shift count
12290 ;; is variable, then the count is in %cl and the "imm" operand is dropped
12291 ;; from the assembler input.
12292 ;;
12293 ;; This instruction shifts the target reg/mem as usual, but instead of
12294 ;; shifting in zeros, bits are shifted in from reg operand. If the insn
12295 ;; is a left shift double, bits are taken from the high order bits of
12296 ;; reg, else if the insn is a shift right double, bits are taken from the
12297 ;; low order bits of reg. So if %eax is "1234" and %edx is "5678",
12298 ;; "shldl $8,%edx,%eax" leaves %edx unchanged and sets %eax to "2345".
12299 ;;
12300 ;; Since sh[lr]d does not change the `reg' operand, that is done
12301 ;; separately, making all shifts emit pairs of shift double and normal
12302 ;; shift. Since sh[lr]d does not shift more than 31 bits, and we wish to
12303 ;; support a 63 bit shift, each shift where the count is in a reg expands
12304 ;; to a pair of shifts, a branch, a shift by 32 and a label.
12305 ;;
12306 ;; If the shift count is a constant, we need never emit more than one
12307 ;; shift pair, instead using moves and sign extension for counts greater
12308 ;; than 31.
12309
;; Expander for left shift on all SDWIM modes with a QImode count;
;; unconditionally enabled and implemented by ix86_expand_binary_operator.
12310 (define_expand "ashl<mode>3"
12311 [(set (match_operand:SDWIM 0 "<shift_operand>")
12312 (ashift:SDWIM (match_operand:SDWIM 1 "<ashl_input_operand>")
12313 (match_operand:QI 2 "nonmemory_operand")))]
12314 ""
12315 "ix86_expand_binary_operator (ASHIFT, <MODE>mode, operands); DONE;")
12316
;; Double-word left shift whose count is (subreg:QI (and REG CONST) 0), with
;; REG in an integer mode of 2 up to 4 (8 on 64-bit targets) bytes.
;; Accepted only before reload, when the mask either has the word-size bit
;; clear or has all bits below twice the word size set.
;; It is split immediately:
;;  - if the mask has the word-size bit set, the AND is dropped and
;;    ashl<dwi>3_doubleword is emitted on the QImode low part of the count;
;;  - otherwise the shift becomes a shld-style IOR on operand 6 plus a plain
;;    word shift producing operand 4 from operand 5 (operands 4-7 come from
;;    split_double_mode; presumably 4/5 are low halves and 6/7 high halves
;;    -- TODO confirm).  When the mask does not cover all bits below the
;;    word size, an explicit AND of the count is emitted first.  Operand 7
;;    is copied into operand 6 if they differ.
12317 (define_insn_and_split "*ashl<dwi>3_doubleword_mask"
12318 [(set (match_operand:<DWI> 0 "register_operand")
12319 (ashift:<DWI>
12320 (match_operand:<DWI> 1 "register_operand")
12321 (subreg:QI
12322 (and
12323 (match_operand 2 "register_operand" "c")
12324 (match_operand 3 "const_int_operand")) 0)))
12325 (clobber (reg:CC FLAGS_REG))]
12326 "((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0
12327 || ((INTVAL (operands[3]) & (2 * <MODE_SIZE> * BITS_PER_UNIT - 1))
12328 == (2 * <MODE_SIZE> * BITS_PER_UNIT - 1)))
12329 && GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT
12330 && IN_RANGE (GET_MODE_SIZE (GET_MODE (operands[2])), 2,
12331 4 << (TARGET_64BIT ? 1 : 0))
12332 && ix86_pre_reload_split ()"
12333 "#"
12334 "&& 1"
12335 [(parallel
12336 [(set (match_dup 6)
12337 (ior:DWIH (ashift:DWIH (match_dup 6)
12338 (and:QI (match_dup 2) (match_dup 8)))
12339 (subreg:DWIH
12340 (lshiftrt:<DWI> (zero_extend:<DWI> (match_dup 5))
12341 (minus:QI (match_dup 9)
12342 (and:QI (match_dup 2) (match_dup 8)))) 0)))
12343 (clobber (reg:CC FLAGS_REG))])
12344 (parallel
12345 [(set (match_dup 4)
12346 (ashift:DWIH (match_dup 5) (match_dup 2)))
12347 (clobber (reg:CC FLAGS_REG))])]
12348 {
12349 if ((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) != 0)
12350 {
12351 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
12352 operands[2] = gen_lowpart (QImode, operands[2]);
12353 emit_insn (gen_ashl<dwi>3_doubleword (operands[0], operands[1],
12354 operands[2]));
12355 DONE;
12356 }
12357
12358 split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);
12359
12360 operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
12361 operands[9] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
12362
12363 if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
12364 != ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
12365 {
12366 rtx xops[3];
12367 xops[0] = gen_reg_rtx (GET_MODE (operands[2]));
12368 xops[1] = operands[2];
12369 xops[2] = GEN_INT (INTVAL (operands[3])
12370 & ((<MODE_SIZE> * BITS_PER_UNIT) - 1));
12371 ix86_expand_binary_operator (AND, GET_MODE (operands[2]), xops);
12372 operands[2] = xops[0];
12373 }
12374
12375 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
12376 operands[2] = gen_lowpart (QImode, operands[2]);
12377
12378 if (!rtx_equal_p (operands[6], operands[7]))
12379 emit_move_insn (operands[6], operands[7]);
12380 })
12381
;; Variant of the masked double-word left shift in which the count is
;; already a QImode (and REG CONST), so no subreg or mode checks are needed.
;; The mask conditions and the split strategy are the same: a mask with the
;; word-size bit set goes straight to ashl<dwi>3_doubleword; otherwise a
;; shld-style IOR plus a plain word shift is generated, preceded by an
;; explicit andqi3 of the count with operand 3 when the mask does not cover
;; all bits below the word size.
12382 (define_insn_and_split "*ashl<dwi>3_doubleword_mask_1"
12383 [(set (match_operand:<DWI> 0 "register_operand")
12384 (ashift:<DWI>
12385 (match_operand:<DWI> 1 "register_operand")
12386 (and:QI
12387 (match_operand:QI 2 "register_operand" "c")
12388 (match_operand:QI 3 "const_int_operand"))))
12389 (clobber (reg:CC FLAGS_REG))]
12390 "((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0
12391 || ((INTVAL (operands[3]) & (2 * <MODE_SIZE> * BITS_PER_UNIT - 1))
12392 == (2 * <MODE_SIZE> * BITS_PER_UNIT - 1)))
12393 && ix86_pre_reload_split ()"
12394 "#"
12395 "&& 1"
12396 [(parallel
12397 [(set (match_dup 6)
12398 (ior:DWIH (ashift:DWIH (match_dup 6)
12399 (and:QI (match_dup 2) (match_dup 8)))
12400 (subreg:DWIH
12401 (lshiftrt:<DWI> (zero_extend:<DWI> (match_dup 5))
12402 (minus:QI (match_dup 9)
12403 (and:QI (match_dup 2) (match_dup 8)))) 0)))
12404 (clobber (reg:CC FLAGS_REG))])
12405 (parallel
12406 [(set (match_dup 4)
12407 (ashift:DWIH (match_dup 5) (match_dup 2)))
12408 (clobber (reg:CC FLAGS_REG))])]
12409 {
12410 if ((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) != 0)
12411 {
12412 emit_insn (gen_ashl<dwi>3_doubleword (operands[0], operands[1],
12413 operands[2]));
12414 DONE;
12415 }
12416
12417 split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);
12418
12419 operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
12420 operands[9] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
12421
12422 if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
12423 != ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
12424 {
12425 rtx tem = gen_reg_rtx (QImode);
12426 emit_insn (gen_andqi3 (tem, operands[2], operands[3]));
12427 operands[2] = tem;
12428 }
12429
12430 if (!rtx_equal_p (operands[6], operands[7]))
12431 emit_move_insn (operands[6], operands[7]);
12432 })
12433
;; Placeholder insn for a double-word left shift.  The destination is an
;; early-clobber register, the source is tied to it or is a constant
;; accepted by reg_or_pm1_operand.  It is always output as "#" (type
;; "multi"), so the actual instructions come from a later split.
12434 (define_insn "ashl<mode>3_doubleword"
12435 [(set (match_operand:DWI 0 "register_operand" "=&r")
12436 (ashift:DWI (match_operand:DWI 1 "reg_or_pm1_operand" "0n")
12437 (match_operand:QI 2 "nonmemory_operand" "<S>c")))
12438 (clobber (reg:CC FLAGS_REG))]
12439 ""
12440 "#"
12441 [(set_attr "type" "multi")])
12442
;; Once epilogue_completed is set, expand a double-word left shift through
;; ix86_split_ashl with no scratch register (NULL_RTX).
12443 (define_split
12444 [(set (match_operand:DWI 0 "register_operand")
12445 (ashift:DWI (match_operand:DWI 1 "nonmemory_operand")
12446 (match_operand:QI 2 "nonmemory_operand")))
12447 (clobber (reg:CC FLAGS_REG))]
12448 "epilogue_completed"
12449 [(const_int 0)]
12450 "ix86_split_ashl (operands, NULL_RTX, <MODE>mode); DONE;")
12451
12452 ;; By default we don't ask for a scratch register, because when DWImode
12453 ;; values are manipulated, registers are already at a premium. But if
12454 ;; we have one handy, we won't turn it away.
12455
;; Peephole for the double-word left shift when a word-mode scratch register
;; (operand 3) is available and TARGET_CMOVE is set: the scratch is handed
;; to ix86_split_ashl.
12456 (define_peephole2
12457 [(match_scratch:DWIH 3 "r")
12458 (parallel [(set (match_operand:<DWI> 0 "register_operand")
12459 (ashift:<DWI>
12460 (match_operand:<DWI> 1 "nonmemory_operand")
12461 (match_operand:QI 2 "nonmemory_operand")))
12462 (clobber (reg:CC FLAGS_REG))])
12463 (match_dup 3)]
12464 "TARGET_CMOVE"
12465 [(const_int 0)]
12466 "ix86_split_ashl (operands, operands[3], <DWI>mode); DONE;")
12467
;; 64-bit shld with a variable or immediate count (constraint "Jc").
;; RTL form: operand 0 is shifted left by (count & 63) and ORed with the low
;; DImode part of zero-extended operand 1 shifted right by
;; 64 - (count & 63).  Operand 0 is read and written ("+").
12468 (define_insn "x86_64_shld"
12469 [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
12470 (ior:DI (ashift:DI (match_dup 0)
12471 (and:QI (match_operand:QI 2 "nonmemory_operand" "Jc")
12472 (const_int 63)))
12473 (subreg:DI
12474 (lshiftrt:TI
12475 (zero_extend:TI
12476 (match_operand:DI 1 "register_operand" "r"))
12477 (minus:QI (const_int 64)
12478 (and:QI (match_dup 2) (const_int 63)))) 0)))
12479 (clobber (reg:CC FLAGS_REG))]
12480 "TARGET_64BIT"
12481 "shld{q}\t{%s2%1, %0|%0, %1, %2}"
12482 [(set_attr "type" "ishift")
12483 (set_attr "prefix_0f" "1")
12484 (set_attr "mode" "DI")
12485 (set_attr "athlon_decode" "vector")
12486 (set_attr "amdfam10_decode" "vector")
12487 (set_attr "bdver1_decode" "vector")])
12488
;; 64-bit shld with constant counts: operand 2 (0..63) is the left-shift
;; count and operand 3 the complementary right-shift count; the insn
;; condition requires operand 3 == 64 - operand 2.  Only operand 2 appears
;; in the assembly output.
12489 (define_insn "x86_64_shld_1"
12490 [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
12491 (ior:DI (ashift:DI (match_dup 0)
12492 (match_operand:QI 2 "const_0_to_63_operand"))
12493 (subreg:DI
12494 (lshiftrt:TI
12495 (zero_extend:TI
12496 (match_operand:DI 1 "register_operand" "r"))
12497 (match_operand:QI 3 "const_0_to_255_operand")) 0)))
12498 (clobber (reg:CC FLAGS_REG))]
12499 "TARGET_64BIT
12500 && INTVAL (operands[3]) == 64 - INTVAL (operands[2])"
12501 "shld{q}\t{%2, %1, %0|%0, %1, %2}"
12502 [(set_attr "type" "ishift")
12503 (set_attr "prefix_0f" "1")
12504 (set_attr "mode" "DI")
12505 (set_attr "length_immediate" "1")
12506 (set_attr "athlon_decode" "vector")
12507 (set_attr "amdfam10_decode" "vector")
12508 (set_attr "bdver1_decode" "vector")])
12509
;; Pre-reload recognition of (op4 << c2) | (op1 >> c3) in DImode with
;; constant counts satisfying c3 == 64 - c2.  Split immediately into:
;;  - x86_64_shld_1 when operand 4 is the destination,
;;  - x86_64_shrd_1 (counts swapped) when operand 1 is the destination,
;;  - otherwise a copy of operand 4 into a fresh pseudo, x86_64_shld_1 on
;;    that pseudo, and a move of the result into operand 0.
;; The operand that is not the destination is forced into a register first.
12510 (define_insn_and_split "*x86_64_shld_shrd_1_nozext"
12511 [(set (match_operand:DI 0 "nonimmediate_operand")
12512 (ior:DI (ashift:DI (match_operand:DI 4 "nonimmediate_operand")
12513 (match_operand:QI 2 "const_0_to_63_operand"))
12514 (lshiftrt:DI
12515 (match_operand:DI 1 "nonimmediate_operand")
12516 (match_operand:QI 3 "const_0_to_63_operand"))))
12517 (clobber (reg:CC FLAGS_REG))]
12518 "TARGET_64BIT
12519 && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
12520 && ix86_pre_reload_split ()"
12521 "#"
12522 "&& 1"
12523 [(const_int 0)]
12524 {
12525 if (rtx_equal_p (operands[4], operands[0]))
12526 {
12527 operands[1] = force_reg (DImode, operands[1]);
12528 emit_insn (gen_x86_64_shld_1 (operands[0], operands[1], operands[2], operands[3]));
12529 }
12530 else if (rtx_equal_p (operands[1], operands[0]))
12531 {
12532 operands[4] = force_reg (DImode, operands[4]);
12533 emit_insn (gen_x86_64_shrd_1 (operands[0], operands[4], operands[3], operands[2]));
12534 }
12535 else
12536 {
12537 operands[1] = force_reg (DImode, operands[1]);
12538 rtx tmp = gen_reg_rtx (DImode);
12539 emit_move_insn (tmp, operands[4]);
12540 emit_insn (gen_x86_64_shld_1 (tmp, operands[1], operands[2], operands[3]));
12541 emit_move_insn (operands[0], tmp);
12542 }
12543 DONE;
12544 })
12545
;; Pre-reload recognition of (op0 << n) | (op1 >> (64 - n)) in DImode with a
;; non-constant-restricted count n (operand 2).  Split immediately into the
;; canonical shld RTL shape, which masks the count with 63 and expresses the
;; right shift on the zero-extended TImode value.
12546 (define_insn_and_split "*x86_64_shld_2"
12547 [(set (match_operand:DI 0 "nonimmediate_operand")
12548 (ior:DI (ashift:DI (match_dup 0)
12549 (match_operand:QI 2 "nonmemory_operand"))
12550 (lshiftrt:DI (match_operand:DI 1 "register_operand")
12551 (minus:QI (const_int 64) (match_dup 2)))))
12552 (clobber (reg:CC FLAGS_REG))]
12553 "TARGET_64BIT && ix86_pre_reload_split ()"
12554 "#"
12555 "&& 1"
12556 [(parallel [(set (match_dup 0)
12557 (ior:DI (ashift:DI (match_dup 0)
12558 (and:QI (match_dup 2) (const_int 63)))
12559 (subreg:DI
12560 (lshiftrt:TI
12561 (zero_extend:TI (match_dup 1))
12562 (minus:QI (const_int 64)
12563 (and:QI (match_dup 2)
12564 (const_int 63)))) 0)))
12565 (clobber (reg:CC FLAGS_REG))])])
12566
;; 32-bit shld with a variable or immediate count (constraint "Ic").
;; RTL form: operand 0 is shifted left by (count & 31) and ORed with the low
;; SImode part of zero-extended operand 1 shifted right by
;; 32 - (count & 31).  Operand 0 is read and written ("+").  The insn has no
;; target condition.
12567 (define_insn "x86_shld"
12568 [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
12569 (ior:SI (ashift:SI (match_dup 0)
12570 (and:QI (match_operand:QI 2 "nonmemory_operand" "Ic")
12571 (const_int 31)))
12572 (subreg:SI
12573 (lshiftrt:DI
12574 (zero_extend:DI
12575 (match_operand:SI 1 "register_operand" "r"))
12576 (minus:QI (const_int 32)
12577 (and:QI (match_dup 2) (const_int 31)))) 0)))
12578 (clobber (reg:CC FLAGS_REG))]
12579 ""
12580 "shld{l}\t{%s2%1, %0|%0, %1, %2}"
12581 [(set_attr "type" "ishift")
12582 (set_attr "prefix_0f" "1")
12583 (set_attr "mode" "SI")
12584 (set_attr "pent_pair" "np")
12585 (set_attr "athlon_decode" "vector")
12586 (set_attr "amdfam10_decode" "vector")
12587 (set_attr "bdver1_decode" "vector")])
12588
;; 32-bit shld with constant counts: operand 2 (0..31) is the left-shift
;; count and operand 3 the complementary right-shift count; the insn
;; condition requires operand 3 == 32 - operand 2.  Only operand 2 appears
;; in the assembly output.
12589 (define_insn "x86_shld_1"
12590 [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
12591 (ior:SI (ashift:SI (match_dup 0)
12592 (match_operand:QI 2 "const_0_to_31_operand"))
12593 (subreg:SI
12594 (lshiftrt:DI
12595 (zero_extend:DI
12596 (match_operand:SI 1 "register_operand" "r"))
12597 (match_operand:QI 3 "const_0_to_63_operand")) 0)))
12598 (clobber (reg:CC FLAGS_REG))]
12599 "INTVAL (operands[3]) == 32 - INTVAL (operands[2])"
12600 "shld{l}\t{%2, %1, %0|%0, %1, %2}"
12601 [(set_attr "type" "ishift")
12602 (set_attr "prefix_0f" "1")
12603 (set_attr "length_immediate" "1")
12604 (set_attr "mode" "SI")
12605 (set_attr "pent_pair" "np")
12606 (set_attr "athlon_decode" "vector")
12607 (set_attr "amdfam10_decode" "vector")
12608 (set_attr "bdver1_decode" "vector")])
12609
;; Pre-reload recognition of (op4 << c2) | (op1 >> c3) in SImode with
;; constant counts satisfying c3 == 32 - c2.  Split immediately into:
;;  - x86_shld_1 when operand 4 is the destination,
;;  - x86_shrd_1 (counts swapped) when operand 1 is the destination,
;;  - otherwise a copy of operand 4 into a fresh pseudo, x86_shld_1 on that
;;    pseudo, and a move of the result into operand 0.
;; The operand that is not the destination is forced into a register first.
12610 (define_insn_and_split "*x86_shld_shrd_1_nozext"
12611 [(set (match_operand:SI 0 "nonimmediate_operand")
12612 (ior:SI (ashift:SI (match_operand:SI 4 "nonimmediate_operand")
12613 (match_operand:QI 2 "const_0_to_31_operand"))
12614 (lshiftrt:SI
12615 (match_operand:SI 1 "nonimmediate_operand")
12616 (match_operand:QI 3 "const_0_to_31_operand"))))
12617 (clobber (reg:CC FLAGS_REG))]
12618 "INTVAL (operands[3]) == 32 - INTVAL (operands[2])
12619 && ix86_pre_reload_split ()"
12620 "#"
12621 "&& 1"
12622 [(const_int 0)]
12623 {
12624 if (rtx_equal_p (operands[4], operands[0]))
12625 {
12626 operands[1] = force_reg (SImode, operands[1]);
12627 emit_insn (gen_x86_shld_1 (operands[0], operands[1], operands[2], operands[3]));
12628 }
12629 else if (rtx_equal_p (operands[1], operands[0]))
12630 {
12631 operands[4] = force_reg (SImode, operands[4]);
12632 emit_insn (gen_x86_shrd_1 (operands[0], operands[4], operands[3], operands[2]));
12633 }
12634 else
12635 {
12636 operands[1] = force_reg (SImode, operands[1]);
12637 rtx tmp = gen_reg_rtx (SImode);
12638 emit_move_insn (tmp, operands[4]);
12639 emit_insn (gen_x86_shld_1 (tmp, operands[1], operands[2], operands[3]));
12640 emit_move_insn (operands[0], tmp);
12641 }
12642 DONE;
12643 })
12644
;; Pre-reload recognition of (op0 << n) | (op1 >> (32 - n)) in SImode.
;; Split immediately into the canonical shld RTL shape, which masks the
;; count with 31 and expresses the right shift on the zero-extended DImode
;; value.
;; NOTE(review): the insn condition requires TARGET_64BIT although every
;; operand is SImode and the x86_shld / x86_shld_1 patterns carry no such
;; requirement.  This may have been carried over from *x86_64_shld_2 --
;; confirm whether the restriction is intentional.
12645 (define_insn_and_split "*x86_shld_2"
12646 [(set (match_operand:SI 0 "nonimmediate_operand")
12647 (ior:SI (ashift:SI (match_dup 0)
12648 (match_operand:QI 2 "nonmemory_operand"))
12649 (lshiftrt:SI (match_operand:SI 1 "register_operand")
12650 (minus:QI (const_int 32) (match_dup 2)))))
12651 (clobber (reg:CC FLAGS_REG))]
12652 "TARGET_64BIT && ix86_pre_reload_split ()"
12653 "#"
12654 "&& 1"
12655 [(parallel [(set (match_dup 0)
12656 (ior:SI (ashift:SI (match_dup 0)
12657 (and:QI (match_dup 2) (const_int 31)))
12658 (subreg:SI
12659 (lshiftrt:DI
12660 (zero_extend:DI (match_dup 1))
12661 (minus:QI (const_int 32)
12662 (and:QI (match_dup 2)
12663 (const_int 31)))) 0)))
12664 (clobber (reg:CC FLAGS_REG))])])
12665
;; Branch-free adjustment step (requires TARGET_CMOVE).  Tests the count
;; (operand 2) against the mode's bit size (operand 4, created in the
;; preparation statement) and, when that bit is set, conditionally moves
;; operand 1 into operand 0 and then operand 3 into operand 1.
12666 (define_expand "@x86_shift<mode>_adj_1"
12667 [(set (reg:CCZ FLAGS_REG)
12668 (compare:CCZ (and:QI (match_operand:QI 2 "register_operand")
12669 (match_dup 4))
12670 (const_int 0)))
12671 (set (match_operand:SWI48 0 "register_operand")
12672 (if_then_else:SWI48 (ne (reg:CCZ FLAGS_REG) (const_int 0))
12673 (match_operand:SWI48 1 "register_operand")
12674 (match_dup 0)))
12675 (set (match_dup 1)
12676 (if_then_else:SWI48 (ne (reg:CCZ FLAGS_REG) (const_int 0))
12677 (match_operand:SWI48 3 "register_operand")
12678 (match_dup 1)))]
12679 "TARGET_CMOVE"
12680 "operands[4] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));")
12681
;; Branching adjustment step with no target condition.  Tests the count
;; (operand 2) against the mode's bit size and jumps over the adjustment
;; when the result is zero; otherwise operand 1 is moved into operand 0 and
;; operand 1 is cleared with ix86_expand_clear.
12682 (define_expand "@x86_shift<mode>_adj_2"
12683 [(use (match_operand:SWI48 0 "register_operand"))
12684 (use (match_operand:SWI48 1 "register_operand"))
12685 (use (match_operand:QI 2 "register_operand"))]
12686 ""
12687 {
12688 rtx_code_label *label = gen_label_rtx ();
12689 rtx tmp;
12690
12691 emit_insn (gen_testqi_ccz_1 (operands[2],
12692 GEN_INT (GET_MODE_BITSIZE (<MODE>mode))));
12693
12694 tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
12695 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
12696 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12697 gen_rtx_LABEL_REF (VOIDmode, label),
12698 pc_rtx);
12699 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
12700 JUMP_LABEL (tmp) = label;
12701
12702 emit_move_insn (operands[0], operands[1]);
12703 ix86_expand_clear (operands[1]);
12704
12705 emit_label (label);
12706 LABEL_NUSES (label) = 1;
12707
12708 DONE;
12709 })
12710
12711 ;; Avoid useless masking of count operand.
;; SI/DI left shift whose count is (subreg:QI (and REG CONST) 0), where
;; CONST keeps every bit below the mode's bit size.  Recognized only before
;; reload and split immediately into a plain shift on the QImode low part
;; of REG, i.e. the AND is dropped.
12712 (define_insn_and_split "*ashl<mode>3_mask"
12713 [(set (match_operand:SWI48 0 "nonimmediate_operand")
12714 (ashift:SWI48
12715 (match_operand:SWI48 1 "nonimmediate_operand")
12716 (subreg:QI
12717 (and
12718 (match_operand 2 "register_operand" "c,r")
12719 (match_operand 3 "const_int_operand")) 0)))
12720 (clobber (reg:CC FLAGS_REG))]
12721 "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)
12722 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
12723 == GET_MODE_BITSIZE (<MODE>mode)-1
12724 && GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT
12725 && IN_RANGE (GET_MODE_SIZE (GET_MODE (operands[2])), 2,
12726 4 << (TARGET_64BIT ? 1 : 0))
12727 && ix86_pre_reload_split ()"
12728 "#"
12729 "&& 1"
12730 [(parallel
12731 [(set (match_dup 0)
12732 (ashift:SWI48 (match_dup 1)
12733 (match_dup 2)))
12734 (clobber (reg:CC FLAGS_REG))])]
12735 {
12736 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
12737 operands[2] = gen_lowpart (QImode, operands[2]);
12738 }
12739 [(set_attr "isa" "*,bmi2")])
12740
;; Same as the masked-count shift but with the count already a QImode
;; (and REG CONST): when CONST keeps every bit below the mode's bit size the
;; AND is dropped and the shift uses REG directly.  No preparation code is
;; needed.
12741 (define_insn_and_split "*ashl<mode>3_mask_1"
12742 [(set (match_operand:SWI48 0 "nonimmediate_operand")
12743 (ashift:SWI48
12744 (match_operand:SWI48 1 "nonimmediate_operand")
12745 (and:QI
12746 (match_operand:QI 2 "register_operand" "c,r")
12747 (match_operand:QI 3 "const_int_operand"))))
12748 (clobber (reg:CC FLAGS_REG))]
12749 "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)
12750 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
12751 == GET_MODE_BITSIZE (<MODE>mode)-1
12752 && ix86_pre_reload_split ()"
12753 "#"
12754 "&& 1"
12755 [(parallel
12756 [(set (match_dup 0)
12757 (ashift:SWI48 (match_dup 1)
12758 (match_dup 2)))
12759 (clobber (reg:CC FLAGS_REG))])]
12760 ""
12761 [(set_attr "isa" "*,bmi2")])
12762
;; BMI2 shlx: three-operand left shift with the count in a register of the
;; same mode as the data.  The pattern has no flags clobber.
12763 (define_insn "*bmi2_ashl<mode>3_1"
12764 [(set (match_operand:SWI48 0 "register_operand" "=r")
12765 (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
12766 (match_operand:SWI48 2 "register_operand" "r")))]
12767 "TARGET_BMI2"
12768 "shlx\t{%2, %1, %0|%0, %1, %2}"
12769 [(set_attr "type" "ishiftx")
12770 (set_attr "mode" "<MODE>")])
12771
;; SI/DI left shift.
;; Alternative 0: in place on a general register or memory ("sal", or "add"
;;   of the operand to itself when the type is "alu").
;; Alternative 1: type "lea", output as "#".
;; Alternative 2: type "ishiftx" (isa bmi2), output as "#".
;; Alternative 3: "k" register operands (isa avx512bw), type "msklog",
;;   output as "#".
;; The "type" cond classifies alternative 3 before the generic "alu" test,
;; as *ashlhi3_1 and *ashlqi3_1 do.  Otherwise a "k"-register shift by 1
;; under TARGET_DOUBLE_WITH_ADD would be typed "alu" and the output code
;; would take the "add" path instead of returning "#".
12772 (define_insn "*ashl<mode>3_1"
12773 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k")
12774 (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k")
12775 (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r,<KS>")))
12776 (clobber (reg:CC FLAGS_REG))]
12777 "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"
12778 {
12779 switch (get_attr_type (insn))
12780 {
12781 case TYPE_LEA:
12782 case TYPE_ISHIFTX:
12783 case TYPE_MSKLOG:
12784 return "#";
12785
12786 case TYPE_ALU:
12787 gcc_assert (operands[2] == const1_rtx);
12788 gcc_assert (rtx_equal_p (operands[0], operands[1]));
12789 return "add{<imodesuffix>}\t%0, %0";
12790
12791 default:
12792 if (operands[2] == const1_rtx
12793 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
12794 return "sal{<imodesuffix>}\t%0";
12795 else
12796 return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
12797 }
12798 }
12799 [(set_attr "isa" "*,*,bmi2,avx512bw")
12800 (set (attr "type")
12801 (cond [(eq_attr "alternative" "1")
12802 (const_string "lea")
12803 (eq_attr "alternative" "2")
12804 (const_string "ishiftx")
;; Mask-register alternative must be classified before the "alu" test.
12805 (eq_attr "alternative" "3")
12806 (const_string "msklog")
12807 (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
12808 (match_operand 0 "register_operand"))
12809 (match_operand 2 "const1_operand"))
12810 (const_string "alu")
12811 ]
12812 (const_string "ishift")))
12813 (set (attr "length_immediate")
12814 (if_then_else
12815 (ior (eq_attr "type" "alu")
12816 (and (eq_attr "type" "ishift")
12817 (and (match_operand 2 "const1_operand")
12818 (ior (match_test "TARGET_SHIFT1")
12819 (match_test "optimize_function_for_size_p (cfun)")))))
12820 (const_string "0")
12821 (const_string "*")))
12822 (set_attr "mode" "<MODE>")])
12823
12824 ;; Convert shift to the shiftx pattern to avoid flags dependency.
;; After reload with TARGET_BMI2, a register-count left shift into a
;; register is rewritten without the flags clobber; the QImode count
;; register is re-expressed in the data mode via gen_lowpart.
12825 (define_split
12826 [(set (match_operand:SWI48 0 "register_operand")
12827 (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
12828 (match_operand:QI 2 "register_operand")))
12829 (clobber (reg:CC FLAGS_REG))]
12830 "TARGET_BMI2 && reload_completed"
12831 [(set (match_dup 0)
12832 (ashift:SWI48 (match_dup 1) (match_dup 2)))]
12833 "operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
12834
;; BMI2 shlx on SImode operands with the result zero-extended into a DImode
;; register (64-bit targets only); the destination is printed as %k0.
12835 (define_insn "*bmi2_ashlsi3_1_zext"
12836 [(set (match_operand:DI 0 "register_operand" "=r")
12837 (zero_extend:DI
12838 (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
12839 (match_operand:SI 2 "register_operand" "r"))))]
12840 "TARGET_64BIT && TARGET_BMI2"
12841 "shlx\t{%2, %1, %k0|%k0, %1, %2}"
12842 [(set_attr "type" "ishiftx")
12843 (set_attr "mode" "SI")])
12844
;; SImode left shift with the result zero-extended into a DImode register
;; (64-bit targets only).
;; Alternative 0: in place ("sal", or "add" when the type is "alu").
;; Alternative 1: type "lea", output as "#".
;; Alternative 2: type "ishiftx" (isa bmi2), output as "#".
;; The "alu" type is chosen for a shift by 1 when TARGET_DOUBLE_WITH_ADD.
12845 (define_insn "*ashlsi3_1_zext"
12846 [(set (match_operand:DI 0 "register_operand" "=r,r,r")
12847 (zero_extend:DI
12848 (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l,rm")
12849 (match_operand:QI 2 "nonmemory_operand" "cI,M,r"))))
12850 (clobber (reg:CC FLAGS_REG))]
12851 "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
12852 {
12853 switch (get_attr_type (insn))
12854 {
12855 case TYPE_LEA:
12856 case TYPE_ISHIFTX:
12857 return "#";
12858
12859 case TYPE_ALU:
12860 gcc_assert (operands[2] == const1_rtx);
12861 return "add{l}\t%k0, %k0";
12862
12863 default:
12864 if (operands[2] == const1_rtx
12865 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
12866 return "sal{l}\t%k0";
12867 else
12868 return "sal{l}\t{%2, %k0|%k0, %2}";
12869 }
12870 }
12871 [(set_attr "isa" "*,*,bmi2")
12872 (set (attr "type")
12873 (cond [(eq_attr "alternative" "1")
12874 (const_string "lea")
12875 (eq_attr "alternative" "2")
12876 (const_string "ishiftx")
12877 (and (match_test "TARGET_DOUBLE_WITH_ADD")
12878 (match_operand 2 "const1_operand"))
12879 (const_string "alu")
12880 ]
12881 (const_string "ishift")))
12882 (set (attr "length_immediate")
12883 (if_then_else
12884 (ior (eq_attr "type" "alu")
12885 (and (eq_attr "type" "ishift")
12886 (and (match_operand 2 "const1_operand")
12887 (ior (match_test "TARGET_SHIFT1")
12888 (match_test "optimize_function_for_size_p (cfun)")))))
12889 (const_string "0")
12890 (const_string "*")))
12891 (set_attr "mode" "SI")])
12892
12893 ;; Convert shift to the shiftx pattern to avoid flags dependency.
;; Zero-extending counterpart of the shift-to-shiftx split: after reload on
;; a 64-bit BMI2 target the flags clobber is dropped and the QImode count
;; register is re-expressed in SImode via gen_lowpart.
12894 (define_split
12895 [(set (match_operand:DI 0 "register_operand")
12896 (zero_extend:DI
12897 (ashift:SI (match_operand:SI 1 "nonimmediate_operand")
12898 (match_operand:QI 2 "register_operand"))))
12899 (clobber (reg:CC FLAGS_REG))]
12900 "TARGET_64BIT && TARGET_BMI2 && reload_completed"
12901 [(set (match_dup 0)
12902 (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
12903 "operands[2] = gen_lowpart (SImode, operands[2]);")
12904
;; HImode left shift.
;; Alternative 0: in place ("sal", or "add" when the type is "alu").
;; Alternative 1: type "lea" (mode SI), output as "#".
;; Alternative 2: "k" register operands (isa avx512f), type "msklog",
;;   output as "#".
;; The "alu" type is chosen for a register shift by 1 when
;; TARGET_DOUBLE_WITH_ADD, after the lea and msklog alternatives have been
;; classified.
12905 (define_insn "*ashlhi3_1"
12906 [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k")
12907 (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k")
12908 (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww")))
12909 (clobber (reg:CC FLAGS_REG))]
12910 "ix86_binary_operator_ok (ASHIFT, HImode, operands)"
12911 {
12912 switch (get_attr_type (insn))
12913 {
12914 case TYPE_LEA:
12915 case TYPE_MSKLOG:
12916 return "#";
12917
12918 case TYPE_ALU:
12919 gcc_assert (operands[2] == const1_rtx);
12920 return "add{w}\t%0, %0";
12921
12922 default:
12923 if (operands[2] == const1_rtx
12924 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
12925 return "sal{w}\t%0";
12926 else
12927 return "sal{w}\t{%2, %0|%0, %2}";
12928 }
12929 }
12930 [(set_attr "isa" "*,*,avx512f")
12931 (set (attr "type")
12932 (cond [(eq_attr "alternative" "1")
12933 (const_string "lea")
12934 (eq_attr "alternative" "2")
12935 (const_string "msklog")
12936 (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
12937 (match_operand 0 "register_operand"))
12938 (match_operand 2 "const1_operand"))
12939 (const_string "alu")
12940 ]
12941 (const_string "ishift")))
12942 (set (attr "length_immediate")
12943 (if_then_else
12944 (ior (eq_attr "type" "alu")
12945 (and (eq_attr "type" "ishift")
12946 (and (match_operand 2 "const1_operand")
12947 (ior (match_test "TARGET_SHIFT1")
12948 (match_test "optimize_function_for_size_p (cfun)")))))
12949 (const_string "0")
12950 (const_string "*")))
12951 (set_attr "mode" "HI,SI,HI")])
12952
;; QImode left shift.
;; Alternative 0: byte shift in place on a "q" register or memory.
;; Alternative 1: 32-bit shift on the containing register (mode SI);
;;   disfavoured for speed when TARGET_PARTIAL_REG_STALL.
;; Alternative 2: type "lea" (mode SI), output as "#".
;; Alternative 3: "k" register operands (isa avx512dq), type "msklog",
;;   output as "#".
;; For type "alu" (register shift by 1 under TARGET_DOUBLE_WITH_ADD) the
;; output is a 32-bit "add" when operand 1 is a register that is not
;; byte-addressable (per ANY_QI_REGNO_P), otherwise a byte "add".
12953 (define_insn "*ashlqi3_1"
12954 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k")
12955 (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k")
12956 (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb")))
12957 (clobber (reg:CC FLAGS_REG))]
12958 "ix86_binary_operator_ok (ASHIFT, QImode, operands)"
12959 {
12960 switch (get_attr_type (insn))
12961 {
12962 case TYPE_LEA:
12963 case TYPE_MSKLOG:
12964 return "#";
12965
12966 case TYPE_ALU:
12967 gcc_assert (operands[2] == const1_rtx);
12968 if (REG_P (operands[1]) && !ANY_QI_REGNO_P (REGNO (operands[1])))
12969 return "add{l}\t%k0, %k0";
12970 else
12971 return "add{b}\t%0, %0";
12972
12973 default:
12974 if (operands[2] == const1_rtx
12975 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
12976 {
12977 if (get_attr_mode (insn) == MODE_SI)
12978 return "sal{l}\t%k0";
12979 else
12980 return "sal{b}\t%0";
12981 }
12982 else
12983 {
12984 if (get_attr_mode (insn) == MODE_SI)
12985 return "sal{l}\t{%2, %k0|%k0, %2}";
12986 else
12987 return "sal{b}\t{%2, %0|%0, %2}";
12988 }
12989 }
12990 }
12991 [(set_attr "isa" "*,*,*,avx512dq")
12992 (set (attr "type")
12993 (cond [(eq_attr "alternative" "2")
12994 (const_string "lea")
12995 (eq_attr "alternative" "3")
12996 (const_string "msklog")
12997 (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
12998 (match_operand 0 "register_operand"))
12999 (match_operand 2 "const1_operand"))
13000 (const_string "alu")
13001 ]
13002 (const_string "ishift")))
13003 (set (attr "length_immediate")
13004 (if_then_else
13005 (ior (eq_attr "type" "alu")
13006 (and (eq_attr "type" "ishift")
13007 (and (match_operand 2 "const1_operand")
13008 (ior (match_test "TARGET_SHIFT1")
13009 (match_test "optimize_function_for_size_p (cfun)")))))
13010 (const_string "0")
13011 (const_string "*")))
13012 (set_attr "mode" "QI,SI,SI,QI")
13013 ;; Potential partial reg stall on alternative 1.
13014 (set (attr "preferred_for_speed")
13015 (cond [(eq_attr "alternative" "1")
13016 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
13017 (symbol_ref "true")))])
13018
13019 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Left shift into the strict_low_part of a register, for the SWI12
;; modes.  Alternative 0 shifts operand 0 in place.  Alternative 1 has a
;; separate input register, is output as "#" and is split once reload
;; has completed into a low-part copy followed by an in-place shift.
(define_insn_and_split "*ashl<mode>3_1_slp"
  [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
        (ashift:SWI12 (match_operand:SWI12 1 "register_operand" "0,!<r>")
                      (match_operand:QI 2 "nonmemory_operand" "cI,cI")))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
{
  /* Only alternative 0 has a direct assembly form.  */
  if (which_alternative != 0)
    return "#";

  if (get_attr_type (insn) == TYPE_ALU)
    {
      /* Shift by one expressed as adding the register to itself.  */
      gcc_assert (operands[2] == const1_rtx);
      return "add{<imodesuffix>}\t%0, %0";
    }

  /* Count-less encoding for a shift by the constant 1.  */
  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
    return "sal{<imodesuffix>}\t%0";

  return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
}
  "&& reload_completed"
  [(set (strict_low_part (match_dup 0)) (match_dup 1))
   (parallel
     [(set (strict_low_part (match_dup 0))
           (ashift:SWI12 (match_dup 0) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set (attr "type")
     (cond [(and (match_test "TARGET_DOUBLE_WITH_ADD")
                 (match_operand 2 "const1_operand"))
              (const_string "alu")
           ]
           (const_string "ishift")))
   (set (attr "length_immediate")
     (if_then_else
       (ior (eq_attr "type" "alu")
            (and (eq_attr "type" "ishift")
                 (and (match_operand 2 "const1_operand")
                      (ior (match_test "TARGET_SHIFT1")
                           (match_test "optimize_function_for_size_p (cfun)")))))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])
13067
13068 ;; Convert ashift to the lea pattern to avoid flags dependency.
;; After reload, rewrite a flags-clobbering left shift by 0..3 whose
;; destination register differs from its source as a multiplication by
;; 1 << count in <LEAMODE>, with no flags clobber.
(define_split
  [(set (match_operand:SWI 0 "register_operand")
        (ashift:SWI (match_operand:SWI 1 "index_register_operand")
                    (match_operand 2 "const_0_to_3_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed
   && REGNO (operands[0]) != REGNO (operands[1])"
  [(set (match_dup 0)
        (mult:<LEAMODE> (match_dup 1) (match_dup 2)))]
{
  /* Turn the shift count into the equivalent multiplier.  */
  const int scale = 1 << INTVAL (operands[2]);

  /* Modes narrower than <LEAMODE> operate on the <LEAMODE> view of both
     registers.  */
  if (<MODE>mode != <LEAMODE>mode)
    for (int i = 0; i < 2; i++)
      operands[i] = gen_lowpart (<LEAMODE>mode, operands[i]);

  operands[2] = GEN_INT (scale);
})
13086
13087 ;; Convert ashift to the lea pattern to avoid flags dependency.
;; 64-bit only: the zero-extended SImode form of the shift-to-multiply
;; split.  After reload, a shift by 0..3 into a different register
;; becomes a zero-extended multiplication by 1 << count.
(define_split
  [(set (match_operand:DI 0 "register_operand")
        (zero_extend:DI
          (ashift:SI (match_operand:SI 1 "index_register_operand")
                     (match_operand 2 "const_0_to_3_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && reload_completed
   && REGNO (operands[0]) != REGNO (operands[1])"
  [(set (match_dup 0)
        (zero_extend:DI (mult:SI (match_dup 1) (match_dup 2))))]
{
  /* Turn the shift count into the equivalent multiplier.  */
  const int scale = 1 << INTVAL (operands[2]);

  operands[1] = gen_lowpart (SImode, operands[1]);
  operands[2] = GEN_INT (scale);
})
13102
13103 ;; This pattern can't accept a variable shift count, since shifts by
13104 ;; zero don't affect the flags. We assume that shifts by constant
13105 ;; zero are optimized away.
;; Left shift by an immediate count that also sets the flags register
;; from comparing the shifted value with zero.  Operand 0 receives the
;; shifted value.
(define_insn "*ashl<mode>3_cmp"
  [(set (reg FLAGS_REG)
        (compare
          (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")
                      (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
          (const_int 0)))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
        (ashift:SWI (match_dup 1) (match_dup 2)))]
  "(optimize_function_for_size_p (cfun)
    || !TARGET_PARTIAL_FLAG_REG_STALL
    || (operands[2] == const1_rtx
        && (TARGET_SHIFT1
            || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))
   && ix86_match_ccmode (insn, CCGOCmode)
   && ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"
{
  if (get_attr_type (insn) != TYPE_ALU)
    {
      /* Count-less encoding for a shift by the constant 1.  */
      if (operands[2] == const1_rtx
          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
        return "sal{<imodesuffix>}\t%0";

      return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
    }

  /* Type "alu": shift by one expressed as adding the register to itself.  */
  gcc_assert (operands[2] == const1_rtx);
  return "add{<imodesuffix>}\t%0, %0";
}
  [(set (attr "type")
     (cond [(and (and (match_test "TARGET_DOUBLE_WITH_ADD")
                      (match_operand 0 "register_operand"))
                 (match_operand 2 "const1_operand"))
              (const_string "alu")
           ]
           (const_string "ishift")))
   (set (attr "length_immediate")
     (if_then_else
       (ior (eq_attr "type" "alu")
            (and (eq_attr "type" "ishift")
                 (and (match_operand 2 "const1_operand")
                      (ior (match_test "TARGET_SHIFT1")
                           (match_test "optimize_function_for_size_p (cfun)")))))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])
13153
;; 64-bit only: SImode left shift by a constant 1..31 that sets the flags
;; from comparing the shifted value with zero and stores the
;; zero-extended result in a DImode register.
(define_insn "*ashlsi3_cmp_zext"
  [(set (reg FLAGS_REG)
        (compare
          (ashift:SI (match_operand:SI 1 "register_operand" "0")
                     (match_operand:QI 2 "const_1_to_31_operand"))
          (const_int 0)))
   (set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
  "TARGET_64BIT
   && (optimize_function_for_size_p (cfun)
       || !TARGET_PARTIAL_FLAG_REG_STALL
       || (operands[2] == const1_rtx
           && (TARGET_SHIFT1
               || TARGET_DOUBLE_WITH_ADD)))
   && ix86_match_ccmode (insn, CCGOCmode)
   && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
{
  if (get_attr_type (insn) == TYPE_ALU)
    {
      /* Shift by one expressed as adding the register to itself.  */
      gcc_assert (operands[2] == const1_rtx);
      return "add{l}\t%k0, %k0";
    }

  /* Count-less encoding for a shift by the constant 1.  */
  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
    return "sal{l}\t%k0";

  return "sal{l}\t{%2, %k0|%k0, %2}";
}
  [(set (attr "type")
     (cond [(and (match_test "TARGET_DOUBLE_WITH_ADD")
                 (match_operand 2 "const1_operand"))
              (const_string "alu")
           ]
           (const_string "ishift")))
   (set (attr "length_immediate")
     (if_then_else
       (ior (eq_attr "type" "alu")
            (and (eq_attr "type" "ishift")
                 (and (match_operand 2 "const1_operand")
                      (ior (match_test "TARGET_SHIFT1")
                           (match_test "optimize_function_for_size_p (cfun)")))))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "SI")])
13201
;; Left shift by an immediate count where only the flags result is
;; wanted: the shifted value goes into a scratch register.
(define_insn "*ashl<mode>3_cconly"
  [(set (reg FLAGS_REG)
        (compare
          (ashift:SWI (match_operand:SWI 1 "register_operand" "0")
                      (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
          (const_int 0)))
   (clobber (match_scratch:SWI 0 "=<r>"))]
  "(optimize_function_for_size_p (cfun)
    || !TARGET_PARTIAL_FLAG_REG_STALL
    || (operands[2] == const1_rtx
        && (TARGET_SHIFT1
            || TARGET_DOUBLE_WITH_ADD)))
   && ix86_match_ccmode (insn, CCGOCmode)"
{
  if (get_attr_type (insn) == TYPE_ALU)
    {
      /* Shift by one expressed as adding the register to itself.  */
      gcc_assert (operands[2] == const1_rtx);
      return "add{<imodesuffix>}\t%0, %0";
    }

  /* Count-less encoding for a shift by the constant 1.  */
  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
    return "sal{<imodesuffix>}\t%0";

  return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
}
  [(set (attr "type")
     (cond [(and (and (match_test "TARGET_DOUBLE_WITH_ADD")
                      (match_operand 0 "register_operand"))
                 (match_operand 2 "const1_operand"))
              (const_string "alu")
           ]
           (const_string "ishift")))
   (set (attr "length_immediate")
     (if_then_else
       (ior (eq_attr "type" "alu")
            (and (eq_attr "type" "ishift")
                 (and (match_operand 2 "const1_operand")
                      (ior (match_test "TARGET_SHIFT1")
                           (match_test "optimize_function_for_size_p (cfun)")))))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])
13247
;; QImode left shift of the 8-bit field at bit offset 8 of a register
;; (zero_extract with size 8 and position 8), written back to the same
;; field.  The assembly uses the %h operand modifier.
(define_insn "*ashlqi_ext<mode>_2"
  [(set (zero_extract:SWI248
          (match_operand:SWI248 0 "register_operand" "+Q")
          (const_int 8)
          (const_int 8))
        (subreg:SWI248
          (ashift:QI
            (subreg:QI
              (zero_extract:SWI248
                (match_operand:SWI248 1 "register_operand" "0")
                (const_int 8)
                (const_int 8)) 0)
            (match_operand:QI 2 "nonmemory_operand" "cI")) 0))
   (clobber (reg:CC FLAGS_REG))]
  "/* FIXME: without this LRA can't reload this pattern, see PR82524.  */
   rtx_equal_p (operands[0], operands[1])"
{
  if (get_attr_type (insn) == TYPE_ALU)
    {
      /* Shift by one expressed as adding the register to itself.  */
      gcc_assert (operands[2] == const1_rtx);
      return "add{b}\t%h0, %h0";
    }

  /* Count-less encoding for a shift by the constant 1.  */
  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
    return "sal{b}\t%h0";

  return "sal{b}\t{%2, %h0|%h0, %2}";
}
  [(set (attr "type")
     (cond [(and (match_test "TARGET_DOUBLE_WITH_ADD")
                 (match_operand 2 "const1_operand"))
              (const_string "alu")
           ]
           (const_string "ishift")))
   (set (attr "length_immediate")
     (if_then_else
       (ior (eq_attr "type" "alu")
            (and (eq_attr "type" "ishift")
                 (and (match_operand 2 "const1_operand")
                      (ior (match_test "TARGET_SHIFT1")
                           (match_test "optimize_function_for_size_p (cfun)")))))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "QI")])
13295
13296 ;; See comment above `ashl<mode>3' about how this works.
13297
;; Named expander for the right-shift codes of the any_shiftrt iterator
;; over the SDWIM modes, with a QImode count.  It has no condition; all
;; of the expansion is delegated to ix86_expand_binary_operator.
13298 (define_expand "<insn><mode>3"
13299 [(set (match_operand:SDWIM 0 "<shift_operand>")
13300 (any_shiftrt:SDWIM (match_operand:SDWIM 1 "<shift_operand>")
13301 (match_operand:QI 2 "nonmemory_operand")))]
13302 ""
13303 "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
13304
13305 ;; Avoid useless masking of count operand.
;; Right shift whose QImode count is the low subreg of (and reg const).
;; The insn condition requires the constant to keep all bits of
;; GET_MODE_BITSIZE (<MODE>mode) - 1 and the count register to have an
;; integer mode of 2 to 4 bytes (2 to 8 when TARGET_64BIT).  The insn is
;; matched only while ix86_pre_reload_split () holds and is always split
;; into the plain shift by the register, dropping the AND.
(define_insn_and_split "*<insn><mode>3_mask"
  [(set (match_operand:SWI48 0 "nonimmediate_operand")
        (any_shiftrt:SWI48
          (match_operand:SWI48 1 "nonimmediate_operand")
          (subreg:QI
            (and
              (match_operand 2 "register_operand" "c,r")
              (match_operand 3 "const_int_operand")) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
      == GET_MODE_BITSIZE (<MODE>mode)-1
   && GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT
   && IN_RANGE (GET_MODE_SIZE (GET_MODE (operands[2])), 2,
                4 << (TARGET_64BIT ? 1 : 0))
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
           (any_shiftrt:SWI48 (match_dup 1)
                              (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
{
  /* Use the QImode low part of the count, forced into a register in its
     own mode first.  */
  rtx count = force_reg (GET_MODE (operands[2]), operands[2]);
  operands[2] = gen_lowpart (QImode, count);
}
  [(set_attr "isa" "*,bmi2")])
13334
;; Right shift whose count is (and:QI reg const) where the constant keeps
;; all bits of GET_MODE_BITSIZE (<MODE>mode) - 1.  Matched only while
;; ix86_pre_reload_split () holds and always split ("&& 1") into the
;; plain shift by operand 2, dropping the AND.
13335 (define_insn_and_split "*<insn><mode>3_mask_1"
13336 [(set (match_operand:SWI48 0 "nonimmediate_operand")
13337 (any_shiftrt:SWI48
13338 (match_operand:SWI48 1 "nonimmediate_operand")
13339 (and:QI
13340 (match_operand:QI 2 "register_operand" "c,r")
13341 (match_operand:QI 3 "const_int_operand"))))
13342 (clobber (reg:CC FLAGS_REG))]
13343 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
13344 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
13345 == GET_MODE_BITSIZE (<MODE>mode)-1
13346 && ix86_pre_reload_split ()"
13347 "#"
13348 "&& 1"
13349 [(parallel
13350 [(set (match_dup 0)
13351 (any_shiftrt:SWI48 (match_dup 1)
13352 (match_dup 2)))
13353 (clobber (reg:CC FLAGS_REG))])]
13354 ""
13355 [(set_attr "isa" "*,bmi2")])
13356
;; Double-word right shift whose QImode count is the low subreg of
;; (and reg const).  The insn condition accepts a constant that either
;; has the <MODE_SIZE> * BITS_PER_UNIT bit clear or keeps all bits of
;; 2 * <MODE_SIZE> * BITS_PER_UNIT - 1, and a count register in an integer
;; mode of 2 to 4 bytes (2 to 8 when TARGET_64BIT).  Matched only while
;; ix86_pre_reload_split () holds and always split ("&& 1").
13357 (define_insn_and_split "*<insn><dwi>3_doubleword_mask"
13358 [(set (match_operand:<DWI> 0 "register_operand")
13359 (any_shiftrt:<DWI>
13360 (match_operand:<DWI> 1 "register_operand")
13361 (subreg:QI
13362 (and
13363 (match_operand 2 "register_operand" "c")
13364 (match_operand 3 "const_int_operand")) 0)))
13365 (clobber (reg:CC FLAGS_REG))]
13366 "((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0
13367 || ((INTVAL (operands[3]) & (2 * <MODE_SIZE> * BITS_PER_UNIT - 1))
13368 == (2 * <MODE_SIZE> * BITS_PER_UNIT - 1)))
13369 && GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT
13370 && IN_RANGE (GET_MODE_SIZE (GET_MODE (operands[2])), 2,
13371 4 << (TARGET_64BIT ? 1 : 0))
13372 && ix86_pre_reload_split ()"
13373 "#"
13374 "&& 1"
;; Split template: operand 4 is shifted right in place and combined with
;; bits shifted in from operand 7, then operand 6 gets operand 7 shifted
;; by the count.
13375 [(parallel
13376 [(set (match_dup 4)
13377 (ior:DWIH (lshiftrt:DWIH (match_dup 4)
13378 (and:QI (match_dup 2) (match_dup 8)))
13379 (subreg:DWIH
13380 (ashift:<DWI> (zero_extend:<DWI> (match_dup 7))
13381 (minus:QI (match_dup 9)
13382 (and:QI (match_dup 2) (match_dup 8)))) 0)))
13383 (clobber (reg:CC FLAGS_REG))])
13384 (parallel
13385 [(set (match_dup 6)
13386 (any_shiftrt:DWIH (match_dup 7) (match_dup 2)))
13387 (clobber (reg:CC FLAGS_REG))])]
13388 {
/* When the mask keeps the <MODE_SIZE> * BITS_PER_UNIT bit, emit the plain
   double-word shift on the QImode low part of the count register,
   without the AND, and skip the split template.  */
13389 if ((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) != 0)
13390 {
13391 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
13392 operands[2] = gen_lowpart (QImode, operands[2]);
13393 emit_insn (gen_<insn><dwi>3_doubleword (operands[0], operands[1],
13394 operands[2]));
13395 DONE;
13396 }
13397
/* NOTE(review): presumably fills operands[4]/[5] with the low halves and
   operands[6]/[7] with the high halves of operands[0]/[1]; confirm
   against split_double_mode.  */
13398 split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);
13399
/* Constants used by the split template: bit size - 1 and bit size of the
   half-width mode.  */
13400 operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
13401 operands[9] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
13402
/* The mask also clears some of the low count bits: apply the reduced mask
   explicitly, into a fresh register.  */
13403 if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
13404 != ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
13405 {
13406 rtx xops[3];
13407 xops[0] = gen_reg_rtx (GET_MODE (operands[2]));
13408 xops[1] = operands[2];
13409 xops[2] = GEN_INT (INTVAL (operands[3])
13410 & ((<MODE_SIZE> * BITS_PER_UNIT) - 1));
13411 ix86_expand_binary_operator (AND, GET_MODE (operands[2]), xops);
13412 operands[2] = xops[0];
13413 }
13414
13415 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
13416 operands[2] = gen_lowpart (QImode, operands[2]);
13417
/* The template shifts operands[4] in place, so copy operands[5] into it
   first when the two differ.  */
13418 if (!rtx_equal_p (operands[4], operands[5]))
13419 emit_move_insn (operands[4], operands[5]);
13420 })
13421
;; Double-word right shift whose count is (and:QI reg const).  The insn
;; condition accepts a constant that either has the
;; <MODE_SIZE> * BITS_PER_UNIT bit clear or keeps all bits of
;; 2 * <MODE_SIZE> * BITS_PER_UNIT - 1.  Matched only while
;; ix86_pre_reload_split () holds and always split ("&& 1").
13422 (define_insn_and_split "*<insn><dwi>3_doubleword_mask_1"
13423 [(set (match_operand:<DWI> 0 "register_operand")
13424 (any_shiftrt:<DWI>
13425 (match_operand:<DWI> 1 "register_operand")
13426 (and:QI
13427 (match_operand:QI 2 "register_operand" "c")
13428 (match_operand:QI 3 "const_int_operand"))))
13429 (clobber (reg:CC FLAGS_REG))]
13430 "((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0
13431 || ((INTVAL (operands[3]) & (2 * <MODE_SIZE> * BITS_PER_UNIT - 1))
13432 == (2 * <MODE_SIZE> * BITS_PER_UNIT - 1)))
13433 && ix86_pre_reload_split ()"
13434 "#"
13435 "&& 1"
;; Split template: operand 4 is shifted right in place and combined with
;; bits shifted in from operand 7, then operand 6 gets operand 7 shifted
;; by the count.
13436 [(parallel
13437 [(set (match_dup 4)
13438 (ior:DWIH (lshiftrt:DWIH (match_dup 4)
13439 (and:QI (match_dup 2) (match_dup 8)))
13440 (subreg:DWIH
13441 (ashift:<DWI> (zero_extend:<DWI> (match_dup 7))
13442 (minus:QI (match_dup 9)
13443 (and:QI (match_dup 2) (match_dup 8)))) 0)))
13444 (clobber (reg:CC FLAGS_REG))])
13445 (parallel
13446 [(set (match_dup 6)
13447 (any_shiftrt:DWIH (match_dup 7) (match_dup 2)))
13448 (clobber (reg:CC FLAGS_REG))])]
13449 {
/* When the mask keeps the <MODE_SIZE> * BITS_PER_UNIT bit, emit the plain
   double-word shift by operand 2, without the AND, and skip the split
   template.  */
13450 if ((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) != 0)
13451 {
13452 emit_insn (gen_<insn><dwi>3_doubleword (operands[0], operands[1],
13453 operands[2]));
13454 DONE;
13455 }
13456
/* NOTE(review): presumably fills operands[4]/[5] with the low halves and
   operands[6]/[7] with the high halves of operands[0]/[1]; confirm
   against split_double_mode.  */
13457 split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);
13458
/* Constants used by the split template: bit size - 1 and bit size of the
   half-width mode.  */
13459 operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
13460 operands[9] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
13461
/* The mask also clears some of the low count bits: apply the original
   mask explicitly, into a fresh QImode register.  */
13462 if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
13463 != ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
13464 {
13465 rtx tem = gen_reg_rtx (QImode);
13466 emit_insn (gen_andqi3 (tem, operands[2], operands[3]));
13467 operands[2] = tem;
13468 }
13469
/* The template shifts operands[4] in place, so copy operands[5] into it
   first when the two differ.  */
13470 if (!rtx_equal_p (operands[4], operands[5]))
13471 emit_move_insn (operands[4], operands[5]);
13472 })
13473
;; Double-word right shift kept as a single insn of type "multi" and
;; output as "#".  It is split once epilogue_completed holds, by calling
;; ix86_split_<insn> with NULL_RTX as its second argument.
13474 (define_insn_and_split "<insn><mode>3_doubleword"
13475 [(set (match_operand:DWI 0 "register_operand" "=&r")
13476 (any_shiftrt:DWI (match_operand:DWI 1 "register_operand" "0")
13477 (match_operand:QI 2 "nonmemory_operand" "<S>c")))
13478 (clobber (reg:CC FLAGS_REG))]
13479 ""
13480 "#"
13481 "epilogue_completed"
13482 [(const_int 0)]
13483 "ix86_split_<insn> (operands, NULL_RTX, <MODE>mode); DONE;"
13484 [(set_attr "type" "multi")])
13485
13486 ;; By default we don't ask for a scratch register, because when DWImode
13487 ;; values are manipulated, registers are already at a premium. But if
13488 ;; we have one handy, we won't turn it away.
13489
;; Peephole for a double-word right shift when TARGET_CMOVE is set and a
;; DWIH scratch register (operand 3) is available: the insn is replaced
;; by whatever ix86_split_<insn> emits when given that scratch.
13490 (define_peephole2
13491 [(match_scratch:DWIH 3 "r")
13492 (parallel [(set (match_operand:<DWI> 0 "register_operand")
13493 (any_shiftrt:<DWI>
13494 (match_operand:<DWI> 1 "register_operand")
13495 (match_operand:QI 2 "nonmemory_operand")))
13496 (clobber (reg:CC FLAGS_REG))])
13497 (match_dup 3)]
13498 "TARGET_CMOVE"
13499 [(const_int 0)]
13500 "ix86_split_<insn> (operands, operands[3], <DWI>mode); DONE;")
13501
;; 64-bit shrd{q}: operand 0 is shifted right by (operand 2 & 63) and
;; ORed with the low DImode part of the zero-extended operand 1 shifted
;; left by 64 - (operand 2 & 63).  Clobbers the flags register.
13502 (define_insn "x86_64_shrd"
13503 [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
13504 (ior:DI (lshiftrt:DI (match_dup 0)
13505 (and:QI (match_operand:QI 2 "nonmemory_operand" "Jc")
13506 (const_int 63)))
13507 (subreg:DI
13508 (ashift:TI
13509 (zero_extend:TI
13510 (match_operand:DI 1 "register_operand" "r"))
13511 (minus:QI (const_int 64)
13512 (and:QI (match_dup 2) (const_int 63)))) 0)))
13513 (clobber (reg:CC FLAGS_REG))]
13514 "TARGET_64BIT"
13515 "shrd{q}\t{%s2%1, %0|%0, %1, %2}"
13516 [(set_attr "type" "ishift")
13517 (set_attr "prefix_0f" "1")
13518 (set_attr "mode" "DI")
13519 (set_attr "athlon_decode" "vector")
13520 (set_attr "amdfam10_decode" "vector")
13521 (set_attr "bdver1_decode" "vector")])
13522
;; Constant-count form of the 64-bit shrd{q}.  Operand 2 is the right
;; shift count (0..63) and operand 3 the matching left shift count; the
;; insn condition requires operand 3 to equal 64 - operand 2.
13523 (define_insn "x86_64_shrd_1"
13524 [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
13525 (ior:DI (lshiftrt:DI (match_dup 0)
13526 (match_operand:QI 2 "const_0_to_63_operand"))
13527 (subreg:DI
13528 (ashift:TI
13529 (zero_extend:TI
13530 (match_operand:DI 1 "register_operand" "r"))
13531 (match_operand:QI 3 "const_0_to_255_operand")) 0)))
13532 (clobber (reg:CC FLAGS_REG))]
13533 "TARGET_64BIT
13534 && INTVAL (operands[3]) == 64 - INTVAL (operands[2])"
13535 "shrd{q}\t{%2, %1, %0|%0, %1, %2}"
13536 [(set_attr "type" "ishift")
13537 (set_attr "prefix_0f" "1")
13538 (set_attr "length_immediate" "1")
13539 (set_attr "mode" "DI")
13540 (set_attr "athlon_decode" "vector")
13541 (set_attr "amdfam10_decode" "vector")
13542 (set_attr "bdver1_decode" "vector")])
13543
;; 64-bit only: (operand 4 >> operand 2) | (operand 1 << operand 3) with
;; constant counts that add up to 64, written without the zero_extend
;; form used by x86_64_shrd_1.  Matched only while ix86_pre_reload_split ()
;; holds and always split into gen_x86_64_shrd_1 or gen_x86_64_shld_1,
;; depending on which input coincides with the destination.
(define_insn_and_split "*x86_64_shrd_shld_1_nozext"
  [(set (match_operand:DI 0 "nonimmediate_operand")
        (ior:DI (lshiftrt:DI (match_operand:DI 4 "nonimmediate_operand")
                             (match_operand:QI 2 "const_0_to_63_operand"))
                (ashift:DI
                  (match_operand:DI 1 "nonimmediate_operand")
                  (match_operand:QI 3 "const_0_to_63_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT
   && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx shr_count = operands[2];
  rtx shl_count = operands[3];

  /* The right-shifted input is the destination: shrd in place.  */
  if (rtx_equal_p (operands[4], dest))
    {
      rtx other = force_reg (DImode, operands[1]);
      emit_insn (gen_x86_64_shrd_1 (dest, other, shr_count, shl_count));
      DONE;
    }

  /* The left-shifted input is the destination: shld in place, with the
     two counts swapped.  */
  if (rtx_equal_p (operands[1], dest))
    {
      rtx other = force_reg (DImode, operands[4]);
      emit_insn (gen_x86_64_shld_1 (dest, other, shl_count, shr_count));
      DONE;
    }

  /* Neither input is the destination: work in a fresh register and copy
     the result out.  */
  rtx high_src = force_reg (DImode, operands[1]);
  rtx tmp = gen_reg_rtx (DImode);
  emit_move_insn (tmp, operands[4]);
  emit_insn (gen_x86_64_shrd_1 (tmp, high_src, shr_count, shl_count));
  emit_move_insn (dest, tmp);
  DONE;
})
13579
;; 64-bit only: (operand 0 >> operand 2) | (operand 1 << (64 - operand 2))
;; with unmasked counts.  Matched only while ix86_pre_reload_split ()
;; holds and always split ("&& 1") into the form with counts masked by 63
;; and the left shift done on the zero-extended TImode value, i.e. the
;; same RTL shape as x86_64_shrd.
13580 (define_insn_and_split "*x86_64_shrd_2"
13581 [(set (match_operand:DI 0 "nonimmediate_operand")
13582 (ior:DI (lshiftrt:DI (match_dup 0)
13583 (match_operand:QI 2 "nonmemory_operand"))
13584 (ashift:DI (match_operand:DI 1 "register_operand")
13585 (minus:QI (const_int 64) (match_dup 2)))))
13586 (clobber (reg:CC FLAGS_REG))]
13587 "TARGET_64BIT && ix86_pre_reload_split ()"
13588 "#"
13589 "&& 1"
13590 [(parallel [(set (match_dup 0)
13591 (ior:DI (lshiftrt:DI (match_dup 0)
13592 (and:QI (match_dup 2) (const_int 63)))
13593 (subreg:DI
13594 (ashift:TI
13595 (zero_extend:TI (match_dup 1))
13596 (minus:QI (const_int 64)
13597 (and:QI (match_dup 2)
13598 (const_int 63)))) 0)))
13599 (clobber (reg:CC FLAGS_REG))])])
13600
;; 32-bit shrd{l}: operand 0 is shifted right by (operand 2 & 31) and
;; ORed with the low SImode part of the zero-extended operand 1 shifted
;; left by 32 - (operand 2 & 31).  Clobbers the flags register; the insn
;; condition is empty.
13601 (define_insn "x86_shrd"
13602 [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
13603 (ior:SI (lshiftrt:SI (match_dup 0)
13604 (and:QI (match_operand:QI 2 "nonmemory_operand" "Ic")
13605 (const_int 31)))
13606 (subreg:SI
13607 (ashift:DI
13608 (zero_extend:DI
13609 (match_operand:SI 1 "register_operand" "r"))
13610 (minus:QI (const_int 32)
13611 (and:QI (match_dup 2) (const_int 31)))) 0)))
13612 (clobber (reg:CC FLAGS_REG))]
13613 ""
13614 "shrd{l}\t{%s2%1, %0|%0, %1, %2}"
13615 [(set_attr "type" "ishift")
13616 (set_attr "prefix_0f" "1")
13617 (set_attr "mode" "SI")
13618 (set_attr "pent_pair" "np")
13619 (set_attr "athlon_decode" "vector")
13620 (set_attr "amdfam10_decode" "vector")
13621 (set_attr "bdver1_decode" "vector")])
13622
;; Constant-count form of the 32-bit shrd{l}.  Operand 2 is the right
;; shift count (0..31) and operand 3 the matching left shift count; the
;; insn condition requires operand 3 to equal 32 - operand 2.
13623 (define_insn "x86_shrd_1"
13624 [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
13625 (ior:SI (lshiftrt:SI (match_dup 0)
13626 (match_operand:QI 2 "const_0_to_31_operand"))
13627 (subreg:SI
13628 (ashift:DI
13629 (zero_extend:DI
13630 (match_operand:SI 1 "register_operand" "r"))
13631 (match_operand:QI 3 "const_0_to_63_operand")) 0)))
13632 (clobber (reg:CC FLAGS_REG))]
13633 "INTVAL (operands[3]) == 32 - INTVAL (operands[2])"
13634 "shrd{l}\t{%2, %1, %0|%0, %1, %2}"
13635 [(set_attr "type" "ishift")
13636 (set_attr "prefix_0f" "1")
13637 (set_attr "length_immediate" "1")
13638 (set_attr "mode" "SI")
13639 (set_attr "pent_pair" "np")
13640 (set_attr "athlon_decode" "vector")
13641 (set_attr "amdfam10_decode" "vector")
13642 (set_attr "bdver1_decode" "vector")])
13643
;; (operand 4 >> operand 2) | (operand 1 << operand 3) in SImode with
;; constant counts that add up to 32, written without the zero_extend
;; form used by x86_shrd_1.  Matched only while ix86_pre_reload_split ()
;; holds and always split into gen_x86_shrd_1 or gen_x86_shld_1,
;; depending on which input coincides with the destination.
(define_insn_and_split "*x86_shrd_shld_1_nozext"
  [(set (match_operand:SI 0 "nonimmediate_operand")
        (ior:SI (lshiftrt:SI (match_operand:SI 4 "nonimmediate_operand")
                             (match_operand:QI 2 "const_0_to_31_operand"))
                (ashift:SI
                  (match_operand:SI 1 "nonimmediate_operand")
                  (match_operand:QI 3 "const_0_to_31_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "INTVAL (operands[3]) == 32 - INTVAL (operands[2])
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx shr_count = operands[2];
  rtx shl_count = operands[3];

  /* The right-shifted input is the destination: shrd in place.  */
  if (rtx_equal_p (operands[4], dest))
    {
      rtx other = force_reg (SImode, operands[1]);
      emit_insn (gen_x86_shrd_1 (dest, other, shr_count, shl_count));
      DONE;
    }

  /* The left-shifted input is the destination: shld in place, with the
     two counts swapped.  */
  if (rtx_equal_p (operands[1], dest))
    {
      rtx other = force_reg (SImode, operands[4]);
      emit_insn (gen_x86_shld_1 (dest, other, shl_count, shr_count));
      DONE;
    }

  /* Neither input is the destination: work in a fresh register and copy
     the result out.  */
  rtx high_src = force_reg (SImode, operands[1]);
  rtx tmp = gen_reg_rtx (SImode);
  emit_move_insn (tmp, operands[4]);
  emit_insn (gen_x86_shrd_1 (tmp, high_src, shr_count, shl_count));
  emit_move_insn (dest, tmp);
  DONE;
})
13678
;; (operand 0 >> operand 2) | (operand 1 << (32 - operand 2)) in SImode
;; with unmasked counts.  Matched only while ix86_pre_reload_split ()
;; holds and always split ("&& 1") into the form with counts masked by 31
;; and the left shift done on the zero-extended DImode value, i.e. the
;; same RTL shape as x86_shrd.
;; NOTE(review): the insn condition requires TARGET_64BIT although the
;; pattern is SImode only and x86_shrd itself has an empty condition;
;; confirm whether this restriction is intended.
13679 (define_insn_and_split "*x86_shrd_2"
13680 [(set (match_operand:SI 0 "nonimmediate_operand")
13681 (ior:SI (lshiftrt:SI (match_dup 0)
13682 (match_operand:QI 2 "nonmemory_operand"))
13683 (ashift:SI (match_operand:SI 1 "register_operand")
13684 (minus:QI (const_int 32) (match_dup 2)))))
13685 (clobber (reg:CC FLAGS_REG))]
13686 "TARGET_64BIT && ix86_pre_reload_split ()"
13687 "#"
13688 "&& 1"
13689 [(parallel [(set (match_dup 0)
13690 (ior:SI (lshiftrt:SI (match_dup 0)
13691 (and:QI (match_dup 2) (const_int 31)))
13692 (subreg:SI
13693 (ashift:DI
13694 (zero_extend:DI (match_dup 1))
13695 (minus:QI (const_int 32)
13696 (and:QI (match_dup 2)
13697 (const_int 31)))) 0)))
13698 (clobber (reg:CC FLAGS_REG))])])
13699
13700 ;; Base name for insn mnemonic.
;; Maps SI and DI to a two-dialect mnemonic string ({cltd|cdq} and
;; {cqto|cqo}); used as the first output alternative of ashr<mode>3_cvt.
13701 (define_mode_attr cvt_mnemonic
13702 [(SI "{cltd|cdq}") (DI "{cqto|cqo}")])
13703
;; Arithmetic right shift by exactly GET_MODE_BITSIZE (<MODE>mode) - 1,
;; enabled when TARGET_USE_CLTD is set or the function is optimized for
;; size.  Alternative 0 emits <cvt_mnemonic> (type "imovx", no modrm and
;; no immediate) with the "*a" input and "*d" output constraints;
;; alternative 1 is an ordinary in-place sar.
13704 (define_insn "ashr<mode>3_cvt"
13705 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=*d,rm")
13706 (ashiftrt:SWI48
13707 (match_operand:SWI48 1 "nonimmediate_operand" "*a,0")
13708 (match_operand:QI 2 "const_int_operand")))
13709 (clobber (reg:CC FLAGS_REG))]
13710 "INTVAL (operands[2]) == GET_MODE_BITSIZE (<MODE>mode)-1
13711 && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
13712 && ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands)"
13713 "@
13714 <cvt_mnemonic>
13715 sar{<imodesuffix>}\t{%2, %0|%0, %2}"
13716 [(set_attr "type" "imovx,ishift")
13717 (set_attr "prefix_0f" "0,*")
13718 (set_attr "length_immediate" "0,*")
13719 (set_attr "modrm" "0,1")
13720 (set_attr "mode" "<MODE>")])
13721
;; 64-bit only: SImode arithmetic right shift by exactly 31 whose result
;; is zero-extended into a DImode register.  Alternative 0 emits
;; {cltd|cdq}; alternative 1 is an in-place sar{l} on the 32-bit view
;; (%k0) of the destination.
13722 (define_insn "*ashrsi3_cvt_zext"
13723 [(set (match_operand:DI 0 "register_operand" "=*d,r")
13724 (zero_extend:DI
13725 (ashiftrt:SI (match_operand:SI 1 "register_operand" "*a,0")
13726 (match_operand:QI 2 "const_int_operand"))))
13727 (clobber (reg:CC FLAGS_REG))]
13728 "TARGET_64BIT && INTVAL (operands[2]) == 31
13729 && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
13730 && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
13731 "@
13732 {cltd|cdq}
13733 sar{l}\t{%2, %k0|%k0, %2}"
13734 [(set_attr "type" "imovx,ishift")
13735 (set_attr "prefix_0f" "0,*")
13736 (set_attr "length_immediate" "0,*")
13737 (set_attr "modrm" "0,1")
13738 (set_attr "mode" "SI")])
13739
;; Shift adjustment helper.  Emits a test of the QImode operand 2 against
;; the bit size of <MODE>mode, then a conditional jump that skips the
;; following two insns when the flags compare equal to zero.  The skipped
;; insns copy operand 1 into operand 0 and replace operand 1 by itself
;; arithmetically shifted right by bit size - 1.
(define_expand "@x86_shift<mode>_adj_3"
  [(use (match_operand:SWI48 0 "register_operand"))
   (use (match_operand:SWI48 1 "register_operand"))
   (use (match_operand:QI 2 "register_operand"))]
  ""
{
  rtx_code_label *skip = gen_label_rtx ();

  emit_insn (gen_testqi_ccz_1 (operands[2],
                               GEN_INT (GET_MODE_BITSIZE (<MODE>mode))));

  /* Build and emit "if (flags == 0) goto skip".  */
  rtx flags = gen_rtx_REG (CCZmode, FLAGS_REG);
  rtx is_zero = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
  rtx target = gen_rtx_LABEL_REF (VOIDmode, skip);
  rtx branch = gen_rtx_IF_THEN_ELSE (VOIDmode, is_zero, target, pc_rtx);
  rtx jump = emit_jump_insn (gen_rtx_SET (pc_rtx, branch));
  JUMP_LABEL (jump) = skip;

  /* Fall-through path: operand 0 takes the old operand 1, and operand 1
     becomes its own sign fill.  */
  emit_move_insn (operands[0], operands[1]);
  emit_insn (gen_ashr<mode>3_cvt (operands[1], operands[1],
                                  GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1)));

  emit_label (skip);
  LABEL_NUSES (skip) = 1;

  DONE;
})
13768
;; TARGET_BMI2 three-operand right shift (<shift>x).  The count is a
;; register in the same mode as the data, the source may be a register
;; or memory, and the pattern has no flags clobber.
13769 (define_insn "*bmi2_<insn><mode>3_1"
13770 [(set (match_operand:SWI48 0 "register_operand" "=r")
13771 (any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
13772 (match_operand:SWI48 2 "register_operand" "r")))]
13773 "TARGET_BMI2"
13774 "<shift>x\t{%2, %1, %0|%0, %1, %2}"
13775 [(set_attr "type" "ishiftx")
13776 (set_attr "mode" "<MODE>")])
13777
;; Arithmetic right shift for the SWI48 modes, clobbering the flags.
;; Alternative 0 is the in-place sar; alternative 1 (isa "bmi2", type
;; "ishiftx") is output as "#".
(define_insn "*ashr<mode>3_1"
  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
        (ashiftrt:SWI48
          (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
          (match_operand:QI 2 "nonmemory_operand" "c<S>,r")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands)"
{
  if (get_attr_type (insn) == TYPE_ISHIFTX)
    return "#";

  /* Count-less encoding for a shift by the constant 1.  */
  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
    return "sar{<imodesuffix>}\t%0";

  return "sar{<imodesuffix>}\t{%2, %0|%0, %2}";
}
  [(set_attr "isa" "*,bmi2")
   (set_attr "type" "ishift,ishiftx")
   (set (attr "length_immediate")
     (if_then_else
       (and (match_operand 2 "const1_operand")
            (ior (match_test "TARGET_SHIFT1")
                 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])
13809
13810 ;; Specialization of *lshr<mode>3_1 below: extracts the SImode
13811 ;; highpart of a DImode value, while allowing that value to be clobbered.
;; 64-bit only: logical right shift of a DImode register by 32 whose
;; destination is an SImode register accessed through a DImode subreg.
;; Always output as "#" and split after reload.  An SSE destination gets
;; a V4SImode shufps on itself; any other destination is rewritten as a
;; DImode register and the plain DImode shift is emitted.
(define_insn_and_split "*highpartdisi2"
  [(set (subreg:DI (match_operand:SI 0 "register_operand" "=r,x,?k") 0)
        (lshiftrt:DI (match_operand:DI 1 "register_operand" "0,0,k")
                     (const_int 32)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "#"
  "&& reload_completed"
  [(parallel
     [(set (match_dup 0) (lshiftrt:DI (match_dup 1) (const_int 32)))
      (clobber (reg:CC FLAGS_REG))])]
{
  const unsigned int regno = REGNO (operands[0]);

  if (!SSE_REG_P (operands[0]))
    /* Let the split template do the shift on the DImode register.  */
    operands[0] = gen_rtx_REG (DImode, regno);
  else
    {
      /* Shuffle within the vector register instead of shifting.  */
      rtx vec = gen_rtx_REG (V4SImode, regno);
      emit_insn (gen_sse_shufps_v4si (vec, vec, vec,
                                      const1_rtx, const1_rtx,
                                      GEN_INT (5), GEN_INT (5)));
      DONE;
    }
})
13834
;; Logical right shift for the SWI48 modes, clobbering the flags.
;; Alternative 0 is the in-place shr; alternative 1 (isa "bmi2", type
;; "ishiftx") and alternative 2 (isa "avx512bw", type "msklog") are
;; output as "#".
(define_insn "*lshr<mode>3_1"
  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,?k")
        (lshiftrt:SWI48
          (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,k")
          (match_operand:QI 2 "nonmemory_operand" "c<S>,r,<KS>")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (LSHIFTRT, <MODE>mode, operands)"
{
  switch (get_attr_type (insn))
    {
    case TYPE_MSKLOG:
    case TYPE_ISHIFTX:
      return "#";

    default:
      break;
    }

  /* Count-less encoding for a shift by the constant 1.  */
  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
    return "shr{<imodesuffix>}\t%0";

  return "shr{<imodesuffix>}\t{%2, %0|%0, %2}";
}
  [(set_attr "isa" "*,bmi2,avx512bw")
   (set_attr "type" "ishift,ishiftx,msklog")
   ;; Only alternative 0 can use the form without an immediate byte.
   (set (attr "length_immediate")
     (if_then_else
       (and (and (match_operand 2 "const1_operand")
                 (eq_attr "alternative" "0"))
            (ior (match_test "TARGET_SHIFT1")
                 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])
13868
13869 ;; Convert shift to the shiftx pattern to avoid flags dependency.
;; With TARGET_BMI2 and after reload, a flags-clobbering right shift by a
;; QImode register count into a register destination is rewritten without
;; the clobber; the count register is re-expressed in <MODE>mode via
;; gen_lowpart.
13870 (define_split
13871 [(set (match_operand:SWI48 0 "register_operand")
13872 (any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
13873 (match_operand:QI 2 "register_operand")))
13874 (clobber (reg:CC FLAGS_REG))]
13875 "TARGET_BMI2 && reload_completed"
13876 [(set (match_dup 0)
13877 (any_shiftrt:SWI48 (match_dup 1) (match_dup 2)))]
13878 "operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
13879
;; 64-bit TARGET_BMI2 form of the three-operand SImode right shift whose
;; result is zero-extended into a DImode register; the assembly names the
;; 32-bit view (%k0) of the destination.  No flags clobber.
13880 (define_insn "*bmi2_<insn>si3_1_zext"
13881 [(set (match_operand:DI 0 "register_operand" "=r")
13882 (zero_extend:DI
13883 (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
13884 (match_operand:SI 2 "register_operand" "r"))))]
13885 "TARGET_64BIT && TARGET_BMI2"
13886 "<shift>x\t{%2, %1, %k0|%k0, %1, %2}"
13887 [(set_attr "type" "ishiftx")
13888 (set_attr "mode" "SI")])
13889
;; SImode right shift whose result is zero-extended to DImode, clobbering
;; FLAGS_REG; only available with TARGET_64BIT.
;; Alternative 0 (type ishift) is the two-operand "<shift>l" with operand 1
;; tied to the destination.  Alternative 1 (isa bmi2, type ishiftx, register
;; count) is output as "#" and must be split before final output.
;; A count of const1_rtx uses the one-operand form when TARGET_SHIFT1 is
;; set or the function is optimized for size; "length_immediate" is 0 then.
13890 (define_insn "*<insn>si3_1_zext"
13891 [(set (match_operand:DI 0 "register_operand" "=r,r")
13892 (zero_extend:DI
13893 (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
13894 (match_operand:QI 2 "nonmemory_operand" "cI,r"))))
13895 (clobber (reg:CC FLAGS_REG))]
13896 "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
13897 {
13898 switch (get_attr_type (insn))
13899 {
13900 case TYPE_ISHIFTX:
13901 return "#";
13902
13903 default:
13904 if (operands[2] == const1_rtx
13905 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
13906 return "<shift>{l}\t%k0";
13907 else
13908 return "<shift>{l}\t{%2, %k0|%k0, %2}";
13909 }
13910 }
13911 [(set_attr "isa" "*,bmi2")
13912 (set_attr "type" "ishift,ishiftx")
13913 (set (attr "length_immediate")
13914 (if_then_else
13915 (and (match_operand 2 "const1_operand")
13916 (ior (match_test "TARGET_SHIFT1")
13917 (match_test "optimize_function_for_size_p (cfun)")))
13918 (const_string "0")
13919 (const_string "*")))
13920 (set_attr "mode" "SI")])
13921
13922 ;; Convert shift to the shiftx pattern to avoid flags dependency.
;; Zero-extending variant: after reload, with TARGET_64BIT and TARGET_BMI2,
;; an SImode right shift by a QImode register count that is zero-extended
;; to DImode and carries the FLAGS_REG clobber is re-emitted without the
;; clobber.  The count is rewritten with gen_lowpart to SImode.
13923 (define_split
13924 [(set (match_operand:DI 0 "register_operand")
13925 (zero_extend:DI
13926 (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand")
13927 (match_operand:QI 2 "register_operand"))))
13928 (clobber (reg:CC FLAGS_REG))]
13929 "TARGET_64BIT && TARGET_BMI2 && reload_completed"
13930 [(set (match_dup 0)
13931 (zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
13932 "operands[2] = gen_lowpart (SImode, operands[2]);")
13933
;; Arithmetic right shift in the SWI12 modes, clobbering FLAGS_REG.
;; Single alternative: two-operand "sar" with operand 1 tied to the
;; destination.  A count of const1_rtx is printed as the one-operand
;; "sar %0" when TARGET_SHIFT1 is set or the function is optimized for
;; size, and "length_immediate" is 0 under the same condition.
13934 (define_insn "*ashr<mode>3_1"
13935 [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")
13936 (ashiftrt:SWI12
13937 (match_operand:SWI12 1 "nonimmediate_operand" "0")
13938 (match_operand:QI 2 "nonmemory_operand" "c<S>")))
13939 (clobber (reg:CC FLAGS_REG))]
13940 "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands)"
13941 {
13942 if (operands[2] == const1_rtx
13943 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
13944 return "sar{<imodesuffix>}\t%0";
13945 else
13946 return "sar{<imodesuffix>}\t{%2, %0|%0, %2}";
13947 }
13948 [(set_attr "type" "ishift")
13949 (set (attr "length_immediate")
13950 (if_then_else
13951 (and (match_operand 2 "const1_operand")
13952 (ior (match_test "TARGET_SHIFT1")
13953 (match_test "optimize_function_for_size_p (cfun)")))
13954 (const_string "0")
13955 (const_string "*")))
13956 (set_attr "mode" "<MODE>")])
13957
;; QImode logical right shift, clobbering FLAGS_REG.
;; Alternative 0 (type ishift) is the two-operand "shrb" with operand 1
;; tied to the destination.  Alternative 1 (isa avx512dq, type msklog,
;; "k" registers) is output as "#" and must be split before final output.
;; Any other insn type reaching the output code is a bug
;; (gcc_unreachable).  The one-operand form and "length_immediate" 0 are
;; used for a count of 1 when TARGET_SHIFT1 or optimizing for size.
13958 (define_insn "*lshrqi3_1"
13959 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,?k")
13960 (lshiftrt:QI
13961 (match_operand:QI 1 "nonimmediate_operand" "0, k")
13962 (match_operand:QI 2 "nonmemory_operand" "cI,Wb")))
13963 (clobber (reg:CC FLAGS_REG))]
13964 "ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
13965 {
13966 switch (get_attr_type (insn))
13967 {
13968 case TYPE_ISHIFT:
13969 if (operands[2] == const1_rtx
13970 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
13971 return "shr{b}\t%0";
13972 else
13973 return "shr{b}\t{%2, %0|%0, %2}";
13974 case TYPE_MSKLOG:
13975 return "#";
13976 default:
13977 gcc_unreachable ();
13978 }
13979 }
13980 [(set_attr "isa" "*,avx512dq")
13981 (set_attr "type" "ishift,msklog")
13982 (set (attr "length_immediate")
13983 (if_then_else
13984 (and (and (match_operand 2 "const1_operand")
13985 (eq_attr "alternative" "0"))
13986 (ior (match_test "TARGET_SHIFT1")
13987 (match_test "optimize_function_for_size_p (cfun)")))
13988 (const_string "0")
13989 (const_string "*")))
13990 (set_attr "mode" "QI")])
13991
;; HImode logical right shift, clobbering FLAGS_REG.
;; Alternative 0 (type ishift) is the two-operand "shrw" with operand 1
;; tied to the destination.  Alternative 1 (isa avx512f, type msklog,
;; "k" registers) is output as "#" and must be split before final output.
;; Any other insn type reaching the output code is a bug
;; (gcc_unreachable).  The one-operand form and "length_immediate" 0 are
;; used for a count of 1 when TARGET_SHIFT1 or optimizing for size.
13992 (define_insn "*lshrhi3_1"
13993 [(set (match_operand:HI 0 "nonimmediate_operand" "=rm, ?k")
13994 (lshiftrt:HI
13995 (match_operand:HI 1 "nonimmediate_operand" "0, k")
13996 (match_operand:QI 2 "nonmemory_operand" "cI, Ww")))
13997 (clobber (reg:CC FLAGS_REG))]
13998 "ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
13999 {
14000 switch (get_attr_type (insn))
14001 {
14002 case TYPE_ISHIFT:
14003 if (operands[2] == const1_rtx
14004 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
14005 return "shr{w}\t%0";
14006 else
14007 return "shr{w}\t{%2, %0|%0, %2}";
14008 case TYPE_MSKLOG:
14009 return "#";
14010 default:
14011 gcc_unreachable ();
14012 }
14013 }
14014 [(set_attr "isa" "*, avx512f")
14015 (set_attr "type" "ishift,msklog")
14016 (set (attr "length_immediate")
14017 (if_then_else
14018 (and (and (match_operand 2 "const1_operand")
14019 (eq_attr "alternative" "0"))
14020 (ior (match_test "TARGET_SHIFT1")
14021 (match_test "optimize_function_for_size_p (cfun)")))
14022 (const_string "0")
14023 (const_string "*")))
14024 (set_attr "mode" "HI")])
14025
14026 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Right shift that writes only the low part of operand 0
;; (strict_low_part), in the SWI12 modes, clobbering FLAGS_REG.  Enabled
;; when !TARGET_PARTIAL_REG_STALL or when optimizing for size.
;; Alternative 0 ties operand 1 to operand 0 and prints the shift
;; directly.  Alternative 1 (earlyclobber destination, distinct source)
;; prints "#" and is split after reload into a strict_low_part move of
;; operand 1 into operand 0 followed by the same shift done in place.
14027 (define_insn_and_split "*<insn><mode>3_1_slp"
14028 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
14029 (any_shiftrt:SWI12 (match_operand:SWI12 1 "register_operand" "0,!<r>")
14030 (match_operand:QI 2 "nonmemory_operand" "cI,cI")))
14031 (clobber (reg:CC FLAGS_REG))]
14032 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
14033 {
14034 if (which_alternative)
14035 return "#";
14036
14037 if (operands[2] == const1_rtx
14038 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
14039 return "<shift>{<imodesuffix>}\t%0";
14040 else
14041 return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
14042 }
14043 "&& reload_completed"
14044 [(set (strict_low_part (match_dup 0)) (match_dup 1))
14045 (parallel
14046 [(set (strict_low_part (match_dup 0))
14047 (any_shiftrt:SWI12 (match_dup 0) (match_dup 2)))
14048 (clobber (reg:CC FLAGS_REG))])]
14049 ""
14050 [(set_attr "type" "ishift")
14051 (set (attr "length_immediate")
14052 (if_then_else
14053 (and (match_operand 2 "const1_operand")
14054 (ior (match_test "TARGET_SHIFT1")
14055 (match_test "optimize_function_for_size_p (cfun)")))
14056 (const_string "0")
14057 (const_string "*")))
14058 (set_attr "mode" "<MODE>")])
14059
14060 ;; This pattern can't accept a variable shift count, since shifts by
14061 ;; zero don't affect the flags. We assume that shifts by constant
14062 ;; zero are optimized away.
;; Right shift that both stores the result in operand 0 and sets
;; FLAGS_REG from comparing that result with zero.  Accepted only when
;; the flags user is compatible with CCGOCmode (ix86_match_ccmode) and
;; when optimizing for size, or !TARGET_PARTIAL_FLAG_REG_STALL, or the
;; count is const1_rtx with TARGET_SHIFT1.  The one-operand form and
;; "length_immediate" 0 are used for a count of 1 when TARGET_SHIFT1 or
;; optimizing for size.
14063 (define_insn "*<insn><mode>3_cmp"
14064 [(set (reg FLAGS_REG)
14065 (compare
14066 (any_shiftrt:SWI
14067 (match_operand:SWI 1 "nonimmediate_operand" "0")
14068 (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
14069 (const_int 0)))
14070 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
14071 (any_shiftrt:SWI (match_dup 1) (match_dup 2)))]
14072 "(optimize_function_for_size_p (cfun)
14073 || !TARGET_PARTIAL_FLAG_REG_STALL
14074 || (operands[2] == const1_rtx
14075 && TARGET_SHIFT1))
14076 && ix86_match_ccmode (insn, CCGOCmode)
14077 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
14078 {
14079 if (operands[2] == const1_rtx
14080 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
14081 return "<shift>{<imodesuffix>}\t%0";
14082 else
14083 return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
14084 }
14085 [(set_attr "type" "ishift")
14086 (set (attr "length_immediate")
14087 (if_then_else
14088 (and (match_operand 2 "const1_operand")
14089 (ior (match_test "TARGET_SHIFT1")
14090 (match_test "optimize_function_for_size_p (cfun)")))
14091 (const_string "0")
14092 (const_string "*")))
14093 (set_attr "mode" "<MODE>")])
14094
;; SImode right shift by a constant matching const_1_to_31_operand that
;; sets FLAGS_REG from comparing the SImode result with zero and also
;; stores the result zero-extended to DImode.  TARGET_64BIT only; the
;; remaining conditions mirror the non-extending compare pattern
;; (size optimization, !TARGET_PARTIAL_FLAG_REG_STALL, or a count of 1
;; with TARGET_SHIFT1, plus a CCGOCmode-compatible flags user).
14095 (define_insn "*<insn>si3_cmp_zext"
14096 [(set (reg FLAGS_REG)
14097 (compare
14098 (any_shiftrt:SI (match_operand:SI 1 "register_operand" "0")
14099 (match_operand:QI 2 "const_1_to_31_operand"))
14100 (const_int 0)))
14101 (set (match_operand:DI 0 "register_operand" "=r")
14102 (zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
14103 "TARGET_64BIT
14104 && (optimize_function_for_size_p (cfun)
14105 || !TARGET_PARTIAL_FLAG_REG_STALL
14106 || (operands[2] == const1_rtx
14107 && TARGET_SHIFT1))
14108 && ix86_match_ccmode (insn, CCGOCmode)
14109 && ix86_binary_operator_ok (<CODE>, SImode, operands)"
14110 {
14111 if (operands[2] == const1_rtx
14112 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
14113 return "<shift>{l}\t%k0";
14114 else
14115 return "<shift>{l}\t{%2, %k0|%k0, %2}";
14116 }
14117 [(set_attr "type" "ishift")
14118 (set (attr "length_immediate")
14119 (if_then_else
14120 (and (match_operand 2 "const1_operand")
14121 (ior (match_test "TARGET_SHIFT1")
14122 (match_test "optimize_function_for_size_p (cfun)")))
14123 (const_string "0")
14124 (const_string "*")))
14125 (set_attr "mode" "SI")])
14126
;; Right shift performed only for its effect on FLAGS_REG: the flags are
;; set from comparing the shifted value with zero, while the shifted
;; value itself goes to a clobbered scratch (operand 0, tied to operand 1
;; through the "0" constraint).  The enabling condition is the same as
;; for the compare pattern that keeps the result, minus the
;; ix86_binary_operator_ok check.
14127 (define_insn "*<insn><mode>3_cconly"
14128 [(set (reg FLAGS_REG)
14129 (compare
14130 (any_shiftrt:SWI
14131 (match_operand:SWI 1 "register_operand" "0")
14132 (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
14133 (const_int 0)))
14134 (clobber (match_scratch:SWI 0 "=<r>"))]
14135 "(optimize_function_for_size_p (cfun)
14136 || !TARGET_PARTIAL_FLAG_REG_STALL
14137 || (operands[2] == const1_rtx
14138 && TARGET_SHIFT1))
14139 && ix86_match_ccmode (insn, CCGOCmode)"
14140 {
14141 if (operands[2] == const1_rtx
14142 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
14143 return "<shift>{<imodesuffix>}\t%0";
14144 else
14145 return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
14146 }
14147 [(set_attr "type" "ishift")
14148 (set (attr "length_immediate")
14149 (if_then_else
14150 (and (match_operand 2 "const1_operand")
14151 (ior (match_test "TARGET_SHIFT1")
14152 (match_test "optimize_function_for_size_p (cfun)")))
14153 (const_string "0")
14154 (const_string "*")))
14155 (set_attr "mode" "<MODE>")])
14156
;; QImode right shift applied in place to the 8-bit field at bit
;; position 8 of operand 0 (zero_extract with width 8, position 8),
;; clobbering FLAGS_REG.  The destination must be a "Q" register and is
;; printed through the %h0 modifier.  The insn condition additionally
;; requires operands 0 and 1 to be the same rtx; see the FIXME in the
;; condition for why this is needed on top of the "0" constraint.
14157 (define_insn "*<insn>qi_ext<mode>_2"
14158 [(set (zero_extract:SWI248
14159 (match_operand:SWI248 0 "register_operand" "+Q")
14160 (const_int 8)
14161 (const_int 8))
14162 (subreg:SWI248
14163 (any_shiftrt:QI
14164 (subreg:QI
14165 (zero_extract:SWI248
14166 (match_operand:SWI248 1 "register_operand" "0")
14167 (const_int 8)
14168 (const_int 8)) 0)
14169 (match_operand:QI 2 "nonmemory_operand" "cI")) 0))
14170 (clobber (reg:CC FLAGS_REG))]
14171 "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
14172 rtx_equal_p (operands[0], operands[1])"
14173 {
14174 if (operands[2] == const1_rtx
14175 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
14176 return "<shift>{b}\t%h0";
14177 else
14178 return "<shift>{b}\t{%2, %h0|%h0, %2}";
14179 }
14180 [(set_attr "type" "ishift")
14181 (set (attr "length_immediate")
14182 (if_then_else
14183 (and (match_operand 2 "const1_operand")
14184 (ior (match_test "TARGET_SHIFT1")
14185 (match_test "optimize_function_for_size_p (cfun)")))
14186 (const_string "0")
14187 (const_string "*")))
14188 (set_attr "mode" "QI")])
14189 \f
14190 ;; Rotate instructions
14191
;; Expander for TImode rotates (TARGET_64BIT only).  Three cases:
;;  - a constant count matching const_1_to_63_operand uses the
;;    ix86_<insn>ti3_doubleword pattern;
;;  - a constant count of exactly 64 uses <insn>64ti2_doubleword;
;;  - anything else forces the count into a QImode register, copies the
;;    two DImode halves of operand 1 into fresh pseudos, applies shld
;;    (for ROTATE) or shrd (otherwise) to each half with the other source
;;    half as the fill, moves the results into the halves of operand 0,
;;    and finally emits x86_shiftdi_adj_1 on the destination halves.
;;    NOTE(review): x86_shiftdi_adj_1 presumably fixes up counts with
;;    bit 6 set by exchanging the halves -- confirm against its definition.
14192 (define_expand "<insn>ti3"
14193 [(set (match_operand:TI 0 "register_operand")
14194 (any_rotate:TI (match_operand:TI 1 "register_operand")
14195 (match_operand:QI 2 "nonmemory_operand")))]
14196 "TARGET_64BIT"
14197 {
14198 if (const_1_to_63_operand (operands[2], VOIDmode))
14199 emit_insn (gen_ix86_<insn>ti3_doubleword
14200 (operands[0], operands[1], operands[2]));
14201 else if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 64)
14202 emit_insn (gen_<insn>64ti2_doubleword (operands[0], operands[1]));
14203 else
14204 {
14205 rtx amount = force_reg (QImode, operands[2]);
14206 rtx src_lo = gen_lowpart (DImode, operands[1]);
14207 rtx src_hi = gen_highpart (DImode, operands[1]);
14208 rtx tmp_lo = gen_reg_rtx (DImode);
14209 rtx tmp_hi = gen_reg_rtx (DImode);
14210 emit_move_insn (tmp_lo, src_lo);
14211 emit_move_insn (tmp_hi, src_hi);
14212 rtx (*shiftd) (rtx, rtx, rtx)
14213 = (<CODE> == ROTATE) ? gen_x86_64_shld : gen_x86_64_shrd;
14214 emit_insn (shiftd (tmp_lo, src_hi, amount));
14215 emit_insn (shiftd (tmp_hi, src_lo, amount));
14216 rtx dst_lo = gen_lowpart (DImode, operands[0]);
14217 rtx dst_hi = gen_highpart (DImode, operands[0]);
14218 emit_move_insn (dst_lo, tmp_lo);
14219 emit_move_insn (dst_hi, tmp_hi);
14220 emit_insn (gen_x86_shiftdi_adj_1 (dst_lo, dst_hi, amount, tmp_lo));
14221 }
14222 DONE;
14223 })
14224
;; Expander for DImode rotates.  With TARGET_64BIT the generic
;; ix86_expand_binary_operator is used.  Otherwise only constant counts
;; are handled: counts matching const_1_to_31_operand go through
;; ix86_<insn>di3_doubleword, a count of exactly 32 goes through
;; <insn>32di2_doubleword, and every other count makes the expander FAIL.
14225 (define_expand "<insn>di3"
14226 [(set (match_operand:DI 0 "shiftdi_operand")
14227 (any_rotate:DI (match_operand:DI 1 "shiftdi_operand")
14228 (match_operand:QI 2 "nonmemory_operand")))]
14229 ""
14230 {
14231 if (TARGET_64BIT)
14232 ix86_expand_binary_operator (<CODE>, DImode, operands);
14233 else if (const_1_to_31_operand (operands[2], VOIDmode))
14234 emit_insn (gen_ix86_<insn>di3_doubleword
14235 (operands[0], operands[1], operands[2]));
14236 else if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 32)
14237 emit_insn (gen_<insn>32di2_doubleword (operands[0], operands[1]));
14238 else
14239 FAIL;
14240
14241 DONE;
14242 })
14243
;; Expander for rotates in the SWIM124 modes: unconditionally delegates
;; to ix86_expand_binary_operator with the rotate code and mode, then
;; reports DONE so the RTL template itself is never emitted.
14244 (define_expand "<insn><mode>3"
14245 [(set (match_operand:SWIM124 0 "nonimmediate_operand")
14246 (any_rotate:SWIM124 (match_operand:SWIM124 1 "nonimmediate_operand")
14247 (match_operand:QI 2 "nonmemory_operand")))]
14248 ""
14249 "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
14250
14251 ;; Avoid useless masking of count operand.
;; Matches a rotate whose count is the QImode subreg of (and op2 op3),
;; before reload only (ix86_pre_reload_split).  The AND is dropped when
;; the constant op3 has every bit of (bitsize - 1) set, i.e.
;; (op3 & (bitsize - 1)) == bitsize - 1 for the rotated mode.  Operand 2
;; must have an integer mode of 2 to 4 bytes (2 to 8 with TARGET_64BIT).
;; The insn always splits ("&& 1") into the plain rotate with the count
;; taken as the QImode low part of operand 2.
14252 (define_insn_and_split "*<insn><mode>3_mask"
14253 [(set (match_operand:SWI 0 "nonimmediate_operand")
14254 (any_rotate:SWI
14255 (match_operand:SWI 1 "nonimmediate_operand")
14256 (subreg:QI
14257 (and
14258 (match_operand 2 "register_operand" "c")
14259 (match_operand 3 "const_int_operand")) 0)))
14260 (clobber (reg:CC FLAGS_REG))]
14261 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
14262 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
14263 == GET_MODE_BITSIZE (<MODE>mode)-1
14264 && GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT
14265 && IN_RANGE (GET_MODE_SIZE (GET_MODE (operands[2])), 2,
14266 4 << (TARGET_64BIT ? 1 : 0))
14267 && ix86_pre_reload_split ()"
14268 "#"
14269 "&& 1"
14270 [(parallel
14271 [(set (match_dup 0)
14272 (any_rotate:SWI (match_dup 1)
14273 (match_dup 2)))
14274 (clobber (reg:CC FLAGS_REG))])]
14275 {
14276 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
14277 operands[2] = gen_lowpart (QImode, operands[2]);
14278 })
14279
;; Rotate of a constant (operand 1) by a masked register count, where the
;; count is the QImode subreg of (and op2 op3) and the mask op3 has every
;; bit of (bitsize - 1) set.  Operand 2 must have an integer mode of 2 to
;; 4 bytes (2 to 8 with TARGET_64BIT).  The split loads the constant into
;; a new pseudo (operand 4, from gen_reg_rtx) and rotates that pseudo by
;; the QImode subreg of operand 2, dropping the AND.
14280 (define_split
14281 [(set (match_operand:SWI 0 "register_operand")
14282 (any_rotate:SWI
14283 (match_operand:SWI 1 "const_int_operand")
14284 (subreg:QI
14285 (and
14286 (match_operand 2 "register_operand")
14287 (match_operand 3 "const_int_operand")) 0)))]
14288 "(INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode) - 1))
14289 == GET_MODE_BITSIZE (<MODE>mode) - 1
14290 && GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT
14291 && IN_RANGE (GET_MODE_SIZE (GET_MODE (operands[2])), 2,
14292 4 << (TARGET_64BIT ? 1 : 0))"
14293 [(set (match_dup 4) (match_dup 1))
14294 (set (match_dup 0)
14295 (any_rotate:SWI (match_dup 4)
14296 (subreg:QI (match_dup 2) 0)))]
14297 "operands[4] = gen_reg_rtx (<MODE>mode);")
14298
;; Rotate whose count is (and:QI op2 op3) with op2 already a QImode
;; register.  Before reload (ix86_pre_reload_split), when the constant
;; op3 has every bit of (bitsize - 1) set, the insn always splits
;; ("&& 1") into the plain rotate by operand 2, dropping the AND.
14299 (define_insn_and_split "*<insn><mode>3_mask_1"
14300 [(set (match_operand:SWI 0 "nonimmediate_operand")
14301 (any_rotate:SWI
14302 (match_operand:SWI 1 "nonimmediate_operand")
14303 (and:QI
14304 (match_operand:QI 2 "register_operand" "c")
14305 (match_operand:QI 3 "const_int_operand"))))
14306 (clobber (reg:CC FLAGS_REG))]
14307 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
14308 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
14309 == GET_MODE_BITSIZE (<MODE>mode)-1
14310 && ix86_pre_reload_split ()"
14311 "#"
14312 "&& 1"
14313 [(parallel
14314 [(set (match_dup 0)
14315 (any_rotate:SWI (match_dup 1)
14316 (match_dup 2)))
14317 (clobber (reg:CC FLAGS_REG))])])
14318
;; Rotate of a constant (operand 1) by (and:QI op2 op3), where the mask
;; op3 has every bit of (bitsize - 1) set.  The split loads the constant
;; into a new pseudo (operand 4, from gen_reg_rtx) and rotates that
;; pseudo by operand 2 directly, dropping the AND.
14319 (define_split
14320 [(set (match_operand:SWI 0 "register_operand")
14321 (any_rotate:SWI
14322 (match_operand:SWI 1 "const_int_operand")
14323 (and:QI
14324 (match_operand:QI 2 "register_operand")
14325 (match_operand:QI 3 "const_int_operand"))))]
14326 "(INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode) - 1))
14327 == GET_MODE_BITSIZE (<MODE>mode) - 1"
14328 [(set (match_dup 4) (match_dup 1))
14329 (set (match_dup 0)
14330 (any_rotate:SWI (match_dup 4) (match_dup 2)))]
14331 "operands[4] = gen_reg_rtx (<MODE>mode);")
14332
14333 ;; Implement rotation using two double-precision
14334 ;; shift instructions and a scratch register.
14335
;; Rotate left of a double-word (<DWI>) register by an immediate count,
;; with a FLAGS_REG clobber and an earlyclobber word-sized scratch
;; (operand 3).  Always output as "#"; after reload it splits into:
;;   1. copy half operand 4 into the scratch;
;;   2. operand 4 = (operand 4 << n) | bits shifted in from operand 5;
;;   3. operand 5 = (operand 5 << n) | bits shifted in from the scratch,
;;      i.e. from the original value of operand 4.
;; In the generated RTL the count is masked with operand 6
;; (word bitsize - 1) and the complementary shift uses operand 7
;; (word bitsize) minus that masked count.
;; operands[4] and operands[5] are the two halves of operand 0 as
;; produced by split_double_mode.
;; NOTE(review): presumably [4] is the low half and [5] the high half --
;; confirm against split_double_mode.
14336 (define_insn_and_split "ix86_rotl<dwi>3_doubleword"
14337 [(set (match_operand:<DWI> 0 "register_operand" "=r")
14338 (rotate:<DWI> (match_operand:<DWI> 1 "register_operand" "0")
14339 (match_operand:QI 2 "<shift_immediate_operand>" "<S>")))
14340 (clobber (reg:CC FLAGS_REG))
14341 (clobber (match_scratch:DWIH 3 "=&r"))]
14342 ""
14343 "#"
14344 "reload_completed"
14345 [(set (match_dup 3) (match_dup 4))
14346 (parallel
14347 [(set (match_dup 4)
14348 (ior:DWIH (ashift:DWIH (match_dup 4)
14349 (and:QI (match_dup 2) (match_dup 6)))
14350 (subreg:DWIH
14351 (lshiftrt:<DWI> (zero_extend:<DWI> (match_dup 5))
14352 (minus:QI (match_dup 7)
14353 (and:QI (match_dup 2)
14354 (match_dup 6)))) 0)))
14355 (clobber (reg:CC FLAGS_REG))])
14356 (parallel
14357 [(set (match_dup 5)
14358 (ior:DWIH (ashift:DWIH (match_dup 5)
14359 (and:QI (match_dup 2) (match_dup 6)))
14360 (subreg:DWIH
14361 (lshiftrt:<DWI> (zero_extend:<DWI> (match_dup 3))
14362 (minus:QI (match_dup 7)
14363 (and:QI (match_dup 2)
14364 (match_dup 6)))) 0)))
14365 (clobber (reg:CC FLAGS_REG))])]
14366 {
14367 operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - 1);
14368 operands[7] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
14369
14370 split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
14371 })
14372
;; Rotate right of a double-word (<DWI>) register by an immediate count,
;; with a FLAGS_REG clobber and an earlyclobber word-sized scratch
;; (operand 3).  Always output as "#"; after reload it splits into:
;;   1. copy half operand 4 into the scratch;
;;   2. operand 4 = (operand 4 >> n) | bits shifted in from operand 5;
;;   3. operand 5 = (operand 5 >> n) | bits shifted in from the scratch,
;;      i.e. from the original value of operand 4.
;; In the generated RTL the count is masked with operand 6
;; (word bitsize - 1) and the complementary shift uses operand 7
;; (word bitsize) minus that masked count.
;; operands[4] and operands[5] are the two halves of operand 0 as
;; produced by split_double_mode.
;; NOTE(review): presumably [4] is the low half and [5] the high half --
;; confirm against split_double_mode.
14373 (define_insn_and_split "ix86_rotr<dwi>3_doubleword"
14374 [(set (match_operand:<DWI> 0 "register_operand" "=r")
14375 (rotatert:<DWI> (match_operand:<DWI> 1 "register_operand" "0")
14376 (match_operand:QI 2 "<shift_immediate_operand>" "<S>")))
14377 (clobber (reg:CC FLAGS_REG))
14378 (clobber (match_scratch:DWIH 3 "=&r"))]
14379 ""
14380 "#"
14381 "reload_completed"
14382 [(set (match_dup 3) (match_dup 4))
14383 (parallel
14384 [(set (match_dup 4)
14385 (ior:DWIH (lshiftrt:DWIH (match_dup 4)
14386 (and:QI (match_dup 2) (match_dup 6)))
14387 (subreg:DWIH
14388 (ashift:<DWI> (zero_extend:<DWI> (match_dup 5))
14389 (minus:QI (match_dup 7)
14390 (and:QI (match_dup 2)
14391 (match_dup 6)))) 0)))
14392 (clobber (reg:CC FLAGS_REG))])
14393 (parallel
14394 [(set (match_dup 5)
14395 (ior:DWIH (lshiftrt:DWIH (match_dup 5)
14396 (and:QI (match_dup 2) (match_dup 6)))
14397 (subreg:DWIH
14398 (ashift:<DWI> (zero_extend:<DWI> (match_dup 3))
14399 (minus:QI (match_dup 7)
14400 (and:QI (match_dup 2)
14401 (match_dup 6)))) 0)))
14402 (clobber (reg:CC FLAGS_REG))])]
14403 {
14404 operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - 1);
14405 operands[7] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
14406
14407 split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
14408 })
14409
;; DImode rotate by exactly 32 on !TARGET_64BIT; no FLAGS_REG clobber.
;; Always output as "#"; after reload it becomes an exchange of the two
;; SImode halves.  split_double_mode is called for two values starting at
;; operands[0], leaving the halves of the destination in operands[0] and
;; operands[2] and the halves of the source in operands[1] and
;; operands[3]; the template then stores operand 3 into operand 0 and
;; operand 1 into operand 2.  When the first destination half equals the
;; first source half after splitting, a single swapsi insn is emitted
;; instead of the two moves.
14410 (define_insn_and_split "<insn>32di2_doubleword"
14411 [(set (match_operand:DI 0 "register_operand" "=r,r,r")
14412 (any_rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0,r,o")
14413 (const_int 32)))]
14414 "!TARGET_64BIT"
14415 "#"
14416 "&& reload_completed"
14417 [(set (match_dup 0) (match_dup 3))
14418 (set (match_dup 2) (match_dup 1))]
14419 {
14420 split_double_mode (DImode, &operands[0], 2, &operands[0], &operands[2]);
14421 if (rtx_equal_p (operands[0], operands[1]))
14422 {
14423 emit_insn (gen_swapsi (operands[0], operands[2]));
14424 DONE;
14425 }
14426 })
14427
;; TImode rotate by exactly 64 on TARGET_64BIT; no FLAGS_REG clobber.
;; Always output as "#"; after reload it becomes an exchange of the two
;; DImode halves.  split_double_mode is called for two values starting at
;; operands[0], leaving the halves of the destination in operands[0] and
;; operands[2] and the halves of the source in operands[1] and
;; operands[3]; the template then stores operand 3 into operand 0 and
;; operand 1 into operand 2.  When the first destination half equals the
;; first source half after splitting, a single swapdi insn is emitted
;; instead of the two moves.
14428 (define_insn_and_split "<insn>64ti2_doubleword"
14429 [(set (match_operand:TI 0 "register_operand" "=r,r,r")
14430 (any_rotate:TI (match_operand:TI 1 "nonimmediate_operand" "0,r,o")
14431 (const_int 64)))]
14432 "TARGET_64BIT"
14433 "#"
14434 "&& reload_completed"
14435 [(set (match_dup 0) (match_dup 3))
14436 (set (match_dup 2) (match_dup 1))]
14437 {
14438 split_double_mode (TImode, &operands[0], 2, &operands[0], &operands[2]);
14439 if (rtx_equal_p (operands[0], operands[1]))
14440 {
14441 emit_insn (gen_swapdi (operands[0], operands[2]));
14442 DONE;
14443 }
14444 })
14445
;; Per-mode predicate name for the immediate count of the BMI2 rorx
;; pattern: counts 0..31 for SImode and 0..63 for DImode.
14446 (define_mode_attr rorx_immediate_operand
14447 [(SI "const_0_to_31_operand")
14448 (DI "const_0_to_63_operand")])
14449
;; BMI2 rotate right by an immediate in the SWI48 modes.  Three-operand
;; form with a separate destination and no FLAGS_REG clobber.  Only
;; enabled with TARGET_BMI2 when the function is not optimized for size.
14450 (define_insn "*bmi2_rorx<mode>3_1"
14451 [(set (match_operand:SWI48 0 "register_operand" "=r")
14452 (rotatert:SWI48
14453 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
14454 (match_operand:QI 2 "<rorx_immediate_operand>" "<S>")))]
14455 "TARGET_BMI2 && !optimize_function_for_size_p (cfun)"
14456 "rorx\t{%2, %1, %0|%0, %1, %2}"
14457 [(set_attr "type" "rotatex")
14458 (set_attr "mode" "<MODE>")])
14459
;; Rotate (either direction) in the SWI48 modes, clobbering FLAGS_REG.
;; Alternative 0 (type rotate) is the two-operand form with operand 1
;; tied to the destination.  Alternative 1 (isa bmi2, type rotatex,
;; immediate count only) is output as "#" and must be split before final
;; output.  "preferred_for_size" is true only for alternative 0, so the
;; rotatex alternative is not preferred when optimizing for size.
;; For type rotate, a count of 1 uses the one-operand encoding (and
;; "length_immediate" 0) when TARGET_SHIFT1 or optimizing for size.
14460 (define_insn "*<insn><mode>3_1"
14461 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
14462 (any_rotate:SWI48
14463 (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
14464 (match_operand:QI 2 "nonmemory_operand" "c<S>,<S>")))
14465 (clobber (reg:CC FLAGS_REG))]
14466 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
14467 {
14468 switch (get_attr_type (insn))
14469 {
14470 case TYPE_ROTATEX:
14471 return "#";
14472
14473 default:
14474 if (operands[2] == const1_rtx
14475 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
14476 return "<rotate>{<imodesuffix>}\t%0";
14477 else
14478 return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
14479 }
14480 }
14481 [(set_attr "isa" "*,bmi2")
14482 (set_attr "type" "rotate,rotatex")
14483 (set (attr "preferred_for_size")
14484 (cond [(eq_attr "alternative" "0")
14485 (symbol_ref "true")]
14486 (symbol_ref "false")))
14487 (set (attr "length_immediate")
14488 (if_then_else
14489 (and (eq_attr "type" "rotate")
14490 (and (match_operand 2 "const1_operand")
14491 (ior (match_test "TARGET_SHIFT1")
14492 (match_test "optimize_function_for_size_p (cfun)"))))
14493 (const_string "0")
14494 (const_string "*")))
14495 (set_attr "mode" "<MODE>")])
14496
14497 ;; Convert rotate to the rotatex pattern to avoid flags dependency.
;; Left-rotate case: after reload, with TARGET_BMI2 and not optimizing
;; for size, a rotate left of a register destination by a constant n is
;; rewritten as a rotate right by (bitsize - n) % bitsize, without the
;; FLAGS_REG clobber.  The modulo keeps a count of 0 at 0 instead of
;; turning it into bitsize.
14498 (define_split
14499 [(set (match_operand:SWI48 0 "register_operand")
14500 (rotate:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
14501 (match_operand:QI 2 "const_int_operand")))
14502 (clobber (reg:CC FLAGS_REG))]
14503 "TARGET_BMI2 && reload_completed && !optimize_function_for_size_p (cfun)"
14504 [(set (match_dup 0)
14505 (rotatert:SWI48 (match_dup 1) (match_dup 2)))]
14506 {
14507 int bitsize = GET_MODE_BITSIZE (<MODE>mode);
14508
14509 operands[2] = GEN_INT ((bitsize - INTVAL (operands[2])) % bitsize);
14510 })
14511
;; Right-rotate counterpart of the rotate-to-rotatex conversion: after
;; reload, with TARGET_BMI2 and not optimizing for size, a rotate right of
;; a register destination by a constant is re-emitted unchanged except
;; that the FLAGS_REG clobber is removed.
14512 (define_split
14513 [(set (match_operand:SWI48 0 "register_operand")
14514 (rotatert:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
14515 (match_operand:QI 2 "const_int_operand")))
14516 (clobber (reg:CC FLAGS_REG))]
14517 "TARGET_BMI2 && reload_completed && !optimize_function_for_size_p (cfun)"
14518 [(set (match_dup 0)
14519 (rotatert:SWI48 (match_dup 1) (match_dup 2)))])
14520
;; BMI2 SImode rotate right by a constant matching const_0_to_31_operand,
;; with the result zero-extended to DImode.  No FLAGS_REG clobber.
;; Requires TARGET_64BIT and TARGET_BMI2 and is disabled when optimizing
;; for size.  The destination is printed through the %k0 modifier.
14521 (define_insn "*bmi2_rorxsi3_1_zext"
14522 [(set (match_operand:DI 0 "register_operand" "=r")
14523 (zero_extend:DI
14524 (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
14525 (match_operand:QI 2 "const_0_to_31_operand"))))]
14526 "TARGET_64BIT && TARGET_BMI2 && !optimize_function_for_size_p (cfun)"
14527 "rorx\t{%2, %1, %k0|%k0, %1, %2}"
14528 [(set_attr "type" "rotatex")
14529 (set_attr "mode" "SI")])
14530
;; SImode rotate (either direction) whose result is zero-extended to
;; DImode, clobbering FLAGS_REG; TARGET_64BIT only.
;; Alternative 0 (type rotate) is the two-operand "<rotate>l" with
;; operand 1 tied to the destination.  Alternative 1 (isa bmi2, type
;; rotatex, immediate count only) is output as "#" and must be split
;; before final output.  "preferred_for_size" is true only for
;; alternative 0.  For type rotate, a count of 1 uses the one-operand
;; encoding (and "length_immediate" 0) when TARGET_SHIFT1 or optimizing
;; for size.
14531 (define_insn "*<insn>si3_1_zext"
14532 [(set (match_operand:DI 0 "register_operand" "=r,r")
14533 (zero_extend:DI
14534 (any_rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
14535 (match_operand:QI 2 "nonmemory_operand" "cI,I"))))
14536 (clobber (reg:CC FLAGS_REG))]
14537 "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
14538 {
14539 switch (get_attr_type (insn))
14540 {
14541 case TYPE_ROTATEX:
14542 return "#";
14543
14544 default:
14545 if (operands[2] == const1_rtx
14546 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
14547 return "<rotate>{l}\t%k0";
14548 else
14549 return "<rotate>{l}\t{%2, %k0|%k0, %2}";
14550 }
14551 }
14552 [(set_attr "isa" "*,bmi2")
14553 (set_attr "type" "rotate,rotatex")
14554 (set (attr "preferred_for_size")
14555 (cond [(eq_attr "alternative" "0")
14556 (symbol_ref "true")]
14557 (symbol_ref "false")))
14558 (set (attr "length_immediate")
14559 (if_then_else
14560 (and (eq_attr "type" "rotate")
14561 (and (match_operand 2 "const1_operand")
14562 (ior (match_test "TARGET_SHIFT1")
14563 (match_test "optimize_function_for_size_p (cfun)"))))
14564 (const_string "0")
14565 (const_string "*")))
14566 (set_attr "mode" "SI")])
14567
14568 ;; Convert rotate to the rotatex pattern to avoid flags dependency.
;; Zero-extending left-rotate case: after reload, with TARGET_64BIT and
;; TARGET_BMI2 and not optimizing for size, an SImode rotate left by a
;; constant n is rewritten as a rotate right by (32 - n) % 32 inside the
;; zero_extend, without the FLAGS_REG clobber.
14569 (define_split
14570 [(set (match_operand:DI 0 "register_operand")
14571 (zero_extend:DI
14572 (rotate:SI (match_operand:SI 1 "nonimmediate_operand")
14573 (match_operand:QI 2 "const_int_operand"))))
14574 (clobber (reg:CC FLAGS_REG))]
14575 "TARGET_64BIT && TARGET_BMI2 && reload_completed
14576 && !optimize_function_for_size_p (cfun)"
14577 [(set (match_dup 0)
14578 (zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))]
14579 {
14580 int bitsize = GET_MODE_BITSIZE (SImode);
14581
14582 operands[2] = GEN_INT ((bitsize - INTVAL (operands[2])) % bitsize);
14583 })
14584
;; Zero-extending right-rotate counterpart: after reload, with
;; TARGET_64BIT and TARGET_BMI2 and not optimizing for size, an SImode
;; rotate right by a constant inside a zero_extend is re-emitted
;; unchanged except that the FLAGS_REG clobber is removed.
14585 (define_split
14586 [(set (match_operand:DI 0 "register_operand")
14587 (zero_extend:DI
14588 (rotatert:SI (match_operand:SI 1 "nonimmediate_operand")
14589 (match_operand:QI 2 "const_int_operand"))))
14590 (clobber (reg:CC FLAGS_REG))]
14591 "TARGET_64BIT && TARGET_BMI2 && reload_completed
14592 && !optimize_function_for_size_p (cfun)"
14593 [(set (match_dup 0)
14594 (zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))])
14595
;; Rotate (either direction) in the SWI12 modes, clobbering FLAGS_REG.
;; Single alternative: two-operand form with operand 1 tied to the
;; destination.  A count of const1_rtx is printed with the one-operand
;; encoding when TARGET_SHIFT1 is set or the function is optimized for
;; size, and "length_immediate" is 0 under the same condition.
14596 (define_insn "*<insn><mode>3_1"
14597 [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")
14598 (any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0")
14599 (match_operand:QI 2 "nonmemory_operand" "c<S>")))
14600 (clobber (reg:CC FLAGS_REG))]
14601 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
14602 {
14603 if (operands[2] == const1_rtx
14604 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
14605 return "<rotate>{<imodesuffix>}\t%0";
14606 else
14607 return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
14608 }
14609 [(set_attr "type" "rotate")
14610 (set (attr "length_immediate")
14611 (if_then_else
14612 (and (match_operand 2 "const1_operand")
14613 (ior (match_test "TARGET_SHIFT1")
14614 (match_test "optimize_function_for_size_p (cfun)")))
14615 (const_string "0")
14616 (const_string "*")))
14617 (set_attr "mode" "<MODE>")])
14618
14619 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Rotate that writes only the low part of operand 0 (strict_low_part),
;; in the SWI12 modes, clobbering FLAGS_REG.  Enabled when
;; !TARGET_PARTIAL_REG_STALL or when optimizing for size.
;; Alternative 0 ties operand 1 to operand 0 and prints the rotate
;; directly.  Alternative 1 (earlyclobber destination, distinct source)
;; prints "#" and is split after reload into a strict_low_part move of
;; operand 1 into operand 0 followed by the same rotate done in place.
14620 (define_insn_and_split "*<insn><mode>3_1_slp"
14621 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
14622 (any_rotate:SWI12 (match_operand:SWI12 1 "register_operand" "0,!<r>")
14623 (match_operand:QI 2 "nonmemory_operand" "cI,cI")))
14624 (clobber (reg:CC FLAGS_REG))]
14625 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
14626 {
14627 if (which_alternative)
14628 return "#";
14629
14630 if (operands[2] == const1_rtx
14631 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
14632 return "<rotate>{<imodesuffix>}\t%0";
14633 else
14634 return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
14635 }
14636 "&& reload_completed"
14637 [(set (strict_low_part (match_dup 0)) (match_dup 1))
14638 (parallel
14639 [(set (strict_low_part (match_dup 0))
14640 (any_rotate:SWI12 (match_dup 0) (match_dup 2)))
14641 (clobber (reg:CC FLAGS_REG))])]
14642 ""
14643 [(set_attr "type" "rotate")
14644 (set (attr "length_immediate")
14645 (if_then_else
14646 (and (match_operand 2 "const1_operand")
14647 (ior (match_test "TARGET_SHIFT1")
14648 (match_test "optimize_function_for_size_p (cfun)")))
14649 (const_string "0")
14650 (const_string "*")))
14651 (set_attr "mode" "<MODE>")])
14652
;; An in-place HImode rotate by 8 (either direction) of a register
;; matching QIreg_operand exchanges the register's two bytes, so after
;; reload it is rewritten as a strict_low_part bswap:HI of the same
;; register.  Only done when TARGET_USE_XCHGB is set or the function is
;; optimized for size.
14653 (define_split
14654 [(set (match_operand:HI 0 "QIreg_operand")
14655 (any_rotate:HI (match_dup 0) (const_int 8)))
14656 (clobber (reg:CC FLAGS_REG))]
14657 "reload_completed
14658 && (TARGET_USE_XCHGB || optimize_function_for_size_p (cfun))"
14659 [(parallel [(set (strict_low_part (match_dup 0))
14660 (bswap:HI (match_dup 0)))
14661 (clobber (reg:CC FLAGS_REG))])])
14662 \f
14663 ;; Bit set / bit test instructions
14664
14665 ;; %%% bts, btr, btc
14666
14667 ;; These instructions are *slow* when applied to memory.
14668
;; Maps the RTL code of a bit operation to its mnemonic: ior to "bts"
;; and xor to "btc".
14669 (define_code_attr btsc [(ior "bts") (xor "btc")])
14670
;; Bit set / bit complement on a register in the SWI48 modes: operand 1
;; is combined (ior or xor, per any_or) with 1 shifted left by the
;; register count in operand 2, clobbering FLAGS_REG.  Operand 1 is tied
;; to the destination.  Enabled by TARGET_USE_BT; the count is printed
;; through the %<k>2 modifier.
14671 (define_insn "*<btsc><mode>"
14672 [(set (match_operand:SWI48 0 "register_operand" "=r")
14673 (any_or:SWI48
14674 (ashift:SWI48 (const_int 1)
14675 (match_operand:QI 2 "register_operand" "r"))
14676 (match_operand:SWI48 1 "register_operand" "0")))
14677 (clobber (reg:CC FLAGS_REG))]
14678 "TARGET_USE_BT"
14679 "<btsc>{<imodesuffix>}\t{%<k>2, %0|%0, %<k>2}"
14680 [(set_attr "type" "alu1")
14681 (set_attr "prefix_0f" "1")
14682 (set_attr "znver1_decode" "double")
14683 (set_attr "mode" "<MODE>")])
14684
14685 ;; Avoid useless masking of count operand.
;; Matches the bit set / bit complement form whose bit index is the
;; QImode subreg of (and op1 op2), before reload only
;; (ix86_pre_reload_split) and with TARGET_USE_BT.  The AND is dropped
;; when the constant op2 has every bit of (bitsize - 1) set.  Operand 1
;; must have an integer mode of 2 to 4 bytes (2 to 8 with TARGET_64BIT).
;; The insn always splits ("&& 1") into the unmasked form with the index
;; taken as the QImode low part of operand 1.
14686 (define_insn_and_split "*<btsc><mode>_mask"
14687 [(set (match_operand:SWI48 0 "register_operand")
14688 (any_or:SWI48
14689 (ashift:SWI48
14690 (const_int 1)
14691 (subreg:QI
14692 (and
14693 (match_operand 1 "register_operand")
14694 (match_operand 2 "const_int_operand")) 0))
14695 (match_operand:SWI48 3 "register_operand")))
14696 (clobber (reg:CC FLAGS_REG))]
14697 "TARGET_USE_BT
14698 && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
14699 == GET_MODE_BITSIZE (<MODE>mode)-1
14700 && GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14701 && IN_RANGE (GET_MODE_SIZE (GET_MODE (operands[1])), 2,
14702 4 << (TARGET_64BIT ? 1 : 0))
14703 && ix86_pre_reload_split ()"
14704 "#"
14705 "&& 1"
14706 [(parallel
14707 [(set (match_dup 0)
14708 (any_or:SWI48
14709 (ashift:SWI48 (const_int 1)
14710 (match_dup 1))
14711 (match_dup 3)))
14712 (clobber (reg:CC FLAGS_REG))])]
14713 {
14714 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
14715 operands[1] = gen_lowpart (QImode, operands[1]);
14716 })
14717
;; Bit set / bit complement whose bit index is (and:QI op1 op2) with op1
;; already a QImode register.  Before reload (ix86_pre_reload_split),
;; with TARGET_USE_BT and a constant op2 that has every bit of
;; (bitsize - 1) set, the insn always splits ("&& 1") into the unmasked
;; form indexed by operand 1 directly.
14718 (define_insn_and_split "*<btsc><mode>_mask_1"
14719 [(set (match_operand:SWI48 0 "register_operand")
14720 (any_or:SWI48
14721 (ashift:SWI48
14722 (const_int 1)
14723 (and:QI
14724 (match_operand:QI 1 "register_operand")
14725 (match_operand:QI 2 "const_int_operand")))
14726 (match_operand:SWI48 3 "register_operand")))
14727 (clobber (reg:CC FLAGS_REG))]
14728 "TARGET_USE_BT
14729 && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
14730 == GET_MODE_BITSIZE (<MODE>mode)-1
14731 && ix86_pre_reload_split ()"
14732 "#"
14733 "&& 1"
14734 [(parallel
14735 [(set (match_dup 0)
14736 (any_or:SWI48
14737 (ashift:SWI48 (const_int 1)
14738 (match_dup 1))
14739 (match_dup 3)))
14740 (clobber (reg:CC FLAGS_REG))])])
14741
;; Bit reset on a register in the SWI48 modes, clobbering FLAGS_REG.
;; The mask is -2 (all ones except bit 0) rotated left by the register
;; count in operand 2, i.e. all ones except the selected bit; ANDing it
;; with operand 1 clears that bit.  Operand 1 is tied to the destination.
;; Enabled by TARGET_USE_BT; the count is printed through %<k>2.
14742 (define_insn "*btr<mode>"
14743 [(set (match_operand:SWI48 0 "register_operand" "=r")
14744 (and:SWI48
14745 (rotate:SWI48 (const_int -2)
14746 (match_operand:QI 2 "register_operand" "r"))
14747 (match_operand:SWI48 1 "register_operand" "0")))
14748 (clobber (reg:CC FLAGS_REG))]
14749 "TARGET_USE_BT"
14750 "btr{<imodesuffix>}\t{%<k>2, %0|%0, %<k>2}"
14751 [(set_attr "type" "alu1")
14752 (set_attr "prefix_0f" "1")
14753 (set_attr "znver1_decode" "double")
14754 (set_attr "mode" "<MODE>")])
14755
14756 ;; Avoid useless masking of count operand.
;; Matches the bit reset form whose bit index is the QImode subreg of
;; (and op1 op2), before reload only (ix86_pre_reload_split) and with
;; TARGET_USE_BT.  The AND is dropped when the constant op2 has every bit
;; of (bitsize - 1) set.  Operand 1 must have an integer mode of 2 to 4
;; bytes (2 to 8 with TARGET_64BIT).  The insn always splits ("&& 1")
;; into the unmasked form with the index taken as the QImode low part of
;; operand 1.
14757 (define_insn_and_split "*btr<mode>_mask"
14758 [(set (match_operand:SWI48 0 "register_operand")
14759 (and:SWI48
14760 (rotate:SWI48
14761 (const_int -2)
14762 (subreg:QI
14763 (and
14764 (match_operand 1 "register_operand")
14765 (match_operand 2 "const_int_operand")) 0))
14766 (match_operand:SWI48 3 "register_operand")))
14767 (clobber (reg:CC FLAGS_REG))]
14768 "TARGET_USE_BT
14769 && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
14770 == GET_MODE_BITSIZE (<MODE>mode)-1
14771 && GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14772 && IN_RANGE (GET_MODE_SIZE (GET_MODE (operands[1])), 2,
14773 4 << (TARGET_64BIT ? 1 : 0))
14774 && ix86_pre_reload_split ()"
14775 "#"
14776 "&& 1"
14777 [(parallel
14778 [(set (match_dup 0)
14779 (and:SWI48
14780 (rotate:SWI48 (const_int -2)
14781 (match_dup 1))
14782 (match_dup 3)))
14783 (clobber (reg:CC FLAGS_REG))])]
14784 {
14785 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
14786 operands[1] = gen_lowpart (QImode, operands[1]);
14787 })
14788
;; Same bit-reset shape as "*btr<mode>_mask", but the count is a QImode AND
;; of QImode operand 1 with constant operand 2 (no subreg).  When the constant
;; keeps all count bits relevant for <MODE>mode, the split re-emits the
;; and/rotate form with operand 1 as the count.
14789 (define_insn_and_split "*btr<mode>_mask_1"
14790 [(set (match_operand:SWI48 0 "register_operand")
14791 (and:SWI48
14792 (rotate:SWI48
14793 (const_int -2)
14794 (and:QI
14795 (match_operand:QI 1 "register_operand")
14796 (match_operand:QI 2 "const_int_operand")))
14797 (match_operand:SWI48 3 "register_operand")))
14798 (clobber (reg:CC FLAGS_REG))]
14799 "TARGET_USE_BT
14800 && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
14801 == GET_MODE_BITSIZE (<MODE>mode)-1
14802 && ix86_pre_reload_split ()"
14803 "#"
14804 "&& 1"
14805 [(parallel
14806 [(set (match_dup 0)
14807 (and:SWI48
14808 (rotate:SWI48 (const_int -2)
14809 (match_dup 1))
14810 (match_dup 3)))
14811 (clobber (reg:CC FLAGS_REG))])])
14812
;; Bit reset for SWI12 destinations: an SImode rotate of -2 by operand 2 is
;; taken as a <MODE>mode subreg and ANDed with operand 1.  The split rewrites
;; this as the SImode and/rotate form; operand 0 and operand 1 (after being
;; forced into a register) are replaced by their SImode lowpart subregs.
14813 (define_insn_and_split "*btr<mode>_1"
14814 [(set (match_operand:SWI12 0 "register_operand")
14815 (and:SWI12
14816 (subreg:SWI12
14817 (rotate:SI (const_int -2)
14818 (match_operand:QI 2 "register_operand")) 0)
14819 (match_operand:SWI12 1 "nonimmediate_operand")))
14820 (clobber (reg:CC FLAGS_REG))]
14821 "TARGET_USE_BT && ix86_pre_reload_split ()"
14822 "#"
14823 "&& 1"
14824 [(parallel
14825 [(set (match_dup 0)
14826 (and:SI (rotate:SI (const_int -2) (match_dup 2))
14827 (match_dup 1)))
14828 (clobber (reg:CC FLAGS_REG))])]
14829 {
14830 operands[0] = lowpart_subreg (SImode, operands[0], <MODE>mode);
14831 operands[1] = force_reg (<MODE>mode, operands[1]);
14832 operands[1] = lowpart_subreg (SImode, operands[1], <MODE>mode);
14833 })
14834
;; Store of zero into a one-bit zero_extract of SWI12 operand 0 at position
;; zero_extend (operand 1).  The split condition requires operand 0 to be
;; memory: the value is loaded into a fresh pseudo (operand 2), the bit is
;; cleared with the SImode and/rotate form into a second fresh pseudo
;; (operand 5, accessed through SImode lowpart subregs), and operand 5 is
;; stored back to operand 0.
14835 (define_insn_and_split "*btr<mode>_2"
14836 [(set (zero_extract:HI
14837 (match_operand:SWI12 0 "nonimmediate_operand")
14838 (const_int 1)
14839 (zero_extend:SI (match_operand:QI 1 "register_operand")))
14840 (const_int 0))
14841 (clobber (reg:CC FLAGS_REG))]
14842 "TARGET_USE_BT && ix86_pre_reload_split ()"
14843 "#"
14844 "&& MEM_P (operands[0])"
14845 [(set (match_dup 2) (match_dup 0))
14846 (parallel
14847 [(set (match_dup 3)
14848 (and:SI (rotate:SI (const_int -2) (match_dup 1))
14849 (match_dup 4)))
14850 (clobber (reg:CC FLAGS_REG))])
14851 (set (match_dup 0) (match_dup 5))]
14852 {
14853 operands[2] = gen_reg_rtx (<MODE>mode);
14854 operands[5] = gen_reg_rtx (<MODE>mode);
14855 operands[3] = lowpart_subreg (SImode, operands[5], <MODE>mode);
14856 operands[4] = lowpart_subreg (SImode, operands[2], <MODE>mode);
14857 })
14858
;; Register-destination counterpart of the one-bit zero_extract clear:
;; rewritten directly into the SImode and/rotate form, with operand 0 used
;; through its SImode lowpart subreg both as source (operand 2) and as
;; destination.
14859 (define_split
14860 [(set (zero_extract:HI
14861 (match_operand:SWI12 0 "register_operand")
14862 (const_int 1)
14863 (zero_extend:SI (match_operand:QI 1 "register_operand")))
14864 (const_int 0))
14865 (clobber (reg:CC FLAGS_REG))]
14866 "TARGET_USE_BT && ix86_pre_reload_split ()"
14867 [(parallel
14868 [(set (match_dup 0)
14869 (and:SI (rotate:SI (const_int -2) (match_dup 1))
14870 (match_dup 2)))
14871 (clobber (reg:CC FLAGS_REG))])]
14872 {
14873 operands[2] = lowpart_subreg (SImode, operands[0], <MODE>mode);
14874 operands[0] = lowpart_subreg (SImode, operands[0], <MODE>mode);
14875 })
14876
14877 ;; These instructions are never faster than the corresponding
14878 ;; and/ior/xor operations when using an immediate operand, so with
14879 ;; 32-bit there's no point. But in 64-bit, we can't hold the
14880 ;; relevant immediates within the instruction itself, so operating
14881 ;; on bits in the high 32-bits of a register becomes easier.
14882 ;;
14883 ;; These are slow on Nocona, but fast on Athlon64. We do require the use
14884 ;; of btrq and btcq for corner cases of post-reload expansion of absdf and
14885 ;; negdf respectively, so they can never be disabled entirely.
14886
;; Set one bit of DImode operand 0 (register or memory) at the constant
;; position operand 1, which must satisfy const_0_to_63_operand.  Emitted as
;; bts{q}; available for TARGET_64BIT when TARGET_USE_BT or after reload.
14887 (define_insn "*btsq_imm"
14888 [(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm")
14889 (const_int 1)
14890 (match_operand 1 "const_0_to_63_operand"))
14891 (const_int 1))
14892 (clobber (reg:CC FLAGS_REG))]
14893 "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
14894 "bts{q}\t{%1, %0|%0, %1}"
14895 [(set_attr "type" "alu1")
14896 (set_attr "prefix_0f" "1")
14897 (set_attr "znver1_decode" "double")
14898 (set_attr "mode" "DI")])
14899
;; Clear one bit of DImode operand 0 (register or memory) at the constant
;; position operand 1, which must satisfy const_0_to_63_operand.  Emitted as
;; btr{q}; available for TARGET_64BIT when TARGET_USE_BT or after reload.
14900 (define_insn "*btrq_imm"
14901 [(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm")
14902 (const_int 1)
14903 (match_operand 1 "const_0_to_63_operand"))
14904 (const_int 0))
14905 (clobber (reg:CC FLAGS_REG))]
14906 "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
14907 "btr{q}\t{%1, %0|%0, %1}"
14908 [(set_attr "type" "alu1")
14909 (set_attr "prefix_0f" "1")
14910 (set_attr "znver1_decode" "double")
14911 (set_attr "mode" "DI")])
14912
;; Complement one bit of DImode operand 0: the one-bit zero_extract at the
;; constant position operand 1 is set to the NOT of its current value.
;; Emitted as btc{q}; available for TARGET_64BIT when TARGET_USE_BT or after
;; reload.
14913 (define_insn "*btcq_imm"
14914 [(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm")
14915 (const_int 1)
14916 (match_operand 1 "const_0_to_63_operand"))
14917 (not:DI (zero_extract:DI (match_dup 0) (const_int 1) (match_dup 1))))
14918 (clobber (reg:CC FLAGS_REG))]
14919 "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
14920 "btc{q}\t{%1, %0|%0, %1}"
14921 [(set_attr "type" "alu1")
14922 (set_attr "prefix_0f" "1")
14923 (set_attr "znver1_decode" "double")
14924 (set_attr "mode" "DI")])
14925
14926 ;; Allow Nocona to avoid these instructions if a register is available.
14927
;; With a DImode scratch register available (operand 2) and !TARGET_USE_BT,
;; replace the single-bit set with an IOR of the constant 1 << operand 1.
;; If that constant is not an x86_64_immediate_operand it is first moved
;; into the scratch, which is then used as the IOR source.
14928 (define_peephole2
14929 [(match_scratch:DI 2 "r")
14930 (parallel [(set (zero_extract:DI
14931 (match_operand:DI 0 "nonimmediate_operand")
14932 (const_int 1)
14933 (match_operand 1 "const_0_to_63_operand"))
14934 (const_int 1))
14935 (clobber (reg:CC FLAGS_REG))])]
14936 "TARGET_64BIT && !TARGET_USE_BT"
14937 [(parallel [(set (match_dup 0)
14938 (ior:DI (match_dup 0) (match_dup 3)))
14939 (clobber (reg:CC FLAGS_REG))])]
14940 {
14941 int i = INTVAL (operands[1]);
14942
14943 operands[3] = gen_int_mode (HOST_WIDE_INT_1U << i, DImode);
14944
14945 if (!x86_64_immediate_operand (operands[3], DImode))
14946 {
14947 emit_move_insn (operands[2], operands[3]);
14948 operands[3] = operands[2];
14949 }
14950 })
14951
;; With a DImode scratch register available (operand 2) and !TARGET_USE_BT,
;; replace the single-bit clear with an AND of the constant
;; ~(1 << operand 1).  If that constant is not an x86_64_immediate_operand it
;; is first moved into the scratch, which is then used as the AND source.
14952 (define_peephole2
14953 [(match_scratch:DI 2 "r")
14954 (parallel [(set (zero_extract:DI
14955 (match_operand:DI 0 "nonimmediate_operand")
14956 (const_int 1)
14957 (match_operand 1 "const_0_to_63_operand"))
14958 (const_int 0))
14959 (clobber (reg:CC FLAGS_REG))])]
14960 "TARGET_64BIT && !TARGET_USE_BT"
14961 [(parallel [(set (match_dup 0)
14962 (and:DI (match_dup 0) (match_dup 3)))
14963 (clobber (reg:CC FLAGS_REG))])]
14964 {
14965 int i = INTVAL (operands[1]);
14966
14967 operands[3] = gen_int_mode (~(HOST_WIDE_INT_1U << i), DImode);
14968
14969 if (!x86_64_immediate_operand (operands[3], DImode))
14970 {
14971 emit_move_insn (operands[2], operands[3]);
14972 operands[3] = operands[2];
14973 }
14974 })
14975
;; With a DImode scratch register available (operand 2) and !TARGET_USE_BT,
;; replace the single-bit complement with an XOR of the constant
;; 1 << operand 1.  If that constant is not an x86_64_immediate_operand it is
;; first moved into the scratch, which is then used as the XOR source.
14976 (define_peephole2
14977 [(match_scratch:DI 2 "r")
14978 (parallel [(set (zero_extract:DI
14979 (match_operand:DI 0 "nonimmediate_operand")
14980 (const_int 1)
14981 (match_operand 1 "const_0_to_63_operand"))
14982 (not:DI (zero_extract:DI
14983 (match_dup 0) (const_int 1) (match_dup 1))))
14984 (clobber (reg:CC FLAGS_REG))])]
14985 "TARGET_64BIT && !TARGET_USE_BT"
14986 [(parallel [(set (match_dup 0)
14987 (xor:DI (match_dup 0) (match_dup 3)))
14988 (clobber (reg:CC FLAGS_REG))])]
14989 {
14990 int i = INTVAL (operands[1]);
14991
14992 operands[3] = gen_int_mode (HOST_WIDE_INT_1U << i, DImode);
14993
14994 if (!x86_64_immediate_operand (operands[3], DImode))
14995 {
14996 emit_move_insn (operands[2], operands[3]);
14997 operands[3] = operands[2];
14998 }
14999 })
15000
15001 ;; %%% bt
15002
;; Bit test: sets FLAGS_REG in CCCmode from comparing a one-bit zero_extract
;; of operand 0 (at SImode position operand 1) with zero.  The "mode"
;; attribute is SI when operand 1 is a constant below 32 and <MODE>
;; otherwise; the output code switches on that attribute to emit bt{l} or
;; bt{q}.
15003 (define_insn "*bt<mode>"
15004 [(set (reg:CCC FLAGS_REG)
15005 (compare:CCC
15006 (zero_extract:SWI48
15007 (match_operand:SWI48 0 "nonimmediate_operand" "r,m")
15008 (const_int 1)
15009 (match_operand:SI 1 "nonmemory_operand" "r<S>,<S>"))
15010 (const_int 0)))]
15011 ""
15012 {
15013 switch (get_attr_mode (insn))
15014 {
15015 case MODE_SI:
15016 return "bt{l}\t{%1, %k0|%k0, %1}";
15017
15018 case MODE_DI:
15019 return "bt{q}\t{%q1, %0|%0, %q1}";
15020
15021 default:
15022 gcc_unreachable ();
15023 }
15024 }
15025 [(set_attr "type" "alu1")
15026 (set_attr "prefix_0f" "1")
15027 (set (attr "mode")
15028 (if_then_else
15029 (and (match_test "CONST_INT_P (operands[1])")
15030 (match_test "INTVAL (operands[1]) < 32"))
15031 (const_string "SI")
15032 (const_string "<MODE>")))])
15033
;; Conditional jump on a single-bit test against zero.  Split before reload
;; into a CCCmode compare of the one-bit zero_extract followed by a jump on
;; FLAGS_REG.  A constant bit position is accepted only when it is below the
;; mode bitsize and at least 32 (8 when optimizing for size); for a
;; non-constant position operand 1 must not be memory.  The preparation
;; statement reverses the comparison code on a copy of operand 0 before it is
;; reused for the flags-based jump.
15034 (define_insn_and_split "*jcc_bt<mode>"
15035 [(set (pc)
15036 (if_then_else (match_operator 0 "bt_comparison_operator"
15037 [(zero_extract:SWI48
15038 (match_operand:SWI48 1 "nonimmediate_operand")
15039 (const_int 1)
15040 (match_operand:SI 2 "nonmemory_operand"))
15041 (const_int 0)])
15042 (label_ref (match_operand 3))
15043 (pc)))
15044 (clobber (reg:CC FLAGS_REG))]
15045 "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
15046 && (CONST_INT_P (operands[2])
15047 ? (INTVAL (operands[2]) < GET_MODE_BITSIZE (<MODE>mode)
15048 && INTVAL (operands[2])
15049 >= (optimize_function_for_size_p (cfun) ? 8 : 32))
15050 : !memory_operand (operands[1], <MODE>mode))
15051 && ix86_pre_reload_split ()"
15052 "#"
15053 "&& 1"
15054 [(set (reg:CCC FLAGS_REG)
15055 (compare:CCC
15056 (zero_extract:SWI48
15057 (match_dup 1)
15058 (const_int 1)
15059 (match_dup 2))
15060 (const_int 0)))
15061 (set (pc)
15062 (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
15063 (label_ref (match_dup 3))
15064 (pc)))]
15065 {
15066 operands[0] = shallow_copy_rtx (operands[0]);
15067 PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
15068 })
15069
;; Conditional jump on a single-bit test whose bit position is a QImode
;; register zero-extended to SImode.  The split emits the CCCmode compare
;; with the SImode lowpart subreg of operand 2 as the position, then a jump
;; on FLAGS_REG using a copy of operand 0 with its comparison code reversed.
15070 (define_insn_and_split "*jcc_bt<mode>_1"
15071 [(set (pc)
15072 (if_then_else (match_operator 0 "bt_comparison_operator"
15073 [(zero_extract:SWI48
15074 (match_operand:SWI48 1 "register_operand")
15075 (const_int 1)
15076 (zero_extend:SI
15077 (match_operand:QI 2 "register_operand")))
15078 (const_int 0)])
15079 (label_ref (match_operand 3))
15080 (pc)))
15081 (clobber (reg:CC FLAGS_REG))]
15082 "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
15083 && ix86_pre_reload_split ()"
15084 "#"
15085 "&& 1"
15086 [(set (reg:CCC FLAGS_REG)
15087 (compare:CCC
15088 (zero_extract:SWI48
15089 (match_dup 1)
15090 (const_int 1)
15091 (match_dup 2))
15092 (const_int 0)))
15093 (set (pc)
15094 (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
15095 (label_ref (match_dup 3))
15096 (pc)))]
15097 {
15098 operands[2] = lowpart_subreg (SImode, operands[2], QImode);
15099 operands[0] = shallow_copy_rtx (operands[0]);
15100 PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
15101 })
15102
15103 ;; Avoid useless masking of bit offset operand.
;; Conditional jump on a single-bit test whose SImode bit offset is
;; (operand 2 AND constant operand 3).  When the constant keeps all offset
;; bits relevant for <MODE>mode, the split drops the AND and emits a CCCmode
;; compare of the one-bit zero_extract with zero, followed by a jump on
;; FLAGS_REG using a copy of operand 0 with its comparison code reversed.
;; The comparison lists (const_int 0) explicitly as its second operand, like
;; the other jcc_bt patterns, so only tests against zero are matched; the
;; split always compares against zero.
15104 (define_insn_and_split "*jcc_bt<mode>_mask"
15105 [(set (pc)
15106 (if_then_else (match_operator 0 "bt_comparison_operator"
15107 [(zero_extract:SWI48
15108 (match_operand:SWI48 1 "register_operand")
15109 (const_int 1)
15110 (and:SI
15111 (match_operand:SI 2 "register_operand")
15112 (match_operand 3 "const_int_operand")))
(const_int 0)])
15113 (label_ref (match_operand 4))
15114 (pc)))
15115 (clobber (reg:CC FLAGS_REG))]
15116 "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
15117 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
15118 == GET_MODE_BITSIZE (<MODE>mode)-1
15119 && ix86_pre_reload_split ()"
15120 "#"
15121 "&& 1"
15122 [(set (reg:CCC FLAGS_REG)
15123 (compare:CCC
15124 (zero_extract:SWI48
15125 (match_dup 1)
15126 (const_int 1)
15127 (match_dup 2))
15128 (const_int 0)))
15129 (set (pc)
15130 (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
15131 (label_ref (match_dup 4))
15132 (pc)))]
15133 {
15134 operands[0] = shallow_copy_rtx (operands[0]);
15135 PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
15136 })
15137
;; Conditional jump on a single-bit test whose bit offset is the
;; zero-extended QImode low part of (operand 2 AND constant operand 3), with
;; operand 2 in an integer mode of 2 to 4 bytes (2 to 8 when TARGET_64BIT).
;; When the constant keeps all offset bits relevant for <MODE>mode, the split
;; drops the AND: operand 2 is forced into a register, its SImode lowpart
;; becomes the bit offset, and the jump uses a copy of operand 0 with its
;; comparison code reversed.
;; The comparison lists (const_int 0) explicitly as its second operand, like
;; the other jcc_bt patterns, so only tests against zero are matched; the
;; split always compares against zero.
15138 (define_insn_and_split "*jcc_bt<mode>_mask_1"
15139 [(set (pc)
15140 (if_then_else (match_operator 0 "bt_comparison_operator"
15141 [(zero_extract:SWI48
15142 (match_operand:SWI48 1 "register_operand")
15143 (const_int 1)
15144 (zero_extend:SI
15145 (subreg:QI
15146 (and
15147 (match_operand 2 "register_operand")
15148 (match_operand 3 "const_int_operand")) 0)))
(const_int 0)])
15149 (label_ref (match_operand 4))
15150 (pc)))
15151 (clobber (reg:CC FLAGS_REG))]
15152 "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
15153 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
15154 == GET_MODE_BITSIZE (<MODE>mode)-1
15155 && GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT
15156 && IN_RANGE (GET_MODE_SIZE (GET_MODE (operands[2])), 2,
15157 4 << (TARGET_64BIT ? 1 : 0))
15158 && ix86_pre_reload_split ()"
15159 "#"
15160 "&& 1"
15161 [(set (reg:CCC FLAGS_REG)
15162 (compare:CCC
15163 (zero_extract:SWI48
15164 (match_dup 1)
15165 (const_int 1)
15166 (match_dup 2))
15167 (const_int 0)))
15168 (set (pc)
15169 (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
15170 (label_ref (match_dup 4))
15171 (pc)))]
15172 {
15173 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
15174 operands[2] = gen_lowpart (SImode, operands[2]);
15175 operands[0] = shallow_copy_rtx (operands[0]);
15176 PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
15177 })
15178
15179 ;; Help combine recognize bt followed by cmov
;; Conditional move selected by a single-bit test (position is a
;; zero-extended QImode register).  Rewritten as a CCCmode compare of the
;; one-bit zero_extract followed by an if_then_else on
;; (eq (reg:CCC FLAGS_REG) 0).  When the original operator is EQ the two move
;; sources are swapped; operand 2 is replaced by its SImode lowpart subreg.
;; Not applied when both move sources are memory.
15180 (define_split
15181 [(set (match_operand:SWI248 0 "register_operand")
15182 (if_then_else:SWI248
15183 (match_operator 5 "bt_comparison_operator"
15184 [(zero_extract:SWI48
15185 (match_operand:SWI48 1 "register_operand")
15186 (const_int 1)
15187 (zero_extend:SI (match_operand:QI 2 "register_operand")))
15188 (const_int 0)])
15189 (match_operand:SWI248 3 "nonimmediate_operand")
15190 (match_operand:SWI248 4 "nonimmediate_operand")))]
15191 "TARGET_USE_BT && TARGET_CMOVE
15192 && !(MEM_P (operands[3]) && MEM_P (operands[4]))
15193 && ix86_pre_reload_split ()"
15194 [(set (reg:CCC FLAGS_REG)
15195 (compare:CCC
15196 (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
15197 (const_int 0)))
15198 (set (match_dup 0)
15199 (if_then_else:SWI248 (eq (reg:CCC FLAGS_REG) (const_int 0))
15200 (match_dup 3)
15201 (match_dup 4)))]
15202 {
15203 if (GET_CODE (operands[5]) == EQ)
15204 std::swap (operands[3], operands[4]);
15205 operands[2] = lowpart_subreg (SImode, operands[2], QImode);
15206 })
15207
15208 ;; Help combine recognize bt followed by setc
;; The destination is QImode register operand 0 viewed as a <MODE>mode
;; subreg; the source is a one-bit zero_extract of operand 1 at position
;; zero_extend (operand 2).  Split into a CCCmode compare of that extract
;; with zero and a QImode store of (eq (reg:CCC FLAGS_REG) 0) into operand 0,
;; using the SImode lowpart subreg of operand 2 as the position.
15209 (define_insn_and_split "*bt<mode>_setcqi"
15210 [(set (subreg:SWI48 (match_operand:QI 0 "register_operand") 0)
15211 (zero_extract:SWI48
15212 (match_operand:SWI48 1 "register_operand")
15213 (const_int 1)
15214 (zero_extend:SI (match_operand:QI 2 "register_operand"))))
15215 (clobber (reg:CC FLAGS_REG))]
15216 "TARGET_USE_BT && ix86_pre_reload_split ()"
15217 "#"
15218 "&& 1"
15219 [(set (reg:CCC FLAGS_REG)
15220 (compare:CCC
15221 (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
15222 (const_int 0)))
15223 (set (match_dup 0)
15224 (eq:QI (reg:CCC FLAGS_REG) (const_int 0)))]
15225 {
15226 operands[2] = lowpart_subreg (SImode, operands[2], QImode);
15227 })
15228
15229 ;; Help combine recognize bt followed by setnc
;; QImode operand 0 = (NOT of the QImode low part of (operand 1 >> operand 2))
;; AND 1.  Split into a CCCmode compare of the one-bit zero_extract with zero
;; and a QImode store of (ne (reg:CCC FLAGS_REG) 0) into operand 0, using the
;; SImode lowpart subreg of operand 2 as the position.
15230 (define_insn_and_split "*bt<mode>_setncqi"
15231 [(set (match_operand:QI 0 "register_operand")
15232 (and:QI
15233 (not:QI
15234 (subreg:QI
15235 (lshiftrt:SWI48 (match_operand:SWI48 1 "register_operand")
15236 (match_operand:QI 2 "register_operand")) 0))
15237 (const_int 1)))
15238 (clobber (reg:CC FLAGS_REG))]
15239 "TARGET_USE_BT && ix86_pre_reload_split ()"
15240 "#"
15241 "&& 1"
15242 [(set (reg:CCC FLAGS_REG)
15243 (compare:CCC
15244 (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
15245 (const_int 0)))
15246 (set (match_dup 0)
15247 (ne:QI (reg:CCC FLAGS_REG) (const_int 0)))]
15248 {
15249 operands[2] = lowpart_subreg (SImode, operands[2], QImode);
15250 })
15251
;; <MODE>mode operand 0 = (NOT (operand 1 >> operand 2)) AND 1.  Split into a
;; CCCmode compare of the one-bit zero_extract with zero, a QImode store of
;; (ne (reg:CCC FLAGS_REG) 0) into a fresh QImode pseudo (operand 3), and a
;; zero_extend of that pseudo into operand 0.
15252 (define_insn_and_split "*bt<mode>_setnc<mode>"
15253 [(set (match_operand:SWI48 0 "register_operand")
15254 (and:SWI48
15255 (not:SWI48
15256 (lshiftrt:SWI48 (match_operand:SWI48 1 "register_operand")
15257 (match_operand:QI 2 "register_operand")))
15258 (const_int 1)))
15259 (clobber (reg:CC FLAGS_REG))]
15260 "TARGET_USE_BT && ix86_pre_reload_split ()"
15261 "#"
15262 "&& 1"
15263 [(set (reg:CCC FLAGS_REG)
15264 (compare:CCC
15265 (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
15266 (const_int 0)))
15267 (set (match_dup 3)
15268 (ne:QI (reg:CCC FLAGS_REG) (const_int 0)))
15269 (set (match_dup 0) (zero_extend:SWI48 (match_dup 3)))]
15270 {
15271 operands[2] = lowpart_subreg (SImode, operands[2], QImode);
15272 operands[3] = gen_reg_rtx (QImode);
15273 })
15274 \f
15275 ;; Store-flag instructions.
15276
;; Store-flag of an add_comparison_operator applied to (NOT operand 2) and
;; operand 3.  Rewritten as a CCCmode compare of (operand 2 + operand 3)
;; against operand 2, followed by the same operator applied to FLAGS_REG.
15277 (define_split
15278 [(set (match_operand:QI 0 "nonimmediate_operand")
15279 (match_operator:QI 1 "add_comparison_operator"
15280 [(not:SWI (match_operand:SWI 2 "register_operand"))
15281 (match_operand:SWI 3 "nonimmediate_operand")]))]
15282 ""
15283 [(set (reg:CCC FLAGS_REG)
15284 (compare:CCC
15285 (plus:SWI (match_dup 2) (match_dup 3))
15286 (match_dup 2)))
15287 (set (match_dup 0)
15288 (match_op_dup 1 [(reg:CCC FLAGS_REG) (const_int 0)]))])
15289
;; Store-flag of a shr_comparison_operator between DImode register operand 2
;; and constant operand 3, where exact_log2 (operand 3 + 1) lies in 32..63.
;; Rewritten as a CCZmode compare of (operand 2 >> that log2) with zero; the
;; operator copy has GTU replaced by NE and LEU by EQ (any other code is
;; treated as unreachable).
15290 (define_split
15291 [(set (match_operand:QI 0 "nonimmediate_operand")
15292 (match_operator:QI 1 "shr_comparison_operator"
15293 [(match_operand:DI 2 "register_operand")
15294 (match_operand 3 "const_int_operand")]))]
15295 "TARGET_64BIT
15296 && IN_RANGE (exact_log2 (UINTVAL (operands[3]) + 1), 32, 63)"
15297 [(set (reg:CCZ FLAGS_REG)
15298 (compare:CCZ
15299 (lshiftrt:DI (match_dup 2) (match_dup 4))
15300 (const_int 0)))
15301 (set (match_dup 0)
15302 (match_op_dup 1 [(reg:CCZ FLAGS_REG) (const_int 0)]))]
15303 {
15304 enum rtx_code new_code;
15305
15306 operands[1] = shallow_copy_rtx (operands[1]);
15307 switch (GET_CODE (operands[1]))
15308 {
15309 case GTU: new_code = NE; break;
15310 case LEU: new_code = EQ; break;
15311 default: gcc_unreachable ();
15312 }
15313 PUT_CODE (operands[1], new_code);
15314
15315 operands[4] = GEN_INT (exact_log2 (UINTVAL (operands[3]) + 1));
15316 })
15317
15318 ;; For all sCOND expanders, also expand the compare or test insn that
15319 ;; generates cc0. Generate an equality comparison if `seq' or `sne'.
15320
;; DImode store-flag from FLAGS_REG.  After reload it is split into a QImode
;; store of the comparison (a copy of operand 1 with its mode changed to
;; QImode) into the QImode lowpart of operand 0, followed by a zero_extend of
;; that lowpart into operand 0.  Requires TARGET_64BIT and
;; !TARGET_PARTIAL_REG_STALL.
15321 (define_insn_and_split "*setcc_di_1"
15322 [(set (match_operand:DI 0 "register_operand" "=q")
15323 (match_operator:DI 1 "ix86_comparison_operator"
15324 [(reg FLAGS_REG) (const_int 0)]))]
15325 "TARGET_64BIT && !TARGET_PARTIAL_REG_STALL"
15326 "#"
15327 "&& reload_completed"
15328 [(set (match_dup 2) (match_dup 1))
15329 (set (match_dup 0) (zero_extend:DI (match_dup 2)))]
15330 {
15331 operands[1] = shallow_copy_rtx (operands[1]);
15332 PUT_MODE (operands[1], QImode);
15333 operands[2] = gen_lowpart (QImode, operands[0]);
15334 })
15335
;; SWI24 store-flag from FLAGS_REG, used when TARGET_ZERO_EXTEND_WITH_AND is
;; set and the function is optimized for speed.  After reload it is split
;; into a QImode store into the lowpart of operand 0 plus a zero_extend that
;; carries a FLAGS_REG clobber.
15336 (define_insn_and_split "*setcc_<mode>_1_and"
15337 [(set (match_operand:SWI24 0 "register_operand" "=q")
15338 (match_operator:SWI24 1 "ix86_comparison_operator"
15339 [(reg FLAGS_REG) (const_int 0)]))
15340 (clobber (reg:CC FLAGS_REG))]
15341 "!TARGET_PARTIAL_REG_STALL
15342 && TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
15343 "#"
15344 "&& reload_completed"
15345 [(set (match_dup 2) (match_dup 1))
15346 (parallel [(set (match_dup 0) (zero_extend:SWI24 (match_dup 2)))
15347 (clobber (reg:CC FLAGS_REG))])]
15348 {
15349 operands[1] = shallow_copy_rtx (operands[1]);
15350 PUT_MODE (operands[1], QImode);
15351 operands[2] = gen_lowpart (QImode, operands[0]);
15352 })
15353
;; SWI24 store-flag from FLAGS_REG, used when TARGET_ZERO_EXTEND_WITH_AND is
;; not set or the function is optimized for size.  After reload it is split
;; into a QImode store into the lowpart of operand 0 plus a plain zero_extend
;; (no FLAGS_REG clobber in this variant).
15354 (define_insn_and_split "*setcc_<mode>_1_movzbl"
15355 [(set (match_operand:SWI24 0 "register_operand" "=q")
15356 (match_operator:SWI24 1 "ix86_comparison_operator"
15357 [(reg FLAGS_REG) (const_int 0)]))]
15358 "!TARGET_PARTIAL_REG_STALL
15359 && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))"
15360 "#"
15361 "&& reload_completed"
15362 [(set (match_dup 2) (match_dup 1))
15363 (set (match_dup 0) (zero_extend:SWI24 (match_dup 2)))]
15364 {
15365 operands[1] = shallow_copy_rtx (operands[1]);
15366 PUT_MODE (operands[1], QImode);
15367 operands[2] = gen_lowpart (QImode, operands[0]);
15368 })
15369
;; QImode store-flag: operand 0 (register or memory, constraint "qm")
;; receives the result of comparison operator 1 applied to FLAGS_REG.
;; Emitted as set%C1.
15370 (define_insn "*setcc_qi"
15371 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
15372 (match_operator:QI 1 "ix86_comparison_operator"
15373 [(reg FLAGS_REG) (const_int 0)]))]
15374 ""
15375 "set%C1\t%0"
15376 [(set_attr "type" "setcc")
15377 (set_attr "mode" "QI")])
15378
;; strict_low_part variant of the QImode store-flag: only the QImode part of
;; register operand 0 (constraint "+q") is written.  Emitted as set%C1.
15379 (define_insn "*setcc_qi_slp"
15380 [(set (strict_low_part (match_operand:QI 0 "register_operand" "+q"))
15381 (match_operator:QI 1 "ix86_comparison_operator"
15382 [(reg FLAGS_REG) (const_int 0)]))]
15383 ""
15384 "set%C1\t%0"
15385 [(set_attr "type" "setcc")
15386 (set_attr "mode" "QI")])
15387
15388 ;; In general it is not safe to assume too much about CCmode registers,
15389 ;; so simplify-rtx stops when it sees a second one. Under certain
15390 ;; conditions this is safe on x86, so help combine not create
15391 ;;
15392 ;; seta %al
15393 ;; testb %al, %al
15394 ;; sete %al
15395
;; (ne (flags comparison) 0) stored to a QImode destination collapses to the
;; inner comparison itself, taken as a copy with its mode set to QImode.
15396 (define_split
15397 [(set (match_operand:QI 0 "nonimmediate_operand")
15398 (ne:QI (match_operator 1 "ix86_comparison_operator"
15399 [(reg FLAGS_REG) (const_int 0)])
15400 (const_int 0)))]
15401 ""
15402 [(set (match_dup 0) (match_dup 1))]
15403 {
15404 operands[1] = shallow_copy_rtx (operands[1]);
15405 PUT_MODE (operands[1], QImode);
15406 })
15407
;; strict_low_part form of the (ne (flags comparison) 0) simplification: the
;; inner comparison, copied with its mode set to QImode, is stored directly.
15408 (define_split
15409 [(set (strict_low_part (match_operand:QI 0 "register_operand"))
15410 (ne:QI (match_operator 1 "ix86_comparison_operator"
15411 [(reg FLAGS_REG) (const_int 0)])
15412 (const_int 0)))]
15413 ""
15414 [(set (match_dup 0) (match_dup 1))]
15415 {
15416 operands[1] = shallow_copy_rtx (operands[1]);
15417 PUT_MODE (operands[1], QImode);
15418 })
15419
;; (eq (flags comparison) 0) stored to a QImode destination becomes the
;; reversed comparison: the copy of operand 1 gets QImode and the code
;; returned by ix86_reverse_condition for the mode of the flags operand.  The
;; split FAILs if the result no longer satisfies ix86_comparison_operator.
15420 (define_split
15421 [(set (match_operand:QI 0 "nonimmediate_operand")
15422 (eq:QI (match_operator 1 "ix86_comparison_operator"
15423 [(reg FLAGS_REG) (const_int 0)])
15424 (const_int 0)))]
15425 ""
15426 [(set (match_dup 0) (match_dup 1))]
15427 {
15428 operands[1] = shallow_copy_rtx (operands[1]);
15429 PUT_MODE (operands[1], QImode);
15430 PUT_CODE (operands[1],
15431 ix86_reverse_condition (GET_CODE (operands[1]),
15432 GET_MODE (XEXP (operands[1], 0))));
15433
15434 /* Make sure that (a) the CCmode we have for the flags is strong
15435 enough for the reversed compare or (b) we have a valid FP compare. */
15436 if (! ix86_comparison_operator (operands[1], VOIDmode))
15437 FAIL;
15438 })
15439
;; strict_low_part form of the (eq (flags comparison) 0) simplification: the
;; copy of operand 1 gets QImode and the code returned by
;; ix86_reverse_condition; the split FAILs if the reversed comparison does not
;; satisfy ix86_comparison_operator.
15440 (define_split
15441 [(set (strict_low_part (match_operand:QI 0 "register_operand"))
15442 (eq:QI (match_operator 1 "ix86_comparison_operator"
15443 [(reg FLAGS_REG) (const_int 0)])
15444 (const_int 0)))]
15445 ""
15446 [(set (match_dup 0) (match_dup 1))]
15447 {
15448 operands[1] = shallow_copy_rtx (operands[1]);
15449 PUT_MODE (operands[1], QImode);
15450 PUT_CODE (operands[1],
15451 ix86_reverse_condition (GET_CODE (operands[1]),
15452 GET_MODE (XEXP (operands[1], 0))));
15453
15454 /* Make sure that (a) the CCmode we have for the flags is strong
15455 enough for the reversed compare or (b) we have a valid FP compare. */
15456 if (! ix86_comparison_operator (operands[1], VOIDmode))
15457 FAIL;
15458 })
15459
15460 ;; The SSE store flag instructions save 0 or 0xffffffff to the result.
15461 ;; Subsequent logical operations are used to imitate conditional moves.
15462 ;; 0xffffffff is NaN, but not in normalized form, so we can't represent
15463 ;; it directly.
15464
;; MODEF compare producing its result in an SSE register (operand 0) from
;; sse_comparison_operator 3 applied to operands 1 and 2.  Two alternatives:
;; the non-AVX two-operand cmp form with operand 1 tied to operand 0, and the
;; AVX three-operand vcmp form.  Enabled when SSE_FLOAT_MODE_P (<MODE>mode).
15465 (define_insn "setcc_<mode>_sse"
15466 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
15467 (match_operator:MODEF 3 "sse_comparison_operator"
15468 [(match_operand:MODEF 1 "register_operand" "0,x")
15469 (match_operand:MODEF 2 "nonimmediate_operand" "xm,xm")]))]
15470 "SSE_FLOAT_MODE_P (<MODE>mode)"
15471 "@
15472 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
15473 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15474 [(set_attr "isa" "noavx,avx")
15475 (set_attr "type" "ssecmp")
15476 (set_attr "length_immediate" "1")
15477 (set_attr "prefix" "orig,vex")
15478 (set_attr "mode" "<MODE>")])
15479
;; HFmode compare under TARGET_AVX512FP16: an UNSPEC_PCMP of operands 1 and 2
;; with the SImode constant operand 3 (const_0_to_31_operand), producing a
;; QImode result in a "k"-constraint register.  Emitted as vcmpsh.
15480 (define_insn "setcc_hf_mask"
15481 [(set (match_operand:QI 0 "register_operand" "=k")
15482 (unspec:QI
15483 [(match_operand:HF 1 "register_operand" "v")
15484 (match_operand:HF 2 "nonimmediate_operand" "vm")
15485 (match_operand:SI 3 "const_0_to_31_operand")]
15486 UNSPEC_PCMP))]
15487 "TARGET_AVX512FP16"
15488 "vcmpsh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15489 [(set_attr "type" "ssecmp")
15490 (set_attr "prefix" "evex")
15491 (set_attr "mode" "HF")])
15492
15493 \f
15494 ;; Basic conditional jump instructions.
15495
;; Conditional jump on an add_comparison_operator applied to (NOT operand 2)
;; and operand 3.  Rewritten as a CCCmode compare of (operand 2 + operand 3)
;; against operand 2, followed by a jump on the same operator applied to
;; FLAGS_REG.  The replacement template refers to the label with match_dup,
;; as the other jump splitters in this file do.
15496 (define_split
15497 [(set (pc)
15498 (if_then_else
15499 (match_operator 1 "add_comparison_operator"
15500 [(not:SWI (match_operand:SWI 2 "register_operand"))
15501 (match_operand:SWI 3 "nonimmediate_operand")])
15502 (label_ref (match_operand 0))
15503 (pc)))]
15504 ""
15505 [(set (reg:CCC FLAGS_REG)
15506 (compare:CCC
15507 (plus:SWI (match_dup 2) (match_dup 3))
15508 (match_dup 2)))
15509 (set (pc)
15510 (if_then_else (match_op_dup 1 [(reg:CCC FLAGS_REG) (const_int 0)])
15511 (label_ref (match_dup 0))
15512 (pc)))])
15513
;; Conditional jump on a shr_comparison_operator between DImode register
;; operand 2 and constant operand 3, where exact_log2 (operand 3 + 1) lies in
;; 32..63.  Rewritten as a CCZmode compare of (operand 2 >> that log2) with
;; zero and a jump on the flags; the operator copy has GTU replaced by NE and
;; LEU by EQ (any other code is treated as unreachable).  The replacement
;; template refers to the label with match_dup, as the other jump splitters
;; in this file do.
15514 (define_split
15515 [(set (pc)
15516 (if_then_else
15517 (match_operator 1 "shr_comparison_operator"
15518 [(match_operand:DI 2 "register_operand")
15519 (match_operand 3 "const_int_operand")])
15520 (label_ref (match_operand 0))
15521 (pc)))]
15522 "TARGET_64BIT
15523 && IN_RANGE (exact_log2 (UINTVAL (operands[3]) + 1), 32, 63)"
15524 [(set (reg:CCZ FLAGS_REG)
15525 (compare:CCZ
15526 (lshiftrt:DI (match_dup 2) (match_dup 4))
15527 (const_int 0)))
15528 (set (pc)
15529 (if_then_else (match_op_dup 1 [(reg:CCZ FLAGS_REG) (const_int 0)])
15530 (label_ref (match_dup 0))
15531 (pc)))]
15532 {
15533 enum rtx_code new_code;
15534
15535 operands[1] = shallow_copy_rtx (operands[1]);
15536 switch (GET_CODE (operands[1]))
15537 {
15538 case GTU: new_code = NE; break;
15539 case LEU: new_code = EQ; break;
15540 default: gcc_unreachable ();
15541 }
15542 PUT_CODE (operands[1], new_code);
15543
15544 operands[4] = GEN_INT (exact_log2 (UINTVAL (operands[3]) + 1));
15545 })
15546
15547 ;; We ignore the overflow flag for signed branch instructions.
15548
;; Conditional jump to label operand 0 on comparison operator 1 applied to
;; FLAGS_REG.  The "length" attribute is 2 when (label - pc) lies in
;; [-126, 128) and 6 otherwise.
15549 (define_insn "*jcc"
15550 [(set (pc)
15551 (if_then_else (match_operator 1 "ix86_comparison_operator"
15552 [(reg FLAGS_REG) (const_int 0)])
15553 (label_ref (match_operand 0))
15554 (pc)))]
15555 ""
15556 "%!%+j%C1\t%l0"
15557 [(set_attr "type" "ibr")
15558 (set_attr "modrm" "0")
15559 (set (attr "length")
15560 (if_then_else
15561 (and (ge (minus (match_dup 0) (pc))
15562 (const_int -126))
15563 (lt (minus (match_dup 0) (pc))
15564 (const_int 128)))
15565 (const_int 2)
15566 (const_int 6)))])
15567
15568 ;; In general it is not safe to assume too much about CCmode registers,
15569 ;; so simplify-rtx stops when it sees a second one. Under certain
15570 ;; conditions this is safe on x86, so help combine not create
15571 ;;
15572 ;; seta %al
15573 ;; testb %al, %al
15574 ;; je Lfoo
15575
;; A jump on (ne (flags comparison) 0) becomes a jump on the inner comparison
;; itself, taken as a copy with its mode set to VOIDmode.
15576 (define_split
15577 [(set (pc)
15578 (if_then_else (ne (match_operator 0 "ix86_comparison_operator"
15579 [(reg FLAGS_REG) (const_int 0)])
15580 (const_int 0))
15581 (label_ref (match_operand 1))
15582 (pc)))]
15583 ""
15584 [(set (pc)
15585 (if_then_else (match_dup 0)
15586 (label_ref (match_dup 1))
15587 (pc)))]
15588 {
15589 operands[0] = shallow_copy_rtx (operands[0]);
15590 PUT_MODE (operands[0], VOIDmode);
15591 })
15592
;; A jump on (eq (flags comparison) 0) becomes a jump on the reversed
;; comparison: the copy of operand 0 gets VOIDmode and the code returned by
;; ix86_reverse_condition for the mode of the flags operand.  The split FAILs
;; if the result no longer satisfies ix86_comparison_operator.
15593 (define_split
15594 [(set (pc)
15595 (if_then_else (eq (match_operator 0 "ix86_comparison_operator"
15596 [(reg FLAGS_REG) (const_int 0)])
15597 (const_int 0))
15598 (label_ref (match_operand 1))
15599 (pc)))]
15600 ""
15601 [(set (pc)
15602 (if_then_else (match_dup 0)
15603 (label_ref (match_dup 1))
15604 (pc)))]
15605 {
15606 operands[0] = shallow_copy_rtx (operands[0]);
15607 PUT_MODE (operands[0], VOIDmode);
15608 PUT_CODE (operands[0],
15609 ix86_reverse_condition (GET_CODE (operands[0]),
15610 GET_MODE (XEXP (operands[0], 0))));
15611
15612 /* Make sure that (a) the CCmode we have for the flags is strong
15613 enough for the reversed compare or (b) we have a valid FP compare. */
15614 if (! ix86_comparison_operator (operands[0], VOIDmode))
15615 FAIL;
15616 })
15617 \f
15618 ;; Unconditional and other jump instructions
15619
;; Unconditional jump to label operand 0, emitted as jmp.  The "length"
;; attribute is 2 when (label - pc) lies in [-126, 128) and 5 otherwise.
15620 (define_insn "jump"
15621 [(set (pc)
15622 (label_ref (match_operand 0)))]
15623 ""
15624 "%!jmp\t%l0"
15625 [(set_attr "type" "ibr")
15626 (set_attr "modrm" "0")
15627 (set (attr "length")
15628 (if_then_else
15629 (and (ge (minus (match_dup 0) (pc))
15630 (const_int -126))
15631 (lt (minus (match_dup 0) (pc))
15632 (const_int 128)))
15633 (const_int 2)
15634 (const_int 5)))])
15635
;; Expander for an indirect jump through operand 0.  For TARGET_X32 or
;; TARGET_INDIRECT_BRANCH_REGISTER the target is converted to word_mode with
;; convert_memory_address.  Always records has_local_indirect_jump in
;; cfun->machine.
15636 (define_expand "indirect_jump"
15637 [(set (pc) (match_operand 0 "indirect_branch_operand"))]
15638 ""
15639 {
15640 if (TARGET_X32 || TARGET_INDIRECT_BRANCH_REGISTER)
15641 operands[0] = convert_memory_address (word_mode, operands[0]);
15642 cfun->machine->has_local_indirect_jump = true;
15643 })
15644
;; Indirect jump insn; the assembly text comes from
;; ix86_output_indirect_jmp.  The "type" attribute is "multi" when
;; cfun->machine->indirect_branch_type differs from indirect_branch_keep and
;; "ibr" otherwise.
15645 (define_insn "*indirect_jump"
15646 [(set (pc) (match_operand:W 0 "indirect_branch_operand" "rBw"))]
15647 ""
15648 "* return ix86_output_indirect_jmp (operands[0]);"
15649 [(set (attr "type")
15650 (if_then_else (match_test "(cfun->machine->indirect_branch_type
15651 != indirect_branch_keep)")
15652 (const_string "multi")
15653 (const_string "ibr")))
15654 (set_attr "length_immediate" "0")])
15655
;; Expander for a jump-table dispatch through operand 0, with the table label
;; in operand 1.  Under flag_pic the table entry is turned into an absolute
;; address with expand_simple_binop: entry + label for TARGET_64BIT or
;; TARGET_VXWORKS_RTP, entry + pic_offset_table_rtx for TARGET_MACHO or
;; HAVE_AS_GOTOFF_IN_DATA, and pic_offset_table_rtx - entry otherwise.  The
;; address is then converted to word_mode for TARGET_X32 or
;; TARGET_INDIRECT_BRANCH_REGISTER, and has_local_indirect_jump is recorded.
15656 (define_expand "tablejump"
15657 [(parallel [(set (pc) (match_operand 0 "indirect_branch_operand"))
15658 (use (label_ref (match_operand 1)))])]
15659 ""
15660 {
15661 /* In PIC mode, the table entries are stored GOT (32-bit) or PC (64-bit)
15662 relative. Convert the relative address to an absolute address. */
15663 if (flag_pic)
15664 {
15665 rtx op0, op1;
15666 enum rtx_code code;
15667
15668 /* We can't use @GOTOFF for text labels on VxWorks;
15669 see gotoff_operand. */
15670 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15671 {
15672 code = PLUS;
15673 op0 = operands[0];
15674 op1 = gen_rtx_LABEL_REF (Pmode, operands[1]);
15675 }
15676 else if (TARGET_MACHO || HAVE_AS_GOTOFF_IN_DATA)
15677 {
15678 code = PLUS;
15679 op0 = operands[0];
15680 op1 = pic_offset_table_rtx;
15681 }
15682 else
15683 {
15684 code = MINUS;
15685 op0 = pic_offset_table_rtx;
15686 op1 = operands[0];
15687 }
15688
15689 operands[0] = expand_simple_binop (Pmode, code, op0, op1, NULL_RTX, 0,
15690 OPTAB_DIRECT);
15691 }
15692
15693 if (TARGET_X32 || TARGET_INDIRECT_BRANCH_REGISTER)
15694 operands[0] = convert_memory_address (word_mode, operands[0]);
15695 cfun->machine->has_local_indirect_jump = true;
15696 })
15697
;; Jump-table dispatch insn: an indirect jump through operand 0 that also
;; carries a use of the table label (operand 1).  The assembly text comes
;; from ix86_output_indirect_jmp; the "type" attribute is "multi" when
;; cfun->machine->indirect_branch_type differs from indirect_branch_keep and
;; "ibr" otherwise.
15698 (define_insn "*tablejump_1"
15699 [(set (pc) (match_operand:W 0 "indirect_branch_operand" "rBw"))
15700 (use (label_ref (match_operand 1)))]
15701 ""
15702 "* return ix86_output_indirect_jmp (operands[0]);"
15703 [(set (attr "type")
15704 (if_then_else (match_test "(cfun->machine->indirect_branch_type
15705 != indirect_branch_keep)")
15706 (const_string "multi")
15707 (const_string "ibr")))
15708 (set_attr "length_immediate" "0")])
15709 \f
15710 ;; Convert setcc + movzbl to xor + setcc if operands don't overlap.
15711
;; Three-insn window: a flags-setting insn, a QImode setcc into operand 1,
;; and a zero_extend of operand 1 into operand 3.  Applies when operand 1 is
;; dead after the third insn or is the same as operand 3, operand 3 does not
;; overlap the flags source (operand 0), and FLAGS_REG is dead at the start
;; of the window.  The preparation statement calls ix86_expand_clear on
;; operand 3; the replacement keeps the flags set and stores the comparison
;; through strict_low_part of operand 3's QImode lowpart.
15712 (define_peephole2
15713 [(set (match_operand 4 "flags_reg_operand") (match_operand 0))
15714 (set (match_operand:QI 1 "register_operand")
15715 (match_operator:QI 2 "ix86_comparison_operator"
15716 [(reg FLAGS_REG) (const_int 0)]))
15717 (set (match_operand 3 "any_QIreg_operand")
15718 (zero_extend (match_dup 1)))]
15719 "(peep2_reg_dead_p (3, operands[1])
15720 || operands_match_p (operands[1], operands[3]))
15721 && ! reg_overlap_mentioned_p (operands[3], operands[0])
15722 && peep2_regno_dead_p (0, FLAGS_REG)"
15723 [(set (match_dup 4) (match_dup 0))
15724 (set (strict_low_part (match_dup 5))
15725 (match_dup 2))]
15726 {
15727 operands[5] = gen_lowpart (QImode, operands[3]);
15728 ix86_expand_clear (operands[3]);
15729 })
15730
;; Variant of the setcc + zero_extend peephole in which the flags-setting
;; insn is a parallel with a second element (operand 4).  In addition to the
;; checks on operand 0, operand 3 must neither be mentioned in nor set by
;; operand 4.  The preparation statement calls ix86_expand_clear on
;; operand 3; the replacement keeps the parallel and stores the comparison
;; through strict_low_part of operand 3's QImode lowpart.
15731 (define_peephole2
15732 [(parallel [(set (match_operand 5 "flags_reg_operand") (match_operand 0))
15733 (match_operand 4)])
15734 (set (match_operand:QI 1 "register_operand")
15735 (match_operator:QI 2 "ix86_comparison_operator"
15736 [(reg FLAGS_REG) (const_int 0)]))
15737 (set (match_operand 3 "any_QIreg_operand")
15738 (zero_extend (match_dup 1)))]
15739 "(peep2_reg_dead_p (3, operands[1])
15740 || operands_match_p (operands[1], operands[3]))
15741 && ! reg_overlap_mentioned_p (operands[3], operands[0])
15742 && ! reg_overlap_mentioned_p (operands[3], operands[4])
15743 && ! reg_set_p (operands[3], operands[4])
15744 && peep2_regno_dead_p (0, FLAGS_REG)"
15745 [(parallel [(set (match_dup 5) (match_dup 0))
15746 (match_dup 4)])
15747 (set (strict_low_part (match_dup 6))
15748 (match_dup 2))]
15749 {
15750 operands[6] = gen_lowpart (QImode, operands[3]);
15751 ix86_expand_clear (operands[3]);
15752 })
15753
;; Variant with two flags-setting insns before the setcc: a plain flags set
;; (operands 6 and 0) followed by a parallel whose first element sets the
;; flags from operand 1 and whose second element is operand 5.  The source
;; of the second flags set must itself refer to FLAGS_REG
;; (refers_to_regno_p check).  Operand 4 (the zero_extend destination) must
;; not overlap operands 0, 1 or 5 and must not be set by operand 5.
;; Both flags-setting insns are re-emitted unchanged; the setcc result goes
;; to the QImode low part of operand 4 (operand 8) after ix86_expand_clear.
15754 (define_peephole2
15755 [(set (match_operand 6 "flags_reg_operand") (match_operand 0))
15756 (parallel [(set (match_operand 7 "flags_reg_operand") (match_operand 1))
15757 (match_operand 5)])
15758 (set (match_operand:QI 2 "register_operand")
15759 (match_operator:QI 3 "ix86_comparison_operator"
15760 [(reg FLAGS_REG) (const_int 0)]))
15761 (set (match_operand 4 "any_QIreg_operand")
15762 (zero_extend (match_dup 2)))]
15763 "(peep2_reg_dead_p (4, operands[2])
15764 || operands_match_p (operands[2], operands[4]))
15765 && ! reg_overlap_mentioned_p (operands[4], operands[0])
15766 && ! reg_overlap_mentioned_p (operands[4], operands[1])
15767 && ! reg_overlap_mentioned_p (operands[4], operands[5])
15768 && ! reg_set_p (operands[4], operands[5])
15769 && refers_to_regno_p (FLAGS_REG, operands[1], (rtx *)NULL)
15770 && peep2_regno_dead_p (0, FLAGS_REG)"
15771 [(set (match_dup 6) (match_dup 0))
15772 (parallel [(set (match_dup 7) (match_dup 1))
15773 (match_dup 5)])
15774 (set (strict_low_part (match_dup 8))
15775 (match_dup 3))]
15776 {
15777 operands[8] = gen_lowpart (QImode, operands[4]);
15778 ix86_expand_clear (operands[4]);
15779 })
15780
15781 ;; Similar, but match zero extend with andsi3.
15782
;; Window: flags set from operand 0; QImode setcc into operand 1; SImode
;; "and" of operand 3 with 255 (with a FLAGS_REG clobber).
;; Requires that operands 1 and 3 have the same register number, that
;; operand 3 does not overlap operand 0, and that
;; peep2_regno_dead_p (0, FLAGS_REG) holds.
;; Replacement: ix86_expand_clear on operand 3, the original flags set, and
;; a setcc into the QImode low part of operand 3 (operand 5).
15783 (define_peephole2
15784 [(set (match_operand 4 "flags_reg_operand") (match_operand 0))
15785 (set (match_operand:QI 1 "register_operand")
15786 (match_operator:QI 2 "ix86_comparison_operator"
15787 [(reg FLAGS_REG) (const_int 0)]))
15788 (parallel [(set (match_operand:SI 3 "any_QIreg_operand")
15789 (and:SI (match_dup 3) (const_int 255)))
15790 (clobber (reg:CC FLAGS_REG))])]
15791 "REGNO (operands[1]) == REGNO (operands[3])
15792 && ! reg_overlap_mentioned_p (operands[3], operands[0])
15793 && peep2_regno_dead_p (0, FLAGS_REG)"
15794 [(set (match_dup 4) (match_dup 0))
15795 (set (strict_low_part (match_dup 5))
15796 (match_dup 2))]
15797 {
15798 operands[5] = gen_lowpart (QImode, operands[3]);
15799 ix86_expand_clear (operands[3]);
15800 })
15801
;; Window: a parallel flags-setting insn (flags set from operand 0 plus a
;; second element, operand 4); QImode setcc into operand 1; zero_extend of
;; operand 1 into operand 3 in a parallel with a FLAGS_REG clobber.
;; Conditions: operand 1 is dead after the window or matches operand 3;
;; operand 3 overlaps neither operand 0 nor operand 4 and is not set by
;; operand 4; peep2_regno_dead_p (0, FLAGS_REG) holds.
;; Replacement: ix86_expand_clear on operand 3, the original parallel, and a
;; setcc into the QImode low part of operand 3 (operand 6).
15802 (define_peephole2
15803 [(parallel [(set (match_operand 5 "flags_reg_operand") (match_operand 0))
15804 (match_operand 4)])
15805 (set (match_operand:QI 1 "register_operand")
15806 (match_operator:QI 2 "ix86_comparison_operator"
15807 [(reg FLAGS_REG) (const_int 0)]))
15808 (parallel [(set (match_operand 3 "any_QIreg_operand")
15809 (zero_extend (match_dup 1)))
15810 (clobber (reg:CC FLAGS_REG))])]
15811 "(peep2_reg_dead_p (3, operands[1])
15812 || operands_match_p (operands[1], operands[3]))
15813 && ! reg_overlap_mentioned_p (operands[3], operands[0])
15814 && ! reg_overlap_mentioned_p (operands[3], operands[4])
15815 && ! reg_set_p (operands[3], operands[4])
15816 && peep2_regno_dead_p (0, FLAGS_REG)"
15817 [(parallel [(set (match_dup 5) (match_dup 0))
15818 (match_dup 4)])
15819 (set (strict_low_part (match_dup 6))
15820 (match_dup 2))]
15821 {
15822 operands[6] = gen_lowpart (QImode, operands[3]);
15823 ix86_expand_clear (operands[3]);
15824 })
15825
;; Window: a plain flags set (operands 6 and 0); a parallel flags set from
;; operand 1 with a second element, operand 5; QImode setcc into operand 2;
;; zero_extend of operand 2 into operand 4 in a parallel with a FLAGS_REG
;; clobber.
;; Conditions: operand 2 is dead after the window or matches operand 4;
;; operand 4 overlaps none of operands 0, 1 and 5 and is not set by
;; operand 5; operand 1 refers to FLAGS_REG;
;; peep2_regno_dead_p (0, FLAGS_REG) holds.
;; Replacement: ix86_expand_clear on operand 4, both flags-setting insns
;; unchanged, and a setcc into the QImode low part of operand 4 (operand 8).
15826 (define_peephole2
15827 [(set (match_operand 6 "flags_reg_operand") (match_operand 0))
15828 (parallel [(set (match_operand 7 "flags_reg_operand") (match_operand 1))
15829 (match_operand 5)])
15830 (set (match_operand:QI 2 "register_operand")
15831 (match_operator:QI 3 "ix86_comparison_operator"
15832 [(reg FLAGS_REG) (const_int 0)]))
15833 (parallel [(set (match_operand 4 "any_QIreg_operand")
15834 (zero_extend (match_dup 2)))
15835 (clobber (reg:CC FLAGS_REG))])]
15836 "(peep2_reg_dead_p (4, operands[2])
15837 || operands_match_p (operands[2], operands[4]))
15838 && ! reg_overlap_mentioned_p (operands[4], operands[0])
15839 && ! reg_overlap_mentioned_p (operands[4], operands[1])
15840 && ! reg_overlap_mentioned_p (operands[4], operands[5])
15841 && ! reg_set_p (operands[4], operands[5])
15842 && refers_to_regno_p (FLAGS_REG, operands[1], (rtx *)NULL)
15843 && peep2_regno_dead_p (0, FLAGS_REG)"
15844 [(set (match_dup 6) (match_dup 0))
15845 (parallel [(set (match_dup 7) (match_dup 1))
15846 (match_dup 5)])
15847 (set (strict_low_part (match_dup 8))
15848 (match_dup 3))]
15849 {
15850 operands[8] = gen_lowpart (QImode, operands[4]);
15851 ix86_expand_clear (operands[4]);
15852 })
15853 \f
15854 ;; Call instructions.
15855
15856 ;; The predicates normally associated with named expanders are not properly
15857 ;; checked for calls. This is a bug in the generic code, but it isn't that
15858 ;; easy to fix. Ignore it for now and be prepared to fix things up.
15859
15860 ;; P6 processors will jump to the address after the decrement when %esp
15861 ;; is used as a call operand, so they will execute the return address as code.
15862 ;; See Pentium Pro errata 70, Pentium 2 errata A33 and Pentium 3 errata E17.
15863
15864 ;; Register constraint for call instruction.
;; Maps SI to the constraint letter "l" and DI to "r"; it is substituted for
;; <c> in the operand constraints of the *call and *call_value patterns.
15865 (define_mode_attr c [(SI "l") (DI "r")])
15866
15867 ;; Call subroutine returning no value.
15868
;; Expander for a call without a return value.  All work is delegated to
;; ix86_expand_call with a NULL value destination, a NULL pop argument and
;; the final (sibcall) flag set to false; the RTL template itself is not
;; emitted because the body ends in DONE.
15869 (define_expand "call"
15870 [(call (match_operand:QI 0)
15871 (match_operand 1))
15872 (use (match_operand 2))]
15873 ""
15874 {
15875 ix86_expand_call (NULL, operands[0], operands[1],
15876 operands[2], NULL, false);
15877 DONE;
15878 })
15879
;; Expander for a sibling call without a return value.  Identical to "call"
;; except that the last argument to ix86_expand_call is true.
15880 (define_expand "sibcall"
15881 [(call (match_operand:QI 0)
15882 (match_operand 1))
15883 (use (match_operand 2))]
15884 ""
15885 {
15886 ix86_expand_call (NULL, operands[0], operands[1],
15887 operands[2], NULL, true);
15888 DONE;
15889 })
15890
;; Non-sibling call through the address in operand 0 (word mode W, constraint
;; "<c>BwBz").  Output is produced by ix86_output_call_insn.
15891 (define_insn "*call"
15892 [(call (mem:QI (match_operand:W 0 "call_insn_operand" "<c>BwBz"))
15893 (match_operand 1))]
15894 "!SIBLING_CALL_P (insn)"
15895 "* return ix86_output_call_insn (insn, operands[0]);"
15896 [(set_attr "type" "call")])
15897
15898 ;; This covers both call and sibcall since only GOT slot is allowed.
;; TARGET_X32 only: call through a zero-extended SImode GOT memory operand.
;; For output, a DImode constant MEM is rebuilt from the address of operand 0
;; and handed to ix86_output_call_insn.
15899 (define_insn "*call_got_x32"
15900 [(call (mem:QI (zero_extend:DI
15901 (match_operand:SI 0 "GOT_memory_operand" "Bg")))
15902 (match_operand 1))]
15903 "TARGET_X32"
15904 {
15905 rtx fnaddr = gen_const_mem (DImode, XEXP (operands[0], 0));
15906 return ix86_output_call_insn (insn, fnaddr);
15907 }
15908 [(set_attr "type" "call")])
15909
15910 ;; Since sibcall never returns, we can only use call-clobbered register
15911 ;; as GOT base.
;; 32-bit sibling call through a GOT slot addressed as register (operand 0,
;; constraint "U") plus GOT32 symbol (operand 1).  Disabled for TARGET_MACHO,
;; TARGET_64BIT and TARGET_INDIRECT_BRANCH_REGISTER.  For output, the
;; register+symbol sum is wrapped in an SImode constant MEM and passed to
;; ix86_output_call_insn.
15912 (define_insn "*sibcall_GOT_32"
15913 [(call (mem:QI
15914 (mem:SI (plus:SI
15915 (match_operand:SI 0 "register_no_elim_operand" "U")
15916 (match_operand:SI 1 "GOT32_symbol_operand"))))
15917 (match_operand 2))]
15918 "!TARGET_MACHO
15919 && !TARGET_64BIT
15920 && !TARGET_INDIRECT_BRANCH_REGISTER
15921 && SIBLING_CALL_P (insn)"
15922 {
15923 rtx fnaddr = gen_rtx_PLUS (SImode, operands[0], operands[1]);
15924 fnaddr = gen_const_mem (SImode, fnaddr);
15925 return ix86_output_call_insn (insn, fnaddr);
15926 }
15927 [(set_attr "type" "call")])
15928
;; Sibling call through the address in operand 0 (word mode W, constraint
;; "UBsBz").  Output is produced by ix86_output_call_insn.
15929 (define_insn "*sibcall"
15930 [(call (mem:QI (match_operand:W 0 "sibcall_insn_operand" "UBsBz"))
15931 (match_operand 1))]
15932 "SIBLING_CALL_P (insn)"
15933 "* return ix86_output_call_insn (insn, operands[0]);"
15934 [(set_attr "type" "call")])
15935
;; Call whose target address is a memory operand, tagged with
;; UNSPEC_PEEPSIB.  This is the form produced by the two peephole2 patterns
;; that follow.  Not available for TARGET_X32 or
;; TARGET_INDIRECT_BRANCH_REGISTER.
15936 (define_insn "*sibcall_memory"
15937 [(call (mem:QI (match_operand:W 0 "memory_operand" "m"))
15938 (match_operand 1))
15939 (unspec [(const_int 0)] UNSPEC_PEEPSIB)]
15940 "!TARGET_X32 && !TARGET_INDIRECT_BRANCH_REGISTER"
15941 "* return ix86_output_call_insn (insn, operands[0]);"
15942 [(set_attr "type" "call")])
15943
;; Folds "load operand 1 from memory into register operand 0; sibling call
;; through operand 0" into a single call through the memory operand, marked
;; with UNSPEC_PEEPSIB.  Only for sibling calls, only when operand 0 is not
;; mentioned in the call's CALL_INSN_FUNCTION_USAGE, and never for
;; TARGET_X32 or TARGET_INDIRECT_BRANCH_REGISTER.
15944 (define_peephole2
15945 [(set (match_operand:W 0 "register_operand")
15946 (match_operand:W 1 "memory_operand"))
15947 (call (mem:QI (match_dup 0))
15948 (match_operand 3))]
15949 "!TARGET_X32
15950 && !TARGET_INDIRECT_BRANCH_REGISTER
15951 && SIBLING_CALL_P (peep2_next_insn (1))
15952 && !reg_mentioned_p (operands[0],
15953 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))"
15954 [(parallel [(call (mem:QI (match_dup 1))
15955 (match_dup 3))
15956 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
15957
;; Same folding as the previous peephole, for the case where an
;; UNSPECV_BLOCKAGE insn sits between the load and the sibling call.  The
;; blockage is kept and emitted before the combined call; the call is
;; therefore insn 2 of the window.
15958 (define_peephole2
15959 [(set (match_operand:W 0 "register_operand")
15960 (match_operand:W 1 "memory_operand"))
15961 (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
15962 (call (mem:QI (match_dup 0))
15963 (match_operand 3))]
15964 "!TARGET_X32
15965 && !TARGET_INDIRECT_BRANCH_REGISTER
15966 && SIBLING_CALL_P (peep2_next_insn (2))
15967 && !reg_mentioned_p (operands[0],
15968 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))"
15969 [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
15970 (parallel [(call (mem:QI (match_dup 1))
15971 (match_dup 3))
15972 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
15973
;; 32-bit only expander for a call combined with a stack-pointer adjustment
;; by operand 3.  Delegates to ix86_expand_call, passing operand 3 as the
;; pop amount.  Note that operands[2] is forwarded to ix86_expand_call even
;; though the template only matches operands 0, 1 and 3.
15974 (define_expand "call_pop"
15975 [(parallel [(call (match_operand:QI 0)
15976 (match_operand:SI 1))
15977 (set (reg:SI SP_REG)
15978 (plus:SI (reg:SI SP_REG)
15979 (match_operand:SI 3)))])]
15980 "!TARGET_64BIT"
15981 {
15982 ix86_expand_call (NULL, operands[0], operands[1],
15983 operands[2], operands[3], false);
15984 DONE;
15985 })
15986
;; 32-bit non-sibling call that also adds the immediate operand 2 to the
;; stack pointer.  Output is produced by ix86_output_call_insn.
15987 (define_insn "*call_pop"
15988 [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lBwBz"))
15989 (match_operand 1))
15990 (set (reg:SI SP_REG)
15991 (plus:SI (reg:SI SP_REG)
15992 (match_operand:SI 2 "immediate_operand" "i")))]
15993 "!TARGET_64BIT && !SIBLING_CALL_P (insn)"
15994 "* return ix86_output_call_insn (insn, operands[0]);"
15995 [(set_attr "type" "call")])
15996
;; 32-bit sibling call that also adds the immediate operand 2 to the stack
;; pointer.  Output is produced by ix86_output_call_insn.
15997 (define_insn "*sibcall_pop"
15998 [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "UBsBz"))
15999 (match_operand 1))
16000 (set (reg:SI SP_REG)
16001 (plus:SI (reg:SI SP_REG)
16002 (match_operand:SI 2 "immediate_operand" "i")))]
16003 "!TARGET_64BIT && SIBLING_CALL_P (insn)"
16004 "* return ix86_output_call_insn (insn, operands[0]);"
16005 [(set_attr "type" "call")])
16006
;; 32-bit call through a memory operand (constraint "Bs") with a
;; stack-pointer adjustment by operand 2, tagged with UNSPEC_PEEPSIB.  This
;; is the form produced by the two peephole2 patterns that follow.
;; NOTE(review): the value-returning twin *sibcall_value_pop_memory uses
;; constraint "m" instead of "Bs" -- confirm which one is intended.
16007 (define_insn "*sibcall_pop_memory"
16008 [(call (mem:QI (match_operand:SI 0 "memory_operand" "Bs"))
16009 (match_operand 1))
16010 (set (reg:SI SP_REG)
16011 (plus:SI (reg:SI SP_REG)
16012 (match_operand:SI 2 "immediate_operand" "i")))
16013 (unspec [(const_int 0)] UNSPEC_PEEPSIB)]
16014 "!TARGET_64BIT"
16015 "* return ix86_output_call_insn (insn, operands[0]);"
16016 [(set_attr "type" "call")])
16017
;; Folds "load operand 1 from memory into register operand 0; sibling call
;; through operand 0 with a stack-pointer adjustment by operand 4" into a
;; single call through the memory operand, marked with UNSPEC_PEEPSIB.
;; Only for 32-bit sibling calls where operand 0 is not mentioned in the
;; call's CALL_INSN_FUNCTION_USAGE.
;; The transformation is also disabled for TARGET_INDIRECT_BRANCH_REGISTER,
;; matching the non-pop sibcall peepholes and *sibcall_memory: it would
;; otherwise create a memory-indirect call under an option for which the
;; other patterns in this file refuse one.
16018 (define_peephole2
16019 [(set (match_operand:SI 0 "register_operand")
16020 (match_operand:SI 1 "memory_operand"))
16021 (parallel [(call (mem:QI (match_dup 0))
16022 (match_operand 3))
16023 (set (reg:SI SP_REG)
16024 (plus:SI (reg:SI SP_REG)
16025 (match_operand:SI 4 "immediate_operand")))])]
16026 "!TARGET_64BIT && !TARGET_INDIRECT_BRANCH_REGISTER && SIBLING_CALL_P (peep2_next_insn (1))
16027 && !reg_mentioned_p (operands[0],
16028 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))"
16029 [(parallel [(call (mem:QI (match_dup 1))
16030 (match_dup 3))
16031 (set (reg:SI SP_REG)
16032 (plus:SI (reg:SI SP_REG)
16033 (match_dup 4)))
16034 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
16035
;; Folds "load operand 1 into register operand 0; blockage; sibling call
;; through operand 0 with a stack-pointer adjustment by operand 4" into the
;; blockage followed by a single call through the memory operand, marked
;; with UNSPEC_PEEPSIB.  The call is insn 2 of the window.
;; Only for 32-bit sibling calls where operand 0 is not mentioned in the
;; call's CALL_INSN_FUNCTION_USAGE.
;; The transformation is also disabled for TARGET_INDIRECT_BRANCH_REGISTER,
;; matching the non-pop sibcall peepholes and *sibcall_memory: it would
;; otherwise create a memory-indirect call under an option for which the
;; other patterns in this file refuse one.
16036 (define_peephole2
16037 [(set (match_operand:SI 0 "register_operand")
16038 (match_operand:SI 1 "memory_operand"))
16039 (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
16040 (parallel [(call (mem:QI (match_dup 0))
16041 (match_operand 3))
16042 (set (reg:SI SP_REG)
16043 (plus:SI (reg:SI SP_REG)
16044 (match_operand:SI 4 "immediate_operand")))])]
16045 "!TARGET_64BIT && !TARGET_INDIRECT_BRANCH_REGISTER && SIBLING_CALL_P (peep2_next_insn (2))
16046 && !reg_mentioned_p (operands[0],
16047 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))"
16048 [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
16049 (parallel [(call (mem:QI (match_dup 1))
16050 (match_dup 3))
16051 (set (reg:SI SP_REG)
16052 (plus:SI (reg:SI SP_REG)
16053 (match_dup 4)))
16054 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
16055
16056 ;; Combine a load into a register followed by a jump through that register into a single jump through memory.
16057
;; Replaces "load operand 1 from memory into register operand 0; jump to
;; operand 0" with a jump whose target is the memory operand itself.
;; Requires that operand 0 is dead after the window; disabled for TARGET_X32
;; and TARGET_INDIRECT_BRANCH_REGISTER.
16058 (define_peephole2
16059 [(set (match_operand:W 0 "register_operand")
16060 (match_operand:W 1 "memory_operand"))
16061 (set (pc) (match_dup 0))]
16062 "!TARGET_X32
16063 && !TARGET_INDIRECT_BRANCH_REGISTER
16064 && peep2_reg_dead_p (2, operands[0])"
16065 [(set (pc) (match_dup 1))])
16066
16067 ;; Call subroutine, returning value in operand 0
16068
;; Expander for a call whose value is stored in operand 0.  All work is
;; delegated to ix86_expand_call with a NULL pop argument and the final
;; (sibcall) flag set to false; the template is not emitted (DONE).
16069 (define_expand "call_value"
16070 [(set (match_operand 0)
16071 (call (match_operand:QI 1)
16072 (match_operand 2)))
16073 (use (match_operand 3))]
16074 ""
16075 {
16076 ix86_expand_call (operands[0], operands[1], operands[2],
16077 operands[3], NULL, false);
16078 DONE;
16079 })
16080
;; Expander for a sibling call whose value is stored in operand 0.
;; Identical to "call_value" except that the last argument to
;; ix86_expand_call is true.
16081 (define_expand "sibcall_value"
16082 [(set (match_operand 0)
16083 (call (match_operand:QI 1)
16084 (match_operand 2)))
16085 (use (match_operand 3))]
16086 ""
16087 {
16088 ix86_expand_call (operands[0], operands[1], operands[2],
16089 operands[3], NULL, true);
16090 DONE;
16091 })
16092
;; Non-sibling call through the address in operand 1 (word mode W, constraint
;; "<c>BwBz") whose result is set into operand 0.  Output is produced by
;; ix86_output_call_insn; the insn type is "callv".
16093 (define_insn "*call_value"
16094 [(set (match_operand 0)
16095 (call (mem:QI (match_operand:W 1 "call_insn_operand" "<c>BwBz"))
16096 (match_operand 2)))]
16097 "!SIBLING_CALL_P (insn)"
16098 "* return ix86_output_call_insn (insn, operands[1]);"
16099 [(set_attr "type" "callv")])
16100
16101 ;; This covers both call and sibcall since only GOT slot is allowed.
;; TARGET_X32 only: value-returning call through a zero-extended SImode GOT
;; memory operand.  For output, a DImode constant MEM is rebuilt from the
;; address of operand 1 and handed to ix86_output_call_insn.
16102 (define_insn "*call_value_got_x32"
16103 [(set (match_operand 0)
16104 (call (mem:QI
16105 (zero_extend:DI
16106 (match_operand:SI 1 "GOT_memory_operand" "Bg")))
16107 (match_operand 2)))]
16108 "TARGET_X32"
16109 {
16110 rtx fnaddr = gen_const_mem (DImode, XEXP (operands[1], 0));
16111 return ix86_output_call_insn (insn, fnaddr);
16112 }
16113 [(set_attr "type" "callv")])
16114
16115 ;; Since sibcall never returns, we can only use call-clobbered register
16116 ;; as GOT base.
;; 32-bit value-returning sibling call through a GOT slot addressed as
;; register (operand 1, constraint "U") plus GOT32 symbol (operand 2).
;; Disabled for TARGET_MACHO, TARGET_64BIT and
;; TARGET_INDIRECT_BRANCH_REGISTER.  For output, the register+symbol sum is
;; wrapped in an SImode constant MEM and passed to ix86_output_call_insn.
16117 (define_insn "*sibcall_value_GOT_32"
16118 [(set (match_operand 0)
16119 (call (mem:QI
16120 (mem:SI (plus:SI
16121 (match_operand:SI 1 "register_no_elim_operand" "U")
16122 (match_operand:SI 2 "GOT32_symbol_operand"))))
16123 (match_operand 3)))]
16124 "!TARGET_MACHO
16125 && !TARGET_64BIT
16126 && !TARGET_INDIRECT_BRANCH_REGISTER
16127 && SIBLING_CALL_P (insn)"
16128 {
16129 rtx fnaddr = gen_rtx_PLUS (SImode, operands[1], operands[2]);
16130 fnaddr = gen_const_mem (SImode, fnaddr);
16131 return ix86_output_call_insn (insn, fnaddr);
16132 }
16133 [(set_attr "type" "callv")])
16134
;; Value-returning sibling call through the address in operand 1 (word mode
;; W, constraint "UBsBz").  Output is produced by ix86_output_call_insn.
16135 (define_insn "*sibcall_value"
16136 [(set (match_operand 0)
16137 (call (mem:QI (match_operand:W 1 "sibcall_insn_operand" "UBsBz"))
16138 (match_operand 2)))]
16139 "SIBLING_CALL_P (insn)"
16140 "* return ix86_output_call_insn (insn, operands[1]);"
16141 [(set_attr "type" "callv")])
16142
;; Value-returning call whose target address is a memory operand, tagged
;; with UNSPEC_PEEPSIB.  This is the form produced by the two peephole2
;; patterns that follow.  Not available for TARGET_X32 or
;; TARGET_INDIRECT_BRANCH_REGISTER.
16143 (define_insn "*sibcall_value_memory"
16144 [(set (match_operand 0)
16145 (call (mem:QI (match_operand:W 1 "memory_operand" "m"))
16146 (match_operand 2)))
16147 (unspec [(const_int 0)] UNSPEC_PEEPSIB)]
16148 "!TARGET_X32 && !TARGET_INDIRECT_BRANCH_REGISTER"
16149 "* return ix86_output_call_insn (insn, operands[1]);"
16150 [(set_attr "type" "callv")])
16151
;; Folds "load operand 1 from memory into register operand 0; value-returning
;; sibling call through operand 0" into a single call through the memory
;; operand, marked with UNSPEC_PEEPSIB.  Only for sibling calls, only when
;; operand 0 is not mentioned in the call's CALL_INSN_FUNCTION_USAGE, and
;; never for TARGET_X32 or TARGET_INDIRECT_BRANCH_REGISTER.
16152 (define_peephole2
16153 [(set (match_operand:W 0 "register_operand")
16154 (match_operand:W 1 "memory_operand"))
16155 (set (match_operand 2)
16156 (call (mem:QI (match_dup 0))
16157 (match_operand 3)))]
16158 "!TARGET_X32
16159 && !TARGET_INDIRECT_BRANCH_REGISTER
16160 && SIBLING_CALL_P (peep2_next_insn (1))
16161 && !reg_mentioned_p (operands[0],
16162 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))"
16163 [(parallel [(set (match_dup 2)
16164 (call (mem:QI (match_dup 1))
16165 (match_dup 3)))
16166 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
16167
;; Same folding as the previous peephole, for the case where an
;; UNSPECV_BLOCKAGE insn sits between the load and the value-returning
;; sibling call.  The blockage is kept and emitted before the combined call;
;; the call is insn 2 of the window.
16168 (define_peephole2
16169 [(set (match_operand:W 0 "register_operand")
16170 (match_operand:W 1 "memory_operand"))
16171 (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
16172 (set (match_operand 2)
16173 (call (mem:QI (match_dup 0))
16174 (match_operand 3)))]
16175 "!TARGET_X32
16176 && !TARGET_INDIRECT_BRANCH_REGISTER
16177 && SIBLING_CALL_P (peep2_next_insn (2))
16178 && !reg_mentioned_p (operands[0],
16179 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))"
16180 [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
16181 (parallel [(set (match_dup 2)
16182 (call (mem:QI (match_dup 1))
16183 (match_dup 3)))
16184 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
16185
;; 32-bit only expander for a value-returning call combined with a
;; stack-pointer adjustment by operand 4.  Delegates to ix86_expand_call,
;; passing operand 4 as the pop amount.  Note that operands[3] is forwarded
;; to ix86_expand_call even though the template only matches operands 0, 1,
;; 2 and 4.
16186 (define_expand "call_value_pop"
16187 [(parallel [(set (match_operand 0)
16188 (call (match_operand:QI 1)
16189 (match_operand:SI 2)))
16190 (set (reg:SI SP_REG)
16191 (plus:SI (reg:SI SP_REG)
16192 (match_operand:SI 4)))])]
16193 "!TARGET_64BIT"
16194 {
16195 ix86_expand_call (operands[0], operands[1], operands[2],
16196 operands[3], operands[4], false);
16197 DONE;
16198 })
16199
;; 32-bit non-sibling value-returning call that also adds the immediate
;; operand 3 to the stack pointer.  Output is produced by
;; ix86_output_call_insn.
16200 (define_insn "*call_value_pop"
16201 [(set (match_operand 0)
16202 (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lBwBz"))
16203 (match_operand 2)))
16204 (set (reg:SI SP_REG)
16205 (plus:SI (reg:SI SP_REG)
16206 (match_operand:SI 3 "immediate_operand" "i")))]
16207 "!TARGET_64BIT && !SIBLING_CALL_P (insn)"
16208 "* return ix86_output_call_insn (insn, operands[1]);"
16209 [(set_attr "type" "callv")])
16210
;; 32-bit value-returning sibling call that also adds the immediate operand 3
;; to the stack pointer.  Output is produced by ix86_output_call_insn.
16211 (define_insn "*sibcall_value_pop"
16212 [(set (match_operand 0)
16213 (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "UBsBz"))
16214 (match_operand 2)))
16215 (set (reg:SI SP_REG)
16216 (plus:SI (reg:SI SP_REG)
16217 (match_operand:SI 3 "immediate_operand" "i")))]
16218 "!TARGET_64BIT && SIBLING_CALL_P (insn)"
16219 "* return ix86_output_call_insn (insn, operands[1]);"
16220 [(set_attr "type" "callv")])
16221
;; 32-bit value-returning call through a memory operand (constraint "m") with
;; a stack-pointer adjustment by operand 3, tagged with UNSPEC_PEEPSIB.  This
;; is the form produced by the two peephole2 patterns that follow.
;; NOTE(review): the non-value twin *sibcall_pop_memory uses constraint "Bs"
;; instead of "m" -- confirm which one is intended.
16222 (define_insn "*sibcall_value_pop_memory"
16223 [(set (match_operand 0)
16224 (call (mem:QI (match_operand:SI 1 "memory_operand" "m"))
16225 (match_operand 2)))
16226 (set (reg:SI SP_REG)
16227 (plus:SI (reg:SI SP_REG)
16228 (match_operand:SI 3 "immediate_operand" "i")))
16229 (unspec [(const_int 0)] UNSPEC_PEEPSIB)]
16230 "!TARGET_64BIT"
16231 "* return ix86_output_call_insn (insn, operands[1]);"
16232 [(set_attr "type" "callv")])
16233
;; Folds "load operand 1 from memory into register operand 0; value-returning
;; sibling call through operand 0 with a stack-pointer adjustment by
;; operand 4" into a single call through the memory operand, marked with
;; UNSPEC_PEEPSIB.  Only for 32-bit sibling calls where operand 0 is not
;; mentioned in the call's CALL_INSN_FUNCTION_USAGE.
;; The transformation is also disabled for TARGET_INDIRECT_BRANCH_REGISTER,
;; matching the non-pop sibcall_value peepholes and *sibcall_value_memory:
;; it would otherwise create a memory-indirect call under an option for
;; which the other patterns in this file refuse one.
16234 (define_peephole2
16235 [(set (match_operand:SI 0 "register_operand")
16236 (match_operand:SI 1 "memory_operand"))
16237 (parallel [(set (match_operand 2)
16238 (call (mem:QI (match_dup 0))
16239 (match_operand 3)))
16240 (set (reg:SI SP_REG)
16241 (plus:SI (reg:SI SP_REG)
16242 (match_operand:SI 4 "immediate_operand")))])]
16243 "!TARGET_64BIT && !TARGET_INDIRECT_BRANCH_REGISTER && SIBLING_CALL_P (peep2_next_insn (1))
16244 && !reg_mentioned_p (operands[0],
16245 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))"
16246 [(parallel [(set (match_dup 2)
16247 (call (mem:QI (match_dup 1))
16248 (match_dup 3)))
16249 (set (reg:SI SP_REG)
16250 (plus:SI (reg:SI SP_REG)
16251 (match_dup 4)))
16252 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
16253
;; Folds "load operand 1 into register operand 0; blockage; value-returning
;; sibling call through operand 0 with a stack-pointer adjustment by
;; operand 4" into the blockage followed by a single call through the memory
;; operand, marked with UNSPEC_PEEPSIB.  The call is insn 2 of the window.
;; Only for 32-bit sibling calls where operand 0 is not mentioned in the
;; call's CALL_INSN_FUNCTION_USAGE.
;; The transformation is also disabled for TARGET_INDIRECT_BRANCH_REGISTER,
;; matching the non-pop sibcall_value peepholes and *sibcall_value_memory:
;; it would otherwise create a memory-indirect call under an option for
;; which the other patterns in this file refuse one.
16254 (define_peephole2
16255 [(set (match_operand:SI 0 "register_operand")
16256 (match_operand:SI 1 "memory_operand"))
16257 (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
16258 (parallel [(set (match_operand 2)
16259 (call (mem:QI (match_dup 0))
16260 (match_operand 3)))
16261 (set (reg:SI SP_REG)
16262 (plus:SI (reg:SI SP_REG)
16263 (match_operand:SI 4 "immediate_operand")))])]
16264 "!TARGET_64BIT && !TARGET_INDIRECT_BRANCH_REGISTER && SIBLING_CALL_P (peep2_next_insn (2))
16265 && !reg_mentioned_p (operands[0],
16266 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))"
16267 [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
16268 (parallel [(set (match_dup 2)
16269 (call (mem:QI (match_dup 1))
16270 (match_dup 3)))
16271 (set (reg:SI SP_REG)
16272 (plus:SI (reg:SI SP_REG)
16273 (match_dup 4)))
16274 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
16275
16276 ;; Call subroutine returning any type.
16277
;; Expander for a call returning an unknown type.  Operand 0 is the callee
;; and operand 2 is a vector of SETs.  The body:
;;   1. calls ix86_expand_call with an XCmode register at FIRST_FLOAT_REG as
;;      the value when TARGET_FLOAT_RETURNS_IN_80387 (NULL otherwise), and an
;;      SSE register-parameter count chosen from the target and ABI, minus 1;
;;   2. emits one move per element of operand 2, from SET_SRC to SET_DEST;
;;   3. emits a blockage insn.
;; Operand 1 is matched but not referenced by the body.
16278 (define_expand "untyped_call"
16279 [(parallel [(call (match_operand 0)
16280 (const_int 0))
16281 (match_operand 1)
16282 (match_operand 2)])]
16283 ""
16284 {
16285 int i;
16286
16287 /* In order to give reg-stack an easier job in validating two
16288 coprocessor registers as containing a possible return value,
16289 simply pretend the untyped call returns a complex long double
16290 value.
16291
16292 We can't use SSE_REGPARM_MAX here since callee is unprototyped
16293 and should have the default ABI. */
16294
16295 ix86_expand_call ((TARGET_FLOAT_RETURNS_IN_80387
16296 ? gen_rtx_REG (XCmode, FIRST_FLOAT_REG) : NULL),
16297 operands[0], const0_rtx,
16298 GEN_INT ((TARGET_64BIT
16299 ? (ix86_abi == SYSV_ABI
16300 ? X86_64_SSE_REGPARM_MAX
16301 : X86_64_MS_SSE_REGPARM_MAX)
16302 : X86_32_SSE_REGPARM_MAX)
16303 - 1),
16304 NULL, false);
16305
16306 for (i = 0; i < XVECLEN (operands[2], 0); i++)
16307 {
16308 rtx set = XVECEXP (operands[2], 0, i);
16309 emit_move_insn (SET_DEST (set), SET_SRC (set));
16310 }
16311
16312 /* The optimizer does not know that the call sets the function value
16313 registers we stored in the result block. We avoid problems by
16314 claiming that all hard registers are used and clobbered at this
16315 point. */
16316 emit_insn (gen_blockage ());
16317
16318 DONE;
16319 })
16320 \f
16321 ;; Prologue and epilogue instructions
16322
16323 ;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and
16324 ;; all of memory. This blocks insns from being moved across this point.
16325
;; Zero-length insn consisting solely of an UNSPECV_BLOCKAGE
;; unspec_volatile; it has an empty output template and is always enabled.
16326 (define_insn "blockage"
16327 [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
16328 ""
16329 ""
16330 [(set_attr "length" "0")])
16331
16332 ;; Do not schedule instructions accessing memory across this point.
16333
;; Expander that builds operand 0 as a volatile BLKmode MEM whose address is
;; a Pmode SCRATCH, then emits a set of that MEM from an
;; UNSPEC_MEMORY_BLOCKAGE unspec of itself.
16334 (define_expand "memory_blockage"
16335 [(set (match_dup 0)
16336 (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BLOCKAGE))]
16337 ""
16338 {
16339 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
16340 MEM_VOLATILE_P (operands[0]) = 1;
16341 })
16342
;; Zero-length insn matching the RTL emitted by the "memory_blockage"
;; expander; it produces no assembly output.
16343 (define_insn "*memory_blockage"
16344 [(set (match_operand:BLK 0)
16345 (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BLOCKAGE))]
16346 ""
16347 ""
16348 [(set_attr "length" "0")])
16349
16350 ;; As USE insns aren't meaningful after reload, this is used instead
16351 ;; to prevent deleting instructions setting registers for PIC code
;; Zero-length insn wrapping operand 0 in an UNSPECV_PROLOGUE_USE
;; unspec_volatile; it produces no assembly output.
16352 (define_insn "prologue_use"
16353 [(unspec_volatile [(match_operand 0)] UNSPECV_PROLOGUE_USE)]
16354 ""
16355 ""
16356 [(set_attr "length" "0")])
16357
16358 ;; Insn emitted into the body of a function to return from a function.
16359 ;; This is only done if the function's epilogue is known to be simple.
16360 ;; See comments for ix86_can_use_return_insn_p in i386.cc.
16361
;; Available only when ix86_can_use_return_insn_p () is true.  If the
;; function pops its arguments (crtl->args.pops_args is nonzero), a
;; simple_return_pop_internal jump insn carrying that count is emitted
;; instead of the template; otherwise the (simple_return) template is used.
16362 (define_expand "return"
16363 [(simple_return)]
16364 "ix86_can_use_return_insn_p ()"
16365 {
16366 if (crtl->args.pops_args)
16367 {
16368 rtx popc = GEN_INT (crtl->args.pops_args);
16369 emit_jump_insn (gen_simple_return_pop_internal (popc));
16370 DONE;
16371 }
16372 })
16373
16374 ;; We need to disable this for TARGET_SEH, as otherwise
16375 ;; shrink-wrapped prologue gets enabled too. This might exceed
16376 ;; the maximum size of prologue in unwind information.
16377 ;; Also disallow shrink-wrapping if using stack slot to pass the
16378 ;; static chain pointer - the first instruction has to be pushl %esi
16379 ;; and it can't be moved around, as we use alternate entry points
16380 ;; in that case.
16381 ;; Also disallow for ms_hook_prologue functions which have frame
16382 ;; pointer set up in function label which is correctly handled in
16383 ;; ix86_expand_{prologue|epilogue}() only.
16384
;; Enabled unless TARGET_SEH, ix86_static_chain_on_stack, or the current
;; function is an ms_hook_prologue function.  If the function pops its
;; arguments (crtl->args.pops_args is nonzero), a simple_return_pop_internal
;; jump insn carrying that count is emitted instead of the template.
16385 (define_expand "simple_return"
16386 [(simple_return)]
16387 "!TARGET_SEH && !ix86_static_chain_on_stack && !ix86_function_ms_hook_prologue (cfun->decl)"
16388 {
16389 if (crtl->args.pops_args)
16390 {
16391 rtx popc = GEN_INT (crtl->args.pops_args);
16392 emit_jump_insn (gen_simple_return_pop_internal (popc));
16393 DONE;
16394 }
16395 })
16396
;; Plain function return, valid only after reload.  The text is produced by
;; ix86_output_function_return (false); the declared length is 1 byte.
16397 (define_insn "simple_return_internal"
16398 [(simple_return)]
16399 "reload_completed"
16400 "* return ix86_output_function_return (false);"
16401 [(set_attr "length" "1")
16402 (set_attr "atom_unit" "jeu")
16403 (set_attr "length_immediate" "0")
16404 (set_attr "modrm" "0")])
16405
;; Return tagged with UNSPEC_INTERRUPT_RETURN, valid only after reload.
;; Emits "uiret" for TARGET_64BIT with TARGET_UINTR, "iretq" for TARGET_64BIT
;; without it, and "iret" otherwise.
16406 (define_insn "interrupt_return"
16407 [(simple_return)
16408 (unspec [(const_int 0)] UNSPEC_INTERRUPT_RETURN)]
16409 "reload_completed"
16410 {
16411 return TARGET_64BIT ? (TARGET_UINTR ? "uiret" : "iretq") : "iret";
16412 })
16413
16414 ;; Used by x86_machine_dependent_reorg to avoid penalty on single byte RET
16415 ;; instruction Athlon and K8 have.
16416
;; Return tagged with UNSPEC_REP, valid only after reload.  The text is
;; produced by ix86_output_function_return (true); the declared length is
;; 2 bytes and the prefix_rep attribute is set.
16417 (define_insn "simple_return_internal_long"
16418 [(simple_return)
16419 (unspec [(const_int 0)] UNSPEC_REP)]
16420 "reload_completed"
16421 "* return ix86_output_function_return (true);"
16422 [(set_attr "length" "2")
16423 (set_attr "atom_unit" "jeu")
16424 (set_attr "length_immediate" "0")
16425 (set_attr "prefix_rep" "1")
16426 (set_attr "modrm" "0")])
16427
;; Return that pops operand 0 bytes ("ret" with an immediate), valid only
;; after reload.  When the function's function_return_type is not
;; indirect_branch_keep, the insn is split instead, with the replacement
;; sequence generated by ix86_split_simple_return_pop_internal.
16428 (define_insn_and_split "simple_return_pop_internal"
16429 [(simple_return)
16430 (use (match_operand:SI 0 "const_int_operand"))]
16431 "reload_completed"
16432 "ret\t%0"
16433 "&& cfun->machine->function_return_type != indirect_branch_keep"
16434 [(const_int 0)]
16435 "ix86_split_simple_return_pop_internal (operands[0]); DONE;"
16436 [(set_attr "length" "3")
16437 (set_attr "atom_unit" "jeu")
16438 (set_attr "length_immediate" "2")
16439 (set_attr "modrm" "0")])
16440
;; Expander with no condition and no preparation code: emits a parallel of
;; (simple_return) and a use of the register in operand 0.
16441 (define_expand "simple_return_indirect_internal"
16442 [(parallel
16443 [(simple_return)
16444 (use (match_operand 0 "register_operand"))])])
16445
;; Return through the word-mode register in operand 0, valid only after
;; reload.  The text is produced by ix86_output_indirect_function_return.
;; The insn type is "multi" whenever the function's indirect_branch_type is
;; not indirect_branch_keep, and "ibr" otherwise.
16446 (define_insn "*simple_return_indirect_internal<mode>"
16447 [(simple_return)
16448 (use (match_operand:W 0 "register_operand" "r"))]
16449 "reload_completed"
16450 "* return ix86_output_indirect_function_return (operands[0]);"
16451 [(set (attr "type")
16452 (if_then_else (match_test "(cfun->machine->indirect_branch_type
16453 != indirect_branch_keep)")
16454 (const_string "multi")
16455 (const_string "ibr")))
16456 (set_attr "length_immediate" "0")])
16457
;; Single "nop" instruction; declared length is 1 byte.
16458 (define_insn "nop"
16459 [(const_int 0)]
16460 ""
16461 "nop"
16462 [(set_attr "length" "1")
16463 (set_attr "length_immediate" "0")
16464 (set_attr "modrm" "0")])
16465
16466 ;; Generate nops. Operand 0 is the number of nops, up to 8.
;; Valid only after reload.  The output code asserts that operand 0 is in
;; the range 1..8, writes that many "\tnop\n" lines directly to asm_out_file
;; and returns an empty template.  The length attribute equals the value of
;; operand 0.
16467 (define_insn "nops"
16468 [(unspec_volatile [(match_operand 0 "const_int_operand")]
16469 UNSPECV_NOPS)]
16470 "reload_completed"
16471 {
16472 int num = INTVAL (operands[0]);
16473
16474 gcc_assert (IN_RANGE (num, 1, 8));
16475
16476 while (num--)
16477 fputs ("\tnop\n", asm_out_file);
16478
16479 return "";
16480 }
16481 [(set (attr "length") (symbol_ref "INTVAL (operands[0])"))
16482 (set_attr "length_immediate" "0")
16483 (set_attr "modrm" "0")])
16484
16485 ;; Pad to 16-byte boundary, max skip in op0. Used to avoid
16486 ;; branch prediction penalty for the third jump in a 16-byte
16487 ;; block on K8.
16488
;; When ASM_OUTPUT_MAX_SKIP_ALIGN is defined, it is invoked on asm_out_file
;; with arguments 4 and the integer value of operand 0; otherwise nothing is
;; emitted.  The returned template is empty in both cases, and the declared
;; length is 16.
16489 (define_insn "pad"
16490 [(unspec_volatile [(match_operand 0)] UNSPECV_ALIGN)]
16491 ""
16492 {
16493 #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
16494 ASM_OUTPUT_MAX_SKIP_ALIGN (asm_out_file, 4, (int)INTVAL (operands[0]));
16495 #else
16496 /* It is tempting to use ASM_OUTPUT_ALIGN here, but we don't want to do that.
16497 The align insn is used to avoid 3 jump instructions in the row to improve
16498 branch prediction and the benefits hardly outweigh the cost of extra 8
16499 nops on the average inserted by full alignment pseudo operation. */
16500 #endif
16501 return "";
16502 }
16503 [(set_attr "length" "16")])
16504
;; Function prologue expander; all code is generated by
;; ix86_expand_prologue.
16505 (define_expand "prologue"
16506 [(const_int 0)]
16507 ""
16508 "ix86_expand_prologue (); DONE;")
16509
;; 32-bit only.  Emits a set of the SImode register in operand 0 from an
;; UNSPEC_SET_GOT unspec, with a FLAGS_REG clobber.  As a side effect, sets
;; ix86_pc_thunk_call_expanded to true when flag_pic is set and
;; TARGET_VXWORKS_RTP is not.
16510 (define_expand "set_got"
16511 [(parallel
16512 [(set (match_operand:SI 0 "register_operand")
16513 (unspec:SI [(const_int 0)] UNSPEC_SET_GOT))
16514 (clobber (reg:CC FLAGS_REG))])]
16515 "!TARGET_64BIT"
16516 {
16517 if (flag_pic && !TARGET_VXWORKS_RTP)
16518 ix86_pc_thunk_call_expanded = true;
16519 })
16520
;; Insn matching the RTL from the "set_got" expander.  The text is produced
;; by output_set_got with a NULL_RTX label; the type is "multi" and the
;; declared length is 12.
16521 (define_insn "*set_got"
16522 [(set (match_operand:SI 0 "register_operand" "=r")
16523 (unspec:SI [(const_int 0)] UNSPEC_SET_GOT))
16524 (clobber (reg:CC FLAGS_REG))]
16525 "!TARGET_64BIT"
16526 "* return output_set_got (operands[0], NULL_RTX);"
16527 [(set_attr "type" "multi")
16528 (set_attr "length" "12")])
16529
;; 32-bit only.  Like "set_got", but the UNSPEC_SET_GOT unspec carries a
;; label_ref to operand 1.  Also sets ix86_pc_thunk_call_expanded to true
;; when flag_pic is set and TARGET_VXWORKS_RTP is not.
16530 (define_expand "set_got_labelled"
16531 [(parallel
16532 [(set (match_operand:SI 0 "register_operand")
16533 (unspec:SI [(label_ref (match_operand 1))]
16534 UNSPEC_SET_GOT))
16535 (clobber (reg:CC FLAGS_REG))])]
16536 "!TARGET_64BIT"
16537 {
16538 if (flag_pic && !TARGET_VXWORKS_RTP)
16539 ix86_pc_thunk_call_expanded = true;
16540 })
16541
;; Insn matching the RTL from the "set_got_labelled" expander.  The text is
;; produced by output_set_got, which receives the label in operand 1; the
;; type is "multi" and the declared length is 12.
16542 (define_insn "*set_got_labelled"
16543 [(set (match_operand:SI 0 "register_operand" "=r")
16544 (unspec:SI [(label_ref (match_operand 1))]
16545 UNSPEC_SET_GOT))
16546 (clobber (reg:CC FLAGS_REG))]
16547 "!TARGET_64BIT"
16548 "* return output_set_got (operands[0], operands[1]);"
16549 [(set_attr "type" "multi")
16550 (set_attr "length" "12")])
16551
;; 64-bit only.  Loads the RIP-relative address of _GLOBAL_OFFSET_TABLE_
;; into the DImode register in operand 0 with a single lea.
16552 (define_insn "set_got_rex64"
16553 [(set (match_operand:DI 0 "register_operand" "=r")
16554 (unspec:DI [(const_int 0)] UNSPEC_SET_GOT))]
16555 "TARGET_64BIT"
16556 "lea{q}\t{_GLOBAL_OFFSET_TABLE_(%%rip), %0|%0, _GLOBAL_OFFSET_TABLE_[rip]}"
16557 [(set_attr "type" "lea")
16558 (set_attr "length_address" "4")
16559 (set_attr "mode" "DI")])
16560
;; 64-bit only.  Loads the RIP-relative address of the label in operand 1
;; into the DImode register in operand 0 with a single lea.
16561 (define_insn "set_rip_rex64"
16562 [(set (match_operand:DI 0 "register_operand" "=r")
16563 (unspec:DI [(label_ref (match_operand 1))] UNSPEC_SET_RIP))]
16564 "TARGET_64BIT"
16565 "lea{q}\t{%l1(%%rip), %0|%0, %l1[rip]}"
16566 [(set_attr "type" "lea")
16567 (set_attr "length_address" "4")
16568 (set_attr "mode" "DI")])
16569
;; TARGET_LP64 only.  Loads the difference between _GLOBAL_OFFSET_TABLE_ and
;; the label in operand 1 into the DImode register in operand 0 using
;; movabs; the 8-byte value is accounted for in length_address rather than
;; length_immediate.
16570 (define_insn "set_got_offset_rex64"
16571 [(set (match_operand:DI 0 "register_operand" "=r")
16572 (unspec:DI
16573 [(label_ref (match_operand 1))]
16574 UNSPEC_SET_GOT_OFFSET))]
16575 "TARGET_LP64"
16576 "movabs{q}\t{$_GLOBAL_OFFSET_TABLE_-%l1, %0|%0, OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-%l1}"
16577 [(set_attr "type" "imov")
16578 (set_attr "length_immediate" "0")
16579 (set_attr "length_address" "8")
16580 (set_attr "mode" "DI")])
16581
;; Function epilogue expander; calls ix86_expand_epilogue with style
;; argument 1.
16582 (define_expand "epilogue"
16583 [(const_int 0)]
16584 ""
16585 "ix86_expand_epilogue (1); DONE;")
16586
;; Epilogue expander used before a sibling call; calls ix86_expand_epilogue
;; with style argument 0.
16587 (define_expand "sibcall_epilogue"
16588 [(const_int 0)]
16589 ""
16590 "ix86_expand_epilogue (0); DONE;")
16591
;; Expander for the exception-handling return.  Operand 0 is the handler
;; address.  The body stores it (converted to word_mode if necessary) into
;; the word_mode memory slot at
;; arg_pointer + EH_RETURN_STACKADJ_RTX - UNITS_PER_WORD, then emits the
;; eh_return_internal jump insn followed by a barrier.
16592 (define_expand "eh_return"
16593 [(use (match_operand 0 "register_operand"))]
16594 ""
16595 {
16596 rtx tmp, sa = EH_RETURN_STACKADJ_RTX, ra = operands[0];
16597
16598 /* Tricky bit: we write the address of the handler to which we will
16599 be returning into someone else's stack frame, one word below the
16600 stack address we wish to restore. */
16601 tmp = gen_rtx_PLUS (Pmode, arg_pointer_rtx, sa);
16602 tmp = plus_constant (Pmode, tmp, -UNITS_PER_WORD);
16603 /* Return address is always in word_mode. */
16604 tmp = gen_rtx_MEM (word_mode, tmp);
16605 if (GET_MODE (ra) != word_mode)
16606 ra = convert_to_mode (word_mode, ra, 1);
16607 emit_move_insn (tmp, ra);
16608
16609 emit_jump_insn (gen_eh_return_internal ());
16610 emit_barrier ();
16611 DONE;
16612 })
16613
;; Placeholder (eh_return) insn whose output template is "#", so it must be
;; split.  The split is enabled once epilogue_completed is set and expands
;; to ix86_expand_epilogue with style argument 2.
16614 (define_insn_and_split "eh_return_internal"
16615 [(eh_return)]
16616 ""
16617 "#"
16618 "epilogue_completed"
16619 [(const_int 0)]
16620 "ix86_expand_epilogue (2); DONE;")
16621
;; Parameterized expander (one instance per mode of the W iterator) that
;; emits the "leave" RTL: SP := BP + <MODE_SIZE>, BP := mem[BP], plus a
;; clobber of (mem:BLK (scratch)).  Operand 0 is set to the mode size by the
;; preparation statement.
16622 (define_expand "@leave_<mode>"
16623 [(parallel
16624 [(set (reg:W SP_REG) (plus:W (reg:W BP_REG) (match_dup 0)))
16625 (set (reg:W BP_REG) (mem:W (reg:W BP_REG)))
16626 (clobber (mem:BLK (scratch)))])]
16627 ""
16628 "operands[0] = GEN_INT (<MODE_SIZE>);")
16629
;; 32-bit "leave": SP := BP + 4, BP := mem[BP], with a clobber of
;; (mem:BLK (scratch)).
16630 (define_insn "*leave"
16631 [(set (reg:SI SP_REG) (plus:SI (reg:SI BP_REG) (const_int 4)))
16632 (set (reg:SI BP_REG) (mem:SI (reg:SI BP_REG)))
16633 (clobber (mem:BLK (scratch)))]
16634 "!TARGET_64BIT"
16635 "leave"
16636 [(set_attr "type" "leave")])
16637
;; 64-bit "leave": SP := BP + 8, BP := mem[BP], with a clobber of
;; (mem:BLK (scratch)).
16638 (define_insn "*leave_rex64"
16639 [(set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8)))
16640 (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG)))
16641 (clobber (mem:BLK (scratch)))]
16642 "TARGET_64BIT"
16643 "leave"
16644 [(set_attr "type" "leave")])
16645 \f
16646 ;; Handle -fsplit-stack.
16647
;; Expander for "split_stack_prologue".  The RTL template is a placeholder;
;; all work is delegated to ix86_expand_split_stack_prologue (), followed
;; by DONE.
16648 (define_expand "split_stack_prologue"
16649 [(const_int 0)]
16650 ""
16651 {
16652 ix86_expand_split_stack_prologue ();
16653 DONE;
16654 })
16655
16656 ;; In order to support the call/return predictor, we use a return
16657 ;; instruction which the middle-end doesn't see.
;; Operand 0 is a constant.  Zero selects the plain "ret" (length 1, no
;; immediate); any other value selects "ret" with the operand printed
;; (length 3, 2-byte immediate), as the attributes below record.
16658 (define_insn "split_stack_return"
16659 [(unspec_volatile [(match_operand:SI 0 "const_int_operand")]
16660 UNSPECV_SPLIT_STACK_RETURN)]
16661 ""
16662 {
16663 /* A zero operand selects the operand-less form. */
16664 return (operands[0] == const0_rtx
16665 ? "ret"
16666 : "ret\t%0");
16667 }
16668 [(set_attr "atom_unit" "jeu")
16669 (set_attr "modrm" "0")
16670 (set (attr "length")
16671 (if_then_else (match_operand:SI 0 "const0_operand")
16672 (const_int 1)
16673 (const_int 3)))
16674 (set (attr "length_immediate")
16675 (if_then_else (match_operand:SI 0 "const0_operand")
16676 (const_int 0)
16677 (const_int 2)))])
16678
16679 ;; If there are operand 0 bytes available on the stack, jump to
16680 ;; operand 1.
16681
;; Operand 0 is a register and operand 1 the target label.  The preparation
;; code subtracts operand 0 from the stack pointer into a fresh Pmode
;; pseudo, sets operand 2 from ix86_split_stack_guard (), and emits a GEU
;; branch to operand 1 via ix86_expand_branch.  It ends with DONE, so the
;; RTL template is not emitted as written.
16682 (define_expand "split_stack_space_check"
16683 [(set (pc) (if_then_else
16684 (ltu (minus (reg SP_REG)
16685 (match_operand 0 "register_operand"))
16686 (match_dup 2))
16687 (label_ref (match_operand 1))
16688 (pc)))]
16689 ""
16690 {
16691 rtx reg = gen_reg_rtx (Pmode);
16692
16693 emit_insn (gen_sub3_insn (reg, stack_pointer_rtx, operands[0]));
16694
16695 operands[2] = ix86_split_stack_guard ();
16696 ix86_expand_branch (GEU, reg, operands[2], operands[1]);
16697
16698 DONE;
16699 })
16700 \f
16701 ;; Bit manipulation instructions.
16702
;; ffs expander.  The template loads -1 into a fresh pseudo (operand 2),
;; performs a flag-setting ctz of operand 1 into operand 0, replaces
;; operand 0 by operand 2 when the flags test EQ, and finally adds 1.
;; SImode without cmov is handed to ffssi2_no_cmove instead.
16703 (define_expand "ffs<mode>2"
16704 [(set (match_dup 2) (const_int -1))
16705 (parallel [(set (match_dup 3) (match_dup 4))
16706 (set (match_operand:SWI48 0 "register_operand")
16707 (ctz:SWI48
16708 (match_operand:SWI48 1 "nonimmediate_operand")))])
16709 (set (match_dup 0) (if_then_else:SWI48
16710 (eq (match_dup 3) (const_int 0))
16711 (match_dup 2)
16712 (match_dup 0)))
16713 (parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (const_int 1)))
16714 (clobber (reg:CC FLAGS_REG))])]
16715 ""
16716 {
16717 /* The template needs a conditional move; without one, SImode goes
16718 through ffssi2_no_cmove. */
16719 if (!TARGET_CMOVE && <MODE>mode == SImode)
16720 {
16721 emit_insn (gen_ffssi2_no_cmove (operands[0], operands[1]));
16722 DONE;
16723 }
16724
16725 machine_mode cc_mode = TARGET_BMI ? CCCmode : CCZmode;
16726
16727 operands[2] = gen_reg_rtx (<MODE>mode);
16728 operands[3] = gen_rtx_REG (cc_mode, FLAGS_REG);
16729 operands[4] = gen_rtx_COMPARE (cc_mode, operands[1], const0_rtx);
16730 })
16731
;; SImode ffs for !TARGET_CMOVE.  Emitted as "#" and split after reload
;; into: a flag-setting ctz of operand 1 into operand 0; an EQ test of the
;; flags stored into the QImode low part of scratch operand 2; a negate of
;; the scratch; an OR of the scratch into operand 0; and an add of 1.
;; The preparation code calls ix86_expand_clear on the scratch and picks
;; CCCmode (TARGET_BMI) or CCZmode for the flags.
16732 (define_insn_and_split "ffssi2_no_cmove"
16733 [(set (match_operand:SI 0 "register_operand" "=r")
16734 (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
16735 (clobber (match_scratch:SI 2 "=&q"))
16736 (clobber (reg:CC FLAGS_REG))]
16737 "!TARGET_CMOVE"
16738 "#"
16739 "&& reload_completed"
16740 [(parallel [(set (match_dup 4) (match_dup 5))
16741 (set (match_dup 0) (ctz:SI (match_dup 1)))])
16742 (set (strict_low_part (match_dup 3))
16743 (eq:QI (match_dup 4) (const_int 0)))
16744 (parallel [(set (match_dup 2) (neg:SI (match_dup 2)))
16745 (clobber (reg:CC FLAGS_REG))])
16746 (parallel [(set (match_dup 0) (ior:SI (match_dup 0) (match_dup 2)))
16747 (clobber (reg:CC FLAGS_REG))])
16748 (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
16749 (clobber (reg:CC FLAGS_REG))])]
16750 {
16751 machine_mode flags_mode = TARGET_BMI ? CCCmode : CCZmode;
16752
16753 operands[3] = gen_lowpart (QImode, operands[2]);
16754 operands[4] = gen_rtx_REG (flags_mode, FLAGS_REG);
16755 operands[5] = gen_rtx_COMPARE (flags_mode, operands[1], const0_rtx);
16756
16757 ix86_expand_clear (operands[2]);
16758 })
16759
;; tzcnt that also sets the flags register in CCCmode from a comparison of
;; the source with zero.  Requires TARGET_BMI.  Once epilogue_completed is
;; set, and when TARGET_AVOID_FALSE_DEP_FOR_BMI holds, the function is
;; optimized for speed and the destination is not mentioned in the source,
;; the insn is split into the same operation carrying an
;; UNSPEC_INSN_FALSE_DEP use of the destination; the split code calls
;; ix86_expand_clear on the destination.
16760 (define_insn_and_split "*tzcnt<mode>_1"
16761 [(set (reg:CCC FLAGS_REG)
16762 (compare:CCC (match_operand:SWI48 1 "nonimmediate_operand" "rm")
16763 (const_int 0)))
16764 (set (match_operand:SWI48 0 "register_operand" "=r")
16765 (ctz:SWI48 (match_dup 1)))]
16766 "TARGET_BMI"
16767 "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
16768 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
16769 && optimize_function_for_speed_p (cfun)
16770 && !reg_mentioned_p (operands[0], operands[1])"
16771 [(parallel
16772 [(set (reg:CCC FLAGS_REG)
16773 (compare:CCC (match_dup 1) (const_int 0)))
16774 (set (match_dup 0)
16775 (ctz:SWI48 (match_dup 1)))
16776 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
16777 "ix86_expand_clear (operands[0]);"
16778 [(set_attr "type" "alu1")
16779 (set_attr "prefix_0f" "1")
16780 (set_attr "prefix_rep" "1")
16781 (set_attr "btver2_decode" "double")
16782 (set_attr "mode" "<MODE>")])
16783
16784 ; False dependency happens when destination is only updated by tzcnt,
16785 ; lzcnt or popcnt. There is no false dependency when destination is
16786 ; also used in source.
;; Flag-setting tzcnt (CCCmode) with an extra UNSPEC_INSN_FALSE_DEP use of
;; operand 2, which is tied to the destination by the "0" constraint.
;; Requires TARGET_BMI.
16787 (define_insn "*tzcnt<mode>_1_falsedep"
16788 [(set (reg:CCC FLAGS_REG)
16789 (compare:CCC (match_operand:SWI48 1 "nonimmediate_operand" "rm")
16790 (const_int 0)))
16791 (set (match_operand:SWI48 0 "register_operand" "=r")
16792 (ctz:SWI48 (match_dup 1)))
16793 (unspec [(match_operand:SWI48 2 "register_operand" "0")]
16794 UNSPEC_INSN_FALSE_DEP)]
16795 "TARGET_BMI"
16796 "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
16797 [(set_attr "type" "alu1")
16798 (set_attr "prefix_0f" "1")
16799 (set_attr "prefix_rep" "1")
16800 (set_attr "btver2_decode" "double")
16801 (set_attr "mode" "<MODE>")])
16802
;; bsf that also sets the flags register in CCZmode from a comparison of
;; the source with zero; the destination receives ctz of the source.
;; The insn condition is empty.
16803 (define_insn "*bsf<mode>_1"
16804 [(set (reg:CCZ FLAGS_REG)
16805 (compare:CCZ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
16806 (const_int 0)))
16807 (set (match_operand:SWI48 0 "register_operand" "=r")
16808 (ctz:SWI48 (match_dup 1)))]
16809 ""
16810 "bsf{<imodesuffix>}\t{%1, %0|%0, %1}"
16811 [(set_attr "type" "alu1")
16812 (set_attr "prefix_0f" "1")
16813 (set_attr "btver2_decode" "double")
16814 (set_attr "znver1_decode" "vector")
16815 (set_attr "mode" "<MODE>")])
16816
;; ctz with a flags clobber.  The mnemonic is chosen at output time:
;; tzcnt with TARGET_BMI; "rep bsf" for generic tuning when not optimizing
;; for size; plain bsf otherwise.  The "prefix_rep" attribute mirrors that
;; choice.  Under (TARGET_BMI || generic tuning), once epilogue_completed
;; is set and the other split conditions hold, the insn is split into the
;; form carrying an UNSPEC_INSN_FALSE_DEP use of the destination, with
;; ix86_expand_clear called on the destination.
16817 (define_insn_and_split "ctz<mode>2"
16818 [(set (match_operand:SWI48 0 "register_operand" "=r")
16819 (ctz:SWI48
16820 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
16821 (clobber (reg:CC FLAGS_REG))]
16822 ""
16823 {
16824 if (TARGET_BMI)
16825 return "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
16826
16827 /* tzcnt expands to 'rep bsf', so it can be used even if !TARGET_BMI;
16828 this is done for generic tuning unless optimizing for size. */
16829 if (!optimize_function_for_size_p (cfun) && TARGET_CPU_P (GENERIC))
16830 return "rep%; bsf{<imodesuffix>}\t{%1, %0|%0, %1}";
16831
16832 return "bsf{<imodesuffix>}\t{%1, %0|%0, %1}";
16833 }
16834 "(TARGET_BMI || TARGET_CPU_P (GENERIC))
16835 && TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
16836 && optimize_function_for_speed_p (cfun)
16837 && !reg_mentioned_p (operands[0], operands[1])"
16838 [(parallel
16839 [(set (match_dup 0)
16840 (ctz:SWI48 (match_dup 1)))
16841 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
16842 (clobber (reg:CC FLAGS_REG))])]
16843 "ix86_expand_clear (operands[0]);"
16844 [(set_attr "type" "alu1")
16845 (set_attr "prefix_0f" "1")
16846 (set (attr "prefix_rep")
16847 (if_then_else
16848 (ior (match_test "TARGET_BMI")
16849 (and (not (match_test "optimize_function_for_size_p (cfun)"))
16850 (match_test "TARGET_CPU_P (GENERIC)")))
16851 (const_string "1")
16852 (const_string "0")))
16853 (set_attr "mode" "<MODE>")])
16854
16855 ; False dependency happens when destination is only updated by tzcnt,
16856 ; lzcnt or popcnt. There is no false dependency when destination is
16857 ; also used in source.
;; ctz with a flags clobber and an UNSPEC_INSN_FALSE_DEP use of operand 2,
;; which is tied to the destination by the "0" constraint.  Outputs tzcnt
;; with TARGET_BMI and "rep bsf" for generic tuning; any other
;; configuration hits gcc_unreachable.
16858 (define_insn "*ctz<mode>2_falsedep"
16859 [(set (match_operand:SWI48 0 "register_operand" "=r")
16860 (ctz:SWI48
16861 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
16862 (unspec [(match_operand:SWI48 2 "register_operand" "0")]
16863 UNSPEC_INSN_FALSE_DEP)
16864 (clobber (reg:CC FLAGS_REG))]
16865 ""
16866 {
16867 if (TARGET_BMI)
16868 return "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
16869
16870 /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI. */
16871 if (!TARGET_CPU_P (GENERIC))
16872 gcc_unreachable ();
16873 return "rep%; bsf{<imodesuffix>}\t{%1, %0|%0, %1}";
16874 }
16875 [(set_attr "type" "alu1")
16876 (set_attr "prefix_0f" "1")
16877 (set_attr "prefix_rep" "1")
16878 (set_attr "mode" "<MODE>")])
16879
;; SImode ctz whose result is taken as a DImode subreg and masked with 63,
;; written to a DImode register; output is "tzcnt{l}" with the destination
;; printed through %k0.  Requires TARGET_BMI && TARGET_64BIT.  Once
;; epilogue_completed is set and the other split conditions hold, it is
;; split into the form carrying an UNSPEC_INSN_FALSE_DEP use of the
;; destination, with ix86_expand_clear called on the destination.
16880 (define_insn_and_split "*ctzsi2_zext"
16881 [(set (match_operand:DI 0 "register_operand" "=r")
16882 (and:DI
16883 (subreg:DI
16884 (ctz:SI
16885 (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
16886 (const_int 63)))
16887 (clobber (reg:CC FLAGS_REG))]
16888 "TARGET_BMI && TARGET_64BIT"
16889 "tzcnt{l}\t{%1, %k0|%k0, %1}"
16890 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
16891 && optimize_function_for_speed_p (cfun)
16892 && !reg_mentioned_p (operands[0], operands[1])"
16893 [(parallel
16894 [(set (match_dup 0)
16895 (and:DI (subreg:DI (ctz:SI (match_dup 1)) 0) (const_int 63)))
16896 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
16897 (clobber (reg:CC FLAGS_REG))])]
16898 "ix86_expand_clear (operands[0]);"
16899 [(set_attr "type" "alu1")
16900 (set_attr "prefix_0f" "1")
16901 (set_attr "prefix_rep" "1")
16902 (set_attr "mode" "SI")])
16903
16904 ; False dependency happens when destination is only updated by tzcnt,
16905 ; lzcnt or popcnt. There is no false dependency when destination is
16906 ; also used in source.
;; Same operation as *ctzsi2_zext plus an UNSPEC_INSN_FALSE_DEP use of
;; DImode operand 2, tied to the destination by the "0" constraint.
;; Requires TARGET_BMI && TARGET_64BIT.
16907 (define_insn "*ctzsi2_zext_falsedep"
16908 [(set (match_operand:DI 0 "register_operand" "=r")
16909 (and:DI
16910 (subreg:DI
16911 (ctz:SI
16912 (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
16913 (const_int 63)))
16914 (unspec [(match_operand:DI 2 "register_operand" "0")]
16915 UNSPEC_INSN_FALSE_DEP)
16916 (clobber (reg:CC FLAGS_REG))]
16917 "TARGET_BMI && TARGET_64BIT"
16918 "tzcnt{l}\t{%1, %k0|%k0, %1}"
16919 [(set_attr "type" "alu1")
16920 (set_attr "prefix_0f" "1")
16921 (set_attr "prefix_rep" "1")
16922 (set_attr "mode" "SI")])
16923
;; SImode ctz extended to DImode through the any_extend code iterator.
;; Requires TARGET_64BIT.  The output code picks tzcnt with TARGET_BMI,
;; "rep bsf" for generic tuning when not optimizing for size, and plain
;; bsf otherwise; the destination is printed through %k0.  The
;; "prefix_rep" attribute mirrors that choice.  Under (TARGET_BMI ||
;; generic tuning), once epilogue_completed is set and the other split
;; conditions hold, it is split into the form carrying an
;; UNSPEC_INSN_FALSE_DEP use of the destination, with ix86_expand_clear
;; called on the destination.
16924 (define_insn_and_split "*ctzsidi2_<s>ext"
16925 [(set (match_operand:DI 0 "register_operand" "=r")
16926 (any_extend:DI
16927 (ctz:SI
16928 (match_operand:SI 1 "nonimmediate_operand" "rm"))))
16929 (clobber (reg:CC FLAGS_REG))]
16930 "TARGET_64BIT"
16931 {
16932 if (TARGET_BMI)
16933 return "tzcnt{l}\t{%1, %k0|%k0, %1}";
16934 else if (TARGET_CPU_P (GENERIC)
16935 && !optimize_function_for_size_p (cfun))
16936 /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI. */
16937 return "rep%; bsf{l}\t{%1, %k0|%k0, %1}";
16938 return "bsf{l}\t{%1, %k0|%k0, %1}";
16939 }
16940 "(TARGET_BMI || TARGET_CPU_P (GENERIC))
16941 && TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
16942 && optimize_function_for_speed_p (cfun)
16943 && !reg_mentioned_p (operands[0], operands[1])"
16944 [(parallel
16945 [(set (match_dup 0)
16946 (any_extend:DI (ctz:SI (match_dup 1))))
16947 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
16948 (clobber (reg:CC FLAGS_REG))])]
16949 "ix86_expand_clear (operands[0]);"
16950 [(set_attr "type" "alu1")
16951 (set_attr "prefix_0f" "1")
16952 (set (attr "prefix_rep")
16953 (if_then_else
16954 (ior (match_test "TARGET_BMI")
16955 (and (not (match_test "optimize_function_for_size_p (cfun)"))
16956 (match_test "TARGET_CPU_P (GENERIC)")))
16957 (const_string "1")
16958 (const_string "0")))
16959 (set_attr "mode" "SI")])
16960
;; Extended SImode ctz (any_extend to DImode) with a flags clobber and an
;; UNSPEC_INSN_FALSE_DEP use of DImode operand 2, tied to the destination
;; by the "0" constraint.  Requires TARGET_64BIT.  Outputs tzcnt with
;; TARGET_BMI and "rep bsf" for generic tuning; any other configuration
;; hits gcc_unreachable.
16961 (define_insn "*ctzsidi2_<s>ext_falsedep"
16962 [(set (match_operand:DI 0 "register_operand" "=r")
16963 (any_extend:DI
16964 (ctz:SI
16965 (match_operand:SI 1 "nonimmediate_operand" "rm"))))
16966 (unspec [(match_operand:DI 2 "register_operand" "0")]
16967 UNSPEC_INSN_FALSE_DEP)
16968 (clobber (reg:CC FLAGS_REG))]
16969 "TARGET_64BIT"
16970 {
16971 if (TARGET_BMI)
16972 return "tzcnt{l}\t{%1, %k0|%k0, %1}";
16973
16974 /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI. */
16975 if (!TARGET_CPU_P (GENERIC))
16976 gcc_unreachable ();
16977 return "rep%; bsf{l}\t{%1, %k0|%k0, %1}";
16978 }
16979 [(set_attr "type" "alu1")
16980 (set_attr "prefix_0f" "1")
16981 (set_attr "prefix_rep" "1")
16982 (set_attr "mode" "SI")])
16983
;; DImode bsr: the destination receives 63 - clz (operand 1) and the flags
;; register is set in CCZmode from a comparison of operand 1 with zero.
;; Requires TARGET_64BIT.
16984 (define_insn "bsr_rex64"
16985 [(set (reg:CCZ FLAGS_REG)
16986 (compare:CCZ (match_operand:DI 1 "nonimmediate_operand" "rm")
16987 (const_int 0)))
16988 (set (match_operand:DI 0 "register_operand" "=r")
16989 (minus:DI (const_int 63)
16990 (clz:DI (match_dup 1))))]
16991 "TARGET_64BIT"
16992 "bsr{q}\t{%1, %0|%0, %1}"
16993 [(set_attr "type" "alu1")
16994 (set_attr "prefix_0f" "1")
16995 (set_attr "znver1_decode" "vector")
16996 (set_attr "mode" "DI")])
16997
;; DImode bsr computing 63 - clz (operand 1) with the flags register only
;; clobbered.  Requires !TARGET_LZCNT && TARGET_64BIT.
16998 (define_insn "bsr_rex64_1"
16999 [(set (match_operand:DI 0 "register_operand" "=r")
17000 (minus:DI (const_int 63)
17001 (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))))
17002 (clobber (reg:CC FLAGS_REG))]
17003 "!TARGET_LZCNT && TARGET_64BIT"
17004 "bsr{q}\t{%1, %0|%0, %1}"
17005 [(set_attr "type" "alu1")
17006 (set_attr "prefix_0f" "1")
17007 (set_attr "znver1_decode" "vector")
17008 (set_attr "mode" "DI")])
17009
;; DImode bsr where the RTL expresses the result as the zero extension of
;; an SImode 63 - (low SImode part of clz:DI (operand 1)).  Flags are
;; clobbered.  Requires !TARGET_LZCNT && TARGET_64BIT.
17010 (define_insn "bsr_rex64_1_zext"
17011 [(set (match_operand:DI 0 "register_operand" "=r")
17012 (zero_extend:DI
17013 (minus:SI (const_int 63)
17014 (subreg:SI
17015 (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))
17016 0))))
17017 (clobber (reg:CC FLAGS_REG))]
17018 "!TARGET_LZCNT && TARGET_64BIT"
17019 "bsr{q}\t{%1, %0|%0, %1}"
17020 [(set_attr "type" "alu1")
17021 (set_attr "prefix_0f" "1")
17022 (set_attr "znver1_decode" "vector")
17023 (set_attr "mode" "DI")])
17024
;; SImode bsr: the destination receives 31 - clz (operand 1) and the flags
;; register is set in CCZmode from a comparison of operand 1 with zero.
;; The insn condition is empty.
17025 (define_insn "bsr"
17026 [(set (reg:CCZ FLAGS_REG)
17027 (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm")
17028 (const_int 0)))
17029 (set (match_operand:SI 0 "register_operand" "=r")
17030 (minus:SI (const_int 31)
17031 (clz:SI (match_dup 1))))]
17032 ""
17033 "bsr{l}\t{%1, %0|%0, %1}"
17034 [(set_attr "type" "alu1")
17035 (set_attr "prefix_0f" "1")
17036 (set_attr "znver1_decode" "vector")
17037 (set_attr "mode" "SI")])
17038
;; SImode bsr computing 31 - clz (operand 1) with the flags register only
;; clobbered.  Requires !TARGET_LZCNT.
17039 (define_insn "bsr_1"
17040 [(set (match_operand:SI 0 "register_operand" "=r")
17041 (minus:SI (const_int 31)
17042 (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
17043 (clobber (reg:CC FLAGS_REG))]
17044 "!TARGET_LZCNT"
17045 "bsr{l}\t{%1, %0|%0, %1}"
17046 [(set_attr "type" "alu1")
17047 (set_attr "prefix_0f" "1")
17048 (set_attr "znver1_decode" "vector")
17049 (set_attr "mode" "SI")])
17050
;; SImode bsr (31 - clz (operand 1)) zero-extended into a DImode register;
;; the destination is printed through %k0.  Flags are clobbered.
;; Requires !TARGET_LZCNT && TARGET_64BIT.
17051 (define_insn "bsr_zext_1"
17052 [(set (match_operand:DI 0 "register_operand" "=r")
17053 (zero_extend:DI
17054 (minus:SI
17055 (const_int 31)
17056 (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))))
17057 (clobber (reg:CC FLAGS_REG))]
17058 "!TARGET_LZCNT && TARGET_64BIT"
17059 "bsr{l}\t{%1, %k0|%k0, %1}"
17060 [(set_attr "type" "alu1")
17061 (set_attr "prefix_0f" "1")
17062 (set_attr "znver1_decode" "vector")
17063 (set_attr "mode" "SI")])
17064
17065 ; As bsr is undefined behavior on zero and for other input
17066 ; values it is in range 0 to 63, we can optimize away sign-extends.
;; Matches, before reload only (ix86_pre_reload_split), the DImode XOR
;; with 63 of a sign-extended SImode 63 - clz:DI (operand 1).  It is split
;; unconditionally ("&& 1") into a flag-setting DImode 63 - clz into a
;; fresh DImode pseudo (operand 2), followed by a zero-extended SImode XOR
;; of that pseudo's low part (operand 3) with 63.
;; Requires !TARGET_LZCNT && TARGET_64BIT.
17067 (define_insn_and_split "*bsr_rex64_2"
17068 [(set (match_operand:DI 0 "register_operand")
17069 (xor:DI
17070 (sign_extend:DI
17071 (minus:SI
17072 (const_int 63)
17073 (subreg:SI (clz:DI (match_operand:DI 1 "nonimmediate_operand"))
17074 0)))
17075 (const_int 63)))
17076 (clobber (reg:CC FLAGS_REG))]
17077 "!TARGET_LZCNT && TARGET_64BIT && ix86_pre_reload_split ()"
17078 "#"
17079 "&& 1"
17080 [(parallel [(set (reg:CCZ FLAGS_REG)
17081 (compare:CCZ (match_dup 1) (const_int 0)))
17082 (set (match_dup 2)
17083 (minus:DI (const_int 63) (clz:DI (match_dup 1))))])
17084 (parallel [(set (match_dup 0)
17085 (zero_extend:DI (xor:SI (match_dup 3) (const_int 63))))
17086 (clobber (reg:CC FLAGS_REG))])]
17087 {
17088 operands[2] = gen_reg_rtx (DImode);
17089 operands[3] = lowpart_subreg (SImode, operands[2], DImode);
17090 })
17091
;; Matches, before reload only (ix86_pre_reload_split), the sign extension
;; to DImode of (31 - clz:SI (operand 1)) XOR 31.  It is split
;; unconditionally ("&& 1") into a flag-setting SImode 31 - clz into a
;; fresh SImode pseudo (operand 2), followed by a zero-extended XOR of that
;; pseudo with 31.  Requires !TARGET_LZCNT && TARGET_64BIT.
17092 (define_insn_and_split "*bsr_2"
17093 [(set (match_operand:DI 0 "register_operand")
17094 (sign_extend:DI
17095 (xor:SI
17096 (minus:SI
17097 (const_int 31)
17098 (clz:SI (match_operand:SI 1 "nonimmediate_operand")))
17099 (const_int 31))))
17100 (clobber (reg:CC FLAGS_REG))]
17101 "!TARGET_LZCNT && TARGET_64BIT && ix86_pre_reload_split ()"
17102 "#"
17103 "&& 1"
17104 [(parallel [(set (reg:CCZ FLAGS_REG)
17105 (compare:CCZ (match_dup 1) (const_int 0)))
17106 (set (match_dup 2)
17107 (minus:SI (const_int 31) (clz:SI (match_dup 1))))])
17108 (parallel [(set (match_dup 0)
17109 (zero_extend:DI (xor:SI (match_dup 2) (const_int 31))))
17110 (clobber (reg:CC FLAGS_REG))])]
17111 "operands[2] = gen_reg_rtx (SImode);")
17112
17113 ; Splitters to optimize 64 - __builtin_clzl (x) or 32 - __builtin_clz (x).
17114 ; Again, as for !TARGET_LZCNT CLZ is UB at zero, CLZ is guaranteed to be
17115 ; in [0, 63] or [0, 31] range.
;; SImode result, DImode clz: operand 2 - ((63 - clz:DI (operand 1)) ^ 63).
;; A fresh DImode pseudo (operand 3) receives 63 - clz; operand 0 is then
;; the pseudo's SImode low part (operand 5) plus operand 2 - 63 (operand 4).
;; When operand 2 is exactly 63 the addend would be zero, so the code
;; instead emits gen_bsr_rex64_1_zext into the pseudo and a plain move of
;; its low part, then finishes with DONE.
17116 (define_split
17117 [(set (match_operand:SI 0 "register_operand")
17118 (minus:SI
17119 (match_operand:SI 2 "const_int_operand")
17120 (xor:SI
17121 (minus:SI (const_int 63)
17122 (subreg:SI
17123 (clz:DI (match_operand:DI 1 "nonimmediate_operand"))
17124 0))
17125 (const_int 63))))]
17126 "!TARGET_LZCNT && TARGET_64BIT && ix86_pre_reload_split ()"
17127 [(set (match_dup 3)
17128 (minus:DI (const_int 63) (clz:DI (match_dup 1))))
17129 (set (match_dup 0)
17130 (plus:SI (match_dup 5) (match_dup 4)))]
17131 {
17132 operands[3] = gen_reg_rtx (DImode);
17133 operands[5] = lowpart_subreg (SImode, operands[3], DImode);
17134 if (INTVAL (operands[2]) == 63)
17135 {
17136 emit_insn (gen_bsr_rex64_1_zext (operands[3], operands[1]));
17137 emit_move_insn (operands[0], operands[5]);
17138 DONE;
17139 }
17140 operands[4] = gen_int_mode (UINTVAL (operands[2]) - 63, SImode);
17141 })
17142
;; SImode: operand 2 - ((31 - clz:SI (operand 1)) ^ 31).  A fresh SImode
;; pseudo (operand 3) receives 31 - clz and operand 0 is that pseudo plus
;; operand 2 - 31 (operand 4).  When operand 2 is exactly 31 the code emits
;; gen_bsr_1 straight into operand 0 and finishes with DONE.
17143 (define_split
17144 [(set (match_operand:SI 0 "register_operand")
17145 (minus:SI
17146 (match_operand:SI 2 "const_int_operand")
17147 (xor:SI
17148 (minus:SI (const_int 31)
17149 (clz:SI (match_operand:SI 1 "nonimmediate_operand")))
17150 (const_int 31))))]
17151 "!TARGET_LZCNT && ix86_pre_reload_split ()"
17152 [(set (match_dup 3)
17153 (minus:SI (const_int 31) (clz:SI (match_dup 1))))
17154 (set (match_dup 0)
17155 (plus:SI (match_dup 3) (match_dup 4)))]
17156 {
17157 if (INTVAL (operands[2]) == 31)
17158 {
17159 emit_insn (gen_bsr_1 (operands[0], operands[1]));
17160 DONE;
17161 }
17162 operands[3] = gen_reg_rtx (SImode);
17163 operands[4] = gen_int_mode (UINTVAL (operands[2]) - 31, SImode);
17164 })
17165
;; DImode: operand 2 - (sign_extend (63 - low part of clz:DI (operand 1))
;; ^ 63).  The condition additionally requires that operand 2 - 63 is
;; unchanged by trunc_int_for_mode (..., SImode).  A fresh DImode pseudo
;; (operand 3) receives 63 - clz and operand 0 is that pseudo plus
;; operand 2 - 63 (operand 4).  When operand 2 is exactly 63 the code emits
;; gen_bsr_rex64_1 straight into operand 0 and finishes with DONE.
17166 (define_split
17167 [(set (match_operand:DI 0 "register_operand")
17168 (minus:DI
17169 (match_operand:DI 2 "const_int_operand")
17170 (xor:DI
17171 (sign_extend:DI
17172 (minus:SI (const_int 63)
17173 (subreg:SI
17174 (clz:DI (match_operand:DI 1 "nonimmediate_operand"))
17175 0)))
17176 (const_int 63))))]
17177 "!TARGET_LZCNT
17178 && TARGET_64BIT
17179 && ix86_pre_reload_split ()
17180 && ((unsigned HOST_WIDE_INT)
17181 trunc_int_for_mode (UINTVAL (operands[2]) - 63, SImode)
17182 == UINTVAL (operands[2]) - 63)"
17183 [(set (match_dup 3)
17184 (minus:DI (const_int 63) (clz:DI (match_dup 1))))
17185 (set (match_dup 0)
17186 (plus:DI (match_dup 3) (match_dup 4)))]
17187 {
17188 if (INTVAL (operands[2]) == 63)
17189 {
17190 emit_insn (gen_bsr_rex64_1 (operands[0], operands[1]));
17191 DONE;
17192 }
17193 operands[3] = gen_reg_rtx (DImode);
17194 operands[4] = GEN_INT (UINTVAL (operands[2]) - 63);
17195 })
17196
;; DImode result, SImode clz: operand 2 - sign_extend ((31 - clz:SI
;; (operand 1)) ^ 31).  The condition additionally requires that
;; operand 2 - 31 is unchanged by trunc_int_for_mode (..., SImode).
;; A fresh DImode pseudo (operand 3) receives the zero-extended 31 - clz
;; and operand 0 is that pseudo plus operand 2 - 31 (operand 4).  When
;; operand 2 is exactly 31 the code emits gen_bsr_zext_1 straight into
;; operand 0 and finishes with DONE.
17197 (define_split
17198 [(set (match_operand:DI 0 "register_operand")
17199 (minus:DI
17200 (match_operand:DI 2 "const_int_operand")
17201 (sign_extend:DI
17202 (xor:SI
17203 (minus:SI (const_int 31)
17204 (clz:SI (match_operand:SI 1 "nonimmediate_operand")))
17205 (const_int 31)))))]
17206 "!TARGET_LZCNT
17207 && TARGET_64BIT
17208 && ix86_pre_reload_split ()
17209 && ((unsigned HOST_WIDE_INT)
17210 trunc_int_for_mode (UINTVAL (operands[2]) - 31, SImode)
17211 == UINTVAL (operands[2]) - 31)"
17212 [(set (match_dup 3)
17213 (zero_extend:DI (minus:SI (const_int 31) (clz:SI (match_dup 1)))))
17214 (set (match_dup 0)
17215 (plus:DI (match_dup 3) (match_dup 4)))]
17216 {
17217 if (INTVAL (operands[2]) == 31)
17218 {
17219 emit_insn (gen_bsr_zext_1 (operands[0], operands[1]));
17220 DONE;
17221 }
17222 operands[3] = gen_reg_rtx (DImode);
17223 operands[4] = GEN_INT (UINTVAL (operands[2]) - 31);
17224 })
17225
;; clz expander.  With TARGET_LZCNT it emits clz<mode>2_lzcnt and finishes
;; with DONE.  Otherwise operand 2 is set to the mode's bit size minus 1
;; and operand 3 to a fresh pseudo, and the template emits a flag-setting
;; (operand 2 - clz (operand 1)) into operand 3 followed by an XOR of
;; operand 3 with operand 2 into operand 0.
17226 (define_expand "clz<mode>2"
17227 [(parallel
17228 [(set (reg:CCZ FLAGS_REG)
17229 (compare:CCZ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17230 (const_int 0)))
17231 (set (match_dup 3) (minus:SWI48
17232 (match_dup 2)
17233 (clz:SWI48 (match_dup 1))))])
17234 (parallel
17235 [(set (match_operand:SWI48 0 "register_operand")
17236 (xor:SWI48 (match_dup 3) (match_dup 2)))
17237 (clobber (reg:CC FLAGS_REG))])]
17238 ""
17239 {
17240 if (TARGET_LZCNT)
17241 {
17242 emit_insn (gen_clz<mode>2_lzcnt (operands[0], operands[1]));
17243 DONE;
17244 }
17245 operands[2] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
17246 operands[3] = gen_reg_rtx (<MODE>mode);
17247 })
17248
;; clz implemented with lzcnt; flags are clobbered.  Requires TARGET_LZCNT.
;; Once epilogue_completed is set, and when TARGET_AVOID_FALSE_DEP_FOR_BMI
;; holds, the function is optimized for speed and the destination is not
;; mentioned in the source, it is split into the form carrying an
;; UNSPEC_INSN_FALSE_DEP use of the destination, with ix86_expand_clear
;; called on the destination.
17249 (define_insn_and_split "clz<mode>2_lzcnt"
17250 [(set (match_operand:SWI48 0 "register_operand" "=r")
17251 (clz:SWI48
17252 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
17253 (clobber (reg:CC FLAGS_REG))]
17254 "TARGET_LZCNT"
17255 "lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
17256 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
17257 && optimize_function_for_speed_p (cfun)
17258 && !reg_mentioned_p (operands[0], operands[1])"
17259 [(parallel
17260 [(set (match_dup 0)
17261 (clz:SWI48 (match_dup 1)))
17262 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
17263 (clobber (reg:CC FLAGS_REG))])]
17264 "ix86_expand_clear (operands[0]);"
17265 [(set_attr "prefix_rep" "1")
17266 (set_attr "type" "bitmanip")
17267 (set_attr "mode" "<MODE>")])
17268
17269 ; False dependency happens when destination is only updated by tzcnt,
17270 ; lzcnt or popcnt. There is no false dependency when destination is
17271 ; also used in source.
;; lzcnt-based clz with a flags clobber and an UNSPEC_INSN_FALSE_DEP use of
;; operand 2, tied to the destination by the "0" constraint.
;; Requires TARGET_LZCNT.
17272 (define_insn "*clz<mode>2_lzcnt_falsedep"
17273 [(set (match_operand:SWI48 0 "register_operand" "=r")
17274 (clz:SWI48
17275 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
17276 (unspec [(match_operand:SWI48 2 "register_operand" "0")]
17277 UNSPEC_INSN_FALSE_DEP)
17278 (clobber (reg:CC FLAGS_REG))]
17279 "TARGET_LZCNT"
17280 "lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
17281 [(set_attr "prefix_rep" "1")
17282 (set_attr "type" "bitmanip")
17283 (set_attr "mode" "<MODE>")])
17284
;; SImode clz whose result is taken as a DImode subreg and masked with 63,
;; written to a DImode register; output is "lzcnt{l}" with the destination
;; printed through %k0.  Requires TARGET_LZCNT && TARGET_64BIT.  Once
;; epilogue_completed is set and the other split conditions hold, it is
;; split into the form carrying an UNSPEC_INSN_FALSE_DEP use of the
;; destination, with ix86_expand_clear called on the destination.
17285 (define_insn_and_split "*clzsi2_lzcnt_zext"
17286 [(set (match_operand:DI 0 "register_operand" "=r")
17287 (and:DI
17288 (subreg:DI
17289 (clz:SI
17290 (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
17291 (const_int 63)))
17292 (clobber (reg:CC FLAGS_REG))]
17293 "TARGET_LZCNT && TARGET_64BIT"
17294 "lzcnt{l}\t{%1, %k0|%k0, %1}"
17295 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
17296 && optimize_function_for_speed_p (cfun)
17297 && !reg_mentioned_p (operands[0], operands[1])"
17298 [(parallel
17299 [(set (match_dup 0)
17300 (and:DI (subreg:DI (clz:SI (match_dup 1)) 0) (const_int 63)))
17301 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
17302 (clobber (reg:CC FLAGS_REG))])]
17303 "ix86_expand_clear (operands[0]);"
17304 [(set_attr "prefix_rep" "1")
17305 (set_attr "type" "bitmanip")
17306 (set_attr "mode" "SI")])
17307
17308 ; False dependency happens when destination is only updated by tzcnt,
17309 ; lzcnt or popcnt. There is no false dependency when destination is
17310 ; also used in source.
;; Target of the *clzsi2_lzcnt_zext split: the same SImode lzcnt with the
;; result masked to 6 bits in a DImode register, plus an
;; UNSPEC_INSN_FALSE_DEP use of DImode operand 2 tied to the destination.
;; Operand 1 is SImode (it sits directly under clz:SI) and the condition
;; matches the splitting pattern, TARGET_LZCNT && TARGET_64BIT.
17311 (define_insn "*clzsi2_lzcnt_zext_falsedep"
17312 [(set (match_operand:DI 0 "register_operand" "=r")
17313 (and:DI
17314 (subreg:DI
17315 (clz:SI
17316 (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
17317 (const_int 63)))
17318 (unspec [(match_operand:DI 2 "register_operand" "0")]
17319 UNSPEC_INSN_FALSE_DEP)
17320 (clobber (reg:CC FLAGS_REG))]
17321 "TARGET_LZCNT && TARGET_64BIT"
17322 "lzcnt{l}\t{%1, %k0|%k0, %1}"
17323 [(set_attr "prefix_rep" "1")
17324 (set_attr "type" "bitmanip")
17325 (set_attr "mode" "SI")])
17326
;; Int iterator over the two count-zero unspecs; each entry carries its
;; own enabling condition (TARGET_BMI for tzcnt, TARGET_LZCNT for lzcnt).
17327 (define_int_iterator LT_ZCNT
17328 [(UNSPEC_TZCNT "TARGET_BMI")
17329 (UNSPEC_LZCNT "TARGET_LZCNT")])
17330
;; Mnemonic string ("tzcnt" or "lzcnt") for each unspec of the iterator.
17331 (define_int_attr lt_zcnt
17332 [(UNSPEC_TZCNT "tzcnt")
17333 (UNSPEC_LZCNT "lzcnt")])
17334
;; Value of the "type" insn attribute for each unspec: "alu1" for tzcnt,
;; "bitmanip" for lzcnt.
17335 (define_int_attr lt_zcnt_type
17336 [(UNSPEC_TZCNT "alu1")
17337 (UNSPEC_LZCNT "bitmanip")])
17338
17339 ;; Version of lzcnt/tzcnt that is expanded from intrinsics. This version
17340 ;; provides operand size as output when source operand is zero.
17341
;; Operand 1 is wrapped in an LT_ZCNT unspec; the mnemonic and the "type"
;; attribute come from the lt_zcnt and lt_zcnt_type int attributes.  The
;; insn condition is empty here; the per-unspec conditions are attached to
;; the LT_ZCNT iterator entries.  Once epilogue_completed is set and the
;; other split conditions hold, it is split into the form carrying an
;; UNSPEC_INSN_FALSE_DEP use of the destination, with ix86_expand_clear
;; called on the destination.
17342 (define_insn_and_split "<lt_zcnt>_<mode>"
17343 [(set (match_operand:SWI48 0 "register_operand" "=r")
17344 (unspec:SWI48
17345 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))
17346 (clobber (reg:CC FLAGS_REG))]
17347 ""
17348 "<lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}"
17349 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
17350 && optimize_function_for_speed_p (cfun)
17351 && !reg_mentioned_p (operands[0], operands[1])"
17352 [(parallel
17353 [(set (match_dup 0)
17354 (unspec:SWI48 [(match_dup 1)] LT_ZCNT))
17355 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
17356 (clobber (reg:CC FLAGS_REG))])]
17357 "ix86_expand_clear (operands[0]);"
17358 [(set_attr "type" "<lt_zcnt_type>")
17359 (set_attr "prefix_0f" "1")
17360 (set_attr "prefix_rep" "1")
17361 (set_attr "mode" "<MODE>")])
17362
17363 ; False dependency happens when destination is only updated by tzcnt,
17364 ; lzcnt or popcnt. There is no false dependency when destination is
17365 ; also used in source.
;; Unspec lzcnt/tzcnt with a flags clobber and an UNSPEC_INSN_FALSE_DEP use
;; of operand 2, tied to the destination by the "0" constraint.
17366 (define_insn "*<lt_zcnt>_<mode>_falsedep"
17367 [(set (match_operand:SWI48 0 "register_operand" "=r")
17368 (unspec:SWI48
17369 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))
17370 (unspec [(match_operand:SWI48 2 "register_operand" "0")]
17371 UNSPEC_INSN_FALSE_DEP)
17372 (clobber (reg:CC FLAGS_REG))]
17373 ""
17374 "<lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}"
17375 [(set_attr "type" "<lt_zcnt_type>")
17376 (set_attr "prefix_0f" "1")
17377 (set_attr "prefix_rep" "1")
17378 (set_attr "mode" "<MODE>")])
17379
;; HImode form of the unspec lzcnt/tzcnt insn, output with the "w" suffix.
;; Unlike the SWI48 form it is a plain define_insn with no split.
17380 (define_insn "<lt_zcnt>_hi"
17381 [(set (match_operand:HI 0 "register_operand" "=r")
17382 (unspec:HI
17383 [(match_operand:HI 1 "nonimmediate_operand" "rm")] LT_ZCNT))
17384 (clobber (reg:CC FLAGS_REG))]
17385 ""
17386 "<lt_zcnt>{w}\t{%1, %0|%0, %1}"
17387 [(set_attr "type" "<lt_zcnt_type>")
17388 (set_attr "prefix_0f" "1")
17389 (set_attr "prefix_rep" "1")
17390 (set_attr "mode" "HI")])
17391
17392 ;; BMI instructions.
17393
;; bextr expressed as an UNSPEC_BEXTR of operand 1 (register or memory,
;; one alternative each) and register operand 2.  Flags are clobbered.
;; Requires TARGET_BMI.  The btver2_decode attribute differs between the
;; register and memory alternatives.
17394 (define_insn "bmi_bextr_<mode>"
17395 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
17396 (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "r,m")
17397 (match_operand:SWI48 2 "register_operand" "r,r")]
17398 UNSPEC_BEXTR))
17399 (clobber (reg:CC FLAGS_REG))]
17400 "TARGET_BMI"
17401 "bextr\t{%2, %1, %0|%0, %1, %2}"
17402 [(set_attr "type" "bitmanip")
17403 (set_attr "btver2_decode" "direct, double")
17404 (set_attr "mode" "<MODE>")])
17405
;; bextr used only for its flags: the flags register is set in CCZmode
;; from comparing the UNSPEC_BEXTR result with zero, and the value itself
;; goes to a scratch register (operand 0).  Requires TARGET_BMI.
17406 (define_insn "*bmi_bextr_<mode>_ccz"
17407 [(set (reg:CCZ FLAGS_REG)
17408 (compare:CCZ
17409 (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "r,m")
17410 (match_operand:SWI48 2 "register_operand" "r,r")]
17411 UNSPEC_BEXTR)
17412 (const_int 0)))
17413 (clobber (match_scratch:SWI48 0 "=r,r"))]
17414 "TARGET_BMI"
17415 "bextr\t{%2, %1, %0|%0, %1, %2}"
17416 [(set_attr "type" "bitmanip")
17417 (set_attr "btver2_decode" "direct, double")
17418 (set_attr "mode" "<MODE>")])
17419
;; blsi: matches (-x) & x, where both uses are the same operand 1.
;; Flags are clobbered.  Requires TARGET_BMI.
17420 (define_insn "*bmi_blsi_<mode>"
17421 [(set (match_operand:SWI48 0 "register_operand" "=r")
17422 (and:SWI48
17423 (neg:SWI48
17424 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))
17425 (match_dup 1)))
17426 (clobber (reg:CC FLAGS_REG))]
17427 "TARGET_BMI"
17428 "blsi\t{%1, %0|%0, %1}"
17429 [(set_attr "type" "bitmanip")
17430 (set_attr "btver2_decode" "double")
17431 (set_attr "mode" "<MODE>")])
17432
;; blsi that both stores (-x) & x into operand 0 and sets the flags
;; register from comparing that value with zero.  The flags mode is left
;; open in the template and checked with
;; ix86_match_ccmode (insn, CCNOmode).  Requires TARGET_BMI.
17433 (define_insn "*bmi_blsi_<mode>_cmp"
17434 [(set (reg FLAGS_REG)
17435 (compare
17436 (and:SWI48
17437 (neg:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))
17438 (match_dup 1))
17439 (const_int 0)))
17440 (set (match_operand:SWI48 0 "register_operand" "=r")
17441 (and:SWI48 (neg:SWI48 (match_dup 1)) (match_dup 1)))]
17442 "TARGET_BMI && ix86_match_ccmode (insn, CCNOmode)"
17443 "blsi\t{%1, %0|%0, %1}"
17444 [(set_attr "type" "bitmanip")
17445 (set_attr "btver2_decode" "double")
17446 (set_attr "mode" "<MODE>")])
17447
;; blsi used only for its flags: the flags register is set from comparing
;; (-x) & x with zero and the value goes to a scratch register (operand 0).
;; The flags mode is checked with ix86_match_ccmode (insn, CCNOmode).
;; Requires TARGET_BMI.
17448 (define_insn "*bmi_blsi_<mode>_ccno"
17449 [(set (reg FLAGS_REG)
17450 (compare
17451 (and:SWI48
17452 (neg:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))
17453 (match_dup 1))
17454 (const_int 0)))
17455 (clobber (match_scratch:SWI48 0 "=r"))]
17456 "TARGET_BMI && ix86_match_ccmode (insn, CCNOmode)"
17457 "blsi\t{%1, %0|%0, %1}"
17458 [(set_attr "type" "bitmanip")
17459 (set_attr "btver2_decode" "double")
17460 (set_attr "mode" "<MODE>")])
17461
;; blsmsk: matches (x + -1) ^ x, where both uses are the same operand 1.
;; Flags are clobbered.  Requires TARGET_BMI.
17462 (define_insn "*bmi_blsmsk_<mode>"
17463 [(set (match_operand:SWI48 0 "register_operand" "=r")
17464 (xor:SWI48
17465 (plus:SWI48
17466 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17467 (const_int -1))
17468 (match_dup 1)))
17469 (clobber (reg:CC FLAGS_REG))]
17470 "TARGET_BMI"
17471 "blsmsk\t{%1, %0|%0, %1}"
17472 [(set_attr "type" "bitmanip")
17473 (set_attr "btver2_decode" "double")
17474 (set_attr "mode" "<MODE>")])
17475
;; blsr: matches (x + -1) & x, where both uses are the same operand 1.
;; Flags are clobbered.  Requires TARGET_BMI.
17476 (define_insn "*bmi_blsr_<mode>"
17477 [(set (match_operand:SWI48 0 "register_operand" "=r")
17478 (and:SWI48
17479 (plus:SWI48
17480 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17481 (const_int -1))
17482 (match_dup 1)))
17483 (clobber (reg:CC FLAGS_REG))]
17484 "TARGET_BMI"
17485 "blsr\t{%1, %0|%0, %1}"
17486 [(set_attr "type" "bitmanip")
17487 (set_attr "btver2_decode" "double")
17488 (set_attr "mode" "<MODE>")])
17489
;; blsr that both stores (x + -1) & x into operand 0 and sets the flags
;; register in CCZmode from comparing that value with zero.
;; Requires TARGET_BMI.
17490 (define_insn "*bmi_blsr_<mode>_cmp"
17491 [(set (reg:CCZ FLAGS_REG)
17492 (compare:CCZ
17493 (and:SWI48
17494 (plus:SWI48
17495 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17496 (const_int -1))
17497 (match_dup 1))
17498 (const_int 0)))
17499 (set (match_operand:SWI48 0 "register_operand" "=r")
17500 (and:SWI48
17501 (plus:SWI48
17502 (match_dup 1)
17503 (const_int -1))
17504 (match_dup 1)))]
17505 "TARGET_BMI"
17506 "blsr\t{%1, %0|%0, %1}"
17507 [(set_attr "type" "bitmanip")
17508 (set_attr "btver2_decode" "double")
17509 (set_attr "mode" "<MODE>")])
17510
;; blsr used only for its flags: the flags register is set in CCZmode from
;; comparing (x + -1) & x with zero and the value goes to a scratch
;; register (operand 0).  Requires TARGET_BMI.
17511 (define_insn "*bmi_blsr_<mode>_ccz"
17512 [(set (reg:CCZ FLAGS_REG)
17513 (compare:CCZ
17514 (and:SWI48
17515 (plus:SWI48
17516 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17517 (const_int -1))
17518 (match_dup 1))
17519 (const_int 0)))
17520 (clobber (match_scratch:SWI48 0 "=r"))]
17521 "TARGET_BMI"
17522 "blsr\t{%1, %0|%0, %1}"
17523 [(set_attr "type" "bitmanip")
17524 (set_attr "btver2_decode" "double")
17525 (set_attr "mode" "<MODE>")])
17526
17527 ;; BMI2 instructions.
;; bzhi expander.  The result is zero when (operand 2 & 255) is zero;
;; otherwise it is a zero_extract of operand 1 starting at bit 0 whose
;; width is the unsigned minimum of (operand 2 & 255) and operand 3.
;; Operand 3 is set by the preparation code to the mode's width in bits
;; (<MODE_SIZE> * BITS_PER_UNIT).  Requires TARGET_BMI2.
17528 (define_expand "bmi2_bzhi_<mode>3"
17529 [(parallel
17530 [(set (match_operand:SWI48 0 "register_operand")
17531 (if_then_else:SWI48
17532 (ne:QI (and:SWI48 (match_operand:SWI48 2 "register_operand")
17533 (const_int 255))
17534 (const_int 0))
17535 (zero_extract:SWI48
17536 (match_operand:SWI48 1 "nonimmediate_operand")
17537 (umin:SWI48 (and:SWI48 (match_dup 2) (const_int 255))
17538 (match_dup 3))
17539 (const_int 0))
17540 (const_int 0)))
17541 (clobber (reg:CC FLAGS_REG))])]
17542 "TARGET_BMI2"
17543 "operands[3] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);")
17544
;; Insn matching the RTL built by the bmi2_bzhi_<mode>3 expander.
;; Operand 3 must be a constant equal to the mode's width in bits, which
;; the insn condition checks.  Flags are clobbered.  Requires TARGET_BMI2.
17545 (define_insn "*bmi2_bzhi_<mode>3"
17546 [(set (match_operand:SWI48 0 "register_operand" "=r")
17547 (if_then_else:SWI48
17548 (ne:QI (and:SWI48 (match_operand:SWI48 2 "register_operand" "r")
17549 (const_int 255))
17550 (const_int 0))
17551 (zero_extract:SWI48
17552 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17553 (umin:SWI48 (and:SWI48 (match_dup 2) (const_int 255))
17554 (match_operand:SWI48 3 "const_int_operand"))
17555 (const_int 0))
17556 (const_int 0)))
17557 (clobber (reg:CC FLAGS_REG))]
17558 "TARGET_BMI2 && INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
17559 "bzhi\t{%2, %1, %0|%0, %1, %2}"
17560 [(set_attr "type" "bitmanip")
17561 (set_attr "prefix" "vex")
17562 (set_attr "mode" "<MODE>")])
17563
;; bzhi variant where the index is a QImode register (operand 2): the
;; condition tests it against zero directly and the extract width uses its
;; zero extension.  Operand 2 is printed through the %<k> modifier.
;; Operand 3 must equal the mode's width in bits.  Requires TARGET_BMI2.
17564 (define_insn "*bmi2_bzhi_<mode>3_1"
17565 [(set (match_operand:SWI48 0 "register_operand" "=r")
17566 (if_then_else:SWI48
17567 (ne:QI (match_operand:QI 2 "register_operand" "r") (const_int 0))
17568 (zero_extract:SWI48
17569 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17570 (umin:SWI48 (zero_extend:SWI48 (match_dup 2))
17571 (match_operand:SWI48 3 "const_int_operand"))
17572 (const_int 0))
17573 (const_int 0)))
17574 (clobber (reg:CC FLAGS_REG))]
17575 "TARGET_BMI2 && INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
17576 "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}"
17577 [(set_attr "type" "bitmanip")
17578 (set_attr "prefix" "vex")
17579 (set_attr "mode" "<MODE>")])
17580
;; Flags-only form of the QImode-index BZHI pattern: the same
;; if_then_else value is compared with zero to set the CCZ flags, and
;; operand 0 is only a clobbered scratch register for the BZHI result.
17581 (define_insn "*bmi2_bzhi_<mode>3_1_ccz"
17582 [(set (reg:CCZ FLAGS_REG)
17583 (compare:CCZ
17584 (if_then_else:SWI48
17585 (ne:QI (match_operand:QI 2 "register_operand" "r") (const_int 0))
17586 (zero_extract:SWI48
17587 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17588 (umin:SWI48 (zero_extend:SWI48 (match_dup 2))
17589 (match_operand:SWI48 3 "const_int_operand"))
17590 (const_int 0))
17591 (const_int 0))
17592 (const_int 0)))
17593 (clobber (match_scratch:SWI48 0 "=r"))]
;; Operand 3 must be exactly the bit width of the mode.
17594 "TARGET_BMI2 && INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
17595 "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}"
17596 [(set_attr "type" "bitmanip")
17597 (set_attr "prefix" "vex")
17598 (set_attr "mode" "<MODE>")])
17599
;; Recognize the mask idiom ((1 << operand 2) - 1) & operand 1 and emit
;; it as a single BZHI.  Operand 2 is a QImode register holding the
;; shift count.
17600 (define_insn "*bmi2_bzhi_<mode>3_2"
17601 [(set (match_operand:SWI48 0 "register_operand" "=r")
17602 (and:SWI48
17603 (plus:SWI48
17604 (ashift:SWI48 (const_int 1)
17605 (match_operand:QI 2 "register_operand" "r"))
17606 (const_int -1))
17607 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
17608 (clobber (reg:CC FLAGS_REG))]
17609 "TARGET_BMI2"
17610 "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}"
17611 [(set_attr "type" "bitmanip")
17612 (set_attr "prefix" "vex")
17613 (set_attr "mode" "<MODE>")])
17614
;; Recognize the alternative mask idiom ~(-1 << operand 2) & operand 1
;; and emit it as a single BZHI.  Operand 2 is a QImode register holding
;; the shift count.
17615 (define_insn "*bmi2_bzhi_<mode>3_3"
17616 [(set (match_operand:SWI48 0 "register_operand" "=r")
17617 (and:SWI48
17618 (not:SWI48
17619 (ashift:SWI48 (const_int -1)
17620 (match_operand:QI 2 "register_operand" "r")))
17621 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
17622 (clobber (reg:CC FLAGS_REG))]
17623 "TARGET_BMI2"
17624 "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}"
17625 [(set_attr "type" "bitmanip")
17626 (set_attr "prefix" "vex")
17627 (set_attr "mode" "<MODE>")])
17628
;; 64-bit only: the SImode mask idiom ((1 << operand 2) - 1) & operand 1
;; whose result is zero-extended to DImode.  It is emitted as one BZHI
;; with every operand printed through the %q modifier, and the insn is
;; given DImode in its "mode" attribute.
17629 (define_insn "*bmi2_bzhi_zero_extendsidi_4"
17630 [(set (match_operand:DI 0 "register_operand" "=r")
17631 (zero_extend:DI
17632 (and:SI
17633 (plus:SI
17634 (ashift:SI (const_int 1)
17635 (match_operand:QI 2 "register_operand" "r"))
17636 (const_int -1))
17637 (match_operand:SI 1 "nonimmediate_operand" "rm"))))
17638 (clobber (reg:CC FLAGS_REG))]
17639 "TARGET_64BIT && TARGET_BMI2"
17640 "bzhi\t{%q2, %q1, %q0|%q0, %q1, %q2}"
17641 [(set_attr "type" "bitmanip")
17642 (set_attr "prefix" "vex")
17643 (set_attr "mode" "DI")])
17644
;; PDEP.  The operation is opaque to the RTL optimizers: it is modelled
;; as UNSPEC_PDEP of operand 1 (register) and operand 2 (register or
;; memory).  Unlike the patterns around it, no flags clobber is listed.
17645 (define_insn "bmi2_pdep_<mode>3"
17646 [(set (match_operand:SWI48 0 "register_operand" "=r")
17647 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")
17648 (match_operand:SWI48 2 "nonimmediate_operand" "rm")]
17649 UNSPEC_PDEP))]
17650 "TARGET_BMI2"
17651 "pdep\t{%2, %1, %0|%0, %1, %2}"
17652 [(set_attr "type" "bitmanip")
17653 (set_attr "prefix" "vex")
17654 (set_attr "mode" "<MODE>")])
17655
;; PEXT.  The operation is opaque to the RTL optimizers: it is modelled
;; as UNSPEC_PEXT of operand 1 (register) and operand 2 (register or
;; memory).  Unlike the patterns around it, no flags clobber is listed.
17656 (define_insn "bmi2_pext_<mode>3"
17657 [(set (match_operand:SWI48 0 "register_operand" "=r")
17658 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")
17659 (match_operand:SWI48 2 "nonimmediate_operand" "rm")]
17660 UNSPEC_PEXT))]
17661 "TARGET_BMI2"
17662 "pext\t{%2, %1, %0|%0, %1, %2}"
17663 [(set_attr "type" "bitmanip")
17664 (set_attr "prefix" "vex")
17665 (set_attr "mode" "<MODE>")])
17666
17667 ;; TBM instructions.
;; TBM BEXTR with an immediate control word.  In the RTL, operand 2 is
;; the zero_extract width and operand 3 its start position; both must
;; satisfy const_0_to_255_operand.  At output time the two constants are
;; packed into one immediate, (operand 2 << 8) | operand 3, which
;; replaces operand 2 before the instruction is printed.
17668 (define_insn "@tbm_bextri_<mode>"
17669 [(set (match_operand:SWI48 0 "register_operand" "=r")
17670 (zero_extract:SWI48
17671 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17672 (match_operand 2 "const_0_to_255_operand")
17673 (match_operand 3 "const_0_to_255_operand")))
17674 (clobber (reg:CC FLAGS_REG))]
17675 "TARGET_TBM"
17676 {
17677 operands[2] = GEN_INT (INTVAL (operands[2]) << 8 | INTVAL (operands[3]));
17678 return "bextr\t{%2, %1, %0|%0, %1, %2}";
17679 }
17680 [(set_attr "type" "bitmanip")
17681 (set_attr "mode" "<MODE>")])
17682
;; BLCFILL: operand 0 = (operand 1 + 1) & operand 1.  Clobbers the flags.
17683 (define_insn "*tbm_blcfill_<mode>"
17684 [(set (match_operand:SWI48 0 "register_operand" "=r")
17685 (and:SWI48
17686 (plus:SWI48
17687 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17688 (const_int 1))
17689 (match_dup 1)))
17690 (clobber (reg:CC FLAGS_REG))]
17691 "TARGET_TBM"
17692 "blcfill\t{%1, %0|%0, %1}"
17693 [(set_attr "type" "bitmanip")
17694 (set_attr "mode" "<MODE>")])
17695
;; BLCI: operand 0 = ~(operand 1 + 1) | operand 1.  Clobbers the flags.
17696 (define_insn "*tbm_blci_<mode>"
17697 [(set (match_operand:SWI48 0 "register_operand" "=r")
17698 (ior:SWI48
17699 (not:SWI48
17700 (plus:SWI48
17701 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17702 (const_int 1)))
17703 (match_dup 1)))
17704 (clobber (reg:CC FLAGS_REG))]
17705 "TARGET_TBM"
17706 "blci\t{%1, %0|%0, %1}"
17707 [(set_attr "type" "bitmanip")
17708 (set_attr "mode" "<MODE>")])
17709
;; BLCIC: operand 0 = (operand 1 + 1) & ~operand 1.  Clobbers the flags.
17710 (define_insn "*tbm_blcic_<mode>"
17711 [(set (match_operand:SWI48 0 "register_operand" "=r")
17712 (and:SWI48
17713 (plus:SWI48
17714 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17715 (const_int 1))
17716 (not:SWI48
17717 (match_dup 1))))
17718 (clobber (reg:CC FLAGS_REG))]
17719 "TARGET_TBM"
17720 "blcic\t{%1, %0|%0, %1}"
17721 [(set_attr "type" "bitmanip")
17722 (set_attr "mode" "<MODE>")])
17723
;; BLCMSK: operand 0 = (operand 1 + 1) ^ operand 1.  Clobbers the flags.
17724 (define_insn "*tbm_blcmsk_<mode>"
17725 [(set (match_operand:SWI48 0 "register_operand" "=r")
17726 (xor:SWI48
17727 (plus:SWI48
17728 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17729 (const_int 1))
17730 (match_dup 1)))
17731 (clobber (reg:CC FLAGS_REG))]
17732 "TARGET_TBM"
17733 "blcmsk\t{%1, %0|%0, %1}"
17734 [(set_attr "type" "bitmanip")
17735 (set_attr "mode" "<MODE>")])
17736
;; BLCS: operand 0 = (operand 1 + 1) | operand 1.  Clobbers the flags.
17737 (define_insn "*tbm_blcs_<mode>"
17738 [(set (match_operand:SWI48 0 "register_operand" "=r")
17739 (ior:SWI48
17740 (plus:SWI48
17741 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17742 (const_int 1))
17743 (match_dup 1)))
17744 (clobber (reg:CC FLAGS_REG))]
17745 "TARGET_TBM"
17746 "blcs\t{%1, %0|%0, %1}"
17747 [(set_attr "type" "bitmanip")
17748 (set_attr "mode" "<MODE>")])
17749
;; BLSFILL: operand 0 = (operand 1 - 1) | operand 1.  Clobbers the flags.
17750 (define_insn "*tbm_blsfill_<mode>"
17751 [(set (match_operand:SWI48 0 "register_operand" "=r")
17752 (ior:SWI48
17753 (plus:SWI48
17754 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17755 (const_int -1))
17756 (match_dup 1)))
17757 (clobber (reg:CC FLAGS_REG))]
17758 "TARGET_TBM"
17759 "blsfill\t{%1, %0|%0, %1}"
17760 [(set_attr "type" "bitmanip")
17761 (set_attr "mode" "<MODE>")])
17762
;; BLSIC: operand 0 = (operand 1 - 1) | ~operand 1.  Clobbers the flags.
17763 (define_insn "*tbm_blsic_<mode>"
17764 [(set (match_operand:SWI48 0 "register_operand" "=r")
17765 (ior:SWI48
17766 (plus:SWI48
17767 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17768 (const_int -1))
17769 (not:SWI48
17770 (match_dup 1))))
17771 (clobber (reg:CC FLAGS_REG))]
17772 "TARGET_TBM"
17773 "blsic\t{%1, %0|%0, %1}"
17774 [(set_attr "type" "bitmanip")
17775 (set_attr "mode" "<MODE>")])
17776
;; T1MSKC: operand 0 = (operand 1 + 1) | ~operand 1.  Clobbers the flags.
17777 (define_insn "*tbm_t1mskc_<mode>"
17778 [(set (match_operand:SWI48 0 "register_operand" "=r")
17779 (ior:SWI48
17780 (plus:SWI48
17781 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17782 (const_int 1))
17783 (not:SWI48
17784 (match_dup 1))))
17785 (clobber (reg:CC FLAGS_REG))]
17786 "TARGET_TBM"
17787 "t1mskc\t{%1, %0|%0, %1}"
17788 [(set_attr "type" "bitmanip")
17789 (set_attr "mode" "<MODE>")])
17790
;; TZMSK: operand 0 = (operand 1 - 1) & ~operand 1.  Clobbers the flags.
17791 (define_insn "*tbm_tzmsk_<mode>"
17792 [(set (match_operand:SWI48 0 "register_operand" "=r")
17793 (and:SWI48
17794 (plus:SWI48
17795 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17796 (const_int -1))
17797 (not:SWI48
17798 (match_dup 1))))
17799 (clobber (reg:CC FLAGS_REG))]
17800 "TARGET_TBM"
17801 "tzmsk\t{%1, %0|%0, %1}"
17802 [(set_attr "type" "bitmanip")
17803 (set_attr "mode" "<MODE>")])
17804
;; POPCNT of operand 1 into operand 0, clobbering the flags.
;; The mnemonic is printed without a size suffix when TARGET_MACHO is
;; set and with <imodesuffix> otherwise.
17805 (define_insn_and_split "popcount<mode>2"
17806 [(set (match_operand:SWI48 0 "register_operand" "=r")
17807 (popcount:SWI48
17808 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
17809 (clobber (reg:CC FLAGS_REG))]
17810 "TARGET_POPCNT"
17811 {
17812 #if TARGET_MACHO
17813 return "popcnt\t{%1, %0|%0, %1}";
17814 #else
17815 return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
17816 #endif
17817 }
;; Split only once epilogue_completed is set, when the tuning flag
;; TARGET_AVOID_FALSE_DEP_FOR_BMI is on, the function is optimized for
;; speed, and the destination is not mentioned in the source.
17818 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
17819 && optimize_function_for_speed_p (cfun)
17820 && !reg_mentioned_p (operands[0], operands[1])"
;; The replacement is the same popcount plus an UNSPEC_INSN_FALSE_DEP
;; reference to operand 0; the preparation statement first calls
;; ix86_expand_clear on the destination.
17821 [(parallel
17822 [(set (match_dup 0)
17823 (popcount:SWI48 (match_dup 1)))
17824 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
17825 (clobber (reg:CC FLAGS_REG))])]
17826 "ix86_expand_clear (operands[0]);"
17827 [(set_attr "prefix_rep" "1")
17828 (set_attr "type" "bitmanip")
17829 (set_attr "mode" "<MODE>")])
17830
17831 ; False dependency happens when destination is only updated by tzcnt,
17832 ; lzcnt or popcnt. There is no false dependency when destination is
17833 ; also used in source.
;; Form of popcount<mode>2 produced by its splitter.  The extra
;; UNSPEC_INSN_FALSE_DEP element reads operand 2, which the "0"
;; constraint ties to the destination register.  The assembly output is
;; the same as for the plain popcount pattern.
17834 (define_insn "*popcount<mode>2_falsedep"
17835 [(set (match_operand:SWI48 0 "register_operand" "=r")
17836 (popcount:SWI48
17837 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
17838 (unspec [(match_operand:SWI48 2 "register_operand" "0")]
17839 UNSPEC_INSN_FALSE_DEP)
17840 (clobber (reg:CC FLAGS_REG))]
17841 "TARGET_POPCNT"
17842 {
17843 #if TARGET_MACHO
17844 return "popcnt\t{%1, %0|%0, %1}";
17845 #else
17846 return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
17847 #endif
17848 }
17849 [(set_attr "prefix_rep" "1")
17850 (set_attr "type" "bitmanip")
17851 (set_attr "mode" "<MODE>")])
17852
;; 64-bit only: SImode popcount whose result is viewed as DImode through
;; a subreg and masked with 63.  A single 32-bit POPCNT is emitted; the
;; destination is printed with the %k modifier.
17853 (define_insn_and_split "*popcountsi2_zext"
17854 [(set (match_operand:DI 0 "register_operand" "=r")
17855 (and:DI
17856 (subreg:DI
17857 (popcount:SI
17858 (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
17859 (const_int 63)))
17860 (clobber (reg:CC FLAGS_REG))]
17861 "TARGET_POPCNT && TARGET_64BIT"
17862 {
17863 #if TARGET_MACHO
17864 return "popcnt\t{%1, %k0|%k0, %1}";
17865 #else
17866 return "popcnt{l}\t{%1, %k0|%k0, %1}";
17867 #endif
17868 }
;; Split only once epilogue_completed is set, when the tuning flag
;; TARGET_AVOID_FALSE_DEP_FOR_BMI is on, the function is optimized for
;; speed, and the destination is not mentioned in the source.
17869 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
17870 && optimize_function_for_speed_p (cfun)
17871 && !reg_mentioned_p (operands[0], operands[1])"
;; The replacement is the same operation plus an UNSPEC_INSN_FALSE_DEP
;; reference to operand 0; the preparation statement first calls
;; ix86_expand_clear on the destination.
17872 [(parallel
17873 [(set (match_dup 0)
17874 (and:DI (subreg:DI (popcount:SI (match_dup 1)) 0) (const_int 63)))
17875 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
17876 (clobber (reg:CC FLAGS_REG))])]
17877 "ix86_expand_clear (operands[0]);"
17878 [(set_attr "prefix_rep" "1")
17879 (set_attr "type" "bitmanip")
17880 (set_attr "mode" "SI")])
17881
17882 ; False dependency happens when destination is only updated by tzcnt,
17883 ; lzcnt or popcnt. There is no false dependency when destination is
17884 ; also used in source.
;; Form of *popcountsi2_zext produced by its splitter.  The extra
;; UNSPEC_INSN_FALSE_DEP element reads operand 2, which the "0"
;; constraint ties to the destination register.
17885 (define_insn "*popcountsi2_zext_falsedep"
17886 [(set (match_operand:DI 0 "register_operand" "=r")
17887 (and:DI
17888 (subreg:DI
17889 (popcount:SI
17890 (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
17891 (const_int 63)))
17892 (unspec [(match_operand:DI 2 "register_operand" "0")]
17893 UNSPEC_INSN_FALSE_DEP)
17894 (clobber (reg:CC FLAGS_REG))]
17895 "TARGET_POPCNT && TARGET_64BIT"
17896 {
17897 #if TARGET_MACHO
17898 return "popcnt\t{%1, %k0|%k0, %1}";
17899 #else
17900 return "popcnt{l}\t{%1, %k0|%k0, %1}";
17901 #endif
17902 }
17903 [(set_attr "prefix_rep" "1")
17904 (set_attr "type" "bitmanip")
17905 (set_attr "mode" "SI")])
17906
;; SImode popcount of a zero-extended HImode value.  The pattern has no
;; constraints and no assembly template ("#"); it is accepted only while
;; ix86_pre_reload_split () holds and is always split ("&& 1").
;; The split computes an HImode popcount into a fresh pseudo with
;; popcounthi2 and then zero-extends that pseudo into operand 0.
17907 (define_insn_and_split "*popcounthi2_1"
17908 [(set (match_operand:SI 0 "register_operand")
17909 (popcount:SI
17910 (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand"))))
17911 (clobber (reg:CC FLAGS_REG))]
17912 "TARGET_POPCNT
17913 && ix86_pre_reload_split ()"
17914 "#"
17915 "&& 1"
17916 [(const_int 0)]
17917 {
17918 rtx tmp = gen_reg_rtx (HImode);
17919
17920 emit_insn (gen_popcounthi2 (tmp, operands[1]));
17921 emit_insn (gen_zero_extendhisi2 (operands[0], tmp));
17922 DONE;
17923 })
17924
;; HImode POPCNT of operand 1 into operand 0, clobbering the flags.
;; The mnemonic is printed without a size suffix when TARGET_MACHO is
;; set and with the "w" suffix otherwise.
17925 (define_insn "popcounthi2"
17926 [(set (match_operand:HI 0 "register_operand" "=r")
17927 (popcount:HI
17928 (match_operand:HI 1 "nonimmediate_operand" "rm")))
17929 (clobber (reg:CC FLAGS_REG))]
17930 "TARGET_POPCNT"
17931 {
17932 #if TARGET_MACHO
17933 return "popcnt\t{%1, %0|%0, %1}";
17934 #else
17935 return "popcnt{w}\t{%1, %0|%0, %1}";
17936 #endif
17937 }
17938 [(set_attr "prefix_rep" "1")
17939 (set_attr "type" "bitmanip")
17940 (set_attr "mode" "HI")])
17941
;; DImode byte swap, 64-bit targets only.  Without TARGET_MOVBE the
;; source is forced into a register; with it, operand 1 is left as
;; given (it may be memory).
17942 (define_expand "bswapdi2"
17943 [(set (match_operand:DI 0 "register_operand")
17944 (bswap:DI (match_operand:DI 1 "nonimmediate_operand")))]
17945 "TARGET_64BIT"
17946 {
17947 if (!TARGET_MOVBE)
17948 operands[1] = force_reg (DImode, operands[1]);
17949 })
17950
;; SImode byte swap, available unconditionally.  Three cases:
;;  - TARGET_MOVBE: operand 1 is left as given and the default pattern
;;    is emitted;
;;  - TARGET_BSWAP: operand 1 is forced into a register first;
;;  - otherwise: operand 1 is copied to operand 0 and the swap is done
;;    in place as bswaphi_lowpart on the low half, a rotate left by 16,
;;    and bswaphi_lowpart on the low half again.
17951 (define_expand "bswapsi2"
17952 [(set (match_operand:SI 0 "register_operand")
17953 (bswap:SI (match_operand:SI 1 "nonimmediate_operand")))]
17954 ""
17955 {
17956 if (TARGET_MOVBE)
17957 ;
17958 else if (TARGET_BSWAP)
17959 operands[1] = force_reg (SImode, operands[1]);
17960 else
17961 {
17962 rtx x = operands[0];
17963
17964 emit_move_insn (x, operands[1]);
17965 emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x)));
17966 emit_insn (gen_rotlsi3 (x, x, GEN_INT (16)));
17967 emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x)));
17968 DONE;
17969 }
17970 })
17971
;; Byte swap when MOVBE is available.  Three alternatives:
;;  0: register swapped in place (source tied to destination) -> bswap
;;  1: memory to register                                     -> movbe
;;  2: register to memory                                     -> movbe
;; Memory-to-memory is rejected by the insn condition.
17972 (define_insn "*bswap<mode>2_movbe"
17973 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,r,m")
17974 (bswap:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,m,r")))]
17975 "TARGET_MOVBE
17976 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
17977 "@
17978 bswap\t%0
17979 movbe{<imodesuffix>}\t{%1, %0|%0, %1}
17980 movbe{<imodesuffix>}\t{%1, %0|%0, %1}"
;; Attribute values are listed per alternative, in the order above.
17981 [(set_attr "type" "bitmanip,imov,imov")
17982 (set_attr "modrm" "0,1,1")
17983 (set_attr "prefix_0f" "*,1,1")
17984 (set_attr "prefix_extra" "*,1,1")
17985 (set_attr "mode" "<MODE>")])
17986
;; In-place register byte swap using BSWAP; the source is tied to the
;; destination by the "0" constraint.
17987 (define_insn "*bswap<mode>2"
17988 [(set (match_operand:SWI48 0 "register_operand" "=r")
17989 (bswap:SWI48 (match_operand:SWI48 1 "register_operand" "0")))]
17990 "TARGET_BSWAP"
17991 "bswap\t%0"
17992 [(set_attr "type" "bitmanip")
17993 (set_attr "modrm" "0")
17994 (set_attr "mode" "<MODE>")])
17995
;; HImode byte swap expander.  It is enabled only when TARGET_MOVBE is
;; set and has no preparation code, so the pattern is emitted as written.
17996 (define_expand "bswaphi2"
17997 [(set (match_operand:HI 0 "register_operand")
17998 (bswap:HI (match_operand:HI 1 "nonimmediate_operand")))]
17999 "TARGET_MOVBE")
18000
;; HImode byte swap when MOVBE is available.  Three alternatives:
;;  0: "Q"-class register swapped in place by exchanging its high and
;;     low bytes (xchg{b} %h0, %b0)
;;  1: memory to register -> movbe{w}
;;  2: register to memory -> movbe{w}
;; Memory-to-memory is rejected by the insn condition.
18001 (define_insn "*bswaphi2_movbe"
18002 [(set (match_operand:HI 0 "nonimmediate_operand" "=Q,r,m")
18003 (bswap:HI (match_operand:HI 1 "nonimmediate_operand" "0,m,r")))]
18004 "TARGET_MOVBE
18005 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
18006 "@
18007 xchg{b}\t{%h0, %b0|%b0, %h0}
18008 movbe{w}\t{%1, %0|%0, %1}
18009 movbe{w}\t{%1, %0|%0, %1}"
;; Multi-valued attributes are listed per alternative, in the order
;; above; the xchg alternative is given QImode.
18010 [(set_attr "type" "imov")
18011 (set_attr "modrm" "*,1,1")
18012 (set_attr "prefix_0f" "*,1,1")
18013 (set_attr "prefix_extra" "*,1,1")
18014 (set_attr "pent_pair" "np,*,*")
18015 (set_attr "athlon_decode" "vector,*,*")
18016 (set_attr "amdfam10_decode" "double,*,*")
18017 (set_attr "bdver1_decode" "double,*,*")
18018 (set_attr "mode" "QI,HI,HI")])
18019
;; Rewrite an in-place HImode register byte swap as a rotate by 8.
;; The rotate form clobbers the flags, so it is only used when
;; FLAGS_REG is dead at this point, and only when neither
;; TARGET_USE_XCHGB nor optimizing for size is in effect.
18020 (define_peephole2
18021 [(set (match_operand:HI 0 "general_reg_operand")
18022 (bswap:HI (match_dup 0)))]
18023 "TARGET_MOVBE
18024 && !(TARGET_USE_XCHGB || optimize_function_for_size_p (cfun))
18025 && peep2_regno_dead_p (0, FLAGS_REG)"
18026 [(parallel [(set (match_dup 0) (rotate:HI (match_dup 0) (const_int 8)))
18027 (clobber (reg:CC FLAGS_REG))])])
18028
;; Byte-swap the HImode low part of a register in place
;; (strict_low_part), clobbering the flags.  Two alternatives:
;;  0: "Q"-class register, exchange high and low byte with xchg{b}
;;  1: any general register, rotate the word left by 8 with rol{w}
18029 (define_insn "bswaphi_lowpart"
18030 [(set (strict_low_part (match_operand:HI 0 "register_operand" "+Q,r"))
18031 (bswap:HI (match_dup 0)))
18032 (clobber (reg:CC FLAGS_REG))]
18033 ""
18034 "@
18035 xchg{b}\t{%h0, %b0|%b0, %h0}
18036 rol{w}\t{$8, %0|%0, 8}"
;; Alternative 0 is the one preferred for size.  For speed, alternative
;; 0 is preferred when TARGET_USE_XCHGB is set and alternative 1 when it
;; is not.
18037 [(set (attr "preferred_for_size")
18038 (cond [(eq_attr "alternative" "0")
18039 (symbol_ref "true")]
18040 (symbol_ref "false")))
18041 (set (attr "preferred_for_speed")
18042 (cond [(eq_attr "alternative" "0")
18043 (symbol_ref "TARGET_USE_XCHGB")]
18044 (symbol_ref "!TARGET_USE_XCHGB")))
18045 (set_attr "length" "2,4")
18046 (set_attr "mode" "QI,HI")])
18047
;; DImode parity for targets without POPCNT.  The value is folded by
;; XOR: the upper 32 bits (obtained by a 64-bit right shift on
;; TARGET_64BIT, by gen_highpart otherwise) are XORed with the lower 32
;; bits, then the upper 16 bits of that result with its lower 16 bits.
;; parityhi2_cmp sets the flags from the remaining HImode value, a
;; setcc with the ORDERED code materializes the result in a QImode
;; scratch, and the scratch is zero-extended into operand 0 (through an
;; SImode temporary on 32-bit targets).
18048 (define_expand "paritydi2"
18049 [(set (match_operand:DI 0 "register_operand")
18050 (parity:DI (match_operand:DI 1 "register_operand")))]
18051 "! TARGET_POPCNT"
18052 {
18053 rtx scratch = gen_reg_rtx (QImode);
18054 rtx hipart1 = gen_reg_rtx (SImode);
18055 rtx lopart1 = gen_reg_rtx (SImode);
18056 rtx xor1 = gen_reg_rtx (SImode);
18057 rtx shift2 = gen_reg_rtx (SImode);
18058 rtx hipart2 = gen_reg_rtx (HImode);
18059 rtx lopart2 = gen_reg_rtx (HImode);
18060 rtx xor2 = gen_reg_rtx (HImode);
18061
18062 if (TARGET_64BIT)
18063 {
18064 rtx shift1 = gen_reg_rtx (DImode);
18065 emit_insn (gen_lshrdi3 (shift1, operands[1], GEN_INT (32)));
18066 emit_move_insn (hipart1, gen_lowpart (SImode, shift1));
18067 }
18068 else
18069 emit_move_insn (hipart1, gen_highpart (SImode, operands[1]));
18070
18071 emit_move_insn (lopart1, gen_lowpart (SImode, operands[1]));
18072 emit_insn (gen_xorsi3 (xor1, hipart1, lopart1));
18073
18074 emit_insn (gen_lshrsi3 (shift2, xor1, GEN_INT (16)));
18075 emit_move_insn (hipart2, gen_lowpart (HImode, shift2));
18076 emit_move_insn (lopart2, gen_lowpart (HImode, xor1));
18077 emit_insn (gen_xorhi3 (xor2, hipart2, lopart2));
18078
18079 emit_insn (gen_parityhi2_cmp (xor2));
18080
18081 ix86_expand_setcc (scratch, ORDERED,
18082 gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
18083
18084 if (TARGET_64BIT)
18085 emit_insn (gen_zero_extendqidi2 (operands[0], scratch));
18086 else
18087 {
18088 rtx tmp = gen_reg_rtx (SImode);
18089
18090 emit_insn (gen_zero_extendqisi2 (tmp, scratch));
18091 emit_insn (gen_zero_extendsidi2 (operands[0], tmp));
18092 }
18093 DONE;
18094 })
18095
;; SImode parity for targets without POPCNT.  The upper 16 bits are
;; XORed with the lower 16 bits, parityhi2_cmp sets the flags from that
;; HImode value, a setcc with the ORDERED code materializes the result
;; in a QImode scratch, and the scratch is zero-extended into operand 0.
18096 (define_expand "paritysi2"
18097 [(set (match_operand:SI 0 "register_operand")
18098 (parity:SI (match_operand:SI 1 "register_operand")))]
18099 "! TARGET_POPCNT"
18100 {
18101 rtx scratch = gen_reg_rtx (QImode);
18102 rtx shift = gen_reg_rtx (SImode);
18103 rtx hipart = gen_reg_rtx (HImode);
18104 rtx lopart = gen_reg_rtx (HImode);
18105 rtx tmp = gen_reg_rtx (HImode);
18106
18107 emit_insn (gen_lshrsi3 (shift, operands[1], GEN_INT (16)));
18108 emit_move_insn (hipart, gen_lowpart (HImode, shift));
18109 emit_move_insn (lopart, gen_lowpart (HImode, operands[1]));
18110 emit_insn (gen_xorhi3 (tmp, hipart, lopart));
18111
18112 emit_insn (gen_parityhi2_cmp (tmp));
18113
18114 ix86_expand_setcc (scratch, ORDERED,
18115 gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
18116
18117 emit_insn (gen_zero_extendqisi2 (operands[0], scratch));
18118 DONE;
18119 })
18120
;; HImode parity for targets without POPCNT.  parityhi2_cmp sets the
;; flags directly from operand 1, a setcc with the ORDERED code
;; materializes the result in a QImode scratch, and the scratch is
;; zero-extended into operand 0.
;; NOTE(review): parityhi2_cmp is declared with a clobber of its
;; operand, and operand 1 is passed to it directly here.
18121 (define_expand "parityhi2"
18122 [(set (match_operand:HI 0 "register_operand")
18123 (parity:HI (match_operand:HI 1 "register_operand")))]
18124 "! TARGET_POPCNT"
18125 {
18126 rtx scratch = gen_reg_rtx (QImode);
18127
18128 emit_insn (gen_parityhi2_cmp (operands[1]));
18129
18130 ix86_expand_setcc (scratch, ORDERED,
18131 gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
18132
18133 emit_insn (gen_zero_extendqihi2 (operands[0], scratch));
18134 DONE;
18135 })
18136
;; QImode parity for targets without POPCNT.  parityqi2_cmp sets the
;; flags from operand 1 and a setcc with the ORDERED code writes the
;; result straight into operand 0; no extension step is needed.
18137 (define_expand "parityqi2"
18138 [(set (match_operand:QI 0 "register_operand")
18139 (parity:QI (match_operand:QI 1 "register_operand")))]
18140 "! TARGET_POPCNT"
18141 {
18142 emit_insn (gen_parityqi2_cmp (operands[1]));
18143
18144 ix86_expand_setcc (operands[0], ORDERED,
18145 gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
18146 DONE;
18147 })
18148
;; Set the flags to the parity (UNSPEC_PARITY) of an HImode register.
;; The high byte is XORed into the low byte with xor{b}, which is why
;; the operand needs the "Q" register class and why the pattern declares
;; a clobber of the operand.
18149 (define_insn "parityhi2_cmp"
18150 [(set (reg:CC FLAGS_REG)
18151 (unspec:CC [(match_operand:HI 0 "register_operand" "+Q")]
18152 UNSPEC_PARITY))
18153 (clobber (match_dup 0))]
18154 ""
18155 "xor{b}\t{%h0, %b0|%b0, %h0}"
18156 [(set_attr "length" "2")
18157 (set_attr "mode" "QI")])
18158
;; Set the flags to the parity (UNSPEC_PARITY) of a QImode register by
;; testing the register against itself; the operand is only read.
18159 (define_insn "parityqi2_cmp"
18160 [(set (reg:CC FLAGS_REG)
18161 (unspec:CC [(match_operand:QI 0 "register_operand" "q")]
18162 UNSPEC_PARITY))]
18163 ""
18164 "test{b}\t%0, %0"
18165 [(set_attr "mode" "QI")])
18166
18167 ;; Replace zero_extend:HI followed by parityhi2_cmp with parityqi2_cmp
;; Matches a QImode-to-HImode zero extension followed by the HImode
;; parity compare of the extended register (which that compare also
;; clobbers), and replaces both with the QImode parity compare of the
;; original operand 1.
18168 (define_peephole2
18169 [(set (match_operand:HI 0 "register_operand")
18170 (zero_extend:HI (match_operand:QI 1 "general_reg_operand")))
18171 (parallel [(set (reg:CC FLAGS_REG)
18172 (unspec:CC [(match_dup 0)] UNSPEC_PARITY))
18173 (clobber (match_dup 0))])]
18174 ""
18175 [(set (reg:CC FLAGS_REG)
18176 (unspec:CC [(match_dup 1)] UNSPEC_PARITY))])
18177
18178 ;; Eliminate QImode popcount&1 using parity flag
;; Matched sequence: zero-extend QImode operand 1 to SImode, take the
;; SImode popcount, test bit 0 of the popcount result, and branch on
;; that test.  Replacement: a QImode parity compare of operand 1
;; followed by the branch, with the branch condition recoded.
18179 (define_peephole2
18180 [(set (match_operand:SI 0 "register_operand")
18181 (zero_extend:SI (match_operand:QI 1 "general_reg_operand")))
18182 (parallel [(set (match_operand:SI 2 "register_operand")
18183 (popcount:SI (match_dup 0)))
18184 (clobber (reg:CC FLAGS_REG))])
18185 (set (reg:CCZ FLAGS_REG)
18186 (compare:CCZ (and:QI (match_operand:QI 3 "register_operand")
18187 (const_int 1))
18188 (const_int 0)))
18189 (set (pc) (if_then_else (match_operator 4 "bt_comparison_operator"
18190 [(reg:CCZ FLAGS_REG)
18191 (const_int 0)])
18192 (label_ref (match_operand 5))
18193 (pc)))]
;; The tested register must be the popcount result, and the extended
;; value, the popcount result and the flags must all be dead at the
;; peephole positions checked below.
18194 "REGNO (operands[2]) == REGNO (operands[3])
18195 && peep2_reg_dead_p (3, operands[0])
18196 && peep2_reg_dead_p (3, operands[2])
18197 && peep2_regno_dead_p (4, FLAGS_REG)"
18198 [(set (reg:CC FLAGS_REG)
18199 (unspec:CC [(match_dup 1)] UNSPEC_PARITY))
18200 (set (pc) (if_then_else (match_op_dup 4 [(reg:CC FLAGS_REG)
18201 (const_int 0)])
18202 (label_ref (match_dup 5))
18203 (pc)))]
;; The comparison is copied before its code is changed: EQ becomes
;; UNORDERED and any other code becomes ORDERED.
18204 {
18205 operands[4] = shallow_copy_rtx (operands[4]);
18206 PUT_CODE (operands[4], GET_CODE (operands[4]) == EQ ? UNORDERED : ORDERED);
18207 })
18208
18209 ;; Eliminate HImode popcount&1 using parity flag
;; Matched sequence: HImode popcount of operand 2, zero-extend the
;; result, test bit 0 of the extended value, and branch on that test.
;; Replacement: copy operand 2 into an HImode scratch of class "Q"
;; (operand 0, requested by the match_scratch), do the HImode parity
;; compare on that scratch (which the compare clobbers), then branch
;; with the condition recoded.
18210 (define_peephole2
18211 [(match_scratch:HI 0 "Q")
18212 (parallel [(set (match_operand:HI 1 "register_operand")
18213 (popcount:HI
18214 (match_operand:HI 2 "nonimmediate_operand")))
18215 (clobber (reg:CC FLAGS_REG))])
18216 (set (match_operand 3 "register_operand")
18217 (zero_extend (match_dup 1)))
18218 (set (reg:CCZ FLAGS_REG)
18219 (compare:CCZ (and:QI (match_operand:QI 4 "register_operand")
18220 (const_int 1))
18221 (const_int 0)))
18222 (set (pc) (if_then_else (match_operator 5 "bt_comparison_operator"
18223 [(reg:CCZ FLAGS_REG)
18224 (const_int 0)])
18225 (label_ref (match_operand 6))
18226 (pc)))]
;; The tested register must be the zero-extended result, and the
;; popcount result, the extended value and the flags must all be dead
;; at the peephole positions checked below.
18227 "REGNO (operands[3]) == REGNO (operands[4])
18228 && peep2_reg_dead_p (3, operands[1])
18229 && peep2_reg_dead_p (3, operands[3])
18230 && peep2_regno_dead_p (4, FLAGS_REG)"
18231 [(set (match_dup 0) (match_dup 2))
18232 (parallel [(set (reg:CC FLAGS_REG)
18233 (unspec:CC [(match_dup 0)] UNSPEC_PARITY))
18234 (clobber (match_dup 0))])
18235 (set (pc) (if_then_else (match_op_dup 5 [(reg:CC FLAGS_REG)
18236 (const_int 0)])
18237 (label_ref (match_dup 6))
18238 (pc)))]
;; The comparison is copied before its code is changed: EQ becomes
;; UNORDERED and any other code becomes ORDERED.
18239 {
18240 operands[5] = shallow_copy_rtx (operands[5]);
18241 PUT_CODE (operands[5], GET_CODE (operands[5]) == EQ ? UNORDERED : ORDERED);
18242 })
18243
18244 \f
18245 ;; Thread-local storage patterns for ELF.
18246 ;;
18247 ;; Note that these code sequences must appear exactly as shown
18248 ;; in order to allow linker relaxation.
18249
;; 32-bit global-dynamic TLS sequence: an LEA of the symbol's @tlsgd
;; reference followed by a call.  Operand 1 is a register in class
;; "Yb", operand 2 the TLS symbol, operand 3 the call target.  The
;; result lands in the "a" register; scratch registers of class "d" and
;; "c" and the flags are clobbered.
;; The exact LEA addressing form and call form depend on TARGET_SUN_TLS,
;; flag_plt, HAVE_AS_IX86_TLS_GET_ADDR_GOT and HAVE_AS_IX86_TLSGDPLT.
18250 (define_insn "*tls_global_dynamic_32_gnu"
18251 [(set (match_operand:SI 0 "register_operand" "=a")
18252 (unspec:SI
18253 [(match_operand:SI 1 "register_operand" "Yb")
18254 (match_operand 2 "tls_symbolic_operand")
18255 (match_operand 3 "constant_call_address_operand" "Bz")
18256 (reg:SI SP_REG)]
18257 UNSPEC_TLS_GD))
18258 (clobber (match_scratch:SI 4 "=d"))
18259 (clobber (match_scratch:SI 5 "=c"))
18260 (clobber (reg:CC FLAGS_REG))]
18261 "!TARGET_64BIT && TARGET_GNU_TLS"
18262 {
18263 if (TARGET_SUN_TLS || flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
18264 output_asm_insn
18265 ("lea{l}\t{%E2@tlsgd(,%1,1), %0|%0, %E2@tlsgd[%1*1]}", operands);
18266 else
18267 output_asm_insn
18268 ("lea{l}\t{%E2@tlsgd(%1), %0|%0, %E2@tlsgd[%1]}", operands);
18269 if (TARGET_SUN_TLS)
18270 #ifdef HAVE_AS_IX86_TLSGDPLT
18271 return "call\t%a2@tlsgdplt";
18272 #else
18273 return "call\t%p3@plt";
18274 #endif
18275 if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
18276 return "call\t%P3";
18277 return "call\t{*%p3@GOT(%1)|[DWORD PTR %p3@GOT[%1]]}";
18278 }
18279 [(set_attr "type" "multi")
18280 (set_attr "length" "12")])
18281
;; Expander for the 32-bit global-dynamic TLS call.  Note the operand
;; numbering: here operand 1 is the TLS symbol and operand 2 the
;; register, and the unspec lists them as register, symbol, call target.
;; Besides emitting the pattern it records, in
;; ix86_tls_descriptor_calls_expanded_in_cfun, that such a call was
;; expanded in the current function.
18282 (define_expand "tls_global_dynamic_32"
18283 [(parallel
18284 [(set (match_operand:SI 0 "register_operand")
18285 (unspec:SI [(match_operand:SI 2 "register_operand")
18286 (match_operand 1 "tls_symbolic_operand")
18287 (match_operand 3 "constant_call_address_operand")
18288 (reg:SI SP_REG)]
18289 UNSPEC_TLS_GD))
18290 (clobber (scratch:SI))
18291 (clobber (scratch:SI))
18292 (clobber (reg:CC FLAGS_REG))])]
18293 ""
18294 "ix86_tls_descriptor_calls_expanded_in_cfun = true;")
18295
;; 64-bit global-dynamic TLS sequence, modelled as a call to operand 2
;; with an UNSPEC_TLS_GD marker carrying the TLS symbol (operand 1).
;; Output, in order:
;;  - a "data16" prefix, unless TARGET_X32;
;;  - an LEA of the symbol's @tlsgd(%rip) reference into %rdi;
;;  - padding emitted as raw data (0x6666 as a short, or 0x66 as a
;;    byte, depending on the call form) followed by "rex64";
;;  - the call itself, through @plt, directly, or through @GOTPCREL.
;; The "length" attribute is 15 for TARGET_X32 and 16 otherwise.
18296 (define_insn "*tls_global_dynamic_64_<mode>"
18297 [(set (match_operand:P 0 "register_operand" "=a")
18298 (call:P
18299 (mem:QI (match_operand 2 "constant_call_address_operand" "Bz"))
18300 (match_operand 3)))
18301 (unspec:P [(match_operand 1 "tls_symbolic_operand")
18302 (reg:P SP_REG)]
18303 UNSPEC_TLS_GD)]
18304 "TARGET_64BIT"
18305 {
18306 if (!TARGET_X32)
18307 /* The .loc directive has effect for 'the immediately following assembly
18308 instruction'. So for a sequence:
18309 .loc f l
18310 .byte x
18311 insn1
18312 the 'immediately following assembly instruction' is insn1.
18313 We want to emit an insn prefix here, but if we use .byte (as shown in
18314 'ELF Handling For Thread-Local Storage'), a preceding .loc will point
18315 inside the insn sequence, rather than to the start. After relaxation
18316 of the sequence by the linker, the .loc might point inside an insn.
18317 Use data16 prefix instead, which doesn't have this problem. */
18318 fputs ("\tdata16", asm_out_file);
18319 output_asm_insn
18320 ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands);
18321 if (TARGET_SUN_TLS || flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
18322 fputs (ASM_SHORT "0x6666\n", asm_out_file);
18323 else
18324 fputs (ASM_BYTE "0x66\n", asm_out_file);
18325 fputs ("\trex64\n", asm_out_file);
18326 if (TARGET_SUN_TLS)
18327 return "call\t%p2@plt";
18328 if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
18329 return "call\t%P2";
18330 return "call\t{*%p2@GOTPCREL(%%rip)|[QWORD PTR %p2@GOTPCREL[rip]]}";
18331 }
18332 [(set_attr "type" "multi")
18333 (set (attr "length")
18334 (symbol_ref "TARGET_X32 ? 15 : 16"))])
18335
;; Global-dynamic TLS sequence for the large PIC code model.  The call
;; address is operand 2 (register, class "b") plus operand 3, which the
;; insn condition requires to be a CONST wrapping an UNSPEC_PLTOFF.
;; Output: LEA of the symbol's @tlsgd(%rip) reference into %rdi, movabs
;; of operand 3 into %rax, add of operand 2 to %rax, and an indirect
;; call through %rax.
18336 (define_insn "*tls_global_dynamic_64_largepic"
18337 [(set (match_operand:DI 0 "register_operand" "=a")
18338 (call:DI
18339 (mem:QI (plus:DI (match_operand:DI 2 "register_operand" "b")
18340 (match_operand:DI 3 "immediate_operand" "i")))
18341 (match_operand 4)))
18342 (unspec:DI [(match_operand 1 "tls_symbolic_operand")
18343 (reg:DI SP_REG)]
18344 UNSPEC_TLS_GD)]
18345 "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF
18346 && GET_CODE (operands[3]) == CONST
18347 && GET_CODE (XEXP (operands[3], 0)) == UNSPEC
18348 && XINT (XEXP (operands[3], 0), 1) == UNSPEC_PLTOFF"
18349 {
18350 output_asm_insn
18351 ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands);
18352 output_asm_insn ("movabs{q}\t{%3, %%rax|rax, %3}", operands);
18353 output_asm_insn ("add{q}\t{%2, %%rax|rax, %2}", operands);
18354 return "call\t{*%%rax|rax}";
18355 }
18356 [(set_attr "type" "multi")
18357 (set_attr "length" "22")])
18358
;; Expander for the 64-bit global-dynamic TLS call.  Operand 2 (the
;; call address) has no predicate here.  Besides emitting the pattern
;; it records, in ix86_tls_descriptor_calls_expanded_in_cfun, that such
;; a call was expanded in the current function.
18359 (define_expand "@tls_global_dynamic_64_<mode>"
18360 [(parallel
18361 [(set (match_operand:P 0 "register_operand")
18362 (call:P
18363 (mem:QI (match_operand 2))
18364 (const_int 0)))
18365 (unspec:P [(match_operand 1 "tls_symbolic_operand")
18366 (reg:P SP_REG)]
18367 UNSPEC_TLS_GD)])]
18368 "TARGET_64BIT"
18369 "ix86_tls_descriptor_calls_expanded_in_cfun = true;")
18370
;; 32-bit local-dynamic TLS base sequence: an LEA of the @tlsldm
;; reference followed by a call.  Operand 1 is a register in class
;; "Yb", operand 2 the call target.  The result lands in the "a"
;; register; scratch registers of class "d" and "c" and the flags are
;; clobbered.  The call form depends on TARGET_SUN_TLS,
;; HAVE_AS_IX86_TLSLDMPLT, flag_plt and HAVE_AS_IX86_TLS_GET_ADDR_GOT.
18371 (define_insn "*tls_local_dynamic_base_32_gnu"
18372 [(set (match_operand:SI 0 "register_operand" "=a")
18373 (unspec:SI
18374 [(match_operand:SI 1 "register_operand" "Yb")
18375 (match_operand 2 "constant_call_address_operand" "Bz")
18376 (reg:SI SP_REG)]
18377 UNSPEC_TLS_LD_BASE))
18378 (clobber (match_scratch:SI 3 "=d"))
18379 (clobber (match_scratch:SI 4 "=c"))
18380 (clobber (reg:CC FLAGS_REG))]
18381 "!TARGET_64BIT && TARGET_GNU_TLS"
18382 {
18383 output_asm_insn
18384 ("lea{l}\t{%&@tlsldm(%1), %0|%0, %&@tlsldm[%1]}", operands);
18385 if (TARGET_SUN_TLS)
18386 {
18387 if (HAVE_AS_IX86_TLSLDMPLT)
18388 return "call\t%&@tlsldmplt";
18389 else
18390 return "call\t%p2@plt";
18391 }
18392 if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
18393 return "call\t%P2";
18394 return "call\t{*%p2@GOT(%1)|[DWORD PTR %p2@GOT[%1]]}";
18395 }
18396 [(set_attr "type" "multi")
18397 (set_attr "length" "11")])
18398
;; Expander for the 32-bit local-dynamic TLS base call.  Besides
;; emitting the pattern it records, in
;; ix86_tls_descriptor_calls_expanded_in_cfun, that such a call was
;; expanded in the current function.
18399 (define_expand "tls_local_dynamic_base_32"
18400 [(parallel
18401 [(set (match_operand:SI 0 "register_operand")
18402 (unspec:SI
18403 [(match_operand:SI 1 "register_operand")
18404 (match_operand 2 "constant_call_address_operand")
18405 (reg:SI SP_REG)]
18406 UNSPEC_TLS_LD_BASE))
18407 (clobber (scratch:SI))
18408 (clobber (scratch:SI))
18409 (clobber (reg:CC FLAGS_REG))])]
18410 ""
18411 "ix86_tls_descriptor_calls_expanded_in_cfun = true;")
18412
;; 64-bit local-dynamic TLS base sequence, modelled as a call to
;; operand 1 with an UNSPEC_TLS_LD_BASE marker.  Output: an LEA of the
;; @tlsld(%rip) reference into %rdi followed by the call, which goes
;; through @plt, directly, or through @GOTPCREL depending on
;; TARGET_SUN_TLS, flag_plt and HAVE_AS_IX86_TLS_GET_ADDR_GOT.
18413 (define_insn "*tls_local_dynamic_base_64_<mode>"
18414 [(set (match_operand:P 0 "register_operand" "=a")
18415 (call:P
18416 (mem:QI (match_operand 1 "constant_call_address_operand" "Bz"))
18417 (match_operand 2)))
18418 (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)]
18419 "TARGET_64BIT"
18420 {
18421 output_asm_insn
18422 ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands);
18423 if (TARGET_SUN_TLS)
18424 return "call\t%p1@plt";
18425 if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
18426 return "call\t%P1";
18427 return "call\t{*%p1@GOTPCREL(%%rip)|[QWORD PTR %p1@GOTPCREL[rip]]}";
18428 }
18429 [(set_attr "type" "multi")
18430 (set_attr "length" "12")])
18431
;; Local-dynamic TLS base sequence for the large PIC code model.  The
;; call address is operand 1 (register, class "b") plus operand 2,
;; which the insn condition requires to be a CONST wrapping an
;; UNSPEC_PLTOFF.  Output: LEA of the @tlsld(%rip) reference into %rdi,
;; movabs of operand 2 into %rax, add of operand 1 to %rax, and an
;; indirect call through %rax.
18432 (define_insn "*tls_local_dynamic_base_64_largepic"
18433 [(set (match_operand:DI 0 "register_operand" "=a")
18434 (call:DI
18435 (mem:QI (plus:DI (match_operand:DI 1 "register_operand" "b")
18436 (match_operand:DI 2 "immediate_operand" "i")))
18437 (match_operand 3)))
18438 (unspec:DI [(reg:DI SP_REG)] UNSPEC_TLS_LD_BASE)]
18439 "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF
18440 && GET_CODE (operands[2]) == CONST
18441 && GET_CODE (XEXP (operands[2], 0)) == UNSPEC
18442 && XINT (XEXP (operands[2], 0), 1) == UNSPEC_PLTOFF"
18443 {
18444 output_asm_insn
18445 ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands);
18446 output_asm_insn ("movabs{q}\t{%2, %%rax|rax, %2}", operands);
18447 output_asm_insn ("add{q}\t{%1, %%rax|rax, %1}", operands);
18448 return "call\t{*%%rax|rax}";
18449 }
18450 [(set_attr "type" "multi")
18451 (set_attr "length" "22")])
18452
;; Expander for the 64-bit local-dynamic TLS base call.  Operand 1 (the
;; call address) has no predicate here.  Besides emitting the pattern
;; it records, in ix86_tls_descriptor_calls_expanded_in_cfun, that such
;; a call was expanded in the current function.
18453 (define_expand "@tls_local_dynamic_base_64_<mode>"
18454 [(parallel
18455 [(set (match_operand:P 0 "register_operand")
18456 (call:P
18457 (mem:QI (match_operand 1))
18458 (const_int 0)))
18459 (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)])]
18460 "TARGET_64BIT"
18461 "ix86_tls_descriptor_calls_expanded_in_cfun = true;")
18462
18463 ;; Local dynamic of a single variable is a loss. Show combine how
18464 ;; to convert that back to global dynamic.
18465
;; Matches a 32-bit local-dynamic base computation (UNSPEC_TLS_LD_BASE)
;; added to the UNSPEC_DTPOFF offset of TLS symbol operand 3.  There is
;; no assembly template ("#"); the pattern is split into a single
;; UNSPEC_TLS_GD parallel for that symbol, reusing the same register,
;; call target, scratch registers and flags clobber.
18466 (define_insn_and_split "*tls_local_dynamic_32_once"
18467 [(set (match_operand:SI 0 "register_operand" "=a")
18468 (plus:SI
18469 (unspec:SI [(match_operand:SI 1 "register_operand" "b")
18470 (match_operand 2 "constant_call_address_operand" "Bz")
18471 (reg:SI SP_REG)]
18472 UNSPEC_TLS_LD_BASE)
18473 (const:SI (unspec:SI
18474 [(match_operand 3 "tls_symbolic_operand")]
18475 UNSPEC_DTPOFF))))
18476 (clobber (match_scratch:SI 4 "=d"))
18477 (clobber (match_scratch:SI 5 "=c"))
18478 (clobber (reg:CC FLAGS_REG))]
18479 ""
18480 "#"
18481 ""
;; In the replacement the unspec operands are listed as register
;; (operand 1), TLS symbol (operand 3), call target (operand 2).
18482 [(parallel
18483 [(set (match_dup 0)
18484 (unspec:SI [(match_dup 1) (match_dup 3) (match_dup 2)
18485 (reg:SI SP_REG)]
18486 UNSPEC_TLS_GD))
18487 (clobber (match_dup 4))
18488 (clobber (match_dup 5))
18489 (clobber (reg:CC FLAGS_REG))])])
18490
18491 ;; Load and add the thread base pointer from %<tp_seg>:0.
;; Expander that sets operand 0 to the thread pointer, represented as
;; UNSPEC_TP.  The expander condition is empty; the TLS availability
;; check is done in the preparation code instead, which reports an
;; error naming __builtin_thread_pointer when targetm.have_tls is false.
;; There is no DONE or FAIL after the error, so the pattern is still
;; emitted in that case.
18492 (define_expand "get_thread_pointer<mode>"
18493 [(set (match_operand:PTR 0 "register_operand")
18494 (unspec:PTR [(const_int 0)] UNSPEC_TP))]
18495 ""
18496 {
18497 /* targetm is not visible in the scope of the condition. */
18498 if (!targetm.have_tls)
18499 error ("%<__builtin_thread_pointer%> is not supported on this target");
18500 })
18501
;; Load of the thread pointer (UNSPEC_TP) into a general register.
;; Never output directly ("#"): it is split into a plain move whose
;; source is a constant MEM at address 0 placed in the
;; DEFAULT_TLS_SEG_REG address space.
18502 (define_insn_and_split "*load_tp_<mode>"
18503 [(set (match_operand:PTR 0 "register_operand" "=r")
18504 (unspec:PTR [(const_int 0)] UNSPEC_TP))]
18505 ""
18506 "#"
18507 ""
18508 [(set (match_dup 0)
18509 (match_dup 1))]
18510 {
18511 addr_space_t as = DEFAULT_TLS_SEG_REG;
18512
18513 operands[1] = gen_const_mem (<MODE>mode, const0_rtx);
18514 set_mem_addr_space (operands[1], as);
18515 })
18516
;; TARGET_X32 variant of the thread-pointer load: the SImode UNSPEC_TP
;; value is zero-extended into a DImode register.  Always split
;; ("&& 1") into a zero-extending load from an SImode constant MEM at
;; address 0 in the DEFAULT_TLS_SEG_REG address space.
18517 (define_insn_and_split "*load_tp_x32_zext"
18518 [(set (match_operand:DI 0 "register_operand" "=r")
18519 (zero_extend:DI
18520 (unspec:SI [(const_int 0)] UNSPEC_TP)))]
18521 "TARGET_X32"
18522 "#"
18523 "&& 1"
18524 [(set (match_dup 0)
18525 (zero_extend:DI (match_dup 1)))]
18526 {
18527 addr_space_t as = DEFAULT_TLS_SEG_REG;
18528
18529 operands[1] = gen_const_mem (SImode, const0_rtx);
18530 set_mem_addr_space (operands[1], as);
18531 })
18532
;; Adds the thread pointer (UNSPEC_TP) to register operand 1, which is
;; tied to the destination.  Split into an ordinary flags-clobbering
;; add whose second source is a constant MEM at address 0 in the
;; DEFAULT_TLS_SEG_REG address space.
18533 (define_insn_and_split "*add_tp_<mode>"
18534 [(set (match_operand:PTR 0 "register_operand" "=r")
18535 (plus:PTR
18536 (unspec:PTR [(const_int 0)] UNSPEC_TP)
18537 (match_operand:PTR 1 "register_operand" "0")))
18538 (clobber (reg:CC FLAGS_REG))]
18539 ""
18540 "#"
18541 ""
18542 [(parallel
18543 [(set (match_dup 0)
18544 (plus:PTR (match_dup 1) (match_dup 2)))
18545 (clobber (reg:CC FLAGS_REG))])]
18546 {
18547 addr_space_t as = DEFAULT_TLS_SEG_REG;
18548
18549 operands[2] = gen_const_mem (<MODE>mode, const0_rtx);
18550 set_mem_addr_space (operands[2], as);
18551 })
18552
;; TARGET_X32 variant of the thread-pointer add: the SImode sum of
;; UNSPEC_TP and operand 1 (tied to the destination) is zero-extended to
;; DImode.  Always split ("&& 1") into a zero-extended SImode add whose
;; second source is a constant MEM at address 0 in the
;; DEFAULT_TLS_SEG_REG address space.
18553 (define_insn_and_split "*add_tp_x32_zext"
18554 [(set (match_operand:DI 0 "register_operand" "=r")
18555 (zero_extend:DI
18556 (plus:SI (unspec:SI [(const_int 0)] UNSPEC_TP)
18557 (match_operand:SI 1 "register_operand" "0"))))
18558 (clobber (reg:CC FLAGS_REG))]
18559 "TARGET_X32"
18560 "#"
18561 "&& 1"
18562 [(parallel
18563 [(set (match_dup 0)
18564 (zero_extend:DI
18565 (plus:SI (match_dup 1) (match_dup 2))))
18566 (clobber (reg:CC FLAGS_REG))])]
18567 {
18568 addr_space_t as = DEFAULT_TLS_SEG_REG;
18569
18570 operands[2] = gen_const_mem (SImode, const0_rtx);
18571 set_mem_addr_space (operands[2], as);
18572 })
18573
18574 ;; The Sun linker took the AMD64 TLS spec literally and can only handle
18575 ;; %rax as destination of the initial executable code sequence.
;; Emits two instructions, hence type "multi": a mov of %fs:0 into
;; operand 0 (constraint "a"), then an add of the @gottpoff(%rip) entry
;; of symbol operand 1 to it.  The add is why the flags are clobbered.
18576 (define_insn "tls_initial_exec_64_sun"
18577 [(set (match_operand:DI 0 "register_operand" "=a")
18578 (unspec:DI
18579 [(match_operand 1 "tls_symbolic_operand")]
18580 UNSPEC_TLS_IE_SUN))
18581 (clobber (reg:CC FLAGS_REG))]
18582 "TARGET_64BIT && TARGET_SUN_TLS"
18583 {
18584 output_asm_insn
18585 ("mov{q}\t{%%fs:0, %0|%0, QWORD PTR fs:0}", operands);
18586 return "add{q}\t{%a1@gottpoff(%%rip), %0|%0, %a1@gottpoff[rip]}";
18587 }
18588 [(set_attr "type" "multi")])
18589
18590 ;; GNU2 TLS patterns can be split.
18591
;; 32-bit GNU2 TLS expander; emits two insns.  First, operand 3 (a new
;; Pmode pseudo when one can be created, otherwise operand 0) is set to
;; operand 2 plus the UNSPEC_TLSDESC constant of symbol operand 1.
;; Second, operand 0 is set from an UNSPEC_TLSDESC of the symbol,
;; operand 3, operand 2 and the stack pointer, clobbering the flags.
;; Also sets ix86_tls_descriptor_calls_expanded_in_cfun.
18592 (define_expand "tls_dynamic_gnu2_32"
18593 [(set (match_dup 3)
18594 (plus:SI (match_operand:SI 2 "register_operand")
18595 (const:SI
18596 (unspec:SI [(match_operand 1 "tls_symbolic_operand")]
18597 UNSPEC_TLSDESC))))
18598 (parallel
18599 [(set (match_operand:SI 0 "register_operand")
18600 (unspec:SI [(match_dup 1) (match_dup 3)
18601 (match_dup 2) (reg:SI SP_REG)]
18602 UNSPEC_TLSDESC))
18603 (clobber (reg:CC FLAGS_REG))])]
18604 "!TARGET_64BIT && TARGET_GNU2_TLS"
18605 {
18606 operands[3] = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : operands[0];
18607 ix86_tls_descriptor_calls_expanded_in_cfun = true;
18608 })
18609
;; First half of the 32-bit GNU2 sequence: a single lea that places the
;; address sym@TLSDESC(operand 1) in operand 0.  Operand 1 carries the
;; "b" constraint.
18610 (define_insn "*tls_dynamic_gnu2_lea_32"
18611 [(set (match_operand:SI 0 "register_operand" "=r")
18612 (plus:SI (match_operand:SI 1 "register_operand" "b")
18613 (const:SI
18614 (unspec:SI [(match_operand 2 "tls_symbolic_operand")]
18615 UNSPEC_TLSDESC))))]
18616 "!TARGET_64BIT && TARGET_GNU2_TLS"
18617 "lea{l}\t{%E2@TLSDESC(%1), %0|%0, %E2@TLSDESC[%1]}"
18618 [(set_attr "type" "lea")
18619 (set_attr "mode" "SI")
18620 (set_attr "length" "6")
18621 (set_attr "length_address" "4")])
18622
;; Second half of the 32-bit GNU2 sequence: an indirect call through
;; sym@TLSCALL(operand 2).  Operand 2 is tied to the result, which uses
;; constraint "a"; operand 3 (constraint "b") appears only as an input
;; of the unspec.  The insn clobbers the flags.
18623 (define_insn "*tls_dynamic_gnu2_call_32"
18624 [(set (match_operand:SI 0 "register_operand" "=a")
18625 (unspec:SI [(match_operand 1 "tls_symbolic_operand")
18626 (match_operand:SI 2 "register_operand" "0")
18627 ;; we have to make sure %ebx still points to the GOT
18628 (match_operand:SI 3 "register_operand" "b")
18629 (reg:SI SP_REG)]
18630 UNSPEC_TLSDESC))
18631 (clobber (reg:CC FLAGS_REG))]
18632 "!TARGET_64BIT && TARGET_GNU2_TLS"
18633 "call\t{*%a1@TLSCALL(%2)|[DWORD PTR [%2+%a1@TLSCALL]]}"
18634 [(set_attr "type" "call")
18635 (set_attr "length" "2")
18636 (set_attr "length_address" "0")])
18637
;; Matches an UNSPEC_TLSDESC value for a tls_modbase_operand (operand 3)
;; added to the UNSPEC_DTPOFF constant of symbol operand 1.  Always split
;; ("&& 1"): the split code emits the tls_dynamic_gnu2_32 sequence for
;; operand 1 directly into operand 5 (a new pseudo when possible,
;; otherwise operand 0), and the replacement template then copies
;; operand 5 into operand 0.
18638 (define_insn_and_split "*tls_dynamic_gnu2_combine_32"
18639 [(set (match_operand:SI 0 "register_operand" "=&a")
18640 (plus:SI
18641 (unspec:SI [(match_operand 3 "tls_modbase_operand")
18642 (match_operand:SI 4)
18643 (match_operand:SI 2 "register_operand" "b")
18644 (reg:SI SP_REG)]
18645 UNSPEC_TLSDESC)
18646 (const:SI (unspec:SI
18647 [(match_operand 1 "tls_symbolic_operand")]
18648 UNSPEC_DTPOFF))))
18649 (clobber (reg:CC FLAGS_REG))]
18650 "!TARGET_64BIT && TARGET_GNU2_TLS"
18651 "#"
18652 "&& 1"
18653 [(set (match_dup 0) (match_dup 5))]
18654 {
18655 operands[5] = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : operands[0];
18656 emit_insn (gen_tls_dynamic_gnu2_32 (operands[5], operands[1], operands[2]));
18657 })
18658
;; 64-bit GNU2 TLS expander for each PTR mode; emits two insns.  First,
;; operand 2 (a new ptr_mode pseudo when one can be created, otherwise
;; operand 0) is set to the UNSPEC_TLSDESC of symbol operand 1.  Second,
;; operand 0 is set from an UNSPEC_TLSDESC of the symbol, operand 2 and
;; the stack pointer, clobbering the flags.  Also sets
;; ix86_tls_descriptor_calls_expanded_in_cfun.
18659 (define_expand "@tls_dynamic_gnu2_64_<mode>"
18660 [(set (match_dup 2)
18661 (unspec:PTR [(match_operand 1 "tls_symbolic_operand")]
18662 UNSPEC_TLSDESC))
18663 (parallel
18664 [(set (match_operand:PTR 0 "register_operand")
18665 (unspec:PTR [(match_dup 1) (match_dup 2) (reg:PTR SP_REG)]
18666 UNSPEC_TLSDESC))
18667 (clobber (reg:CC FLAGS_REG))])]
18668 "TARGET_64BIT && TARGET_GNU2_TLS"
18669 {
18670 operands[2] = can_create_pseudo_p () ? gen_reg_rtx (ptr_mode) : operands[0];
18671 ix86_tls_descriptor_calls_expanded_in_cfun = true;
18672 })
18673
;; First half of the 64-bit GNU2 sequence: a RIP-relative lea of
;; sym@TLSDESC into operand 0.  The %z0 modifier picks the opcode suffix
;; from the size of operand 0.
18674 (define_insn "*tls_dynamic_gnu2_lea_64_<mode>"
18675 [(set (match_operand:PTR 0 "register_operand" "=r")
18676 (unspec:PTR [(match_operand 1 "tls_symbolic_operand")]
18677 UNSPEC_TLSDESC))]
18678 "TARGET_64BIT && TARGET_GNU2_TLS"
18679 "lea%z0\t{%E1@TLSDESC(%%rip), %0|%0, %E1@TLSDESC[rip]}"
18680 [(set_attr "type" "lea")
18681 (set_attr "mode" "<MODE>")
18682 (set_attr "length" "7")
18683 (set_attr "length_address" "4")])
18684
;; Second half of the 64-bit GNU2 sequence: an indirect call through
;; sym@TLSCALL(operand 2).  Operand 2 is tied to the result, which uses
;; constraint "a".  The insn clobbers the flags.
18685 (define_insn "*tls_dynamic_gnu2_call_64_<mode>"
18686 [(set (match_operand:PTR 0 "register_operand" "=a")
18687 (unspec:PTR [(match_operand 1 "tls_symbolic_operand")
18688 (match_operand:PTR 2 "register_operand" "0")
18689 (reg:PTR SP_REG)]
18690 UNSPEC_TLSDESC))
18691 (clobber (reg:CC FLAGS_REG))]
18692 "TARGET_64BIT && TARGET_GNU2_TLS"
18693 "call\t{*%a1@TLSCALL(%2)|[QWORD PTR [%2+%a1@TLSCALL]]}"
18694 [(set_attr "type" "call")
18695 (set_attr "length" "2")
18696 (set_attr "length_address" "0")])
18697
;; Matches an UNSPEC_TLSDESC value for a tls_modbase_operand (operand 2)
;; added to the UNSPEC_DTPOFF constant of symbol operand 1.  Always split
;; ("&& 1"): the split code emits the tls_dynamic_gnu2_64 sequence (in
;; ptr_mode) for operand 1 into operand 4 (a new pseudo when possible,
;; otherwise operand 0), and the replacement template then copies
;; operand 4 into operand 0.
18698 (define_insn_and_split "*tls_dynamic_gnu2_combine_64_<mode>"
18699 [(set (match_operand:PTR 0 "register_operand" "=&a")
18700 (plus:PTR
18701 (unspec:PTR [(match_operand 2 "tls_modbase_operand")
18702 (match_operand:PTR 3)
18703 (reg:PTR SP_REG)]
18704 UNSPEC_TLSDESC)
18705 (const:PTR (unspec:PTR
18706 [(match_operand 1 "tls_symbolic_operand")]
18707 UNSPEC_DTPOFF))))
18708 (clobber (reg:CC FLAGS_REG))]
18709 "TARGET_64BIT && TARGET_GNU2_TLS"
18710 "#"
18711 "&& 1"
18712 [(set (match_dup 0) (match_dup 4))]
18713 {
18714 operands[4] = can_create_pseudo_p () ? gen_reg_rtx (ptr_mode) : operands[0];
18715 emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, operands[4], operands[1]));
18716 })
18717
;; When TARGET_TLS_DIRECT_SEG_REFS holds, any pattern accepted by the
;; tls_address_pattern predicate is replaced by the result of
;; ix86_rewrite_tls_address applied to it.
18718 (define_split
18719 [(match_operand 0 "tls_address_pattern")]
18720 "TARGET_TLS_DIRECT_SEG_REFS"
18721 [(match_dup 0)]
18722 "operands[0] = ix86_rewrite_tls_address (operands[0]);")
18723
18724 \f
18725 ;; These patterns match the binary 387 instructions for addM3, subM3,
18726 ;; mulM3 and divM3. There are three patterns for each of DFmode and
18727 ;; SFmode. The first is the normal insn, the second the same insn but
18728 ;; with one operand a conversion, and the third the same insn but with
18729 ;; the other operand a conversion. The conversion may be SFmode or
18730 ;; SImode if the target mode is DFmode, but only SImode if the target mode
18731 ;; is SFmode.
18732
18733 ;; GCC is slightly smarter about handling normal two-address instructions,
18734 ;; so use special patterns for add and mul.
18735
;; Commutative XFmode binary FP operation on x87 registers; operand 1 is
;; tied to the destination and marked commutative ("%0").  Assembly text
;; comes from output_387_binary_op.  The type is "fmul" when operator 3
;; satisfies mult_operator and "fop" otherwise.
18736 (define_insn "*fop_xf_comm_i387"
18737 [(set (match_operand:XF 0 "register_operand" "=f")
18738 (match_operator:XF 3 "binary_fp_operator"
18739 [(match_operand:XF 1 "register_operand" "%0")
18740 (match_operand:XF 2 "register_operand" "f")]))]
18741 "TARGET_80387
18742 && COMMUTATIVE_ARITH_P (operands[3])"
18743 "* return output_387_binary_op (insn, operands);"
18744 [(set (attr "type")
18745 (if_then_else (match_operand:XF 3 "mult_operator")
18746 (const_string "fmul")
18747 (const_string "fop")))
18748 (set_attr "mode" "XF")])
18749
;; Commutative MODEF binary FP operation with three alternatives:
;; 0 uses x87 registers ("f"), 1 is the non-AVX SSE form ("x", isa noavx)
;; and 2 is the AVX form ("v", isa avx).  Both inputs may not be memory.
;; Assembly text for every alternative comes from output_387_binary_op.
;; The "type" attribute picks ssemul/sseadd for alternatives 1,2 and
;; fmul/fop for alternative 0, keyed on mult_operator.
;; The "enabled" attribute: when SSE math is used for the mode, the x87
;; alternative is enabled only under TARGET_MIX_SSE_I387 &&
;; X87_ENABLE_ARITH and the SSE alternatives keep the default ("*");
;; otherwise only alternative 0 is enabled.
18750 (define_insn "*fop_<mode>_comm"
18751 [(set (match_operand:MODEF 0 "register_operand" "=f,x,v")
18752 (match_operator:MODEF 3 "binary_fp_operator"
18753 [(match_operand:MODEF 1 "nonimmediate_operand" "%0,0,v")
18754 (match_operand:MODEF 2 "nonimmediate_operand" "fm,xm,vm")]))]
18755 "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
18756 || (TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode)))
18757 && COMMUTATIVE_ARITH_P (operands[3])
18758 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
18759 "* return output_387_binary_op (insn, operands);"
18760 [(set (attr "type")
18761 (if_then_else (eq_attr "alternative" "1,2")
18762 (if_then_else (match_operand:MODEF 3 "mult_operator")
18763 (const_string "ssemul")
18764 (const_string "sseadd"))
18765 (if_then_else (match_operand:MODEF 3 "mult_operator")
18766 (const_string "fmul")
18767 (const_string "fop"))))
18768 (set_attr "isa" "*,noavx,avx")
18769 (set_attr "prefix" "orig,orig,vex")
18770 (set_attr "mode" "<MODE>")
18771 (set (attr "enabled")
18772 (if_then_else
18773 (match_test ("SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"))
18774 (if_then_else
18775 (eq_attr "alternative" "0")
18776 (symbol_ref "TARGET_MIX_SSE_I387
18777 && X87_ENABLE_ARITH (<MODE>mode)")
18778 (const_string "*"))
18779 (if_then_else
18780 (eq_attr "alternative" "0")
18781 (symbol_ref "true")
18782 (symbol_ref "false"))))])
18783
;; Scalar HFmode add/sub/mul/div (plusminusmultdiv iterator) emitted as
;; v<insn>sh under TARGET_AVX512FP16.  Operand 1 must be a register
;; (with the <comm> commutativity marker where the iterator provides
;; one), operand 2 may be memory, and both may not be memory at once.
;; NOTE(review): no "type" attribute is set here, so the attribute's
;; default applies -- confirm that is intended for scheduling.
18784 (define_insn "*<insn>hf"
18785 [(set (match_operand:HF 0 "register_operand" "=v")
18786 (plusminusmultdiv:HF
18787 (match_operand:HF 1 "nonimmediate_operand" "<comm>v")
18788 (match_operand:HF 2 "nonimmediate_operand" "vm")))]
18789 "TARGET_AVX512FP16
18790 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
18791 "v<insn>sh\t{%2, %1, %0|%0, %1, %2}"
18792 [(set_attr "prefix" "evex")
18793 (set_attr "mode" "HF")])
18794
;; SFmode UNSPEC_RCP emitted as rcpss, with %v allowing a VEX-prefixed
;; form (prefix "maybe_vex").  Alternatives: 0 source tied to the
;; destination, 1 source in another SSE register, 2 source in memory.
;; Only alternative 2 is flagged avx_partial_xmm_update.
;; preferred_for_speed: always true with TARGET_AVX; otherwise
;; alternatives 1 and 2 are preferred only when
;; !TARGET_SSE_PARTIAL_REG_DEPENDENCY.
18795 (define_insn "*rcpsf2_sse"
18796 [(set (match_operand:SF 0 "register_operand" "=x,x,x")
18797 (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "0,x,m")]
18798 UNSPEC_RCP))]
18799 "TARGET_SSE && TARGET_SSE_MATH"
18800 "@
18801 %vrcpss\t{%d1, %0|%0, %d1}
18802 %vrcpss\t{%d1, %0|%0, %d1}
18803 %vrcpss\t{%1, %d0|%d0, %1}"
18804 [(set_attr "type" "sse")
18805 (set_attr "atom_sse_attr" "rcp")
18806 (set_attr "btver2_sse_attr" "rcp")
18807 (set_attr "prefix" "maybe_vex")
18808 (set_attr "mode" "SF")
18809 (set_attr "avx_partial_xmm_update" "false,false,true")
18810 (set (attr "preferred_for_speed")
18811 (cond [(match_test "TARGET_AVX")
18812 (symbol_ref "true")
18813 (eq_attr "alternative" "1,2")
18814 (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
18815 ]
18816 (symbol_ref "true")))])
18817
;; HFmode UNSPEC_RCP emitted as vrcpsh under TARGET_AVX512FP16.
;; Alternative 0 takes the source from a register, alternative 1 from
;; memory; only the memory alternative is flagged
;; avx_partial_xmm_update.
18818 (define_insn "rcphf2"
18819 [(set (match_operand:HF 0 "register_operand" "=v,v")
18820 (unspec:HF [(match_operand:HF 1 "nonimmediate_operand" "v,m")]
18821 UNSPEC_RCP))]
18822 "TARGET_AVX512FP16"
18823 "@
18824 vrcpsh\t{%d1, %0|%0, %d1}
18825 vrcpsh\t{%1, %d0|%d0, %1}"
18826 [(set_attr "type" "sse")
18827 (set_attr "prefix" "evex")
18828 (set_attr "mode" "HF")
18829 (set_attr "avx_partial_xmm_update" "false,true")])
18830
;; Non-commutative XFmode binary FP operation on x87 registers.  Two
;; alternatives allow either input to be tied to the destination.
;; Assembly text comes from output_387_binary_op.  The type is "fdiv"
;; when operator 3 satisfies div_operator and "fop" otherwise.
18831 (define_insn "*fop_xf_1_i387"
18832 [(set (match_operand:XF 0 "register_operand" "=f,f")
18833 (match_operator:XF 3 "binary_fp_operator"
18834 [(match_operand:XF 1 "register_operand" "0,f")
18835 (match_operand:XF 2 "register_operand" "f,0")]))]
18836 "TARGET_80387
18837 && !COMMUTATIVE_ARITH_P (operands[3])"
18838 "* return output_387_binary_op (insn, operands);"
18839 [(set (attr "type")
18840 (if_then_else (match_operand:XF 3 "div_operator")
18841 (const_string "fdiv")
18842 (const_string "fop")))
18843 (set_attr "mode" "XF")])
18844
;; Non-commutative MODEF binary FP operation with four alternatives:
;; 0 and 1 use x87 registers (either input tied to the destination),
;; 2 is the non-AVX SSE form (isa noavx) and 3 is the AVX form (isa avx).
;; Both inputs may not be memory.  Assembly text for every alternative
;; comes from output_387_binary_op.
;; The "type" attribute picks ssediv/sseadd for alternatives 2,3 and
;; fdiv/fop for alternatives 0,1, keyed on div_operator.
;; The "enabled" attribute: when SSE math is used for the mode, the x87
;; alternatives are enabled only under TARGET_MIX_SSE_I387 &&
;; X87_ENABLE_ARITH and the SSE alternatives keep the default ("*");
;; otherwise only alternatives 0 and 1 are enabled.
18845 (define_insn "*fop_<mode>_1"
18846 [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,v")
18847 (match_operator:MODEF 3 "binary_fp_operator"
18848 [(match_operand:MODEF 1
18849 "x87nonimm_ssenomem_operand" "0,fm,0,v")
18850 (match_operand:MODEF 2
18851 "nonimmediate_operand" "fm,0,xm,vm")]))]
18852 "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
18853 || (TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode)))
18854 && !COMMUTATIVE_ARITH_P (operands[3])
18855 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
18856 "* return output_387_binary_op (insn, operands);"
18857 [(set (attr "type")
18858 (if_then_else (eq_attr "alternative" "2,3")
18859 (if_then_else (match_operand:MODEF 3 "div_operator")
18860 (const_string "ssediv")
18861 (const_string "sseadd"))
18862 (if_then_else (match_operand:MODEF 3 "div_operator")
18863 (const_string "fdiv")
18864 (const_string "fop"))))
18865 (set_attr "isa" "*,*,noavx,avx")
18866 (set_attr "prefix" "orig,orig,orig,vex")
18867 (set_attr "mode" "<MODE>")
18868 (set (attr "enabled")
18869 (if_then_else
18870 (match_test ("SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"))
18871 (if_then_else
18872 (eq_attr "alternative" "0,1")
18873 (symbol_ref "TARGET_MIX_SSE_I387
18874 && X87_ENABLE_ARITH (<MODE>mode)")
18875 (const_string "*"))
18876 (if_then_else
18877 (eq_attr "alternative" "0,1")
18878 (symbol_ref "true")
18879 (symbol_ref "false"))))])
18880
;; x87 binary FP operation whose FIRST input is an SWI24 integer in
;; memory converted with (float ...); the second input is an x87
;; register tied to the destination.  Enabled only when the x87 handles
;; this mode (not SSE math) and either TARGET_USE_<SWI24:MODE>MODE_FIOP
;; holds or the function is optimized for size.  The "mode" attribute is
;; the integer mode and fp_int_src is set.
18881 (define_insn "*fop_<X87MODEF:mode>_2_i387"
18882 [(set (match_operand:X87MODEF 0 "register_operand" "=f")
18883 (match_operator:X87MODEF 3 "binary_fp_operator"
18884 [(float:X87MODEF
18885 (match_operand:SWI24 1 "nonimmediate_operand" "m"))
18886 (match_operand:X87MODEF 2 "register_operand" "0")]))]
18887 "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, <SWI24:MODE>mode)
18888 && !(SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)
18889 && (TARGET_USE_<SWI24:MODE>MODE_FIOP
18890 || optimize_function_for_size_p (cfun))"
18891 "* return output_387_binary_op (insn, operands);"
18892 [(set (attr "type")
18893 (cond [(match_operand:X87MODEF 3 "mult_operator")
18894 (const_string "fmul")
18895 (match_operand:X87MODEF 3 "div_operator")
18896 (const_string "fdiv")
18897 ]
18898 (const_string "fop")))
18899 (set_attr "fp_int_src" "true")
18900 (set_attr "mode" "<SWI24:MODE>")])
18901
;; x87 binary FP operation whose SECOND input is an SWI24 integer in
;; memory converted with (float ...); the first input is an x87 register
;; tied to the destination.  Same enabling condition and attributes as
;; the variant with the integer as first input.
18902 (define_insn "*fop_<X87MODEF:mode>_3_i387"
18903 [(set (match_operand:X87MODEF 0 "register_operand" "=f")
18904 (match_operator:X87MODEF 3 "binary_fp_operator"
18905 [(match_operand:X87MODEF 1 "register_operand" "0")
18906 (float:X87MODEF
18907 (match_operand:SWI24 2 "nonimmediate_operand" "m"))]))]
18908 "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, <SWI24:MODE>mode)
18909 && !(SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)
18910 && (TARGET_USE_<SWI24:MODE>MODE_FIOP
18911 || optimize_function_for_size_p (cfun))"
18912 "* return output_387_binary_op (insn, operands);"
18913 [(set (attr "type")
18914 (cond [(match_operand:X87MODEF 3 "mult_operator")
18915 (const_string "fmul")
18916 (match_operand:X87MODEF 3 "div_operator")
18917 (const_string "fdiv")
18918 ]
18919 (const_string "fop")))
18920 (set_attr "fp_int_src" "true")
18921 (set_attr "mode" "<SWI24:MODE>")])
18922
;; XFmode x87 binary operation whose FIRST input is a MODEF value
;; widened with float_extend (register or memory in alternative 0, tied
;; to the destination in alternative 1); the second input is an XFmode
;; register.  The "mode" attribute is the narrower MODEF mode.
18923 (define_insn "*fop_xf_4_i387"
18924 [(set (match_operand:XF 0 "register_operand" "=f,f")
18925 (match_operator:XF 3 "binary_fp_operator"
18926 [(float_extend:XF
18927 (match_operand:MODEF 1 "nonimmediate_operand" "fm,0"))
18928 (match_operand:XF 2 "register_operand" "0,f")]))]
18929 "TARGET_80387"
18930 "* return output_387_binary_op (insn, operands);"
18931 [(set (attr "type")
18932 (cond [(match_operand:XF 3 "mult_operator")
18933 (const_string "fmul")
18934 (match_operand:XF 3 "div_operator")
18935 (const_string "fdiv")
18936 ]
18937 (const_string "fop")))
18938 (set_attr "mode" "<MODE>")])
18939
;; DFmode x87 binary operation whose FIRST input is an SFmode value
;; widened with float_extend; the second input is a DFmode register.
;; Enabled only when the x87 performs DFmode arithmetic (not SSE math).
;; The "mode" attribute is SF, the mode of the extended operand.
18940 (define_insn "*fop_df_4_i387"
18941 [(set (match_operand:DF 0 "register_operand" "=f,f")
18942 (match_operator:DF 3 "binary_fp_operator"
18943 [(float_extend:DF
18944 (match_operand:SF 1 "nonimmediate_operand" "fm,0"))
18945 (match_operand:DF 2 "register_operand" "0,f")]))]
18946 "TARGET_80387 && X87_ENABLE_ARITH (DFmode)
18947 && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
18948 "* return output_387_binary_op (insn, operands);"
18949 [(set (attr "type")
18950 (cond [(match_operand:DF 3 "mult_operator")
18951 (const_string "fmul")
18952 (match_operand:DF 3 "div_operator")
18953 (const_string "fdiv")
18954 ]
18955 (const_string "fop")))
18956 (set_attr "mode" "SF")])
18957
;; XFmode x87 binary operation whose SECOND input is a MODEF value
;; widened with float_extend; the first input is an XFmode register.
;; The "mode" attribute is the narrower MODEF mode.
18958 (define_insn "*fop_xf_5_i387"
18959 [(set (match_operand:XF 0 "register_operand" "=f,f")
18960 (match_operator:XF 3 "binary_fp_operator"
18961 [(match_operand:XF 1 "register_operand" "0,f")
18962 (float_extend:XF
18963 (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))]
18964 "TARGET_80387"
18965 "* return output_387_binary_op (insn, operands);"
18966 [(set (attr "type")
18967 (cond [(match_operand:XF 3 "mult_operator")
18968 (const_string "fmul")
18969 (match_operand:XF 3 "div_operator")
18970 (const_string "fdiv")
18971 ]
18972 (const_string "fop")))
18973 (set_attr "mode" "<MODE>")])
18974
;; DFmode x87 binary operation whose SECOND input is an SFmode value
;; widened with float_extend; the first input is a DFmode register.
;; Enabled only when the x87 performs DFmode arithmetic (not SSE math).
;; The "mode" attribute is SF, the mode of the extended operand.
18975 (define_insn "*fop_df_5_i387"
18976 [(set (match_operand:DF 0 "register_operand" "=f,f")
18977 (match_operator:DF 3 "binary_fp_operator"
18978 [(match_operand:DF 1 "register_operand" "0,f")
18979 (float_extend:DF
18980 (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))]
18981 "TARGET_80387 && X87_ENABLE_ARITH (DFmode)
18982 && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
18983 "* return output_387_binary_op (insn, operands);"
18984 [(set (attr "type")
18985 (cond [(match_operand:DF 3 "mult_operator")
18986 (const_string "fmul")
18987 (match_operand:DF 3 "div_operator")
18988 (const_string "fdiv")
18989 ]
18990 (const_string "fop")))
18991 (set_attr "mode" "SF")])
18992
;; XFmode x87 binary operation in which BOTH inputs are MODEF values
;; widened with float_extend.  Operand 1 must be a register; operand 2
;; may be a register or memory.  The "mode" attribute is the MODEF mode.
18993 (define_insn "*fop_xf_6_i387"
18994 [(set (match_operand:XF 0 "register_operand" "=f,f")
18995 (match_operator:XF 3 "binary_fp_operator"
18996 [(float_extend:XF
18997 (match_operand:MODEF 1 "register_operand" "0,f"))
18998 (float_extend:XF
18999 (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))]
19000 "TARGET_80387"
19001 "* return output_387_binary_op (insn, operands);"
19002 [(set (attr "type")
19003 (cond [(match_operand:XF 3 "mult_operator")
19004 (const_string "fmul")
19005 (match_operand:XF 3 "div_operator")
19006 (const_string "fdiv")
19007 ]
19008 (const_string "fop")))
19009 (set_attr "mode" "<MODE>")])
19010
;; DFmode x87 binary operation in which BOTH inputs are SFmode values
;; widened with float_extend.  Enabled only when the x87 performs DFmode
;; arithmetic (not SSE math).  The "mode" attribute is SF.
19011 (define_insn "*fop_df_6_i387"
19012 [(set (match_operand:DF 0 "register_operand" "=f,f")
19013 (match_operator:DF 3 "binary_fp_operator"
19014 [(float_extend:DF
19015 (match_operand:SF 1 "register_operand" "0,f"))
19016 (float_extend:DF
19017 (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))]
19018 "TARGET_80387 && X87_ENABLE_ARITH (DFmode)
19019 && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
19020 "* return output_387_binary_op (insn, operands);"
19021 [(set (attr "type")
19022 (cond [(match_operand:DF 3 "mult_operator")
19023 (const_string "fmul")
19024 (match_operand:DF 3 "div_operator")
19025 (const_string "fdiv")
19026 ]
19027 (const_string "fop")))
19028 (set_attr "mode" "SF")])
19029 \f
19030 ;; FPU special functions.
19031
19032 ;; This pattern implements a no-op XFmode truncation for
19033 ;; all fancy i386 XFmode math functions.
19034
;; Operand 0 (memory or x87 register, MODEF) receives the XFmode x87
;; register operand 1 wrapped in UNSPEC_TRUNC_NOOP.  Assembly text comes
;; from output_387_reg_move; the insn type is "fmov".
19035 (define_insn "truncxf<mode>2_i387_noop_unspec"
19036 [(set (match_operand:MODEF 0 "nonimmediate_operand" "=mf")
19037 (unspec:MODEF [(match_operand:XF 1 "register_operand" "f")]
19038 UNSPEC_TRUNC_NOOP))]
19039 "TARGET_USE_FANCY_MATH_387"
19040 "* return output_387_reg_move (insn, operands);"
19041 [(set_attr "type" "fmov")
19042 (set_attr "mode" "<MODE>")])
19043
;; XFmode square root via the x87 fsqrt instruction.  The source is tied
;; to the destination ("0"), so the operation is done in place.
19044 (define_insn "sqrtxf2"
19045 [(set (match_operand:XF 0 "register_operand" "=f")
19046 (sqrt:XF (match_operand:XF 1 "register_operand" "0")))]
19047 "TARGET_USE_FANCY_MATH_387"
19048 "fsqrt"
19049 [(set_attr "type" "fpspc")
19050 (set_attr "mode" "XF")
19051 (set_attr "athlon_decode" "direct")
19052 (set_attr "amdfam10_decode" "direct")
19053 (set_attr "bdver1_decode" "direct")])
19054
;; SFmode UNSPEC_RSQRT emitted as rsqrtss, with %v allowing a
;; VEX-prefixed form (prefix "maybe_vex").  Alternatives: 0 source tied
;; to the destination, 1 source in another SSE register, 2 source in
;; memory.  Only alternative 2 is flagged avx_partial_xmm_update.
;; preferred_for_speed: always true with TARGET_AVX; otherwise
;; alternatives 1 and 2 are preferred only when
;; !TARGET_SSE_PARTIAL_REG_DEPENDENCY.
19055 (define_insn "*rsqrtsf2_sse"
19056 [(set (match_operand:SF 0 "register_operand" "=x,x,x")
19057 (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "0,x,m")]
19058 UNSPEC_RSQRT))]
19059 "TARGET_SSE && TARGET_SSE_MATH"
19060 "@
19061 %vrsqrtss\t{%d1, %0|%0, %d1}
19062 %vrsqrtss\t{%d1, %0|%0, %d1}
19063 %vrsqrtss\t{%1, %d0|%d0, %1}"
19064 [(set_attr "type" "sse")
19065 (set_attr "atom_sse_attr" "rcp")
19066 (set_attr "btver2_sse_attr" "rcp")
19067 (set_attr "prefix" "maybe_vex")
19068 (set_attr "mode" "SF")
19069 (set_attr "avx_partial_xmm_update" "false,false,true")
19070 (set (attr "preferred_for_speed")
19071 (cond [(match_test "TARGET_AVX")
19072 (symbol_ref "true")
19073 (eq_attr "alternative" "1,2")
19074 (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
19075 ]
19076 (symbol_ref "true")))])
19077
;; Named SFmode expander with an UNSPEC_RSQRT template.  It always
;; expands through ix86_emit_swsqrtsf (last argument 1) and finishes
;; with DONE, so the RTL template is never emitted by this expander.
;; NOTE(review): ix86_emit_swsqrtsf presumably emits a software
;; refinement sequence and the final argument selects the reciprocal
;; form -- confirm against its definition.
19078 (define_expand "rsqrtsf2"
19079 [(set (match_operand:SF 0 "register_operand")
19080 (unspec:SF [(match_operand:SF 1 "nonimmediate_operand")]
19081 UNSPEC_RSQRT))]
19082 "TARGET_SSE && TARGET_SSE_MATH"
19083 {
19084 ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 1);
19085 DONE;
19086 })
19087
;; HFmode UNSPEC_RSQRT emitted as vrsqrtsh under TARGET_AVX512FP16.
;; Alternative 0 takes the source from a register, alternative 1 from
;; memory; only the memory alternative is flagged
;; avx_partial_xmm_update.
19088 (define_insn "rsqrthf2"
19089 [(set (match_operand:HF 0 "register_operand" "=v,v")
19090 (unspec:HF [(match_operand:HF 1 "nonimmediate_operand" "v,m")]
19091 UNSPEC_RSQRT))]
19092 "TARGET_AVX512FP16"
19093 "@
19094 vrsqrtsh\t{%d1, %0|%0, %d1}
19095 vrsqrtsh\t{%1, %d0|%d0, %1}"
19096 [(set_attr "type" "sse")
19097 (set_attr "prefix" "evex")
19098 (set_attr "avx_partial_xmm_update" "false,true")
19099 (set_attr "mode" "HF")])
19100
;; HFmode square root emitted as vsqrtsh under TARGET_AVX512FP16.
;; Alternative 0 takes the source from a register, alternative 1 from
;; memory; only the memory alternative is flagged
;; avx_partial_xmm_update.
19101 (define_insn "sqrthf2"
19102 [(set (match_operand:HF 0 "register_operand" "=v,v")
19103 (sqrt:HF
19104 (match_operand:HF 1 "nonimmediate_operand" "v,m")))]
19105 "TARGET_AVX512FP16"
19106 "@
19107 vsqrtsh\t{%d1, %0|%0, %d1}
19108 vsqrtsh\t{%1, %d0|%d0, %1}"
19109 [(set_attr "type" "sse")
19110 (set_attr "prefix" "evex")
19111 (set_attr "avx_partial_xmm_update" "false,true")
19112 (set_attr "mode" "HF")])
19113
;; MODEF square root on SSE registers, emitted as sqrt<ssemodesuffix>
;; with %v allowing a VEX-prefixed form.  Alternatives: 0 source tied to
;; the destination, 1 source in another register, 2 source in memory.
;; Only alternative 2 is flagged avx_partial_xmm_update.
;; preferred_for_speed: always true with TARGET_AVX; otherwise
;; alternatives 1 and 2 are preferred only when
;; !TARGET_SSE_PARTIAL_REG_DEPENDENCY.
19114 (define_insn "*sqrt<mode>2_sse"
19115 [(set (match_operand:MODEF 0 "register_operand" "=v,v,v")
19116 (sqrt:MODEF
19117 (match_operand:MODEF 1 "nonimmediate_operand" "0,v,m")))]
19118 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
19119 "@
19120 %vsqrt<ssemodesuffix>\t{%d1, %0|%0, %d1}
19121 %vsqrt<ssemodesuffix>\t{%d1, %0|%0, %d1}
19122 %vsqrt<ssemodesuffix>\t{%1, %d0|%d0, %1}"
19123 [(set_attr "type" "sse")
19124 (set_attr "atom_sse_attr" "sqrt")
19125 (set_attr "btver2_sse_attr" "sqrt")
19126 (set_attr "prefix" "maybe_vex")
19127 (set_attr "avx_partial_xmm_update" "false,false,true")
19128 (set_attr "mode" "<MODE>")
19129 (set (attr "preferred_for_speed")
19130 (cond [(match_test "TARGET_AVX")
19131 (symbol_ref "true")
19132 (eq_attr "alternative" "1,2")
19133 (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
19134 ]
19135 (symbol_ref "true")))])
19136
;; MODEF square-root expander with three outcomes:
;;  1. SFmode with SSE math, TARGET_RECIP_SQRT, optimizing for speed,
;;     finite-math-only, no trapping math and unsafe math optimizations:
;;     expand through ix86_emit_swsqrtsf (last argument 0) and stop.
;;  2. SSE math not used for the mode: widen the input to XFmode, apply
;;     sqrtxf2, and narrow with truncxf<mode>2_i387_noop_unspec.
;;  3. Otherwise fall through, so the (sqrt:MODEF ...) template itself is
;;     emitted.
19137 (define_expand "sqrt<mode>2"
19138 [(set (match_operand:MODEF 0 "register_operand")
19139 (sqrt:MODEF
19140 (match_operand:MODEF 1 "nonimmediate_operand")))]
19141 "(TARGET_USE_FANCY_MATH_387 && X87_ENABLE_ARITH (<MODE>mode))
19142 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
19143 {
19144 if (<MODE>mode == SFmode
19145 && TARGET_SSE && TARGET_SSE_MATH
19146 && TARGET_RECIP_SQRT
19147 && !optimize_function_for_size_p (cfun)
19148 && flag_finite_math_only && !flag_trapping_math
19149 && flag_unsafe_math_optimizations)
19150 {
19151 ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 0);
19152 DONE;
19153 }
19154
19155 if (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
19156 {
19157 rtx op0 = gen_reg_rtx (XFmode);
19158 rtx op1 = gen_reg_rtx (XFmode);
19159
19160 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
19161 emit_insn (gen_sqrtxf2 (op0, op1));
19162 emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
19163 DONE;
19164 }
19165 })
19166
;; MODEF hypot expander, available only with finite-math-only and unsafe
;; math optimizations on the x87 path.  Both inputs are widened to
;; XFmode, each is squared with mulxf3, the squares are summed with
;; addxf3, sqrtxf2 is applied, and the result is narrowed into operand 0
;; with truncxf<mode>2.
19167 (define_expand "hypot<mode>3"
19168 [(use (match_operand:MODEF 0 "register_operand"))
19169 (use (match_operand:MODEF 1 "general_operand"))
19170 (use (match_operand:MODEF 2 "general_operand"))]
19171 "TARGET_USE_FANCY_MATH_387
19172 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
19173 || TARGET_MIX_SSE_I387)
19174 && flag_finite_math_only
19175 && flag_unsafe_math_optimizations"
19176 {
19177 rtx op0 = gen_reg_rtx (XFmode);
19178 rtx op1 = gen_reg_rtx (XFmode);
19179 rtx op2 = gen_reg_rtx (XFmode);
19180
19181 emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
19182 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
19183
19184 emit_insn (gen_mulxf3 (op1, op1, op1));
19185 emit_insn (gen_mulxf3 (op2, op2, op2));
19186 emit_insn (gen_addxf3 (op0, op2, op1));
19187 emit_insn (gen_sqrtxf2 (op0, op0));
19188
19189 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
19190 DONE;
19191 })
19192
;; Copies the x87 status (modelled as FPSR_REG in CCFPmode) into HImode
;; operand 0, constrained to the "a" register, using fnstsw.
;; NOTE(review): the "mode" attribute is SI although the operand is HI
;; -- presumably deliberate; confirm before changing.
19193 (define_insn "x86_fnstsw_1"
19194 [(set (match_operand:HI 0 "register_operand" "=a")
19195 (unspec:HI [(reg:CCFP FPSR_REG)] UNSPEC_FNSTSW))]
19196 "TARGET_80387"
19197 "fnstsw\t%0"
19198 [(set_attr "length" "2")
19199 (set_attr "mode" "SI")
19200 (set_attr "unit" "i387")])
19201
;; x87 fprem.  Inputs are operands 2 and 3, tied to outputs 0 and 1
;; respectively.  Operand 0 receives UNSPEC_FPREM_F and operand 1
;; UNSPEC_FPREM_U of the two inputs; FPSR_REG is additionally set to an
;; UNSPEC_C2_FLAG of the same inputs.
19202 (define_insn "fpremxf4_i387"
19203 [(set (match_operand:XF 0 "register_operand" "=f")
19204 (unspec:XF [(match_operand:XF 2 "register_operand" "0")
19205 (match_operand:XF 3 "register_operand" "1")]
19206 UNSPEC_FPREM_F))
19207 (set (match_operand:XF 1 "register_operand" "=f")
19208 (unspec:XF [(match_dup 2) (match_dup 3)]
19209 UNSPEC_FPREM_U))
19210 (set (reg:CCFP FPSR_REG)
19211 (unspec:CCFP [(match_dup 2) (match_dup 3)]
19212 UNSPEC_C2_FLAG))]
19213 "TARGET_USE_FANCY_MATH_387
19214 && flag_finite_math_only"
19215 "fprem"
19216 [(set_attr "type" "fpspc")
19217 (set_attr "znver1_decode" "vector")
19218 (set_attr "mode" "XF")])
19219
;; XFmode fmod expander.  Copies the inputs into fresh XFmode pseudos,
;; emits LABEL, one fpremxf4_i387 step updating both pseudos in place,
;; then a conditional jump back to LABEL produced by
;; ix86_emit_fp_unordered_jump, and finally moves op1 to operand 0.
;; NOTE(review): the jump is presumably taken while fprem reports an
;; incomplete reduction (C2 set) -- confirm in
;; ix86_emit_fp_unordered_jump.
19220 (define_expand "fmodxf3"
19221 [(use (match_operand:XF 0 "register_operand"))
19222 (use (match_operand:XF 1 "general_operand"))
19223 (use (match_operand:XF 2 "general_operand"))]
19224 "TARGET_USE_FANCY_MATH_387
19225 && flag_finite_math_only"
19226 {
19227 rtx_code_label *label = gen_label_rtx ();
19228
19229 rtx op1 = gen_reg_rtx (XFmode);
19230 rtx op2 = gen_reg_rtx (XFmode);
19231
19232 emit_move_insn (op2, operands[2]);
19233 emit_move_insn (op1, operands[1]);
19234
19235 emit_label (label);
19236 emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2));
19237 ix86_emit_fp_unordered_jump (label);
19238 LABEL_NUSES (label) = 1;
19239
19240 emit_move_insn (operands[0], op1);
19241 DONE;
19242 })
19243
;; MODEF fmod expander.  Widens both inputs to XFmode, emits LABEL, one
;; fpremxf4_i387 step and a conditional jump back to LABEL produced by
;; ix86_emit_fp_unordered_jump, then narrows op1 into operand 0.  The
;; narrowing uses truncxf<mode>2 under strict SSE math (SSE math for the
;; mode without TARGET_MIX_SSE_I387) and the no-op
;; truncxf<mode>2_i387_noop_unspec otherwise.
19244 (define_expand "fmod<mode>3"
19245 [(use (match_operand:MODEF 0 "register_operand"))
19246 (use (match_operand:MODEF 1 "general_operand"))
19247 (use (match_operand:MODEF 2 "general_operand"))]
19248 "TARGET_USE_FANCY_MATH_387
19249 && flag_finite_math_only"
19250 {
19251 rtx (*gen_truncxf) (rtx, rtx);
19252
19253 rtx_code_label *label = gen_label_rtx ();
19254
19255 rtx op1 = gen_reg_rtx (XFmode);
19256 rtx op2 = gen_reg_rtx (XFmode);
19257
19258 emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
19259 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
19260
19261 emit_label (label);
19262 emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2));
19263 ix86_emit_fp_unordered_jump (label);
19264 LABEL_NUSES (label) = 1;
19265
19266 /* Truncate the result properly for strict SSE math. */
19267 if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
19268 && !TARGET_MIX_SSE_I387)
19269 gen_truncxf = gen_truncxf<mode>2;
19270 else
19271 gen_truncxf = gen_truncxf<mode>2_i387_noop_unspec;
19272
19273 emit_insn (gen_truncxf (operands[0], op1));
19274 DONE;
19275 })
19276
;; x87 fprem1.  Inputs are operands 2 and 3, tied to outputs 0 and 1
;; respectively.  Operand 0 receives UNSPEC_FPREM1_F and operand 1
;; UNSPEC_FPREM1_U of the two inputs; FPSR_REG is additionally set to an
;; UNSPEC_C2_FLAG of the same inputs.
19277 (define_insn "fprem1xf4_i387"
19278 [(set (match_operand:XF 0 "register_operand" "=f")
19279 (unspec:XF [(match_operand:XF 2 "register_operand" "0")
19280 (match_operand:XF 3 "register_operand" "1")]
19281 UNSPEC_FPREM1_F))
19282 (set (match_operand:XF 1 "register_operand" "=f")
19283 (unspec:XF [(match_dup 2) (match_dup 3)]
19284 UNSPEC_FPREM1_U))
19285 (set (reg:CCFP FPSR_REG)
19286 (unspec:CCFP [(match_dup 2) (match_dup 3)]
19287 UNSPEC_C2_FLAG))]
19288 "TARGET_USE_FANCY_MATH_387
19289 && flag_finite_math_only"
19290 "fprem1"
19291 [(set_attr "type" "fpspc")
19292 (set_attr "znver1_decode" "vector")
19293 (set_attr "mode" "XF")])
19294
;; XFmode remainder expander.  Copies the inputs into fresh XFmode
;; pseudos, emits LABEL, one fprem1xf4_i387 step updating both pseudos
;; in place, then a conditional jump back to LABEL produced by
;; ix86_emit_fp_unordered_jump, and finally moves op1 to operand 0.
;; NOTE(review): the jump is presumably taken while fprem1 reports an
;; incomplete reduction (C2 set) -- confirm in
;; ix86_emit_fp_unordered_jump.
19295 (define_expand "remainderxf3"
19296 [(use (match_operand:XF 0 "register_operand"))
19297 (use (match_operand:XF 1 "general_operand"))
19298 (use (match_operand:XF 2 "general_operand"))]
19299 "TARGET_USE_FANCY_MATH_387
19300 && flag_finite_math_only"
19301 {
19302 rtx_code_label *label = gen_label_rtx ();
19303
19304 rtx op1 = gen_reg_rtx (XFmode);
19305 rtx op2 = gen_reg_rtx (XFmode);
19306
19307 emit_move_insn (op2, operands[2]);
19308 emit_move_insn (op1, operands[1]);
19309
19310 emit_label (label);
19311 emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2));
19312 ix86_emit_fp_unordered_jump (label);
19313 LABEL_NUSES (label) = 1;
19314
19315 emit_move_insn (operands[0], op1);
19316 DONE;
19317 })
19318
;; MODEF remainder expander.  Widens both inputs to XFmode, emits LABEL,
;; one fprem1xf4_i387 step and a conditional jump back to LABEL produced
;; by ix86_emit_fp_unordered_jump, then narrows op1 into operand 0.  The
;; narrowing uses truncxf<mode>2 under strict SSE math (SSE math for the
;; mode without TARGET_MIX_SSE_I387) and the no-op
;; truncxf<mode>2_i387_noop_unspec otherwise.
19319 (define_expand "remainder<mode>3"
19320 [(use (match_operand:MODEF 0 "register_operand"))
19321 (use (match_operand:MODEF 1 "general_operand"))
19322 (use (match_operand:MODEF 2 "general_operand"))]
19323 "TARGET_USE_FANCY_MATH_387
19324 && flag_finite_math_only"
19325 {
19326 rtx (*gen_truncxf) (rtx, rtx);
19327
19328 rtx_code_label *label = gen_label_rtx ();
19329
19330 rtx op1 = gen_reg_rtx (XFmode);
19331 rtx op2 = gen_reg_rtx (XFmode);
19332
19333 emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
19334 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
19335
19336 emit_label (label);
19337
19338 emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2));
19339 ix86_emit_fp_unordered_jump (label);
19340 LABEL_NUSES (label) = 1;
19341
19342 /* Truncate the result properly for strict SSE math. */
19343 if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
19344 && !TARGET_MIX_SSE_I387)
19345 gen_truncxf = gen_truncxf<mode>2;
19346 else
19347 gen_truncxf = gen_truncxf<mode>2_i387_noop_unspec;
19348
19349 emit_insn (gen_truncxf (operands[0], op1));
19350 DONE;
19351 })
19352
;; Int iterator over the UNSPEC_SIN and UNSPEC_COS codes, and the int
;; attribute mapping each code to the "sin"/"cos" text used in pattern
;; names and mnemonics.
19353 (define_int_iterator SINCOS
19354 [UNSPEC_SIN
19355 UNSPEC_COS])
19356
19357 (define_int_attr sincos
19358 [(UNSPEC_SIN "sin")
19359 (UNSPEC_COS "cos")])
19360
;; XFmode sine or cosine (one insn per SINCOS value) emitted as
;; f<sincos>.  The source is tied to the destination.  Requires
;; flag_unsafe_math_optimizations.
19361 (define_insn "<sincos>xf2"
19362 [(set (match_operand:XF 0 "register_operand" "=f")
19363 (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
19364 SINCOS))]
19365 "TARGET_USE_FANCY_MATH_387
19366 && flag_unsafe_math_optimizations"
19367 "f<sincos>"
19368 [(set_attr "type" "fpspc")
19369 (set_attr "znver1_decode" "vector")
19370 (set_attr "mode" "XF")])
19371
;; MODEF sine/cosine expander for the x87 path.  Widens the input to
;; XFmode, applies <sincos>xf2, and narrows the result into operand 0
;; with truncxf<mode>2.  Always finishes with DONE.
19372 (define_expand "<sincos><mode>2"
19373 [(set (match_operand:MODEF 0 "register_operand")
19374 (unspec:MODEF [(match_operand:MODEF 1 "general_operand")]
19375 SINCOS))]
19376 "TARGET_USE_FANCY_MATH_387
19377 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
19378 || TARGET_MIX_SSE_I387)
19379 && flag_unsafe_math_optimizations"
19380 {
19381 rtx op0 = gen_reg_rtx (XFmode);
19382 rtx op1 = gen_reg_rtx (XFmode);
19383
19384 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
19385 emit_insn (gen_<sincos>xf2 (op0, op1));
19386 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
19387 DONE;
19388 })
19389
;; x87 fsincos.  Operand 2 is the input, tied to operand 0.  Operand 0
;; receives UNSPEC_SINCOS_COS and operand 1 UNSPEC_SINCOS_SIN of that
;; input.
19390 (define_insn "sincosxf3"
19391 [(set (match_operand:XF 0 "register_operand" "=f")
19392 (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
19393 UNSPEC_SINCOS_COS))
19394 (set (match_operand:XF 1 "register_operand" "=f")
19395 (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
19396 "TARGET_USE_FANCY_MATH_387
19397 && flag_unsafe_math_optimizations"
19398 "fsincos"
19399 [(set_attr "type" "fpspc")
19400 (set_attr "znver1_decode" "vector")
19401 (set_attr "mode" "XF")])
19402
;; SFmode/DFmode combined sine/cosine: operand 2 is widened to XFmode,
;; sincosxf3 produces both results, and each is narrowed into its
;; destination (operand 0 from the first output, operand 1 from the second).
(define_expand "sincos<mode>3"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "register_operand"))
   (use (match_operand:MODEF 2 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_out0 = gen_reg_rtx (XFmode);
  rtx xf_out1 = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[2]));
  emit_insn (gen_sincosxf3 (xf_out0, xf_out1, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_out0));
  emit_insn (gen_truncxf<mode>2 (operands[1], xf_out1));
  DONE;
})
19422
;; fptan: operand 1 receives the UNSPEC_TAN result of operand 2, while
;; operand 0 is set to the SFmode constant accepted by const1_operand
;; (operand 3).  The input is tied to the register of operand 0.
(define_insn "fptanxf4_i387"
  [(set (match_operand:SF 0 "register_operand" "=f")
        (match_operand:SF 3 "const1_operand"))
   (set (match_operand:XF 1 "register_operand" "=f")
        (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
                   UNSPEC_TAN))]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
  "fptan"
  [(set_attr "type" "fpspc")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "XF")])
19435
;; XFmode tangent.  fptanxf4_i387 also writes an SFmode constant one; a
;; fresh pseudo is allocated to receive it and is not used afterwards.
(define_expand "tanxf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
{
  rtx const_one_dest = gen_reg_rtx (SFmode);

  emit_insn (gen_fptanxf4_i387 (const_one_dest, operands[0], operands[1],
                                CONST1_RTX (SFmode)));
  DONE;
})
19447
;; SFmode/DFmode tangent through the x87: widen to XFmode, expand tanxf2,
;; narrow the result back.
(define_expand "tan<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_tanxf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
19464
;; fpatan applied to operands 2 and 1.  Operand 2 is tied to the result
;; register; the register holding operand 1 is clobbered (the scratch,
;; operand 3, is tied to it).
(define_insn "atan2xf3"
  [(set (match_operand:XF 0 "register_operand" "=f")
        (unspec:XF [(match_operand:XF 2 "register_operand" "0")
                    (match_operand:XF 1 "register_operand" "f")]
                   UNSPEC_FPATAN))
   (clobber (match_scratch:XF 3 "=1"))]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
  "fpatan"
  [(set_attr "type" "fpspc")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "XF")])
19477
;; SFmode/DFmode atan2 through the x87: both arguments are widened to
;; XFmode (operand 2 first, then operand 1), atan2xf3 is applied, and the
;; result is narrowed back.
(define_expand "atan2<mode>3"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))
   (use (match_operand:MODEF 2 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg1 = gen_reg_rtx (XFmode);
  rtx xf_arg2 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg2, operands[2]));
  emit_insn (gen_extend<mode>xf2 (xf_arg1, operands[1]));

  emit_insn (gen_atan2xf3 (xf_result, xf_arg1, xf_arg2));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
19498
;; XFmode arctangent expressed as the fpatan unspec with a constant 1.0
;; (operand 2, forced into a register) as the first element.
(define_expand "atanxf2"
  [(parallel
     [(set (match_operand:XF 0 "register_operand")
           (unspec:XF [(match_dup 2)
                       (match_operand:XF 1 "register_operand")]
                      UNSPEC_FPATAN))
      (clobber (scratch:XF))])]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
{
  operands[2] = force_reg (XFmode, CONST1_RTX (XFmode));
})
19508
;; SFmode/DFmode arctangent through the x87: widen to XFmode, expand
;; atanxf2, narrow the result back.
(define_expand "atan<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_atanxf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
19525
;; XFmode arcsine built from fpatan:
;;   op2 = x * x;  op4 = op3 - op2 (op3 is 1.0);  op5 = sqrt (op4);
;;   result = fpatan unspec of [op5, x].
(define_expand "asinxf2"
  [(set (match_dup 2)
        (mult:XF (match_operand:XF 1 "register_operand")
                 (match_dup 1)))
   (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2)))
   (set (match_dup 5) (sqrt:XF (match_dup 4)))
   (parallel
     [(set (match_operand:XF 0 "register_operand")
           (unspec:XF [(match_dup 5) (match_dup 1)] UNSPEC_FPATAN))
      (clobber (scratch:XF))])]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
{
  /* Fresh XFmode pseudos for the intermediate operands 2..5.  */
  for (int opno = 2; opno <= 5; ++opno)
    operands[opno] = gen_reg_rtx (XFmode);

  /* Operand 3 is the constant one used by the subtraction.  */
  emit_move_insn (operands[3], CONST1_RTX (XFmode));
})
19546
;; SFmode/DFmode arcsine through the x87: widen to XFmode, expand asinxf2,
;; narrow the result back.
(define_expand "asin<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_asinxf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
19563
;; XFmode arccosine built from fpatan; same intermediate values as
;; asinxf2 but with the two unspec elements swapped:
;;   op2 = x * x;  op4 = op3 - op2 (op3 is 1.0);  op5 = sqrt (op4);
;;   result = fpatan unspec of [x, op5].
(define_expand "acosxf2"
  [(set (match_dup 2)
        (mult:XF (match_operand:XF 1 "register_operand")
                 (match_dup 1)))
   (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2)))
   (set (match_dup 5) (sqrt:XF (match_dup 4)))
   (parallel
     [(set (match_operand:XF 0 "register_operand")
           (unspec:XF [(match_dup 1) (match_dup 5)] UNSPEC_FPATAN))
      (clobber (scratch:XF))])]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
{
  /* Fresh XFmode pseudos for the intermediate operands 2..5.  */
  for (int opno = 2; opno <= 5; ++opno)
    operands[opno] = gen_reg_rtx (XFmode);

  /* Operand 3 is the constant one used by the subtraction.  */
  emit_move_insn (operands[3], CONST1_RTX (XFmode));
})
19584
;; SFmode/DFmode arccosine through the x87: widen to XFmode, expand
;; acosxf2, narrow the result back.
(define_expand "acos<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_acosxf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
19601
;; XFmode hyperbolic sine; the instruction sequence is produced by
;; ix86_emit_i387_sinh.  Additionally requires flag_finite_math_only.
(define_expand "sinhxf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_finite_math_only
   && flag_unsafe_math_optimizations"
{
  /* Operand 0 is the destination, operand 1 the argument.  */
  ix86_emit_i387_sinh (operands[0], operands[1]);
  DONE;
})
19612
;; SFmode/DFmode hyperbolic sine through the x87: widen to XFmode, expand
;; sinhxf2, narrow the result back.  Carries the same
;; flag_finite_math_only requirement as sinhxf2.
(define_expand "sinh<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_finite_math_only
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_sinhxf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
19630
;; XFmode hyperbolic cosine; the instruction sequence is produced by
;; ix86_emit_i387_cosh.
(define_expand "coshxf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
{
  /* Operand 0 is the destination, operand 1 the argument.  */
  ix86_emit_i387_cosh (operands[0], operands[1]);
  DONE;
})
19640
;; SFmode/DFmode hyperbolic cosine through the x87: widen to XFmode,
;; expand coshxf2, narrow the result back.
(define_expand "cosh<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_coshxf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
19657
;; XFmode hyperbolic tangent; the instruction sequence is produced by
;; ix86_emit_i387_tanh.
(define_expand "tanhxf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
{
  /* Operand 0 is the destination, operand 1 the argument.  */
  ix86_emit_i387_tanh (operands[0], operands[1]);
  DONE;
})
19667
;; SFmode/DFmode hyperbolic tangent through the x87: widen to XFmode,
;; expand tanhxf2, narrow the result back.
(define_expand "tanh<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_tanhxf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
19684
;; XFmode inverse hyperbolic sine; the instruction sequence is produced by
;; ix86_emit_i387_asinh.  Additionally requires flag_finite_math_only.
(define_expand "asinhxf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_finite_math_only
   && flag_unsafe_math_optimizations"
{
  /* Operand 0 is the destination, operand 1 the argument.  */
  ix86_emit_i387_asinh (operands[0], operands[1]);
  DONE;
})
19695
;; SFmode/DFmode inverse hyperbolic sine through the x87: widen to XFmode,
;; expand asinhxf2, narrow the result back.  Carries the same
;; flag_finite_math_only requirement as asinhxf2.
(define_expand "asinh<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_finite_math_only
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_asinhxf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
19713
;; XFmode inverse hyperbolic cosine; the instruction sequence is produced
;; by ix86_emit_i387_acosh.
(define_expand "acoshxf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
{
  /* Operand 0 is the destination, operand 1 the argument.  */
  ix86_emit_i387_acosh (operands[0], operands[1]);
  DONE;
})
19723
;; SFmode/DFmode inverse hyperbolic cosine through the x87: widen to
;; XFmode, expand acoshxf2, narrow the result back.
(define_expand "acosh<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_acoshxf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
19740
;; XFmode inverse hyperbolic tangent; the instruction sequence is produced
;; by ix86_emit_i387_atanh.
(define_expand "atanhxf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
{
  /* Operand 0 is the destination, operand 1 the argument.  */
  ix86_emit_i387_atanh (operands[0], operands[1]);
  DONE;
})
19750
;; SFmode/DFmode inverse hyperbolic tangent through the x87: widen to
;; XFmode, expand atanhxf2, narrow the result back.
(define_expand "atanh<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_atanhxf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
19767
;; fyl2x applied to operands 1 and 2.  Operand 1 is tied to the result
;; register; the register holding operand 2 is clobbered (the scratch,
;; operand 3, is tied to it).
(define_insn "fyl2xxf3_i387"
  [(set (match_operand:XF 0 "register_operand" "=f")
        (unspec:XF [(match_operand:XF 1 "register_operand" "0")
                    (match_operand:XF 2 "register_operand" "f")]
                   UNSPEC_FYL2X))
   (clobber (match_scratch:XF 3 "=2"))]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
  "fyl2x"
  [(set_attr "type" "fpspc")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "XF")])
19780
;; XFmode natural logarithm: the fyl2x unspec with operand 2 set to the
;; x87 standard constant selected by index 4 (loaded by fldln2).
(define_expand "logxf2"
  [(parallel
     [(set (match_operand:XF 0 "register_operand")
           (unspec:XF [(match_operand:XF 1 "register_operand")
                       (match_dup 2)]
                      UNSPEC_FYL2X))
      (clobber (scratch:XF))])]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
{
  rtx ln2_const = standard_80387_constant_rtx (4); /* fldln2 */

  operands[2] = force_reg (XFmode, ln2_const);
})
19792
;; SFmode/DFmode natural logarithm through the x87: widen to XFmode,
;; expand logxf2, narrow the result back.
(define_expand "log<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_logxf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
19809
;; XFmode base-10 logarithm: the fyl2x unspec with operand 2 set to the
;; x87 standard constant selected by index 3 (loaded by fldlg2).
(define_expand "log10xf2"
  [(parallel
     [(set (match_operand:XF 0 "register_operand")
           (unspec:XF [(match_operand:XF 1 "register_operand")
                       (match_dup 2)]
                      UNSPEC_FYL2X))
      (clobber (scratch:XF))])]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
{
  rtx lg2_const = standard_80387_constant_rtx (3); /* fldlg2 */

  operands[2] = force_reg (XFmode, lg2_const);
})
19821
;; SFmode/DFmode base-10 logarithm through the x87: widen to XFmode,
;; expand log10xf2, narrow the result back.
(define_expand "log10<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_log10xf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
19838
;; XFmode base-2 logarithm: the fyl2x unspec with operand 2 set to the
;; constant 1.0, forced into a register.
(define_expand "log2xf2"
  [(parallel
     [(set (match_operand:XF 0 "register_operand")
           (unspec:XF [(match_operand:XF 1 "register_operand")
                       (match_dup 2)]
                      UNSPEC_FYL2X))
      (clobber (scratch:XF))])]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
{
  operands[2] = force_reg (XFmode, CONST1_RTX (XFmode));
})
19847
;; SFmode/DFmode base-2 logarithm through the x87: widen to XFmode,
;; expand log2xf2, narrow the result back.
(define_expand "log2<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_log2xf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
19864
;; fyl2xp1 applied to operands 1 and 2.  Operand 1 is tied to the result
;; register; the register holding operand 2 is clobbered (the scratch,
;; operand 3, is tied to it).
(define_insn "fyl2xp1xf3_i387"
  [(set (match_operand:XF 0 "register_operand" "=f")
        (unspec:XF [(match_operand:XF 1 "register_operand" "0")
                    (match_operand:XF 2 "register_operand" "f")]
                   UNSPEC_FYL2XP1))
   (clobber (match_scratch:XF 3 "=2"))]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
  "fyl2xp1"
  [(set_attr "type" "fpspc")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "XF")])
19877
;; XFmode log1p; the instruction sequence is produced by
;; ix86_emit_i387_log1p.
(define_expand "log1pxf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
{
  /* Operand 0 is the destination, operand 1 the argument.  */
  ix86_emit_i387_log1p (operands[0], operands[1]);
  DONE;
})
19887
;; SFmode/DFmode log1p through the x87: widen to XFmode, expand log1pxf2,
;; narrow the result back.
(define_expand "log1p<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_log1pxf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
19904
;; fxtract: operand 0 receives the UNSPEC_XTRACT_FRACT part and operand 1
;; the UNSPEC_XTRACT_EXP part of operand 2.  The input is tied to the
;; register of operand 0.
(define_insn "fxtractxf3_i387"
  [(set (match_operand:XF 0 "register_operand" "=f")
        (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
                   UNSPEC_XTRACT_FRACT))
   (set (match_operand:XF 1 "register_operand" "=f")
        (unspec:XF [(match_dup 2)] UNSPEC_XTRACT_EXP))]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
  "fxtract"
  [(set_attr "type" "fpspc")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "XF")])
19917
;; XFmode logb: the fxtract pair where operand 0 takes the
;; UNSPEC_XTRACT_EXP half and the UNSPEC_XTRACT_FRACT half goes to a
;; fresh pseudo (operand 2).
(define_expand "logbxf2"
  [(parallel
     [(set (match_dup 2)
           (unspec:XF [(match_operand:XF 1 "register_operand")]
                      UNSPEC_XTRACT_FRACT))
      (set (match_operand:XF 0 "register_operand")
           (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
{
  operands[2] = gen_reg_rtx (XFmode);
})
19927
;; SFmode/DFmode logb through the x87: widen the argument to XFmode,
;; extract its exponent with logbxf2, and narrow that exponent back to
;; <MODE>mode.  Disabled when the mode is handled by SSE math alone.
(define_expand "logb<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx op0 = gen_reg_rtx (XFmode);
  rtx op1 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
  emit_insn (gen_logbxf2 (op0, op1));
  /* logbxf2 writes its result to op0; op1 still holds the widened
     argument, so op0 is the value that must be narrowed into the
     destination.  */
  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
  DONE;
})
19944
;; XFmode ilogb: fxtract followed by a conversion of the exponent part to
;; SImode.  The expansion FAILs when optimizing the insn for size.
(define_expand "ilogbxf2"
  [(use (match_operand:SI 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
{
  if (optimize_insn_for_size_p ())
    FAIL;

  rtx fract_part = gen_reg_rtx (XFmode);
  rtx exp_part = gen_reg_rtx (XFmode);

  /* Only the exponent half of the fxtract result is consumed.  */
  emit_insn (gen_fxtractxf3_i387 (fract_part, exp_part, operands[1]));
  emit_insn (gen_fix_truncxfsi2 (operands[0], exp_part));
  DONE;
})
19963
;; SFmode/DFmode ilogb through the x87: widen the argument to XFmode, run
;; fxtract, and convert the exponent part to SImode.  The expansion FAILs
;; when optimizing the insn for size.
(define_expand "ilogb<mode>2"
  [(use (match_operand:SI 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  if (optimize_insn_for_size_p ())
    FAIL;

  rtx fract_part = gen_reg_rtx (XFmode);
  rtx exp_part = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  /* Only the exponent half of the fxtract result is consumed.  */
  emit_insn (gen_fxtractxf3_i387 (fract_part, exp_part, xf_arg));
  emit_insn (gen_fix_truncxfsi2 (operands[0], exp_part));
  DONE;
})
19986
;; f2xm1 on an x87 register; the input (operand 1) is tied to the output.
;; The leading '*' makes this an unnamed pattern, reachable only by
;; matching the UNSPEC_F2XM1 RTL.
(define_insn "*f2xm1xf2_i387"
  [(set (match_operand:XF 0 "register_operand" "=f")
        (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
                   UNSPEC_F2XM1))]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
  "f2xm1"
  [(set_attr "type" "fpspc")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "XF")])
19997
;; fscale: both outputs are computed from operands 2 and 3.  Operand 0
;; takes the UNSPEC_FSCALE_FRACT result and operand 1 the
;; UNSPEC_FSCALE_EXP result; input 2 is tied to output 0 and input 3 to
;; output 1.
(define_insn "fscalexf4_i387"
  [(set (match_operand:XF 0 "register_operand" "=f")
        (unspec:XF [(match_operand:XF 2 "register_operand" "0")
                    (match_operand:XF 3 "register_operand" "1")]
                   UNSPEC_FSCALE_FRACT))
   (set (match_operand:XF 1 "register_operand" "=f")
        (unspec:XF [(match_dup 2) (match_dup 3)]
                   UNSPEC_FSCALE_EXP))]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
  "fscale"
  [(set_attr "type" "fpspc")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "XF")])
20012
;; Shared core of the exp-family expanders.  Operand 2 is a multiplier
;; supplied by the caller.  The emitted sequence is:
;;   op3 = op1 * op2
;;   op4 = frndint (op3)
;;   op5 = op3 - op4
;;   op6 = f2xm1 (op5)
;;   op8 = op6 + op7            (op7 is 1.0)
;;   op0 = fscale (op8, op4)    (second fscale output goes to op9)
(define_expand "expNcorexf3"
  [(set (match_dup 3)
        (mult:XF (match_operand:XF 1 "register_operand")
                 (match_operand:XF 2 "register_operand")))
   (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT))
   (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4)))
   (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1))
   (set (match_dup 8) (plus:XF (match_dup 6) (match_dup 7)))
   (parallel
     [(set (match_operand:XF 0 "register_operand")
           (unspec:XF [(match_dup 8) (match_dup 4)]
                      UNSPEC_FSCALE_FRACT))
      (set (match_dup 9)
           (unspec:XF [(match_dup 8) (match_dup 4)]
                      UNSPEC_FSCALE_EXP))])]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
{
  /* Fresh XFmode pseudos for the intermediate operands 3..9.  */
  for (int opno = 3; opno <= 9; ++opno)
    operands[opno] = gen_reg_rtx (XFmode);

  /* Operand 7 is the constant one added back after f2xm1.  */
  emit_move_insn (operands[7], CONST1_RTX (XFmode));
})
20036
;; XFmode exp: expNcorexf3 with the multiplier set to the x87 standard
;; constant selected by index 5 (loaded by fldl2e).
(define_expand "expxf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
{
  rtx l2e_const = standard_80387_constant_rtx (5); /* fldl2e */
  rtx multiplier = force_reg (XFmode, l2e_const);

  emit_insn (gen_expNcorexf3 (operands[0], operands[1], multiplier));
  DONE;
})
20048
;; SFmode/DFmode exp through the x87: widen to XFmode, expand expxf2,
;; narrow the result back.
(define_expand "exp<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_expxf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
20065
;; XFmode exp10: expNcorexf3 with the multiplier set to the x87 standard
;; constant selected by index 6 (loaded by fldl2t).
(define_expand "exp10xf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
{
  rtx l2t_const = standard_80387_constant_rtx (6); /* fldl2t */
  rtx multiplier = force_reg (XFmode, l2t_const);

  emit_insn (gen_expNcorexf3 (operands[0], operands[1], multiplier));
  DONE;
})
20077
;; SFmode/DFmode exp10 through the x87: widen to XFmode, expand exp10xf2,
;; narrow the result back.
(define_expand "exp10<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_exp10xf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
20094
;; XFmode exp2: expNcorexf3 with the multiplier set to the constant 1.0.
(define_expand "exp2xf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
{
  rtx multiplier = force_reg (XFmode, CONST1_RTX (XFmode));

  emit_insn (gen_expNcorexf3 (operands[0], operands[1], multiplier));
  DONE;
})
20106
;; SFmode/DFmode exp2 through the x87: widen to XFmode, expand exp2xf2,
;; narrow the result back.
(define_expand "exp2<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_exp2xf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
20123
;; XFmode expm1.  The emitted sequence is:
;;   op3  = op1 * op2                 (op2 is the fldl2e constant)
;;   op4  = frndint (op3)
;;   op5  = op3 - op4
;;   op6  = f2xm1 (op5)
;;   (op7, op8)   = fscale (op6, op4)
;;   (op10, op11) = fscale (op9, op8)  (op9 is 1.0)
;;   op12 = op10 - op9
;;   op0  = op12 + op7
(define_expand "expm1xf2"
  [(set (match_dup 3)
        (mult:XF (match_operand:XF 1 "register_operand")
                 (match_dup 2)))
   (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT))
   (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4)))
   (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1))
   (parallel
     [(set (match_dup 7)
           (unspec:XF [(match_dup 6) (match_dup 4)]
                      UNSPEC_FSCALE_FRACT))
      (set (match_dup 8)
           (unspec:XF [(match_dup 6) (match_dup 4)]
                      UNSPEC_FSCALE_EXP))])
   (parallel
     [(set (match_dup 10)
           (unspec:XF [(match_dup 9) (match_dup 8)]
                      UNSPEC_FSCALE_FRACT))
      (set (match_dup 11)
           (unspec:XF [(match_dup 9) (match_dup 8)]
                      UNSPEC_FSCALE_EXP))])
   (set (match_dup 12) (minus:XF (match_dup 10) (match_dup 9)))
   (set (match_operand:XF 0 "register_operand")
        (plus:XF (match_dup 12) (match_dup 7)))]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
{
  /* Fresh XFmode pseudos for the intermediate operands 2..12.  */
  for (int opno = 2; opno <= 12; ++opno)
    operands[opno] = gen_reg_rtx (XFmode);

  /* Load the two constants the template refers to.  */
  emit_move_insn (operands[2], standard_80387_constant_rtx (5)); /* fldl2e */
  emit_move_insn (operands[9], CONST1_RTX (XFmode));
})
20156
;; SFmode/DFmode expm1 through the x87: widen to XFmode, expand expm1xf2,
;; narrow the result back.
(define_expand "expm1<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_expm1xf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
20173
;; Scalar AVX-512 vscalef for SFmode/DFmode.  Operand 1 must be a register;
;; operand 2 may be a register or memory.  EVEX-encoded.
(define_insn "avx512f_scalef<mode>2"
  [(set (match_operand:MODEF 0 "register_operand" "=v")
        (unspec:MODEF
          [(match_operand:MODEF 1 "register_operand" "v")
           (match_operand:MODEF 2 "nonimmediate_operand" "vm")]
          UNSPEC_SCALEF))]
  "TARGET_AVX512F"
  "vscalef<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
20184
;; XFmode ldexp: the SImode exponent (operand 2) is converted to XFmode and
;; passed to fscale together with operand 1.  The second fscale output
;; lands in a pseudo that is not used afterwards.
(define_expand "ldexpxf3"
  [(match_operand:XF 0 "register_operand")
   (match_operand:XF 1 "register_operand")
   (match_operand:SI 2 "register_operand")]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
{
  rtx xf_exponent = gen_reg_rtx (XFmode);
  rtx unused_out = gen_reg_rtx (XFmode);

  emit_insn (gen_floatsixf2 (xf_exponent, operands[2]));
  emit_insn (gen_fscalexf4_i387 (operands[0], unused_out,
                                 operands[1], xf_exponent));
  DONE;
})
20200
;; SFmode/DFmode ldexp.  With AVX512F and SSE math the exponent is
;; converted to <MODE>mode and vscalef is used directly; otherwise the
;; value is widened to XFmode, scaled by ldexpxf3, and narrowed back.
(define_expand "ldexp<mode>3"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))
   (use (match_operand:SI 2 "register_operand"))]
  "((TARGET_USE_FANCY_MATH_387
     && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
         || TARGET_MIX_SSE_I387))
    || (TARGET_AVX512F && TARGET_SSE_MATH))
   && flag_unsafe_math_optimizations"
{
  /* Prefer avx512f version.  */
  if (TARGET_AVX512F && TARGET_SSE_MATH)
    {
      rtx fp_exponent = gen_reg_rtx (<MODE>mode);

      /* The scalef insn only accepts a register for operand 1.  */
      operands[1] = force_reg (<MODE>mode, operands[1]);

      emit_insn (gen_floatsi<mode>2 (fp_exponent, operands[2]));
      emit_insn (gen_avx512f_scalef<mode>2 (operands[0], operands[1],
                                            fp_exponent));
    }
  else
    {
      rtx xf_result = gen_reg_rtx (XFmode);
      rtx xf_arg = gen_reg_rtx (XFmode);

      emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
      emit_insn (gen_ldexpxf3 (xf_result, xf_arg, operands[2]));
      emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
    }
  DONE;
})
20231
;; XFmode scalb: the fscale pair where operand 0 takes the
;; UNSPEC_FSCALE_FRACT result of operands 1 and 2, and the
;; UNSPEC_FSCALE_EXP result goes to a fresh pseudo (operand 3).
(define_expand "scalbxf3"
  [(parallel [(set (match_operand:XF 0 "register_operand")
                   (unspec:XF [(match_operand:XF 1 "register_operand")
                               (match_operand:XF 2 "register_operand")]
                              UNSPEC_FSCALE_FRACT))
              (set (match_dup 3)
                   (unspec:XF [(match_dup 1) (match_dup 2)]
                              UNSPEC_FSCALE_EXP))])]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
  "operands[3] = gen_reg_rtx (XFmode);")
20243
;; SFmode/DFmode scalb through the x87: both arguments are widened to
;; XFmode, scalbxf3 is expanded, and the result is narrowed back.
(define_expand "scalb<mode>3"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))
   (use (match_operand:MODEF 2 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg1 = gen_reg_rtx (XFmode);
  rtx xf_arg2 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg1, operands[1]));
  emit_insn (gen_extend<mode>xf2 (xf_arg2, operands[2]));
  emit_insn (gen_scalbxf3 (xf_result, xf_arg1, xf_arg2));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
20263
;; XFmode significand: the fxtract pair where operand 0 takes the
;; UNSPEC_XTRACT_FRACT half and the UNSPEC_XTRACT_EXP half goes to a
;; fresh pseudo (operand 2).
(define_expand "significandxf2"
  [(parallel
     [(set (match_operand:XF 0 "register_operand")
           (unspec:XF [(match_operand:XF 1 "register_operand")]
                      UNSPEC_XTRACT_FRACT))
      (set (match_dup 2)
           (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
{
  operands[2] = gen_reg_rtx (XFmode);
})
20273
;; SFmode/DFmode significand through the x87: widen to XFmode, expand
;; significandxf2, narrow the result back.
(define_expand "significand<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_significandxf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
20290 \f
20291
;; Scalar rounding with an immediate control operand (operand 2, range
;; 0..15).  Alternatives 0-2 emit (v)round and are restricted to
;; non-AVX512F; alternatives 3-4 emit vrndscale and require AVX512F.
;; Alternatives 2 and 4 take the source from memory and are flagged as
;; partial XMM updates.  Without AVX, alternatives 1 and 2 are preferred
;; for speed only when !TARGET_SSE_PARTIAL_REG_DEPENDENCY.
(define_insn "sse4_1_round<mode>2"
  [(set (match_operand:MODEFH 0 "register_operand" "=x,x,x,v,v")
        (unspec:MODEFH
          [(match_operand:MODEFH 1 "nonimmediate_operand" "0,x,m,v,m")
           (match_operand:SI 2 "const_0_to_15_operand")]
          UNSPEC_ROUND))]
  "TARGET_SSE4_1"
  "@
   %vround<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2}
   %vround<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2}
   %vround<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}
   vrndscale<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2}
   vrndscale<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_extra" "1,1,1,*,*")
   (set_attr "length_immediate" "*,*,*,1,1")
   (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,evex,evex")
   (set_attr "isa" "noavx512f,noavx512f,noavx512f,avx512f,avx512f")
   (set_attr "avx_partial_xmm_update" "false,false,true,false,true")
   (set_attr "mode" "<MODE>")
   (set (attr "preferred_for_speed")
        (cond [(match_test "TARGET_AVX")
                 (symbol_ref "true")
               (eq_attr "alternative" "1,2")
                 (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
              ]
              (symbol_ref "true")))])
20319
;; XFmode rint (UNSPEC_FRNDINT): outputs a single "frndint".  The
;; destination uses the "f" constraint and the source is tied to it
;; (constraint "0"), so the operation is done in place.
20320 (define_insn "rintxf2"
20321 [(set (match_operand:XF 0 "register_operand" "=f")
20322 (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
20323 UNSPEC_FRNDINT))]
20324 "TARGET_USE_FANCY_MATH_387"
20325 "frndint"
20326 [(set_attr "type" "fpspc")
20327 (set_attr "znver1_decode" "vector")
20328 (set_attr "mode" "XF")])
20329
;; HFmode rint for TARGET_AVX512FP16: delegates to sse4_1_roundhf2 with
;; ROUND_MXCSR as the rounding immediate.
20330 (define_expand "rinthf2"
20331 [(match_operand:HF 0 "register_operand")
20332 (match_operand:HF 1 "nonimmediate_operand")]
20333 "TARGET_AVX512FP16"
20334 {
20335 emit_insn (gen_sse4_1_roundhf2 (operands[0],
20336 operands[1],
20337 GEN_INT (ROUND_MXCSR)));
20338 DONE;
20339 })
20340
;; rint for MODEF modes.
;; With SSE math for <MODE>: sse4_1_round<mode>2 with ROUND_MXCSR when
;; TARGET_SSE4_1, otherwise ix86_expand_rint.
;; Without SSE math: go through XFmode (extend, rintxf2, then
;; truncxf<mode>2_i387_noop_unspec back into operand 0).
20341 (define_expand "rint<mode>2"
20342 [(use (match_operand:MODEF 0 "register_operand"))
20343 (use (match_operand:MODEF 1 "nonimmediate_operand"))]
20344 "TARGET_USE_FANCY_MATH_387
20345 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
20346 {
20347 if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
20348 {
20349 if (TARGET_SSE4_1)
20350 emit_insn (gen_sse4_1_round<mode>2
20351 (operands[0], operands[1], GEN_INT (ROUND_MXCSR)));
20352 else
20353 ix86_expand_rint (operands[0], operands[1]);
20354 }
20355 else
20356 {
20357 rtx op0 = gen_reg_rtx (XFmode);
20358 rtx op1 = gen_reg_rtx (XFmode);
20359
20360 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
20361 emit_insn (gen_rintxf2 (op0, op1));
20362 emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
20363 }
20364 DONE;
20365 })
20366
;; XFmode nearbyint.  There are no preparation statements: the expander
;; emits a plain UNSPEC_FRNDINT set (the same RTL shape as "rintxf2"),
;; and is enabled only when !flag_trapping_math.
20367 (define_expand "nearbyintxf2"
20368 [(set (match_operand:XF 0 "register_operand")
20369 (unspec:XF [(match_operand:XF 1 "register_operand")]
20370 UNSPEC_FRNDINT))]
20371 "TARGET_USE_FANCY_MATH_387
20372 && !flag_trapping_math")
20373
;; HFmode nearbyint for TARGET_AVX512FP16: delegates to
;; sse4_1_roundhf2 with ROUND_MXCSR | ROUND_NO_EXC as the immediate.
20374 (define_expand "nearbyinthf2"
20375 [(match_operand:HF 0 "register_operand")
20376 (match_operand:HF 1 "nonimmediate_operand")]
20377 "TARGET_AVX512FP16"
20378 {
20379 emit_insn (gen_sse4_1_roundhf2 (operands[0],
20380 operands[1],
20381 GEN_INT (ROUND_MXCSR | ROUND_NO_EXC)));
20382 DONE;
20383 })
20384
;; nearbyint for MODEF modes.
;; With TARGET_SSE4_1 && TARGET_SSE_MATH: sse4_1_round<mode>2 with
;; ROUND_MXCSR | ROUND_NO_EXC.
;; Otherwise (x87 available, <MODE> not handled by SSE math or
;; TARGET_MIX_SSE_I387, and !flag_trapping_math): go through XFmode
;; (extend, nearbyintxf2, truncxf<mode>2_i387_noop_unspec).
20385 (define_expand "nearbyint<mode>2"
20386 [(use (match_operand:MODEF 0 "register_operand"))
20387 (use (match_operand:MODEF 1 "nonimmediate_operand"))]
20388 "(TARGET_USE_FANCY_MATH_387
20389 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
20390 || TARGET_MIX_SSE_I387)
20391 && !flag_trapping_math)
20392 || (TARGET_SSE4_1 && TARGET_SSE_MATH)"
20393 {
20394 if (TARGET_SSE4_1 && TARGET_SSE_MATH)
20395 emit_insn (gen_sse4_1_round<mode>2
20396 (operands[0], operands[1], GEN_INT (ROUND_MXCSR
20397 | ROUND_NO_EXC)));
20398 else
20399 {
20400 rtx op0 = gen_reg_rtx (XFmode);
20401 rtx op1 = gen_reg_rtx (XFmode);
20402
20403 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
20404 emit_insn (gen_nearbyintxf2 (op0, op1));
20405 emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
20406 }
20407 DONE;
20408 })
20409
;; round for X87MODEF modes.
;; SSE path (SSE math for <MODE>, !flag_trapping_math and
;; !flag_rounding_math):
;;   - TARGET_SSE4_1: force operand 1 into a register, then
;;     ix86_expand_round_sse4;
;;   - else ix86_expand_round when TARGET_64BIT or <MODE> is not DFmode;
;;   - else (DFmode without TARGET_64BIT) ix86_expand_rounddf_32.
;; Any other enabled case: force operand 1 into a register and call
;; ix86_emit_i387_round.  The condition requires
;; flag_unsafe_math_optimizations for that x87 alternative.
20410 (define_expand "round<mode>2"
20411 [(match_operand:X87MODEF 0 "register_operand")
20412 (match_operand:X87MODEF 1 "nonimmediate_operand")]
20413 "(TARGET_USE_FANCY_MATH_387
20414 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
20415 || TARGET_MIX_SSE_I387)
20416 && flag_unsafe_math_optimizations
20417 && (flag_fp_int_builtin_inexact || !flag_trapping_math))
20418 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
20419 && !flag_trapping_math && !flag_rounding_math)"
20420 {
20421 if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
20422 && !flag_trapping_math && !flag_rounding_math)
20423 {
20424 if (TARGET_SSE4_1)
20425 {
20426 operands[1] = force_reg (<MODE>mode, operands[1]);
20427 ix86_expand_round_sse4 (operands[0], operands[1]);
20428 }
20429 else if (TARGET_64BIT || (<MODE>mode != DFmode))
20430 ix86_expand_round (operands[0], operands[1]);
20431 else
20432 ix86_expand_rounddf_32 (operands[0], operands[1]);
20433 }
20434 else
20435 {
20436 operands[1] = force_reg (<MODE>mode, operands[1]);
20437 ix86_emit_i387_round (operands[0], operands[1]);
20438 }
20439 DONE;
20440 })
20441
;; XFmode -> DImode lrint (UNSPEC_FIST) into a memory destination
;; ("=m").  The assembly comes from
;; output_fix_trunc (insn, operands, false).  Unlike the SWI24 variant
;; "lrintxf<mode>2", this pattern also clobbers an early-clobber XF
;; scratch (operand 2, "=&f").
20442 (define_insn "lrintxfdi2"
20443 [(set (match_operand:DI 0 "nonimmediate_operand" "=m")
20444 (unspec:DI [(match_operand:XF 1 "register_operand" "f")]
20445 UNSPEC_FIST))
20446 (clobber (match_scratch:XF 2 "=&f"))]
20447 "TARGET_USE_FANCY_MATH_387"
20448 "* return output_fix_trunc (insn, operands, false);"
20449 [(set_attr "type" "fpspc")
20450 (set_attr "mode" "DI")])
20451
;; XFmode -> SWI24 lrint (UNSPEC_FIST) into a memory destination
;; ("=m"); the assembly comes from
;; output_fix_trunc (insn, operands, false).  No scratch is clobbered.
20452 (define_insn "lrintxf<mode>2"
20453 [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m")
20454 (unspec:SWI24 [(match_operand:XF 1 "register_operand" "f")]
20455 UNSPEC_FIST))]
20456 "TARGET_USE_FANCY_MATH_387"
20457 "* return output_fix_trunc (insn, operands, false);"
20458 [(set_attr "type" "fpspc")
20459 (set_attr "mode" "<MODE>")])
20460
;; lrint from MODEF to SWI48 under SSE math.  There are no preparation
;; statements: the expander emits the UNSPEC_FIX_NOTRUNC set as written.
20461 (define_expand "lrint<MODEF:mode><SWI48:mode>2"
20462 [(set (match_operand:SWI48 0 "register_operand")
20463 (unspec:SWI48 [(match_operand:MODEF 1 "nonimmediate_operand")]
20464 UNSPEC_FIX_NOTRUNC))]
20465 "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH")
20466
;; lround from X87MODEF to SWI248x.
;; SSE path: SSE math for the source mode, destination not HImode,
;; DImode destination only with TARGET_64BIT, !flag_trapping_math and
;; !flag_rounding_math -> ix86_expand_lround.
;; Any other enabled case -> ix86_emit_i387_round; the condition
;; requires flag_unsafe_math_optimizations for that x87 alternative.
;; The test in the body repeats the SSE half of the condition verbatim,
;; so the two must be kept in sync.
20467 (define_expand "lround<X87MODEF:mode><SWI248x:mode>2"
20468 [(match_operand:SWI248x 0 "nonimmediate_operand")
20469 (match_operand:X87MODEF 1 "register_operand")]
20470 "(TARGET_USE_FANCY_MATH_387
20471 && (!(SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)
20472 || TARGET_MIX_SSE_I387)
20473 && flag_unsafe_math_optimizations)
20474 || (SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH
20475 && <SWI248x:MODE>mode != HImode
20476 && ((<SWI248x:MODE>mode != DImode) || TARGET_64BIT)
20477 && !flag_trapping_math && !flag_rounding_math)"
20478 {
20479 if (SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH
20480 && <SWI248x:MODE>mode != HImode
20481 && ((<SWI248x:MODE>mode != DImode) || TARGET_64BIT)
20482 && !flag_trapping_math && !flag_rounding_math)
20483 ix86_expand_lround (operands[0], operands[1]);
20484 else
20485 ix86_emit_i387_round (operands[0], operands[1]);
20486 DONE;
20487 })
20488
;; Unspecs iterated over by the frndint-based rounding patterns:
;; roundeven, floor, ceil and trunc.
20489 (define_int_iterator FRNDINT_ROUNDING
20490 [UNSPEC_FRNDINT_ROUNDEVEN
20491 UNSPEC_FRNDINT_FLOOR
20492 UNSPEC_FRNDINT_CEIL
20493 UNSPEC_FRNDINT_TRUNC])
20494
;; Unspecs iterated over by the fist-based patterns: floor and ceil.
20495 (define_int_iterator FIST_ROUNDING
20496 [UNSPEC_FIST_FLOOR
20497 UNSPEC_FIST_CEIL])
20498
20499 ;; Base name for define_insn
;; Used to build pattern names such as "<rounding_insn>xf2" and
;; "l<rounding_insn>xf<mode>2".  Note that UNSPEC_FRNDINT_TRUNC maps to
;; "btrunc" here, whereas the "rounding" attribute maps it to "trunc".
20500 (define_int_attr rounding_insn
20501 [(UNSPEC_FRNDINT_ROUNDEVEN "roundeven")
20502 (UNSPEC_FRNDINT_FLOOR "floor")
20503 (UNSPEC_FRNDINT_CEIL "ceil")
20504 (UNSPEC_FRNDINT_TRUNC "btrunc")
20505 (UNSPEC_FIST_FLOOR "floor")
20506 (UNSPEC_FIST_CEIL "ceil")])
20507
;; Lower-case rounding name.  Used in internal pattern names (e.g.
;; "frndintxf2_<rounding>", "fist<mode>2_<rounding>") and as the value
;; of the "i387_cw" attribute.
20508 (define_int_attr rounding
20509 [(UNSPEC_FRNDINT_ROUNDEVEN "roundeven")
20510 (UNSPEC_FRNDINT_FLOOR "floor")
20511 (UNSPEC_FRNDINT_CEIL "ceil")
20512 (UNSPEC_FRNDINT_TRUNC "trunc")
20513 (UNSPEC_FIST_FLOOR "floor")
20514 (UNSPEC_FIST_CEIL "ceil")])
20515
;; Upper-case rounding name.  Pasted into C identifiers in the patterns
;; below: I387_<ROUNDING>, SLOT_CW_<ROUNDING> and ROUND_<ROUNDING>.
20516 (define_int_attr ROUNDING
20517 [(UNSPEC_FRNDINT_ROUNDEVEN "ROUNDEVEN")
20518 (UNSPEC_FRNDINT_FLOOR "FLOOR")
20519 (UNSPEC_FRNDINT_CEIL "CEIL")
20520 (UNSPEC_FRNDINT_TRUNC "TRUNC")
20521 (UNSPEC_FIST_FLOOR "FLOOR")
20522 (UNSPEC_FIST_CEIL "CEIL")])
20523
20524 ;; Rounding mode control word calculation could clobber FLAGS_REG.
;; Placeholder for an XFmode frndint with an explicit rounding mode.
;; It matches only while ix86_pre_reload_split () holds, has "#" as its
;; output template, and is always split ("&& 1").  The split code:
;;   - sets ix86_optimize_mode_switching[I387_<ROUNDING>] to 1;
;;   - obtains two HImode stack slots, SLOT_CW_STORED (operand 2) and
;;     SLOT_CW_<ROUNDING> (operand 3);
;;   - emits frndintxf2_<rounding>_i387 with those four operands.
20525 (define_insn_and_split "frndintxf2_<rounding>"
20526 [(set (match_operand:XF 0 "register_operand")
20527 (unspec:XF [(match_operand:XF 1 "register_operand")]
20528 FRNDINT_ROUNDING))
20529 (clobber (reg:CC FLAGS_REG))]
20530 "TARGET_USE_FANCY_MATH_387
20531 && (flag_fp_int_builtin_inexact || !flag_trapping_math)
20532 && ix86_pre_reload_split ()"
20533 "#"
20534 "&& 1"
20535 [(const_int 0)]
20536 {
20537 ix86_optimize_mode_switching[I387_<ROUNDING>] = 1;
20538
20539 operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
20540 operands[3] = assign_386_stack_local (HImode, SLOT_CW_<ROUNDING>);
20541
20542 emit_insn (gen_frndintxf2_<rounding>_i387 (operands[0], operands[1],
20543 operands[2], operands[3]));
20544 DONE;
20545 }
20546 [(set_attr "type" "frndint")
20547 (set_attr "i387_cw" "<rounding>")
20548 (set_attr "mode" "XF")])
20549
;; The real frndint-with-rounding-mode insn.  It outputs
;; "fldcw %3; frndint; fldcw %2": load the control word from memory
;; operand 3, round in place (operand 1 is tied to operand 0), then load
;; the control word from memory operand 2.  The splitter of
;; "frndintxf2_<rounding>" passes the SLOT_CW_<ROUNDING> slot as
;; operand 3 and the SLOT_CW_STORED slot as operand 2.
20550 (define_insn "frndintxf2_<rounding>_i387"
20551 [(set (match_operand:XF 0 "register_operand" "=f")
20552 (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
20553 FRNDINT_ROUNDING))
20554 (use (match_operand:HI 2 "memory_operand" "m"))
20555 (use (match_operand:HI 3 "memory_operand" "m"))]
20556 "TARGET_USE_FANCY_MATH_387
20557 && (flag_fp_int_builtin_inexact || !flag_trapping_math)"
20558 "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
20559 [(set_attr "type" "frndint")
20560 (set_attr "i387_cw" "<rounding>")
20561 (set_attr "mode" "XF")])
20562
;; roundeven/floor/ceil/btrunc for XFmode.  There are no preparation
;; statements: the expander emits the parallel of the FRNDINT_ROUNDING
;; set and the FLAGS_REG clobber, i.e. the RTL shape of
;; "frndintxf2_<rounding>".
20563 (define_expand "<rounding_insn>xf2"
20564 [(parallel [(set (match_operand:XF 0 "register_operand")
20565 (unspec:XF [(match_operand:XF 1 "register_operand")]
20566 FRNDINT_ROUNDING))
20567 (clobber (reg:CC FLAGS_REG))])]
20568 "TARGET_USE_FANCY_MATH_387
20569 && (flag_fp_int_builtin_inexact || !flag_trapping_math)")
20570
;; roundeven/floor/ceil/btrunc for HFmode with TARGET_AVX512FP16.  The
;; RTL template is not emitted (the body ends in DONE); instead
;; sse4_1_roundhf2 is emitted with ROUND_<ROUNDING> | ROUND_NO_EXC.
20571 (define_expand "<rounding_insn>hf2"
20572 [(parallel [(set (match_operand:HF 0 "register_operand")
20573 (unspec:HF [(match_operand:HF 1 "register_operand")]
20574 FRNDINT_ROUNDING))
20575 (clobber (reg:CC FLAGS_REG))])]
20576 "TARGET_AVX512FP16"
20577 {
20578 emit_insn (gen_sse4_1_roundhf2 (operands[0], operands[1],
20579 GEN_INT (ROUND_<ROUNDING> | ROUND_NO_EXC)));
20580 DONE;
20581 })
20582
;; roundeven/floor/ceil/btrunc for MODEF modes.
;; SSE path -- SSE math for <MODE> and either TARGET_SSE4_1, or a
;; rounding other than ROUNDEVEN together with
;; (flag_fp_int_builtin_inexact || !flag_trapping_math):
;;   - TARGET_SSE4_1: sse4_1_round<mode>2 with
;;     ROUND_<ROUNDING> | ROUND_NO_EXC;
;;   - TARGET_64BIT or <MODE> != DFmode: ix86_expand_floorceil (true for
;;     floor, false for ceil) or ix86_expand_trunc;
;;   - DFmode without TARGET_64BIT: the *df_32 counterparts.
;;   ROUNDEVEN reaches the SSE path only through TARGET_SSE4_1, so the
;;   final "else" arms of the two helper chains are gcc_unreachable ().
;; x87 path: extend to XFmode, frndintxf2_<rounding>, then
;; truncxf<mode>2_i387_noop_unspec into operand 0.
;; The test in the body repeats the SSE half of the condition verbatim.
20583 (define_expand "<rounding_insn><mode>2"
20584 [(parallel [(set (match_operand:MODEF 0 "register_operand")
20585 (unspec:MODEF [(match_operand:MODEF 1 "register_operand")]
20586 FRNDINT_ROUNDING))
20587 (clobber (reg:CC FLAGS_REG))])]
20588 "(TARGET_USE_FANCY_MATH_387
20589 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
20590 || TARGET_MIX_SSE_I387)
20591 && (flag_fp_int_builtin_inexact || !flag_trapping_math))
20592 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
20593 && (TARGET_SSE4_1
20594 || (ROUND_<ROUNDING> != ROUND_ROUNDEVEN
20595 && (flag_fp_int_builtin_inexact || !flag_trapping_math))))"
20596 {
20597 if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
20598 && (TARGET_SSE4_1
20599 || (ROUND_<ROUNDING> != ROUND_ROUNDEVEN
20600 && (flag_fp_int_builtin_inexact || !flag_trapping_math))))
20601 {
20602 if (TARGET_SSE4_1)
20603 emit_insn (gen_sse4_1_round<mode>2
20604 (operands[0], operands[1],
20605 GEN_INT (ROUND_<ROUNDING> | ROUND_NO_EXC)));
20606 else if (TARGET_64BIT || (<MODE>mode != DFmode))
20607 {
20608 if (ROUND_<ROUNDING> == ROUND_FLOOR)
20609 ix86_expand_floorceil (operands[0], operands[1], true);
20610 else if (ROUND_<ROUNDING> == ROUND_CEIL)
20611 ix86_expand_floorceil (operands[0], operands[1], false);
20612 else if (ROUND_<ROUNDING> == ROUND_TRUNC)
20613 ix86_expand_trunc (operands[0], operands[1]);
20614 else
20615 gcc_unreachable ();
20616 }
20617 else
20618 {
20619 if (ROUND_<ROUNDING> == ROUND_FLOOR)
20620 ix86_expand_floorceildf_32 (operands[0], operands[1], true);
20621 else if (ROUND_<ROUNDING> == ROUND_CEIL)
20622 ix86_expand_floorceildf_32 (operands[0], operands[1], false);
20623 else if (ROUND_<ROUNDING> == ROUND_TRUNC)
20624 ix86_expand_truncdf_32 (operands[0], operands[1]);
20625 else
20626 gcc_unreachable ();
20627 }
20628 }
20629 else
20630 {
20631 rtx op0 = gen_reg_rtx (XFmode);
20632 rtx op1 = gen_reg_rtx (XFmode);
20633
20634 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
20635 emit_insn (gen_frndintxf2_<rounding> (op0, op1));
20636 emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
20637 }
20638 DONE;
20639 })
20640
20641 ;; Rounding mode control word calculation could clobber FLAGS_REG.
;; Placeholder for an XFmode -> SWI248x fist with floor/ceil rounding.
;; It matches only while ix86_pre_reload_split () holds, has "#" as its
;; output template, and is always split ("&& 1").  The split code sets
;; ix86_optimize_mode_switching[I387_<ROUNDING>], obtains the
;; SLOT_CW_STORED and SLOT_CW_<ROUNDING> HImode stack slots (operands 2
;; and 3) and emits fist<mode>2_<rounding> with the four operands.
20642 (define_insn_and_split "*fist<mode>2_<rounding>_1"
20643 [(set (match_operand:SWI248x 0 "nonimmediate_operand")
20644 (unspec:SWI248x [(match_operand:XF 1 "register_operand")]
20645 FIST_ROUNDING))
20646 (clobber (reg:CC FLAGS_REG))]
20647 "TARGET_USE_FANCY_MATH_387
20648 && flag_unsafe_math_optimizations
20649 && ix86_pre_reload_split ()"
20650 "#"
20651 "&& 1"
20652 [(const_int 0)]
20653 {
20654 ix86_optimize_mode_switching[I387_<ROUNDING>] = 1;
20655
20656 operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
20657 operands[3] = assign_386_stack_local (HImode, SLOT_CW_<ROUNDING>);
20658
20659 emit_insn (gen_fist<mode>2_<rounding> (operands[0], operands[1],
20660 operands[2], operands[3]));
20661 DONE;
20662 }
20663 [(set_attr "type" "fistp")
20664 (set_attr "i387_cw" "<rounding>")
20665 (set_attr "mode" "<MODE>")])
20666
;; XFmode -> DImode fist with floor/ceil rounding, memory destination.
;; Operands 2 and 3 are HImode memory operands that are only "use"d in
;; the RTL.  An early-clobber XF scratch (operand 4, "=&f") is
;; clobbered, unlike in the SWI24 variant "fist<mode>2_<rounding>".
;; The assembly comes from output_fix_trunc (insn, operands, false).
20667 (define_insn "fistdi2_<rounding>"
20668 [(set (match_operand:DI 0 "nonimmediate_operand" "=m")
20669 (unspec:DI [(match_operand:XF 1 "register_operand" "f")]
20670 FIST_ROUNDING))
20671 (use (match_operand:HI 2 "memory_operand" "m"))
20672 (use (match_operand:HI 3 "memory_operand" "m"))
20673 (clobber (match_scratch:XF 4 "=&f"))]
20674 "TARGET_USE_FANCY_MATH_387
20675 && flag_unsafe_math_optimizations"
20676 "* return output_fix_trunc (insn, operands, false);"
20677 [(set_attr "type" "fistp")
20678 (set_attr "i387_cw" "<rounding>")
20679 (set_attr "mode" "DI")])
20680
;; XFmode -> SWI24 fist with floor/ceil rounding, memory destination.
;; Operands 2 and 3 are HImode memory operands that are only "use"d in
;; the RTL.  The assembly comes from
;; output_fix_trunc (insn, operands, false).
20681 (define_insn "fist<mode>2_<rounding>"
20682 [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m")
20683 (unspec:SWI24 [(match_operand:XF 1 "register_operand" "f")]
20684 FIST_ROUNDING))
20685 (use (match_operand:HI 2 "memory_operand" "m"))
20686 (use (match_operand:HI 3 "memory_operand" "m"))]
20687 "TARGET_USE_FANCY_MATH_387
20688 && flag_unsafe_math_optimizations"
20689 "* return output_fix_trunc (insn, operands, false);"
20690 [(set_attr "type" "fistp")
20691 (set_attr "i387_cw" "<rounding>")
20692 (set_attr "mode" "<MODE>")])
20693
;; lfloor/lceil from XFmode to SWI248x.  There are no preparation
;; statements: the expander emits the parallel of the FIST_ROUNDING set
;; and the FLAGS_REG clobber, i.e. the RTL shape of
;; "*fist<mode>2_<rounding>_1".  Requires x87, no pure SSE math, and
;; flag_unsafe_math_optimizations.
20694 (define_expand "l<rounding_insn>xf<mode>2"
20695 [(parallel [(set (match_operand:SWI248x 0 "nonimmediate_operand")
20696 (unspec:SWI248x [(match_operand:XF 1 "register_operand")]
20697 FIST_ROUNDING))
20698 (clobber (reg:CC FLAGS_REG))])]
20699 "TARGET_USE_FANCY_MATH_387
20700 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
20701 && flag_unsafe_math_optimizations")
20702
;; lfloor/lceil from MODEF to SWI48 under SSE math.
;; With TARGET_SSE4_1: round operand 1 into a fresh MODEF temporary via
;; sse4_1_round (ROUND_<ROUNDING> | ROUND_NO_EXC), then convert with
;; fix_trunc into operand 0.
;; Without SSE4.1 (the condition then requires !flag_trapping_math):
;; ix86_expand_lfloorceil, with true for floor and false for ceil.
;; FIST_ROUNDING has only FLOOR and CEIL, hence the gcc_unreachable ().
20703 (define_expand "l<rounding_insn><MODEF:mode><SWI48:mode>2"
20704 [(parallel [(set (match_operand:SWI48 0 "nonimmediate_operand")
20705 (unspec:SWI48 [(match_operand:MODEF 1 "register_operand")]
20706 FIST_ROUNDING))
20707 (clobber (reg:CC FLAGS_REG))])]
20708 "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
20709 && (TARGET_SSE4_1 || !flag_trapping_math)"
20710 {
20711 if (TARGET_SSE4_1)
20712 {
20713 rtx tmp = gen_reg_rtx (<MODEF:MODE>mode);
20714
20715 emit_insn (gen_sse4_1_round<MODEF:mode>2
20716 (tmp, operands[1], GEN_INT (ROUND_<ROUNDING>
20717 | ROUND_NO_EXC)));
20718 emit_insn (gen_fix_trunc<MODEF:mode><SWI48:mode>2
20719 (operands[0], tmp));
20720 }
20721 else if (ROUND_<ROUNDING> == ROUND_FLOOR)
20722 ix86_expand_lfloorceil (operands[0], operands[1], true);
20723 else if (ROUND_<ROUNDING> == ROUND_CEIL)
20724 ix86_expand_lfloorceil (operands[0], operands[1], false);
20725 else
20726 gcc_unreachable ();
20727
20728 DONE;
20729 })
20730
;; Outputs "fxam" followed by "fnstsw %0" for an X87MODEF register
;; operand; the HImode result is constrained to "a".  The "length"
;; attribute is hard-coded to 4 for the two-instruction sequence.
20731 (define_insn "fxam<mode>2_i387"
20732 [(set (match_operand:HI 0 "register_operand" "=a")
20733 (unspec:HI
20734 [(match_operand:X87MODEF 1 "register_operand" "f")]
20735 UNSPEC_FXAM))]
20736 "TARGET_USE_FANCY_MATH_387"
20737 "fxam\n\tfnstsw\t%0"
20738 [(set_attr "type" "multi")
20739 (set_attr "length" "4")
20740 (set_attr "unit" "i387")
20741 (set_attr "mode" "<MODE>")])
20742
;; signbit for TFmode (TARGET_SSE); SImode result in operand 0.
;; With TARGET_SSE4_1: ptest operand 1 against the mask returned by
;; ix86_build_signbit_mask (TFmode, 0, 0), set a QImode scratch from the
;; NE condition on the CCZmode flags, and zero-extend it into operand 0.
;; Otherwise: movmskps on the V4SFmode lowpart of operand 1, then AND
;; the result with 0x8.
20743 (define_expand "signbittf2"
20744 [(use (match_operand:SI 0 "register_operand"))
20745 (use (match_operand:TF 1 "register_operand"))]
20746 "TARGET_SSE"
20747 {
20748 if (TARGET_SSE4_1)
20749 {
20750 rtx mask = ix86_build_signbit_mask (TFmode, 0, 0);
20751 rtx scratch = gen_reg_rtx (QImode);
20752
20753 emit_insn (gen_ptesttf2 (operands[1], mask));
20754 ix86_expand_setcc (scratch, NE,
20755 gen_rtx_REG (CCZmode, FLAGS_REG), const0_rtx);
20756
20757 emit_insn (gen_zero_extendqisi2 (operands[0], scratch));
20758 }
20759 else
20760 {
20761 emit_insn (gen_sse_movmskps (operands[0],
20762 gen_lowpart (V4SFmode, operands[1])));
20763 emit_insn (gen_andsi3 (operands[0], operands[0], GEN_INT (0x8)));
20764 }
20765 DONE;
20766 })
20767
;; signbit for XFmode: runs fxamxf2_i387 into an HImode scratch and ANDs
;; its SImode lowpart with 0x200 into operand 0.
;; NOTE(review): 0x200 is presumably the C1 flag of the x87 status word
;; stored by fnstsw -- confirm against the x87 documentation.
20768 (define_expand "signbitxf2"
20769 [(use (match_operand:SI 0 "register_operand"))
20770 (use (match_operand:XF 1 "register_operand"))]
20771 "TARGET_USE_FANCY_MATH_387"
20772 {
20773 rtx scratch = gen_reg_rtx (HImode);
20774
20775 emit_insn (gen_fxamxf2_i387 (scratch, operands[1]));
20776 emit_insn (gen_andsi3 (operands[0],
20777 gen_lowpart (SImode, scratch), GEN_INT (0x200)));
20778 DONE;
20779 })
20780
;; Outputs %vmovmskpd from a DFmode register ("x") into an SImode
;; general register ("=r"); available with SSE math for DFmode.
20781 (define_insn "movmsk_df"
20782 [(set (match_operand:SI 0 "register_operand" "=r")
20783 (unspec:SI
20784 [(match_operand:DF 1 "register_operand" "x")]
20785 UNSPEC_MOVMSK))]
20786 "SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH"
20787 "%vmovmskpd\t{%1, %0|%0, %1}"
20788 [(set_attr "type" "ssemov")
20789 (set_attr "prefix" "maybe_vex")
20790 (set_attr "mode" "DF")])
20791
20792 ;; Use movmskpd in SSE mode to avoid store forwarding stall
20793 ;; for 32bit targets and movq+shrq sequence for 64bit targets.
;; signbit for DFmode; SImode result in operand 0.
;; With SSE math: movmsk_df, then AND the result with const1_rtx.
;; Otherwise: fxamdf2_i387 into an HImode scratch, then AND its SImode
;; lowpart with 0x200.
20794 (define_expand "signbitdf2"
20795 [(use (match_operand:SI 0 "register_operand"))
20796 (use (match_operand:DF 1 "register_operand"))]
20797 "TARGET_USE_FANCY_MATH_387
20798 || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
20799 {
20800 if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)
20801 {
20802 emit_insn (gen_movmsk_df (operands[0], operands[1]));
20803 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
20804 }
20805 else
20806 {
20807 rtx scratch = gen_reg_rtx (HImode);
20808
20809 emit_insn (gen_fxamdf2_i387 (scratch, operands[1]));
20810 emit_insn (gen_andsi3 (operands[0],
20811 gen_lowpart (SImode, scratch), GEN_INT (0x200)));
20812 }
20813 DONE;
20814 })
20815
;; signbit for SFmode, x87 only: the condition excludes SSE math for
;; SFmode.  Runs fxamsf2_i387 into an HImode scratch and ANDs its SImode
;; lowpart with 0x200 into operand 0.
20816 (define_expand "signbitsf2"
20817 [(use (match_operand:SI 0 "register_operand"))
20818 (use (match_operand:SF 1 "register_operand"))]
20819 "TARGET_USE_FANCY_MATH_387
20820 && !(SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH)"
20821 {
20822 rtx scratch = gen_reg_rtx (HImode);
20823
20824 emit_insn (gen_fxamsf2_i387 (scratch, operands[1]));
20825 emit_insn (gen_andsi3 (operands[0],
20826 gen_lowpart (SImode, scratch), GEN_INT (0x200)));
20827 DONE;
20828 })
20829 \f
20830 ;; Block operation instructions
20831
;; Outputs a bare "cld".  Modelled as an unspec_volatile (UNSPECV_CLD)
;; with no operands; always available (empty condition).
20832 (define_insn "cld"
20833 [(unspec_volatile [(const_int 0)] UNSPECV_CLD)]
20834 ""
20835 "cld"
20836 [(set_attr "length" "1")
20837 (set_attr "length_immediate" "0")
20838 (set_attr "modrm" "0")])
20839
;; Block copy expander.  All the work is done by
;; ix86_expand_set_or_cpymem: operands 0 and 1 are the destination and
;; source BLK memories, NULL is passed in the fourth argument slot, and
;; the final argument is false.  DONE when the helper returns nonzero,
;; FAIL otherwise.
20840 (define_expand "cpymem<mode>"
20841 [(use (match_operand:BLK 0 "memory_operand"))
20842 (use (match_operand:BLK 1 "memory_operand"))
20843 (use (match_operand:SWI48 2 "nonmemory_operand"))
20844 (use (match_operand:SWI48 3 "const_int_operand"))
20845 (use (match_operand:SI 4 "const_int_operand"))
20846 (use (match_operand:SI 5 "const_int_operand"))
20847 (use (match_operand:SI 6 ""))
20848 (use (match_operand:SI 7 ""))
20849 (use (match_operand:SI 8 ""))]
20850 ""
20851 {
20852 if (ix86_expand_set_or_cpymem (operands[0], operands[1],
20853 operands[2], NULL, operands[3],
20854 operands[4], operands[5],
20855 operands[6], operands[7],
20856 operands[8], false))
20857 DONE;
20858 else
20859 FAIL;
20860 })
20861
20862 ;; Most CPUs don't like single string operations
20863 ;; Handle this case here to simplify previous expander.
20864
;; Moves one piece from memory operand 3 to memory operand 1 and
;; advances the pointer registers, operands 0 and 2.
;;   - FAILs when operand 3 is not in the generic address space.
;;   - operands[5] and operands[6] are operands[0] and operands[2] plus
;;     the size of operand 1's mode.
;;   - When TARGET_SINGLE_STRINGOP or optimizing for size, and neither
;;     SI_REG nor DI_REG is fixed, strmov_singleop is emitted instead
;;     and the expander is DONE.
;;   - Otherwise the template is emitted: load operand 3 into a fresh
;;     register (operands[4]), store it to operand 1, then the two
;;     pointer updates, each in a parallel with a FLAGS_REG clobber.
20865 (define_expand "strmov"
20866 [(set (match_dup 4) (match_operand 3 "memory_operand"))
20867 (set (match_operand 1 "memory_operand") (match_dup 4))
20868 (parallel [(set (match_operand 0 "register_operand") (match_dup 5))
20869 (clobber (reg:CC FLAGS_REG))])
20870 (parallel [(set (match_operand 2 "register_operand") (match_dup 6))
20871 (clobber (reg:CC FLAGS_REG))])]
20872 ""
20873 {
20874 /* Can't use this for non-default address spaces. */
20875 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[3])))
20876 FAIL;
20877
20878 int piece_size = GET_MODE_SIZE (GET_MODE (operands[1]));
20879
20880 /* If .md ever supports :P for Pmode, these can be directly
20881 in the pattern above. */
20882 operands[5] = plus_constant (Pmode, operands[0], piece_size);
20883 operands[6] = plus_constant (Pmode, operands[2], piece_size);
20884
20885 /* Can't use this if the user has appropriated esi or edi. */
20886 if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ())
20887 && !(fixed_regs[SI_REG] || fixed_regs[DI_REG]))
20888 {
20889 emit_insn (gen_strmov_singleop (operands[0], operands[1],
20890 operands[2], operands[3],
20891 operands[5], operands[6]));
20892 DONE;
20893 }
20894
20895 operands[4] = gen_reg_rtx (GET_MODE (operands[1]));
20896 })
20897
;; Builds the single-instruction string move as one parallel: the memory
;; move (operand 3 -> operand 1) plus the two pointer updates
;; (operand 0 := operand 4, operand 2 := operand 5).  The only
;; preparation is to set ix86_optimize_mode_switching[X86_DIRFLAG] when
;; TARGET_CLD; the template is then emitted.
20898 (define_expand "strmov_singleop"
20899 [(parallel [(set (match_operand 1 "memory_operand")
20900 (match_operand 3 "memory_operand"))
20901 (set (match_operand 0 "register_operand")
20902 (match_operand 4))
20903 (set (match_operand 2 "register_operand")
20904 (match_operand 5))])]
20905 ""
20906 {
20907 if (TARGET_CLD)
20908 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
20909 })
20910
;; "movsq": copies a DImode value from the address in operand 3 to the
;; address in operand 2 and advances both pointers by 8.  Operand 0
;; ("=D") receives operand 2 + 8 and operand 1 ("=S") receives
;; operand 3 + 8; the inputs are tied to those outputs ("0" and "1").
;; TARGET_64BIT only; rejected when SI_REG or DI_REG is fixed or when
;; ix86_check_no_addr_space (insn) fails.
20911 (define_insn "*strmovdi_rex_1"
20912 [(set (mem:DI (match_operand:P 2 "register_operand" "0"))
20913 (mem:DI (match_operand:P 3 "register_operand" "1")))
20914 (set (match_operand:P 0 "register_operand" "=D")
20915 (plus:P (match_dup 2)
20916 (const_int 8)))
20917 (set (match_operand:P 1 "register_operand" "=S")
20918 (plus:P (match_dup 3)
20919 (const_int 8)))]
20920 "TARGET_64BIT
20921 && !(fixed_regs[SI_REG] || fixed_regs[DI_REG])
20922 && ix86_check_no_addr_space (insn)"
20923 "%^movsq"
20924 [(set_attr "type" "str")
20925 (set_attr "memory" "both")
20926 (set_attr "mode" "DI")])
20927
;; "movs{l|d}": SImode counterpart of *strmovdi_rex_1.  Copies one
;; SImode value and advances both pointers ("=D" and "=S") by 4.  There
;; is no TARGET_64BIT requirement.
20928 (define_insn "*strmovsi_1"
20929 [(set (mem:SI (match_operand:P 2 "register_operand" "0"))
20930 (mem:SI (match_operand:P 3 "register_operand" "1")))
20931 (set (match_operand:P 0 "register_operand" "=D")
20932 (plus:P (match_dup 2)
20933 (const_int 4)))
20934 (set (match_operand:P 1 "register_operand" "=S")
20935 (plus:P (match_dup 3)
20936 (const_int 4)))]
20937 "!(fixed_regs[SI_REG] || fixed_regs[DI_REG])
20938 && ix86_check_no_addr_space (insn)"
20939 "%^movs{l|d}"
20940 [(set_attr "type" "str")
20941 (set_attr "memory" "both")
20942 (set_attr "mode" "SI")])
20943
;; "movsw": copies one HImode value and advances both pointers ("=D"
;; and "=S") by 2.
20944 (define_insn "*strmovhi_1"
20945 [(set (mem:HI (match_operand:P 2 "register_operand" "0"))
20946 (mem:HI (match_operand:P 3 "register_operand" "1")))
20947 (set (match_operand:P 0 "register_operand" "=D")
20948 (plus:P (match_dup 2)
20949 (const_int 2)))
20950 (set (match_operand:P 1 "register_operand" "=S")
20951 (plus:P (match_dup 3)
20952 (const_int 2)))]
20953 "!(fixed_regs[SI_REG] || fixed_regs[DI_REG])
20954 && ix86_check_no_addr_space (insn)"
20955 "%^movsw"
20956 [(set_attr "type" "str")
20957 (set_attr "memory" "both")
20958 (set_attr "mode" "HI")])
20959
;; "movsb": copies one QImode value and advances both pointers ("=D"
;; and "=S") by 1.  The "prefix_rex" attribute is "0" when the pointer
;; mode P is DImode and "*" otherwise.
20960 (define_insn "*strmovqi_1"
20961 [(set (mem:QI (match_operand:P 2 "register_operand" "0"))
20962 (mem:QI (match_operand:P 3 "register_operand" "1")))
20963 (set (match_operand:P 0 "register_operand" "=D")
20964 (plus:P (match_dup 2)
20965 (const_int 1)))
20966 (set (match_operand:P 1 "register_operand" "=S")
20967 (plus:P (match_dup 3)
20968 (const_int 1)))]
20969 "!(fixed_regs[SI_REG] || fixed_regs[DI_REG])
20970 && ix86_check_no_addr_space (insn)"
20971 "%^movsb"
20972 [(set_attr "type" "str")
20973 (set_attr "memory" "both")
20974 (set (attr "prefix_rex")
20975 (if_then_else
20976 (match_test "<P:MODE>mode == DImode")
20977 (const_string "0")
20978 (const_string "*")))
20979 (set_attr "mode" "QI")])
20980
;; Builds the repeated string move as one parallel: operand 4 is set to
;; zero, operands 0 and 2 get their final values (operands 5 and 6),
;; memory operand 1 is set from memory operand 3, and operand 4 is also
;; "use"d.  The only preparation is to set
;; ix86_optimize_mode_switching[X86_DIRFLAG] when TARGET_CLD; the
;; template is then emitted.
20981 (define_expand "rep_mov"
20982 [(parallel [(set (match_operand 4 "register_operand") (const_int 0))
20983 (set (match_operand 0 "register_operand")
20984 (match_operand 5))
20985 (set (match_operand 2 "register_operand")
20986 (match_operand 6))
20987 (set (match_operand 1 "memory_operand")
20988 (match_operand 3 "memory_operand"))
20989 (use (match_dup 4))])]
20990 ""
20991 {
20992 if (TARGET_CLD)
20993 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
20994 })
20995
;; "rep movsq".  RTL effects: operand 2 ("=c") becomes 0; operand 0
;; ("=D") becomes operand 3 + (operand 5 << 3); operand 1 ("=S") becomes
;; operand 4 + (operand 5 << 3); BLK memory at operand 3 is set from BLK
;; memory at operand 4.  Operand 5 (the count) is tied to operand 2, and
;; operands 3 and 4 are tied to operands 0 and 1.
;; TARGET_64BIT only; rejected when CX_REG, SI_REG or DI_REG is fixed or
;; when ix86_check_no_addr_space (insn) fails.
20996 (define_insn "*rep_movdi_rex64"
20997 [(set (match_operand:P 2 "register_operand" "=c") (const_int 0))
20998 (set (match_operand:P 0 "register_operand" "=D")
20999 (plus:P (ashift:P (match_operand:P 5 "register_operand" "2")
21000 (const_int 3))
21001 (match_operand:P 3 "register_operand" "0")))
21002 (set (match_operand:P 1 "register_operand" "=S")
21003 (plus:P (ashift:P (match_dup 5) (const_int 3))
21004 (match_operand:P 4 "register_operand" "1")))
21005 (set (mem:BLK (match_dup 3))
21006 (mem:BLK (match_dup 4)))
21007 (use (match_dup 5))]
21008 "TARGET_64BIT
21009 && !(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
21010 && ix86_check_no_addr_space (insn)"
21011 "%^rep{%;} movsq"
21012 [(set_attr "type" "str")
21013 (set_attr "prefix_rep" "1")
21014 (set_attr "memory" "both")
21015 (set_attr "mode" "DI")])
21016
;; "rep movs{l|d}": same shape as *rep_movdi_rex64, but the count
;; (operand 5) is shifted left by 2 when the final "=D" and "=S" pointer
;; values are computed, and there is no TARGET_64BIT requirement.
21017 (define_insn "*rep_movsi"
21018 [(set (match_operand:P 2 "register_operand" "=c") (const_int 0))
21019 (set (match_operand:P 0 "register_operand" "=D")
21020 (plus:P (ashift:P (match_operand:P 5 "register_operand" "2")
21021 (const_int 2))
21022 (match_operand:P 3 "register_operand" "0")))
21023 (set (match_operand:P 1 "register_operand" "=S")
21024 (plus:P (ashift:P (match_dup 5) (const_int 2))
21025 (match_operand:P 4 "register_operand" "1")))
21026 (set (mem:BLK (match_dup 3))
21027 (mem:BLK (match_dup 4)))
21028 (use (match_dup 5))]
21029 "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
21030 && ix86_check_no_addr_space (insn)"
21031 "%^rep{%;} movs{l|d}"
21032 [(set_attr "type" "str")
21033 (set_attr "prefix_rep" "1")
21034 (set_attr "memory" "both")
21035 (set_attr "mode" "SI")])
21036
;; "rep movsb": the final "=D" and "=S" pointer values are the original
;; pointers (operands 3 and 4) plus the unshifted count (operand 5,
;; tied to the "=c" operand 2, which becomes 0).
21037 (define_insn "*rep_movqi"
21038 [(set (match_operand:P 2 "register_operand" "=c") (const_int 0))
21039 (set (match_operand:P 0 "register_operand" "=D")
21040 (plus:P (match_operand:P 3 "register_operand" "0")
21041 (match_operand:P 5 "register_operand" "2")))
21042 (set (match_operand:P 1 "register_operand" "=S")
21043 (plus:P (match_operand:P 4 "register_operand" "1") (match_dup 5)))
21044 (set (mem:BLK (match_dup 3))
21045 (mem:BLK (match_dup 4)))
21046 (use (match_dup 5))]
21047 "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
21048 && ix86_check_no_addr_space (insn)"
21049 "%^rep{%;} movsb"
21050 [(set_attr "type" "str")
21051 (set_attr "prefix_rep" "1")
21052 (set_attr "memory" "both")
21053 (set_attr "mode" "QI")])
21054
;; Block set expander.  All the work is done by
;; ix86_expand_set_or_cpymem: operand 0 is the destination BLK memory,
;; NULL is passed in the second argument slot, operand 2 is the QImode
;; value operand, and the final argument is true.  DONE when the helper
;; returns nonzero, FAIL otherwise.
21055 (define_expand "setmem<mode>"
21056 [(use (match_operand:BLK 0 "memory_operand"))
21057 (use (match_operand:SWI48 1 "nonmemory_operand"))
21058 (use (match_operand:QI 2 "nonmemory_operand"))
21059 (use (match_operand 3 "const_int_operand"))
21060 (use (match_operand:SI 4 "const_int_operand"))
21061 (use (match_operand:SI 5 "const_int_operand"))
21062 (use (match_operand:SI 6 ""))
21063 (use (match_operand:SI 7 ""))
21064 (use (match_operand:SI 8 ""))]
21065 ""
21066 {
21067 if (ix86_expand_set_or_cpymem (operands[0], NULL,
21068 operands[1], operands[2],
21069 operands[3], operands[4],
21070 operands[5], operands[6],
21071 operands[7], operands[8], true))
21072 DONE;
21073 else
21074 FAIL;
21075 })
21076
21077 ;; Most CPUs don't like single string operations
21078 ;; Handle this case here to simplify previous expander.
21079
;; Stores register operand 2 to memory operand 1 and advances the
;; pointer register, operand 0.
;;   - FAILs when operand 1 is not in the generic address space.
;;   - If the modes differ, operand 1 is re-typed to operand 2's mode
;;     with adjust_address_nv at offset 0.
;;   - operands[3] is operands[0] plus the size of operand 2's mode.
;;   - When TARGET_SINGLE_STRINGOP or optimizing for size, and neither
;;     AX_REG nor DI_REG is fixed, strset_singleop is emitted instead
;;     and the expander is DONE.
;;   - Otherwise the template is emitted: a plain store followed by the
;;     pointer update in a parallel with a FLAGS_REG clobber.
21080 (define_expand "strset"
21081 [(set (match_operand 1 "memory_operand")
21082 (match_operand 2 "register_operand"))
21083 (parallel [(set (match_operand 0 "register_operand")
21084 (match_dup 3))
21085 (clobber (reg:CC FLAGS_REG))])]
21086 ""
21087 {
21088 /* Can't use this for non-default address spaces. */
21089 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[1])))
21090 FAIL;
21091
21092 if (GET_MODE (operands[1]) != GET_MODE (operands[2]))
21093 operands[1] = adjust_address_nv (operands[1], GET_MODE (operands[2]), 0);
21094
21095 /* If .md ever supports :P for Pmode, this can be directly
21096 in the pattern above. */
21097 operands[3] = plus_constant (Pmode, operands[0],
21098 GET_MODE_SIZE (GET_MODE (operands[2])));
21099
21100 /* Can't use this if the user has appropriated eax or edi. */
21101 if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ())
21102 && !(fixed_regs[AX_REG] || fixed_regs[DI_REG]))
21103 {
21104 emit_insn (gen_strset_singleop (operands[0], operands[1], operands[2],
21105 operands[3]));
21106 DONE;
21107 }
21108 })
21109
;; Builds the single-instruction string store as one parallel: the store
;; (operand 2 -> memory operand 1), the pointer update
;; (operand 0 := operand 3) and an UNSPEC_STOS marker.  The only
;; preparation is to set ix86_optimize_mode_switching[X86_DIRFLAG] when
;; TARGET_CLD; the template is then emitted.
21110 (define_expand "strset_singleop"
21111 [(parallel [(set (match_operand 1 "memory_operand")
21112 (match_operand 2 "register_operand"))
21113 (set (match_operand 0 "register_operand")
21114 (match_operand 3))
21115 (unspec [(const_int 0)] UNSPEC_STOS)])]
21116 ""
21117 {
21118 if (TARGET_CLD)
21119 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
21120 })
21121
;; "stosq": stores the DImode register operand 2 ("a") at the address in
;; operand 1 and advances the pointer by 8.  Operand 0 ("=D") receives
;; operand 1 + 8, and operand 1 is tied to it ("0").
;; TARGET_64BIT only; rejected when AX_REG or DI_REG is fixed or when
;; ix86_check_no_addr_space (insn) fails.
21122 (define_insn "*strsetdi_rex_1"
21123 [(set (mem:DI (match_operand:P 1 "register_operand" "0"))
21124 (match_operand:DI 2 "register_operand" "a"))
21125 (set (match_operand:P 0 "register_operand" "=D")
21126 (plus:P (match_dup 1)
21127 (const_int 8)))
21128 (unspec [(const_int 0)] UNSPEC_STOS)]
21129 "TARGET_64BIT
21130 && !(fixed_regs[AX_REG] || fixed_regs[DI_REG])
21131 && ix86_check_no_addr_space (insn)"
21132 "%^stosq"
21133 [(set_attr "type" "str")
21134 (set_attr "memory" "store")
21135 (set_attr "mode" "DI")])
21136
;; "stos{l|d}": stores the SImode register operand 2 ("a") at the
;; address in operand 1 and advances the "=D" pointer by 4.
21137 (define_insn "*strsetsi_1"
21138 [(set (mem:SI (match_operand:P 1 "register_operand" "0"))
21139 (match_operand:SI 2 "register_operand" "a"))
21140 (set (match_operand:P 0 "register_operand" "=D")
21141 (plus:P (match_dup 1)
21142 (const_int 4)))
21143 (unspec [(const_int 0)] UNSPEC_STOS)]
21144 "!(fixed_regs[AX_REG] || fixed_regs[DI_REG])
21145 && ix86_check_no_addr_space (insn)"
21146 "%^stos{l|d}"
21147 [(set_attr "type" "str")
21148 (set_attr "memory" "store")
21149 (set_attr "mode" "SI")])
21150
;; "stosw": stores the HImode register operand 2 ("a") at the address in
;; operand 1 and advances the "=D" pointer by 2.
21151 (define_insn "*strsethi_1"
21152 [(set (mem:HI (match_operand:P 1 "register_operand" "0"))
21153 (match_operand:HI 2 "register_operand" "a"))
21154 (set (match_operand:P 0 "register_operand" "=D")
21155 (plus:P (match_dup 1)
21156 (const_int 2)))
21157 (unspec [(const_int 0)] UNSPEC_STOS)]
21158 "!(fixed_regs[AX_REG] || fixed_regs[DI_REG])
21159 && ix86_check_no_addr_space (insn)"
21160 "%^stosw"
21161 [(set_attr "type" "str")
21162 (set_attr "memory" "store")
21163 (set_attr "mode" "HI")])
21164
;; "stosb": stores the QImode register operand 2 ("a") at the address in
;; operand 1 and advances the "=D" pointer by 1.  The "prefix_rex"
;; attribute is "0" when the pointer mode P is DImode and "*" otherwise.
21165 (define_insn "*strsetqi_1"
21166 [(set (mem:QI (match_operand:P 1 "register_operand" "0"))
21167 (match_operand:QI 2 "register_operand" "a"))
21168 (set (match_operand:P 0 "register_operand" "=D")
21169 (plus:P (match_dup 1)
21170 (const_int 1)))
21171 (unspec [(const_int 0)] UNSPEC_STOS)]
21172 "!(fixed_regs[AX_REG] || fixed_regs[DI_REG])
21173 && ix86_check_no_addr_space (insn)"
21174 "%^stosb"
21175 [(set_attr "type" "str")
21176 (set_attr "memory" "store")
21177 (set (attr "prefix_rex")
21178 (if_then_else
21179 (match_test "<P:MODE>mode == DImode")
21180 (const_string "0")
21181 (const_string "*")))
21182 (set_attr "mode" "QI")])
21183
;; Builds the repeated string store as one parallel: operand 1 is set to
;; zero, operand 0 gets its final value (operand 4), memory operand 2 is
;; set to (const_int 0), and register operand 3 and operand 1 are
;; "use"d.  The only preparation is to set
;; ix86_optimize_mode_switching[X86_DIRFLAG] when TARGET_CLD; the
;; template is then emitted.
21184 (define_expand "rep_stos"
21185 [(parallel [(set (match_operand 1 "register_operand") (const_int 0))
21186 (set (match_operand 0 "register_operand")
21187 (match_operand 4))
21188 (set (match_operand 2 "memory_operand") (const_int 0))
21189 (use (match_operand 3 "register_operand"))
21190 (use (match_dup 1))])]
21191 ""
21192 {
21193 if (TARGET_CLD)
21194 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
21195 })
21196
;; "rep stosq".  RTL effects: operand 1 ("=c") becomes 0; operand 0
;; ("=D") becomes operand 3 + (operand 4 << 3); BLK memory at operand 3
;; is written (modelled as a set to (const_int 0)).  The DImode register
;; operand 2 ("a") is only "use"d.  Operand 4 (the count) is tied to
;; operand 1 and operand 3 is tied to operand 0.
;; TARGET_64BIT only; rejected when AX_REG, CX_REG or DI_REG is fixed or
;; when ix86_check_no_addr_space (insn) fails.
21197 (define_insn "*rep_stosdi_rex64"
21198 [(set (match_operand:P 1 "register_operand" "=c") (const_int 0))
21199 (set (match_operand:P 0 "register_operand" "=D")
21200 (plus:P (ashift:P (match_operand:P 4 "register_operand" "1")
21201 (const_int 3))
21202 (match_operand:P 3 "register_operand" "0")))
21203 (set (mem:BLK (match_dup 3))
21204 (const_int 0))
21205 (use (match_operand:DI 2 "register_operand" "a"))
21206 (use (match_dup 4))]
21207 "TARGET_64BIT
21208 && !(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
21209 && ix86_check_no_addr_space (insn)"
21210 "%^rep{%;} stosq"
21211 [(set_attr "type" "str")
21212 (set_attr "prefix_rep" "1")
21213 (set_attr "memory" "store")
21214 (set_attr "mode" "DI")])
21215
;; "rep stos{l|d}": same shape as *rep_stosdi_rex64, but the count
;; (operand 4) is shifted left by 2 when the final "=D" pointer value is
;; computed, the "use"d value register is SImode, and there is no
;; TARGET_64BIT requirement.
21216 (define_insn "*rep_stossi"
21217 [(set (match_operand:P 1 "register_operand" "=c") (const_int 0))
21218 (set (match_operand:P 0 "register_operand" "=D")
21219 (plus:P (ashift:P (match_operand:P 4 "register_operand" "1")
21220 (const_int 2))
21221 (match_operand:P 3 "register_operand" "0")))
21222 (set (mem:BLK (match_dup 3))
21223 (const_int 0))
21224 (use (match_operand:SI 2 "register_operand" "a"))
21225 (use (match_dup 4))]
21226 "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
21227 && ix86_check_no_addr_space (insn)"
21228 "%^rep{%;} stos{l|d}"
21229 [(set_attr "type" "str")
21230 (set_attr "prefix_rep" "1")
21231 (set_attr "memory" "store")
21232 (set_attr "mode" "SI")])
21233
;; rep stosb.  Byte variant: the final pointer (operand 0, constraint =D)
;; is operand 3 plus the unshifted count operand 4, and the used value
;; (operand 2, constraint a) is QImode.  Operand 1 (constraint =c) is set
;; to zero.  prefix_rex is forced to 0 when the pointer mode P is DImode.
21234 (define_insn "*rep_stosqi"
21235 [(set (match_operand:P 1 "register_operand" "=c") (const_int 0))
21236 (set (match_operand:P 0 "register_operand" "=D")
21237 (plus:P (match_operand:P 3 "register_operand" "0")
21238 (match_operand:P 4 "register_operand" "1")))
21239 (set (mem:BLK (match_dup 3))
21240 (const_int 0))
21241 (use (match_operand:QI 2 "register_operand" "a"))
21242 (use (match_dup 4))]
21243 "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
21244 && ix86_check_no_addr_space (insn)"
21245 "%^rep{%;} stosb"
21246 [(set_attr "type" "str")
21247 (set_attr "prefix_rep" "1")
21248 (set_attr "memory" "store")
21249 (set (attr "prefix_rex")
21250 (if_then_else
21251 (match_test "<P:MODE>mode == DImode")
21252 (const_string "0")
21253 (const_string "*")))
21254 (set_attr "mode" "QI")])
21255
;; Expander for cmpmemsi.  All work is delegated to
;; ix86_expand_cmpstrn_or_cmpmem, called with operands 0-4 and a final
;; argument of false.  The expansion is DONE when the helper returns
;; nonzero and FAILs otherwise.  Both BLK operands must be memory_operand.
21256 (define_expand "cmpmemsi"
21257 [(set (match_operand:SI 0 "register_operand" "")
21258 (compare:SI (match_operand:BLK 1 "memory_operand" "")
21259 (match_operand:BLK 2 "memory_operand" "") ) )
21260 (use (match_operand 3 "general_operand"))
21261 (use (match_operand 4 "immediate_operand"))]
21262 ""
21263 {
21264 if (ix86_expand_cmpstrn_or_cmpmem (operands[0], operands[1],
21265 operands[2], operands[3],
21266 operands[4], false))
21267 DONE;
21268 else
21269 FAIL;
21270 })
21271
;; Expander for cmpstrnsi.  Delegates to ix86_expand_cmpstrn_or_cmpmem
;; with operands 0-4 and a final argument of true.  The expansion is DONE
;; when the helper returns nonzero and FAILs otherwise.  Unlike cmpmemsi,
;; the BLK operands are accepted with the general_operand predicate.
21272 (define_expand "cmpstrnsi"
21273 [(set (match_operand:SI 0 "register_operand")
21274 (compare:SI (match_operand:BLK 1 "general_operand")
21275 (match_operand:BLK 2 "general_operand")))
21276 (use (match_operand 3 "general_operand"))
21277 (use (match_operand 4 "immediate_operand"))]
21278 ""
21279 {
21280 if (ix86_expand_cmpstrn_or_cmpmem (operands[0], operands[1],
21281 operands[2], operands[3],
21282 operands[4], true))
21283 DONE;
21284 else
21285 FAIL;
21286 })
21287
21288 ;; Produce a tri-state integer (-1, 0, 1) from condition codes.
21289
;; The C body allocates two fresh QImode pseudos for operands 1 and 2.
;; The template sets operand 1 to the GTU test and operand 2 to the LTU
;; test of FLAGS_REG, then computes operand 0 = operand 1 - operand 2 in a
;; PARALLEL that clobbers FLAGS_REG.
21290 (define_expand "cmpintqi"
21291 [(set (match_dup 1)
21292 (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
21293 (set (match_dup 2)
21294 (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
21295 (parallel [(set (match_operand:QI 0 "register_operand")
21296 (minus:QI (match_dup 1)
21297 (match_dup 2)))
21298 (clobber (reg:CC FLAGS_REG))])]
21299 ""
21300 {
21301 operands[1] = gen_reg_rtx (QImode);
21302 operands[2] = gen_reg_rtx (QImode);
21303 })
21304
21305 ;; memcmp recognizers. The `cmpsb' opcode does nothing if the count is
21306 ;; zero. Emit extra code to make sure that a zero-length compare is EQ.
21307
;; Expander that builds the flags-setting compare PARALLEL for two memory
;; operands (4 and 5), with uses of operands 2 and 3 and clobbers of
;; operands 0, 1 and 2.  The C body only sets
;; ix86_optimize_mode_switching[X86_DIRFLAG] when TARGET_CLD is enabled.
21308 (define_expand "cmpstrnqi_nz_1"
21309 [(parallel [(set (reg:CC FLAGS_REG)
21310 (compare:CC (match_operand 4 "memory_operand")
21311 (match_operand 5 "memory_operand")))
21312 (use (match_operand 2 "register_operand"))
21313 (use (match_operand:SI 3 "immediate_operand"))
21314 (clobber (match_operand 0 "register_operand"))
21315 (clobber (match_operand 1 "register_operand"))
21316 (clobber (match_dup 2))])]
21317 ""
21318 {
21319 if (TARGET_CLD)
21320 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
21321 })
21322
;; repz cmpsb.  Compares the BLK memories addressed by operands 4 and 5,
;; which are tied to the clobbered operands 0 (constraint =S) and 1
;; (constraint =D); operand 6 is tied to the clobbered operand 2
;; (constraint =c).  Operand 3 is an SImode immediate recorded only as a
;; use.  Not available when CX_REG, SI_REG or DI_REG is fixed or when
;; ix86_check_no_addr_space (insn) fails.
21323 (define_insn "*cmpstrnqi_nz_1"
21324 [(set (reg:CC FLAGS_REG)
21325 (compare:CC (mem:BLK (match_operand:P 4 "register_operand" "0"))
21326 (mem:BLK (match_operand:P 5 "register_operand" "1"))))
21327 (use (match_operand:P 6 "register_operand" "2"))
21328 (use (match_operand:SI 3 "immediate_operand" "i"))
21329 (clobber (match_operand:P 0 "register_operand" "=S"))
21330 (clobber (match_operand:P 1 "register_operand" "=D"))
21331 (clobber (match_operand:P 2 "register_operand" "=c"))]
21332 "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
21333 && ix86_check_no_addr_space (insn)"
21334 "%^repz{%;} cmpsb"
21335 [(set_attr "type" "str")
21336 (set_attr "mode" "QI")
21337 (set (attr "prefix_rex")
21338 (if_then_else
21339 (match_test "<P:MODE>mode == DImode")
21340 (const_string "0")
21341 (const_string "*")))
21342 (set_attr "prefix_rep" "1")])
21343
21344 ;; The same, but the count may be zero (it is not known to be nonzero).
21345
;; Expander for the variant whose compare is guarded by the count: FLAGS_REG
;; is set to the compare of memory operands 4 and 5 when operand 2 is
;; nonzero, and to (const_int 0) otherwise.  The PARALLEL also uses
;; operand 3 and the incoming FLAGS_REG, and clobbers operands 0, 1 and 2.
;; The C body only sets ix86_optimize_mode_switching[X86_DIRFLAG] when
;; TARGET_CLD is enabled.
21346 (define_expand "cmpstrnqi_1"
21347 [(parallel [(set (reg:CC FLAGS_REG)
21348 (if_then_else:CC (ne (match_operand 2 "register_operand")
21349 (const_int 0))
21350 (compare:CC (match_operand 4 "memory_operand")
21351 (match_operand 5 "memory_operand"))
21352 (const_int 0)))
21353 (use (match_operand:SI 3 "immediate_operand"))
21354 (use (reg:CC FLAGS_REG))
21355 (clobber (match_operand 0 "register_operand"))
21356 (clobber (match_operand 1 "register_operand"))
21357 (clobber (match_dup 2))])]
21358 ""
21359 {
21360 if (TARGET_CLD)
21361 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
21362 })
21363
;; repz cmpsb for a count that may be zero.  FLAGS_REG receives the compare
;; of the BLK memories at operands 4 and 5 when operand 6 is nonzero, and
;; (const_int 0) otherwise; the incoming FLAGS_REG is recorded as a use.
;; Operands 4, 5 and 6 are tied to the clobbered operands 0 (=S), 1 (=D)
;; and 2 (=c) respectively.  Not available when CX_REG, SI_REG or DI_REG
;; is fixed or when ix86_check_no_addr_space (insn) fails.
21364 (define_insn "*cmpstrnqi_1"
21365 [(set (reg:CC FLAGS_REG)
21366 (if_then_else:CC (ne (match_operand:P 6 "register_operand" "2")
21367 (const_int 0))
21368 (compare:CC (mem:BLK (match_operand:P 4 "register_operand" "0"))
21369 (mem:BLK (match_operand:P 5 "register_operand" "1")))
21370 (const_int 0)))
21371 (use (match_operand:SI 3 "immediate_operand" "i"))
21372 (use (reg:CC FLAGS_REG))
21373 (clobber (match_operand:P 0 "register_operand" "=S"))
21374 (clobber (match_operand:P 1 "register_operand" "=D"))
21375 (clobber (match_operand:P 2 "register_operand" "=c"))]
21376 "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
21377 && ix86_check_no_addr_space (insn)"
21378 "%^repz{%;} cmpsb"
21379 [(set_attr "type" "str")
21380 (set_attr "mode" "QI")
21381 (set (attr "prefix_rex")
21382 (if_then_else
21383 (match_test "<P:MODE>mode == DImode")
21384 (const_string "0")
21385 (const_string "*")))
21386 (set_attr "prefix_rep" "1")])
21387
;; Expander for strlen in pointer mode P.  All work is delegated to
;; ix86_expand_strlen, called with operands 0-3.  The expansion is DONE
;; when the helper returns nonzero and FAILs otherwise.
21388 (define_expand "strlen<mode>"
21389 [(set (match_operand:P 0 "register_operand")
21390 (unspec:P [(match_operand:BLK 1 "general_operand")
21391 (match_operand:QI 2 "immediate_operand")
21392 (match_operand 3 "immediate_operand")]
21393 UNSPEC_SCAS))]
21394 ""
21395 {
21396 if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
21397 DONE;
21398 else
21399 FAIL;
21400 })
21401
;; Expander that builds a PARALLEL setting operand 0 from operand 2 (an
;; arbitrary expression supplied by the caller) while clobbering operand 1
;; and FLAGS_REG.  The C body only sets
;; ix86_optimize_mode_switching[X86_DIRFLAG] when TARGET_CLD is enabled.
21402 (define_expand "strlenqi_1"
21403 [(parallel [(set (match_operand 0 "register_operand")
21404 (match_operand 2))
21405 (clobber (match_operand 1 "register_operand"))
21406 (clobber (reg:CC FLAGS_REG))])]
21407 ""
21408 {
21409 if (TARGET_CLD)
21410 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
21411 })
21412
;; repnz scasb.  Operand 0 (earlyclobber, constraint =&c) receives an
;; UNSPEC_SCAS of: the BLK memory at operand 5 (tied to the clobbered
;; operand 1, constraint =D), QImode operand 2 (constraint a), immediate
;; operand 3, and operand 4 (tied to operand 0).  FLAGS_REG is clobbered.
;; Not available when AX_REG, CX_REG or DI_REG is fixed or when
;; ix86_check_no_addr_space (insn) fails.
21413 (define_insn "*strlenqi_1"
21414 [(set (match_operand:P 0 "register_operand" "=&c")
21415 (unspec:P [(mem:BLK (match_operand:P 5 "register_operand" "1"))
21416 (match_operand:QI 2 "register_operand" "a")
21417 (match_operand:P 3 "immediate_operand" "i")
21418 (match_operand:P 4 "register_operand" "0")] UNSPEC_SCAS))
21419 (clobber (match_operand:P 1 "register_operand" "=D"))
21420 (clobber (reg:CC FLAGS_REG))]
21421 "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
21422 && ix86_check_no_addr_space (insn)"
21423 "%^repnz{%;} scasb"
21424 [(set_attr "type" "str")
21425 (set_attr "mode" "QI")
21426 (set (attr "prefix_rex")
21427 (if_then_else
21428 (match_test "<P:MODE>mode == DImode")
21429 (const_string "0")
21430 (const_string "*")))
21431 (set_attr "prefix_rep" "1")])
21432
21433 ;; Peephole optimizations to clean up after cmpstrn*. This should be
21434 ;; handled in combine, but it is not currently up to the task.
21435 ;; When used for their truth value, the cmpstrn* expanders generate
21436 ;; code like this:
21437 ;;
21438 ;; repz cmpsb
21439 ;; seta %al
21440 ;; setb %dl
21441 ;; cmpb %al, %dl
21442 ;; jcc label
21443 ;;
21444 ;; The intermediate three instructions are unnecessary.
21445
21446 ;; This one handles cmpstrn*_nz_1...
;; Matches four insns: the compare PARALLEL, a GTU setcc into operand 7, an
;; LTU setcc into operand 8, and a compare of operands 7 and 8 into
;; FLAGS_REG.  When peep2_reg_dead_p (4, ...) holds for both operands 7
;; and 8, the sequence is replaced by the original compare PARALLEL alone.
21447 (define_peephole2
21448 [(parallel[
21449 (set (reg:CC FLAGS_REG)
21450 (compare:CC (mem:BLK (match_operand 4 "register_operand"))
21451 (mem:BLK (match_operand 5 "register_operand"))))
21452 (use (match_operand 6 "register_operand"))
21453 (use (match_operand:SI 3 "immediate_operand"))
21454 (clobber (match_operand 0 "register_operand"))
21455 (clobber (match_operand 1 "register_operand"))
21456 (clobber (match_operand 2 "register_operand"))])
21457 (set (match_operand:QI 7 "register_operand")
21458 (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
21459 (set (match_operand:QI 8 "register_operand")
21460 (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
21461 (set (reg FLAGS_REG)
21462 (compare (match_dup 7) (match_dup 8)))
21463 ]
21464 "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])"
21465 [(parallel[
21466 (set (reg:CC FLAGS_REG)
21467 (compare:CC (mem:BLK (match_dup 4))
21468 (mem:BLK (match_dup 5))))
21469 (use (match_dup 6))
21470 (use (match_dup 3))
21471 (clobber (match_dup 0))
21472 (clobber (match_dup 1))
21473 (clobber (match_dup 2))])])
21474
21475 ;; ...and this one handles cmpstrn*_1.
;; Same clean-up for the count-guarded form: the if_then_else compare
;; PARALLEL followed by the GTU/LTU setcc pair into operands 7 and 8 and a
;; compare of those two.  When peep2_reg_dead_p (4, ...) holds for both
;; operands 7 and 8, only the original PARALLEL is re-emitted.
21476 (define_peephole2
21477 [(parallel[
21478 (set (reg:CC FLAGS_REG)
21479 (if_then_else:CC (ne (match_operand 6 "register_operand")
21480 (const_int 0))
21481 (compare:CC (mem:BLK (match_operand 4 "register_operand"))
21482 (mem:BLK (match_operand 5 "register_operand")))
21483 (const_int 0)))
21484 (use (match_operand:SI 3 "immediate_operand"))
21485 (use (reg:CC FLAGS_REG))
21486 (clobber (match_operand 0 "register_operand"))
21487 (clobber (match_operand 1 "register_operand"))
21488 (clobber (match_operand 2 "register_operand"))])
21489 (set (match_operand:QI 7 "register_operand")
21490 (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
21491 (set (match_operand:QI 8 "register_operand")
21492 (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
21493 (set (reg FLAGS_REG)
21494 (compare (match_dup 7) (match_dup 8)))
21495 ]
21496 "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])"
21497 [(parallel[
21498 (set (reg:CC FLAGS_REG)
21499 (if_then_else:CC (ne (match_dup 6)
21500 (const_int 0))
21501 (compare:CC (mem:BLK (match_dup 4))
21502 (mem:BLK (match_dup 5)))
21503 (const_int 0)))
21504 (use (match_dup 3))
21505 (use (reg:CC FLAGS_REG))
21506 (clobber (match_dup 0))
21507 (clobber (match_dup 1))
21508 (clobber (match_dup 2))])])
21509 \f
21510 ;; Conditional move instructions.
21511
;; Integer conditional-move expander for the SWIM modes.  Delegates to
;; ix86_expand_int_movcc (operands); DONE when it returns nonzero, FAIL
;; otherwise.
21512 (define_expand "mov<mode>cc"
21513 [(set (match_operand:SWIM 0 "register_operand")
21514 (if_then_else:SWIM (match_operand 1 "comparison_operator")
21515 (match_operand:SWIM 2 "<general_operand>")
21516 (match_operand:SWIM 3 "<general_operand>")))]
21517 ""
21518 "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
21519
21520 ;; Data flow gets confused by our desire for `sbbl reg,reg', and clearing
21521 ;; the register first winds up with `sbbl $0,reg', which is also weird.
21522 ;; So just document what we're doing explicitly.
21523
;; Expander (no condition, no C body) producing a PARALLEL that sets
;; operand 0 to -1 when the carry-flag operator (operand 2, applied to
;; flags operand 1 and zero) holds and to 0 otherwise, clobbering
;; FLAGS_REG.
21524 (define_expand "x86_mov<mode>cc_0_m1"
21525 [(parallel
21526 [(set (match_operand:SWI48 0 "register_operand")
21527 (if_then_else:SWI48
21528 (match_operator:SWI48 2 "ix86_carry_flag_operator"
21529 [(match_operand 1 "flags_reg_operand")
21530 (const_int 0)])
21531 (const_int -1)
21532 (const_int 0)))
21533 (clobber (reg:CC FLAGS_REG))])])
21534
;; Insn for the -1/0 selection on the carry flag: operand 0 is set to -1
;; when carry-flag operator 1 holds and to 0 otherwise.  Emitted as sbb of
;; operand 0 with itself; FLAGS_REG is clobbered.
21535 (define_insn "*x86_mov<mode>cc_0_m1"
21536 [(set (match_operand:SWI48 0 "register_operand" "=r")
21537 (if_then_else:SWI48 (match_operator 1 "ix86_carry_flag_operator"
21538 [(reg FLAGS_REG) (const_int 0)])
21539 (const_int -1)
21540 (const_int 0)))
21541 (clobber (reg:CC FLAGS_REG))]
21542 ""
21543 "sbb{<imodesuffix>}\t%0, %0"
21544 [(set_attr "type" "alu1")
21545 (set_attr "use_carry" "1")
21546 (set_attr "pent_pair" "pu")
21547 (set_attr "mode" "<MODE>")
21548 (set_attr "length_immediate" "0")])
21549
;; Alternative RTL spelling of the same sbb: a sign_extract of width 1 at
;; position 0 taken from carry-flag operator 1.  Emits the same sbb of
;; operand 0 with itself and clobbers FLAGS_REG.
21550 (define_insn "*x86_mov<mode>cc_0_m1_se"
21551 [(set (match_operand:SWI48 0 "register_operand" "=r")
21552 (sign_extract:SWI48 (match_operator 1 "ix86_carry_flag_operator"
21553 [(reg FLAGS_REG) (const_int 0)])
21554 (const_int 1)
21555 (const_int 0)))
21556 (clobber (reg:CC FLAGS_REG))]
21557 ""
21558 "sbb{<imodesuffix>}\t%0, %0"
21559 [(set_attr "type" "alu1")
21560 (set_attr "use_carry" "1")
21561 (set_attr "pent_pair" "pu")
21562 (set_attr "mode" "<MODE>")
21563 (set_attr "length_immediate" "0")])
21564
;; Third spelling of the sbb idiom: the negation of carry-flag operator 1.
;; Unlike the two SWI48 variants this one covers all SWI modes and uses the
;; mode-dependent register constraint <r>.  FLAGS_REG is clobbered.
21565 (define_insn "*x86_mov<mode>cc_0_m1_neg"
21566 [(set (match_operand:SWI 0 "register_operand" "=<r>")
21567 (neg:SWI (match_operator 1 "ix86_carry_flag_operator"
21568 [(reg FLAGS_REG) (const_int 0)])))
21569 (clobber (reg:CC FLAGS_REG))]
21570 ""
21571 "sbb{<imodesuffix>}\t%0, %0"
21572 [(set_attr "type" "alu1")
21573 (set_attr "use_carry" "1")
21574 (set_attr "pent_pair" "pu")
21575 (set_attr "mode" "<MODE>")
21576 (set_attr "length_immediate" "0")])
21577
;; Expander (no condition, no C body) producing a PARALLEL that sets
;; operand 0 to the negation of the LTU test of FLAGS_REG in CCCmode and
;; clobbers FLAGS_REG.
21578 (define_expand "x86_mov<mode>cc_0_m1_neg"
21579 [(parallel
21580 [(set (match_operand:SWI48 0 "register_operand")
21581 (neg:SWI48 (ltu:SWI48 (reg:CCC FLAGS_REG) (const_int 0))))
21582 (clobber (reg:CC FLAGS_REG))])])
21583
;; Splits -(operand 1 <=u C) into a compare of operand 1 against C + 1
;; followed by the negated LTU test of FLAGS_REG.  The preparation
;; statement replaces operand 2 by operand 2 + 1.  C must satisfy
;; x86_64_immediate_operand and must not be -1 or 2147483647.
;; NOTE(review): the two excluded values are presumably those for which
;; C + 1 would not be usable -- confirm.
21584 (define_split
21585 [(set (match_operand:SWI48 0 "register_operand")
21586 (neg:SWI48
21587 (leu:SWI48
21588 (match_operand 1 "int_nonimmediate_operand")
21589 (match_operand 2 "const_int_operand"))))]
21590 "x86_64_immediate_operand (operands[2], VOIDmode)
21591 && INTVAL (operands[2]) != -1
21592 && INTVAL (operands[2]) != 2147483647"
21593 [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2)))
21594 (set (match_dup 0)
21595 (neg:SWI48 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0))))]
21596 "operands[2] = GEN_INT (INTVAL (operands[2]) + 1);")
21597
;; Splits -(operand 1 == 0) into a compare of operand 1 against 1 followed
;; by the negated LTU test of FLAGS_REG.  Unconditional (empty condition).
21598 (define_split
21599 [(set (match_operand:SWI 0 "register_operand")
21600 (neg:SWI
21601 (eq:SWI
21602 (match_operand 1 "int_nonimmediate_operand")
21603 (const_int 0))))]
21604 ""
21605 [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (const_int 1)))
21606 (set (match_dup 0)
21607 (neg:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))))])
21608
;; Splits -(operand 1 != 0) into a CCCmode flags set computed by
;; UNSPEC_CC_NE on operand 1 and zero, followed by the negated LTU test of
;; those flags.  Unconditional (empty condition).
21609 (define_split
21610 [(set (match_operand:SWI 0 "register_operand")
21611 (neg:SWI
21612 (ne:SWI
21613 (match_operand 1 "int_nonimmediate_operand")
21614 (const_int 0))))]
21615 ""
21616 [(set (reg:CCC FLAGS_REG)
21617 (unspec:CCC [(match_dup 1) (const_int 0)] UNSPEC_CC_NE))
21618 (set (match_dup 0)
21619 (neg:SWI (ltu:SWI (reg:CCC FLAGS_REG) (const_int 0))))])
21620
;; Integer cmov for the SWI248 modes.  Two alternatives: in the first
;; operand 3 is tied to the destination and operand 2 is moved under
;; condition %C1; in the second operand 2 is tied to the destination and
;; operand 3 is moved under the reversed condition %c1.  Requires
;; TARGET_CMOVE and that operands 2 and 3 are not both memory.
21621 (define_insn "*mov<mode>cc_noc"
21622 [(set (match_operand:SWI248 0 "register_operand" "=r,r")
21623 (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
21624 [(reg FLAGS_REG) (const_int 0)])
21625 (match_operand:SWI248 2 "nonimmediate_operand" "rm,0")
21626 (match_operand:SWI248 3 "nonimmediate_operand" "0,rm")))]
21627 "TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
21628 "@
21629 cmov%O2%C1\t{%2, %0|%0, %2}
21630 cmov%O2%c1\t{%3, %0|%0, %3}"
21631 [(set_attr "type" "icmov")
21632 (set_attr "mode" "<MODE>")])
21633
;; cmov selecting between two zero-extended SImode values into a DImode
;; destination.  Same two-alternative scheme as the plain cmov insn (second
;; alternative uses the reversed condition %c1).  Requires TARGET_64BIT and
;; TARGET_CMOVE, and operands 2 and 3 must not both be memory.  The insn
;; mode attribute is SI.
21634 (define_insn "*movsicc_noc_zext"
21635 [(set (match_operand:DI 0 "register_operand" "=r,r")
21636 (if_then_else:DI (match_operator 1 "ix86_comparison_operator"
21637 [(reg FLAGS_REG) (const_int 0)])
21638 (zero_extend:DI
21639 (match_operand:SI 2 "nonimmediate_operand" "rm,0"))
21640 (zero_extend:DI
21641 (match_operand:SI 3 "nonimmediate_operand" "0,rm"))))]
21642 "TARGET_64BIT
21643 && TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
21644 "@
21645 cmov%O2%C1\t{%2, %k0|%k0, %2}
21646 cmov%O2%c1\t{%3, %k0|%k0, %3}"
21647 [(set_attr "type" "icmov")
21648 (set_attr "mode" "SI")])
21649
21650 ;; Don't do conditional moves with memory inputs. This splitter helps
21651 ;; register starved x86_32 by forcing inputs into registers before reload.
;; Applies only when !TARGET_64BIT, TARGET_CMOVE and
;; TARGET_AVOID_MEM_OPND_FOR_CMOVE hold, at least one of operands 2/3 is
;; memory, pseudos can still be created, and the insn is optimized for
;; speed.  The preparation statements pass both operands 2 and 3 through
;; force_reg before the same if_then_else is re-emitted.
21652 (define_split
21653 [(set (match_operand:SWI248 0 "register_operand")
21654 (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
21655 [(reg FLAGS_REG) (const_int 0)])
21656 (match_operand:SWI248 2 "nonimmediate_operand")
21657 (match_operand:SWI248 3 "nonimmediate_operand")))]
21658 "!TARGET_64BIT && TARGET_CMOVE
21659 && TARGET_AVOID_MEM_OPND_FOR_CMOVE
21660 && (MEM_P (operands[2]) || MEM_P (operands[3]))
21661 && can_create_pseudo_p ()
21662 && optimize_insn_for_speed_p ()"
21663 [(set (match_dup 0)
21664 (if_then_else:SWI248 (match_dup 1) (match_dup 2) (match_dup 3)))]
21665 {
21666 operands[2] = force_reg (<MODE>mode, operands[2]);
21667 operands[3] = force_reg (<MODE>mode, operands[3]);
21668 })
21669
;; QImode conditional move between registers.  The output template is #,
;; so no assembly is emitted directly from this pattern; the SWI12
;; define_split in this file has a matching shape and rewrites it in
;; SImode.  Requires TARGET_CMOVE and !TARGET_PARTIAL_REG_STALL.
21670 (define_insn "*movqicc_noc"
21671 [(set (match_operand:QI 0 "register_operand" "=r,r")
21672 (if_then_else:QI (match_operator 1 "ix86_comparison_operator"
21673 [(reg FLAGS_REG) (const_int 0)])
21674 (match_operand:QI 2 "register_operand" "r,0")
21675 (match_operand:QI 3 "register_operand" "0,r")))]
21676 "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL"
21677 "#"
21678 [(set_attr "type" "icmov")
21679 (set_attr "mode" "QI")])
21680
;; After reload, rewrites a register-only SWI12 conditional move as an
;; SImode if_then_else: operands 0, 2 and 3 are each replaced by their
;; SImode gen_lowpart.  Requires TARGET_CMOVE and
;; !TARGET_PARTIAL_REG_STALL.
21681 (define_split
21682 [(set (match_operand:SWI12 0 "register_operand")
21683 (if_then_else:SWI12 (match_operator 1 "ix86_comparison_operator"
21684 [(reg FLAGS_REG) (const_int 0)])
21685 (match_operand:SWI12 2 "register_operand")
21686 (match_operand:SWI12 3 "register_operand")))]
21687 "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL
21688 && reload_completed"
21689 [(set (match_dup 0)
21690 (if_then_else:SI (match_dup 1) (match_dup 2) (match_dup 3)))]
21691 {
21692 operands[0] = gen_lowpart (SImode, operands[0]);
21693 operands[2] = gen_lowpart (SImode, operands[2]);
21694 operands[3] = gen_lowpart (SImode, operands[3]);
21695 })
21696
21697 ;; Don't do conditional moves with memory inputs
;; Requires a scratch register (operand 4, constraint r).  The memory
;; input is copied into the scratch first (operand 5 records the original
;; memory operand) and the cmov then reads the scratch instead.  Operand 2
;; is checked before operand 3, so only one memory input is replaced; the
;; else branch is unreachable because the condition demands that at least
;; one of them is memory.
21698 (define_peephole2
21699 [(match_scratch:SWI248 4 "r")
21700 (set (match_operand:SWI248 0 "register_operand")
21701 (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
21702 [(reg FLAGS_REG) (const_int 0)])
21703 (match_operand:SWI248 2 "nonimmediate_operand")
21704 (match_operand:SWI248 3 "nonimmediate_operand")))]
21705 "TARGET_CMOVE && TARGET_AVOID_MEM_OPND_FOR_CMOVE
21706 && (MEM_P (operands[2]) || MEM_P (operands[3]))
21707 && optimize_insn_for_speed_p ()"
21708 [(set (match_dup 4) (match_dup 5))
21709 (set (match_dup 0)
21710 (if_then_else:SWI248 (match_dup 1) (match_dup 2) (match_dup 3)))]
21711 {
21712 if (MEM_P (operands[2]))
21713 {
21714 operands[5] = operands[2];
21715 operands[2] = operands[4];
21716 }
21717 else if (MEM_P (operands[3]))
21718 {
21719 operands[5] = operands[3];
21720 operands[3] = operands[4];
21721 }
21722 else
21723 gcc_unreachable ();
21724 })
21725
;; Same scratch-load rewrite for the zero-extending DImode cmov: the SImode
;; memory input (operand 2 checked first, then operand 3) is copied into
;; the SImode scratch operand 4, and the cmov then zero-extends the scratch
;; instead.  Requires TARGET_64BIT in addition to TARGET_CMOVE,
;; TARGET_AVOID_MEM_OPND_FOR_CMOVE and optimizing for speed.
21726 (define_peephole2
21727 [(match_scratch:SI 4 "r")
21728 (set (match_operand:DI 0 "register_operand")
21729 (if_then_else:DI (match_operator 1 "ix86_comparison_operator"
21730 [(reg FLAGS_REG) (const_int 0)])
21731 (zero_extend:DI
21732 (match_operand:SI 2 "nonimmediate_operand"))
21733 (zero_extend:DI
21734 (match_operand:SI 3 "nonimmediate_operand"))))]
21735 "TARGET_64BIT
21736 && TARGET_CMOVE && TARGET_AVOID_MEM_OPND_FOR_CMOVE
21737 && (MEM_P (operands[2]) || MEM_P (operands[3]))
21738 && optimize_insn_for_speed_p ()"
21739 [(set (match_dup 4) (match_dup 5))
21740 (set (match_dup 0)
21741 (if_then_else:DI (match_dup 1)
21742 (zero_extend:DI (match_dup 2))
21743 (zero_extend:DI (match_dup 3))))]
21744 {
21745 if (MEM_P (operands[2]))
21746 {
21747 operands[5] = operands[2];
21748 operands[2] = operands[4];
21749 }
21750 else if (MEM_P (operands[3]))
21751 {
21752 operands[5] = operands[3];
21753 operands[3] = operands[4];
21754 }
21755 else
21756 gcc_unreachable ();
21757 })
21758
21759 ;; Eliminate a reg-reg mov by inverting the condition of a cmov (#1).
21760 ;; mov r0,r1; dec r0; mov r2,r3; cmov r0,r2 -> dec r1; mov r0,r3; cmov r0, r1
;; Matched window: copy operand 1 to operand 0; a PARALLEL that sets
;; FLAGS_REG (expression operand 5) and operand 0 (expression operand 6);
;; copy operand 3 to operand 2; cmov selecting operand 0 or operand 2.
;; Preparation: operand 7 is the destination of the first SET of the
;; PARALLEL (insn 1 of the window); operands 8 and 9 are operands 5 and 6
;; with operand 0 replaced by operand 1.  The replacement performs the
;; arithmetic on operand 1, loads operand 3 into operand 0, and swaps the
;; cmov arms.  Requires operand 2 to differ from operands 0 and 1,
;; peep2_reg_dead_p (1, operand 1) and peep2_reg_dead_p (4, operand 2),
;; and operand 3 not to overlap operand 0.
21761 (define_peephole2
21762 [(set (match_operand:SWI248 0 "general_reg_operand")
21763 (match_operand:SWI248 1 "general_reg_operand"))
21764 (parallel [(set (reg FLAGS_REG) (match_operand 5))
21765 (set (match_dup 0) (match_operand:SWI248 6))])
21766 (set (match_operand:SWI248 2 "general_reg_operand")
21767 (match_operand:SWI248 3 "general_gr_operand"))
21768 (set (match_dup 0)
21769 (if_then_else:SWI248 (match_operator 4 "ix86_comparison_operator"
21770 [(reg FLAGS_REG) (const_int 0)])
21771 (match_dup 0)
21772 (match_dup 2)))]
21773 "TARGET_CMOVE
21774 && REGNO (operands[2]) != REGNO (operands[0])
21775 && REGNO (operands[2]) != REGNO (operands[1])
21776 && peep2_reg_dead_p (1, operands[1])
21777 && peep2_reg_dead_p (4, operands[2])
21778 && !reg_overlap_mentioned_p (operands[0], operands[3])"
21779 [(parallel [(set (match_dup 7) (match_dup 8))
21780 (set (match_dup 1) (match_dup 9))])
21781 (set (match_dup 0) (match_dup 3))
21782 (set (match_dup 0) (if_then_else:SWI248 (match_dup 4)
21783 (match_dup 1)
21784 (match_dup 0)))]
21785 {
21786 operands[7] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (1)), 0, 0));
21787 operands[8] = replace_rtx (operands[5], operands[0], operands[1], true);
21788 operands[9] = replace_rtx (operands[6], operands[0], operands[1], true);
21789 })
21790
21791 ;; Eliminate a reg-reg mov by inverting the condition of a cmov (#2).
21792 ;; mov r2,r3; mov r0,r1; dec r0; cmov r0,r2 -> dec r1; mov r0,r3; cmov r0, r1
;; Same rewrite with a different insn order: the copy of operand 3 into
;; operand 2 comes first, then the copy of operand 1 into operand 0, the
;; flags-setting PARALLEL, and the cmov.  The PARALLEL is therefore insn 2
;; of the window, which is why peep2_next_insn (2) and
;; peep2_reg_dead_p (2, operand 1) are used here.  In addition to the
;; conditions of the first variant, operand 2 must not be mentioned in
;; operand 6.
21793 (define_peephole2
21794 [(set (match_operand:SWI248 2 "general_reg_operand")
21795 (match_operand:SWI248 3 "general_gr_operand"))
21796 (set (match_operand:SWI248 0 "general_reg_operand")
21797 (match_operand:SWI248 1 "general_reg_operand"))
21798 (parallel [(set (reg FLAGS_REG) (match_operand 5))
21799 (set (match_dup 0) (match_operand:SWI248 6))])
21800 (set (match_dup 0)
21801 (if_then_else:SWI248 (match_operator 4 "ix86_comparison_operator"
21802 [(reg FLAGS_REG) (const_int 0)])
21803 (match_dup 0)
21804 (match_dup 2)))]
21805 "TARGET_CMOVE
21806 && REGNO (operands[2]) != REGNO (operands[0])
21807 && REGNO (operands[2]) != REGNO (operands[1])
21808 && peep2_reg_dead_p (2, operands[1])
21809 && peep2_reg_dead_p (4, operands[2])
21810 && !reg_overlap_mentioned_p (operands[0], operands[3])
21811 && !reg_mentioned_p (operands[2], operands[6])"
21812 [(parallel [(set (match_dup 7) (match_dup 8))
21813 (set (match_dup 1) (match_dup 9))])
21814 (set (match_dup 0) (match_dup 3))
21815 (set (match_dup 0) (if_then_else:SWI248 (match_dup 4)
21816 (match_dup 1)
21817 (match_dup 0)))]
21818 {
21819 operands[7] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (2)), 0, 0));
21820 operands[8] = replace_rtx (operands[5], operands[0], operands[1], true);
21821 operands[9] = replace_rtx (operands[6], operands[0], operands[1], true);
21822 })
21823
;; Masked HFmode move expressed as UNSPEC_MOVCC_MASK of operand 1 (source),
;; operand 2 (constraints 0C in every alternative) and QImode operand 3
;; (constraint Yk).  Three alternatives: memory to register, register to
;; memory, register to register.  All emit vmovsh; the register-to-register
;; form prints the source with the %d modifier.  Requires
;; TARGET_AVX512FP16.
21824 (define_insn "movhf_mask"
21825 [(set (match_operand:HF 0 "nonimmediate_operand" "=v,m,v")
21826 (unspec:HF
21827 [(match_operand:HF 1 "nonimmediate_operand" "m,v,v")
21828 (match_operand:HF 2 "nonimm_or_0_operand" "0C,0C,0C")
21829 (match_operand:QI 3 "register_operand" "Yk,Yk,Yk")]
21830 UNSPEC_MOVCC_MASK))]
21831 "TARGET_AVX512FP16"
21832 "@
21833 vmovsh\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}
21834 vmovsh\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}
21835 vmovsh\t{%d1, %0%{%3%}%N2|%0%{%3%}%N2, %d1}"
21836 [(set_attr "type" "ssemov")
21837 (set_attr "prefix" "evex")
21838 (set_attr "mode" "HF")])
21839
;; HFmode conditional-move expander, enabled for TARGET_AVX512FP16.
;; Delegates to ix86_expand_fp_movcc (operands); DONE when it returns
;; nonzero, FAIL otherwise.
21840 (define_expand "movhfcc"
21841 [(set (match_operand:HF 0 "register_operand")
21842 (if_then_else:HF
21843 (match_operand 1 "comparison_operator")
21844 (match_operand:HF 2 "register_operand")
21845 (match_operand:HF 3 "register_operand")))]
21846 "TARGET_AVX512FP16"
21847 "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;")
21848
;; Floating-point conditional-move expander for the X87MODEF modes.
;; Enabled when either TARGET_80387 and TARGET_CMOVE hold, or the mode is
;; an SSE float mode and TARGET_SSE_MATH holds.  Delegates to
;; ix86_expand_fp_movcc (operands); DONE when it returns nonzero, FAIL
;; otherwise.
21849 (define_expand "mov<mode>cc"
21850 [(set (match_operand:X87MODEF 0 "register_operand")
21851 (if_then_else:X87MODEF
21852 (match_operand 1 "comparison_operator")
21853 (match_operand:X87MODEF 2 "register_operand")
21854 (match_operand:X87MODEF 3 "register_operand")))]
21855 "(TARGET_80387 && TARGET_CMOVE)
21856 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
21857 "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;")
21858
;; XFmode fcmov between x87 registers (constraint f).  Two alternatives
;; mirror the integer cmov scheme: the first moves operand 2 under %F1
;; with operand 3 tied to the destination, the second moves operand 3
;; under the reversed form %f1 with operand 2 tied to the destination.
;; Requires TARGET_80387 and TARGET_CMOVE.
21859 (define_insn "*movxfcc_1"
21860 [(set (match_operand:XF 0 "register_operand" "=f,f")
21861 (if_then_else:XF (match_operator 1 "fcmov_comparison_operator"
21862 [(reg FLAGS_REG) (const_int 0)])
21863 (match_operand:XF 2 "register_operand" "f,0")
21864 (match_operand:XF 3 "register_operand" "0,f")))]
21865 "TARGET_80387 && TARGET_CMOVE"
21866 "@
21867 fcmov%F1\t{%2, %0|%0, %2}
21868 fcmov%f1\t{%3, %0|%0, %3}"
21869 [(set_attr "type" "fcmov")
21870 (set_attr "mode" "XF")])
21871
;; DFmode conditional move with six alternatives.  Alternatives 0-1 are
;; x87 fcmov.  Alternatives 2-3 (isa nox64, type multi) use
;; general registers with an earlyclobber destination and the template #.
;; Alternatives 4-5 (isa x64) are a single integer cmov with mode DI.
;; Requires TARGET_80387 and TARGET_CMOVE, and operands 2 and 3 must not
;; both be memory.
21872 (define_insn "*movdfcc_1"
21873 [(set (match_operand:DF 0 "register_operand" "=f,f,&r,&r,r ,r")
21874 (if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
21875 [(reg FLAGS_REG) (const_int 0)])
21876 (match_operand:DF 2 "nonimmediate_operand"
21877 "f ,0,rm,0 ,rm,0")
21878 (match_operand:DF 3 "nonimmediate_operand"
21879 "0 ,f,0 ,rm,0, rm")))]
21880 "TARGET_80387 && TARGET_CMOVE
21881 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
21882 "@
21883 fcmov%F1\t{%2, %0|%0, %2}
21884 fcmov%f1\t{%3, %0|%0, %3}
21885 #
21886 #
21887 cmov%O2%C1\t{%2, %0|%0, %2}
21888 cmov%O2%c1\t{%3, %0|%0, %3}"
21889 [(set_attr "isa" "*,*,nox64,nox64,x64,x64")
21890 (set_attr "type" "fcmov,fcmov,multi,multi,icmov,icmov")
21891 (set_attr "mode" "DF,DF,DI,DI,DI,DI")])
21892
;; On !TARGET_64BIT after reload, splits a DFmode conditional move whose
;; destination is a general register into two SImode conditional moves.
;; The first split_double_mode call splits sources 2 and 3 into operands
;; 4/5 and 6/7.  The second splits the destination (operand 0) into
;; operands 2 and 3, overwriting the source slots, so the statement order
;; matters.
21893 (define_split
21894 [(set (match_operand:DF 0 "general_reg_operand")
21895 (if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
21896 [(reg FLAGS_REG) (const_int 0)])
21897 (match_operand:DF 2 "nonimmediate_operand")
21898 (match_operand:DF 3 "nonimmediate_operand")))]
21899 "!TARGET_64BIT && reload_completed"
21900 [(set (match_dup 2)
21901 (if_then_else:SI (match_dup 1) (match_dup 4) (match_dup 5)))
21902 (set (match_dup 3)
21903 (if_then_else:SI (match_dup 1) (match_dup 6) (match_dup 7)))]
21904 {
21905 split_double_mode (DImode, &operands[2], 2, &operands[4], &operands[6]);
21906 split_double_mode (DImode, &operands[0], 1, &operands[2], &operands[3]);
21907 })
21908
;; SFmode conditional move with four alternatives: two x87 fcmov forms
;; (mode SF) and two integer cmov forms on general registers (mode SI).
;; Within each pair the second alternative uses the reversed condition
;; (%f1 / %c1).  Requires TARGET_80387 and TARGET_CMOVE, and operands 2
;; and 3 must not both be memory.
21909 (define_insn "*movsfcc_1_387"
21910 [(set (match_operand:SF 0 "register_operand" "=f,f,r,r")
21911 (if_then_else:SF (match_operator 1 "fcmov_comparison_operator"
21912 [(reg FLAGS_REG) (const_int 0)])
21913 (match_operand:SF 2 "nonimmediate_operand" "f,0,rm,0")
21914 (match_operand:SF 3 "nonimmediate_operand" "0,f,0,rm")))]
21915 "TARGET_80387 && TARGET_CMOVE
21916 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
21917 "@
21918 fcmov%F1\t{%2, %0|%0, %2}
21919 fcmov%f1\t{%3, %0|%0, %3}
21920 cmov%O2%C1\t{%2, %0|%0, %2}
21921 cmov%O2%c1\t{%3, %0|%0, %3}"
21922 [(set_attr "type" "fcmov,fcmov,icmov,icmov")
21923 (set_attr "mode" "SF,SF,SI,SI")])
21924
21925 ;; Don't do conditional moves with memory inputs. This splitter helps
21926 ;; register starved x86_32 by forcing inputs into registers before reload.
;; MODEF counterpart of the integer force_reg splitter.  Applies when
;; !TARGET_64BIT, TARGET_80387, TARGET_CMOVE and
;; TARGET_AVOID_MEM_OPND_FOR_CMOVE hold, at least one of operands 2/3 is
;; memory, pseudos can still be created, and the insn is optimized for
;; speed.  Both operands 2 and 3 are passed through force_reg.
21927 (define_split
21928 [(set (match_operand:MODEF 0 "register_operand")
21929 (if_then_else:MODEF (match_operator 1 "ix86_comparison_operator"
21930 [(reg FLAGS_REG) (const_int 0)])
21931 (match_operand:MODEF 2 "nonimmediate_operand")
21932 (match_operand:MODEF 3 "nonimmediate_operand")))]
21933 "!TARGET_64BIT && TARGET_80387 && TARGET_CMOVE
21934 && TARGET_AVOID_MEM_OPND_FOR_CMOVE
21935 && (MEM_P (operands[2]) || MEM_P (operands[3]))
21936 && can_create_pseudo_p ()
21937 && optimize_insn_for_speed_p ()"
21938 [(set (match_dup 0)
21939 (if_then_else:MODEF (match_dup 1) (match_dup 2) (match_dup 3)))]
21940 {
21941 operands[2] = force_reg (<MODE>mode, operands[2]);
21942 operands[3] = force_reg (<MODE>mode, operands[3]);
21943 })
21944
21945 ;; Don't do conditional moves with memory inputs
;; MODEF counterpart of the integer scratch-load peephole, for a
;; general-register destination.  The memory input (operand 2 checked
;; first, then operand 3) is copied into general-register scratch
;; operand 4 and the conditional move then reads the scratch.  DFmode is
;; handled only when TARGET_64BIT.
21946 (define_peephole2
21947 [(match_scratch:MODEF 4 "r")
21948 (set (match_operand:MODEF 0 "general_reg_operand")
21949 (if_then_else:MODEF (match_operator 1 "fcmov_comparison_operator"
21950 [(reg FLAGS_REG) (const_int 0)])
21951 (match_operand:MODEF 2 "nonimmediate_operand")
21952 (match_operand:MODEF 3 "nonimmediate_operand")))]
21953 "(<MODE>mode != DFmode || TARGET_64BIT)
21954 && TARGET_80387 && TARGET_CMOVE
21955 && TARGET_AVOID_MEM_OPND_FOR_CMOVE
21956 && (MEM_P (operands[2]) || MEM_P (operands[3]))
21957 && optimize_insn_for_speed_p ()"
21958 [(set (match_dup 4) (match_dup 5))
21959 (set (match_dup 0)
21960 (if_then_else:MODEF (match_dup 1) (match_dup 2) (match_dup 3)))]
21961 {
21962 if (MEM_P (operands[2]))
21963 {
21964 operands[5] = operands[2];
21965 operands[2] = operands[4];
21966 }
21967 else if (MEM_P (operands[3]))
21968 {
21969 operands[5] = operands[3];
21970 operands[3] = operands[4];
21971 }
21972 else
21973 gcc_unreachable ();
21974 })
21975
21976 ;; All moves in XOP pcmov instructions are 128 bits and hence we restrict
21977 ;; the scalar versions to have only XMM registers as operands.
21978
21979 ;; XOP conditional move
;; Scalar MODEF if_then_else whose selector (operand 1) is itself a MODEF
;; register rather than a flags comparison.  All four operands use
;; constraint x.  Emitted as vpcmov; requires TARGET_XOP.  Only the type
;; attribute is set (no mode attribute).
21980 (define_insn "*xop_pcmov_<mode>"
21981 [(set (match_operand:MODEF 0 "register_operand" "=x")
21982 (if_then_else:MODEF
21983 (match_operand:MODEF 1 "register_operand" "x")
21984 (match_operand:MODEF 2 "register_operand" "x")
21985 (match_operand:MODEF 3 "register_operand" "x")))]
21986 "TARGET_XOP"
21987 "vpcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}"
21988 [(set_attr "type" "sse4arg")])
21989
21990 ;; These versions of the min/max patterns are intentionally ignorant of
21991 ;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
21992 ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
21993 ;; are undefined in this condition, we're certain this is correct.
21994
;; Scalar SSE smax/smin for the MODEF modes.  Operand 1 carries the %
;; commutative marker.  Alternative 0 (isa noavx) is the two-operand form
;; with operand 1 tied to the destination; alternative 1 (isa avx) is the
;; three-operand v-prefixed form.  Requires an SSE float mode and
;; TARGET_SSE_MATH.
21995 (define_insn "<code><mode>3"
21996 [(set (match_operand:MODEF 0 "register_operand" "=x,v")
21997 (smaxmin:MODEF
21998 (match_operand:MODEF 1 "nonimmediate_operand" "%0,v")
21999 (match_operand:MODEF 2 "nonimmediate_operand" "xm,vm")))]
22000 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
22001 "@
22002 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
22003 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
22004 [(set_attr "isa" "noavx,avx")
22005 (set_attr "prefix" "orig,vex")
22006 (set_attr "type" "sseadd")
22007 (set_attr "mode" "<MODE>")])
22008
;; HFmode smax/smin, single three-operand alternative with the %
;; commutative marker on operand 1.  Emitted as the sh-suffixed v-prefixed
;; instruction with EVEX prefix; requires TARGET_AVX512FP16.
22009 (define_insn "<code>hf3"
22010 [(set (match_operand:HF 0 "register_operand" "=v")
22011 (smaxmin:HF
22012 (match_operand:HF 1 "nonimmediate_operand" "%v")
22013 (match_operand:HF 2 "nonimmediate_operand" "vm")))]
22014 "TARGET_AVX512FP16"
22015 "v<maxmin_float>sh\t{%2, %1, %0|%0, %1, %2}"
22016 [(set_attr "prefix" "evex")
22017 (set_attr "type" "sseadd")
22018 (set_attr "mode" "HF")])
22019
22020 ;; These versions of the min/max patterns implement exactly the operations
22021 ;; min = (op1 < op2 ? op1 : op2)
22022 ;; max = (!(op1 < op2) ? op1 : op2)
22023 ;; Their operands are not commutative, and thus they may be used in the
22024 ;; presence of -0.0 and NaN.
22025
;; HFmode min/max expressed as an IEEE_MAXMIN unspec, so the operands are
;; not marked commutative: operand 1 must be a register and only operand 2
;; may be memory.  Requires TARGET_AVX512FP16.
22026 (define_insn "*ieee_s<ieee_maxmin>hf3"
22027 [(set (match_operand:HF 0 "register_operand" "=v")
22028 (unspec:HF
22029 [(match_operand:HF 1 "register_operand" "v")
22030 (match_operand:HF 2 "nonimmediate_operand" "vm")]
22031 IEEE_MAXMIN))]
22032 "TARGET_AVX512FP16"
22033 "v<ieee_maxmin>sh\t{%2, %1, %0|%0, %1, %2}"
22034 [(set_attr "prefix" "evex")
22035 (set_attr "type" "sseadd")
22036 (set_attr "mode" "HF")])
22037
;; MODEF min/max expressed as an IEEE_MAXMIN unspec (operands not marked
;; commutative).  Alternative 0 (isa noavx) ties operand 1 to the
;; destination; alternative 1 (isa avx) is the three-operand v-prefixed
;; form with prefix maybe_evex.  Requires an SSE float mode and
;; TARGET_SSE_MATH.
22038 (define_insn "*ieee_s<ieee_maxmin><mode>3"
22039 [(set (match_operand:MODEF 0 "register_operand" "=x,v")
22040 (unspec:MODEF
22041 [(match_operand:MODEF 1 "register_operand" "0,v")
22042 (match_operand:MODEF 2 "nonimmediate_operand" "xm,vm")]
22043 IEEE_MAXMIN))]
22044 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
22045 "@
22046 <ieee_maxmin><ssemodesuffix>\t{%2, %0|%0, %2}
22047 v<ieee_maxmin><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
22048 [(set_attr "isa" "noavx,avx")
22049 (set_attr "prefix" "orig,maybe_evex")
22050 (set_attr "type" "sseadd")
22051 (set_attr "mode" "<MODE>")])
22052
22053 ;; Make two stack loads independent:
22054 ;; fld aa fld aa
22055 ;; fld %st(0) -> fld bb
22056 ;; fmul bb fmul %st(1), %st
22057 ;;
22058 ;; Actually we only match the last two instructions for simplicity.
22059
;; Matches a copy of fp register operand 1 into operand 0 followed by
;; operand 0 = op (operand 0, memory operand 3), with operands 0 and 1 in
;; different registers.  The replacement loads the memory into operand 0
;; and then computes op (operand 1, operand 0); for a commutative operator
;; the two are swapped, giving op (operand 0, operand 1).
22060 (define_peephole2
22061 [(set (match_operand 0 "fp_register_operand")
22062 (match_operand 1 "fp_register_operand"))
22063 (set (match_dup 0)
22064 (match_operator 2 "binary_fp_operator"
22065 [(match_dup 0)
22066 (match_operand 3 "memory_operand")]))]
22067 "REGNO (operands[0]) != REGNO (operands[1])"
22068 [(set (match_dup 0) (match_dup 3))
22069 (set (match_dup 0)
22070 (match_op_dup 2
22071 [(match_dup 5) (match_dup 4)]))]
22072 {
22073 operands[4] = operands[0];
22074 operands[5] = operands[1];
22075
22076 /* The % modifier is not operational anymore in peephole2's, so we have to
22077 swap the operands manually in the case of addition and multiplication. */
22078 if (COMMUTATIVE_ARITH_P (operands[2]))
22079 std::swap (operands[4], operands[5]);
22080 })
22081
;; Variant where the memory operand is the first input of the operator:
;;   op0 = op1; op0 = mem OP op0
;; is replaced by
;;   op0 = mem; op0 = op0 OP op1   (inputs swapped when OP is commutative).
;; Only applies when operands 0 and 1 are different registers.
22082 (define_peephole2
22083 [(set (match_operand 0 "fp_register_operand")
22084 (match_operand 1 "fp_register_operand"))
22085 (set (match_dup 0)
22086 (match_operator 2 "binary_fp_operator"
22087 [(match_operand 3 "memory_operand")
22088 (match_dup 0)]))]
22089 "REGNO (operands[0]) != REGNO (operands[1])"
22090 [(set (match_dup 0) (match_dup 3))
22091 (set (match_dup 0)
22092 (match_op_dup 2
22093 [(match_dup 4) (match_dup 5)]))]
22094 {
/* The new operator is built as (operands[4], operands[5]); by default that
is (freshly loaded value, original source register), which keeps the
input order of the matched insn.  */
22095 operands[4] = operands[0];
22096 operands[5] = operands[1];
22097
22098 /* The % modifier is not operational anymore in peephole2's, so we have to
22099 swap the operands manually in the case of addition and multiplication. */
22100 if (COMMUTATIVE_ARITH_P (operands[2]))
22101 std::swap (operands[4], operands[5]);
22102 })
22103
22104 ;; Conditional addition patterns
;; Operands: 0 = SWI destination register, 1 = ordered comparison,
;; 2 = SWI register, 3 = integer constant.  The expansion is delegated
;; entirely to ix86_expand_int_addcc; when that returns false the expander
;; FAILs.
22105 (define_expand "add<mode>cc"
22106 [(match_operand:SWI 0 "register_operand")
22107 (match_operand 1 "ordered_comparison_operator")
22108 (match_operand:SWI 2 "register_operand")
22109 (match_operand:SWI 3 "const_int_operand")]
22110 ""
22111 "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;")
22112
22113 ;; min/max patterns
22114
;; Comparison code associated with each max/min code: GE/LE for the signed
;; codes, GEU/LEU for the unsigned ones.  The splitters below read it as
;; <maxmin_rel> to pick the condition for the conditional move.
22115 (define_code_attr maxmin_rel
22116 [(smax "GE") (smin "LE") (umax "GEU") (umin "LEU")])
22117
;; Integer max/min expander for SDWIM modes.  There are no preparation
;; statements, so the pattern (including the FLAGS_REG clobber) is emitted
;; as written.  Requires TARGET_CMOVE; QImode is additionally rejected when
;; TARGET_PARTIAL_REG_STALL is set.
22118 (define_expand "<code><mode>3"
22119 [(parallel
22120 [(set (match_operand:SDWIM 0 "register_operand")
22121 (maxmin:SDWIM
22122 (match_operand:SDWIM 1 "register_operand")
22123 (match_operand:SDWIM 2 "general_operand")))
22124 (clobber (reg:CC FLAGS_REG))])]
22125 "TARGET_CMOVE
22126 && (<MODE>mode != QImode || !TARGET_PARTIAL_REG_STALL)")
22127
;; Double-word integer max/min.  The insn only exists before reload
;; (ix86_pre_reload_split) and is always split ("&& 1") into a
;; flags-setting compare / subtract-with-borrow sequence emitted by the
;; preparation code, followed by two conditional moves (one per half) that
;; share the condition built in operands[6].
22128 (define_insn_and_split "*<code><dwi>3_doubleword"
22129 [(set (match_operand:<DWI> 0 "register_operand")
22130 (maxmin:<DWI>
22131 (match_operand:<DWI> 1 "register_operand")
22132 (match_operand:<DWI> 2 "general_operand")))
22133 (clobber (reg:CC FLAGS_REG))]
22134 "TARGET_CMOVE
22135 && ix86_pre_reload_split ()"
22136 "#"
22137 "&& 1"
22138 [(set (match_dup 0)
22139 (if_then_else:DWIH (match_dup 6)
22140 (match_dup 1)
22141 (match_dup 2)))
22142 (set (match_dup 3)
22143 (if_then_else:DWIH (match_dup 6)
22144 (match_dup 4)
22145 (match_dup 5)))]
22146 {
/* Both conditional-move inputs must be registers.  */
22147 operands[2] = force_reg (<DWI>mode, operands[2]);
22148
/* Split the three double-word operands in place: the first halves stay in
operands[0..2] and the second halves go to operands[3..5] (used below as
the "lo" and "hi" compare inputs respectively).  */
22149 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
22150
22151 rtx cmplo[2] = { operands[1], operands[2] };
22152 rtx cmphi[2] = { operands[4], operands[5] };
22153
22154 enum rtx_code code = <maxmin_rel>;
22155
22156 switch (code)
22157 {
/* The min codes are handled by swapping both compare input pairs and
using the swapped condition, then sharing the max code path.  */
22158 case LE: case LEU:
22159 std::swap (cmplo[0], cmplo[1]);
22160 std::swap (cmphi[0], cmphi[1]);
22161 code = swap_condition (code);
22162 /* FALLTHRU */
22163
22164 case GE: case GEU:
22165 {
22166 bool uns = (code == GEU);
22167 rtx (*sbb_insn) (machine_mode, rtx, rtx, rtx)
22168 = uns ? gen_sub3_carry_ccc : gen_sub3_carry_ccgz;
22169
/* Compare the low halves first ...  */
22170 emit_insn (gen_cmp_1 (<MODE>mode, cmplo[0], cmplo[1]));
22171
/* ... then subtract the high halves with carry.  The destination is a
SCRATCH, so only the resulting flags are of interest.  */
22172 rtx tmp = gen_rtx_SCRATCH (<MODE>mode);
22173 emit_insn (sbb_insn (<MODE>mode, tmp, cmphi[0], cmphi[1]));
22174
/* The flags mode matches the sbb generator chosen above.  */
22175 rtx flags = gen_rtx_REG (uns ? CCCmode : CCGZmode, FLAGS_REG);
22176 operands[6] = gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
22177
22178 break;
22179 }
22180
22181 default:
22182 gcc_unreachable ();
22183 }
22184 })
22185
;; Single-word integer max/min.  The insn only exists before reload
;; (ix86_pre_reload_split) and is always split ("&& 1") into an explicit
;; compare, emitted by the preparation code, followed by one conditional
;; move whose condition is built in operands[3].  Comparisons against the
;; constants 1 and -1 are rewritten as comparisons against zero.
22186 (define_insn_and_split "*<code><mode>3_1"
22187 [(set (match_operand:SWI 0 "register_operand")
22188 (maxmin:SWI
22189 (match_operand:SWI 1 "register_operand")
22190 (match_operand:SWI 2 "general_operand")))
22191 (clobber (reg:CC FLAGS_REG))]
22192 "TARGET_CMOVE
22193 && (<MODE>mode != QImode || !TARGET_PARTIAL_REG_STALL)
22194 && ix86_pre_reload_split ()"
22195 "#"
22196 "&& 1"
22197 [(set (match_dup 0)
22198 (if_then_else:SWI (match_dup 3)
22199 (match_dup 1)
22200 (match_dup 2)))]
22201 {
22202 machine_mode mode = <MODE>mode;
/* Remember the original second operand for the constant checks below;
the conditional move itself always gets a register.  */
22203 rtx cmp_op = operands[2];
22204
22205 operands[2] = force_reg (mode, cmp_op);
22206
22207 enum rtx_code code = <maxmin_rel>;
22208
22209 if (cmp_op == const1_rtx)
22210 {
22211 /* Convert smax (x, 1) into (x > 0 ? x : 1).
22212 Convert umax (x, 1) into (x != 0 ? x : 1).
22213 Convert ?min (x, 1) into (x <= 0 ? x : 1). */
22214 cmp_op = const0_rtx;
22215 if (code == GE)
22216 code = GT;
22217 else if (code == GEU)
22218 code = NE;
22219 }
22220 /* Convert smin (x, -1) into (x < 0 ? x : -1). */
22221 else if (cmp_op == constm1_rtx && code == LE)
22222 {
22223 cmp_op = const0_rtx;
22224 code = LT;
22225 }
22226 /* Convert smax (x, -1) into (x >= 0 ? x : -1). */
22227 else if (cmp_op == constm1_rtx && code == GE)
22228 cmp_op = const0_rtx;
/* Any other operand except a literal zero is compared through the
register copy made above; a literal zero is compared as-is.  */
22229 else if (cmp_op != const0_rtx)
22230 cmp_op = operands[2];
22231
/* Emit the compare in the flags mode selected for this code and operand
pair, then build the condition the conditional move tests.  */
22232 machine_mode cmpmode = SELECT_CC_MODE (code, operands[1], cmp_op);
22233 rtx flags = gen_rtx_REG (cmpmode, FLAGS_REG);
22234
22235 rtx tmp = gen_rtx_COMPARE (cmpmode, operands[1], cmp_op);
22236 emit_insn (gen_rtx_SET (flags, tmp));
22237
22238 operands[3] = gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
22239 })
22240
22241 ;; Avoid clearing a register between a flags setting comparison and its use,
22242 ;; i.e. prefer "xorl %eax,%eax; test/cmp" over "test/cmp; movl $0, %eax".
;; Matches a flags set followed by a store of zero into a general register
;; that the flags source does not mention.  The preparation code emits the
;; register clear via ix86_expand_clear and the replacement pattern then
;; re-creates the flags set.
22243 (define_peephole2
22244 [(set (reg FLAGS_REG) (match_operand 0))
22245 (set (match_operand:SWI 1 "general_reg_operand") (const_int 0))]
22246 "peep2_regno_dead_p (0, FLAGS_REG)
22247 && !reg_overlap_mentioned_p (operands[1], operands[0])"
22248 [(set (match_dup 2) (match_dup 0))]
22249 {
/* The matched flags register has no explicit mode in the pattern, so
rebuild it in the mode of the flags source.  */
22250 operands[2] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG);
22251 ix86_expand_clear (operands[1]);
22252 })
22253
22254 ;; When optimizing for size, zeroing memory should use a register.
;; Four consecutive stores of zero to memory, with a scratch register
;; available: clear the scratch once (ix86_expand_clear) and store it four
;; times.  The UID of the last emitted store is recorded in
;; ix86_last_zero_store_uid, which later peepholes in this group test.
22255 (define_peephole2
22256 [(match_scratch:SWI48 0 "r")
22257 (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
22258 (set (match_operand:SWI48 2 "memory_operand") (const_int 0))
22259 (set (match_operand:SWI48 3 "memory_operand") (const_int 0))
22260 (set (match_operand:SWI48 4 "memory_operand") (const_int 0))]
22261 "optimize_insn_for_size_p () && peep2_regno_dead_p (0, FLAGS_REG)"
22262 [(const_int 0)]
22263 {
22264 ix86_expand_clear (operands[0]);
22265 emit_move_insn (operands[1], operands[0]);
22266 emit_move_insn (operands[2], operands[0]);
22267 emit_move_insn (operands[3], operands[0]);
22268 ix86_last_zero_store_uid
22269 = INSN_UID (emit_move_insn (operands[4], operands[0]));
22270 DONE;
22271 })
22272
;; Two-store version of the same transformation: clear the scratch register
;; once, store it twice, and record the UID of the last store in
;; ix86_last_zero_store_uid.
22273 (define_peephole2
22274 [(match_scratch:SWI48 0 "r")
22275 (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
22276 (set (match_operand:SWI48 2 "memory_operand") (const_int 0))]
22277 "optimize_insn_for_size_p () && peep2_regno_dead_p (0, FLAGS_REG)"
22278 [(const_int 0)]
22279 {
22280 ix86_expand_clear (operands[0]);
22281 emit_move_insn (operands[1], operands[0]);
22282 ix86_last_zero_store_uid
22283 = INSN_UID (emit_move_insn (operands[2], operands[0]));
22284 DONE;
22285 })
22286
;; Single-store version: clear the scratch register, store it, and record
;; the UID of that store in ix86_last_zero_store_uid.
22287 (define_peephole2
22288 [(match_scratch:SWI48 0 "r")
22289 (set (match_operand:SWI48 1 "memory_operand") (const_int 0))]
22290 "optimize_insn_for_size_p () && peep2_regno_dead_p (0, FLAGS_REG)"
22291 [(const_int 0)]
22292 {
22293 ix86_expand_clear (operands[0]);
22294 ix86_last_zero_store_uid
22295 = INSN_UID (emit_move_insn (operands[1], operands[0]));
22296 DONE;
22297 })
22298
;; Continuation form: the first matched insn is a register-to-memory store
;; whose UID equals ix86_last_zero_store_uid, i.e. the store recorded by one
;; of the zero-store peepholes (so operand 0 is presumably the register that
;; was cleared there -- NOTE(review): confirm).  The four following zero
;; stores are rewritten to store that register, and the recorded UID is
;; moved to the new last store so the chain can continue.
22299 (define_peephole2
22300 [(set (match_operand:SWI48 5 "memory_operand")
22301 (match_operand:SWI48 0 "general_reg_operand"))
22302 (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
22303 (set (match_operand:SWI48 2 "memory_operand") (const_int 0))
22304 (set (match_operand:SWI48 3 "memory_operand") (const_int 0))
22305 (set (match_operand:SWI48 4 "memory_operand") (const_int 0))]
22306 "optimize_insn_for_size_p ()
22307 && INSN_UID (peep2_next_insn (0)) == ix86_last_zero_store_uid"
22308 [(const_int 0)]
22309 {
/* Re-emit the leading store unchanged, then the rewritten zero stores.  */
22310 emit_move_insn (operands[5], operands[0]);
22311 emit_move_insn (operands[1], operands[0]);
22312 emit_move_insn (operands[2], operands[0]);
22313 emit_move_insn (operands[3], operands[0]);
22314 ix86_last_zero_store_uid
22315 = INSN_UID (emit_move_insn (operands[4], operands[0]));
22316 DONE;
22317 })
22318
;; Continuation form with two trailing zero stores: the leading store must
;; be the insn recorded in ix86_last_zero_store_uid.  It is re-emitted, both
;; zero stores are rewritten to store operand 0, and the recorded UID is
;; updated to the new last store.
22319 (define_peephole2
22320 [(set (match_operand:SWI48 3 "memory_operand")
22321 (match_operand:SWI48 0 "general_reg_operand"))
22322 (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
22323 (set (match_operand:SWI48 2 "memory_operand") (const_int 0))]
22324 "optimize_insn_for_size_p ()
22325 && INSN_UID (peep2_next_insn (0)) == ix86_last_zero_store_uid"
22326 [(const_int 0)]
22327 {
22328 emit_move_insn (operands[3], operands[0]);
22329 emit_move_insn (operands[1], operands[0]);
22330 ix86_last_zero_store_uid
22331 = INSN_UID (emit_move_insn (operands[2], operands[0]));
22332 DONE;
22333 })
22334
;; Continuation form with one trailing zero store: the leading store must be
;; the insn recorded in ix86_last_zero_store_uid.  It is re-emitted, the
;; zero store is rewritten to store operand 0, and the recorded UID is
;; updated to that new store.
22335 (define_peephole2
22336 [(set (match_operand:SWI48 2 "memory_operand")
22337 (match_operand:SWI48 0 "general_reg_operand"))
22338 (set (match_operand:SWI48 1 "memory_operand") (const_int 0))]
22339 "optimize_insn_for_size_p ()
22340 && INSN_UID (peep2_next_insn (0)) == ix86_last_zero_store_uid"
22341 [(const_int 0)]
22342 {
22343 emit_move_insn (operands[2], operands[0]);
22344 ix86_last_zero_store_uid
22345 = INSN_UID (emit_move_insn (operands[1], operands[0]));
22346 DONE;
22347 })
22348
22349 ;; Reload dislikes loading constants directly into class_likely_spilled
22350 ;; hard registers. Try to tidy things up here.
;; Matches "reg0 = op1; reg2 = reg0" and, when reg0 is dead after the second
;; insn (peep2_reg_dead_p (2, ...)), replaces the pair with "reg2 = op1".
22351 (define_peephole2
22352 [(set (match_operand:SWI 0 "general_reg_operand")
22353 (match_operand:SWI 1 "x86_64_general_operand"))
22354 (set (match_operand:SWI 2 "general_reg_operand")
22355 (match_dup 0))]
22356 "peep2_reg_dead_p (2, operands[0])"
22357 [(set (match_dup 2) (match_dup 1))])
22358 \f
22359 ;; Misc patterns (?)
22360
22361 ;; This pattern exists to put a dependency on all ebp-based memory accesses.
22362 ;; Otherwise there will be nothing to keep
22363 ;;
22364 ;; [(set (reg ebp) (reg esp))]
22365 ;; [(set (reg esp) (plus (reg esp) (const_int -160000)))
22366 ;; (clobber (eflags))]
22367 ;; [(set (mem (plus (reg ebp) (const_int -160000))) (const_int 0))]
22368 ;;
22369 ;; in proper program order.
22370
;; Pointer-mode addition that additionally clobbers FLAGS_REG and a BLK
;; scratch memory.  The "type" attribute computed at the bottom selects the
;; output form: "alu" (add/sub in place), "imov" (plain move when operand 2
;; is zero) or "lea" otherwise.
22371 (define_insn "@pro_epilogue_adjust_stack_add_<mode>"
22372 [(set (match_operand:P 0 "register_operand" "=r,r")
22373 (plus:P (match_operand:P 1 "register_operand" "0,r")
22374 (match_operand:P 2 "<nonmemory_operand>" "r<i>,l<i>")))
22375 (clobber (reg:CC FLAGS_REG))
22376 (clobber (mem:BLK (scratch)))]
22377 ""
22378 {
22379 switch (get_attr_type (insn))
22380 {
22381 case TYPE_IMOV:
22382 return "mov{<imodesuffix>}\t{%1, %0|%0, %1}";
22383
22384 case TYPE_ALU:
/* The ALU form updates the destination in place, so source and
destination must be the same register.  */
22385 gcc_assert (rtx_equal_p (operands[0], operands[1]));
/* When the helper reports that it negated the constant, emit the
subtraction of the negated value instead of the addition.  */
22386 if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
22387 return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
22388
22389 return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
22390
22391 default:
/* Print the whole source expression of the first SET in the parallel
as the LEA address.  */
22392 operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
22393 return "lea{<imodesuffix>}\t{%E2, %0|%0, %E2}";
22394 }
22395 }
;; "alu" for alternative 0 unless TARGET_OPT_AGU; otherwise "imov" when
;; operand 2 matches const0_operand; "lea" in all remaining cases.
22396 [(set (attr "type")
22397 (cond [(and (eq_attr "alternative" "0")
22398 (not (match_test "TARGET_OPT_AGU")))
22399 (const_string "alu")
22400 (match_operand:<MODE> 2 "const0_operand")
22401 (const_string "imov")
22402 ]
22403 (const_string "lea")))
;; No immediate for the move form; one byte for the ALU form when operand 2
;; matches const128_operand; the default ("*") otherwise.
22404 (set (attr "length_immediate")
22405 (cond [(eq_attr "type" "imov")
22406 (const_string "0")
22407 (and (eq_attr "type" "alu")
22408 (match_operand 2 "const128_operand"))
22409 (const_string "1")
22410 ]
22411 (const_string "*")))
22412 (set_attr "mode" "<MODE>")])
22413
;; Pointer-mode in-place subtraction of a register (operand 1 is tied to the
;; destination).  Like the add form it clobbers FLAGS_REG and a BLK scratch
;; memory.
22414 (define_insn "@pro_epilogue_adjust_stack_sub_<mode>"
22415 [(set (match_operand:P 0 "register_operand" "=r")
22416 (minus:P (match_operand:P 1 "register_operand" "0")
22417 (match_operand:P 2 "register_operand" "r")))
22418 (clobber (reg:CC FLAGS_REG))
22419 (clobber (mem:BLK (scratch)))]
22420 ""
22421 "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
22422 [(set_attr "type" "alu")
22423 (set_attr "mode" "<MODE>")])
22424
;; Stack probe worker: a volatile unspec (UNSPECV_STACK_PROBE) whose input
;; and output are both constrained to the "a" register, emitted as a call to
;; ___chkstk_ms.  Available only when ix86_target_stack_probe () is true.
22425 (define_insn "@allocate_stack_worker_probe_<mode>"
22426 [(set (match_operand:P 0 "register_operand" "=a")
22427 (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")]
22428 UNSPECV_STACK_PROBE))
22429 (clobber (reg:CC FLAGS_REG))]
22430 "ix86_target_stack_probe ()"
22431 "call\t___chkstk_ms"
22432 [(set_attr "type" "multi")
22433 (set_attr "length" "5")])
22434
;; Dynamic stack allocation when ix86_target_stack_probe () is true.
;; Operand 0 receives the address of the new area (read from
;; virtual_stack_dynamic_rtx); operand 1 is the size to subtract from the
;; stack pointer.
22435 (define_expand "allocate_stack"
22436 [(match_operand 0 "register_operand")
22437 (match_operand 1 "general_operand")]
22438 "ix86_target_stack_probe ()"
22439 {
22440 rtx x;
22441
22442 #ifndef CHECK_STACK_LIMIT
22443 #define CHECK_STACK_LIMIT 0
22444 #endif
22445
/* A constant size below CHECK_STACK_LIMIT is used directly, without the
probe worker.  With the fallback value of 0 above this branch is never
taken.  */
22446 if (CHECK_STACK_LIMIT && CONST_INT_P (operands[1])
22447 && INTVAL (operands[1]) < CHECK_STACK_LIMIT)
22448 x = operands[1];
22449 else
22450 {
/* Otherwise copy the size into a register and run the probe worker on
it; the same register is both its input and its output.  */
22451 x = copy_to_mode_reg (Pmode, operands[1]);
22452
22453 emit_insn (gen_allocate_stack_worker_probe (Pmode, x, x));
22454 }
22455
/* Subtract the size from the stack pointer, asking for the result in the
stack pointer itself; copy it there if it was produced elsewhere.  */
22456 x = expand_simple_binop (Pmode, MINUS, stack_pointer_rtx, x,
22457 stack_pointer_rtx, 0, OPTAB_DIRECT);
22458
22459 if (x != stack_pointer_rtx)
22460 emit_move_insn (stack_pointer_rtx, x);
22461
22462 emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
22463 DONE;
22464 })
22465
;; Probe the memory location in operand 0 by emitting the word_mode
;; probe_stack_1 insn with a zero second operand.
22466 (define_expand "probe_stack"
22467 [(match_operand 0 "memory_operand")]
22468 ""
22469 {
22470 emit_insn (gen_probe_stack_1
22471 (word_mode, operands[0], const0_rtx));
22472 DONE;
22473 })
22474
22475 ;; Use OR for stack probes, this is shorter.
;; Represented as an UNSPEC_PROBE_STACK of a zero constant stored to memory
;; operand 0 and emitted as "or" of that constant into the memory location.
;; The insn clobbers FLAGS_REG and declares a one-byte immediate.
22476 (define_insn "@probe_stack_1_<mode>"
22477 [(set (match_operand:W 0 "memory_operand" "=m")
22478 (unspec:W [(match_operand:W 1 "const0_operand")]
22479 UNSPEC_PROBE_STACK))
22480 (clobber (reg:CC FLAGS_REG))]
22481 ""
22482 "or{<imodesuffix>}\t{%1, %0|%0, %1}"
22483 [(set_attr "type" "alu1")
22484 (set_attr "mode" "<MODE>")
22485 (set_attr "length_immediate" "1")])
22486
;; Combined stack adjustment and probe.  The pattern describes a volatile
;; UNSPECV_PROBE_STACK_RANGE update of register operand 0 (tied to operand
;; 1) together with SP_REG decreased by the constant operand 2; FLAGS_REG
;; and a BLK scratch memory are clobbered.  The assembly text comes from
;; output_adjust_stack_and_probe, which is passed operand 0 only.
22487 (define_insn "@adjust_stack_and_probe_<mode>"
22488 [(set (match_operand:P 0 "register_operand" "=r")
22489 (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")]
22490 UNSPECV_PROBE_STACK_RANGE))
22491 (set (reg:P SP_REG)
22492 (minus:P (reg:P SP_REG) (match_operand:P 2 "const_int_operand")))
22493 (clobber (reg:CC FLAGS_REG))
22494 (clobber (mem:BLK (scratch)))]
22495 ""
22496 "* return output_adjust_stack_and_probe (operands[0]);"
22497 [(set_attr "type" "multi")])
22498
;; Probe of a stack range, described as a volatile
;; UNSPECV_PROBE_STACK_RANGE of register operand 1 (tied to the destination)
;; and the constant operand 2.  The assembly text comes from
;; output_probe_stack_range, which is passed operands 0 and 2.
22499 (define_insn "@probe_stack_range_<mode>"
22500 [(set (match_operand:P 0 "register_operand" "=r")
22501 (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")
22502 (match_operand:P 2 "const_int_operand")]
22503 UNSPECV_PROBE_STACK_RANGE))
22504 (clobber (reg:CC FLAGS_REG))]
22505 ""
22506 "* return output_probe_stack_range (operands[0], operands[2]);"
22507 [(set_attr "type" "multi")])
22508
;; Only for 32-bit PIC code (!TARGET_64BIT && flag_pic): re-establishes
;; pic_offset_table_rtx at a setjmp receiver.  The label operand is matched
;; but not referenced by the preparation code.
22509 (define_expand "builtin_setjmp_receiver"
22510 [(label_ref (match_operand 0))]
22511 "!TARGET_64BIT && flag_pic"
22512 {
22513 #if TARGET_MACHO
22514 if (TARGET_MACHO)
22515 {
/* Mach-O: load the PIC register relative to a fresh label, then subtract
the offset that machopic_gen_offset produces for that label.  */
22516 rtx xops[3];
22517 rtx_code_label *label_rtx = gen_label_rtx ();
22518 emit_insn (gen_set_got_labelled (pic_offset_table_rtx, label_rtx));
22519 xops[0] = xops[1] = pic_offset_table_rtx;
22520 xops[2] = machopic_gen_offset (gen_rtx_LABEL_REF (SImode, label_rtx));
22521 ix86_expand_binary_operator (MINUS, SImode, xops);
22522 }
22523 else
22524 #endif
22525 emit_insn (gen_set_got (pic_offset_table_rtx));
22526 DONE;
22527 })
22528
;; Save the stack pointer (operand 1) into the save area (operand 0).  With
;; flag_cf_protection & CF_RETURN the area holds two words: the shadow stack
;; pointer at offset 0 and the stack pointer at offset UNITS_PER_WORD;
;; otherwise the stack pointer is stored at offset 0.
22529 (define_expand "save_stack_nonlocal"
22530 [(set (match_operand 0 "memory_operand")
22531 (match_operand 1 "register_operand"))]
22532 ""
22533 {
22534 rtx stack_slot;
22535
22536 if (flag_cf_protection & CF_RETURN)
22537 {
22538 /* Copy shadow stack pointer to the first slot
22539 and stack pointer to the second slot. */
22540 rtx ssp_slot = adjust_address (operands[0], word_mode, 0);
22541 stack_slot = adjust_address (operands[0], Pmode, UNITS_PER_WORD);
22542
/* The register starts out as zero before rdssp is applied to it, so a
defined value gets stored even if rdssp leaves it unchanged.  */
22543 rtx reg_ssp = force_reg (word_mode, const0_rtx);
22544 emit_insn (gen_rdssp (word_mode, reg_ssp, reg_ssp));
22545 emit_move_insn (ssp_slot, reg_ssp);
22546 }
22547 else
22548 stack_slot = adjust_address (operands[0], Pmode, 0);
22549 emit_move_insn (stack_slot, operands[1]);
22550 DONE;
22551 })
22552
;; Restore the stack pointer (operand 0) from the save area (operand 1).
;; With flag_cf_protection & CF_RETURN the area has the two-word layout
;; written by save_stack_nonlocal, and the shadow stack pointer is first
;; advanced with incssp (in steps of at most 255) until it matches the saved
;; value.  Otherwise the stack pointer is simply loaded from offset 0.
22553 (define_expand "restore_stack_nonlocal"
22554 [(set (match_operand 0 "register_operand" "")
22555 (match_operand 1 "memory_operand" ""))]
22556 ""
22557 {
22558 rtx stack_slot;
22559
22560 if (flag_cf_protection & CF_RETURN)
22561 {
22562 /* Restore shadow stack pointer from the first slot
22563 and stack pointer from the second slot. */
22564 rtx ssp_slot = adjust_address (operands[1], word_mode, 0);
22565 stack_slot = adjust_address (operands[1], Pmode, UNITS_PER_WORD);
22566
22567 /* Get the current shadow stack pointer. The code below will check if
22568 SHSTK feature is enabled. If it is not enabled the RDSSP instruction
22569 is a NOP. */
22570 rtx reg_ssp = force_reg (word_mode, const0_rtx);
22571 emit_insn (gen_rdssp (word_mode, reg_ssp, reg_ssp));
22572
22573 /* Compare through subtraction the saved and the current ssp
22574 to decide if ssp has to be adjusted. */
22575 reg_ssp = expand_simple_binop (word_mode, MINUS,
22576 reg_ssp, ssp_slot,
22577 reg_ssp, 1, OPTAB_DIRECT);
22578
22579 /* Compare and jump over adjustment code. */
22580 rtx noadj_label = gen_label_rtx ();
22581 emit_cmp_and_jump_insns (reg_ssp, const0_rtx, EQ, NULL_RTX,
22582 word_mode, 1, noadj_label);
22583
22584 /* Compute the number of frames to adjust. */
/* The difference (current - saved) is negated and shifted right by
log2 (UNITS_PER_WORD), i.e. converted from bytes to words.  */
22585 rtx reg_adj = gen_lowpart (ptr_mode, reg_ssp);
22586 rtx reg_adj_neg = expand_simple_unop (ptr_mode, NEG, reg_adj,
22587 NULL_RTX, 1);
22588
22589 reg_adj = expand_simple_binop (ptr_mode, LSHIFTRT, reg_adj_neg,
22590 GEN_INT (exact_log2 (UNITS_PER_WORD)),
22591 reg_adj, 1, OPTAB_DIRECT);
22592
22593 /* Check if number of frames <= 255 so no loop is needed. */
22594 rtx inc_label = gen_label_rtx ();
22595 emit_cmp_and_jump_insns (reg_adj, GEN_INT (255), LEU, NULL_RTX,
22596 ptr_mode, 1, inc_label);
22597
22598 /* Adjust the ssp in a loop. */
22599 rtx loop_label = gen_label_rtx ();
22600 emit_label (loop_label);
22601 LABEL_NUSES (loop_label) = 1;
22602
/* Each iteration advances the shadow stack by 255 and decreases the
remaining count by the same amount.  */
22603 rtx reg_255 = force_reg (word_mode, GEN_INT (255));
22604 emit_insn (gen_incssp (word_mode, reg_255));
22605
22606 reg_adj = expand_simple_binop (ptr_mode, MINUS,
22607 reg_adj, GEN_INT (255),
22608 reg_adj, 1, OPTAB_DIRECT);
22609
22610 /* Compare and jump to the loop label. */
22611 emit_cmp_and_jump_insns (reg_adj, GEN_INT (255), GTU, NULL_RTX,
22612 ptr_mode, 1, loop_label);
22613
22614 emit_label (inc_label);
22615 LABEL_NUSES (inc_label) = 1;
22616
/* NOTE(review): the final incssp takes reg_ssp rather than reg_adj.
reg_adj was created as the ptr_mode lowpart of reg_ssp and is passed as
the target of the arithmetic above, so the remaining count presumably
lives in the same register -- confirm.  */
22617 emit_insn (gen_incssp (word_mode, reg_ssp));
22618
22619 emit_label (noadj_label);
22620 LABEL_NUSES (noadj_label) = 1;
22621 }
22622 else
22623 stack_slot = adjust_address (operands[1], Pmode, 0);
22624 emit_move_insn (operands[0], stack_slot);
22625 DONE;
22626 })
22627
22628
22629 ;; Avoid redundant prefixes by splitting HImode arithmetic to SImode.
22630 ;; Do not split instructions with mask registers.
;; Post-reload split that re-expresses an HImode or QImode binary operation
;; on general registers as the same operation in SImode (flags clobber
;; kept).  Disabled under TARGET_PARTIAL_REG_STALL; the HImode and QImode
;; cases each have their own extra conditions below.
22631 (define_split
22632 [(set (match_operand 0 "general_reg_operand")
22633 (match_operator 3 "promotable_binary_operator"
22634 [(match_operand 1 "general_reg_operand")
22635 (match_operand 2 "aligned_operand")]))
22636 (clobber (reg:CC FLAGS_REG))]
22637 "! TARGET_PARTIAL_REG_STALL && reload_completed
22638 && ((GET_MODE (operands[0]) == HImode
22639 && ((optimize_function_for_speed_p (cfun) && !TARGET_FAST_PREFIX)
22640 /* ??? next two lines just !satisfies_constraint_K (...) */
22641 || !CONST_INT_P (operands[2])
22642 || satisfies_constraint_K (operands[2])))
22643 || (GET_MODE (operands[0]) == QImode
22644 && (TARGET_PROMOTE_QImode || optimize_function_for_size_p (cfun))))"
22645 [(parallel [(set (match_dup 0)
22646 (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
22647 (clobber (reg:CC FLAGS_REG))])]
22648 {
22649 operands[0] = gen_lowpart (SImode, operands[0]);
22650 operands[1] = gen_lowpart (SImode, operands[1]);
/* For ASHIFT the second operand is left in its original mode.  */
22651 if (GET_CODE (operands[3]) != ASHIFT)
22652 operands[2] = gen_lowpart (SImode, operands[2]);
/* Change the mode on a copy of the operator, not on the matched rtx.  */
22653 operands[3] = shallow_copy_rtx (operands[3]);
22654 PUT_MODE (operands[3], SImode);
22655 })
22656
22657 ; Promote the QImode tests, as i386 has encoding of the AND
22658 ; instruction with 32-bit sign-extended immediate and thus the
22659 ; instruction size is unchanged, except in the %eax case for
22660 ; which it is increased by one byte, hence the ! optimize_size.
;; Post-reload split for an AND with a constant that both sets the flags
;; (compared against zero) and writes a register: an HImode or QImode
;; operation is re-expressed in SImode.  Only when optimizing for speed and
;; not under TARGET_PARTIAL_REG_STALL.
22661 (define_split
22662 [(set (match_operand 0 "flags_reg_operand")
22663 (match_operator 2 "compare_operator"
22664 [(and (match_operand 3 "aligned_operand")
22665 (match_operand 4 "const_int_operand"))
22666 (const_int 0)]))
22667 (set (match_operand 1 "register_operand")
22668 (and (match_dup 3) (match_dup 4)))]
22669 "! TARGET_PARTIAL_REG_STALL && reload_completed
22670 && optimize_insn_for_speed_p ()
22671 && ((GET_MODE (operands[1]) == HImode && ! TARGET_FAST_PREFIX)
22672 || (GET_MODE (operands[1]) == QImode && TARGET_PROMOTE_QImode))
22673 /* Ensure that the operand will remain sign-extended immediate. */
22674 && ix86_match_ccmode (insn, INTVAL (operands[4]) >= 0 ? CCNOmode : CCZmode)"
22675 [(parallel [(set (match_dup 0)
22676 (match_op_dup 2 [(and:SI (match_dup 3) (match_dup 4))
22677 (const_int 0)]))
22678 (set (match_dup 1)
22679 (and:SI (match_dup 3) (match_dup 4)))])]
22680 {
/* Mask the constant to the width of the original mode (read from
operands[1] before it is replaced below) and rebuild it for SImode.  */
22681 operands[4]
22682 = gen_int_mode (INTVAL (operands[4])
22683 & GET_MODE_MASK (GET_MODE (operands[1])), SImode);
22684 operands[1] = gen_lowpart (SImode, operands[1]);
22685 operands[3] = gen_lowpart (SImode, operands[3]);
22686 })
22687
22688 ; Don't promote the QImode tests, as i386 doesn't have encoding of
22689 ; the TEST instruction with 32-bit sign-extended immediate and thus
22690 ; the instruction size would at least double, which is not what we
22691 ; want even with ! optimize_size.
;; Post-reload split for a flags-only HImode AND-with-constant test: the
;; operation is re-expressed in SImode.  Only when optimizing for speed,
;; without TARGET_FAST_PREFIX and not under TARGET_PARTIAL_REG_STALL.
22692 (define_split
22693 [(set (match_operand 0 "flags_reg_operand")
22694 (match_operator 1 "compare_operator"
22695 [(and (match_operand:HI 2 "aligned_operand")
22696 (match_operand:HI 3 "const_int_operand"))
22697 (const_int 0)]))]
22698 "! TARGET_PARTIAL_REG_STALL && reload_completed
22699 && ! TARGET_FAST_PREFIX
22700 && optimize_insn_for_speed_p ()
22701 /* Ensure that the operand will remain sign-extended immediate. */
22702 && ix86_match_ccmode (insn, INTVAL (operands[3]) >= 0 ? CCNOmode : CCZmode)"
22703 [(set (match_dup 0)
22704 (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3))
22705 (const_int 0)]))]
22706 {
/* Mask the constant to the width of the original mode (read from
operands[2] before it is replaced below) and rebuild it for SImode.  */
22707 operands[3]
22708 = gen_int_mode (INTVAL (operands[3])
22709 & GET_MODE_MASK (GET_MODE (operands[2])), SImode);
22710 operands[2] = gen_lowpart (SImode, operands[2]);
22711 })
22712
;; Post-reload split that re-expresses an HImode negation, or a QImode one
;; when TARGET_PROMOTE_QImode or optimizing for size, as an SImode negation
;; of the SImode lowparts.  Disabled under TARGET_PARTIAL_REG_STALL.
22713 (define_split
22714 [(set (match_operand 0 "register_operand")
22715 (neg (match_operand 1 "register_operand")))
22716 (clobber (reg:CC FLAGS_REG))]
22717 "! TARGET_PARTIAL_REG_STALL && reload_completed
22718 && (GET_MODE (operands[0]) == HImode
22719 || (GET_MODE (operands[0]) == QImode
22720 && (TARGET_PROMOTE_QImode
22721 || optimize_insn_for_size_p ())))"
22722 [(parallel [(set (match_dup 0)
22723 (neg:SI (match_dup 1)))
22724 (clobber (reg:CC FLAGS_REG))])]
22725 {
22726 operands[0] = gen_lowpart (SImode, operands[0]);
22727 operands[1] = gen_lowpart (SImode, operands[1]);
22728 })
22729
22730 ;; Do not split instructions with mask regs.
;; Post-reload split that re-expresses an HImode one's complement, or a
;; QImode one when TARGET_PROMOTE_QImode or optimizing for size, as an
;; SImode NOT of the SImode lowparts.  Both operands must be general
;; registers.  Disabled under TARGET_PARTIAL_REG_STALL.
22731 (define_split
22732 [(set (match_operand 0 "general_reg_operand")
22733 (not (match_operand 1 "general_reg_operand")))]
22734 "! TARGET_PARTIAL_REG_STALL && reload_completed
22735 && (GET_MODE (operands[0]) == HImode
22736 || (GET_MODE (operands[0]) == QImode
22737 && (TARGET_PROMOTE_QImode
22738 || optimize_insn_for_size_p ())))"
22739 [(set (match_dup 0)
22740 (not:SI (match_dup 1)))]
22741 {
22742 operands[0] = gen_lowpart (SImode, operands[0]);
22743 operands[1] = gen_lowpart (SImode, operands[1]);
22744 })
22745 \f
22746 ;; RTL Peephole optimizations, run before sched2. These primarily look to
22747 ;; transform a complex memory operation into two memory to register operations.
22748
22749 ;; Don't push memory operands
;; A push of a memory operand is replaced by a load into a scratch register
;; followed by a push of that register.  Skipped when TARGET_PUSH_MEMORY is
;; set, when optimizing for size, or when the push is frame related.
22750 (define_peephole2
22751 [(set (match_operand:SWI 0 "push_operand")
22752 (match_operand:SWI 1 "memory_operand"))
22753 (match_scratch:SWI 2 "<r>")]
22754 "!(TARGET_PUSH_MEMORY || optimize_insn_for_size_p ())
22755 && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
22756 [(set (match_dup 2) (match_dup 1))
22757 (set (match_dup 0) (match_dup 2))])
22758
22759 ;; We need to handle SFmode only, because DFmode and XFmode are split to
22760 ;; SImode pushes.
;; SFmode version of the previous peephole: load the memory operand into an
;; "r" scratch register and push the register instead.
22761 (define_peephole2
22762 [(set (match_operand:SF 0 "push_operand")
22763 (match_operand:SF 1 "memory_operand"))
22764 (match_scratch:SF 2 "r")]
22765 "!(TARGET_PUSH_MEMORY || optimize_insn_for_size_p ())
22766 && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
22767 [(set (match_dup 2) (match_dup 1))
22768 (set (match_dup 0) (match_dup 2))])
22769
22770 ;; Don't move an immediate directly to memory when the instruction
22771 ;; gets too big, or if LCP stalls are a problem for 16-bit moves.
;; Store of zero to memory: clear a scratch register and store the
;; register.  The clear is emitted on the SImode lowpart of the scratch with
;; a FLAGS_REG clobber, which is why the flags must be dead here.
22772 (define_peephole2
22773 [(match_scratch:SWI124 1 "<r>")
22774 (set (match_operand:SWI124 0 "memory_operand")
22775 (const_int 0))]
22776 "optimize_insn_for_speed_p ()
22777 && ((<MODE>mode == HImode
22778 && TARGET_LCP_STALL)
22779 || (!TARGET_USE_MOV0
22780 && TARGET_SPLIT_LONG_MOVES
22781 && get_attr_length (insn) >= ix86_cur_cost ()->large_insn))
22782 && peep2_regno_dead_p (0, FLAGS_REG)"
22783 [(parallel [(set (match_dup 2) (const_int 0))
22784 (clobber (reg:CC FLAGS_REG))])
22785 (set (match_dup 0) (match_dup 1))]
22786 "operands[2] = gen_lowpart (SImode, operands[1]);")
22787
;; Store of an arbitrary immediate to memory: load the immediate into a
;; scratch register and store the register.  Applies when optimizing for
;; speed and either the HImode/TARGET_LCP_STALL case holds or
;; TARGET_SPLIT_LONG_MOVES is set and the insn length reaches the cost
;; table's large_insn value.
22788 (define_peephole2
22789 [(match_scratch:SWI124 2 "<r>")
22790 (set (match_operand:SWI124 0 "memory_operand")
22791 (match_operand:SWI124 1 "immediate_operand"))]
22792 "optimize_insn_for_speed_p ()
22793 && ((<MODE>mode == HImode
22794 && TARGET_LCP_STALL)
22795 || (TARGET_SPLIT_LONG_MOVES
22796 && get_attr_length (insn) >= ix86_cur_cost ()->large_insn))"
22797 [(set (match_dup 2) (match_dup 1))
22798 (set (match_dup 0) (match_dup 2))])
22799
22800 ;; Don't compare memory with zero, load and use a test instead.
;; SImode only: a compare of a memory operand against zero becomes a load
;; into a scratch register followed by the same compare on the register.
;; Requires optimizing for speed and a flags use compatible with CCNOmode.
22801 (define_peephole2
22802 [(set (match_operand 0 "flags_reg_operand")
22803 (match_operator 1 "compare_operator"
22804 [(match_operand:SI 2 "memory_operand")
22805 (const_int 0)]))
22806 (match_scratch:SI 3 "r")]
22807 "optimize_insn_for_speed_p () && ix86_match_ccmode (insn, CCNOmode)"
22808 [(set (match_dup 3) (match_dup 2))
22809 (set (match_dup 0) (match_op_dup 1 [(match_dup 3) (const_int 0)]))])
22810
22811 ;; NOT is not pairable on Pentium, while XOR is, but one byte longer.
22812 ;; Don't split NOTs with a displacement operand, because resulting XOR
22813 ;; will not be pairable anyway.
22814 ;;
22815 ;; On AMD K6, NOT is vector decoded with memory operand that cannot be
22816 ;; represented using a modRM byte. The XOR replacement is long decoded,
22817 ;; so this split helps here as well.
22818 ;;
22819 ;; Note: Can't do this as a regular split because we can't get proper
22820 ;; lifetime information then.
22821
;; Replace NOT by XOR with -1.  The XOR form carries a FLAGS_REG clobber,
;; so the flags must be dead at this point (peep2_regno_dead_p).
22822 (define_peephole2
22823 [(set (match_operand:SWI124 0 "nonimmediate_gr_operand")
22824 (not:SWI124 (match_operand:SWI124 1 "nonimmediate_gr_operand")))]
22825 "optimize_insn_for_speed_p ()
22826 && ((TARGET_NOT_UNPAIRABLE
22827 && (!MEM_P (operands[0])
22828 || !memory_displacement_operand (operands[0], <MODE>mode)))
22829 || (TARGET_NOT_VECTORMODE
22830 && long_memory_operand (operands[0], <MODE>mode)))
22831 && peep2_regno_dead_p (0, FLAGS_REG)"
22832 [(parallel [(set (match_dup 0)
22833 (xor:SWI124 (match_dup 1) (const_int -1)))
22834 (clobber (reg:CC FLAGS_REG))])])
22835
22836 ;; Non pairable "test imm, reg" instructions can be translated to
22837 ;; "and imm, reg" if reg dies. The "and" form is also shorter (one
22838 ;; byte opcode instead of two, have a short form for byte operands),
22839 ;; so do it for other CPUs as well. Given that the value was dead,
22840 ;; this should not create any new dependencies. Pass on the sub-word
22841 ;; versions if we're concerned about partial register stalls.
22842
;; SImode case: the flags-only test of (reg & imm) becomes a parallel that
;; sets the flags and also writes the AND result back to the register.
;; Requires the register to be dead after the insn; for AX_REG the immediate
;; must additionally satisfy constraint K.
22843 (define_peephole2
22844 [(set (match_operand 0 "flags_reg_operand")
22845 (match_operator 1 "compare_operator"
22846 [(and:SI (match_operand:SI 2 "register_operand")
22847 (match_operand:SI 3 "immediate_operand"))
22848 (const_int 0)]))]
22849 "ix86_match_ccmode (insn, CCNOmode)
22850 && (REGNO (operands[2]) != AX_REG
22851 || satisfies_constraint_K (operands[3]))
22852 && peep2_reg_dead_p (1, operands[2])"
22853 [(parallel
22854 [(set (match_dup 0)
22855 (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3))
22856 (const_int 0)]))
22857 (set (match_dup 2)
22858 (and:SI (match_dup 2) (match_dup 3)))])])
22859
22860 ;; We don't need to handle HImode case, because it will be promoted to SImode
22861 ;; on ! TARGET_PARTIAL_REG_STALL
22862
;; QImode case of the test-to-and transformation.  Not applied under
;; TARGET_PARTIAL_REG_STALL or when the register is AX_REG; the register
;; must be dead after the insn.
22863 (define_peephole2
22864 [(set (match_operand 0 "flags_reg_operand")
22865 (match_operator 1 "compare_operator"
22866 [(and:QI (match_operand:QI 2 "register_operand")
22867 (match_operand:QI 3 "immediate_operand"))
22868 (const_int 0)]))]
22869 "! TARGET_PARTIAL_REG_STALL
22870 && ix86_match_ccmode (insn, CCNOmode)
22871 && REGNO (operands[2]) != AX_REG
22872 && peep2_reg_dead_p (1, operands[2])"
22873 [(parallel
22874 [(set (match_dup 0)
22875 (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3))
22876 (const_int 0)]))
22877 (set (match_dup 2)
22878 (and:QI (match_dup 2) (match_dup 3)))])])
22879
;; Test-to-and transformation for the 8-bit field at bit position 8 of a
;; QIreg_operand (the zero_extract with size 8, position 8).  The
;; replacement sets the flags from the same AND and also writes the result
;; back into that field.  Same conditions as the QImode case above.
22880 (define_peephole2
22881 [(set (match_operand 0 "flags_reg_operand")
22882 (match_operator 1 "compare_operator"
22883 [(and:QI
22884 (subreg:QI
22885 (zero_extract:SWI248 (match_operand:SWI248 2 "QIreg_operand")
22886 (const_int 8)
22887 (const_int 8)) 0)
22888 (match_operand 3 "const_int_operand"))
22889 (const_int 0)]))]
22890 "! TARGET_PARTIAL_REG_STALL
22891 && ix86_match_ccmode (insn, CCNOmode)
22892 && REGNO (operands[2]) != AX_REG
22893 && peep2_reg_dead_p (1, operands[2])"
22894 [(parallel
22895 [(set (match_dup 0)
22896 (match_op_dup 1
22897 [(and:QI
22898 (subreg:QI
22899 (zero_extract:SWI248 (match_dup 2)
22900 (const_int 8)
22901 (const_int 8)) 0)
22902 (match_dup 3))
22903 (const_int 0)]))
22904 (set (zero_extract:SWI248 (match_dup 2)
22905 (const_int 8)
22906 (const_int 8))
22907 (subreg:SWI248
22908 (and:QI
22909 (subreg:QI
22910 (zero_extract:SWI248 (match_dup 2)
22911 (const_int 8)
22912 (const_int 8)) 0)
22913 (match_dup 3)) 0))])])
22914
22915 ;; Don't do logical operations with memory inputs.
;; "reg = reg OP mem" (memory as the second input) becomes a load of the
;; memory operand into a scratch register followed by "reg = reg OP
;; scratch".  Skipped when TARGET_READ_MODIFY is set or when optimizing for
;; size.
22916 (define_peephole2
22917 [(match_scratch:SWI 2 "<r>")
22918 (parallel [(set (match_operand:SWI 0 "register_operand")
22919 (match_operator:SWI 3 "arith_or_logical_operator"
22920 [(match_dup 0)
22921 (match_operand:SWI 1 "memory_operand")]))
22922 (clobber (reg:CC FLAGS_REG))])]
22923 "!(TARGET_READ_MODIFY || optimize_insn_for_size_p ())"
22924 [(set (match_dup 2) (match_dup 1))
22925 (parallel [(set (match_dup 0)
22926 (match_op_dup 3 [(match_dup 0) (match_dup 2)]))
22927 (clobber (reg:CC FLAGS_REG))])])
22928
;; Mirror of the previous peephole for "reg = mem OP reg" (memory as the
;; first input): the memory operand is loaded into a scratch register and
;; the operation becomes "reg = scratch OP reg", keeping the input order.
22929 (define_peephole2
22930 [(match_scratch:SWI 2 "<r>")
22931 (parallel [(set (match_operand:SWI 0 "register_operand")
22932 (match_operator:SWI 3 "arith_or_logical_operator"
22933 [(match_operand:SWI 1 "memory_operand")
22934 (match_dup 0)]))
22935 (clobber (reg:CC FLAGS_REG))])]
22936 "!(TARGET_READ_MODIFY || optimize_insn_for_size_p ())"
22937 [(set (match_dup 2) (match_dup 1))
22938 (parallel [(set (match_dup 0)
22939 (match_op_dup 3 [(match_dup 2) (match_dup 0)]))
22940 (clobber (reg:CC FLAGS_REG))])])
22941
22942 ;; Prefer Load+RegOp to Mov+MemOp. Watch out for cases when
22943 ;; the memory address refers to the destination of the load!
22944
;; General-register form:
;;   reg0 = reg1; reg0 = reg0 OP mem2   ==>   reg0 = mem4; reg0 = reg0 OP reg1
;; for a commutative OP, where reg0 and reg1 are different hard registers.
;; mem4 is mem2 with every use of reg0 replaced by reg1 (replace_rtx), so an
;; address that mentioned reg0 still refers to the value it had after the
;; original copy.  In QImode, reg1 must satisfy any_QIreg_operand because
;; it becomes an operand of the QImode arithmetic insn.
22945 (define_peephole2
22946 [(set (match_operand:SWI 0 "general_reg_operand")
22947 (match_operand:SWI 1 "general_reg_operand"))
22948 (parallel [(set (match_dup 0)
22949 (match_operator:SWI 3 "commutative_operator"
22950 [(match_dup 0)
22951 (match_operand:SWI 2 "memory_operand")]))
22952 (clobber (reg:CC FLAGS_REG))])]
22953 "REGNO (operands[0]) != REGNO (operands[1])
22954 && (<MODE>mode != QImode
22955 || any_QIreg_operand (operands[1], QImode))"
22956 [(set (match_dup 0) (match_dup 4))
22957 (parallel [(set (match_dup 0)
22958 (match_op_dup 3 [(match_dup 0) (match_dup 1)]))
22959 (clobber (reg:CC FLAGS_REG))])]
22960 "operands[4] = replace_rtx (operands[2], operands[0], operands[1], true);")
22961
;; MMX-register form of the load + register-op rewrite:
;;   reg0 = reg1; reg0 = reg0 OP mem2   ==>   reg0 = mem2; reg0 = reg0 OP reg1
;; for a commutative OP with reg0 and reg1 being different registers.
;; The pattern is mode-less and has no flags clobber; mem2 is reused
;; unchanged (no replace_rtx step here).
22962 (define_peephole2
22963 [(set (match_operand 0 "mmx_reg_operand")
22964 (match_operand 1 "mmx_reg_operand"))
22965 (set (match_dup 0)
22966 (match_operator 3 "commutative_operator"
22967 [(match_dup 0)
22968 (match_operand 2 "memory_operand")]))]
22969 "REGNO (operands[0]) != REGNO (operands[1])"
22970 [(set (match_dup 0) (match_dup 2))
22971 (set (match_dup 0)
22972 (match_op_dup 3 [(match_dup 0) (match_dup 1)]))])
22973
;; SSE-register form of the load + register-op rewrite:
;;   reg0 = reg1; reg0 = reg0 OP mem2   ==>   reg0 = mem2; reg0 = reg0 OP reg1
;; for a commutative OP with reg0 and reg1 being different registers.
;; After the rewrite reg1 becomes a source of the arithmetic insn, so the
;; condition rejects the case where reg1 is an extended (EVEX-only)
;; register unless AVX512BW is enabled, the element size is larger than
;; 2 bytes, or OP is a logic operator; the in-string comment gives the
;; encoding rationale.
22974 (define_peephole2
22975 [(set (match_operand 0 "sse_reg_operand")
22976 (match_operand 1 "sse_reg_operand"))
22977 (set (match_dup 0)
22978 (match_operator 3 "commutative_operator"
22979 [(match_dup 0)
22980 (match_operand 2 "memory_operand")]))]
22981 "REGNO (operands[0]) != REGNO (operands[1])
22982 /* Punt if operands[1] is %[xy]mm16+ and AVX512BW is not enabled,
22983 as EVEX encoded vpadd[bw], vpmullw, vpmin[su][bw] and vpmax[su][bw]
22984 instructions require AVX512BW and AVX512VL, but with the original
22985 instructions it might require just AVX512VL.
22986 AVX512VL is implied from TARGET_HARD_REGNO_MODE_OK. */
22987 && (!EXT_REX_SSE_REGNO_P (REGNO (operands[1]))
22988 || TARGET_AVX512BW
22989 || GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (operands[0]))) > 2
22990 || logic_operator (operands[3], VOIDmode))"
22991 [(set (match_dup 0) (match_dup 2))
22992 (set (match_dup 0)
22993 (match_op_dup 3 [(match_dup 0) (match_dup 1)]))])
22994
22995 ; Don't do logical operations with memory outputs
22996 ;
22997 ; These two don't make sense for PPro/PII -- we're expanding a 4-uop
22998 ; instruction into two 1-uop insns plus a 2-uop insn. That last has
22999 ; the same decoder scheduling characteristics as the original.
23000
;; mem0 = mem0 OP x1   ==>   scratch2 = mem0; scratch2 = scratch2 OP x1;
;;                           mem0 = scratch2
;; x1 is a non-memory operand and OP any arith_or_logical_operator.
;; Requires a free scratch register; disabled when
;; TARGET_READ_MODIFY_WRITE is set or the insn is optimized for size.
23001 (define_peephole2
23002 [(match_scratch:SWI 2 "<r>")
23003 (parallel [(set (match_operand:SWI 0 "memory_operand")
23004 (match_operator:SWI 3 "arith_or_logical_operator"
23005 [(match_dup 0)
23006 (match_operand:SWI 1 "<nonmemory_operand>")]))
23007 (clobber (reg:CC FLAGS_REG))])]
23008 "!(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())"
23009 [(set (match_dup 2) (match_dup 0))
23010 (parallel [(set (match_dup 2)
23011 (match_op_dup 3 [(match_dup 2) (match_dup 1)]))
23012 (clobber (reg:CC FLAGS_REG))])
23013 (set (match_dup 0) (match_dup 2))])
23014
;; Same as the previous form but with the memory destination appearing as
;; the second source operand:
;;   mem0 = x1 OP mem0   ==>   scratch2 = mem0; scratch2 = x1 OP scratch2;
;;                             mem0 = scratch2
;; Operand order is preserved in the replacement.
23015 (define_peephole2
23016 [(match_scratch:SWI 2 "<r>")
23017 (parallel [(set (match_operand:SWI 0 "memory_operand")
23018 (match_operator:SWI 3 "arith_or_logical_operator"
23019 [(match_operand:SWI 1 "<nonmemory_operand>")
23020 (match_dup 0)]))
23021 (clobber (reg:CC FLAGS_REG))])]
23022 "!(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())"
23023 [(set (match_dup 2) (match_dup 0))
23024 (parallel [(set (match_dup 2)
23025 (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
23026 (clobber (reg:CC FLAGS_REG))])
23027 (set (match_dup 0) (match_dup 2))])
23028
23029 ;; Attempt to use arith or logical operations with memory outputs with
23030 ;; setting of flags.
;; Four-insn sequence
;;   reg0 = mem1; reg0 = reg0 OP x2 (flags clobbered); mem1 = reg0;
;;   flags = compare (reg0, 0)
;; ==> one parallel
;;   flags = compare (mem1 OP x2, 0); mem1 = mem1 OP x2
;; Conditions: TARGET_READ_MODIFY_WRITE or optimizing for size; reg0 dead
;; (peep2_reg_dead_p at offset 4); reg0 not mentioned in mem1 or x2; in
;; QImode x2 must be an immediate or satisfy any_QIreg_operand; and the
;; compare insn (peep2_next_insn (3)) must match CCGOCmode for PLUS/MINUS
;; or CCNOmode for other operators.
;; operands[4] is the flags destination taken from the compare insn,
;; operands[5] the rebuilt memory operation, operands[6] its compare
;; against zero.
23031 (define_peephole2
23032 [(set (match_operand:SWI 0 "register_operand")
23033 (match_operand:SWI 1 "memory_operand"))
23034 (parallel [(set (match_dup 0)
23035 (match_operator:SWI 3 "plusminuslogic_operator"
23036 [(match_dup 0)
23037 (match_operand:SWI 2 "<nonmemory_operand>")]))
23038 (clobber (reg:CC FLAGS_REG))])
23039 (set (match_dup 1) (match_dup 0))
23040 (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))]
23041 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
23042 && peep2_reg_dead_p (4, operands[0])
23043 && !reg_overlap_mentioned_p (operands[0], operands[1])
23044 && !reg_overlap_mentioned_p (operands[0], operands[2])
23045 && (<MODE>mode != QImode
23046 || immediate_operand (operands[2], QImode)
23047 || any_QIreg_operand (operands[2], QImode))
23048 && ix86_match_ccmode (peep2_next_insn (3),
23049 (GET_CODE (operands[3]) == PLUS
23050 || GET_CODE (operands[3]) == MINUS)
23051 ? CCGOCmode : CCNOmode)"
23052 [(parallel [(set (match_dup 4) (match_dup 6))
23053 (set (match_dup 1) (match_dup 5))])]
23054 {
23055 operands[4] = SET_DEST (PATTERN (peep2_next_insn (3)));
23056 operands[5]
23057 = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
23058 copy_rtx (operands[1]),
23059 operands[2]);
23060 operands[6]
23061 = gen_rtx_COMPARE (GET_MODE (operands[4]),
23062 copy_rtx (operands[5]),
23063 const0_rtx);
23064 })
23065
23066 ;; Likewise for cmpelim optimized pattern.
;; Three-insn variant in which the arithmetic insn already sets the flags:
;;   reg0 = mem1;
;;   { flags = compare (reg0 OP x2, 0); reg0 = reg0 OP x2 };
;;   mem1 = reg0
;; ==> { flags = compare (mem1 OP x2, 0); mem1 = mem1 OP x2 }
;; reg0 must be dead (peep2_reg_dead_p at offset 3) and must not be
;; mentioned in mem1 or x2.  The flags destination (operands[4]) is read
;; from the first element of the parallel in the second matched insn, and
;; that insn is the one checked with ix86_match_ccmode.
23067 (define_peephole2
23068 [(set (match_operand:SWI 0 "register_operand")
23069 (match_operand:SWI 1 "memory_operand"))
23070 (parallel [(set (reg FLAGS_REG)
23071 (compare (match_operator:SWI 3 "plusminuslogic_operator"
23072 [(match_dup 0)
23073 (match_operand:SWI 2 "<nonmemory_operand>")])
23074 (const_int 0)))
23075 (set (match_dup 0) (match_dup 3))])
23076 (set (match_dup 1) (match_dup 0))]
23077 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
23078 && peep2_reg_dead_p (3, operands[0])
23079 && !reg_overlap_mentioned_p (operands[0], operands[1])
23080 && !reg_overlap_mentioned_p (operands[0], operands[2])
23081 && ix86_match_ccmode (peep2_next_insn (1),
23082 (GET_CODE (operands[3]) == PLUS
23083 || GET_CODE (operands[3]) == MINUS)
23084 ? CCGOCmode : CCNOmode)"
23085 [(parallel [(set (match_dup 4) (match_dup 6))
23086 (set (match_dup 1) (match_dup 5))])]
23087 {
23088 operands[4] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (1)), 0, 0));
23089 operands[5]
23090 = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
23091 copy_rtx (operands[1]), operands[2]);
23092 operands[6]
23093 = gen_rtx_COMPARE (GET_MODE (operands[4]), copy_rtx (operands[5]),
23094 const0_rtx);
23095 })
23096
23097 ;; Likewise for instances where we have a lea pattern.
;; Variant where the addition is a flag-less plus in <LEAMODE>:
;;   reg0 = mem1; reg3 = reg4 + x2; mem1 = reg5; flags = compare (reg5, 0)
;; with reg4 being the same hard register as reg0 and reg5 the same as reg3
;; ==> { flags = compare (mem1 + x2, 0); mem1 = mem1 + x2 }
;; The new PLUS is built in <MODE>mode, with x2 narrowed by gen_lowpart.
;; Conditions: reg3 dead at offset 4; reg0 either the same register as
;; reg3 or dead at offset 2; neither reg0 nor reg3 mentioned in mem1; reg0
;; not mentioned in x2; the usual QImode restriction on x2; and the
;; compare insn must match CCGOCmode.
23098 (define_peephole2
23099 [(set (match_operand:SWI 0 "register_operand")
23100 (match_operand:SWI 1 "memory_operand"))
23101 (set (match_operand:<LEAMODE> 3 "register_operand")
23102 (plus:<LEAMODE> (match_operand:<LEAMODE> 4 "register_operand")
23103 (match_operand:<LEAMODE> 2 "<nonmemory_operand>")))
23104 (set (match_dup 1) (match_operand:SWI 5 "register_operand"))
23105 (set (reg FLAGS_REG) (compare (match_dup 5) (const_int 0)))]
23106 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
23107 && REGNO (operands[4]) == REGNO (operands[0])
23108 && REGNO (operands[5]) == REGNO (operands[3])
23109 && peep2_reg_dead_p (4, operands[3])
23110 && ((REGNO (operands[0]) == REGNO (operands[3]))
23111 || peep2_reg_dead_p (2, operands[0]))
23112 && !reg_overlap_mentioned_p (operands[0], operands[1])
23113 && !reg_overlap_mentioned_p (operands[3], operands[1])
23114 && !reg_overlap_mentioned_p (operands[0], operands[2])
23115 && (<MODE>mode != QImode
23116 || immediate_operand (operands[2], QImode)
23117 || any_QIreg_operand (operands[2], QImode))
23118 && ix86_match_ccmode (peep2_next_insn (3), CCGOCmode)"
23119 [(parallel [(set (match_dup 6) (match_dup 8))
23120 (set (match_dup 1) (match_dup 7))])]
23121 {
23122 operands[6] = SET_DEST (PATTERN (peep2_next_insn (3)));
23123 operands[7]
23124 = gen_rtx_PLUS (<MODE>mode,
23125 copy_rtx (operands[1]),
23126 gen_lowpart (<MODE>mode, operands[2]));
23127 operands[8]
23128 = gen_rtx_COMPARE (GET_MODE (operands[6]),
23129 copy_rtx (operands[7]),
23130 const0_rtx);
23131 })
23132
;; Three-insn sequence where the memory is the second source operand:
;;   reg0 = reg0 OP mem1 (flags clobbered); mem1 = reg0;
;;   flags = compare (reg0, 0)
;; ==> { flags = compare (mem1 OP reg0, 0); mem1 = mem1 OP reg0 }
;; The operands are swapped in the replacement, hence the
;; COMMUTATIVE_ARITH_P requirement.  reg0 must be dead at offset 3 and not
;; mentioned in mem1; the compare insn must match CCGOCmode for PLUS and
;; CCNOmode otherwise.
23133 (define_peephole2
23134 [(parallel [(set (match_operand:SWI 0 "register_operand")
23135 (match_operator:SWI 2 "plusminuslogic_operator"
23136 [(match_dup 0)
23137 (match_operand:SWI 1 "memory_operand")]))
23138 (clobber (reg:CC FLAGS_REG))])
23139 (set (match_dup 1) (match_dup 0))
23140 (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))]
23141 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
23142 && COMMUTATIVE_ARITH_P (operands[2])
23143 && peep2_reg_dead_p (3, operands[0])
23144 && !reg_overlap_mentioned_p (operands[0], operands[1])
23145 && ix86_match_ccmode (peep2_next_insn (2),
23146 GET_CODE (operands[2]) == PLUS
23147 ? CCGOCmode : CCNOmode)"
23148 [(parallel [(set (match_dup 3) (match_dup 5))
23149 (set (match_dup 1) (match_dup 4))])]
23150 {
23151 operands[3] = SET_DEST (PATTERN (peep2_next_insn (2)));
23152 operands[4]
23153 = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
23154 copy_rtx (operands[1]),
23155 operands[0]);
23156 operands[5]
23157 = gen_rtx_COMPARE (GET_MODE (operands[3]),
23158 copy_rtx (operands[4]),
23159 const0_rtx);
23160 })
23161
23162 ;; Likewise for cmpelim optimized pattern.
;; Two-insn variant in which the arithmetic insn already sets the flags:
;;   { flags = compare (reg0 OP mem1, 0); reg0 = reg0 OP mem1 };
;;   mem1 = reg0
;; ==> { flags = compare (mem1 OP reg0, 0); mem1 = mem1 OP reg0 }
;; Requires a commutative OP, reg0 dead at offset 2 and not mentioned in
;; mem1.  The flags destination is read from the first element of the
;; parallel in the first matched insn.
23163 (define_peephole2
23164 [(parallel [(set (reg FLAGS_REG)
23165 (compare (match_operator:SWI 2 "plusminuslogic_operator"
23166 [(match_operand:SWI 0 "register_operand")
23167 (match_operand:SWI 1 "memory_operand")])
23168 (const_int 0)))
23169 (set (match_dup 0) (match_dup 2))])
23170 (set (match_dup 1) (match_dup 0))]
23171 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
23172 && COMMUTATIVE_ARITH_P (operands[2])
23173 && peep2_reg_dead_p (2, operands[0])
23174 && !reg_overlap_mentioned_p (operands[0], operands[1])
23175 && ix86_match_ccmode (peep2_next_insn (0),
23176 GET_CODE (operands[2]) == PLUS
23177 ? CCGOCmode : CCNOmode)"
23178 [(parallel [(set (match_dup 3) (match_dup 5))
23179 (set (match_dup 1) (match_dup 4))])]
23180 {
23181 operands[3] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (0)), 0, 0));
23182 operands[4]
23183 = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
23184 copy_rtx (operands[1]), operands[0]);
23185 operands[5]
23186 = gen_rtx_COMPARE (GET_MODE (operands[3]), copy_rtx (operands[4]),
23187 const0_rtx);
23188 })
23189
;; Narrow-mode (SWI12) load/store around an SImode operation on the same
;; hard register:
;;   reg0 = mem1; reg4:SI = reg4 OP x2 (flags clobbered); mem1 = reg0;
;;   flags = compare (reg0, 0)        with REGNO (reg4) == REGNO (reg0)
;; ==> { flags = compare (mem1 OP x2', 0); mem1 = mem1 OP x2' }
;; where the operation is rebuilt in <MODE>mode and x2' is
;; gen_lowpart (<MODE>mode, x2).  Conditions otherwise mirror the
;; same-mode four-insn pattern (reg0 dead at offset 4, no overlap with
;; mem1 or x2, QImode restriction on x2, CC mode check on the compare).
23190 (define_peephole2
23191 [(set (match_operand:SWI12 0 "register_operand")
23192 (match_operand:SWI12 1 "memory_operand"))
23193 (parallel [(set (match_operand:SI 4 "register_operand")
23194 (match_operator:SI 3 "plusminuslogic_operator"
23195 [(match_dup 4)
23196 (match_operand:SI 2 "nonmemory_operand")]))
23197 (clobber (reg:CC FLAGS_REG))])
23198 (set (match_dup 1) (match_dup 0))
23199 (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))]
23200 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
23201 && REGNO (operands[0]) == REGNO (operands[4])
23202 && peep2_reg_dead_p (4, operands[0])
23203 && (<MODE>mode != QImode
23204 || immediate_operand (operands[2], SImode)
23205 || any_QIreg_operand (operands[2], SImode))
23206 && !reg_overlap_mentioned_p (operands[0], operands[1])
23207 && !reg_overlap_mentioned_p (operands[0], operands[2])
23208 && ix86_match_ccmode (peep2_next_insn (3),
23209 (GET_CODE (operands[3]) == PLUS
23210 || GET_CODE (operands[3]) == MINUS)
23211 ? CCGOCmode : CCNOmode)"
23212 [(parallel [(set (match_dup 5) (match_dup 7))
23213 (set (match_dup 1) (match_dup 6))])]
23214 {
23215 operands[5] = SET_DEST (PATTERN (peep2_next_insn (3)));
23216 operands[6]
23217 = gen_rtx_fmt_ee (GET_CODE (operands[3]), <MODE>mode,
23218 copy_rtx (operands[1]),
23219 gen_lowpart (<MODE>mode, operands[2]));
23220 operands[7]
23221 = gen_rtx_COMPARE (GET_MODE (operands[5]),
23222 copy_rtx (operands[6]),
23223 const0_rtx);
23224 })
23225
23226 ;; peephole2 comes before regcprop, so deal also with a case that
23227 ;; would be cleaned up by regcprop.
;; Five-insn sequence with an extra register copy before the store:
;;   reg0 = mem1; reg0 = reg0 OP x2 (flags clobbered); reg4 = reg0;
;;   mem1 = reg4; flags = compare (reg4, 0)
;; ==> { flags = compare (mem1 OP x2, 0); mem1 = mem1 OP x2 }
;; reg0 must be dead at offset 3 and reg4 at offset 5; reg0 must not be
;; mentioned in mem1 or x2, and reg4 must not be mentioned in mem1.
;; The flags destination is taken from the fifth insn
;; (peep2_next_insn (4)), which is also the insn checked for CC mode.
23228 (define_peephole2
23229 [(set (match_operand:SWI 0 "register_operand")
23230 (match_operand:SWI 1 "memory_operand"))
23231 (parallel [(set (match_dup 0)
23232 (match_operator:SWI 3 "plusminuslogic_operator"
23233 [(match_dup 0)
23234 (match_operand:SWI 2 "<nonmemory_operand>")]))
23235 (clobber (reg:CC FLAGS_REG))])
23236 (set (match_operand:SWI 4 "register_operand") (match_dup 0))
23237 (set (match_dup 1) (match_dup 4))
23238 (set (reg FLAGS_REG) (compare (match_dup 4) (const_int 0)))]
23239 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
23240 && peep2_reg_dead_p (3, operands[0])
23241 && peep2_reg_dead_p (5, operands[4])
23242 && !reg_overlap_mentioned_p (operands[0], operands[1])
23243 && !reg_overlap_mentioned_p (operands[0], operands[2])
23244 && !reg_overlap_mentioned_p (operands[4], operands[1])
23245 && (<MODE>mode != QImode
23246 || immediate_operand (operands[2], QImode)
23247 || any_QIreg_operand (operands[2], QImode))
23248 && ix86_match_ccmode (peep2_next_insn (4),
23249 (GET_CODE (operands[3]) == PLUS
23250 || GET_CODE (operands[3]) == MINUS)
23251 ? CCGOCmode : CCNOmode)"
23252 [(parallel [(set (match_dup 5) (match_dup 7))
23253 (set (match_dup 1) (match_dup 6))])]
23254 {
23255 operands[5] = SET_DEST (PATTERN (peep2_next_insn (4)));
23256 operands[6]
23257 = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
23258 copy_rtx (operands[1]),
23259 operands[2]);
23260 operands[7]
23261 = gen_rtx_COMPARE (GET_MODE (operands[5]),
23262 copy_rtx (operands[6]),
23263 const0_rtx);
23264 })
23265
;; Narrow-mode (SWI12) counterpart of the five-insn pattern, with the
;; operation performed in SImode on the same hard register as reg0:
;;   reg0 = mem1; reg4:SI = reg4 OP x2 (flags clobbered); reg5 = reg0;
;;   mem1 = reg5; flags = compare (reg5, 0)
;; ==> { flags = compare (mem1 OP x2', 0); mem1 = mem1 OP x2' }
;; with the operation rebuilt in <MODE>mode and
;; x2' = gen_lowpart (<MODE>mode, x2).  reg0 must be dead at offset 3 and
;; reg5 at offset 5.
23266 (define_peephole2
23267 [(set (match_operand:SWI12 0 "register_operand")
23268 (match_operand:SWI12 1 "memory_operand"))
23269 (parallel [(set (match_operand:SI 4 "register_operand")
23270 (match_operator:SI 3 "plusminuslogic_operator"
23271 [(match_dup 4)
23272 (match_operand:SI 2 "nonmemory_operand")]))
23273 (clobber (reg:CC FLAGS_REG))])
23274 (set (match_operand:SWI12 5 "register_operand") (match_dup 0))
23275 (set (match_dup 1) (match_dup 5))
23276 (set (reg FLAGS_REG) (compare (match_dup 5) (const_int 0)))]
23277 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
23278 && REGNO (operands[0]) == REGNO (operands[4])
23279 && peep2_reg_dead_p (3, operands[0])
23280 && peep2_reg_dead_p (5, operands[5])
23281 && (<MODE>mode != QImode
23282 || immediate_operand (operands[2], SImode)
23283 || any_QIreg_operand (operands[2], SImode))
23284 && !reg_overlap_mentioned_p (operands[0], operands[1])
23285 && !reg_overlap_mentioned_p (operands[0], operands[2])
23286 && !reg_overlap_mentioned_p (operands[5], operands[1])
23287 && ix86_match_ccmode (peep2_next_insn (4),
23288 (GET_CODE (operands[3]) == PLUS
23289 || GET_CODE (operands[3]) == MINUS)
23290 ? CCGOCmode : CCNOmode)"
23291 [(parallel [(set (match_dup 6) (match_dup 8))
23292 (set (match_dup 1) (match_dup 7))])]
23293 {
23294 operands[6] = SET_DEST (PATTERN (peep2_next_insn (4)));
23295 operands[7]
23296 = gen_rtx_fmt_ee (GET_CODE (operands[3]), <MODE>mode,
23297 copy_rtx (operands[1]),
23298 gen_lowpart (<MODE>mode, operands[2]));
23299 operands[8]
23300 = gen_rtx_COMPARE (GET_MODE (operands[6]),
23301 copy_rtx (operands[7]),
23302 const0_rtx);
23303 })
23304
23305 ;; Likewise for cmpelim optimized pattern.
;; Four-insn variant in which the arithmetic insn already sets the flags
;; and an extra register copy precedes the store:
;;   reg0 = mem1;
;;   { flags = compare (reg0 OP x2, 0); reg0 = reg0 OP x2 };
;;   reg4 = reg0; mem1 = reg4
;; ==> { flags = compare (mem1 OP x2, 0); mem1 = mem1 OP x2 }
;; reg0 must be dead at offset 3 and reg4 at offset 4.  The flags
;; destination is read from the first element of the parallel in the
;; second matched insn.
23306 (define_peephole2
23307 [(set (match_operand:SWI 0 "register_operand")
23308 (match_operand:SWI 1 "memory_operand"))
23309 (parallel [(set (reg FLAGS_REG)
23310 (compare (match_operator:SWI 3 "plusminuslogic_operator"
23311 [(match_dup 0)
23312 (match_operand:SWI 2 "<nonmemory_operand>")])
23313 (const_int 0)))
23314 (set (match_dup 0) (match_dup 3))])
23315 (set (match_operand:SWI 4 "register_operand") (match_dup 0))
23316 (set (match_dup 1) (match_dup 4))]
23317 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
23318 && peep2_reg_dead_p (3, operands[0])
23319 && peep2_reg_dead_p (4, operands[4])
23320 && !reg_overlap_mentioned_p (operands[0], operands[1])
23321 && !reg_overlap_mentioned_p (operands[0], operands[2])
23322 && !reg_overlap_mentioned_p (operands[4], operands[1])
23323 && ix86_match_ccmode (peep2_next_insn (1),
23324 (GET_CODE (operands[3]) == PLUS
23325 || GET_CODE (operands[3]) == MINUS)
23326 ? CCGOCmode : CCNOmode)"
23327 [(parallel [(set (match_dup 5) (match_dup 7))
23328 (set (match_dup 1) (match_dup 6))])]
23329 {
23330 operands[5] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (1)), 0, 0));
23331 operands[6]
23332 = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
23333 copy_rtx (operands[1]), operands[2]);
23334 operands[7]
23335 = gen_rtx_COMPARE (GET_MODE (operands[5]), copy_rtx (operands[6]),
23336 const0_rtx);
23337 })
23338
23339 ;; Special cases for xor, where (x ^= y) != 0 is (misoptimized)
23340 ;; into x = z; x ^= y; x != z
;; Five-insn xor sequence:
;;   reg0 = mem1; reg3 = reg0; reg4 ^= x2 (flags clobbered);
;;   mem1 = reg4; flags:CCZ = compare (op5, op6)
;; where reg4 is the same hard register as reg0 or reg3, and the final
;; compare is between the *other* copy (still holding the loaded value)
;; and x2, in either operand order.
;; ==> { flags = compare (mem1 ^ x2, 0); mem1 = mem1 ^ x2 }
;; The xor result is zero exactly when the loaded value equals x2, so the
;; equality test is preserved.
;; reg4 must be dead at offset 4 and the other copy at offset 5; neither
;; copy may be mentioned in mem1 or x2, and reg3 must not overlap reg0.
23341 (define_peephole2
23342 [(set (match_operand:SWI 0 "register_operand")
23343 (match_operand:SWI 1 "memory_operand"))
23344 (set (match_operand:SWI 3 "register_operand") (match_dup 0))
23345 (parallel [(set (match_operand:SWI 4 "register_operand")
23346 (xor:SWI (match_dup 4)
23347 (match_operand:SWI 2 "<nonmemory_operand>")))
23348 (clobber (reg:CC FLAGS_REG))])
23349 (set (match_dup 1) (match_dup 4))
23350 (set (reg:CCZ FLAGS_REG)
23351 (compare:CCZ (match_operand:SWI 5 "register_operand")
23352 (match_operand:SWI 6 "<nonmemory_operand>")))]
23353 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
23354 && (REGNO (operands[4]) == REGNO (operands[0])
23355 || REGNO (operands[4]) == REGNO (operands[3]))
23356 && (rtx_equal_p (operands[REGNO (operands[4]) == REGNO (operands[0])
23357 ? 3 : 0], operands[5])
23358 ? rtx_equal_p (operands[2], operands[6])
23359 : rtx_equal_p (operands[2], operands[5])
23360 && rtx_equal_p (operands[REGNO (operands[4]) == REGNO (operands[0])
23361 ? 3 : 0], operands[6]))
23362 && peep2_reg_dead_p (4, operands[4])
23363 && peep2_reg_dead_p (5, operands[REGNO (operands[4]) == REGNO (operands[0])
23364 ? 3 : 0])
23365 && !reg_overlap_mentioned_p (operands[0], operands[1])
23366 && !reg_overlap_mentioned_p (operands[0], operands[2])
23367 && !reg_overlap_mentioned_p (operands[3], operands[0])
23368 && !reg_overlap_mentioned_p (operands[3], operands[1])
23369 && !reg_overlap_mentioned_p (operands[3], operands[2])
23370 && (<MODE>mode != QImode
23371 || immediate_operand (operands[2], QImode)
23372 || any_QIreg_operand (operands[2], QImode))"
23373 [(parallel [(set (match_dup 7) (match_dup 9))
23374 (set (match_dup 1) (match_dup 8))])]
23375 {
23376 operands[7] = SET_DEST (PATTERN (peep2_next_insn (4)));
23377 operands[8] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]),
23378 operands[2]);
23379 operands[9]
23380 = gen_rtx_COMPARE (GET_MODE (operands[7]),
23381 copy_rtx (operands[8]),
23382 const0_rtx);
23383 })
23384
;; Narrow-mode (SWI12) counterpart of the xor special case, with the xor
;; itself done in SImode:
;;   reg0 = mem1; reg3 = reg0; reg4:SI ^= x2 (flags clobbered);
;;   mem1 = reg5; flags:CCZ = compare (op6, op7)
;; reg5 must be the same hard register as reg4 and as one of reg0/reg3;
;; the compare must be between the other copy and x2 (register operands
;; are compared by REGNO because their modes differ).
;; ==> { flags = compare (mem1 ^ x2', 0); mem1 = mem1 ^ x2' }
;; with x2' = gen_lowpart (<MODE>mode, x2).
23385 (define_peephole2
23386 [(set (match_operand:SWI12 0 "register_operand")
23387 (match_operand:SWI12 1 "memory_operand"))
23388 (set (match_operand:SWI12 3 "register_operand") (match_dup 0))
23389 (parallel [(set (match_operand:SI 4 "register_operand")
23390 (xor:SI (match_dup 4)
23391 (match_operand:SI 2 "<nonmemory_operand>")))
23392 (clobber (reg:CC FLAGS_REG))])
23393 (set (match_dup 1) (match_operand:SWI12 5 "register_operand"))
23394 (set (reg:CCZ FLAGS_REG)
23395 (compare:CCZ (match_operand:SWI12 6 "register_operand")
23396 (match_operand:SWI12 7 "<nonmemory_operand>")))]
23397 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
23398 && (REGNO (operands[5]) == REGNO (operands[0])
23399 || REGNO (operands[5]) == REGNO (operands[3]))
23400 && REGNO (operands[5]) == REGNO (operands[4])
23401 && (rtx_equal_p (operands[REGNO (operands[5]) == REGNO (operands[0])
23402 ? 3 : 0], operands[6])
23403 ? (REG_P (operands[2])
23404 ? REG_P (operands[7]) && REGNO (operands[2]) == REGNO (operands[7])
23405 : rtx_equal_p (operands[2], operands[7]))
23406 : (rtx_equal_p (operands[REGNO (operands[5]) == REGNO (operands[0])
23407 ? 3 : 0], operands[7])
23408 && REG_P (operands[2])
23409 && REGNO (operands[2]) == REGNO (operands[6])))
23410 && peep2_reg_dead_p (4, operands[5])
23411 && peep2_reg_dead_p (5, operands[REGNO (operands[5]) == REGNO (operands[0])
23412 ? 3 : 0])
23413 && !reg_overlap_mentioned_p (operands[0], operands[1])
23414 && !reg_overlap_mentioned_p (operands[0], operands[2])
23415 && !reg_overlap_mentioned_p (operands[3], operands[0])
23416 && !reg_overlap_mentioned_p (operands[3], operands[1])
23417 && !reg_overlap_mentioned_p (operands[3], operands[2])
23418 && (<MODE>mode != QImode
23419 || immediate_operand (operands[2], SImode)
23420 || any_QIreg_operand (operands[2], SImode))"
23421 [(parallel [(set (match_dup 8) (match_dup 10))
23422 (set (match_dup 1) (match_dup 9))])]
23423 {
23424 operands[8] = SET_DEST (PATTERN (peep2_next_insn (4)));
23425 operands[9] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]),
23426 gen_lowpart (<MODE>mode, operands[2]));
23427 operands[10]
23428 = gen_rtx_COMPARE (GET_MODE (operands[8]),
23429 copy_rtx (operands[9]),
23430 const0_rtx);
23431 })
23432
23433 ;; Attempt to optimize away memory stores of values the memory already
23434 ;; has. See PR79593.
;; reg0 = mem1; mem2 = reg0   ==>   reg0 = mem1
;; when mem1 and mem2 are the same location (rtx_equal_p), neither is
;; volatile, and reg0 is not mentioned in mem2.  The store would write
;; back the value just loaded, so only the load is kept.
23435 (define_peephole2
23436 [(set (match_operand 0 "register_operand")
23437 (match_operand 1 "memory_operand"))
23438 (set (match_operand 2 "memory_operand") (match_dup 0))]
23439 "!MEM_VOLATILE_P (operands[1])
23440 && !MEM_VOLATILE_P (operands[2])
23441 && rtx_equal_p (operands[1], operands[2])
23442 && !reg_overlap_mentioned_p (operands[0], operands[2])"
23443 [(set (match_dup 0) (match_dup 1))])
23444
23445 ;; Attempt to always use XOR for zeroing registers (including FP modes).
;; Rewrites a plain store of zero to a general register as the
;; flags-clobbering form { reg0 = 0; clobber flags }, after widening
;; operand 0 to word_mode with gen_lowpart.  Applies to any mode no wider
;; than UNITS_PER_WORD, when the flags register is dead at this insn and
;; either TARGET_USE_MOV0 is off or the insn is optimized for size.
23446 (define_peephole2
23447 [(set (match_operand 0 "general_reg_operand")
23448 (match_operand 1 "const0_operand"))]
23449 "GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD
23450 && (! TARGET_USE_MOV0 || optimize_insn_for_size_p ())
23451 && peep2_regno_dead_p (0, FLAGS_REG)"
23452 [(parallel [(set (match_dup 0) (const_int 0))
23453 (clobber (reg:CC FLAGS_REG))])]
23454 "operands[0] = gen_lowpart (word_mode, operands[0]);")
23455
;; strict_low_part counterpart for QImode/HImode: a zero store that must
;; leave the rest of the register untouched is rewritten to the same store
;; with an added flags clobber.  The operand is not widened here.
;; Same enabling condition: flags dead and (!TARGET_USE_MOV0 or size).
23456 (define_peephole2
23457 [(set (strict_low_part (match_operand:SWI12 0 "general_reg_operand"))
23458 (const_int 0))]
23459 "(! TARGET_USE_MOV0 || optimize_insn_for_size_p ())
23460 && peep2_regno_dead_p (0, FLAGS_REG)"
23461 [(parallel [(set (strict_low_part (match_dup 0)) (const_int 0))
23462 (clobber (reg:CC FLAGS_REG))])])
23463
23464 ;; For HI, SI and DI modes, or $-1,reg is smaller than mov $-1,reg.
;; Rewrites reg0 = -1 as { reg0 = -1; clobber flags } when the flags
;; register is dead and either TARGET_MOVE_M1_VIA_OR is set or the insn is
;; optimized for size.  Operands narrower than SImode are first widened to
;; SImode with gen_lowpart.
23465 (define_peephole2
23466 [(set (match_operand:SWI248 0 "general_reg_operand")
23467 (const_int -1))]
23468 "(TARGET_MOVE_M1_VIA_OR || optimize_insn_for_size_p ())
23469 && peep2_regno_dead_p (0, FLAGS_REG)"
23470 [(parallel [(set (match_dup 0) (const_int -1))
23471 (clobber (reg:CC FLAGS_REG))])]
23472 {
23473 if (<MODE_SIZE> < GET_MODE_SIZE (SImode))
23474 operands[0] = gen_lowpart (SImode, operands[0]);
23475 })
23476
23477 ;; Attempt to convert simple lea to add/shift.
23478 ;; These can be created by move expanders.
23479 ;; Disable PLUS peepholes on TARGET_OPT_AGU, since all
23480 ;; relevant lea instructions were already split.
23481
;; Flag-less reg0 = reg0 + x1 (the lea form)  ==>  the same addition with
;; an explicit flags clobber (the add form).  Only when the flags register
;; is dead at this insn and TARGET_OPT_AGU is off.
23482 (define_peephole2
23483 [(set (match_operand:SWI48 0 "register_operand")
23484 (plus:SWI48 (match_dup 0)
23485 (match_operand:SWI48 1 "<nonmemory_operand>")))]
23486 "!TARGET_OPT_AGU
23487 && peep2_regno_dead_p (0, FLAGS_REG)"
23488 [(parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 1)))
23489 (clobber (reg:CC FLAGS_REG))])])
23490
;; Same as the reg-first form but matching reg0 = x1 + reg0.  The
;; replacement canonicalizes the operand order to reg0 + x1 and adds the
;; flags clobber.
23491 (define_peephole2
23492 [(set (match_operand:SWI48 0 "register_operand")
23493 (plus:SWI48 (match_operand:SWI48 1 "<nonmemory_operand>")
23494 (match_dup 0)))]
23495 "!TARGET_OPT_AGU
23496 && peep2_regno_dead_p (0, FLAGS_REG)"
23497 [(parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 1)))
23498 (clobber (reg:CC FLAGS_REG))])])
23499
;; 64-bit only: reg0:DI = zero_extend (reg1:SI + x2), where reg0 and reg1
;; are the same hard register, gets an explicit flags clobber added.
;; Requires the flags register to be dead and TARGET_OPT_AGU to be off.
23500 (define_peephole2
23501 [(set (match_operand:DI 0 "register_operand")
23502 (zero_extend:DI
23503 (plus:SI (match_operand:SI 1 "register_operand")
23504 (match_operand:SI 2 "nonmemory_operand"))))]
23505 "TARGET_64BIT && !TARGET_OPT_AGU
23506 && REGNO (operands[0]) == REGNO (operands[1])
23507 && peep2_regno_dead_p (0, FLAGS_REG)"
23508 [(parallel [(set (match_dup 0)
23509 (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))
23510 (clobber (reg:CC FLAGS_REG))])])
23511
;; As the previous pattern, but the register matching the destination is
;; the second addend (operand 2).  The replacement swaps the addends so
;; that the register comes first, and adds the flags clobber.
23512 (define_peephole2
23513 [(set (match_operand:DI 0 "register_operand")
23514 (zero_extend:DI
23515 (plus:SI (match_operand:SI 1 "nonmemory_operand")
23516 (match_operand:SI 2 "register_operand"))))]
23517 "TARGET_64BIT && !TARGET_OPT_AGU
23518 && REGNO (operands[0]) == REGNO (operands[2])
23519 && peep2_regno_dead_p (0, FLAGS_REG)"
23520 [(parallel [(set (match_dup 0)
23521 (zero_extend:DI (plus:SI (match_dup 2) (match_dup 1))))
23522 (clobber (reg:CC FLAGS_REG))])])
23523
;; Flag-less reg0 = reg0 * C, with C a power of two, ==> left shift
;; { reg0 = reg0 << log2 (C); clobber flags }.  Requires the flags
;; register to be dead.  The preparation statement replaces operand 1
;; with the shift count.
23524 (define_peephole2
23525 [(set (match_operand:SWI48 0 "register_operand")
23526 (mult:SWI48 (match_dup 0)
23527 (match_operand:SWI48 1 "const_int_operand")))]
23528 "pow2p_hwi (INTVAL (operands[1]))
23529 && peep2_regno_dead_p (0, FLAGS_REG)"
23530 [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))
23531 (clobber (reg:CC FLAGS_REG))])]
23532 "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
23533
;; 64-bit only: reg0:DI = zero_extend (reg1:SI * C), with C a power of two
;; and reg0/reg1 the same hard register, ==> the zero-extended SImode left
;; shift by log2 (C) with a flags clobber.  Requires dead flags.
23534 (define_peephole2
23535 [(set (match_operand:DI 0 "register_operand")
23536 (zero_extend:DI
23537 (mult:SI (match_operand:SI 1 "register_operand")
23538 (match_operand:SI 2 "const_int_operand"))))]
23539 "TARGET_64BIT
23540 && pow2p_hwi (INTVAL (operands[2]))
23541 && REGNO (operands[0]) == REGNO (operands[1])
23542 && peep2_regno_dead_p (0, FLAGS_REG)"
23543 [(parallel [(set (match_dup 0)
23544 (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))
23545 (clobber (reg:CC FLAGS_REG))])]
23546 "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));")
23547
23548 ;; The ESP adjustments can be done by the push and pop instructions. Resulting
23549 ;; code is shorter, since push is only 1 byte, while add imm, %esp is 3 bytes.
23550 ;; On many CPUs it is also faster, since special hardware to avoid esp
23551 ;; dependencies is present.
23552
23553 ;; While some of these conversions may be done using splitters, we use
23554 ;; peepholes in order to allow combine_stack_adjustments pass to see
23555 ;; nonobfuscated RTL.
23556
23557 ;; Convert prologue esp subtractions to push.
23558 ;; We need register to push. In order to keep verify_flow_info happy we have
23559 ;; two choices
23560 ;; - use scratch and clobber it in order to avoid dependencies
23561 ;; - use already live register
23562 ;; We can't use the second way right now, since there is no reliable way to
23563 ;; verify that a given register is live. The first choice will also most likely
23564 ;; result in fewer dependencies. At the point of esp adjustments it is very
23565 ;; likely that call-clobbered registers are dead. We may want to use the base
23566 ;; pointer as an alternative when no register is available later.
23567
;; Stack-pointer adjustment by minus one word, in the form that also
;; clobbers (mem:BLK (scratch)), ==> clobber of a scratch register
;; followed by a push of that register (store through pre_dec of SP); the
;; memory clobber is kept on the push.  Enabled by TARGET_SINGLE_PUSH or
;; when optimizing for size, and only when the red zone is not in use.
23568 (define_peephole2
23569 [(match_scratch:W 1 "r")
23570 (parallel [(set (reg:P SP_REG)
23571 (plus:P (reg:P SP_REG)
23572 (match_operand:P 0 "const_int_operand")))
23573 (clobber (reg:CC FLAGS_REG))
23574 (clobber (mem:BLK (scratch)))])]
23575 "(TARGET_SINGLE_PUSH || optimize_insn_for_size_p ())
23576 && INTVAL (operands[0]) == -GET_MODE_SIZE (word_mode)
23577 && !ix86_red_zone_used"
23578 [(clobber (match_dup 1))
23579 (parallel [(set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
23580 (clobber (mem:BLK (scratch)))])])
23581
;; Same as the single-push form, for an adjustment of minus two words:
;; the scratch register is pushed twice, with the memory clobber attached
;; to the second push.  Enabled by TARGET_DOUBLE_PUSH or when optimizing
;; for size, and only when the red zone is not in use.
23582 (define_peephole2
23583 [(match_scratch:W 1 "r")
23584 (parallel [(set (reg:P SP_REG)
23585 (plus:P (reg:P SP_REG)
23586 (match_operand:P 0 "const_int_operand")))
23587 (clobber (reg:CC FLAGS_REG))
23588 (clobber (mem:BLK (scratch)))])]
23589 "(TARGET_DOUBLE_PUSH || optimize_insn_for_size_p ())
23590 && INTVAL (operands[0]) == -2*GET_MODE_SIZE (word_mode)
23591 && !ix86_red_zone_used"
23592 [(clobber (match_dup 1))
23593 (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
23594 (parallel [(set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
23595 (clobber (mem:BLK (scratch)))])])
23596
23597 ;; Convert esp subtractions to push.
;; Stack-pointer adjustment by minus one word, in the form without the
;; memory clobber, ==> clobber of a scratch register followed by a plain
;; push of it.  Same enabling condition as the prologue single-push form.
23598 (define_peephole2
23599 [(match_scratch:W 1 "r")
23600 (parallel [(set (reg:P SP_REG)
23601 (plus:P (reg:P SP_REG)
23602 (match_operand:P 0 "const_int_operand")))
23603 (clobber (reg:CC FLAGS_REG))])]
23604 "(TARGET_SINGLE_PUSH || optimize_insn_for_size_p ())
23605 && INTVAL (operands[0]) == -GET_MODE_SIZE (word_mode)
23606 && !ix86_red_zone_used"
23607 [(clobber (match_dup 1))
23608 (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))])
23609
;; Stack-pointer adjustment by minus two words, without the memory
;; clobber, ==> clobber of a scratch register followed by two plain pushes
;; of it.  Same enabling condition as the prologue double-push form.
23610 (define_peephole2
23611 [(match_scratch:W 1 "r")
23612 (parallel [(set (reg:P SP_REG)
23613 (plus:P (reg:P SP_REG)
23614 (match_operand:P 0 "const_int_operand")))
23615 (clobber (reg:CC FLAGS_REG))])]
23616 "(TARGET_DOUBLE_PUSH || optimize_insn_for_size_p ())
23617 && INTVAL (operands[0]) == -2*GET_MODE_SIZE (word_mode)
23618 && !ix86_red_zone_used"
23619 [(clobber (match_dup 1))
23620 (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
23621 (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))])
23622
23623 ;; Convert epilogue deallocator to pop.
;; Stack-pointer adjustment by plus one word, in the form that also
;; clobbers (mem:BLK (scratch)), ==> a pop into a scratch register (load
;; through post_inc of SP) with the memory clobber kept.  Enabled by
;; TARGET_SINGLE_POP or when optimizing for size.
23624 (define_peephole2
23625 [(match_scratch:W 1 "r")
23626 (parallel [(set (reg:P SP_REG)
23627 (plus:P (reg:P SP_REG)
23628 (match_operand:P 0 "const_int_operand")))
23629 (clobber (reg:CC FLAGS_REG))
23630 (clobber (mem:BLK (scratch)))])]
23631 "(TARGET_SINGLE_POP || optimize_insn_for_size_p ())
23632 && INTVAL (operands[0]) == GET_MODE_SIZE (word_mode)"
23633 [(parallel [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
23634 (clobber (mem:BLK (scratch)))])])
23635
23636 ;; Two pops case is tricky, since pop causes dependency
23637 ;; on destination register. We use two registers if available.
;; Adjustment by plus two words (memory-clobber form) ==> two pops into
;; two distinct scratch registers; the memory clobber stays on the first
;; pop.  Enabled by TARGET_DOUBLE_POP or when optimizing for size, and
;; only matches when two scratch registers are available.
23638 (define_peephole2
23639 [(match_scratch:W 1 "r")
23640 (match_scratch:W 2 "r")
23641 (parallel [(set (reg:P SP_REG)
23642 (plus:P (reg:P SP_REG)
23643 (match_operand:P 0 "const_int_operand")))
23644 (clobber (reg:CC FLAGS_REG))
23645 (clobber (mem:BLK (scratch)))])]
23646 "(TARGET_DOUBLE_POP || optimize_insn_for_size_p ())
23647 && INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
23648 [(parallel [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
23649 (clobber (mem:BLK (scratch)))])
23650 (set (match_dup 2) (mem:W (post_inc:P (reg:P SP_REG))))])
23651
;; Size-only fallback for the two-word adjustment (memory-clobber form):
;; needs a single scratch register and pops into it twice.  Only enabled
;; when optimizing for size.
23652 (define_peephole2
23653 [(match_scratch:W 1 "r")
23654 (parallel [(set (reg:P SP_REG)
23655 (plus:P (reg:P SP_REG)
23656 (match_operand:P 0 "const_int_operand")))
23657 (clobber (reg:CC FLAGS_REG))
23658 (clobber (mem:BLK (scratch)))])]
23659 "optimize_insn_for_size_p ()
23660 && INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
23661 [(parallel [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
23662 (clobber (mem:BLK (scratch)))])
23663 (set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))])
23664
23665 ;; Convert esp additions to pop.
;; Adjustment by plus one word, in the form without the memory clobber,
;; ==> a plain pop into a scratch register.  The only condition is the
;; adjustment size; there is no tuning-flag or size check here.
23666 (define_peephole2
23667 [(match_scratch:W 1 "r")
23668 (parallel [(set (reg:P SP_REG)
23669 (plus:P (reg:P SP_REG)
23670 (match_operand:P 0 "const_int_operand")))
23671 (clobber (reg:CC FLAGS_REG))])]
23672 "INTVAL (operands[0]) == GET_MODE_SIZE (word_mode)"
23673 [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))])
23674
23675 ;; Two pops case is tricky, since pop causes dependency
23676 ;; on destination register. We use two registers if available.
;; Adjustment by plus two words, without the memory clobber, ==> two
;; plain pops into two distinct scratch registers.  The only condition is
;; the adjustment size.
23677 (define_peephole2
23678 [(match_scratch:W 1 "r")
23679 (match_scratch:W 2 "r")
23680 (parallel [(set (reg:P SP_REG)
23681 (plus:P (reg:P SP_REG)
23682 (match_operand:P 0 "const_int_operand")))
23683 (clobber (reg:CC FLAGS_REG))])]
23684 "INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
23685 [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
23686 (set (match_dup 2) (mem:W (post_inc:P (reg:P SP_REG))))])
23687
;; Size-only fallback for the two-word adjustment without the memory
;; clobber: a single scratch register is popped into twice.
23688 (define_peephole2
23689 [(match_scratch:W 1 "r")
23690 (parallel [(set (reg:P SP_REG)
23691 (plus:P (reg:P SP_REG)
23692 (match_operand:P 0 "const_int_operand")))
23693 (clobber (reg:CC FLAGS_REG))])]
23694 "optimize_insn_for_size_p ()
23695 && INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
23696 [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
23697 (set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))])
23698 \f
23699 ;; Convert compares with 1 to shorter inc/dec operations when CF is not
23700 ;; required and register dies. Similarly for 128 to -128.
;; Operands: 0 = flags register, 1 = the compare operator, 2 = register
;; being compared, 3 = the constant.  The constant must either satisfy
;; incdec_operand (when compare/branch fusion is off or the insn is
;; optimized for size) or be 128 (only when fusion is off).  The insn must
;; be usable in CCGCmode and operand 2 must be dead after it.
;; The replacement keeps the compare itself unchanged and only wraps it in
;; a parallel with a clobber of operand 2.
;; NOTE(review): presumably the added clobber lets a shorter inc/dec/add
;; style define_insn match the result -- confirm against that pattern.
23701 (define_peephole2
23702 [(set (match_operand 0 "flags_reg_operand")
23703 (match_operator 1 "compare_operator"
23704 [(match_operand 2 "register_operand")
23705 (match_operand 3 "const_int_operand")]))]
23706 "(((!TARGET_FUSE_CMP_AND_BRANCH || optimize_insn_for_size_p ())
23707 && incdec_operand (operands[3], GET_MODE (operands[3])))
23708 || (!TARGET_FUSE_CMP_AND_BRANCH
23709 && INTVAL (operands[3]) == 128))
23710 && ix86_match_ccmode (insn, CCGCmode)
23711 && peep2_reg_dead_p (1, operands[2])"
23712 [(parallel [(set (match_dup 0)
23713 (match_op_dup 1 [(match_dup 2) (match_dup 3)]))
23714 (clobber (match_dup 2))])])
23715 \f
23716 ;; Convert imul by three, five and nine into lea
;; Register source: reg * N, with N accepted by const359_operand, becomes
;; reg * (N - 1) + reg.  The preparation statement rewrites operand 2 to
;; N - 1 before the replacement is generated.  The replacement carries no
;; flags clobber.
23717 (define_peephole2
23718 [(parallel
23719 [(set (match_operand:SWI48 0 "register_operand")
23720 (mult:SWI48 (match_operand:SWI48 1 "register_operand")
23721 (match_operand:SWI48 2 "const359_operand")))
23722 (clobber (reg:CC FLAGS_REG))])]
23723 "!TARGET_PARTIAL_REG_STALL
23724 || <MODE>mode == SImode
23725 || optimize_function_for_size_p (cfun)"
23726 [(set (match_dup 0)
23727 (plus:SWI48 (mult:SWI48 (match_dup 1) (match_dup 2))
23728 (match_dup 1)))]
23729 "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);")
23730
;; Same transformation for a source that may be in memory
;; (nonimmediate_operand), applied only when optimizing the insn for speed:
;; first copy operand 1 into the destination, then compute
;; dest * (N - 1) + dest using the destination as both terms.  As above,
;; the preparation statement replaces operand 2 with N - 1.
23731 (define_peephole2
23732 [(parallel
23733 [(set (match_operand:SWI48 0 "register_operand")
23734 (mult:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
23735 (match_operand:SWI48 2 "const359_operand")))
23736 (clobber (reg:CC FLAGS_REG))])]
23737 "optimize_insn_for_speed_p ()
23738 && (!TARGET_PARTIAL_REG_STALL || <MODE>mode == SImode)"
23739 [(set (match_dup 0) (match_dup 1))
23740 (set (match_dup 0)
23741 (plus:SWI48 (mult:SWI48 (match_dup 0) (match_dup 2))
23742 (match_dup 0)))]
23743 "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);")
23744
23745 ;; imul $32bit_imm, mem, reg is vector decoded, while
23746 ;; imul $32bit_imm, reg, reg is direct decoded.
;; Split "dest = mem * imm" into a load of the memory operand into scratch
;; operand 3 followed by "dest = scratch * imm".  Enabled by
;; TARGET_SLOW_IMUL_IMM32_MEM when optimizing for speed, and only when the
;; immediate does not satisfy constraint K.
;; NOTE(review): K is presumably the signed 8-bit immediate constraint --
;; confirm in constraints.md.
23747 (define_peephole2
23748 [(match_scratch:SWI48 3 "r")
23749 (parallel [(set (match_operand:SWI48 0 "register_operand")
23750 (mult:SWI48 (match_operand:SWI48 1 "memory_operand")
23751 (match_operand:SWI48 2 "immediate_operand")))
23752 (clobber (reg:CC FLAGS_REG))])]
23753 "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
23754 && !satisfies_constraint_K (operands[2])"
23755 [(set (match_dup 3) (match_dup 1))
23756 (parallel [(set (match_dup 0) (mult:SWI48 (match_dup 3) (match_dup 2)))
23757 (clobber (reg:CC FLAGS_REG))])])
23758
;; TARGET_64BIT variant of the preceding split for an SImode multiply whose
;; result is zero-extended to DImode: the SImode memory operand is first
;; loaded into an SImode scratch, and the zero-extended multiply then uses
;; the scratch.
23759 (define_peephole2
23760 [(match_scratch:SI 3 "r")
23761 (parallel [(set (match_operand:DI 0 "register_operand")
23762 (zero_extend:DI
23763 (mult:SI (match_operand:SI 1 "memory_operand")
23764 (match_operand:SI 2 "immediate_operand"))))
23765 (clobber (reg:CC FLAGS_REG))])]
23766 "TARGET_64BIT
23767 && TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
23768 && !satisfies_constraint_K (operands[2])"
23769 [(set (match_dup 3) (match_dup 1))
23770 (parallel [(set (match_dup 0)
23771 (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2))))
23772 (clobber (reg:CC FLAGS_REG))])])
23773
23774 ;; imul $8/16bit_imm, regmem, reg is vector decoded.
23775 ;; Convert it into imul reg, reg
23776 ;; It would be better to force assembler to encode instruction using long
23777 ;; immediate, but there is apparently no way to do so.
;; Applies when TARGET_SLOW_IMUL_IMM8 is set, the insn is optimized for
;; speed and the constant satisfies constraint K.  The constant is loaded
;; into scratch operand 3 and the multiply becomes "dest = dest * scratch".
;; The preparation code first copies operand 1 into the destination when
;; the two differ, since the replacement multiplies operand 0 by the
;; scratch.
;; NOTE(review): the match_scratch is listed after the matched insn rather
;; than before it; presumably this controls the point at which the scratch
;; register must be free -- confirm against the define_peephole2 docs.
23778 (define_peephole2
23779 [(parallel [(set (match_operand:SWI248 0 "register_operand")
23780 (mult:SWI248
23781 (match_operand:SWI248 1 "nonimmediate_operand")
23782 (match_operand:SWI248 2 "const_int_operand")))
23783 (clobber (reg:CC FLAGS_REG))])
23784 (match_scratch:SWI248 3 "r")]
23785 "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p ()
23786 && satisfies_constraint_K (operands[2])"
23787 [(set (match_dup 3) (match_dup 2))
23788 (parallel [(set (match_dup 0) (mult:SWI248 (match_dup 0) (match_dup 3)))
23789 (clobber (reg:CC FLAGS_REG))])]
23790 {
23791 if (!rtx_equal_p (operands[0], operands[1]))
23792 emit_move_insn (operands[0], operands[1]);
23793 })
23794
23795 ;; After splitting up read-modify operations, array accesses with memory
23796 ;; operands might end up in form:
23797 ;; sall $2, %eax
23798 ;; movl 4(%esp), %edx
23799 ;; addl %edx, %eax
23800 ;; instead of pre-splitting:
23801 ;; sall $2, %eax
23802 ;; addl 4(%esp), %eax
23803 ;; Turn it into:
23804 ;; movl 4(%esp), %edx
23805 ;; leal (%edx,%eax,4), %eax
23806
;; Operands: 0 = shift destination, 1 = shift source, 2 = shift count,
;; 3 = destination of the add, 4 = the addend, 5 = word-mode scratch.
;; Requirements checked by the condition: the shift count is 1, 2 or 3;
;; the mode of operand 0 is SImode, DImode on TARGET_64BIT, or QImode or
;; HImode when !TARGET_PARTIAL_REG_STALL; operand 0 is either the add
;; destination itself or dead after the add; and the addend does not
;; mention operand 0, because the load of the addend is moved ahead of the
;; shift.
;; The preparation code rewrites the operands so that the two replacement
;; sets become "scratch = addend" and "dest = scratch + source * scale",
;; with scale = 1 << count.
23807 (define_peephole2
23808 [(match_scratch:W 5 "r")
23809 (parallel [(set (match_operand 0 "register_operand")
23810 (ashift (match_operand 1 "register_operand")
23811 (match_operand 2 "const_int_operand")))
23812 (clobber (reg:CC FLAGS_REG))])
23813 (parallel [(set (match_operand 3 "register_operand")
23814 (plus (match_dup 0)
23815 (match_operand 4 "x86_64_general_operand")))
23816 (clobber (reg:CC FLAGS_REG))])]
23817 "IN_RANGE (INTVAL (operands[2]), 1, 3)
23818 /* Validate MODE for lea. */
23819 && ((!TARGET_PARTIAL_REG_STALL
23820 && (GET_MODE (operands[0]) == QImode
23821 || GET_MODE (operands[0]) == HImode))
23822 || GET_MODE (operands[0]) == SImode
23823 || (TARGET_64BIT && GET_MODE (operands[0]) == DImode))
23824 && (rtx_equal_p (operands[0], operands[3])
23825 || peep2_reg_dead_p (2, operands[0]))
23826 /* We reorder load and the shift. */
23827 && !reg_overlap_mentioned_p (operands[0], operands[4])"
23828 [(set (match_dup 5) (match_dup 4))
23829 (set (match_dup 0) (match_dup 1))]
23830 {
23831 machine_mode op1mode = GET_MODE (operands[1]);
;; The result is computed in DImode only for a DImode source; every other
;; source mode uses SImode.
23832 machine_mode mode = op1mode == DImode ? DImode : SImode;
23833 int scale = 1 << INTVAL (operands[2]);
23834 rtx index = gen_lowpart (word_mode, operands[1]);
23835 rtx base = gen_lowpart (word_mode, operands[5]);
23836 rtx dest = gen_lowpart (mode, operands[3]);
23837
;; Operand 1 becomes the address-style expression base + index * scale,
;; built in word_mode and wrapped in a SUBREG when MODE is narrower.
23838 operands[1] = gen_rtx_PLUS (word_mode, base,
23839 gen_rtx_MULT (word_mode, index, GEN_INT (scale)));
23840 if (mode != word_mode)
23841 operands[1] = gen_rtx_SUBREG (mode, operands[1], 0);
23842
;; Operand 5 (the load destination) is the scratch viewed in the mode of
;; the original shift source.
23843 operands[5] = base;
23844 if (op1mode != word_mode)
23845 operands[5] = gen_lowpart (op1mode, operands[5]);
23846
23847 operands[0] = dest;
23848 })
23849 \f
23850 ;; We used to use "int $5", in honor of #BR which maps to interrupt vector 5.
23851 ;; That, however, is usually mapped by the OS to SIGSEGV, which is often
23852 ;; caught for use by garbage collectors and the like. Using an insn that
23853 ;; maps to SIGILL makes it more likely the program will rightfully die.
23854 ;; Keeping with tradition, "6" is in honor of #UD.
;; Unconditional trap, (trap_if (const_int 1) (const_int 6)).  Emits "ud2"
;; when the assembler supports the mnemonic (HAVE_AS_IX86_UD2); otherwise
;; emits the value 0x0b0f through ASM_SHORT.  Length is fixed at 2 bytes.
23855 (define_insn "trap"
23856 [(trap_if (const_int 1) (const_int 6))]
23857 ""
23858 {
23859 #ifdef HAVE_AS_IX86_UD2
23860 return "ud2";
23861 #else
23862 return ASM_SHORT "0x0b0f";
23863 #endif
23864 }
23865 [(set_attr "length" "2")])
23866
;; Same output as the "trap" pattern, but represented as an unspec_volatile
;; (UNSPECV_UD2) instead of a trap_if.
23867 (define_insn "ud2"
23868 [(unspec_volatile [(const_int 0)] UNSPECV_UD2)]
23869 ""
23870 {
23871 #ifdef HAVE_AS_IX86_UD2
23872 return "ud2";
23873 #else
23874 return ASM_SHORT "0x0b0f";
23875 #endif
23876 }
23877 [(set_attr "length" "2")])
23878
;; Expander for prefetch.  Operand 0 is the address, operand 1 is nonzero
;; for a write prefetch, operand 2 is the locality and is asserted to be in
;; the range 0..3.  The code below only adjusts operands 1 and 2:
;;  - write prefetch: locality is raised to at least 2 with
;;    TARGET_PREFETCHWT1; forced to 3 with TARGET_PRFCHW, or with 3dNOW
;;    when SSE2 is absent; otherwise, with TARGET_PREFETCH_SSE, the request
;;    is turned into a read prefetch (operand 1 = 0); failing all of those,
;;    3dNOW is asserted and locality is forced to 3.
;;  - read prefetch: left unchanged with TARGET_PREFETCH_SSE; otherwise
;;    3dNOW is asserted and locality is forced to 3.
23879 (define_expand "prefetch"
23880 [(prefetch (match_operand 0 "address_operand")
23881 (match_operand:SI 1 "const_int_operand")
23882 (match_operand:SI 2 "const_int_operand"))]
23883 "TARGET_3DNOW || TARGET_PREFETCH_SSE || TARGET_PRFCHW || TARGET_PREFETCHWT1"
23884 {
23885 bool write = operands[1] != const0_rtx;
23886 int locality = INTVAL (operands[2]);
23887
23888 gcc_assert (IN_RANGE (locality, 0, 3));
23889
23890 /* Use 3dNOW prefetch in case we are asking for write prefetch not
23891 supported by SSE counterpart (non-SSE2 athlon machines) or the
23892 SSE prefetch is not available (K6 machines). Otherwise use SSE
23893 prefetch as it allows specifying of locality. */
23894
23895 if (write)
23896 {
23897 if (TARGET_PREFETCHWT1)
23898 operands[2] = GEN_INT (MAX (locality, 2));
23899 else if (TARGET_PRFCHW)
23900 operands[2] = GEN_INT (3);
23901 else if (TARGET_3DNOW && !TARGET_SSE2)
23902 operands[2] = GEN_INT (3);
23903 else if (TARGET_PREFETCH_SSE)
23904 operands[1] = const0_rtx;
23905 else
23906 {
23907 gcc_assert (TARGET_3DNOW);
23908 operands[2] = GEN_INT (3);
23909 }
23910 }
23911 else
23912 {
23913 if (TARGET_PREFETCH_SSE)
23914 ;
23915 else
23916 {
23917 gcc_assert (TARGET_3DNOW);
23918 operands[2] = GEN_INT (3);
23919 }
23920 }
23921 })
23922
;; Read prefetch (the write operand is fixed at (const_int 0)).  Operand 1
;; is the locality, asserted to be 0..3, and selects the mnemonic:
;; 0 -> prefetchnta, 1 -> prefetcht2, 2 -> prefetcht1, 3 -> prefetcht0.
23923 (define_insn "*prefetch_sse"
23924 [(prefetch (match_operand 0 "address_operand" "p")
23925 (const_int 0)
23926 (match_operand:SI 1 "const_int_operand"))]
23927 "TARGET_PREFETCH_SSE"
23928 {
23929 static const char * const patterns[4] = {
23930 "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0"
23931 };
23932
23933 int locality = INTVAL (operands[1]);
23934 gcc_assert (IN_RANGE (locality, 0, 3));
23935
23936 return patterns[locality];
23937 }
23938 [(set_attr "type" "sse")
23939 (set_attr "atom_sse_attr" "prefetch")
23940 (set (attr "length_address")
23941 (symbol_ref "memory_address_length (operands[0], false)"))
23942 (set_attr "memory" "none")])
23943
;; Prefetch with locality fixed at (const_int 3).  Operand 1 is the write
;; flag: zero emits "prefetch", anything else emits "prefetchw".
23944 (define_insn "*prefetch_3dnow"
23945 [(prefetch (match_operand 0 "address_operand" "p")
23946 (match_operand:SI 1 "const_int_operand")
23947 (const_int 3))]
23948 "TARGET_3DNOW || TARGET_PRFCHW || TARGET_PREFETCHWT1"
23949 {
23950 if (operands[1] == const0_rtx)
23951 return "prefetch\t%a0";
23952 else
23953 return "prefetchw\t%a0";
23954 }
23955 [(set_attr "type" "mmx")
23956 (set (attr "length_address")
23957 (symbol_ref "memory_address_length (operands[0], false)"))
23958 (set_attr "memory" "none")])
23959
;; Write prefetch with locality 2: both the write flag and the locality are
;; fixed constants in the pattern, (const_int 1) and (const_int 2).  Emits
;; "prefetchwt1" on the address operand.
23960 (define_insn "*prefetch_prefetchwt1"
23961 [(prefetch (match_operand 0 "address_operand" "p")
23962 (const_int 1)
23963 (const_int 2))]
23964 "TARGET_PREFETCHWT1"
23965 "prefetchwt1\t%a0"
23966 [(set_attr "type" "sse")
23967 (set (attr "length_address")
23968 (symbol_ref "memory_address_length (operands[0], false)"))
23969 (set_attr "memory" "none")])
23970
;; Prefetch on a local function symbol (operand 0), available only with
;; TARGET_PREFETCHI on TARGET_64BIT.  Operand 1 is the locality and is
;; asserted to be 2 or 3: 2 emits "prefetchit1", 3 emits "prefetchit0".
23971 (define_insn "prefetchi"
23972 [(unspec_volatile [(match_operand 0 "local_func_symbolic_operand" "p")
23973 (match_operand:SI 1 "const_int_operand")]
23974 UNSPECV_PREFETCHI)]
23975 "TARGET_PREFETCHI && TARGET_64BIT"
23976 {
23977 static const char * const patterns[2] = {
23978 "prefetchit1\t%0", "prefetchit0\t%0"
23979 };
23980
23981 int locality = INTVAL (operands[1]);
23982 gcc_assert (IN_RANGE (locality, 2, 3));
23983
23984 return patterns[locality - 2];
23985 }
23986 [(set_attr "type" "sse")
23987 (set (attr "length_address")
23988 (symbol_ref "memory_address_length (operands[0], false)"))
23989 (set_attr "memory" "none")])
23990
;; Expander taking two memory operands.  It always emits the
;; stack_protect_set_1 insn instantiated for ptr_mode and finishes with
;; DONE, so the (empty) expander pattern itself is never emitted.
23991 (define_expand "stack_protect_set"
23992 [(match_operand 0 "memory_operand")
23993 (match_operand 1 "memory_operand")]
23994 ""
23995 {
23996 emit_insn (gen_stack_protect_set_1
23997 (ptr_mode, operands[0], operands[1]));
23998 DONE;
23999 })
24000
;; Copy memory operand 1 to memory operand 0 through scratch register
;; operand 2 (early-clobber), emitted as three instructions: load, store,
;; then an xor of the scratch with itself.  The xor implements the
;; (set (match_scratch) (const_int 0)) part of the pattern and is the
;; reason the flags register is clobbered.
;; NOTE(review): presumably the zeroing is there so the copied guard value
;; does not stay in a register -- confirm.
24001 (define_insn "@stack_protect_set_1_<mode>"
24002 [(set (match_operand:PTR 0 "memory_operand" "=m")
24003 (unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")]
24004 UNSPEC_SP_SET))
24005 (set (match_scratch:PTR 2 "=&r") (const_int 0))
24006 (clobber (reg:CC FLAGS_REG))]
24007 ""
24008 {
24009 output_asm_insn ("mov{<imodesuffix>}\t{%1, %2|%2, %1}", operands);
24010 output_asm_insn ("mov{<imodesuffix>}\t{%2, %0|%0, %2}", operands);
24011 return "xor{l}\t%k2, %k2";
24012 }
24013 [(set_attr "type" "multi")])
24014
24015 ;; Patterns and peephole2s to optimize stack_protect_set_1_<mode>
24016 ;; immediately followed by *mov{s,d}i_internal to the same register,
24017 ;; where we can avoid the xor{l} above. We don't split this, so that
24018 ;; scheduling or anything else doesn't separate the *stack_protect_set*
24019 ;; pattern from the set of the register that overwrites the register
24020 ;; with a new value.
;; Operands: 0 = destination memory, 3 = source memory, 1 = the register
;; used for the copy and then overwritten, 2 = the SImode value finally
;; loaded into operand 1.  Valid only after reload and only when operand 2
;; does not mention operand 1.  Output: load operand 3 into the register,
;; store it to operand 0, then load operand 2 with "lea" (when operand 2 is
;; a pic_32bit_operand or ix86_use_lea_for_mov says so) or with "mov".
24021 (define_insn "*stack_protect_set_2_<mode>"
24022 [(set (match_operand:PTR 0 "memory_operand" "=m")
24023 (unspec:PTR [(match_operand:PTR 3 "memory_operand" "m")]
24024 UNSPEC_SP_SET))
24025 (set (match_operand:SI 1 "register_operand" "=&r")
24026 (match_operand:SI 2 "general_operand" "g"))
24027 (clobber (reg:CC FLAGS_REG))]
24028 "reload_completed
24029 && !reg_overlap_mentioned_p (operands[1], operands[2])"
24030 {
24031 output_asm_insn ("mov{<imodesuffix>}\t{%3, %<k>1|%<k>1, %3}", operands);
24032 output_asm_insn ("mov{<imodesuffix>}\t{%<k>1, %0|%0, %<k>1}", operands);
24033 if (pic_32bit_operand (operands[2], SImode)
24034 || ix86_use_lea_for_mov (insn, operands + 1))
24035 return "lea{l}\t{%E2, %1|%1, %E2}";
24036 else
24037 return "mov{l}\t{%2, %1|%1, %2}";
24038 }
24039 [(set_attr "type" "multi")
24040 (set_attr "length" "24")])
24041
;; Match a stack_protect_set_1 parallel (whose second set zeroes register
;; operand 2) immediately followed by an SImode set of a register with the
;; same register number (operand 3).  The source, operand 4, must be a
;; general operand that is a general register, a memory operand or an
;; immediate, and must not mention operand 3.  The replacement is a single
;; parallel in which the zeroing is replaced by the SImode set.
24042 (define_peephole2
24043 [(parallel [(set (match_operand:PTR 0 "memory_operand")
24044 (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
24045 UNSPEC_SP_SET))
24046 (set (match_operand:PTR 2 "general_reg_operand") (const_int 0))
24047 (clobber (reg:CC FLAGS_REG))])
24048 (set (match_operand:SI 3 "general_reg_operand")
24049 (match_operand:SI 4))]
24050 "REGNO (operands[2]) == REGNO (operands[3])
24051 && general_operand (operands[4], SImode)
24052 && (general_reg_operand (operands[4], SImode)
24053 || memory_operand (operands[4], SImode)
24054 || immediate_operand (operands[4], SImode))
24055 && !reg_overlap_mentioned_p (operands[3], operands[4])"
24056 [(parallel [(set (match_dup 0)
24057 (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
24058 (set (match_dup 3) (match_dup 4))
24059 (clobber (reg:CC FLAGS_REG))])])
24060
;; DImode counterpart of *stack_protect_set_2_<mode>, TARGET_64BIT only and
;; valid only after reload.  Operand 3 is copied to operand 0 through
;; register operand 1, which is then loaded with operand 2.  The final load
;; is chosen as follows: "lea{q}" for a pic_32bit_operand; a 32-bit "mov"
;; for alternative 0 (constraint Z); "movabs{q}" for alternative 2
;; (constraint i); otherwise "lea{q}" when ix86_use_lea_for_mov says so,
;; else "mov{q}".
24061 (define_insn "*stack_protect_set_3"
24062 [(set (match_operand:DI 0 "memory_operand" "=m,m,m")
24063 (unspec:DI [(match_operand:DI 3 "memory_operand" "m,m,m")]
24064 UNSPEC_SP_SET))
24065 (set (match_operand:DI 1 "register_operand" "=&r,r,r")
24066 (match_operand:DI 2 "general_operand" "Z,rem,i"))
24067 (clobber (reg:CC FLAGS_REG))]
24068 "TARGET_64BIT
24069 && reload_completed
24070 && !reg_overlap_mentioned_p (operands[1], operands[2])"
24071 {
24072 output_asm_insn ("mov{q}\t{%3, %1|%1, %3}", operands);
24073 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", operands);
24074 if (pic_32bit_operand (operands[2], DImode))
24075 return "lea{q}\t{%E2, %1|%1, %E2}";
24076 else if (which_alternative == 0)
24077 return "mov{l}\t{%k2, %k1|%k1, %k2}";
24078 else if (which_alternative == 2)
24079 return "movabs{q}\t{%2, %1|%1, %2}";
24080 else if (ix86_use_lea_for_mov (insn, operands + 1))
24081 return "lea{q}\t{%E2, %1|%1, %E2}";
24082 else
24083 return "mov{q}\t{%2, %1|%1, %2}";
24084 }
24085 [(set_attr "type" "multi")
24086 (set_attr "length" "24")])
24087
;; Merge a DImode stack_protect_set_1 parallel (whose second set zeroes
;; register operand 2) with an immediately following DImode set of the same
;; register, so that the zeroing is replaced by that set.  The source,
;; operand 3, must be a general operand of one of the listed kinds and must
;; not mention operand 2.
;; The replacement is written with unspec:DI so that it has exactly the
;; shape of *stack_protect_set_3.  Every operand matched here is DImode;
;; using the PTR mode iterator in the replacement would additionally
;; instantiate an SImode copy of this peephole whose output mixes an SImode
;; unspec with DImode operands.
24088 (define_peephole2
24089 [(parallel [(set (match_operand:DI 0 "memory_operand")
24090 (unspec:DI [(match_operand:DI 1 "memory_operand")]
24091 UNSPEC_SP_SET))
24092 (set (match_operand:DI 2 "general_reg_operand") (const_int 0))
24093 (clobber (reg:CC FLAGS_REG))])
24094 (set (match_dup 2) (match_operand:DI 3))]
24095 "TARGET_64BIT
24096 && general_operand (operands[3], DImode)
24097 && (general_reg_operand (operands[3], DImode)
24098 || memory_operand (operands[3], DImode)
24099 || x86_64_zext_immediate_operand (operands[3], DImode)
24100 || x86_64_immediate_operand (operands[3], DImode)
24101 || (CONSTANT_P (operands[3])
24102 && (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[3]))))
24103 && !reg_overlap_mentioned_p (operands[2], operands[3])"
24104 [(parallel [(set (match_dup 0)
24105 (unspec:DI [(match_dup 1)] UNSPEC_SP_SET))
24106 (set (match_dup 2) (match_dup 3))
24107 (clobber (reg:CC FLAGS_REG))])])
24108
;; Expander: operands 0 and 1 are the two memory operands to compare and
;; operand 2 is passed to cbranchcc4 as the branch target.  It emits the
;; stack_protect_test_1 insn for ptr_mode, which sets a CCZmode flags
;; register, followed by a conditional jump built from an EQ comparison of
;; those flags with zero.
24109 (define_expand "stack_protect_test"
24110 [(match_operand 0 "memory_operand")
24111 (match_operand 1 "memory_operand")
24112 (match_operand 2)]
24113 ""
24114 {
24115 rtx flags = gen_rtx_REG (CCZmode, FLAGS_REG);
24116
24117 emit_insn (gen_stack_protect_test_1
24118 (ptr_mode, flags, operands[0], operands[1]));
24119
24120 emit_jump_insn (gen_cbranchcc4 (gen_rtx_EQ (VOIDmode, flags, const0_rtx),
24121 flags, const0_rtx, operands[2]));
24122 DONE;
24123 })
24124
;; Sets the CCZ flags from memory operands 1 and 2: operand 1 is loaded
;; into scratch register operand 3 (early-clobber) and operand 2 is then
;; subtracted from it, so the scratch is clobbered and the flags hold the
;; result of the subtraction.
24125 (define_insn "@stack_protect_test_1_<mode>"
24126 [(set (match_operand:CCZ 0 "flags_reg_operand")
24127 (unspec:CCZ [(match_operand:PTR 1 "memory_operand" "m")
24128 (match_operand:PTR 2 "memory_operand" "m")]
24129 UNSPEC_SP_TEST))
24130 (clobber (match_scratch:PTR 3 "=&r"))]
24131 ""
24132 {
24133 output_asm_insn ("mov{<imodesuffix>}\t{%1, %3|%3, %1}", operands);
24134 return "sub{<imodesuffix>}\t{%2, %3|%3, %2}";
24135 }
24136 [(set_attr "type" "multi")])
24137
;; crc32 with an SImode accumulator: operand 1 is tied to the output
;; (constraint "0") and operand 2 is the data operand, iterated over the
;; SWI124 modes.  prefix_data16 is "1" when operand 2 is HImode, and
;; prefix_rex is "1" when operand 2 is a QImode ext_QIreg_operand.
24138 (define_insn "sse4_2_crc32<mode>"
24139 [(set (match_operand:SI 0 "register_operand" "=r")
24140 (unspec:SI
24141 [(match_operand:SI 1 "register_operand" "0")
24142 (match_operand:SWI124 2 "nonimmediate_operand" "<r>m")]
24143 UNSPEC_CRC32))]
24144 "TARGET_CRC32"
24145 "crc32{<imodesuffix>}\t{%2, %0|%0, %2}"
24146 [(set_attr "type" "sselog1")
24147 (set_attr "prefix_rep" "1")
24148 (set_attr "prefix_extra" "1")
24149 (set (attr "prefix_data16")
24150 (if_then_else (match_operand:HI 2)
24151 (const_string "1")
24152 (const_string "*")))
24153 (set (attr "prefix_rex")
24154 (if_then_else (match_operand:QI 2 "ext_QIreg_operand")
24155 (const_string "1")
24156 (const_string "*")))
24157 (set_attr "mode" "SI")])
24158
;; TARGET_64BIT crc32 with a DImode data operand.  The SImode unspec result
;; is modeled as zero-extended into the DImode destination; the SImode
;; accumulator (operand 1) is tied to the output register.
24159 (define_insn "sse4_2_crc32di"
24160 [(set (match_operand:DI 0 "register_operand" "=r")
24161 (zero_extend:DI
24162 (unspec:SI
24163 [(match_operand:SI 1 "register_operand" "0")
24164 (match_operand:DI 2 "nonimmediate_operand" "rm")]
24165 UNSPEC_CRC32)))]
24166 "TARGET_64BIT && TARGET_CRC32"
24167 "crc32{q}\t{%2, %0|%0, %2}"
24168 [(set_attr "type" "sselog1")
24169 (set_attr "prefix_rep" "1")
24170 (set_attr "prefix_extra" "1")
24171 (set_attr "mode" "DI")])
24172
;; 32-bit form of rdpmc: the SImode input is required in a register matching
;; constraint "c" and the DImode result in one matching constraint "A".
;; NOTE(review): "A" presumably denotes the edx:eax pair -- confirm in
;; constraints.md.
24173 (define_insn "rdpmc"
24174 [(set (match_operand:DI 0 "register_operand" "=A")
24175 (unspec_volatile:DI [(match_operand:SI 1 "register_operand" "c")]
24176 UNSPECV_RDPMC))]
24177 "!TARGET_64BIT"
24178 "rdpmc"
24179 [(set_attr "type" "other")
24180 (set_attr "length" "2")])
24181
;; 64-bit form of rdpmc: the result is modeled as two separate DImode
;; outputs, operand 0 in "a" and operand 1 in "d", both described as the
;; same unspec_volatile of the SImode input (operand 2, in "c").
24182 (define_insn "rdpmc_rex64"
24183 [(set (match_operand:DI 0 "register_operand" "=a")
24184 (unspec_volatile:DI [(match_operand:SI 2 "register_operand" "c")]
24185 UNSPECV_RDPMC))
24186 (set (match_operand:DI 1 "register_operand" "=d")
24187 (unspec_volatile:DI [(match_dup 2)] UNSPECV_RDPMC))]
24188 "TARGET_64BIT"
24189 "rdpmc"
24190 [(set_attr "type" "other")
24191 (set_attr "length" "2")])
24192
;; 32-bit form of rdtsc: no inputs; the DImode result is placed in a
;; register matching constraint "A".
24193 (define_insn "rdtsc"
24194 [(set (match_operand:DI 0 "register_operand" "=A")
24195 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC))]
24196 "!TARGET_64BIT"
24197 "rdtsc"
24198 [(set_attr "type" "other")
24199 (set_attr "length" "2")])
24200
;; 64-bit form of rdtsc: the result is modeled as two DImode outputs, one
;; in "a" (operand 0) and one in "d" (operand 1).
24201 (define_insn "rdtsc_rex64"
24202 [(set (match_operand:DI 0 "register_operand" "=a")
24203 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC))
24204 (set (match_operand:DI 1 "register_operand" "=d")
24205 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC))]
24206 "TARGET_64BIT"
24207 "rdtsc"
24208 [(set_attr "type" "other")
24209 (set_attr "length" "2")])
24210
;; 32-bit form of rdtscp: besides the DImode result in "A" (operand 0), an
;; additional SImode output is produced in "c" (operand 1).
24211 (define_insn "rdtscp"
24212 [(set (match_operand:DI 0 "register_operand" "=A")
24213 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSCP))
24214 (set (match_operand:SI 1 "register_operand" "=c")
24215 (unspec_volatile:SI [(const_int 0)] UNSPECV_RDTSCP))]
24216 "!TARGET_64BIT"
24217 "rdtscp"
24218 [(set_attr "type" "other")
24219 (set_attr "length" "3")])
24220
;; 64-bit form of rdtscp: two DImode outputs in "a" and "d" (operands 0 and
;; 1) plus an SImode output in "c" (operand 2).
24221 (define_insn "rdtscp_rex64"
24222 [(set (match_operand:DI 0 "register_operand" "=a")
24223 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSCP))
24224 (set (match_operand:DI 1 "register_operand" "=d")
24225 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSCP))
24226 (set (match_operand:SI 2 "register_operand" "=c")
24227 (unspec_volatile:SI [(const_int 0)] UNSPECV_RDTSCP))]
24228 "TARGET_64BIT"
24229 "rdtscp"
24230 [(set_attr "type" "other")
24231 (set_attr "length" "3")])
24232
24233 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
24234 ;;
24235 ;; FXSR, XSAVE and XSAVEOPT instructions
24236 ;;
24237 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
24238
;; fxsave into a BLKmode memory operand.  The length attribute is the
;; default address length of the insn plus 3.
24239 (define_insn "fxsave"
24240 [(set (match_operand:BLK 0 "memory_operand" "=m")
24241 (unspec_volatile:BLK [(const_int 0)] UNSPECV_FXSAVE))]
24242 "TARGET_FXSR"
24243 "fxsave\t%0"
24244 [(set_attr "type" "other")
24245 (set_attr "memory" "store")
24246 (set (attr "length")
24247 (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
24248
;; TARGET_64BIT fxsave64 variant; its length is one more than fxsave's
;; (default address length plus 4).
24249 (define_insn "fxsave64"
24250 [(set (match_operand:BLK 0 "memory_operand" "=m")
24251 (unspec_volatile:BLK [(const_int 0)] UNSPECV_FXSAVE64))]
24252 "TARGET_64BIT && TARGET_FXSR"
24253 "fxsave64\t%0"
24254 [(set_attr "type" "other")
24255 (set_attr "memory" "store")
24256 (set (attr "length")
24257 (symbol_ref "ix86_attr_length_address_default (insn) + 4"))])
24258
;; fxrstor from a BLKmode memory operand, modeled as an unspec_volatile with
;; no set destination.  Length is the default address length plus 3.
24259 (define_insn "fxrstor"
24260 [(unspec_volatile [(match_operand:BLK 0 "memory_operand" "m")]
24261 UNSPECV_FXRSTOR)]
24262 "TARGET_FXSR"
24263 "fxrstor\t%0"
24264 [(set_attr "type" "other")
24265 (set_attr "memory" "load")
24266 (set (attr "length")
24267 (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
24268
;; TARGET_64BIT fxrstor64 variant; length is the default address length
;; plus 4.
24269 (define_insn "fxrstor64"
24270 [(unspec_volatile [(match_operand:BLK 0 "memory_operand" "m")]
24271 UNSPECV_FXRSTOR64)]
24272 "TARGET_64BIT && TARGET_FXSR"
24273 "fxrstor64\t%0"
24274 [(set_attr "type" "other")
24275 (set_attr "memory" "load")
24276 (set (attr "length")
24277 (symbol_ref "ix86_attr_length_address_default (insn) + 4"))])
24278
;; The xsave-family unspecs without the 64 suffix.  UNSPECV_XSAVE has no
;; extra condition; each of the others is enabled only by the TARGET_ flag
;; listed next to it.
24279 (define_int_iterator ANY_XSAVE
24280 [UNSPECV_XSAVE
24281 (UNSPECV_XSAVEOPT "TARGET_XSAVEOPT")
24282 (UNSPECV_XSAVEC "TARGET_XSAVEC")
24283 (UNSPECV_XSAVES "TARGET_XSAVES")])
24284
;; The 64-suffixed counterparts of ANY_XSAVE, gated by the same TARGET_
;; flags.
24285 (define_int_iterator ANY_XSAVE64
24286 [UNSPECV_XSAVE64
24287 (UNSPECV_XSAVEOPT64 "TARGET_XSAVEOPT")
24288 (UNSPECV_XSAVEC64 "TARGET_XSAVEC")
24289 (UNSPECV_XSAVES64 "TARGET_XSAVES")])
24290
;; Maps each xsave-family unspec to the text substituted for <xsave> in
;; insn names and output templates.  The 64 variants map to names that
;; already include the "64" suffix.
24291 (define_int_attr xsave
24292 [(UNSPECV_XSAVE "xsave")
24293 (UNSPECV_XSAVE64 "xsave64")
24294 (UNSPECV_XSAVEOPT "xsaveopt")
24295 (UNSPECV_XSAVEOPT64 "xsaveopt64")
24296 (UNSPECV_XSAVEC "xsavec")
24297 (UNSPECV_XSAVEC64 "xsavec64")
24298 (UNSPECV_XSAVES "xsaves")
24299 (UNSPECV_XSAVES64 "xsaves64")])
24300
;; The xrstor-family unspecs without the 64 suffix; UNSPECV_XRSTORS is
;; enabled only with TARGET_XSAVES.
24301 (define_int_iterator ANY_XRSTOR
24302 [UNSPECV_XRSTOR
24303 (UNSPECV_XRSTORS "TARGET_XSAVES")])
24304
;; The 64-suffixed counterparts of ANY_XRSTOR.
24305 (define_int_iterator ANY_XRSTOR64
24306 [UNSPECV_XRSTOR64
24307 (UNSPECV_XRSTORS64 "TARGET_XSAVES")])
24308
;; Maps each xrstor-family unspec to the text substituted for <xrstor>.
;; Unlike the xsave attribute, the 64 variants map to the base name here;
;; the "64" suffix is appended explicitly by the "<xrstor>64" pattern.
24309 (define_int_attr xrstor
24310 [(UNSPECV_XRSTOR "xrstor")
24311 (UNSPECV_XRSTOR64 "xrstor")
24312 (UNSPECV_XRSTORS "xrstors")
24313 (UNSPECV_XRSTORS64 "xrstors")])
24314
;; 32-bit xsave-family stores (iterated over ANY_XSAVE): BLKmode memory
;; destination, with a single DImode input operand in a register matching
;; constraint "A".
24315 (define_insn "<xsave>"
24316 [(set (match_operand:BLK 0 "memory_operand" "=m")
24317 (unspec_volatile:BLK
24318 [(match_operand:DI 1 "register_operand" "A")]
24319 ANY_XSAVE))]
24320 "!TARGET_64BIT && TARGET_XSAVE"
24321 "<xsave>\t%0"
24322 [(set_attr "type" "other")
24323 (set_attr "memory" "store")
24324 (set (attr "length")
24325 (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
24326
;; 64-bit-target form of the ANY_XSAVE stores: the input is split into two
;; SImode operands, one in "a" and one in "d".  The emitted mnemonic is the
;; same as in the 32-bit pattern.
24327 (define_insn "<xsave>_rex64"
24328 [(set (match_operand:BLK 0 "memory_operand" "=m")
24329 (unspec_volatile:BLK
24330 [(match_operand:SI 1 "register_operand" "a")
24331 (match_operand:SI 2 "register_operand" "d")]
24332 ANY_XSAVE))]
24333 "TARGET_64BIT && TARGET_XSAVE"
24334 "<xsave>\t%0"
24335 [(set_attr "type" "other")
24336 (set_attr "memory" "store")
24337 (set (attr "length")
24338 (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
24339
;; The 64-suffixed stores, iterated over ANY_XSAVE64.  The pattern name is
;; again "<xsave>", but the attribute expands to xsave64, xsaveopt64, etc.
;; for these unspecs, so the resulting names differ from the 32-bit
;; pattern's.  Length is the default address length plus 4.
24340 (define_insn "<xsave>"
24341 [(set (match_operand:BLK 0 "memory_operand" "=m")
24342 (unspec_volatile:BLK
24343 [(match_operand:SI 1 "register_operand" "a")
24344 (match_operand:SI 2 "register_operand" "d")]
24345 ANY_XSAVE64))]
24346 "TARGET_64BIT && TARGET_XSAVE"
24347 "<xsave>\t%0"
24348 [(set_attr "type" "other")
24349 (set_attr "memory" "store")
24350 (set (attr "length")
24351 (symbol_ref "ix86_attr_length_address_default (insn) + 4"))])
24352
;; 32-bit xrstor-family loads (iterated over ANY_XRSTOR): BLKmode memory
;; source plus a DImode input operand in a register matching constraint
;; "A".
24353 (define_insn "<xrstor>"
24354 [(unspec_volatile:BLK
24355 [(match_operand:BLK 0 "memory_operand" "m")
24356 (match_operand:DI 1 "register_operand" "A")]
24357 ANY_XRSTOR)]
24358 "!TARGET_64BIT && TARGET_XSAVE"
24359 "<xrstor>\t%0"
24360 [(set_attr "type" "other")
24361 (set_attr "memory" "load")
24362 (set (attr "length")
24363 (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
24364
;; 64-bit-target form of the ANY_XRSTOR loads: the register input is split
;; into two SImode operands, one in "a" and one in "d".
24365 (define_insn "<xrstor>_rex64"
24366 [(unspec_volatile:BLK
24367 [(match_operand:BLK 0 "memory_operand" "m")
24368 (match_operand:SI 1 "register_operand" "a")
24369 (match_operand:SI 2 "register_operand" "d")]
24370 ANY_XRSTOR)]
24371 "TARGET_64BIT && TARGET_XSAVE"
24372 "<xrstor>\t%0"
24373 [(set_attr "type" "other")
24374 (set_attr "memory" "load")
24375 (set (attr "length")
24376 (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
24377
;; The 64-suffixed loads, iterated over ANY_XRSTOR64.  The "64" is appended
;; here in both the pattern name and the output template.  Length is the
;; default address length plus 4.
24378 (define_insn "<xrstor>64"
24379 [(unspec_volatile:BLK
24380 [(match_operand:BLK 0 "memory_operand" "m")
24381 (match_operand:SI 1 "register_operand" "a")
24382 (match_operand:SI 2 "register_operand" "d")]
24383 ANY_XRSTOR64)]
24384 "TARGET_64BIT && TARGET_XSAVE"
24385 "<xrstor>64\t%0"
24386 [(set_attr "type" "other")
24387 (set_attr "memory" "load")
24388 (set (attr "length")
24389 (symbol_ref "ix86_attr_length_address_default (insn) + 4"))])
24390
;; 32-bit xsetbv: an SImode input in "c" and a DImode input in a register
;; matching constraint "A".  No output operand is modeled.
24391 (define_insn "xsetbv"
24392 [(unspec_volatile:SI
24393 [(match_operand:SI 0 "register_operand" "c")
24394 (match_operand:DI 1 "register_operand" "A")]
24395 UNSPECV_XSETBV)]
24396 "!TARGET_64BIT && TARGET_XSAVE"
24397 "xsetbv"
24398 [(set_attr "type" "other")])
24399
;; 64-bit xsetbv: the DImode input of the 32-bit pattern is replaced by two
;; SImode inputs, one in "a" and one in "d".
24400 (define_insn "xsetbv_rex64"
24401 [(unspec_volatile:SI
24402 [(match_operand:SI 0 "register_operand" "c")
24403 (match_operand:SI 1 "register_operand" "a")
24404 (match_operand:SI 2 "register_operand" "d")]
24405 UNSPECV_XSETBV)]
24406 "TARGET_64BIT && TARGET_XSAVE"
24407 "xsetbv"
24408 [(set_attr "type" "other")])
24409
;; 32-bit xgetbv: SImode input in "c", DImode result in a register matching
;; constraint "A".
24410 (define_insn "xgetbv"
24411 [(set (match_operand:DI 0 "register_operand" "=A")
24412 (unspec_volatile:DI [(match_operand:SI 1 "register_operand" "c")]
24413 UNSPECV_XGETBV))]
24414 "!TARGET_64BIT && TARGET_XSAVE"
24415 "xgetbv"
24416 [(set_attr "type" "other")])
24417
;; 64-bit xgetbv: the result is modeled as two DImode outputs, in "a"
;; (operand 0) and "d" (operand 1), both derived from the SImode input in
;; "c" (operand 2).
24418 (define_insn "xgetbv_rex64"
24419 [(set (match_operand:DI 0 "register_operand" "=a")
24420 (unspec_volatile:DI [(match_operand:SI 2 "register_operand" "c")]
24421 UNSPECV_XGETBV))
24422 (set (match_operand:DI 1 "register_operand" "=d")
24423 (unspec_volatile:DI [(match_dup 2)] UNSPECV_XGETBV))]
24424 "TARGET_64BIT && TARGET_XSAVE"
24425 "xgetbv"
24426 [(set_attr "type" "other")])
24427
24428 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
24429 ;;
24430 ;; Floating-point instructions for atomic compound assignments
24431 ;;
24432 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
24433
24434 ; Clobber all floating-point registers on environment save and restore
24435 ; to ensure that the TOS value saved at fnstenv is valid after fldenv.
;; fnstenv into a BLKmode memory operand.  The pattern lists a clobber for
;; each of the eight XFmode registers ST0..ST7.  Length is the default
;; address length plus 2.
24436 (define_insn "fnstenv"
24437 [(set (match_operand:BLK 0 "memory_operand" "=m")
24438 (unspec_volatile:BLK [(const_int 0)] UNSPECV_FNSTENV))
24439 (clobber (reg:XF ST0_REG))
24440 (clobber (reg:XF ST1_REG))
24441 (clobber (reg:XF ST2_REG))
24442 (clobber (reg:XF ST3_REG))
24443 (clobber (reg:XF ST4_REG))
24444 (clobber (reg:XF ST5_REG))
24445 (clobber (reg:XF ST6_REG))
24446 (clobber (reg:XF ST7_REG))]
24447 "TARGET_80387"
24448 "fnstenv\t%0"
24449 [(set_attr "type" "other")
24450 (set_attr "memory" "store")
24451 (set (attr "length")
24452 (symbol_ref "ix86_attr_length_address_default (insn) + 2"))])
24453
;; fldenv from a BLKmode memory operand.  As in fnstenv, all eight XFmode
;; registers ST0..ST7 are listed as clobbered.  Length is the default
;; address length plus 2.
24454 (define_insn "fldenv"
24455 [(unspec_volatile [(match_operand:BLK 0 "memory_operand" "m")]
24456 UNSPECV_FLDENV)
24457 (clobber (reg:XF ST0_REG))
24458 (clobber (reg:XF ST1_REG))
24459 (clobber (reg:XF ST2_REG))
24460 (clobber (reg:XF ST3_REG))
24461 (clobber (reg:XF ST4_REG))
24462 (clobber (reg:XF ST5_REG))
24463 (clobber (reg:XF ST6_REG))
24464 (clobber (reg:XF ST7_REG))]
24465 "TARGET_80387"
24466 "fldenv\t%0"
24467 [(set_attr "type" "other")
24468 (set_attr "memory" "load")
24469 (set (attr "length")
24470 (symbol_ref "ix86_attr_length_address_default (insn) + 2"))])
24471
;; fnstsw with an HImode destination.  Two alternatives: a register
;; matching constraint "a" (memory attribute "none") or a memory operand
;; (memory attribute "store").
24472 (define_insn "fnstsw"
24473 [(set (match_operand:HI 0 "nonimmediate_operand" "=a,m")
24474 (unspec_volatile:HI [(const_int 0)] UNSPECV_FNSTSW))]
24475 "TARGET_80387"
24476 "fnstsw\t%0"
24477 [(set_attr "type" "other,other")
24478 (set_attr "memory" "none,store")
24479 (set (attr "length")
24480 (symbol_ref "ix86_attr_length_address_default (insn) + 2"))])
24481
;; fnclex: no operands, modeled as an unspec_volatile; fixed length of 2.
24482 (define_insn "fnclex"
24483 [(unspec_volatile [(const_int 0)] UNSPECV_FNCLEX)]
24484 "TARGET_80387"
24485 "fnclex"
24486 [(set_attr "type" "other")
24487 (set_attr "memory" "none")
24488 (set_attr "length" "2")])
24489
24490 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
24491 ;;
24492 ;; LWP instructions
24493 ;;
24494 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
24495
;; llwpcb with a single register input in mode P; no output is modeled.
;; The leading "@" in the name marks this as a parameterized pattern.
24496 (define_insn "@lwp_llwpcb<mode>"
24497 [(unspec_volatile [(match_operand:P 0 "register_operand" "r")]
24498 UNSPECV_LLWP_INTRINSIC)]
24499 "TARGET_LWP"
24500 "llwpcb\t%0"
24501 [(set_attr "type" "lwp")
24502 (set_attr "mode" "<MODE>")
24503 (set_attr "length" "5")])
24504
;; slwpcb: sets a register in mode P from an unspec_volatile with no
;; inputs.
24505 (define_insn "@lwp_slwpcb<mode>"
24506 [(set (match_operand:P 0 "register_operand" "=r")
24507 (unspec_volatile:P [(const_int 0)] UNSPECV_SLWP_INTRINSIC))]
24508 "TARGET_LWP"
24509 "slwpcb\t%0"
24510 [(set_attr "type" "lwp")
24511 (set_attr "mode" "<MODE>")
24512 (set_attr "length" "5")])
24513
;; lwpval with three inputs: a register in an SWI48 mode (operand 0), an
;; SImode register-or-memory operand (operand 1) and an SImode constant
;; (operand 2).  No output is modeled.  Length is the default address
;; length plus 9.
24514 (define_insn "@lwp_lwpval<mode>"
24515 [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")
24516 (match_operand:SI 1 "nonimmediate_operand" "rm")
24517 (match_operand:SI 2 "const_int_operand")]
24518 UNSPECV_LWPVAL_INTRINSIC)]
24519 "TARGET_LWP"
24520 "lwpval\t{%2, %1, %0|%0, %1, %2}"
24521 [(set_attr "type" "lwp")
24522 (set_attr "mode" "<MODE>")
24523 (set (attr "length")
24524 (symbol_ref "ix86_attr_length_address_default (insn) + 9"))])
24525
;; lwpins takes the same three inputs as lwpval, but its result is modeled
;; as a set of the flags register in CCCmode.
24526 (define_insn "@lwp_lwpins<mode>"
24527 [(set (reg:CCC FLAGS_REG)
24528 (unspec_volatile:CCC [(match_operand:SWI48 0 "register_operand" "r")
24529 (match_operand:SI 1 "nonimmediate_operand" "rm")
24530 (match_operand:SI 2 "const_int_operand")]
24531 UNSPECV_LWPINS_INTRINSIC))]
24532 "TARGET_LWP"
24533 "lwpins\t{%2, %1, %0|%0, %1, %2}"
24534 [(set_attr "type" "lwp")
24535 (set_attr "mode" "<MODE>")
24536 (set (attr "length")
24537 (symbol_ref "ix86_attr_length_address_default (insn) + 9"))])
24538
;; The two unspecs used by the rd<fsgs>base pattern.
24539 (define_int_iterator RDFSGSBASE
24540 [UNSPECV_RDFSBASE
24541 UNSPECV_RDGSBASE])
24542
;; The two unspecs used by the wr<fsgs>base pattern.
24543 (define_int_iterator WRFSGSBASE
24544 [UNSPECV_WRFSBASE
24545 UNSPECV_WRGSBASE])
24546
;; Maps each FS/GS base unspec to "fs" or "gs"; substituted for <fsgs> in
;; the pattern names and mnemonics of the rd/wr base insns.
24547 (define_int_attr fsgs
24548 [(UNSPECV_RDFSBASE "fs")
24549 (UNSPECV_RDGSBASE "gs")
24550 (UNSPECV_WRFSBASE "fs")
24551 (UNSPECV_WRGSBASE "gs")])
24552
;; rdfsbase / rdgsbase: sets a register in an SWI48 mode from an
;; unspec_volatile with no inputs.  TARGET_64BIT with TARGET_FSGSBASE only.
24553 (define_insn "rd<fsgs>base<mode>"
24554 [(set (match_operand:SWI48 0 "register_operand" "=r")
24555 (unspec_volatile:SWI48 [(const_int 0)] RDFSGSBASE))]
24556 "TARGET_64BIT && TARGET_FSGSBASE"
24557 "rd<fsgs>base\t%0"
24558 [(set_attr "type" "other")
24559 (set_attr "prefix_extra" "2")])
24560
;; wrfsbase / wrgsbase: takes a register input in an SWI48 mode; no output
;; is modeled.  TARGET_64BIT with TARGET_FSGSBASE only.
24561 (define_insn "wr<fsgs>base<mode>"
24562 [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")]
24563 WRFSGSBASE)]
24564 "TARGET_64BIT && TARGET_FSGSBASE"
24565 "wr<fsgs>base\t%0"
24566 [(set_attr "type" "other")
24567 (set_attr "prefix_extra" "2")])
24568
;; ptwrite with a register-or-memory input in an SWI48 mode; no output is
;; modeled.
24569 (define_insn "ptwrite<mode>"
24570 [(unspec_volatile [(match_operand:SWI48 0 "nonimmediate_operand" "rm")]
24571 UNSPECV_PTWRITE)]
24572 "TARGET_PTWRITE"
24573 "ptwrite\t%0"
24574 [(set_attr "type" "other")
24575 (set_attr "prefix_extra" "2")])
24576
;; rdrand: sets both a register in an SWI248 mode and the flags register
;; (in CCCmode); each is modeled as its own UNSPECV_RDRAND unspec_volatile.
24577 (define_insn "@rdrand<mode>"
24578 [(set (match_operand:SWI248 0 "register_operand" "=r")
24579 (unspec_volatile:SWI248 [(const_int 0)] UNSPECV_RDRAND))
24580 (set (reg:CCC FLAGS_REG)
24581 (unspec_volatile:CCC [(const_int 0)] UNSPECV_RDRAND))]
24582 "TARGET_RDRND"
24583 "rdrand\t%0"
24584 [(set_attr "type" "other")
24585 (set_attr "prefix_extra" "1")])
24586
;; rdseed: same shape as the rdrand pattern -- an SWI248 value in general
;; register operand 0 plus a CCC-mode result in FLAGS_REG, both from the
;; UNSPECV_RDSEED volatile unspec.  Enabled by TARGET_RDSEED.
24587 (define_insn "@rdseed<mode>"
24588 [(set (match_operand:SWI248 0 "register_operand" "=r")
24589 (unspec_volatile:SWI248 [(const_int 0)] UNSPECV_RDSEED))
24590 (set (reg:CCC FLAGS_REG)
24591 (unspec_volatile:CCC [(const_int 0)] UNSPECV_RDSEED))]
24592 "TARGET_RDSEED"
24593 "rdseed\t%0"
24594 [(set_attr "type" "other")
24595 (set_attr "prefix_extra" "1")])
24596
;; Expander for "pause".  Operand 0 is not supplied by the caller: the
;; preparation code builds a volatile BLKmode MEM at a scratch address, and
;; the insn is expressed as that MEM being set from an unspec of itself.
;; NOTE(review): presumably this makes the insn look like an access to
;; unknown memory so it is not moved or deleted -- confirm.
24597 (define_expand "pause"
24598 [(set (match_dup 0)
24599 (unspec:BLK [(match_dup 0)] UNSPEC_PAUSE))]
24600 ""
24601 {
24602 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
24603 MEM_VOLATILE_P (operands[0]) = 1;
24604 })
24605
24606 ;; Use "rep; nop", instead of "pause", to support older assemblers.
24607 ;; They have the same encoding.
;; This insn has the same RTL shape as the "pause" expander's template
;; (a BLKmode operand set from an UNSPEC_PAUSE of itself) and has no
;; target condition.  Length is fixed at 2 and the memory attribute is
;; "unknown".
24608 (define_insn "*pause"
24609 [(set (match_operand:BLK 0)
24610 (unspec:BLK [(match_dup 0)] UNSPEC_PAUSE))]
24611 ""
24612 "rep%; nop"
24613 [(set_attr "length" "2")
24614 (set_attr "memory" "unknown")])
24615
24616 ;; CET instructions
;; rdssp: output register operand 0 is tied to input operand 1 (constraint
;; "0"), so the insn is modelled as rewriting that register from its old
;; value.  Available with TARGET_SHSTK or when flag_cf_protection has
;; CF_RETURN set.
24617 (define_insn "@rdssp<mode>"
24618 [(set (match_operand:SWI48 0 "register_operand" "=r")
24619 (unspec_volatile:SWI48 [(match_operand:SWI48 1 "register_operand" "0")]
24620 UNSPECV_NOP_RDSSP))]
24621 "TARGET_SHSTK || (flag_cf_protection & CF_RETURN)"
24622 "rdssp<mskmodesuffix>\t%0"
24623 [(set_attr "length" "6")
24624 (set_attr "type" "other")])
24625
;; incssp: volatile unspec consuming SWI48 general register operand 0, with
;; no RTL-visible result.  Same enabling condition as the rdssp pattern:
;; TARGET_SHSTK or flag_cf_protection with CF_RETURN set.
24626 (define_insn "@incssp<mode>"
24627 [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")]
24628 UNSPECV_INCSSP)]
24629 "TARGET_SHSTK || (flag_cf_protection & CF_RETURN)"
24630 "incssp<mskmodesuffix>\t%0"
24631 [(set_attr "length" "4")
24632 (set_attr "type" "other")])
24633
;; saveprevssp: operand-less volatile unspec.  Enabled by TARGET_SHSTK;
;; length fixed at 5.
24634 (define_insn "saveprevssp"
24635 [(unspec_volatile [(const_int 0)] UNSPECV_SAVEPREVSSP)]
24636 "TARGET_SHSTK"
24637 "saveprevssp"
24638 [(set_attr "length" "5")
24639 (set_attr "type" "other")])
24640
;; rstorssp: volatile unspec taking one DImode memory operand.  Enabled by
;; TARGET_SHSTK; length fixed at 5.
24641 (define_insn "rstorssp"
24642 [(unspec_volatile [(match_operand:DI 0 "memory_operand" "m")]
24643 UNSPECV_RSTORSSP)]
24644 "TARGET_SHSTK"
24645 "rstorssp\t%0"
24646 [(set_attr "length" "5")
24647 (set_attr "type" "other")])
24648
;; wrss: volatile unspec taking SWI48 general register operand 0 (the value)
;; and SWI48 memory operand 1 (the location written).  Enabled by
;; TARGET_SHSTK.
;; The template provides both assembler dialects, like the other
;; two-operand patterns in this file: AT&T prints the register source
;; first, Intel prints the memory destination first.
24649 (define_insn "@wrss<mode>"
24650 [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")
24651 (match_operand:SWI48 1 "memory_operand" "m")]
24652 UNSPECV_WRSS)]
24653 "TARGET_SHSTK"
24654 "wrss<mskmodesuffix>\t{%0, %1|%1, %0}"
24655 [(set_attr "length" "3")
24656 (set_attr "type" "other")])
24657
;; wruss: volatile unspec taking SWI48 general register operand 0 (the
;; value) and SWI48 memory operand 1 (the location written).  Enabled by
;; TARGET_SHSTK.
;; The template provides both assembler dialects, like the other
;; two-operand patterns in this file: AT&T prints the register source
;; first, Intel prints the memory destination first.
24658 (define_insn "@wruss<mode>"
24659 [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")
24660 (match_operand:SWI48 1 "memory_operand" "m")]
24661 UNSPECV_WRUSS)]
24662 "TARGET_SHSTK"
24663 "wruss<mskmodesuffix>\t{%0, %1|%1, %0}"
24664 [(set_attr "length" "4")
24665 (set_attr "type" "other")])
24666
;; setssbsy: operand-less volatile unspec.  Enabled by TARGET_SHSTK;
;; length fixed at 4.
24667 (define_insn "setssbsy"
24668 [(unspec_volatile [(const_int 0)] UNSPECV_SETSSBSY)]
24669 "TARGET_SHSTK"
24670 "setssbsy"
24671 [(set_attr "length" "4")
24672 (set_attr "type" "other")])
24673
;; clrssbsy: volatile unspec taking one DImode memory operand.  Enabled by
;; TARGET_SHSTK; length fixed at 4.
24674 (define_insn "clrssbsy"
24675 [(unspec_volatile [(match_operand:DI 0 "memory_operand" "m")]
24676 UNSPECV_CLRSSBSY)]
24677 "TARGET_SHSTK"
24678 "clrssbsy\t%0"
24679 [(set_attr "length" "4")
24680 (set_attr "type" "other")])
24681
;; nop_endbr: operand-less volatile unspec, enabled when flag_cf_protection
;; has CF_BRANCH set.  The output code picks the mnemonic at output time:
;; "endbr64" for TARGET_64BIT, "endbr32" otherwise.  Length is fixed at 4
;; with no immediate and no modrm byte.
24682 (define_insn "nop_endbr"
24683 [(unspec_volatile [(const_int 0)] UNSPECV_NOP_ENDBR)]
24684 "(flag_cf_protection & CF_BRANCH)"
24685 {
24686 return TARGET_64BIT ? "endbr64" : "endbr32";
24687 }
24688 [(set_attr "length" "4")
24689 (set_attr "length_immediate" "0")
24690 (set_attr "modrm" "0")])
24691
24692 ;; For RTM support
;; Expander for xbegin (TARGET_RTM).  It emits, in order: a move of -1 into
;; the SImode AX hard register, the xbegin_1 jump insn targeting a fresh
;; label, that label itself, and a move of AX into operand 0.  The RTL
;; template above the preparation code is never emitted because the code
;; ends with DONE.
24693 (define_expand "xbegin"
24694 [(set (match_operand:SI 0 "register_operand")
24695 (unspec_volatile:SI [(const_int 0)] UNSPECV_XBEGIN))]
24696 "TARGET_RTM"
24697 {
24698 rtx_code_label *label = gen_label_rtx ();
24699
24700 /* xbegin is emitted as jump_insn, so reload won't be able
24701 to reload its operand. Force the value into AX hard register. */
24702 rtx ax_reg = gen_rtx_REG (SImode, AX_REG);
24703 emit_move_insn (ax_reg, constm1_rtx);
24704
24705 emit_jump_insn (gen_xbegin_1 (ax_reg, label));
24706
/* The label sits directly after the jump; record its single use by
   the xbegin_1 insn emitted above.  */
24707 emit_label (label);
24708 LABEL_NUSES (label) = 1;
24709
24710 emit_move_insn (operands[0], ax_reg);
24711
24712 DONE;
24713 })
24714
;; Jump insn generated by the xbegin expander.  It is a parallel of a
;; conditional branch to label operand 1 (taken when the UNSPEC_XBEGIN_ABORT
;; unspec is nonzero) and a volatile update of SImode operand 0, which is
;; read and written in place (constraint "+a").  Length fixed at 6.
24715 (define_insn "xbegin_1"
24716 [(set (pc)
24717 (if_then_else (ne (unspec [(const_int 0)] UNSPEC_XBEGIN_ABORT)
24718 (const_int 0))
24719 (label_ref (match_operand 1))
24720 (pc)))
24721 (set (match_operand:SI 0 "register_operand" "+a")
24722 (unspec_volatile:SI [(match_dup 0)] UNSPECV_XBEGIN))]
24723 "TARGET_RTM"
24724 "xbegin\t%l1"
24725 [(set_attr "type" "other")
24726 (set_attr "length" "6")])
24727
;; xend: operand-less volatile unspec.  Enabled by TARGET_RTM; length
;; fixed at 3.
24728 (define_insn "xend"
24729 [(unspec_volatile [(const_int 0)] UNSPECV_XEND)]
24730 "TARGET_RTM"
24731 "xend"
24732 [(set_attr "type" "other")
24733 (set_attr "length" "3")])
24734
;; xabort: volatile unspec taking one SImode operand that must satisfy the
;; const_0_to_255_operand predicate (no constraint string is given).
;; Enabled by TARGET_RTM; length fixed at 3.
24735 (define_insn "xabort"
24736 [(unspec_volatile [(match_operand:SI 0 "const_0_to_255_operand")]
24737 UNSPECV_XABORT)]
24738 "TARGET_RTM"
24739 "xabort\t%0"
24740 [(set_attr "type" "other")
24741 (set_attr "length" "3")])
24742
;; Expander for xtest (TARGET_RTM).  Emits the flag-setting xtest_1 insn,
;; then calls ix86_expand_setcc to store into QImode operand 0 the result
;; of an NE comparison of the CCZ-mode flags register against zero.  The
;; RTL template is not emitted because the preparation code ends with DONE.
24743 (define_expand "xtest"
24744 [(set (match_operand:QI 0 "register_operand")
24745 (unspec_volatile:QI [(const_int 0)] UNSPECV_XTEST))]
24746 "TARGET_RTM"
24747 {
24748 emit_insn (gen_xtest_1 ());
24749
24750 ix86_expand_setcc (operands[0], NE,
24751 gen_rtx_REG (CCZmode, FLAGS_REG), const0_rtx);
24752 DONE;
24753 })
24754
;; The xtest instruction itself: sets FLAGS_REG in CCZ mode from an
;; operand-less volatile unspec.  Used by the xtest expander.  Length fixed
;; at 3.
24755 (define_insn "xtest_1"
24756 [(set (reg:CCZ FLAGS_REG)
24757 (unspec_volatile:CCZ [(const_int 0)] UNSPECV_XTEST))]
24758 "TARGET_RTM"
24759 "xtest"
24760 [(set_attr "type" "other")
24761 (set_attr "length" "3")])
24762
;; clwb: volatile unspec whose only operand is an address (predicate
;; address_operand, constraint "p"), printed with the %a modifier.  Enabled
;; by TARGET_CLWB; the memory attribute is "unknown".
24763 (define_insn "clwb"
24764 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
24765 UNSPECV_CLWB)]
24766 "TARGET_CLWB"
24767 "clwb\t%a0"
24768 [(set_attr "type" "sse")
24769 (set_attr "atom_sse_attr" "fence")
24770 (set_attr "memory" "unknown")])
24771
;; clflushopt: same shape as the clwb pattern -- a volatile unspec whose
;; only operand is an address printed with %a.  Enabled by
;; TARGET_CLFLUSHOPT; the memory attribute is "unknown".
24772 (define_insn "clflushopt"
24773 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
24774 UNSPECV_CLFLUSHOPT)]
24775 "TARGET_CLFLUSHOPT"
24776 "clflushopt\t%a0"
24777 [(set_attr "type" "sse")
24778 (set_attr "atom_sse_attr" "fence")
24779 (set_attr "memory" "unknown")])
24780
24781 ;; MONITORX and MWAITX
;; mwaitx: volatile unspec with three SImode register inputs pinned by the
;; single-register constraints "c", "a" and "b".  The output template
;; prints no operands; the constraints alone place the values.  Enabled by
;; TARGET_MWAITX; length fixed at 3.
24782 (define_insn "mwaitx"
24783 [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
24784 (match_operand:SI 1 "register_operand" "a")
24785 (match_operand:SI 2 "register_operand" "b")]
24786 UNSPECV_MWAITX)]
24787 "TARGET_MWAITX"
24788 ;; 64bit version is "mwaitx %rax,%rcx,%rbx". But only lower 32bits are used.
24789 ;; Since 32bit register operands are implicitly zero extended to 64bit,
24790 ;; we only need to set up 32bit registers.
24791 "mwaitx"
24792 [(set_attr "length" "3")])
24793
;; monitorx: volatile unspec with a P-mode register input (constraint "a")
;; and two SImode register inputs (constraints "c" and "d").  The template
;; prints no operands.  Enabled by TARGET_MWAITX.
24794 (define_insn "@monitorx_<mode>"
24795 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
24796 (match_operand:SI 1 "register_operand" "c")
24797 (match_operand:SI 2 "register_operand" "d")]
24798 UNSPECV_MONITORX)]
24799 "TARGET_MWAITX"
24800 ;; 64bit version is "monitorx %rax,%rcx,%rdx". But only lower 32bits in
24801 ;; RCX and RDX are used. Since 32bit register operands are implicitly
24802 ;; zero extended to 64bit, we only need to set up 32bit registers.
24803 "%^monitorx"
;; Length is 3, plus 1 when Pmode differs from word_mode.
24804 [(set (attr "length")
24805 (symbol_ref ("(Pmode != word_mode) + 3")))])
24806
24807 ;; CLZERO
;; clzero: volatile unspec with a single P-mode register input pinned by
;; constraint "a"; the template prints no operands.  Enabled by
;; TARGET_CLZERO; length fixed at 3 and the memory attribute is "unknown".
24808 (define_insn "@clzero_<mode>"
24809 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")]
24810 UNSPECV_CLZERO)]
24811 "TARGET_CLZERO"
24812 "clzero"
24813 [(set_attr "length" "3")
24814 (set_attr "memory" "unknown")])
24815
24816 ;; RDPKRU and WRPKRU
24817
;; Expander for rdpkru (TARGET_PKU).  The caller supplies only SImode
;; operand 0.  The preparation code creates operand 1, a register holding
;; zero that is the unspec's input, and operand 2, a fresh SImode pseudo
;; that the parallel sets to zero.
24818 (define_expand "rdpkru"
24819 [(parallel
24820 [(set (match_operand:SI 0 "register_operand")
24821 (unspec_volatile:SI [(match_dup 1)] UNSPECV_PKU))
24822 (set (match_dup 2) (const_int 0))])]
24823 "TARGET_PKU"
24824 {
24825 operands[1] = force_reg (SImode, const0_rtx);
24826 operands[2] = gen_reg_rtx (SImode);
24827 })
24828
;; Insn for the RTL shape built by the rdpkru expander.  Operand numbers
;; differ from the expander's: here the unspec input is operand 2
;; (constraint "c") and the register set to zero is operand 1 (constraint
;; "=d"); the result is operand 0 (constraint "=a").  The template prints
;; no operands.
24829 (define_insn "*rdpkru"
24830 [(set (match_operand:SI 0 "register_operand" "=a")
24831 (unspec_volatile:SI [(match_operand:SI 2 "register_operand" "c")]
24832 UNSPECV_PKU))
24833 (set (match_operand:SI 1 "register_operand" "=d")
24834 (const_int 0))]
24835 "TARGET_PKU"
24836 "rdpkru"
24837 [(set_attr "type" "other")])
24838
;; Expander for wrpkru (TARGET_PKU).  The caller supplies SImode operand 0;
;; the preparation code creates operands 1 and 2 as two registers each
;; loaded with zero, and all three become inputs of the volatile unspec.
24839 (define_expand "wrpkru"
24840 [(unspec_volatile:SI
24841 [(match_operand:SI 0 "register_operand")
24842 (match_dup 1) (match_dup 2)] UNSPECV_PKU)]
24843 "TARGET_PKU"
24844 {
24845 operands[1] = force_reg (SImode, const0_rtx);
24846 operands[2] = force_reg (SImode, const0_rtx);
24847 })
24848
;; Insn for the RTL shape built by the wrpkru expander: a volatile unspec
;; with three SImode register inputs pinned by constraints "a", "d" and
;; "c".  The template prints no operands.
24849 (define_insn "*wrpkru"
24850 [(unspec_volatile:SI
24851 [(match_operand:SI 0 "register_operand" "a")
24852 (match_operand:SI 1 "register_operand" "d")
24853 (match_operand:SI 2 "register_operand" "c")] UNSPECV_PKU)]
24854 "TARGET_PKU"
24855 "wrpkru"
24856 [(set_attr "type" "other")])
24857
;; rdpid, non-64-bit variant: the UNSPECV_RDPID volatile unspec produces an
;; SImode value in general register operand 0.  Requires !TARGET_64BIT and
;; TARGET_RDPID.
24858 (define_insn "rdpid"
24859 [(set (match_operand:SI 0 "register_operand" "=r")
24860 (unspec_volatile:SI [(const_int 0)] UNSPECV_RDPID))]
24861 "!TARGET_64BIT && TARGET_RDPID"
24862 "rdpid\t%0"
24863 [(set_attr "type" "other")])
24864
;; rdpid, 64-bit variant: identical to the "rdpid" pattern except that the
;; result and unspec are DImode and the condition requires TARGET_64BIT.
24865 (define_insn "rdpid_rex64"
24866 [(set (match_operand:DI 0 "register_operand" "=r")
24867 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDPID))]
24868 "TARGET_64BIT && TARGET_RDPID"
24869 "rdpid\t%0"
24870 [(set_attr "type" "other")])
24871
24872 ;; Intrinsics for > i486
24873
;; wbinvd: operand-less volatile unspec with an empty condition string, so
;; the pattern is always available.
24874 (define_insn "wbinvd"
24875 [(unspec_volatile [(const_int 0)] UNSPECV_WBINVD)]
24876 ""
24877 "wbinvd"
24878 [(set_attr "type" "other")])
24879
;; wbnoinvd: operand-less volatile unspec.  Enabled by TARGET_WBNOINVD.
24880 (define_insn "wbnoinvd"
24881 [(unspec_volatile [(const_int 0)] UNSPECV_WBNOINVD)]
24882 "TARGET_WBNOINVD"
24883 "wbnoinvd"
24884 [(set_attr "type" "other")])
24885
24886 ;; MOVDIRI and MOVDIR64B
24887
;; movdiri: SWI48 memory operand 0 is set from an UNSPEC_MOVDIRI unspec of
;; SWI48 general register operand 1.  The template gives both assembler
;; dialects.  Enabled by TARGET_MOVDIRI.
24888 (define_insn "movdiri<mode>"
24889 [(set (match_operand:SWI48 0 "memory_operand" "=m")
24890 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
24891 UNSPEC_MOVDIRI))]
24892 "TARGET_MOVDIRI"
24893 "movdiri\t{%1, %0|%0, %1}"
24894 [(set_attr "type" "other")])
24895
;; movdir64b: the destination is an XImode MEM whose address is P-mode
;; register operand 0 (so the template prints a register, not a memory
;; reference); the source is XImode memory operand 1 wrapped in an
;; UNSPEC_MOVDIR64B unspec.  Enabled by TARGET_MOVDIR64B.
24896 (define_insn "@movdir64b_<mode>"
24897 [(set (mem:XI (match_operand:P 0 "register_operand" "r"))
24898 (unspec:XI [(match_operand:XI 1 "memory_operand" "m")]
24899 UNSPEC_MOVDIR64B))]
24900 "TARGET_MOVDIR64B"
24901 "movdir64b\t{%1, %0|%0, %1}"
24902 [(set_attr "type" "other")])
24903
24904 ;; TSXLDTRK
;; The TSXLDTRK iterator covers the two unspecs; the tsxldtrk attribute maps
;; each to its mnemonic, which serves as both the pattern name and the
;; output template of the single operand-less insn below.
24905 (define_int_iterator TSXLDTRK [UNSPECV_XSUSLDTRK UNSPECV_XRESLDTRK])
24906 (define_int_attr tsxldtrk [(UNSPECV_XSUSLDTRK "xsusldtrk")
24907 (UNSPECV_XRESLDTRK "xresldtrk")])
;; xsusldtrk / xresldtrk: enabled by TARGET_TSXLDTRK; length fixed at 4.
24908 (define_insn "<tsxldtrk>"
24909 [(unspec_volatile [(const_int 0)] TSXLDTRK)]
24910 "TARGET_TSXLDTRK"
24911 "<tsxldtrk>"
24912 [(set_attr "type" "other")
24913 (set_attr "length" "4")])
24914
24915 ;; ENQCMD and ENQCMDS
24916
;; Iterator over the two enqueue-command unspecs, and the suffix attribute
;; that is empty for UNSPECV_ENQCMD and "s" for UNSPECV_ENQCMDS.
24917 (define_int_iterator ENQCMD [UNSPECV_ENQCMD UNSPECV_ENQCMDS])
24918 (define_int_attr enqcmd_sfx [(UNSPECV_ENQCMD "") (UNSPECV_ENQCMDS "s")])
24919
;; enqcmd / enqcmds: a volatile unspec of P-mode register operand 0 and
;; XImode memory operand 1 whose CCZ-mode result is stored in FLAGS_REG.
;; The template gives both assembler dialects.  Enabled by TARGET_ENQCMD.
24920 (define_insn "@enqcmd<enqcmd_sfx>_<mode>"
24921 [(set (reg:CCZ FLAGS_REG)
24922 (unspec_volatile:CCZ [(match_operand:P 0 "register_operand" "r")
24923 (match_operand:XI 1 "memory_operand" "m")]
24924 ENQCMD))]
24925 "TARGET_ENQCMD"
24926 "enqcmd<enqcmd_sfx>\t{%1, %0|%0, %1}"
24927 [(set_attr "type" "other")])
24928
24929 ;; UINTR
;; Iterator over the clui/stui unspecs, and the attribute mapping each to
;; its mnemonic.
24930 (define_int_iterator UINTR [UNSPECV_CLUI UNSPECV_STUI])
24931 (define_int_attr uintr [(UNSPECV_CLUI "clui") (UNSPECV_STUI "stui")])
24932
;; clui / stui: operand-less volatile unspec; the mnemonic doubles as the
;; pattern name.  Requires TARGET_UINTR and TARGET_64BIT; length fixed at 4.
24933 (define_insn "<uintr>"
24934 [(unspec_volatile [(const_int 0)] UINTR)]
24935 "TARGET_UINTR && TARGET_64BIT"
24936 "<uintr>"
24937 [(set_attr "type" "other")
24938 (set_attr "length" "4")])
24939
;; testui: sets FLAGS_REG in CCC mode from an operand-less volatile unspec.
;; Requires TARGET_UINTR and TARGET_64BIT; length fixed at 4.
24940 (define_insn "testui"
24941 [(set (reg:CCC FLAGS_REG)
24942 (unspec_volatile:CCC [(const_int 0)] UNSPECV_TESTUI))]
24943 "TARGET_UINTR && TARGET_64BIT"
24944 "testui"
24945 [(set_attr "type" "other")
24946 (set_attr "length" "4")])
24947
;; senduipi: volatile unspec consuming DImode general register operand 0,
;; with no RTL-visible result.  Requires TARGET_UINTR and TARGET_64BIT;
;; length fixed at 4.
24948 (define_insn "senduipi"
24949 [(unspec_volatile
24950 [(match_operand:DI 0 "register_operand" "r")]
24951 UNSPECV_SENDUIPI)]
24952 "TARGET_UINTR && TARGET_64BIT"
24953 "senduipi\t%0"
24954 [(set_attr "type" "other")
24955 (set_attr "length" "4")])
24956
24957 ;; WAITPKG
24958
;; umwait, non-64-bit variant: a volatile unspec of SImode general register
;; operand 0 and DImode register operand 1 (constraint "A") whose CCC-mode
;; result is stored in FLAGS_REG.  Only operand 0 is printed.  Requires
;; !TARGET_64BIT and TARGET_WAITPKG.
24959 (define_insn "umwait"
24960 [(set (reg:CCC FLAGS_REG)
24961 (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
24962 (match_operand:DI 1 "register_operand" "A")]
24963 UNSPECV_UMWAIT))]
24964 "!TARGET_64BIT && TARGET_WAITPKG"
24965 "umwait\t%0"
24966 [(set_attr "length" "3")])
24967
;; umwait, 64-bit variant: where the non-64-bit pattern takes one DImode
;; "A" operand, this one takes two SImode register operands with
;; constraints "a" and "d".  Only operand 0 is printed.  Requires
;; TARGET_64BIT and TARGET_WAITPKG.
24968 (define_insn "umwait_rex64"
24969 [(set (reg:CCC FLAGS_REG)
24970 (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
24971 (match_operand:SI 1 "register_operand" "a")
24972 (match_operand:SI 2 "register_operand" "d")]
24973 UNSPECV_UMWAIT))]
24974 "TARGET_64BIT && TARGET_WAITPKG"
24975 "umwait\t%0"
24976 [(set_attr "length" "3")])
24977
;; umonitor: volatile unspec consuming P-mode general register operand 0.
;; Enabled by TARGET_WAITPKG.  Length is 3, plus 1 when Pmode differs from
;; word_mode.
24978 (define_insn "@umonitor_<mode>"
24979 [(unspec_volatile [(match_operand:P 0 "register_operand" "r")]
24980 UNSPECV_UMONITOR)]
24981 "TARGET_WAITPKG"
24982 "umonitor\t%0"
24983 [(set (attr "length")
24984 (symbol_ref ("(Pmode != word_mode) + 3")))])
24985
;; tpause, non-64-bit variant: a volatile unspec of SImode general register
;; operand 0 and DImode register operand 1 (constraint "A") whose CCC-mode
;; result is stored in FLAGS_REG.  Only operand 0 is printed.  Requires
;; !TARGET_64BIT and TARGET_WAITPKG.
24986 (define_insn "tpause"
24987 [(set (reg:CCC FLAGS_REG)
24988 (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
24989 (match_operand:DI 1 "register_operand" "A")]
24990 UNSPECV_TPAUSE))]
24991 "!TARGET_64BIT && TARGET_WAITPKG"
24992 "tpause\t%0"
24993 [(set_attr "length" "3")])
24994
;; tpause, 64-bit variant: where the non-64-bit pattern takes one DImode
;; "A" operand, this one takes two SImode register operands with
;; constraints "a" and "d".  Only operand 0 is printed.  Requires
;; TARGET_64BIT and TARGET_WAITPKG.
24995 (define_insn "tpause_rex64"
24996 [(set (reg:CCC FLAGS_REG)
24997 (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
24998 (match_operand:SI 1 "register_operand" "a")
24999 (match_operand:SI 2 "register_operand" "d")]
25000 UNSPECV_TPAUSE))]
25001 "TARGET_64BIT && TARGET_WAITPKG"
25002 "tpause\t%0"
25003 [(set_attr "length" "3")])
25004
;; cldemote: volatile unspec whose only operand is an address (predicate
;; address_operand, constraint "p"), printed with the %a modifier.  Enabled
;; by TARGET_CLDEMOTE; the memory attribute is "unknown".
25005 (define_insn "cldemote"
25006 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
25007 UNSPECV_CLDEMOTE)]
25008 "TARGET_CLDEMOTE"
25009 "cldemote\t%a0"
25010 [(set_attr "type" "other")
25011 (set_attr "memory" "unknown")])
25012
;; speculation_barrier: operand-less volatile unspec emitted as "lfence".
;; The condition string is empty, so the pattern is always available.
;; Length fixed at 3.
25013 (define_insn "speculation_barrier"
25014 [(unspec_volatile [(const_int 0)] UNSPECV_SPECULATION_BARRIER)]
25015 ""
25016 "lfence"
25017 [(set_attr "type" "other")
25018 (set_attr "length" "3")])
25019
;; serialize: operand-less volatile unspec.  Enabled by TARGET_SERIALIZE;
;; length fixed at 3.
25020 (define_insn "serialize"
25021 [(unspec_volatile [(const_int 0)] UNSPECV_SERIALIZE)]
25022 "TARGET_SERIALIZE"
25023 "serialize"
25024 [(set_attr "type" "other")
25025 (set_attr "length" "3")])
25026
;; patchable_area: volatile unspec carrying two constant-integer operands.
;; All output is produced by ix86_output_patchable_area, which receives
;; operand 0's value and whether operand 1 is nonzero; the template itself
;; returns an empty string.  The length attribute is operand 0's value.
25027 (define_insn "patchable_area"
25028 [(unspec_volatile [(match_operand 0 "const_int_operand")
25029 (match_operand 1 "const_int_operand")]
25030 UNSPECV_PATCHABLE_AREA)]
25031 ""
25032 {
25033 ix86_output_patchable_area (INTVAL (operands[0]),
25034 INTVAL (operands[1]) != 0);
25035 return "";
25036 }
25037 [(set (attr "length") (symbol_ref "INTVAL (operands[0])"))
25038 (set_attr "length_immediate" "0")
25039 (set_attr "modrm" "0")])
25040
;; hreset: volatile unspec with one SImode register input pinned by
;; constraint "a".  The register is not printed; the template emits a
;; literal 0 as the only assembler operand ("$0" in AT&T dialect, "0" in
;; Intel dialect).  Enabled by TARGET_HRESET; length fixed at 4.
25041 (define_insn "hreset"
25042 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")]
25043 UNSPECV_HRESET)]
25044 "TARGET_HRESET"
25045 "hreset\t{$0|0}"
25046 [(set_attr "type" "other")
25047 (set_attr "length" "4")])
25048
25049 ;; Spaceship optimization
;; Expander for a three-way comparison of two MODEF operands into SImode
;; register operand 0.  All work is delegated to ix86_expand_fp_spaceship.
;; Available when x87 or SSE math handles the mode and either TARGET_CMOVE
;; or usable SAHF is present.
25050 (define_expand "spaceship<mode>3"
25051 [(match_operand:SI 0 "register_operand")
25052 (match_operand:MODEF 1 "cmp_fp_expander_operand")
25053 (match_operand:MODEF 2 "cmp_fp_expander_operand")]
25054 "(TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
25055 && (TARGET_CMOVE || (TARGET_SAHF && TARGET_USE_SAHF))"
25056 {
25057 ix86_expand_fp_spaceship (operands[0], operands[1], operands[2]);
25058 DONE;
25059 })
25060
;; XFmode counterpart of spaceship<mode>3: the inputs use the
;; nonmemory_operand predicate and the condition requires TARGET_80387 plus
;; either TARGET_CMOVE or usable SAHF.  Delegates to
;; ix86_expand_fp_spaceship.
25061 (define_expand "spaceshipxf3"
25062 [(match_operand:SI 0 "register_operand")
25063 (match_operand:XF 1 "nonmemory_operand")
25064 (match_operand:XF 2 "nonmemory_operand")]
25065 "TARGET_80387 && (TARGET_CMOVE || (TARGET_SAHF && TARGET_USE_SAHF))"
25066 {
25067 ix86_expand_fp_spaceship (operands[0], operands[1], operands[2]);
25068 DONE;
25069 })
25070
25071 ;; Defined because the generic expand_builtin_issignaling for XFmode
25072 ;; only tests for sNaNs, but the i387 also treats pseudo numbers as always
25073 ;; signaling.
;; Operand 0 is the SImode result register; operand 1 is the XFmode value.
;; The expansion uses only integer operations on the value's in-memory
;; image.
25074 (define_expand "issignalingxf2"
25075 [(match_operand:SI 0 "register_operand")
25076 (match_operand:XF 1 "general_operand")]
25077 ""
25078 {
25079 rtx temp = operands[1];
/* The pieces are read through integer memory references below, so a
   non-memory operand is first spilled to a stack temporary.  */
25080 if (!MEM_P (temp))
25081 {
25082 rtx mem = assign_stack_temp (XFmode, GET_MODE_SIZE (XFmode));
25083 emit_move_insn (mem, temp);
25084 temp = mem;
25085 }
/* View the object as an HImode piece at byte offset 8 (ex) and two
   SImode pieces at byte offsets 4 (hi) and 0 (lo).  */
25086 rtx ex = adjust_address (temp, HImode, 8);
25087 rtx hi = adjust_address (temp, SImode, 4);
25088 rtx lo = adjust_address (temp, SImode, 0);
25089 rtx val = GEN_INT (HOST_WIDE_INT_M1U << 30);
25090 rtx mask = GEN_INT (0x7fff);
25091 rtx bit = GEN_INT (HOST_WIDE_INT_1U << 30);
25092 /* Expand to:
25093 ((ex & mask) && (int) hi >= 0)
25094 || ((ex & mask) == mask && ((hi ^ bit) | ((lo | -lo) >> 31)) > val). */
/* lo = (lo | -lo) >> 31.  */
25095 rtx nlo = expand_unop (SImode, neg_optab, lo, NULL_RTX, 0);
25096 lo = expand_binop (SImode, ior_optab, lo, nlo,
25097 NULL_RTX, 1, OPTAB_LIB_WIDEN);
25098 lo = expand_shift (RSHIFT_EXPR, SImode, lo, 31, NULL_RTX, 1);
/* temp = ((hi ^ bit) | lo) > val, as an unsigned (GTU) comparison.  */
25099 temp = expand_binop (SImode, xor_optab, hi, bit,
25100 NULL_RTX, 1, OPTAB_LIB_WIDEN);
25101 temp = expand_binop (SImode, ior_optab, temp, lo,
25102 NULL_RTX, 1, OPTAB_LIB_WIDEN);
25103 temp = emit_store_flag_force (gen_reg_rtx (SImode), GTU, temp, val,
25104 SImode, 1, 1);
/* ex = ex & mask, computed in HImode.  */
25105 ex = expand_binop (HImode, and_optab, ex, mask,
25106 NULL_RTX, 1, OPTAB_LIB_WIDEN);
/* temp2 = (ex & mask) != 0.
   NOTE(review): ex was just computed in HImode, yet SImode is passed as
   the comparison mode here while the EQ test below passes HImode --
   confirm the SImode argument is intended.  */
25107 rtx temp2 = emit_store_flag_force (gen_reg_rtx (SImode), NE,
25108 ex, const0_rtx, SImode, 1, 1);
/* ex = (ex & mask) == mask.  */
25109 ex = emit_store_flag_force (gen_reg_rtx (SImode), EQ,
25110 ex, mask, HImode, 1, 1);
/* temp = second disjunct of the expression above.  */
25111 temp = expand_binop (SImode, and_optab, temp, ex,
25112 NULL_RTX, 1, OPTAB_LIB_WIDEN);
/* temp3 = (int) hi >= 0; temp2 = first disjunct.  */
25113 rtx temp3 = emit_store_flag_force (gen_reg_rtx (SImode), GE,
25114 hi, const0_rtx, SImode, 0, 1);
25115 temp2 = expand_binop (SImode, and_optab, temp2, temp3,
25116 NULL_RTX, 1, OPTAB_LIB_WIDEN);
/* Result is the OR of the two disjuncts.  */
25117 temp = expand_binop (SImode, ior_optab, temp, temp2,
25118 NULL_RTX, 1, OPTAB_LIB_WIDEN);
25119 emit_move_insn (operands[0], temp);
25120 DONE;
25121 })
25122
25123 (include "mmx.md")
25124 (include "sse.md")
25125 (include "sync.md")