]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/i386/i386.md
Support Intel prefetchit0/t1
[thirdparty/gcc.git] / gcc / config / i386 / i386.md
1 ;; GCC machine description for IA-32 and x86-64.
2 ;; Copyright (C) 1988-2022 Free Software Foundation, Inc.
3 ;; Mostly by William Schelter.
4 ;; x86_64 support added by Jan Hubicka
5 ;;
6 ;; This file is part of GCC.
7 ;;
8 ;; GCC is free software; you can redistribute it and/or modify
9 ;; it under the terms of the GNU General Public License as published by
10 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; any later version.
12 ;;
13 ;; GCC is distributed in the hope that it will be useful,
14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ;; GNU General Public License for more details.
17 ;;
18 ;; You should have received a copy of the GNU General Public License
19 ;; along with GCC; see the file COPYING3. If not see
20 ;; <http://www.gnu.org/licenses/>. */
21 ;;
22 ;; The original PO technology requires these to be ordered by speed,
23 ;; so that assigner will pick the fastest.
24 ;;
25 ;; See file "rtl.def" for documentation on define_insn, match_*, et al.
26 ;;
27 ;; The special asm out single letter directives following a '%' are:
28 ;; L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
29 ;; C -- print opcode suffix for set/cmov insn.
30 ;; c -- like C, but print reversed condition
31 ;; F,f -- likewise, but for floating-point.
32 ;; O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
33 ;; otherwise nothing
34 ;; R -- print the prefix for register names.
35 ;; z -- print the opcode suffix for the size of the current operand.
36 ;; Z -- likewise, with special suffixes for x87 instructions.
37 ;; * -- print a star (in certain assembler syntax)
38 ;; A -- print an absolute memory reference.
39 ;; E -- print address with DImode register names if TARGET_64BIT.
40 ;; w -- print the operand as if it's a "word" (HImode) even if it isn't.
41 ;; s -- print a shift double count, followed by the assembler's argument
42 ;; delimiter.
43 ;; b -- print the QImode name of the register for the indicated operand.
44 ;; %b0 would print %al if operands[0] is reg 0.
45 ;; w -- likewise, print the HImode name of the register.
46 ;; k -- likewise, print the SImode name of the register.
47 ;; q -- likewise, print the DImode name of the register.
48 ;; x -- likewise, print the V4SFmode name of the register.
49 ;; t -- likewise, print the V8SFmode name of the register.
50 ;; h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
51 ;; y -- print "st(0)" instead of "st" as a register.
52 ;; d -- print duplicated register operand for AVX instruction.
53 ;; D -- print condition for SSE cmp instruction.
54 ;; P -- if PIC, print an @PLT suffix.
55 ;; p -- print raw symbol name.
56 ;; X -- don't print any sort of PIC '@' suffix for a symbol.
57 ;; & -- print some in-use local-dynamic symbol name.
58 ;; H -- print a memory address offset by 8; used for sse high-parts
59 ;; K -- print HLE lock prefix
60 ;; Y -- print condition for XOP pcom* instruction.
61 ;; + -- print a branch hint as 'cs' or 'ds' prefix
62 ;; ; -- print a semicolon (after prefixes due to bug in older gas).
63 ;; ~ -- print "i" if TARGET_AVX2, "f" otherwise.
64 ;; ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
65 ;; ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
66
;; UNSPEC_* codes, collected in the enum named "unspec".  Entries are
;; grouped by the feature they serve; each group has its own heading.
67 (define_c_enum "unspec" [
68 ;; Relocation specifiers
69 UNSPEC_GOT
70 UNSPEC_GOTOFF
71 UNSPEC_GOTPCREL
72 UNSPEC_GOTTPOFF
73 UNSPEC_TPOFF
74 UNSPEC_NTPOFF
75 UNSPEC_DTPOFF
76 UNSPEC_GOTNTPOFF
77 UNSPEC_INDNTPOFF
78 UNSPEC_PLTOFF
79 UNSPEC_MACHOPIC_OFFSET
80 UNSPEC_PCREL
81 UNSPEC_SIZEOF
82
83 ;; Prologue support
84 UNSPEC_STACK_ALLOC
85 UNSPEC_SET_GOT
86 UNSPEC_SET_RIP
87 UNSPEC_SET_GOT_OFFSET
88 UNSPEC_MEMORY_BLOCKAGE
89 UNSPEC_PROBE_STACK
90
91 ;; TLS support
92 UNSPEC_TP
93 UNSPEC_TLS_GD
94 UNSPEC_TLS_LD_BASE
95 UNSPEC_TLSDESC
96 UNSPEC_TLS_IE_SUN
97
98 ;; Other random patterns
99 UNSPEC_SCAS
100 UNSPEC_FNSTSW
101 UNSPEC_SAHF
102 UNSPEC_NOTRAP
103 UNSPEC_PARITY
104 UNSPEC_FSTCW
105 UNSPEC_REP
106 UNSPEC_LD_MPIC ; load_macho_picbase
107 UNSPEC_TRUNC_NOOP
108 UNSPEC_DIV_ALREADY_SPLIT
109 UNSPEC_PAUSE
110 UNSPEC_LEA_ADDR
111 UNSPEC_XBEGIN_ABORT
112 UNSPEC_STOS
113 UNSPEC_PEEPSIB
114 UNSPEC_INSN_FALSE_DEP
115 UNSPEC_SBB
116 UNSPEC_CC_NE
117
118 ;; For SSE/MMX support:
119 UNSPEC_FIX_NOTRUNC
120 UNSPEC_MASKMOV
121 UNSPEC_MOVCC_MASK
122 UNSPEC_MOVMSK
123 UNSPEC_BLENDV
124 UNSPEC_PSHUFB
125 UNSPEC_XOP_PERMUTE
126 UNSPEC_RCP
127 UNSPEC_RSQRT
128 UNSPEC_PSADBW
129
130 ;; For AVX/AVX512F support
131 UNSPEC_SCALEF
132 UNSPEC_PCMP
133
134 ;; Generic math support
135 UNSPEC_IEEE_MIN ; not commutative
136 UNSPEC_IEEE_MAX ; not commutative
137
138 ;; x87 Floating point
139 UNSPEC_SIN
140 UNSPEC_COS
141 UNSPEC_FPATAN
142 UNSPEC_FYL2X
143 UNSPEC_FYL2XP1
144 UNSPEC_FRNDINT
145 UNSPEC_FIST
146 UNSPEC_F2XM1
147 UNSPEC_TAN
148 UNSPEC_FXAM
149
150 ;; x87 Rounding
151 UNSPEC_FRNDINT_ROUNDEVEN
152 UNSPEC_FRNDINT_FLOOR
153 UNSPEC_FRNDINT_CEIL
154 UNSPEC_FRNDINT_TRUNC
155 UNSPEC_FIST_FLOOR
156 UNSPEC_FIST_CEIL
157
158 ;; x87 Double output FP
159 UNSPEC_SINCOS_COS
160 UNSPEC_SINCOS_SIN
161 UNSPEC_XTRACT_FRACT
162 UNSPEC_XTRACT_EXP
163 UNSPEC_FSCALE_FRACT
164 UNSPEC_FSCALE_EXP
165 UNSPEC_FPREM_F
166 UNSPEC_FPREM_U
167 UNSPEC_FPREM1_F
168 UNSPEC_FPREM1_U
169
170 UNSPEC_C2_FLAG
171 UNSPEC_FXAM_MEM
172
173 ;; SSP patterns
174 UNSPEC_SP_SET
175 UNSPEC_SP_TEST
176
177 ;; For ROUND support
178 UNSPEC_ROUND
179
180 ;; For CRC32 support
181 UNSPEC_CRC32
182
183 ;; For LZCNT support
184 UNSPEC_LZCNT
185
186 ;; For BMI support
187 UNSPEC_TZCNT
188 UNSPEC_BEXTR
189
190 ;; For BMI2 support
191 UNSPEC_PDEP
192 UNSPEC_PEXT
193
194 ;; IRET support
195 UNSPEC_INTERRUPT_RETURN
196
197 ;; For MOVDIRI and MOVDIR64B support
198 UNSPEC_MOVDIRI
199 UNSPEC_MOVDIR64B
200
201 ;; For insn_callee_abi:
202 UNSPEC_CALLEE_ABI
203
204 ])
205
;; UNSPECV_* codes, collected in the enum named "unspecv".  The first
;; run of entries has no heading; later groups are headed by the
;; feature they support.
206 (define_c_enum "unspecv" [
207 UNSPECV_UD2
208 UNSPECV_BLOCKAGE
209 UNSPECV_STACK_PROBE
210 UNSPECV_PROBE_STACK_RANGE
211 UNSPECV_ALIGN
212 UNSPECV_PROLOGUE_USE
213 UNSPECV_SPLIT_STACK_RETURN
214 UNSPECV_CLD
215 UNSPECV_NOPS
216 UNSPECV_RDTSC
217 UNSPECV_RDTSCP
218 UNSPECV_RDPMC
219 UNSPECV_LLWP_INTRINSIC
220 UNSPECV_SLWP_INTRINSIC
221 UNSPECV_LWPVAL_INTRINSIC
222 UNSPECV_LWPINS_INTRINSIC
223 UNSPECV_RDFSBASE
224 UNSPECV_RDGSBASE
225 UNSPECV_WRFSBASE
226 UNSPECV_WRGSBASE
227 UNSPECV_FXSAVE
228 UNSPECV_FXRSTOR
229 UNSPECV_FXSAVE64
230 UNSPECV_FXRSTOR64
231 UNSPECV_XSAVE
232 UNSPECV_XRSTOR
233 UNSPECV_XSAVE64
234 UNSPECV_XRSTOR64
235 UNSPECV_XSAVEOPT
236 UNSPECV_XSAVEOPT64
237 UNSPECV_XSAVES
238 UNSPECV_XRSTORS
239 UNSPECV_XSAVES64
240 UNSPECV_XRSTORS64
241 UNSPECV_XSAVEC
242 UNSPECV_XSAVEC64
243 UNSPECV_XGETBV
244 UNSPECV_XSETBV
245 UNSPECV_WBINVD
246 UNSPECV_WBNOINVD
247
248 ;; For atomic compound assignments.
249 UNSPECV_FNSTENV
250 UNSPECV_FLDENV
251 UNSPECV_FNSTSW
252 UNSPECV_FNCLEX
253
254 ;; For RDRAND support
255 UNSPECV_RDRAND
256
257 ;; For RDSEED support
258 UNSPECV_RDSEED
259
260 ;; For RTM support
261 UNSPECV_XBEGIN
262 UNSPECV_XEND
263 UNSPECV_XABORT
264 UNSPECV_XTEST
265
266 UNSPECV_NLGR
267
268 ;; For CLWB support
269 UNSPECV_CLWB
270
271 ;; For CLFLUSHOPT support
272 UNSPECV_CLFLUSHOPT
273
274 ;; For MONITORX and MWAITX support
275 UNSPECV_MONITORX
276 UNSPECV_MWAITX
277
278 ;; For CLZERO support
279 UNSPECV_CLZERO
280
281 ;; For RDPKRU and WRPKRU support
282 UNSPECV_PKU
283
284 ;; For RDPID support
285 UNSPECV_RDPID
286
287 ;; For CET support
288 UNSPECV_NOP_ENDBR
289 UNSPECV_NOP_RDSSP
290 UNSPECV_INCSSP
291 UNSPECV_SAVEPREVSSP
292 UNSPECV_RSTORSSP
293 UNSPECV_WRSS
294 UNSPECV_WRUSS
295 UNSPECV_SETSSBSY
296 UNSPECV_CLRSSBSY
297
298 ;; For TSXLDTRK support
299 UNSPECV_XSUSLDTRK
300 UNSPECV_XRESLDTRK
301
302 ;; For WAITPKG support
303 UNSPECV_UMWAIT
304 UNSPECV_UMONITOR
305 UNSPECV_TPAUSE
306
307 ;; For UINTR support
308 UNSPECV_CLUI
309 UNSPECV_STUI
310 UNSPECV_TESTUI
311 UNSPECV_SENDUIPI
312
313 ;; For CLDEMOTE support
314 UNSPECV_CLDEMOTE
315
316 ;; For Speculation Barrier support
317 UNSPECV_SPECULATION_BARRIER
318
319 UNSPECV_PTWRITE
320
321 ;; For ENQCMD and ENQCMDS support
322 UNSPECV_ENQCMD
323 UNSPECV_ENQCMDS
324
325 ;; For SERIALIZE support
326 UNSPECV_SERIALIZE
327
328 ;; For patchable area support
329 UNSPECV_PATCHABLE_AREA
330
331 ;; For HRESET support
332 UNSPECV_HRESET
333
334 ;; For PREFETCHI support
335 UNSPECV_PREFETCHI
336 ])
337
338 ;; Constants to represent rounding modes in the ROUND instruction
;; Values 0x0-0x3 number the four rounding modes; ROUND_MXCSR (0x4) and
;; ROUND_NO_EXC (0x8) each occupy a separate higher bit.
339 (define_constants
340 [(ROUND_ROUNDEVEN 0x0)
341 (ROUND_FLOOR 0x1)
342 (ROUND_CEIL 0x2)
343 (ROUND_TRUNC 0x3)
344 (ROUND_MXCSR 0x4)
345 (ROUND_NO_EXC 0x8)
346 ])
347
348 ;; Constants to represent AVX512F embedded rounding.
;; Values 0-3 number the rounding directions; NO_ROUND is 4 and
;; ROUND_SAE is 8.
349 (define_constants
350 [(ROUND_NEAREST_INT 0)
351 (ROUND_NEG_INF 1)
352 (ROUND_POS_INF 2)
353 (ROUND_ZERO 3)
354 (NO_ROUND 4)
355 (ROUND_SAE 8)
356 ])
357
358 ;; Constants to represent pcomtrue/pcomfalse variants
;; Six selectors numbered consecutively from 0: PCOM_FALSE/PCOM_TRUE,
;; then the COM_FALSE_* and COM_TRUE_* pairs with _S and _P suffixes.
359 (define_constants
360 [(PCOM_FALSE 0)
361 (PCOM_TRUE 1)
362 (COM_FALSE_S 2)
363 (COM_FALSE_P 3)
364 (COM_TRUE_S 4)
365 (COM_TRUE_P 5)
366 ])
367
368 ;; Constants used in the XOP pperm instruction
;; The first eight values are multiples of 0x20 and select what is done
;; with the source byte; PPERM_SRC1/PPERM_SRC2 differ only in bit 0x10
;; and select which source byte is used.  Note that PPERM_SRC and
;; PPERM_SRC1 are both 0x00.
369 (define_constants
370 [(PPERM_SRC 0x00) /* copy source */
371 (PPERM_INVERT 0x20) /* invert source */
372 (PPERM_REVERSE 0x40) /* bit reverse source */
373 (PPERM_REV_INV 0x60) /* bit reverse & invert src */
374 (PPERM_ZERO 0x80) /* all 0's */
375 (PPERM_ONES 0xa0) /* all 1's */
376 (PPERM_SIGN 0xc0) /* propagate sign bit */
377 (PPERM_INV_SIGN 0xe0) /* invert & propagate sign */
378 (PPERM_SRC1 0x00) /* use first source byte */
379 (PPERM_SRC2 0x10) /* use second source byte */
380 ])
381
382 ;; Registers by name.
;; Register numbers 0-75 in one contiguous sequence; FIRST_PSEUDO_REG
;; (76) is one past the last named register.
;; NOTE(review): presumably this numbering must agree with the register
;; layout in the target's C headers -- confirm before renumbering.
383 (define_constants
384 [(AX_REG 0)
385 (DX_REG 1)
386 (CX_REG 2)
387 (BX_REG 3)
388 (SI_REG 4)
389 (DI_REG 5)
390 (BP_REG 6)
391 (SP_REG 7)
392 (ST0_REG 8)
393 (ST1_REG 9)
394 (ST2_REG 10)
395 (ST3_REG 11)
396 (ST4_REG 12)
397 (ST5_REG 13)
398 (ST6_REG 14)
399 (ST7_REG 15)
400 (ARGP_REG 16)
401 (FLAGS_REG 17)
402 (FPSR_REG 18)
403 (FRAME_REG 19)
404 (XMM0_REG 20)
405 (XMM1_REG 21)
406 (XMM2_REG 22)
407 (XMM3_REG 23)
408 (XMM4_REG 24)
409 (XMM5_REG 25)
410 (XMM6_REG 26)
411 (XMM7_REG 27)
412 (MM0_REG 28)
413 (MM1_REG 29)
414 (MM2_REG 30)
415 (MM3_REG 31)
416 (MM4_REG 32)
417 (MM5_REG 33)
418 (MM6_REG 34)
419 (MM7_REG 35)
420 (R8_REG 36)
421 (R9_REG 37)
422 (R10_REG 38)
423 (R11_REG 39)
424 (R12_REG 40)
425 (R13_REG 41)
426 (R14_REG 42)
427 (R15_REG 43)
428 (XMM8_REG 44)
429 (XMM9_REG 45)
430 (XMM10_REG 46)
431 (XMM11_REG 47)
432 (XMM12_REG 48)
433 (XMM13_REG 49)
434 (XMM14_REG 50)
435 (XMM15_REG 51)
436 (XMM16_REG 52)
437 (XMM17_REG 53)
438 (XMM18_REG 54)
439 (XMM19_REG 55)
440 (XMM20_REG 56)
441 (XMM21_REG 57)
442 (XMM22_REG 58)
443 (XMM23_REG 59)
444 (XMM24_REG 60)
445 (XMM25_REG 61)
446 (XMM26_REG 62)
447 (XMM27_REG 63)
448 (XMM28_REG 64)
449 (XMM29_REG 65)
450 (XMM30_REG 66)
451 (XMM31_REG 67)
452 (MASK0_REG 68)
453 (MASK1_REG 69)
454 (MASK2_REG 70)
455 (MASK3_REG 71)
456 (MASK4_REG 72)
457 (MASK5_REG 73)
458 (MASK6_REG 74)
459 (MASK7_REG 75)
460 (FIRST_PSEUDO_REG 76)
461 ])
462
463 ;; Insn callee abi index.
;; Three indices numbered from 0.
;; NOTE(review): presumably the value carried with UNSPEC_CALLEE_ABI
;; (listed under "For insn_callee_abi" in the unspec enum) -- confirm.
464 (define_constants
465 [(ABI_DEFAULT 0)
466 (ABI_VZEROUPPER 1)
467 (ABI_UNKNOWN 2)])
468
469 ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
470 ;; from i386.cc.
471
472 ;; In C guard expressions, put expressions which may be compile-time
473 ;; constants first. This allows for better optimization. For
474 ;; example, write "TARGET_64BIT && reload_completed", not
475 ;; "reload_completed && TARGET_64BIT".
476
477 \f
478 ;; Processor type.
;; The value is not computed per insn: it is read from the C symbol
;; ix86_schedule and wrapped in (const ...).
479 (define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,nehalem,
480 atom,slm,glm,haswell,generic,lujiazui,amdfam10,bdver1,
481 bdver2,bdver3,bdver4,btver2,znver1,znver2,znver3,znver4"
482 (const (symbol_ref "ix86_schedule")))
483
484 ;; A basic instruction type. Refinements due to arguments to be
485 ;; provided in other attributes.
;; Defaults to "other".  Several attributes defined below ("unit",
;; "length_immediate", "modrm", "memory", ...) are derived from this one.
486 (define_attr "type"
487 "other,multi,
488 alu,alu1,negnot,imov,imovx,lea,
489 incdec,ishift,ishiftx,ishift1,rotate,rotatex,rotate1,
490 imul,imulx,idiv,icmp,test,ibr,setcc,icmov,
491 push,pop,call,callv,leave,
492 str,bitmanip,
493 fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,
494 fxch,fistp,fisttp,frndint,
495 sse,ssemov,sseadd,sseadd1,sseiadd,sseiadd1,
496 ssemul,sseimul,ssediv,sselog,sselog1,
497 sseishft,sseishft1,ssecmp,ssecomi,
498 ssecvt,ssecvt1,sseicvt,sseins,
499 sseshuf,sseshuf1,ssemuladd,sse4arg,
500 lwp,mskmov,msklog,
501 mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
502 (const_string "other"))
503
504 ;; Main data type used by the insn
;; Defaults to "unknown".  "none" is a separate value from "unknown".
505 (define_attr "mode"
506 "unknown,none,QI,HI,SI,DI,TI,OI,XI,HF,BF,SF,DF,XF,TF,V32HF,V16HF,V8HF,
507 V16SF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,V8DF,V4HF,V4BF,V2HF,V2BF"
508 (const_string "unknown"))
509
510 ;; The CPU unit the operation uses.
;; Derived entirely from "type": the x87 types give "i387", the SSE
;; types plus mskmov give "sse", the MMX types give "mmx" and type
;; "other" gives "unknown".  Every remaining type (including lwp and
;; msklog, which are not listed below) falls through to "integer".
511 (define_attr "unit" "integer,i387,sse,mmx,unknown"
512 (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,
513 fxch,fistp,fisttp,frndint")
514 (const_string "i387")
515 (eq_attr "type" "sse,ssemov,sseadd,sseadd1,sseiadd,sseiadd1,
516 ssemul,sseimul,ssediv,sselog,sselog1,
517 sseishft,sseishft1,ssecmp,ssecomi,
518 ssecvt,ssecvt1,sseicvt,sseins,
519 sseshuf,sseshuf1,ssemuladd,sse4arg,mskmov")
520 (const_string "sse")
521 (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
522 (const_string "mmx")
523 (eq_attr "type" "other")
524 (const_string "unknown")]
525 (const_string "integer")))
526
527 ;; The (bounding maximum) length of an instruction immediate.
;; Cases, in order:
;;  - the types listed in the first test, and any insn whose unit is
;;    i387, sse or mmx: 0;
;;  - the ALU-like types: ix86_attr_length_immediate_default (insn, true);
;;  - imov and test: ix86_attr_length_immediate_default (insn, false);
;;  - call / callv: 4 when the call target (operand 0 / operand 1)
;;    matches constant_call_address_operand, otherwise 0;
;;  - ibr: 1 (see the comment inside the cond).
;; Any type not covered above reaches gcc_unreachable (), so a newly
;; added "type" value has to be handled here explicitly.  The C comment
;; in the default says "immediate_length"; the attribute itself is
;; named "length_immediate".
528 (define_attr "length_immediate" ""
529 (cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave,
530 bitmanip,imulx,msklog,mskmov")
531 (const_int 0)
532 (eq_attr "unit" "i387,sse,mmx")
533 (const_int 0)
534 (eq_attr "type" "alu,alu1,negnot,imovx,ishift,ishiftx,ishift1,
535 rotate,rotatex,rotate1,imul,icmp,push,pop")
536 (symbol_ref "ix86_attr_length_immediate_default (insn, true)")
537 (eq_attr "type" "imov,test")
538 (symbol_ref "ix86_attr_length_immediate_default (insn, false)")
539 (eq_attr "type" "call")
540 (if_then_else (match_operand 0 "constant_call_address_operand")
541 (const_int 4)
542 (const_int 0))
543 (eq_attr "type" "callv")
544 (if_then_else (match_operand 1 "constant_call_address_operand")
545 (const_int 4)
546 (const_int 0))
547 ;; We don't know the size before shorten_branches. Expect
548 ;; the instruction to fit for better scheduling.
549 (eq_attr "type" "ibr")
550 (const_int 1)
551 ]
552 (symbol_ref "/* Update immediate_length and other attributes! */
553 gcc_unreachable (),1")))
554
555 ;; The (bounding maximum) length of an instruction address.
;; 0 for types str, other, multi and fxch, and for call / callv whose
;; target (operand 0 / operand 1) matches constant_call_address_operand;
;; otherwise the result of ix86_attr_length_address_default (insn).
556 (define_attr "length_address" ""
557 (cond [(eq_attr "type" "str,other,multi,fxch")
558 (const_int 0)
559 (and (eq_attr "type" "call")
560 (match_operand 0 "constant_call_address_operand"))
561 (const_int 0)
562 (and (eq_attr "type" "callv")
563 (match_operand 1 "constant_call_address_operand"))
564 (const_int 0)
565 ]
566 (symbol_ref "ix86_attr_length_address_default (insn)")))
567
568 ;; Set when length prefix is used.
;; Forced to 0 for types ssemuladd, sse4arg, sseiadd1 and ssecvt1.
;; Otherwise 1 for HImode insns and for SSE-unit insns whose mode is
;; V2DF or TI; 0 in all remaining cases.
569 (define_attr "prefix_data16" ""
570 (cond [(eq_attr "type" "ssemuladd,sse4arg,sseiadd1,ssecvt1")
571 (const_int 0)
572 (eq_attr "mode" "HI")
573 (const_int 1)
574 (and (eq_attr "unit" "sse") (eq_attr "mode" "V2DF,TI"))
575 (const_int 1)
576 ]
577 (const_int 0)))
578
579 ;; Set when string REP prefix is used.
;; Forced to 0 for types ssemuladd, sse4arg, sseiadd1 and ssecvt1.
;; Otherwise 1 for SSE-unit insns whose mode is SF or DF, else 0.
580 (define_attr "prefix_rep" ""
581 (cond [(eq_attr "type" "ssemuladd,sse4arg,sseiadd1,ssecvt1")
582 (const_int 0)
583 (and (eq_attr "unit" "sse") (eq_attr "mode" "SF,DF"))
584 (const_int 1)
585 ]
586 (const_int 0)))
587
588 ;; Set when 0f opcode prefix is used.
;; 1 for types imovx, setcc, icmov, bitmanip, msklog and mskmov, and
;; for every insn whose unit is sse or mmx; 0 otherwise.
589 (define_attr "prefix_0f" ""
590 (if_then_else
591 (ior (eq_attr "type" "imovx,setcc,icmov,bitmanip,msklog,mskmov")
592 (eq_attr "unit" "sse,mmx"))
593 (const_int 1)
594 (const_int 0)))
595
596 ;; Set when REX opcode prefix is used.
;; Always 0 unless TARGET_64BIT.  In 64-bit mode it is 1 when any of
;; these holds, tested in order:
;;  - the mode is DI, the type is none of push, pop, call, callv,
;;    leave, ibr, and the unit is not mmx;
;;  - the mode is QI and x86_extended_QIreg_mentioned_p (insn);
;;  - x86_extended_reg_mentioned_p (insn);
;;  - the type is imovx and operand 1 is a QImode ext_QIreg_operand.
;; Otherwise 0.
597 (define_attr "prefix_rex" ""
598 (cond [(not (match_test "TARGET_64BIT"))
599 (const_int 0)
600 (and (eq_attr "mode" "DI")
601 (and (eq_attr "type" "!push,pop,call,callv,leave,ibr")
602 (eq_attr "unit" "!mmx")))
603 (const_int 1)
604 (and (eq_attr "mode" "QI")
605 (match_test "x86_extended_QIreg_mentioned_p (insn)"))
606 (const_int 1)
607 (match_test "x86_extended_reg_mentioned_p (insn)")
608 (const_int 1)
609 (and (eq_attr "type" "imovx")
610 (match_operand:QI 1 "ext_QIreg_operand"))
611 (const_int 1)
612 ]
613 (const_int 0)))
614
615 ;; There are also additional prefixes in 3DNOW, SSSE3.
616 ;; ssemuladd,sse4arg default to 0f24/0f25 and DREX byte,
617 ;; sseiadd1,ssecvt1 to 0f7a with no DREX byte.
618 ;; 3DNOW has 0f0f prefix, SSSE3 and SSE4_{1,2} 0f38/0f3a.
;; The value (2, 1 or 0) is added into "length" as a byte count, and
;; "length_vex" tests it against 0.
619 (define_attr "prefix_extra" ""
620 (cond [(eq_attr "type" "ssemuladd,sse4arg")
621 (const_int 2)
622 (eq_attr "type" "sseiadd1,ssecvt1")
623 (const_int 1)
624 ]
625 (const_int 0)))
626
627 ;; Prefix used: original, VEX, maybe VEX, EVEX or maybe EVEX.
;; The default is derived from "mode": OI, V8SF and V4DF give "vex";
;; XI, V16SF and V8DF give "evex"; anything else gives "orig".  The
;; maybe_vex and maybe_evex values are never produced by this default.
628 (define_attr "prefix" "orig,vex,maybe_vex,evex,maybe_evex"
629 (cond [(eq_attr "mode" "OI,V8SF,V4DF")
630 (const_string "vex")
631 (eq_attr "mode" "XI,V16SF,V8DF")
632 (const_string "evex")
633 ]
634 (const_string "orig")))
635
636 ;; VEX W bit is used.
;; Defaults to 0; "length_vex" tests this attribute against 1.
637 (define_attr "prefix_vex_w" "" (const_int 0))
638
639 ;; The length of VEX prefix
640 ;; Only instructions with 0f prefix can have 2 byte VEX prefix,
641 ;; 0f38/0f3a prefixes can't. In i386.md 0f3[8a] is
642 ;; still prefix_0f 1, with prefix_extra 1.
;; Always ix86_attr_length_vex_default (insn, A, B), where A is true
;; exactly when prefix_0f is 1 and prefix_extra is 0, and B is true
;; exactly when prefix_vex_w is 1.
643 (define_attr "length_vex" ""
644 (if_then_else (and (eq_attr "prefix_0f" "1")
645 (eq_attr "prefix_extra" "0"))
646 (if_then_else (eq_attr "prefix_vex_w" "1")
647 (symbol_ref "ix86_attr_length_vex_default (insn, true, true)")
648 (symbol_ref "ix86_attr_length_vex_default (insn, true, false)"))
649 (if_then_else (eq_attr "prefix_vex_w" "1")
650 (symbol_ref "ix86_attr_length_vex_default (insn, false, true)")
651 (symbol_ref "ix86_attr_length_vex_default (insn, false, false)"))))
652
653 ;; 4-byte EVEX prefix plus 1 opcode byte: a constant 5.
654 (define_attr "length_evex" "" (const_int 5))
655
656 ;; Set when modrm byte is used.
;; Defaults to 1.  It is 0 for:
;;  - types str and leave, and every i387-unit insn;
;;  - incdec outside TARGET_64BIT when operand 1 is an SImode or HImode
;;    register;
;;  - push whose operand 1 is not memory, pop whose operand 0 is not
;;    memory;
;;  - non-DImode imov that is register <- immediate, or a move between
;;    an ax_reg_operand and a memory_displacement_only_operand in
;;    either direction;
;;  - call / callv whose target matches constant_call_address_operand.
;; For alu, alu1, icmp and test with operand 0 matching ax_reg_operand
;; the value is the C expression in the last case: 1 when
;; length_immediate is at most 1 (or exactly 0 for QImode), else 0.
;; NOTE(review): presumably this models the short accumulator-immediate
;; encodings that have no ModRM byte -- confirm.
657 (define_attr "modrm" ""
658 (cond [(eq_attr "type" "str,leave")
659 (const_int 0)
660 (eq_attr "unit" "i387")
661 (const_int 0)
662 (and (eq_attr "type" "incdec")
663 (and (not (match_test "TARGET_64BIT"))
664 (ior (match_operand:SI 1 "register_operand")
665 (match_operand:HI 1 "register_operand"))))
666 (const_int 0)
667 (and (eq_attr "type" "push")
668 (not (match_operand 1 "memory_operand")))
669 (const_int 0)
670 (and (eq_attr "type" "pop")
671 (not (match_operand 0 "memory_operand")))
672 (const_int 0)
673 (and (eq_attr "type" "imov")
674 (and (not (eq_attr "mode" "DI"))
675 (ior (and (match_operand 0 "register_operand")
676 (match_operand 1 "immediate_operand"))
677 (ior (and (match_operand 0 "ax_reg_operand")
678 (match_operand 1 "memory_displacement_only_operand"))
679 (and (match_operand 0 "memory_displacement_only_operand")
680 (match_operand 1 "ax_reg_operand"))))))
681 (const_int 0)
682 (and (eq_attr "type" "call")
683 (match_operand 0 "constant_call_address_operand"))
684 (const_int 0)
685 (and (eq_attr "type" "callv")
686 (match_operand 1 "constant_call_address_operand"))
687 (const_int 0)
688 (and (eq_attr "type" "alu,alu1,icmp,test")
689 (match_operand 0 "ax_reg_operand"))
690 (symbol_ref "(get_attr_length_immediate (insn) <= (get_attr_mode (insn) != MODE_QI))")
691 ]
692 (const_int 1)))
693
694 ;; The (bounding maximum) length of an instruction in bytes.
695 ;; ??? fistp and frndint are in fact fldcw/{fistp,frndint}/fldcw sequences.
696 ;; Later we may want to split them and compute proper length as for
697 ;; other insns.
;; Cases, in order:
;;  - types other, multi, fistp, frndint: fixed 16;
;;  - type fcmp: fixed 4;
;;  - any other i387-unit insn: 2 + prefix_data16 + length_address;
;;  - prefix "evex", or "maybe_evex"/"maybe_vex" under TARGET_AVX512F:
;;    length_evex + length_immediate + modrm + length_address;
;;  - prefix "vex", or "maybe_vex"/"maybe_evex" under TARGET_AVX:
;;    length_vex + length_immediate + modrm + length_address;
;;  - everything else: 1 + modrm + prefix_0f + prefix_rex + prefix_extra
;;    + prefix_rep + prefix_data16 + length_immediate + length_address.
;; The EVEX test comes first, so a "maybe_vex" insn is sized with
;; length_evex whenever TARGET_AVX512F holds.
698 (define_attr "length" ""
699 (cond [(eq_attr "type" "other,multi,fistp,frndint")
700 (const_int 16)
701 (eq_attr "type" "fcmp")
702 (const_int 4)
703 (eq_attr "unit" "i387")
704 (plus (const_int 2)
705 (plus (attr "prefix_data16")
706 (attr "length_address")))
707 (ior (eq_attr "prefix" "evex")
708 (and (ior (eq_attr "prefix" "maybe_evex")
709 (eq_attr "prefix" "maybe_vex"))
710 (match_test "TARGET_AVX512F")))
711 (plus (attr "length_evex")
712 (plus (attr "length_immediate")
713 (plus (attr "modrm")
714 (attr "length_address"))))
715 (ior (eq_attr "prefix" "vex")
716 (and (ior (eq_attr "prefix" "maybe_vex")
717 (eq_attr "prefix" "maybe_evex"))
718 (match_test "TARGET_AVX")))
719 (plus (attr "length_vex")
720 (plus (attr "length_immediate")
721 (plus (attr "modrm")
722 (attr "length_address"))))]
723 (plus (plus (attr "modrm")
724 (plus (attr "prefix_0f")
725 (plus (attr "prefix_rex")
726 (plus (attr "prefix_extra")
727 (const_int 1)))))
728 (plus (attr "prefix_rep")
729 (plus (attr "prefix_data16")
730 (plus (attr "length_immediate")
731 (attr "length_address")))))))
732
733 ;; The `memory' attribute is `none' if no memory is referenced, `load' or
734 ;; `store' if there is a simple memory reference therein, or `unknown'
735 ;; if the instruction is complex.
736
;; The cases are tested in order and the first match wins:
;;  - fixed by type: other/multi/str/lwp -> unknown; lea/fcmov/fpspc ->
;;    none; fistp/leave -> both; frndint -> load;
;;  - push (operand 1) and pop (operand 0): "both" when that operand is
;;    memory, else store (push) or load (pop);
;;  - setcc: store when operand 0 is memory, else none;
;;  - compare-like types: load when operand 0 or 1 is memory, else none;
;;  - ibr: load when operand 0 is memory, else none;
;;  - call / callv: none for a constant call address, else load;
;;  - the one-operand types with a memory operand 1: both;
;;  - then generically: operands 0 and 1 both memory -> both, operand 0
;;    memory -> store, operand 1 memory -> load;
;;  - memory operand 2 -> load, except for the types excluded by the
;;    "!" list;
;;  - memory operand 3 -> load for icmov, ssemuladd and sse4arg.
;; Anything left is "none".
737 (define_attr "memory" "none,load,store,both,unknown"
738 (cond [(eq_attr "type" "other,multi,str,lwp")
739 (const_string "unknown")
740 (eq_attr "type" "lea,fcmov,fpspc")
741 (const_string "none")
742 (eq_attr "type" "fistp,leave")
743 (const_string "both")
744 (eq_attr "type" "frndint")
745 (const_string "load")
746 (eq_attr "type" "push")
747 (if_then_else (match_operand 1 "memory_operand")
748 (const_string "both")
749 (const_string "store"))
750 (eq_attr "type" "pop")
751 (if_then_else (match_operand 0 "memory_operand")
752 (const_string "both")
753 (const_string "load"))
754 (eq_attr "type" "setcc")
755 (if_then_else (match_operand 0 "memory_operand")
756 (const_string "store")
757 (const_string "none"))
758 (eq_attr "type" "icmp,test,ssecmp,ssecomi,mmxcmp,fcmp")
759 (if_then_else (ior (match_operand 0 "memory_operand")
760 (match_operand 1 "memory_operand"))
761 (const_string "load")
762 (const_string "none"))
763 (eq_attr "type" "ibr")
764 (if_then_else (match_operand 0 "memory_operand")
765 (const_string "load")
766 (const_string "none"))
767 (eq_attr "type" "call")
768 (if_then_else (match_operand 0 "constant_call_address_operand")
769 (const_string "none")
770 (const_string "load"))
771 (eq_attr "type" "callv")
772 (if_then_else (match_operand 1 "constant_call_address_operand")
773 (const_string "none")
774 (const_string "load"))
775 (and (eq_attr "type" "alu1,negnot,ishift1,rotate1,sselog1,sseshuf1")
776 (match_operand 1 "memory_operand"))
777 (const_string "both")
778 (and (match_operand 0 "memory_operand")
779 (match_operand 1 "memory_operand"))
780 (const_string "both")
781 (match_operand 0 "memory_operand")
782 (const_string "store")
783 (match_operand 1 "memory_operand")
784 (const_string "load")
785 (and (eq_attr "type"
786 "!alu1,negnot,ishift1,rotate1,
787 imov,imovx,icmp,test,bitmanip,
788 fmov,fcmp,fsgn,
789 sse,ssemov,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,
790 sselog1,sseshuf1,sseadd1,sseiadd1,sseishft1,
791 mmx,mmxmov,mmxcmp,mmxcvt,mskmov,msklog")
792 (match_operand 2 "memory_operand"))
793 (const_string "load")
794 (and (eq_attr "type" "icmov,ssemuladd,sse4arg")
795 (match_operand 3 "memory_operand"))
796 (const_string "load")
797 ]
798 (const_string "none")))
799
800 ;; Indicates if an instruction has both an immediate and a displacement.
801
;; "unknown" for types other and multi.  "true" when operand 0 matches
;; memory_displacement_operand and the immediate is operand 1 (types
;; icmp, test, imov, alu1, ishift1, rotate1) or operand 2 (types alu,
;; ishift, ishiftx, rotate, rotatex, imul, idiv).  "false" otherwise.
802 (define_attr "imm_disp" "false,true,unknown"
803 (cond [(eq_attr "type" "other,multi")
804 (const_string "unknown")
805 (and (eq_attr "type" "icmp,test,imov,alu1,ishift1,rotate1")
806 (and (match_operand 0 "memory_displacement_operand")
807 (match_operand 1 "immediate_operand")))
808 (const_string "true")
809 (and (eq_attr "type" "alu,ishift,ishiftx,rotate,rotatex,imul,idiv")
810 (and (match_operand 0 "memory_displacement_operand")
811 (match_operand 2 "immediate_operand")))
812 (const_string "true")
813 ]
814 (const_string "false")))
815
816 ;; Indicates if an FP operation has an integer source.
;; Defaults to "false".
817
818 (define_attr "fp_int_src" "false,true"
819 (const_string "false"))
820
821 ;; Defines rounding mode of an FP operation.
;; Defaults to "any".
822
823 (define_attr "i387_cw" "roundeven,floor,ceil,trunc,uninitialized,any"
824 (const_string "any"))
825
826 ;; Define attribute to indicate AVX insns with partial XMM register update.
;; Defaults to "false".
827 (define_attr "avx_partial_xmm_update" "false,true"
828 (const_string "false"))
829
830 ;; Define attribute to classify add/sub insns that consume the carry
;; flag (CF).  Defaults to "0".
831 (define_attr "use_carry" "0,1" (const_string "0"))
832
833 ;; Define attribute to indicate unaligned ssemov insns.
;; Defaults to "0".
834 (define_attr "movu" "0,1" (const_string "0"))
835
836 ;; Used to control the "enabled" attribute on a per-instruction basis.
;; Defaults to "base".  Each non-base value is mapped to a TARGET_*
;; condition by the "enabled" attribute, so a value added here needs a
;; matching case there.
837 (define_attr "isa" "base,x64,nox64,x64_sse2,x64_sse4,x64_sse4_noavx,
838 x64_avx,x64_avx512bw,x64_avx512dq,
839 sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx,
840 avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
841 avx512bw,noavx512bw,avx512dq,noavx512dq,fma_or_avx512vl,
842 avx512vl,noavx512vl,avxvnni,avx512vnnivl,avx512fp16,avxifma,
843 avx512ifmavl,avxneconvert,avx512bf16vl"
844 (const_string "base"))
845
846 ;; Define instruction set of MMX instructions
;; Defaults to "base".  The non-base values are mapped to conditions on
;; TARGET_MMX_WITH_SSE and TARGET_AVX by the "enabled" attribute.
847 (define_attr "mmx_isa" "base,native,sse,sse_noavx,avx"
848 (const_string "base"))
849
;; Maps each non-base "isa" value, then each non-base "mmx_isa" value,
;; to the TARGET_* condition under which it is enabled.  The "isa"
;; cases are tested first, so "mmx_isa" is consulted only when "isa" is
;; "base".  When neither attribute selects a case the value is 1.
850 (define_attr "enabled" ""
851 (cond [(eq_attr "isa" "x64") (symbol_ref "TARGET_64BIT")
852 (eq_attr "isa" "nox64") (symbol_ref "!TARGET_64BIT")
853 (eq_attr "isa" "x64_sse2")
854 (symbol_ref "TARGET_64BIT && TARGET_SSE2")
855 (eq_attr "isa" "x64_sse4")
856 (symbol_ref "TARGET_64BIT && TARGET_SSE4_1")
857 (eq_attr "isa" "x64_sse4_noavx")
858 (symbol_ref "TARGET_64BIT && TARGET_SSE4_1 && !TARGET_AVX")
859 (eq_attr "isa" "x64_avx")
860 (symbol_ref "TARGET_64BIT && TARGET_AVX")
861 (eq_attr "isa" "x64_avx512bw")
862 (symbol_ref "TARGET_64BIT && TARGET_AVX512BW")
863 (eq_attr "isa" "x64_avx512dq")
864 (symbol_ref "TARGET_64BIT && TARGET_AVX512DQ")
865 (eq_attr "isa" "sse_noavx")
866 (symbol_ref "TARGET_SSE && !TARGET_AVX")
867 (eq_attr "isa" "sse2") (symbol_ref "TARGET_SSE2")
868 (eq_attr "isa" "sse2_noavx")
869 (symbol_ref "TARGET_SSE2 && !TARGET_AVX")
870 (eq_attr "isa" "sse3") (symbol_ref "TARGET_SSE3")
871 (eq_attr "isa" "sse3_noavx")
872 (symbol_ref "TARGET_SSE3 && !TARGET_AVX")
873 (eq_attr "isa" "sse4") (symbol_ref "TARGET_SSE4_1")
874 (eq_attr "isa" "sse4_noavx")
875 (symbol_ref "TARGET_SSE4_1 && !TARGET_AVX")
876 (eq_attr "isa" "avx") (symbol_ref "TARGET_AVX")
877 (eq_attr "isa" "noavx") (symbol_ref "!TARGET_AVX")
878 (eq_attr "isa" "avx2") (symbol_ref "TARGET_AVX2")
879 (eq_attr "isa" "noavx2") (symbol_ref "!TARGET_AVX2")
880 (eq_attr "isa" "bmi") (symbol_ref "TARGET_BMI")
881 (eq_attr "isa" "bmi2") (symbol_ref "TARGET_BMI2")
882 (eq_attr "isa" "fma4") (symbol_ref "TARGET_FMA4")
883 (eq_attr "isa" "fma") (symbol_ref "TARGET_FMA")
884 (eq_attr "isa" "fma_or_avx512vl")
885 (symbol_ref "TARGET_FMA || TARGET_AVX512VL")
886 (eq_attr "isa" "avx512f") (symbol_ref "TARGET_AVX512F")
887 (eq_attr "isa" "noavx512f") (symbol_ref "!TARGET_AVX512F")
888 (eq_attr "isa" "avx512bw") (symbol_ref "TARGET_AVX512BW")
889 (eq_attr "isa" "noavx512bw") (symbol_ref "!TARGET_AVX512BW")
890 (eq_attr "isa" "avx512dq") (symbol_ref "TARGET_AVX512DQ")
891 (eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ")
892 (eq_attr "isa" "avx512vl") (symbol_ref "TARGET_AVX512VL")
893 (eq_attr "isa" "noavx512vl") (symbol_ref "!TARGET_AVX512VL")
894 (eq_attr "isa" "avxvnni") (symbol_ref "TARGET_AVXVNNI")
895 (eq_attr "isa" "avx512vnnivl")
896 (symbol_ref "TARGET_AVX512VNNI && TARGET_AVX512VL")
897 (eq_attr "isa" "avx512fp16")
898 (symbol_ref "TARGET_AVX512FP16")
899 (eq_attr "isa" "avxifma") (symbol_ref "TARGET_AVXIFMA")
900 (eq_attr "isa" "avx512ifmavl")
901 (symbol_ref "TARGET_AVX512IFMA && TARGET_AVX512VL")
902 (eq_attr "isa" "avxneconvert") (symbol_ref "TARGET_AVXNECONVERT")
903 (eq_attr "isa" "avx512bf16vl")
904 (symbol_ref "TARGET_AVX512BF16 && TARGET_AVX512VL")
905
906 (eq_attr "mmx_isa" "native")
907 (symbol_ref "!TARGET_MMX_WITH_SSE")
908 (eq_attr "mmx_isa" "sse")
909 (symbol_ref "TARGET_MMX_WITH_SSE")
910 (eq_attr "mmx_isa" "sse_noavx")
911 (symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")
912 (eq_attr "mmx_isa" "avx")
913 (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
914 ]
915 (const_int 1)))
916
;; Both attributes default to 1.
917 (define_attr "preferred_for_size" "" (const_int 1))
918 (define_attr "preferred_for_speed" "" (const_int 1))
919
920 ;; Describe a user's asm statement.
;; An asm statement is given length 128 and type "multi".
921 (define_asm_attributes
922 [(set_attr "length" "128")
923 (set_attr "type" "multi")])
924
;; Addition and subtraction; the second iterator adds mult and div.
925 (define_code_iterator plusminus [plus minus])
926 (define_code_iterator plusminusmultdiv [plus minus mult div])
927
;; The ss_ and us_ variants of plus and minus.
928 (define_code_iterator sat_plusminus [ss_plus us_plus ss_minus us_minus])
929
930 ;; Base name for insn mnemonic.
;; Has entries for the codes of both plusminus and sat_plusminus.
931 (define_code_attr plusminus_mnemonic
932 [(plus "add") (ss_plus "adds") (us_plus "addus")
933 (minus "sub") (ss_minus "subs") (us_minus "subus")])
934
;; Multiplication and division, with their mnemonic base names.
935 (define_code_iterator multdiv [mult div])
936
937 (define_code_attr multdiv_mnemonic
938 [(mult "mul") (div "div")])
939
940 ;; Mark commutative operators as such in constraints.
;; Expands to "%" for the plus variants and mult, and to the empty
;; string for the minus variants and div.
941 (define_code_attr comm [(plus "%") (ss_plus "%") (us_plus "%")
942 (minus "") (ss_minus "") (us_minus "")
943 (mult "%") (div "")])
944
945 ;; All four max/min codes, signed and unsigned
946 (define_code_iterator maxmin [smax smin umax umin])
947
948 ;; Signed max and min only
949 (define_code_iterator smaxmin [smax smin])
950
951 ;; Unsigned max and min only
952 (define_code_iterator umaxmin [umax umin])
953
954 ;; Base name for integer and FP insn mnemonic
;; maxmin_int has entries for all four max/min codes; maxmin_float has
;; entries for smax and smin only.
955 (define_code_attr maxmin_int [(smax "maxs") (smin "mins")
956 (umax "maxu") (umin "minu")])
957 (define_code_attr maxmin_float [(smax "max") (smin "min")])
958
;; Iterates over UNSPEC_IEEE_MAX and UNSPEC_IEEE_MIN (marked "not
;; commutative" in the unspec enum); ieee_maxmin gives the matching
;; "max" / "min" name.
959 (define_int_iterator IEEE_MAXMIN
960 [UNSPEC_IEEE_MAX
961 UNSPEC_IEEE_MIN])
962
963 (define_int_attr ieee_maxmin
964 [(UNSPEC_IEEE_MAX "max")
965 (UNSPEC_IEEE_MIN "min")])
966
967 ;; Mapping of logic operators: any_logic is all three, any_or leaves
;; out and, fpint_logic leaves out ior.
968 (define_code_iterator any_logic [and ior xor])
969 (define_code_iterator any_or [ior xor])
970 (define_code_iterator fpint_logic [and xor])
971
972 ;; Base name for insn mnemonic.
;; Note that ior maps to "or"; and and xor map to their own names.
973 (define_code_attr logic [(and "and") (ior "or") (xor "xor")])
974
975 ;; Mapping of logic-shift operators (left shift and logical right shift)
976 (define_code_iterator any_lshift [ashift lshiftrt])
977
978 ;; Mapping of shift-right operators (logical and arithmetic)
979 (define_code_iterator any_shiftrt [lshiftrt ashiftrt])
980
981 ;; Mapping of all shift operators
982 (define_code_iterator any_shift [ashift lshiftrt ashiftrt])
983
984 ;; Base name for insn mnemonic.
;; Two name tables for the shift codes; they agree on ashift ("sll")
;; and differ on the right shifts ("shr"/"sar" versus "srl"/"sra").
;; NOTE(review): in "shift", ashift maps to "sll" rather than a name in
;; the same style as "shr"/"sar" -- confirm the ashift entry is only
;; used where "sll" is intended.
985 (define_code_attr shift [(ashift "sll") (lshiftrt "shr") (ashiftrt "sar")])
986 (define_code_attr vshift [(ashift "sll") (lshiftrt "srl") (ashiftrt "sra")])
987
988 ;; Mapping of rotate operators (left and right)
989 (define_code_iterator any_rotate [rotate rotatert])
990
991 ;; Base name for insn mnemonic: "rol" for rotate, "ror" for rotatert.
992 (define_code_attr rotate [(rotate "rol") (rotatert "ror")])
993
994 ;; Mapping of abs neg operators
995 (define_code_iterator absneg [abs neg])
996
997 ;; Mapping of abs neg operators to logic operation: abs pairs with
;; "and", neg pairs with "xor".
998 (define_code_attr absneg_op [(abs "and") (neg "xor")])
999
1000 ;; Base name for x87 insn mnemonic.
1001 (define_code_attr absneg_mnemonic [(abs "fabs") (neg "fchs")])
1002
1003 ;; Mapping of extend operators
;; Sign extension and zero extension.
1004 (define_code_iterator any_extend [sign_extend zero_extend])
1005
1006 ;; Mapping of highpart multiply operators
;; Signed and unsigned high-part multiply.
1007 (define_code_iterator any_mul_highpart [smul_highpart umul_highpart])
1008
1009 ;; Prefix for insn mnemonic: "i" for the signed codes, empty for the
;; unsigned ones.
1010 (define_code_attr sgnprefix [(sign_extend "i") (zero_extend "")
1011 (smul_highpart "i") (umul_highpart "")
1012 (div "i") (udiv "")])
1013 ;; Prefix for define_insn
;; "s" gives "s"/"u" for extension and high-part multiply codes.
;; "u" gives ""/"u" and "u_bool" gives "false"/"true" for extension
;; and div/udiv codes; the unsigned code selects "u" / "true".
1014 (define_code_attr s [(sign_extend "s") (zero_extend "u")
1015 (smul_highpart "s") (umul_highpart "u")])
1016 (define_code_attr u [(sign_extend "") (zero_extend "u")
1017 (div "") (udiv "u")])
1018 (define_code_attr u_bool [(sign_extend "false") (zero_extend "true")
1019 (div "false") (udiv "true")])
1020
1021 ;; Used in signed and unsigned truncations.
1022 (define_code_iterator any_truncate [ss_truncate truncate us_truncate])
1023 ;; Instruction suffix for truncations.
1024 (define_code_attr trunsuffix
1025 [(ss_truncate "s") (truncate "") (us_truncate "us")])
1026
1027 ;; Instruction suffix for SSE sign and zero extensions.
1028 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
1029
1030 ;; Used in signed and unsigned fix.
1031 (define_code_iterator any_fix [fix unsigned_fix])
1032 (define_code_attr fixsuffix [(fix "") (unsigned_fix "u")])
1033 (define_code_attr fixunssuffix [(fix "") (unsigned_fix "uns")])
1034 (define_code_attr fixprefix [(fix "s") (unsigned_fix "u")])
1035
1036 ;; Used in signed and unsigned float.
1037 (define_code_iterator any_float [float unsigned_float])
1038 (define_code_attr floatsuffix [(float "") (unsigned_float "u")])
1039 (define_code_attr floatunssuffix [(float "") (unsigned_float "uns")])
1040 (define_code_attr floatprefix [(float "s") (unsigned_float "u")])
1041
1042 ;; Base name for expression
1043 (define_code_attr insn
1044 [(plus "add") (ss_plus "ssadd") (us_plus "usadd")
1045 (minus "sub") (ss_minus "sssub") (us_minus "ussub")
1046 (sign_extend "extend") (zero_extend "zero_extend")
1047 (ashift "ashl") (lshiftrt "lshr") (ashiftrt "ashr")
1048 (rotate "rotl") (rotatert "rotr")
1049 (mult "mul") (div "div")])
1050
1051 ;; All integer modes.
1052 (define_mode_iterator SWI1248x [QI HI SI DI])
1053
1054 ;; All integer modes without QImode.
1055 (define_mode_iterator SWI248x [HI SI DI])
1056
1057 ;; All integer modes without QImode and HImode.
1058 (define_mode_iterator SWI48x [SI DI])
1059
1060 ;; All integer modes without SImode and DImode.
1061 (define_mode_iterator SWI12 [QI HI])
1062
1063 ;; All integer modes without DImode.
1064 (define_mode_iterator SWI124 [QI HI SI])
1065
1066 ;; All integer modes without QImode and DImode.
1067 (define_mode_iterator SWI24 [HI SI])
1068
1069 ;; Single word integer modes.
1070 (define_mode_iterator SWI [QI HI SI (DI "TARGET_64BIT")])
1071
1072 ;; Single word integer modes without QImode.
1073 (define_mode_iterator SWI248 [HI SI (DI "TARGET_64BIT")])
1074
1075 ;; Single word integer modes without QImode and HImode.
1076 (define_mode_iterator SWI48 [SI (DI "TARGET_64BIT")])
1077
1078 ;; All math-dependent single and double word integer modes.
1079 (define_mode_iterator SDWIM [(QI "TARGET_QIMODE_MATH")
1080 (HI "TARGET_HIMODE_MATH")
1081 SI DI (TI "TARGET_64BIT")])
1082
1083 ;; Math-dependent single word integer modes.
1084 (define_mode_iterator SWIM [(QI "TARGET_QIMODE_MATH")
1085 (HI "TARGET_HIMODE_MATH")
1086 SI (DI "TARGET_64BIT")])
1087
1088 ;; Math-dependent integer modes without DImode.
1089 (define_mode_iterator SWIM124 [(QI "TARGET_QIMODE_MATH")
1090 (HI "TARGET_HIMODE_MATH")
1091 SI])
1092
1093 ;; Math-dependent integer modes with DImode.
1094 (define_mode_iterator SWIM1248x
1095 [(QI "TARGET_QIMODE_MATH")
1096 (HI "TARGET_HIMODE_MATH")
1097 SI DI])
1098
1099 ;; Math-dependent single word integer modes without QImode.
1100 (define_mode_iterator SWIM248 [(HI "TARGET_HIMODE_MATH")
1101 SI (DI "TARGET_64BIT")])
1102
1103 ;; Double word integer modes.
1104 (define_mode_iterator DWI [(DI "!TARGET_64BIT")
1105 (TI "TARGET_64BIT")])
1106
1107 ;; SWI and DWI together.
1108 (define_mode_iterator SWIDWI [QI HI SI DI (TI "TARGET_64BIT")])
1109
1110 ;; SWI48 and DWI together.
1111 (define_mode_iterator SWI48DWI [SI DI (TI "TARGET_64BIT")])
1112
1113 ;; GET_MODE_SIZE for selected modes. As GET_MODE_SIZE is not a
1114 ;; compile-time constant, it is faster to use <MODE_SIZE> than
1115 ;; GET_MODE_SIZE (<MODE>mode). For XFmode, whose size depends on
1116 ;; command line options, just use the GET_MODE_SIZE macro.
1117 (define_mode_attr MODE_SIZE [(QI "1") (HI "2") (SI "4") (DI "8")
1118 (TI "16") (HF "2") (BF "2") (SF "4") (DF "8")
1119 (XF "GET_MODE_SIZE (XFmode)")
1120 (V16QI "16") (V32QI "32") (V64QI "64")
1121 (V8HI "16") (V16HI "32") (V32HI "64")
1122 (V4SI "16") (V8SI "32") (V16SI "64")
1123 (V2DI "16") (V4DI "32") (V8DI "64")
1124 (V1TI "16") (V2TI "32") (V4TI "64")
1125 (V2DF "16") (V4DF "32") (V8DF "64")
1126 (V4SF "16") (V8SF "32") (V16SF "64")
1127 (V8HF "16") (V16HF "32") (V32HF "64")
1128 (V4HF "8") (V2HF "4")
1129 (V8BF "16") (V16BF "32") (V32BF "64")
1130 (V4BF "8") (V2BF "4")])
1131
1132 ;; Double word integer modes as mode attribute.
1133 (define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI") (TI "OI")])
1134 (define_mode_attr dwi [(QI "hi") (HI "si") (SI "di") (DI "ti") (TI "oi")])
1135
1136 ;; LEA mode corresponding to an integer mode
1137 (define_mode_attr LEAMODE [(QI "SI") (HI "SI") (SI "SI") (DI "DI")])
1138
1139 ;; Half mode for double word integer modes.
1140 (define_mode_iterator DWIH [(SI "!TARGET_64BIT")
1141 (DI "TARGET_64BIT")])
1142
1143 ;; Instruction suffix for integer modes.
1144 (define_mode_attr imodesuffix [(QI "b") (HI "w") (SI "l") (DI "q")])
1145
1146 ;; Instruction suffix for masks.
1147 (define_mode_attr mskmodesuffix [(QI "b") (HI "w") (SI "d") (DI "q")])
1148
1149 ;; Pointer size prefix for integer modes (Intel asm dialect)
1150 (define_mode_attr iptrsize [(QI "BYTE")
1151 (HI "WORD")
1152 (SI "DWORD")
1153 (DI "QWORD")])
1154
1155 ;; Register class for integer modes.
1156 (define_mode_attr r [(QI "q") (HI "r") (SI "r") (DI "r")])
1157
1158 ;; Immediate operand constraint for integer modes.
1159 (define_mode_attr i [(QI "n") (HI "n") (SI "e") (DI "e")])
1160
1161 ;; General operand constraint for word modes.
1162 (define_mode_attr g [(QI "qmn") (HI "rmn") (SI "rme") (DI "rme")])
1163
1164 ;; Memory operand constraint for word modes.
1165 (define_mode_attr m [(QI "m") (HI "m") (SI "BM") (DI "BM")])
1166
1167 ;; Immediate operand constraint for double integer modes.
1168 (define_mode_attr di [(SI "nF") (DI "Wd")])
1169
1170 ;; Immediate operand constraint for shifts.
1171 (define_mode_attr S [(QI "I") (HI "I") (SI "I") (DI "J") (TI "O")])
1172 (define_mode_attr KS [(QI "Wb") (HI "Ww") (SI "I") (DI "J")])
1173
1174 ;; Print register name in the specified mode.
1175 (define_mode_attr k [(QI "b") (HI "w") (SI "k") (DI "q")])
1176
1177 ;; General operand predicate for integer modes.
1178 (define_mode_attr general_operand
1179 [(QI "general_operand")
1180 (HI "general_operand")
1181 (SI "x86_64_general_operand")
1182 (DI "x86_64_general_operand")
1183 (TI "x86_64_general_operand")])
1184
1185 ;; General operand predicate for integer modes, where for TImode
1186 ;; we need both words of the operand to be general operands.
1187 (define_mode_attr general_hilo_operand
1188 [(QI "general_operand")
1189 (HI "general_operand")
1190 (SI "x86_64_general_operand")
1191 (DI "x86_64_general_operand")
1192 (TI "x86_64_hilo_general_operand")])
1193
1194 ;; General sign extend operand predicate for integer modes,
1195 ;; which disallows VOIDmode operands and thus it is suitable
1196 ;; for use inside sign_extend.
1197 (define_mode_attr general_sext_operand
1198 [(QI "sext_operand")
1199 (HI "sext_operand")
1200 (SI "x86_64_sext_operand")
1201 (DI "x86_64_sext_operand")])
1202
1203 ;; General sign/zero extend operand predicate for integer modes.
1204 (define_mode_attr general_szext_operand
1205 [(QI "general_operand")
1206 (HI "general_operand")
1207 (SI "x86_64_szext_general_operand")
1208 (DI "x86_64_szext_general_operand")
1209 (TI "x86_64_hilo_general_operand")])
1210
;; Nonmemory sign/zero extend operand predicate for integer modes.
1211 (define_mode_attr nonmemory_szext_operand
1212 [(QI "nonmemory_operand")
1213 (HI "nonmemory_operand")
1214 (SI "x86_64_szext_nonmemory_operand")
1215 (DI "x86_64_szext_nonmemory_operand")])
1216
1217 ;; Immediate operand predicate for integer modes.
1218 (define_mode_attr immediate_operand
1219 [(QI "immediate_operand")
1220 (HI "immediate_operand")
1221 (SI "x86_64_immediate_operand")
1222 (DI "x86_64_immediate_operand")])
1223
1224 ;; Nonmemory operand predicate for integer modes.
1225 (define_mode_attr nonmemory_operand
1226 [(QI "nonmemory_operand")
1227 (HI "nonmemory_operand")
1228 (SI "x86_64_nonmemory_operand")
1229 (DI "x86_64_nonmemory_operand")])
1230
1231 ;; Operand predicate for shifts.
1232 (define_mode_attr shift_operand
1233 [(QI "nonimmediate_operand")
1234 (HI "nonimmediate_operand")
1235 (SI "nonimmediate_operand")
1236 (DI "shiftdi_operand")
1237 (TI "register_operand")])
1238
1239 ;; Operand predicate for shift argument.
1240 (define_mode_attr shift_immediate_operand
1241 [(QI "const_1_to_31_operand")
1242 (HI "const_1_to_31_operand")
1243 (SI "const_1_to_31_operand")
1244 (DI "const_1_to_63_operand")])
1245
1246 ;; Input operand predicate for arithmetic left shifts.
1247 (define_mode_attr ashl_input_operand
1248 [(QI "nonimmediate_operand")
1249 (HI "nonimmediate_operand")
1250 (SI "nonimmediate_operand")
1251 (DI "ashldi_input_operand")
1252 (TI "reg_or_pm1_operand")])
1253
1254 ;; SSE and x87 SFmode and DFmode floating point modes
1255 (define_mode_iterator MODEF [SF DF])
1256
1257 ;; SSE floating point modes
1258 (define_mode_iterator MODEFH [(HF "TARGET_AVX512FP16") SF DF])
1259
1260 ;; All x87 floating point modes
1261 (define_mode_iterator X87MODEF [SF DF XF])
1262
1263 ;; All x87 floating point modes plus HFmode and BFmode
1264 (define_mode_iterator X87MODEFH [HF SF DF XF BF])
1265
1266 ;; All SSE floating point modes
1267 (define_mode_iterator SSEMODEF [HF SF DF TF])
1268 (define_mode_attr ssevecmodef [(HF "V8HF") (SF "V4SF") (DF "V2DF") (TF "TF")])
1269
1270 ;; SSE instruction suffix for various modes
1271 (define_mode_attr ssemodesuffix
1272 [(HF "sh") (SF "ss") (DF "sd")
1273 (V32HF "ph") (V16SF "ps") (V8DF "pd")
1274 (V16HF "ph") (V16BF "bf") (V8SF "ps") (V4DF "pd")
1275 (V8HF "ph") (V8BF "bf") (V4SF "ps") (V2DF "pd")
1276 (V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")
1277 (V32QI "b") (V16HI "w") (V8SI "d") (V4DI "q")
1278 (V64QI "b") (V32HI "w") (V16SI "d") (V8DI "q")])
1279
1280 ;; SSE vector suffix for floating point modes
1281 (define_mode_attr ssevecmodesuffix [(SF "ps") (DF "pd")])
1282
1283 ;; SSE vector mode corresponding to a scalar mode
1284 (define_mode_attr ssevecmode
1285 [(QI "V16QI") (HI "V8HI") (SI "V4SI") (DI "V2DI") (HF "V8HF") (BF "V8BF") (SF "V4SF") (DF "V2DF")])
1286 (define_mode_attr ssevecmodelower
1287 [(QI "v16qi") (HI "v8hi") (SI "v4si") (DI "v2di") (SF "v4sf") (DF "v2df")])
1288
1289 ;; AVX512F vector mode corresponding to a scalar mode
1290 (define_mode_attr avx512fvecmode
1291 [(QI "V64QI") (HI "V32HI") (SI "V16SI") (DI "V8DI") (SF "V16SF") (DF "V8DF")])
1292
1293 ;; Instruction suffix for REX 64bit operators.
1294 (define_mode_attr rex64suffix [(SI "{l}") (DI "{q}")])
1295 (define_mode_attr rex64namesuffix [(SI "") (DI "q")])
1296
1297 ;; This mode iterator allows :P to be used for patterns that operate on
1298 ;; pointer-sized quantities. Exactly one of the two alternatives will match.
1299 (define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
1300
1301 ;; This mode iterator allows :W to be used for patterns that operate on
1302 ;; word_mode sized quantities.
1303 (define_mode_iterator W
1304 [(SI "word_mode == SImode") (DI "word_mode == DImode")])
1305
1306 ;; This mode iterator allows :PTR to be used for patterns that operate on
1307 ;; ptr_mode sized quantities.
1308 (define_mode_iterator PTR
1309 [(SI "ptr_mode == SImode") (DI "ptr_mode == DImode")])
1310 \f
1311 ;; Scheduling descriptions
1312
1313 (include "pentium.md")
1314 (include "ppro.md")
1315 (include "k6.md")
1316 (include "athlon.md")
1317 (include "bdver1.md")
1318 (include "bdver3.md")
1319 (include "btver2.md")
1320 (include "znver.md")
1321 (include "geode.md")
1322 (include "atom.md")
1323 (include "slm.md")
1324 (include "glm.md")
1325 (include "core2.md")
1326 (include "haswell.md")
1327 (include "lujiazui.md")
1328
1329 \f
1330 ;; Operand and operator predicates and constraints
1331
1332 (include "predicates.md")
1333 (include "constraints.md")
1334
1335 \f
1336 ;; Compare and branch/compare and store instructions.
1337
;; Integer compare-and-branch expander for the SDWIM modes. Operand 0 is
;; the comparison operator, operands 1 and 2 are the values being compared
;; and operand 3 is the branch target. When both compared operands are in
;; memory, operand 1 is loaded into a register first. The code is emitted
;; by ix86_expand_branch; the preparation statements end with DONE.
1338 (define_expand "cbranch<mode>4"
1339 [(set (reg:CC FLAGS_REG)
1340 (compare:CC (match_operand:SDWIM 1 "nonimmediate_operand")
1341 (match_operand:SDWIM 2 "<general_operand>")))
1342 (set (pc) (if_then_else
1343 (match_operator 0 "ordered_comparison_operator"
1344 [(reg:CC FLAGS_REG) (const_int 0)])
1345 (label_ref (match_operand 3))
1346 (pc)))]
1347 ""
1348 {
1349 if (MEM_P (operands[1]) && MEM_P (operands[2]))
1350 operands[1] = force_reg (<MODE>mode, operands[1]);
1351 ix86_expand_branch (GET_CODE (operands[0]),
1352 operands[1], operands[2], operands[3]);
1353 DONE;
1354 })
1355
1356 (define_expand "cbranchoi4"
1357 [(set (reg:CC FLAGS_REG)
1358 (compare:CC (match_operand:OI 1 "nonimmediate_operand")
1359 (match_operand:OI 2 "nonimmediate_operand")))
1360 (set (pc) (if_then_else
1361 (match_operator 0 "bt_comparison_operator"
1362 [(reg:CC FLAGS_REG) (const_int 0)])
1363 (label_ref (match_operand 3))
1364 (pc)))]
1365 "TARGET_AVX"
1366 {
1367 ix86_expand_branch (GET_CODE (operands[0]),
1368 operands[1], operands[2], operands[3]);
1369 DONE;
1370 })
1371
;; Integer compare-and-set expander for the SDWIM modes: operand 0 (QImode)
;; receives the result of comparison operator 1 applied to operands 2 and 3.
;; For the double-word mode (TImode if TARGET_64BIT, else DImode) only EQ
;; and NE are expanded here; every other comparison code FAILs. For the
;; remaining modes a memory-memory comparison has operand 2 forced into a
;; register. The code is emitted by ix86_expand_setcc.
1372 (define_expand "cstore<mode>4"
1373 [(set (reg:CC FLAGS_REG)
1374 (compare:CC (match_operand:SDWIM 2 "nonimmediate_operand")
1375 (match_operand:SDWIM 3 "<general_operand>")))
1376 (set (match_operand:QI 0 "register_operand")
1377 (match_operator 1 "ordered_comparison_operator"
1378 [(reg:CC FLAGS_REG) (const_int 0)]))]
1379 ""
1380 {
1381 if (<MODE>mode == (TARGET_64BIT ? TImode : DImode))
1382 {
1383 if (GET_CODE (operands[1]) != EQ
1384 && GET_CODE (operands[1]) != NE)
1385 FAIL;
1386 }
1387 else if (MEM_P (operands[2]) && MEM_P (operands[3]))
1388 operands[2] = force_reg (<MODE>mode, operands[2]);
1389 ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
1390 operands[2], operands[3]);
1391 DONE;
1392 })
1393
1394 (define_expand "@cmp<mode>_1"
1395 [(set (reg:CC FLAGS_REG)
1396 (compare:CC (match_operand:SWI48 0 "nonimmediate_operand")
1397 (match_operand:SWI48 1 "<general_operand>")))])
1398
;; Integer modes for the compare-with-zero pattern below: QImode requires
;; AVX512DQ, SImode requires AVX512BW, DImode requires AVX512BW and 64-bit
;; mode; HImode has no extra condition.
1399 (define_mode_iterator SWI1248_AVX512BWDQ_64
1400 [(QI "TARGET_AVX512DQ") HI
1401 (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW && TARGET_64BIT")])
1402
;; Compare against zero when only CCZmode is required and AVX512F is
;; enabled. The three alternatives are: test of a register with itself,
;; cmp with the zero operand, and kortest of a "k"-constrained operand
;; with itself (type msklog, VEX prefix).
1403 (define_insn "*cmp<mode>_ccz_1"
1404 [(set (reg FLAGS_REG)
1405 (compare (match_operand:SWI1248_AVX512BWDQ_64 0
1406 "nonimmediate_operand" "<r>,?m<r>,$k")
1407 (match_operand:SWI1248_AVX512BWDQ_64 1 "const0_operand")))]
1408 "TARGET_AVX512F && ix86_match_ccmode (insn, CCZmode)"
1409 "@
1410 test{<imodesuffix>}\t%0, %0
1411 cmp{<imodesuffix>}\t{%1, %0|%0, %1}
1412 kortest<mskmodesuffix>\t%0, %0"
1413 [(set_attr "type" "test,icmp,msklog")
1414 (set_attr "length_immediate" "0,1,*")
1415 (set_attr "prefix" "*,*,vex")
1416 (set_attr "mode" "<MODE>")])
1417
1418 (define_insn "*cmp<mode>_ccno_1"
1419 [(set (reg FLAGS_REG)
1420 (compare (match_operand:SWI 0 "nonimmediate_operand" "<r>,?m<r>")
1421 (match_operand:SWI 1 "const0_operand")))]
1422 "ix86_match_ccmode (insn, CCNOmode)"
1423 "@
1424 test{<imodesuffix>}\t%0, %0
1425 cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
1426 [(set_attr "type" "test,icmp")
1427 (set_attr "length_immediate" "0,1")
1428 (set_attr "mode" "<MODE>")])
1429
1430 (define_insn "*cmp<mode>_1"
1431 [(set (reg FLAGS_REG)
1432 (compare (match_operand:SWI 0 "nonimmediate_operand" "<r>m,<r>")
1433 (match_operand:SWI 1 "<general_operand>" "<r><i>,<r><m>")))]
1434 "ix86_match_ccmode (insn, CCmode)"
1435 "cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
1436 [(set_attr "type" "icmp")
1437 (set_attr "mode" "<MODE>")])
1438
1439 (define_insn "*cmp<mode>_minus_1"
1440 [(set (reg FLAGS_REG)
1441 (compare
1442 (minus:SWI (match_operand:SWI 0 "nonimmediate_operand" "<r>m,<r>")
1443 (match_operand:SWI 1 "<general_operand>" "<r><i>,<r><m>"))
1444 (const_int 0)))]
1445 "ix86_match_ccmode (insn, CCGOCmode)"
1446 "cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
1447 [(set_attr "type" "icmp")
1448 (set_attr "mode" "<MODE>")])
1449
1450 (define_insn "*cmpqi_ext<mode>_1"
1451 [(set (reg FLAGS_REG)
1452 (compare
1453 (match_operand:QI 0 "nonimmediate_operand" "QBc,m")
1454 (subreg:QI
1455 (zero_extract:SWI248
1456 (match_operand:SWI248 1 "register_operand" "Q,Q")
1457 (const_int 8)
1458 (const_int 8)) 0)))]
1459 "ix86_match_ccmode (insn, CCmode)"
1460 "cmp{b}\t{%h1, %0|%0, %h1}"
1461 [(set_attr "isa" "*,nox64")
1462 (set_attr "type" "icmp")
1463 (set_attr "mode" "QI")])
1464
1465 (define_insn "*cmpqi_ext<mode>_2"
1466 [(set (reg FLAGS_REG)
1467 (compare
1468 (subreg:QI
1469 (zero_extract:SWI248
1470 (match_operand:SWI248 0 "register_operand" "Q")
1471 (const_int 8)
1472 (const_int 8)) 0)
1473 (match_operand:QI 1 "const0_operand")))]
1474 "ix86_match_ccmode (insn, CCNOmode)"
1475 "test{b}\t%h0, %h0"
1476 [(set_attr "type" "test")
1477 (set_attr "length_immediate" "0")
1478 (set_attr "mode" "QI")])
1479
1480 (define_expand "cmpqi_ext_3"
1481 [(set (reg:CC FLAGS_REG)
1482 (compare:CC
1483 (subreg:QI
1484 (zero_extract:HI
1485 (match_operand:HI 0 "register_operand")
1486 (const_int 8)
1487 (const_int 8)) 0)
1488 (match_operand:QI 1 "const_int_operand")))])
1489
1490 (define_insn "*cmpqi_ext<mode>_3"
1491 [(set (reg FLAGS_REG)
1492 (compare
1493 (subreg:QI
1494 (zero_extract:SWI248
1495 (match_operand:SWI248 0 "register_operand" "Q,Q")
1496 (const_int 8)
1497 (const_int 8)) 0)
1498 (match_operand:QI 1 "general_operand" "QnBc,m")))]
1499 "ix86_match_ccmode (insn, CCmode)"
1500 "cmp{b}\t{%1, %h0|%h0, %1}"
1501 [(set_attr "isa" "*,nox64")
1502 (set_attr "type" "icmp")
1503 (set_attr "mode" "QI")])
1504
1505 (define_insn "*cmpqi_ext<mode>_4"
1506 [(set (reg FLAGS_REG)
1507 (compare
1508 (subreg:QI
1509 (zero_extract:SWI248
1510 (match_operand:SWI248 0 "register_operand" "Q")
1511 (const_int 8)
1512 (const_int 8)) 0)
1513 (subreg:QI
1514 (zero_extract:SWI248
1515 (match_operand:SWI248 1 "register_operand" "Q")
1516 (const_int 8)
1517 (const_int 8)) 0)))]
1518 "ix86_match_ccmode (insn, CCmode)"
1519 "cmp{b}\t{%h1, %h0|%h0, %h1}"
1520 [(set_attr "type" "icmp")
1521 (set_attr "mode" "QI")])
1522
;; Double-word comparison for equality only (CCZmode), split before reload
;; into operations on the word-sized halves. After split_double_mode the
;; halves of the two inputs live in operands[0..3]: operands[0]/operands[1]
;; are compared with each other, and so are operands[2]/operands[3]. Each
;; pair is reduced to a value that is zero iff the two halves are equal
;; (XOR in general, a plain copy when one side is zero, a one's complement
;; when one side is -1). The two results (operands 4 and 5) are IORed and
;; tested for zero by the split template. A comparison against -1 in both
;; halves is instead emitted as an AND of operands[0] and operands[2]
;; compared with -1, bypassing the template via DONE.
1523 (define_insn_and_split "*cmp<dwi>_doubleword"
1524 [(set (reg:CCZ FLAGS_REG)
1525 (compare:CCZ (match_operand:<DWI> 0 "nonimmediate_operand")
1526 (match_operand:<DWI> 1 "general_operand")))]
1527 "ix86_pre_reload_split ()"
1528 "#"
1529 "&& 1"
1530 [(parallel [(set (reg:CCZ FLAGS_REG)
1531 (compare:CCZ (ior:DWIH (match_dup 4) (match_dup 5))
1532 (const_int 0)))
1533 (set (match_dup 4) (ior:DWIH (match_dup 4) (match_dup 5)))])]
1534 {
1535 split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);
1536 /* Placing the SUBREG pieces in pseudos helps reload. */
1537 for (int i = 0; i < 4; i++)
1538 if (SUBREG_P (operands[i]))
1539 operands[i] = force_reg (<MODE>mode, operands[i]);
1540
1541 operands[4] = gen_reg_rtx (<MODE>mode);
1542
1543 /* Special case comparisons against -1. */
1544 if (operands[1] == constm1_rtx && operands[3] == constm1_rtx)
1545 {
1546 emit_insn (gen_and<mode>3 (operands[4], operands[0], operands[2]));
1547 emit_insn (gen_cmp_1 (<MODE>mode, operands[4], constm1_rtx));
1548 DONE;
1549 }
1550
1551 if (operands[1] == const0_rtx)
1552 emit_move_insn (operands[4], operands[0]);
1553 else if (operands[0] == const0_rtx)
1554 emit_move_insn (operands[4], operands[1]);
1555 else if (operands[1] == constm1_rtx)
1556 emit_insn (gen_one_cmpl<mode>2 (operands[4], operands[0]));
1557 else if (operands[0] == constm1_rtx)
1558 emit_insn (gen_one_cmpl<mode>2 (operands[4], operands[1]));
1559 else
1560 {
1561 if (CONST_SCALAR_INT_P (operands[1])
1562 && !x86_64_immediate_operand (operands[1], <MODE>mode))
1563 operands[1] = force_reg (<MODE>mode, operands[1]);
1564 emit_insn (gen_xor<mode>3 (operands[4], operands[0], operands[1]));
1565 }
1566
1567 if (operands[3] == const0_rtx)
1568 operands[5] = operands[2];
1569 else if (operands[2] == const0_rtx)
1570 operands[5] = operands[3];
1571 else
1572 {
1573 operands[5] = gen_reg_rtx (<MODE>mode);
1574 if (operands[3] == constm1_rtx)
1575 emit_insn (gen_one_cmpl<mode>2 (operands[5], operands[2]));
1576 else if (operands[2] == constm1_rtx)
1577 emit_insn (gen_one_cmpl<mode>2 (operands[5], operands[3]));
1578 else
1579 {
1580 if (CONST_SCALAR_INT_P (operands[3])
1581 && !x86_64_immediate_operand (operands[3], <MODE>mode))
1582 operands[3] = force_reg (<MODE>mode, operands[3]);
1583 emit_insn (gen_xor<mode>3 (operands[5], operands[2], operands[3]));
1584 }
1585 }
1586 })
1587
1588 ;; These implement floating point compares.
1589 ;; %%% See if we can get away with VOIDmode operands on the actual insns,
1590 ;; which would allow mix and match FP modes on the compares, which is what
1591 ;; the old patterns did, but with many more of them.
1592
1593 (define_expand "cbranchxf4"
1594 [(set (reg:CC FLAGS_REG)
1595 (compare:CC (match_operand:XF 1 "nonmemory_operand")
1596 (match_operand:XF 2 "nonmemory_operand")))
1597 (set (pc) (if_then_else
1598 (match_operator 0 "ix86_fp_comparison_operator"
1599 [(reg:CC FLAGS_REG)
1600 (const_int 0)])
1601 (label_ref (match_operand 3))
1602 (pc)))]
1603 "TARGET_80387"
1604 {
1605 ix86_expand_branch (GET_CODE (operands[0]),
1606 operands[1], operands[2], operands[3]);
1607 DONE;
1608 })
1609
1610 (define_expand "cstorexf4"
1611 [(set (reg:CC FLAGS_REG)
1612 (compare:CC (match_operand:XF 2 "nonmemory_operand")
1613 (match_operand:XF 3 "nonmemory_operand")))
1614 (set (match_operand:QI 0 "register_operand")
1615 (match_operator 1 "ix86_fp_comparison_operator"
1616 [(reg:CC FLAGS_REG)
1617 (const_int 0)]))]
1618 "TARGET_80387"
1619 {
1620 ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
1621 operands[2], operands[3]);
1622 DONE;
1623 })
1624
1625 (define_expand "cbranchhf4"
1626 [(set (reg:CC FLAGS_REG)
1627 (compare:CC (match_operand:HF 1 "cmp_fp_expander_operand")
1628 (match_operand:HF 2 "cmp_fp_expander_operand")))
1629 (set (pc) (if_then_else
1630 (match_operator 0 "ix86_fp_comparison_operator"
1631 [(reg:CC FLAGS_REG)
1632 (const_int 0)])
1633 (label_ref (match_operand 3))
1634 (pc)))]
1635 "TARGET_AVX512FP16"
1636 {
1637 ix86_expand_branch (GET_CODE (operands[0]),
1638 operands[1], operands[2], operands[3]);
1639 DONE;
1640 })
1641
1642 (define_expand "cbranch<mode>4"
1643 [(set (reg:CC FLAGS_REG)
1644 (compare:CC (match_operand:MODEF 1 "cmp_fp_expander_operand")
1645 (match_operand:MODEF 2 "cmp_fp_expander_operand")))
1646 (set (pc) (if_then_else
1647 (match_operator 0 "ix86_fp_comparison_operator"
1648 [(reg:CC FLAGS_REG)
1649 (const_int 0)])
1650 (label_ref (match_operand 3))
1651 (pc)))]
1652 "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
1653 {
1654 ix86_expand_branch (GET_CODE (operands[0]),
1655 operands[1], operands[2], operands[3]);
1656 DONE;
1657 })
1658
;; BFmode compare-and-branch. The expander has no enabling condition; the
;; comparison itself is carried out in SFmode. Each operand is widened as
;; follows: if its HImode low part is a CONST_INT it is folded with
;; simplify_const_unary_operation (FLOAT_EXTEND); otherwise its 16 bits are
;; zero-extended into an SImode pseudo, shifted left by 16 and the result is
;; reinterpreted as SFmode. do_compare_rtx_and_jump then emits the SFmode
;; compare and the jump to the label in operand 3.
1659 (define_expand "cbranchbf4"
1660 [(set (reg:CC FLAGS_REG)
1661 (compare:CC (match_operand:BF 1 "cmp_fp_expander_operand")
1662 (match_operand:BF 2 "cmp_fp_expander_operand")))
1663 (set (pc) (if_then_else
1664 (match_operator 0 "comparison_operator"
1665 [(reg:CC FLAGS_REG)
1666 (const_int 0)])
1667 (label_ref (match_operand 3))
1668 (pc)))]
1669 ""
1670 {
1671 rtx op1 = gen_lowpart (HImode, operands[1]);
1672 if (CONST_INT_P (op1))
1673 op1 = simplify_const_unary_operation (FLOAT_EXTEND, SFmode,
1674 operands[1], BFmode);
1675 else
1676 {
1677 rtx t1 = gen_reg_rtx (SImode);
1678 emit_insn (gen_zero_extendhisi2 (t1, op1));
1679 emit_insn (gen_ashlsi3 (t1, t1, GEN_INT (16)));
1680 op1 = gen_lowpart (SFmode, t1);
1681 }
1682 rtx op2 = gen_lowpart (HImode, operands[2]);
1683 if (CONST_INT_P (op2))
1684 op2 = simplify_const_unary_operation (FLOAT_EXTEND, SFmode,
1685 operands[2], BFmode);
1686 else
1687 {
1688 rtx t2 = gen_reg_rtx (SImode);
1689 emit_insn (gen_zero_extendhisi2 (t2, op2));
1690 emit_insn (gen_ashlsi3 (t2, t2, GEN_INT (16)));
1691 op2 = gen_lowpart (SFmode, t2);
1692 }
1693 do_compare_rtx_and_jump (op1, op2, GET_CODE (operands[0]), 0,
1694 SFmode, NULL_RTX, NULL,
1695 as_a <rtx_code_label *> (operands[3]),
1696 /* Unfortunately this isn't propagated. */
1697 profile_probability::even ());
1698 DONE;
1699 })
1700
1701 (define_expand "cstorehf4"
1702 [(set (reg:CC FLAGS_REG)
1703 (compare:CC (match_operand:HF 2 "cmp_fp_expander_operand")
1704 (match_operand:HF 3 "cmp_fp_expander_operand")))
1705 (set (match_operand:QI 0 "register_operand")
1706 (match_operator 1 "ix86_fp_comparison_operator"
1707 [(reg:CC FLAGS_REG)
1708 (const_int 0)]))]
1709 "TARGET_AVX512FP16"
1710 {
1711 ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
1712 operands[2], operands[3]);
1713 DONE;
1714 })
1715
;; BFmode compare-and-set. The expander has no enabling condition; the
;; comparison itself is carried out in SFmode. Operands 2 and 3 are widened
;; to SFmode: a constant (HImode low part is a CONST_INT) is folded with
;; simplify_const_unary_operation (FLOAT_EXTEND); anything else has its
;; 16 bits zero-extended into an SImode pseudo, shifted left by 16 and
;; reinterpreted as SFmode. emit_store_flag_force computes the result of
;; comparison operator 1; if it did not end up in operand 0 it is copied
;; there.
1716 (define_expand "cstorebf4"
1717 [(set (reg:CC FLAGS_REG)
1718 (compare:CC (match_operand:BF 2 "cmp_fp_expander_operand")
1719 (match_operand:BF 3 "cmp_fp_expander_operand")))
1720 (set (match_operand:QI 0 "register_operand")
1721 (match_operator 1 "comparison_operator"
1722 [(reg:CC FLAGS_REG)
1723 (const_int 0)]))]
1724 ""
1725 {
1726 rtx op1 = gen_lowpart (HImode, operands[2]);
1727 if (CONST_INT_P (op1))
1728 op1 = simplify_const_unary_operation (FLOAT_EXTEND, SFmode,
1729 operands[2], BFmode);
1730 else
1731 {
1732 rtx t1 = gen_reg_rtx (SImode);
1733 emit_insn (gen_zero_extendhisi2 (t1, op1));
1734 emit_insn (gen_ashlsi3 (t1, t1, GEN_INT (16)));
1735 op1 = gen_lowpart (SFmode, t1);
1736 }
1737 rtx op2 = gen_lowpart (HImode, operands[3]);
1738 if (CONST_INT_P (op2))
1739 op2 = simplify_const_unary_operation (FLOAT_EXTEND, SFmode,
1740 operands[3], BFmode);
1741 else
1742 {
1743 rtx t2 = gen_reg_rtx (SImode);
1744 emit_insn (gen_zero_extendhisi2 (t2, op2));
1745 emit_insn (gen_ashlsi3 (t2, t2, GEN_INT (16)));
1746 op2 = gen_lowpart (SFmode, t2);
1747 }
1748 rtx res = emit_store_flag_force (operands[0], GET_CODE (operands[1]),
1749 op1, op2, SFmode, 0, 1);
1750 if (!rtx_equal_p (res, operands[0]))
1751 emit_move_insn (operands[0], res);
1752 DONE;
1753 })
1754
1755 (define_expand "cstore<mode>4"
1756 [(set (reg:CC FLAGS_REG)
1757 (compare:CC (match_operand:MODEF 2 "cmp_fp_expander_operand")
1758 (match_operand:MODEF 3 "cmp_fp_expander_operand")))
1759 (set (match_operand:QI 0 "register_operand")
1760 (match_operator 1 "ix86_fp_comparison_operator"
1761 [(reg:CC FLAGS_REG)
1762 (const_int 0)]))]
1763 "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
1764 {
1765 ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
1766 operands[2], operands[3]);
1767 DONE;
1768 })
1769
1770 (define_expand "cbranchcc4"
1771 [(set (pc) (if_then_else
1772 (match_operator 0 "comparison_operator"
1773 [(match_operand 1 "flags_reg_operand")
1774 (match_operand 2 "const0_operand")])
1775 (label_ref (match_operand 3))
1776 (pc)))]
1777 ""
1778 {
1779 ix86_expand_branch (GET_CODE (operands[0]),
1780 operands[1], operands[2], operands[3]);
1781 DONE;
1782 })
1783
1784 (define_expand "cstorecc4"
1785 [(set (match_operand:QI 0 "register_operand")
1786 (match_operator 1 "comparison_operator"
1787 [(match_operand 2 "flags_reg_operand")
1788 (match_operand 3 "const0_operand")]))]
1789 ""
1790 {
1791 ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
1792 operands[2], operands[3]);
1793 DONE;
1794 })
1795
1796 ;; FP compares, step 1:
1797 ;; Set the FP condition codes and move fpsr to ax.
1798
1799 ;; We may not use "#" to split and emit these
1800 ;; due to reg-stack pops killing fpsr.
1801
1802 (define_insn "*cmpxf_i387"
1803 [(set (match_operand:HI 0 "register_operand" "=a")
1804 (unspec:HI
1805 [(compare:CCFP
1806 (match_operand:XF 1 "register_operand" "f")
1807 (match_operand:XF 2 "reg_or_0_operand" "fC"))]
1808 UNSPEC_FNSTSW))]
1809 "TARGET_80387"
1810 "* return output_fp_compare (insn, operands, false, false);"
1811 [(set_attr "type" "multi")
1812 (set_attr "unit" "i387")
1813 (set_attr "mode" "XF")])
1814
1815 (define_insn "*cmp<mode>_i387"
1816 [(set (match_operand:HI 0 "register_operand" "=a")
1817 (unspec:HI
1818 [(compare:CCFP
1819 (match_operand:MODEF 1 "register_operand" "f")
1820 (match_operand:MODEF 2 "nonimm_or_0_operand" "fmC"))]
1821 UNSPEC_FNSTSW))]
1822 "TARGET_80387"
1823 "* return output_fp_compare (insn, operands, false, false);"
1824 [(set_attr "type" "multi")
1825 (set_attr "unit" "i387")
1826 (set_attr "mode" "<MODE>")])
1827
1828 (define_insn "*cmp<X87MODEF:mode>_<SWI24:mode>_i387"
1829 [(set (match_operand:HI 0 "register_operand" "=a")
1830 (unspec:HI
1831 [(compare:CCFP
1832 (match_operand:X87MODEF 1 "register_operand" "f")
1833 (float:X87MODEF
1834 (match_operand:SWI24 2 "nonimmediate_operand" "m")))]
1835 UNSPEC_FNSTSW))]
1836 "TARGET_80387
1837 && (TARGET_USE_<SWI24:MODE>MODE_FIOP
1838 || optimize_function_for_size_p (cfun))"
1839 "* return output_fp_compare (insn, operands, false, false);"
1840 [(set_attr "type" "multi")
1841 (set_attr "unit" "i387")
1842 (set_attr "fp_int_src" "true")
1843 (set_attr "mode" "<SWI24:MODE>")])
1844
1845 (define_insn "*cmpu<mode>_i387"
1846 [(set (match_operand:HI 0 "register_operand" "=a")
1847 (unspec:HI
1848 [(unspec:CCFP
1849 [(compare:CCFP
1850 (match_operand:X87MODEF 1 "register_operand" "f")
1851 (match_operand:X87MODEF 2 "register_operand" "f"))]
1852 UNSPEC_NOTRAP)]
1853 UNSPEC_FNSTSW))]
1854 "TARGET_80387"
1855 "* return output_fp_compare (insn, operands, false, true);"
1856 [(set_attr "type" "multi")
1857 (set_attr "unit" "i387")
1858 (set_attr "mode" "<MODE>")])
1859
1860 ;; FP compares, step 2:
1861 ;; Get ax into flags, general case.
1862
;; Operand 0 is an HImode register constrained to "a". If
;; HAVE_AS_IX86_SAHF is not defined, the TARGET_64BIT case emits the
;; instruction as a raw byte (0x9e) through ASM_BYTE instead of the "sahf"
;; mnemonic -- presumably because such assemblers do not accept sahf in
;; 64-bit mode (TODO confirm).
1863 (define_insn "x86_sahf_1"
1864 [(set (reg:CC FLAGS_REG)
1865 (unspec:CC [(match_operand:HI 0 "register_operand" "a")]
1866 UNSPEC_SAHF))]
1867 "TARGET_SAHF"
1868 {
1869 #ifndef HAVE_AS_IX86_SAHF
1870 if (TARGET_64BIT)
1871 return ASM_BYTE "0x9e";
1872 else
1873 #endif
1874 return "sahf";
1875 }
1876 [(set_attr "length" "1")
1877 (set_attr "athlon_decode" "vector")
1878 (set_attr "amdfam10_decode" "direct")
1879 (set_attr "bdver1_decode" "direct")
1880 (set_attr "mode" "SI")])
1881
1882 ;; Pentium Pro can do both steps in one go.
1883 ;; (these instructions set flags directly)
1884
;; "unord_subst" matches a set of a CCFP destination from a CCFP source and
;; produces a variant whose source is wrapped in
;; (unspec:CCFP [...] UNSPEC_NOTRAP). The two subst attributes supply the
;; text that differs between the variants: <unord> is "" in the original
;; pattern and "u" in the substituted one, <unordered> is "false" and
;; "true" respectively.
1885 (define_subst_attr "unord" "unord_subst" "" "u")
1886 (define_subst_attr "unordered" "unord_subst" "false" "true")
1887
1888 (define_subst "unord_subst"
1889 [(set (match_operand:CCFP 0)
1890 (match_operand:CCFP 1))]
1891 ""
1892 [(set (match_dup 0)
1893 (unspec:CCFP
1894 [(match_dup 1)]
1895 UNSPEC_NOTRAP))])
1896
1897 (define_insn "*cmpi<unord>xf_i387"
1898 [(set (reg:CCFP FLAGS_REG)
1899 (compare:CCFP
1900 (match_operand:XF 0 "register_operand" "f")
1901 (match_operand:XF 1 "register_operand" "f")))]
1902 "TARGET_80387 && TARGET_CMOVE"
1903 "* return output_fp_compare (insn, operands, true, <unordered>);"
1904 [(set_attr "type" "fcmp")
1905 (set_attr "mode" "XF")
1906 (set_attr "athlon_decode" "vector")
1907 (set_attr "amdfam10_decode" "direct")
1908 (set_attr "bdver1_decode" "double")
1909 (set_attr "znver1_decode" "double")])
1910
;; SFmode/DFmode compare that sets the flags directly. Alternative 0 is
;; the x87 form (output_fp_compare on "f" registers), alternative 1 the SSE
;; <unord>comi form on "v" registers or memory. The "enabled" attribute
;; chooses between them: when the mode is handled by SSE math, the SSE
;; alternative is always enabled and the x87 one only with
;; TARGET_MIX_SSE_I387; otherwise only the x87 alternative is enabled.
;; prefix_data16 is "1" only for the SSE alternative in DFmode.
1911 (define_insn "*cmpi<unord><MODEF:mode>"
1912 [(set (reg:CCFP FLAGS_REG)
1913 (compare:CCFP
1914 (match_operand:MODEF 0 "register_operand" "f,v")
1915 (match_operand:MODEF 1 "register_ssemem_operand" "f,vm")))]
1916 "(SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)
1917 || (TARGET_80387 && TARGET_CMOVE)"
1918 "@
1919 * return output_fp_compare (insn, operands, true, <unordered>);
1920 %v<unord>comi<MODEF:ssemodesuffix>\t{%1, %0|%0, %1}"
1921 [(set_attr "type" "fcmp,ssecomi")
1922 (set_attr "prefix" "orig,maybe_vex")
1923 (set_attr "mode" "<MODEF:MODE>")
1924 (set_attr "prefix_rep" "*,0")
1925 (set (attr "prefix_data16")
1926 (cond [(eq_attr "alternative" "0")
1927 (const_string "*")
1928 (eq_attr "mode" "DF")
1929 (const_string "1")
1930 ]
1931 (const_string "0")))
1932 (set_attr "athlon_decode" "vector")
1933 (set_attr "amdfam10_decode" "direct")
1934 (set_attr "bdver1_decode" "double")
1935 (set_attr "znver1_decode" "double")
1936 (set (attr "enabled")
1937 (if_then_else
1938 (match_test ("SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH"))
1939 (if_then_else
1940 (eq_attr "alternative" "0")
1941 (symbol_ref "TARGET_MIX_SSE_I387")
1942 (symbol_ref "true"))
1943 (if_then_else
1944 (eq_attr "alternative" "0")
1945 (symbol_ref "true")
1946 (symbol_ref "false"))))])
1947
;; HFmode compare setting FLAGS_REG in CCFPmode.  Requires
;; TARGET_AVX512FP16; operand 0 must be in a "v" register while operand 1
;; may be a "v" register or memory.  Emits v<unord>comish with an EVEX
;; prefix attribute.
1948 (define_insn "*cmpi<unord>hf"
1949 [(set (reg:CCFP FLAGS_REG)
1950 (compare:CCFP
1951 (match_operand:HF 0 "register_operand" "v")
1952 (match_operand:HF 1 "nonimmediate_operand" "vm")))]
1953 "TARGET_AVX512FP16"
1954 "v<unord>comish\t{%1, %0|%0, %1}"
1955 [(set_attr "type" "ssecomi")
1956 (set_attr "prefix" "evex")
1957 (set_attr "mode" "HF")])
1958 \f
1959 ;; Push/pop instructions.
1960
;; Push of a V1TImode "v" register, available when TARGET_64BIT &&
;; TARGET_STV.  The output template is "#", so the insn is always split.
;; After reload it becomes two sets: the stack pointer is adjusted by
;; -PUSH_ROUNDING (GET_MODE_SIZE (V1TImode)), then the register is stored
;; through operand 0 re-addressed at stack_pointer_rtx (via
;; replace_equiv_address, which keeps the MEM's attributes).
1961 (define_insn_and_split "*pushv1ti2"
1962 [(set (match_operand:V1TI 0 "push_operand" "=<")
1963 (match_operand:V1TI 1 "register_operand" "v"))]
1964 "TARGET_64BIT && TARGET_STV"
1965 "#"
1966 "&& reload_completed"
1967 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
1968 (set (match_dup 0) (match_dup 1))]
1969 {
1970 operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (V1TImode)));
1971 /* Preserve memory attributes. */
1972 operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
1973 }
1974 [(set_attr "type" "multi")
1975 (set_attr "mode" "TI")])
1976
;; Push of a DWI-mode value.  Alternative 0 accepts "riF*o" sources,
;; alternative 1 a "*v" register.  There is no insn condition, and the
;; output template is "#" (type "multi"), so the insn must be split before
;; final output.
1977 (define_insn "*push<mode>2"
1978 [(set (match_operand:DWI 0 "push_operand" "=<,<")
1979 (match_operand:DWI 1 "general_no_elim_operand" "riF*o,*v"))]
1980 ""
1981 "#"
1982 [(set_attr "type" "multi")
1983 (set_attr "mode" "<MODE>")])
1984
;; After reload, split a DWI-mode push whose source satisfies
;; general_gr_operand by calling ix86_split_long_move.  The replacement
;; pattern (const_int 0) is only a placeholder: the preparation code ends
;; with DONE, so the helper emits the actual insns.
1985 (define_split
1986 [(set (match_operand:DWI 0 "push_operand")
1987 (match_operand:DWI 1 "general_gr_operand"))]
1988 "reload_completed"
1989 [(const_int 0)]
1990 "ix86_split_long_move (operands); DONE;")
1991
;; DImode push for TARGET_64BIT.
;;   alternative 0 ("re*m" source): emitted directly as push{q}.
;;   alternative 1 ("*v" source) and alternative 2 ("n" source, with the
;;   destination marked "!<"): output "#", type "multi", i.e. they have to
;;   be split.
1992 (define_insn "*pushdi2_rex64"
1993 [(set (match_operand:DI 0 "push_operand" "=<,<,!<")
1994 (match_operand:DI 1 "general_no_elim_operand" "re*m,*v,n"))]
1995 "TARGET_64BIT"
1996 "@
1997 push{q}\t%1
1998 #
1999 #"
2000 [(set_attr "type" "push,multi,multi")
2001 (set_attr "mode" "DI")])
2002
2003 ;; Convert impossible pushes of immediate to existing instructions.
2004 ;; First try to get a scratch register and go through it. If this
2005 ;; fails, push the sign-extended lower part first and then overwrite
2006 ;; the upper part with a 32-bit move.
2007
;; Scratch-register variant: when a DImode "r" scratch is available under
;; TARGET_64BIT and the pushed immediate is neither a symbolic_operand nor
;; an x86_64_immediate_operand, load the immediate into the scratch and
;; push the scratch register instead.
2008 (define_peephole2
2009 [(match_scratch:DI 2 "r")
2010 (set (match_operand:DI 0 "push_operand")
2011 (match_operand:DI 1 "immediate_operand"))]
2012 "TARGET_64BIT
2013 && !symbolic_operand (operands[1], DImode)
2014 && !x86_64_immediate_operand (operands[1], DImode)"
2015 [(set (match_dup 2) (match_dup 1))
2016 (set (match_dup 0) (match_dup 2))])
2017
;; Split applied once epilogue_completed is set, under the same operand
;; conditions as the scratch-register peephole (TARGET_64BIT, immediate
;; neither symbolic nor an x86_64_immediate_operand).  split_double_mode
;; splits the immediate into operands[2] and operands[3].  The first
;; emitted set pushes gen_lowpart (DImode, operands[2]); the second stores
;; operands[3] into an SImode MEM at stack pointer + 4.
;; Note the statement order: operands[1] is computed from operands[2]
;; before operands[2] is overwritten with the MEM.
2018 (define_split
2019 [(set (match_operand:DI 0 "push_operand")
2020 (match_operand:DI 1 "immediate_operand"))]
2021 "TARGET_64BIT && epilogue_completed
2022 && !symbolic_operand (operands[1], DImode)
2023 && !x86_64_immediate_operand (operands[1], DImode)"
2024 [(set (match_dup 0) (match_dup 1))
2025 (set (match_dup 2) (match_dup 3))]
2026 {
2027 split_double_mode (DImode, &operands[1], 1, &operands[2], &operands[3]);
2028
2029 operands[1] = gen_lowpart (DImode, operands[2]);
2030 operands[2] = gen_rtx_MEM (SImode,
2031 plus_constant (Pmode, stack_pointer_rtx, 4));
2032 })
2033
2034 ;; For TARGET_64BIT we always round up to 8 bytes.
;; SImode push under TARGET_64BIT.  Alternative 0 ("re" source) is emitted
;; as push{q} on the %q form of the operand; alternative 1 ("*v" source)
;; outputs "#" (type "multi") and has to be split.  The "mode" attribute is
;; DI for both alternatives.
2035 (define_insn "*pushsi2_rex64"
2036 [(set (match_operand:SI 0 "push_operand" "=X,X")
2037 (match_operand:SI 1 "nonmemory_no_elim_operand" "re,*v"))]
2038 "TARGET_64BIT"
2039 "@
2040 push{q}\t%q1
2041 #"
2042 [(set_attr "type" "push,multi")
2043 (set_attr "mode" "DI")])
2044
;; SImode push for !TARGET_64BIT.  Alternative 0 ("ri*m" source) is
;; emitted as push{l}; alternative 1 ("*v" source) outputs "#" (type
;; "multi") and has to be split.
2045 (define_insn "*pushsi2"
2046 [(set (match_operand:SI 0 "push_operand" "=<,<")
2047 (match_operand:SI 1 "general_no_elim_operand" "ri*m,*v"))]
2048 "!TARGET_64BIT"
2049 "@
2050 push{l}\t%1
2051 #"
2052 [(set_attr "type" "push,multi")
2053 (set_attr "mode" "SI")])
2054
;; With TARGET_SSE and after reload, split a SWI48DWI-mode push whose
;; source is an sse_reg_operand into two sets: an adjustment of the stack
;; pointer by -PUSH_ROUNDING (GET_MODE_SIZE (mode)), followed by a store
;; through operand 0 re-addressed at stack_pointer_rtx.
2055 (define_split
2056 [(set (match_operand:SWI48DWI 0 "push_operand")
2057 (match_operand:SWI48DWI 1 "sse_reg_operand"))]
2058 "TARGET_SSE && reload_completed"
2059 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
2060 (set (match_dup 0) (match_dup 1))]
2061 {
2062 operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (<SWI48DWI:MODE>mode)));
2063 /* Preserve memory attributes. */
2064 operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
2065 })
2066
2067 ;; emit_push_insn when it calls move_by_pieces requires an insn to
2068 ;; "push a byte/word". But actually we use push{l,q}, which has
2069 ;; the effect of rounding the amount pushed up to a word.
2070
;; Push of an SWI12-mode register or constant ("rn").  There is no
;; narrow push here: the output is push{q} on the %q form of the operand
;; when TARGET_64BIT and push{l} on the %k form otherwise.  The "mode"
;; attribute follows the same test (DI vs. SI).
2071 (define_insn "*push<mode>2"
2072 [(set (match_operand:SWI12 0 "push_operand" "=X")
2073 (match_operand:SWI12 1 "nonmemory_no_elim_operand" "rn"))]
2074 ""
2075 "* return TARGET_64BIT ? \"push{q}\t%q1\" : \"push{l}\t%k1\";"
2076 [(set_attr "type" "push")
2077 (set (attr "mode")
2078 (if_then_else (match_test "TARGET_64BIT")
2079 (const_string "DI")
2080 (const_string "SI")))])
2081
;; W-mode push whose pattern additionally contains
;; (clobber (mem:BLK (scratch))).  Emitted as push{<imodesuffix>}.
;; NOTE(review): judging by the name this form is meant for prologue
;; code; the caller is not visible here -- confirm.
2082 (define_insn "*push<mode>2_prologue"
2083 [(set (match_operand:W 0 "push_operand" "=<")
2084 (match_operand:W 1 "general_no_elim_operand" "r<i>*m"))
2085 (clobber (mem:BLK (scratch)))]
2086 ""
2087 "push{<imodesuffix>}\t%1"
2088 [(set_attr "type" "push")
2089 (set_attr "mode" "<MODE>")])
2090
;; W-mode pop: operand 1 is a pop_operand (">" constraint) and the
;; destination is a register or memory ("=r*m").  Emitted as
;; pop{<imodesuffix>}.
2091 (define_insn "*pop<mode>1"
2092 [(set (match_operand:W 0 "nonimmediate_operand" "=r*m")
2093 (match_operand:W 1 "pop_operand" ">"))]
2094 ""
2095 "pop{<imodesuffix>}\t%0"
2096 [(set_attr "type" "pop")
2097 (set_attr "mode" "<MODE>")])
2098
;; Same set and same pop{<imodesuffix>} output as *pop<mode>1, but the
;; pattern additionally contains (clobber (mem:BLK (scratch))).
;; NOTE(review): judging by the name this form is meant for epilogue
;; code; the caller is not visible here -- confirm.
2099 (define_insn "*pop<mode>1_epilogue"
2100 [(set (match_operand:W 0 "nonimmediate_operand" "=r*m")
2101 (match_operand:W 1 "pop_operand" ">"))
2102 (clobber (mem:BLK (scratch)))]
2103 ""
2104 "pop{<imodesuffix>}\t%0"
2105 [(set_attr "type" "pop")
2106 (set_attr "mode" "<MODE>")])
2107
;; W-mode push whose source is a flags_reg_operand.  Emitted as
;; pushf{<imodesuffix>}; the template references no operand.
2108 (define_insn "*pushfl<mode>2"
2109 [(set (match_operand:W 0 "push_operand" "=<")
2110 (match_operand:W 1 "flags_reg_operand"))]
2111 ""
2112 "pushf{<imodesuffix>}"
2113 [(set_attr "type" "push")
2114 (set_attr "mode" "<MODE>")])
2115
;; W-mode pop whose destination is a flags_reg_operand.  Emitted as
;; popf{<imodesuffix>}; the template references no operand.
2116 (define_insn "*popfl<mode>1"
2117 [(set (match_operand:W 0 "flags_reg_operand")
2118 (match_operand:W 1 "pop_operand" ">"))]
2119 ""
2120 "popf{<imodesuffix>}"
2121 [(set_attr "type" "pop")
2122 (set_attr "mode" "<MODE>")])
2123
2124 \f
2125 ;; Reload patterns to support multi-word load/store
2126 ;; with non-offsetable address.
;; Expander, TARGET_64BIT only.  Operands: 0 = destination memory,
;; 1 = source register, 2 = DImode register used to hold the address
;; (constraint "=&r").  The body copies the address of operand 0 into
;; operand 2, rewrites the MEM to use that register via
;; replace_equiv_address_nv, and emits the store of operand 1.  It ends
;; with DONE, so the parallel in the pattern is used only to describe the
;; operands.
2127 (define_expand "reload_noff_store"
2128 [(parallel [(match_operand 0 "memory_operand" "=m")
2129 (match_operand 1 "register_operand" "r")
2130 (match_operand:DI 2 "register_operand" "=&r")])]
2131 "TARGET_64BIT"
2132 {
2133 rtx mem = operands[0];
2134 rtx addr = XEXP (mem, 0);
2135
2136 emit_move_insn (operands[2], addr);
2137 mem = replace_equiv_address_nv (mem, operands[2]);
2138
2139 emit_insn (gen_rtx_SET (mem, operands[1]));
2140 DONE;
2141 })
2142
;; Expander, TARGET_64BIT only; the load counterpart of reload_noff_store.
;; Operands: 0 = destination register, 1 = source memory, 2 = DImode
;; register used to hold the address (constraint "=r").  The body copies
;; the address of operand 1 into operand 2, rewrites the MEM to use that
;; register via replace_equiv_address_nv, and emits the load into
;; operand 0.  It ends with DONE, so the parallel in the pattern is used
;; only to describe the operands.
2143 (define_expand "reload_noff_load"
2144 [(parallel [(match_operand 0 "register_operand" "=r")
2145 (match_operand 1 "memory_operand" "m")
2146 (match_operand:DI 2 "register_operand" "=r")])]
2147 "TARGET_64BIT"
2148 {
2149 rtx mem = operands[1];
2150 rtx addr = XEXP (mem, 0);
2151
2152 emit_move_insn (operands[2], addr);
2153 mem = replace_equiv_address_nv (mem, operands[2]);
2154
2155 emit_insn (gen_rtx_SET (operands[0], mem));
2156 DONE;
2157 })
2158
2159 ;; Move instructions.
2160
;; XImode move expander, available with TARGET_AVX512F.  All work is
;; delegated to ix86_expand_vector_move; DONE means the set in the
;; pattern itself is never emitted.
2161 (define_expand "movxi"
2162 [(set (match_operand:XI 0 "nonimmediate_operand")
2163 (match_operand:XI 1 "general_operand"))]
2164 "TARGET_AVX512F"
2165 "ix86_expand_vector_move (XImode, operands); DONE;")
2166
;; OImode move expander, available with TARGET_AVX.  All work is
;; delegated to ix86_expand_vector_move; DONE means the set in the
;; pattern itself is never emitted.
2167 (define_expand "movoi"
2168 [(set (match_operand:OI 0 "nonimmediate_operand")
2169 (match_operand:OI 1 "general_operand"))]
2170 "TARGET_AVX"
2171 "ix86_expand_vector_move (OImode, operands); DONE;")
2172
;; TImode move expander, available with TARGET_64BIT or TARGET_SSE.
;; Under TARGET_64BIT the move goes through ix86_expand_move; otherwise
;; (SSE only) it goes through ix86_expand_vector_move.
2173 (define_expand "movti"
2174 [(set (match_operand:TI 0 "nonimmediate_operand")
2175 (match_operand:TI 1 "general_operand"))]
2176 "TARGET_64BIT || TARGET_SSE"
2177 {
2178 if (TARGET_64BIT)
2179 ix86_expand_move (TImode, operands);
2180 else
2181 ix86_expand_vector_move (TImode, operands);
2182 DONE;
2183 })
2184
2185 ;; This expands to what emit_move_complex would generate if we didn't
2186 ;; have a movti pattern. Having this avoids problems with reload on
2187 ;; 32-bit targets when SSE is present, but doesn't seem to be harmful
2188 ;; to have around all the time.
;; CDImode move expander with no target condition.  A push destination is
;; handled by emit_move_complex_push; every other destination by
;; emit_move_complex_parts.
2189 (define_expand "movcdi"
2190 [(set (match_operand:CDI 0 "nonimmediate_operand")
2191 (match_operand:CDI 1 "general_operand"))]
2192 ""
2193 {
2194 if (push_operand (operands[0], CDImode))
2195 emit_move_complex_push (CDImode, operands[0], operands[1]);
2196 else
2197 emit_move_complex_parts (operands[0], operands[1]);
2198 DONE;
2199 })
2200
;; Move expander for every mode in the SWI1248x iterator, with no target
;; condition.  All work is delegated to ix86_expand_move.
2201 (define_expand "mov<mode>"
2202 [(set (match_operand:SWI1248x 0 "nonimmediate_operand")
2203 (match_operand:SWI1248x 1 "general_operand"))]
2204 ""
2205 "ix86_expand_move (<MODE>mode, operands); DONE;")
2206
;; After reload, load zero (const0_operand) into an SWI48-mode register.
;; The pattern carries a FLAGS_REG clobber.  For every mode of the
;; iterator the output is xor{l} on the %k form of the register, and the
;; "mode" attribute is SI.
2207 (define_insn "*mov<mode>_xor"
2208 [(set (match_operand:SWI48 0 "register_operand" "=r")
2209 (match_operand:SWI48 1 "const0_operand"))
2210 (clobber (reg:CC FLAGS_REG))]
2211 "reload_completed"
2212 "xor{l}\t%k0, %k0"
2213 [(set_attr "type" "alu1")
2214 (set_attr "mode" "SI")
2215 (set_attr "length_immediate" "0")])
2216
;; After reload, store zero (const0_operand) into an SWI248-mode memory
;; operand.  Output is and{<imodesuffix>} with the constant as the
;; immediate.  The pattern carries a FLAGS_REG clobber, and
;; "length_immediate" is fixed at 1.
2217 (define_insn "*mov<mode>_and"
2218 [(set (match_operand:SWI248 0 "memory_operand" "=m")
2219 (match_operand:SWI248 1 "const0_operand"))
2220 (clobber (reg:CC FLAGS_REG))]
2221 "reload_completed"
2222 "and{<imodesuffix>}\t{%1, %0|%0, %1}"
2223 [(set_attr "type" "alu1")
2224 (set_attr "mode" "<MODE>")
2225 (set_attr "length_immediate" "1")])
2226
;; After reload, set an SWI248-mode register or memory operand ("=rm") to
;; -1 (constm1_operand).  Output is or{<imodesuffix>} with the constant as
;; the immediate.  The pattern carries a FLAGS_REG clobber, and
;; "length_immediate" is fixed at 1.
2227 (define_insn "*mov<mode>_or"
2228 [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm")
2229 (match_operand:SWI248 1 "constm1_operand"))
2230 (clobber (reg:CC FLAGS_REG))]
2231 "reload_completed"
2232 "or{<imodesuffix>}\t{%1, %0|%0, %1}"
2233 [(set_attr "type" "alu1")
2234 (set_attr "mode" "<MODE>")
2235 (set_attr "length_immediate" "1")])
2236
;; XImode move, available with TARGET_AVX512F when at least one operand
;; is a register.  Alternatives 0 and 1 (constant sources "C" and "BC")
;; have type "sselog1" and are output by standard_sse_constant_opcode;
;; alternatives 2 (load or register copy) and 3 (store) have type
;; "ssemov" and are output by ix86_output_ssemov.
2237 (define_insn "*movxi_internal_avx512f"
2238 [(set (match_operand:XI 0 "nonimmediate_operand" "=v,v ,v ,m")
2239 (match_operand:XI 1 "nonimmediate_or_sse_const_operand" " C,BC,vm,v"))]
2240 "TARGET_AVX512F
2241 && (register_operand (operands[0], XImode)
2242 || register_operand (operands[1], XImode))"
2243 {
2244 switch (get_attr_type (insn))
2245 {
2246 case TYPE_SSELOG1:
2247 return standard_sse_constant_opcode (insn, operands);
2248
2249 case TYPE_SSEMOV:
2250 return ix86_output_ssemov (insn, operands);
2251
2252 default:
2253 gcc_unreachable ();
2254 }
2255 }
2256 [(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
2257 (set_attr "prefix" "evex")
2258 (set_attr "mode" "XI")])
2259
;; OImode move, available with TARGET_AVX when at least one operand is a
;; register.  Alternatives 0 and 1 (constant sources "C" and "BC") have
;; type "sselog1" and are output by standard_sse_constant_opcode;
;; alternatives 2 and 3 have type "ssemov" and are output by
;; ix86_output_ssemov.  Alternative 1 is additionally restricted by the
;; "isa" attribute to avx2.
2260 (define_insn "*movoi_internal_avx"
2261 [(set (match_operand:OI 0 "nonimmediate_operand" "=v,v ,v ,m")
2262 (match_operand:OI 1 "nonimmediate_or_sse_const_operand" " C,BC,vm,v"))]
2263 "TARGET_AVX
2264 && (register_operand (operands[0], OImode)
2265 || register_operand (operands[1], OImode))"
2266 {
2267 switch (get_attr_type (insn))
2268 {
2269 case TYPE_SSELOG1:
2270 return standard_sse_constant_opcode (insn, operands);
2271
2272 case TYPE_SSEMOV:
2273 return ix86_output_ssemov (insn, operands);
2274
2275 default:
2276 gcc_unreachable ();
2277 }
2278 }
2279 [(set_attr "isa" "*,avx2,*,*")
2280 (set_attr "type" "sselog1,sselog1,ssemov,ssemov")
2281 (set_attr "prefix" "vex")
2282 (set_attr "mode" "OI")])
2283
;; TImode move with 8 alternatives (0-7).  The insn is valid either for
;; TARGET_64BIT when the operands are not both memory, or for TARGET_SSE
;; when operand 1 satisfies nonimmediate_or_sse_const_operand and at least
;; one operand is a register.
;; Output is chosen from the "type" attribute:
;;   0,1,6,7  multi    -> "#" (has to be split); "isa" is x64
;;   2,3      sselog1  -> standard_sse_constant_opcode ("isa" sse2 for 3)
;;   4,5      ssemov   -> ix86_output_ssemov
;; The "mode" attribute is DI for alternatives 0 and 1; otherwise TI under
;; TARGET_AVX, V4SF when !TARGET_SSE2 or optimizing the function for size,
;; V4SF for the store alternative 5 under TARGET_SSE_TYPELESS_STORES, and
;; TI in all remaining cases.
;; "preferred_for_speed" ties alternatives 6 and 7 (moves between "Yd" and
;; "r") to TARGET_INTER_UNIT_MOVES_FROM_VEC / _TO_VEC respectively.
2284 (define_insn "*movti_internal"
2285 [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v,v ,v ,m,?r,?Yd")
2286 (match_operand:TI 1 "general_operand" "riFo,re,C,BC,vm,v,Yd,r"))]
2287 "(TARGET_64BIT
2288 && !(MEM_P (operands[0]) && MEM_P (operands[1])))
2289 || (TARGET_SSE
2290 && nonimmediate_or_sse_const_operand (operands[1], TImode)
2291 && (register_operand (operands[0], TImode)
2292 || register_operand (operands[1], TImode)))"
2293 {
2294 switch (get_attr_type (insn))
2295 {
2296 case TYPE_MULTI:
2297 return "#";
2298
2299 case TYPE_SSELOG1:
2300 return standard_sse_constant_opcode (insn, operands);
2301
2302 case TYPE_SSEMOV:
2303 return ix86_output_ssemov (insn, operands);
2304
2305 default:
2306 gcc_unreachable ();
2307 }
2308 }
2309 [(set (attr "isa")
2310 (cond [(eq_attr "alternative" "0,1,6,7")
2311 (const_string "x64")
2312 (eq_attr "alternative" "3")
2313 (const_string "sse2")
2314 ]
2315 (const_string "*")))
2316 (set (attr "type")
2317 (cond [(eq_attr "alternative" "0,1,6,7")
2318 (const_string "multi")
2319 (eq_attr "alternative" "2,3")
2320 (const_string "sselog1")
2321 ]
2322 (const_string "ssemov")))
2323 (set (attr "prefix")
2324 (if_then_else (eq_attr "type" "sselog1,ssemov")
2325 (const_string "maybe_vex")
2326 (const_string "orig")))
2327 (set (attr "mode")
2328 (cond [(eq_attr "alternative" "0,1")
2329 (const_string "DI")
2330 (match_test "TARGET_AVX")
2331 (const_string "TI")
2332 (ior (not (match_test "TARGET_SSE2"))
2333 (match_test "optimize_function_for_size_p (cfun)"))
2334 (const_string "V4SF")
2335 (and (eq_attr "alternative" "5")
2336 (match_test "TARGET_SSE_TYPELESS_STORES"))
2337 (const_string "V4SF")
2338 ]
2339 (const_string "TI")))
2340 (set (attr "preferred_for_speed")
2341 (cond [(eq_attr "alternative" "6")
2342 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
2343 (eq_attr "alternative" "7")
2344 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
2345 ]
2346 (symbol_ref "true")))])
2347
;; After reload, with TARGET_64BIT && TARGET_SSE4_1, split a TImode move
;; from a general register into an SSE register.  The preparation code
;; first emits a DImode move of the low parts of both operands.  The
;; replacement pattern then merges the high DImode part of the source
;; (operands[3]) into the V2DImode view of the destination (operands[2])
;; using vec_merge/vec_duplicate with mask (const_int 2).
2348 (define_split
2349 [(set (match_operand:TI 0 "sse_reg_operand")
2350 (match_operand:TI 1 "general_reg_operand"))]
2351 "TARGET_64BIT && TARGET_SSE4_1
2352 && reload_completed"
2353 [(set (match_dup 2)
2354 (vec_merge:V2DI
2355 (vec_duplicate:V2DI (match_dup 3))
2356 (match_dup 2)
2357 (const_int 2)))]
2358 {
2359 operands[2] = lowpart_subreg (V2DImode, operands[0], TImode);
2360 operands[3] = gen_highpart (DImode, operands[1]);
2361
2362 emit_move_insn (gen_lowpart (DImode, operands[0]),
2363 gen_lowpart (DImode, operands[1]));
2364 })
2365
;; DImode move with 28 alternatives (0-27).  The insn condition rejects
;; memory-to-memory moves and requires ix86_hardreg_mov_ok.  The output
;; code dispatches on the "type" attribute, which is derived from the
;; alternative number:
;;   0,1,17,18     multi    -> "#" (has to be split)
;;   6             mmx      -> pxor
;;   7-11          mmxmov   -> movq, or movd when
;;                             !HAVE_AS_IX86_INTERUNIT_MOVQ and either
;;                             operand is a general register
;;   12            sselog1  -> standard_sse_constant_opcode
;;   13-16,19,20   ssemov   -> ix86_output_ssemov
;;   21,22         ssecvt   -> movq2dq (SSE destination) / movdq2q
;;   23-26         mskmov   -> kmovq
;;   27            msklog   -> kxorq for 0, kxnorq for -1
;; Any other alternative is "lea" when operand 0 is a register and operand 1
;; a pic_32bit_operand, and "imov" otherwise.  For "imov" the output is
;; mov{l} when the "mode" attribute is SI (alternative 2), movabs{q} for
;; alternative 4, lea{q} when ix86_use_lea_for_mov says so, else mov{q}.
;; Alternatives 15 and 16 are both stores from "v" to "m"; the "enabled"
;; attribute disables 15 and enables 16 exactly when
;; TARGET_STV && TARGET_SSE2 holds.
2366 (define_insn "*movdi_internal"
2367 [(set (match_operand:DI 0 "nonimmediate_operand"
2368 "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r,?*y,?v,?v,?v,m ,m,?r ,?*Yd,?r,?v,?*y,?*x,*k,*k ,*r,*m,*k")
2369 (match_operand:DI 1 "general_operand"
2370 "riFo,riF,Z,rem,i,re,C ,*y,Bk ,*y,*y,r ,C ,?v,Bk,?v,v,*Yd,r ,?v,r ,*x ,*y ,*r,*kBk,*k,*k,CBC"))]
2371 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
2372 && ix86_hardreg_mov_ok (operands[0], operands[1])"
2373 {
2374 switch (get_attr_type (insn))
2375 {
2376 case TYPE_MSKMOV:
2377 return "kmovq\t{%1, %0|%0, %1}";
2378
2379 case TYPE_MSKLOG:
2380 if (operands[1] == const0_rtx)
2381 return "kxorq\t%0, %0, %0";
2382 else if (operands[1] == constm1_rtx)
2383 return "kxnorq\t%0, %0, %0";
2384 gcc_unreachable ();
2385
2386 case TYPE_MULTI:
2387 return "#";
2388
2389 case TYPE_MMX:
2390 return "pxor\t%0, %0";
2391
2392 case TYPE_MMXMOV:
2393 /* Handle broken assemblers that require movd instead of movq. */
2394 if (!HAVE_AS_IX86_INTERUNIT_MOVQ
2395 && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])))
2396 return "movd\t{%1, %0|%0, %1}";
2397 return "movq\t{%1, %0|%0, %1}";
2398
2399 case TYPE_SSELOG1:
2400 return standard_sse_constant_opcode (insn, operands);
2401
2402 case TYPE_SSEMOV:
2403 return ix86_output_ssemov (insn, operands);
2404
2405 case TYPE_SSECVT:
2406 if (SSE_REG_P (operands[0]))
2407 return "movq2dq\t{%1, %0|%0, %1}";
2408 else
2409 return "movdq2q\t{%1, %0|%0, %1}";
2410
2411 case TYPE_LEA:
2412 return "lea{q}\t{%E1, %0|%0, %E1}";
2413
2414 case TYPE_IMOV:
2415 gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1]));
2416 if (get_attr_mode (insn) == MODE_SI)
2417 return "mov{l}\t{%k1, %k0|%k0, %k1}";
2418 else if (which_alternative == 4)
2419 return "movabs{q}\t{%1, %0|%0, %1}";
2420 else if (ix86_use_lea_for_mov (insn, operands))
2421 return "lea{q}\t{%E1, %0|%0, %E1}";
2422 else
2423 return "mov{q}\t{%1, %0|%0, %1}";
2424
2425 default:
2426 gcc_unreachable ();
2427 }
2428 }
2429 [(set (attr "isa")
2430 (cond [(eq_attr "alternative" "0,1,17,18")
2431 (const_string "nox64")
2432 (eq_attr "alternative" "2,3,4,5,10,11,23,25")
2433 (const_string "x64")
2434 (eq_attr "alternative" "19,20")
2435 (const_string "x64_sse2")
2436 (eq_attr "alternative" "21,22")
2437 (const_string "sse2")
2438 ]
2439 (const_string "*")))
2440 (set (attr "type")
2441 (cond [(eq_attr "alternative" "0,1,17,18")
2442 (const_string "multi")
2443 (eq_attr "alternative" "6")
2444 (const_string "mmx")
2445 (eq_attr "alternative" "7,8,9,10,11")
2446 (const_string "mmxmov")
2447 (eq_attr "alternative" "12")
2448 (const_string "sselog1")
2449 (eq_attr "alternative" "13,14,15,16,19,20")
2450 (const_string "ssemov")
2451 (eq_attr "alternative" "21,22")
2452 (const_string "ssecvt")
2453 (eq_attr "alternative" "23,24,25,26")
2454 (const_string "mskmov")
2455 (eq_attr "alternative" "27")
2456 (const_string "msklog")
2457 (and (match_operand 0 "register_operand")
2458 (match_operand 1 "pic_32bit_operand"))
2459 (const_string "lea")
2460 ]
2461 (const_string "imov")))
2462 (set (attr "modrm")
2463 (if_then_else
2464 (and (eq_attr "alternative" "4") (eq_attr "type" "imov"))
2465 (const_string "0")
2466 (const_string "*")))
2467 (set (attr "length_immediate")
2468 (if_then_else
2469 (and (eq_attr "alternative" "4") (eq_attr "type" "imov"))
2470 (const_string "8")
2471 (const_string "*")))
2472 (set (attr "prefix_rex")
2473 (if_then_else
2474 (eq_attr "alternative" "10,11,19,20")
2475 (const_string "1")
2476 (const_string "*")))
2477 (set (attr "prefix")
2478 (if_then_else (eq_attr "type" "sselog1,ssemov")
2479 (const_string "maybe_vex")
2480 (const_string "orig")))
2481 (set (attr "prefix_data16")
2482 (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "DI"))
2483 (const_string "1")
2484 (const_string "*")))
2485 (set (attr "mode")
2486 (cond [(eq_attr "alternative" "2")
2487 (const_string "SI")
2488 (eq_attr "alternative" "12,13")
2489 (cond [(match_test "TARGET_AVX")
2490 (const_string "TI")
2491 (ior (not (match_test "TARGET_SSE2"))
2492 (match_test "optimize_function_for_size_p (cfun)"))
2493 (const_string "V4SF")
2494 ]
2495 (const_string "TI"))
2496
2497 (and (eq_attr "alternative" "14,15,16")
2498 (not (match_test "TARGET_SSE2")))
2499 (const_string "V2SF")
2500 ]
2501 (const_string "DI")))
2502 (set (attr "preferred_for_speed")
2503 (cond [(eq_attr "alternative" "10,17,19")
2504 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
2505 (eq_attr "alternative" "11,18,20")
2506 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
2507 ]
2508 (symbol_ref "true")))
2509 (set (attr "enabled")
2510 (cond [(eq_attr "alternative" "15")
2511 (if_then_else
2512 (match_test "TARGET_STV && TARGET_SSE2")
2513 (symbol_ref "false")
2514 (const_string "*"))
2515 (eq_attr "alternative" "16")
2516 (if_then_else
2517 (match_test "TARGET_STV && TARGET_SSE2")
2518 (symbol_ref "true")
2519 (symbol_ref "false"))
2520 ]
2521 (const_string "*")))])
2522
;; After reload, with TARGET_SSE4_1, split a <DWI>-mode move from an SSE
;; register into a general register.  The preparation code first emits a
;; <MODE>-mode move of the low parts of both operands.  The replacement
;; pattern then sets the high part of the destination (operands[2]) to
;; element 1 of the <ssevecmode> view of the source (operands[3]) via
;; vec_select.
2523 (define_split
2524 [(set (match_operand:<DWI> 0 "general_reg_operand")
2525 (match_operand:<DWI> 1 "sse_reg_operand"))]
2526 "TARGET_SSE4_1
2527 && reload_completed"
2528 [(set (match_dup 2)
2529 (vec_select:DWIH
2530 (match_dup 3)
2531 (parallel [(const_int 1)])))]
2532 {
2533 operands[2] = gen_highpart (<MODE>mode, operands[0]);
2534 operands[3] = lowpart_subreg (<ssevecmode>mode, operands[1], <DWI>mode);
2535
2536 emit_move_insn (gen_lowpart (<MODE>mode, operands[0]),
2537 gen_lowpart (<MODE>mode, operands[1]));
2538 })
2539
;; After reload, split a DWI-mode move whose destination satisfies
;; nonimmediate_gr_operand and whose source satisfies general_gr_operand
;; by calling ix86_split_long_move.  The replacement pattern (const_int 0)
;; is only a placeholder because the preparation code ends with DONE.
2540 (define_split
2541 [(set (match_operand:DWI 0 "nonimmediate_gr_operand")
2542 (match_operand:DWI 1 "general_gr_operand"))]
2543 "reload_completed"
2544 [(const_int 0)]
2545 "ix86_split_long_move (operands); DONE;")
2546
;; After reload, with !TARGET_64BIT && TARGET_SSE4_1, split a DImode move
;; from a general register into an SSE register.  The preparation code
;; first emits an SImode move of the low parts of both operands.  The
;; replacement pattern then merges the high SImode part of the source
;; (operands[3]) into the V4SImode view of the destination (operands[2])
;; using vec_merge/vec_duplicate with mask (const_int 2).
2547 (define_split
2548 [(set (match_operand:DI 0 "sse_reg_operand")
2549 (match_operand:DI 1 "general_reg_operand"))]
2550 "!TARGET_64BIT && TARGET_SSE4_1
2551 && reload_completed"
2552 [(set (match_dup 2)
2553 (vec_merge:V4SI
2554 (vec_duplicate:V4SI (match_dup 3))
2555 (match_dup 2)
2556 (const_int 2)))]
2557 {
2558 operands[2] = lowpart_subreg (V4SImode, operands[0], DImode);
2559 operands[3] = gen_highpart (SImode, operands[1]);
2560
2561 emit_move_insn (gen_lowpart (SImode, operands[0]),
2562 gen_lowpart (SImode, operands[1]));
2563 })
2564
2565 ;; movabsq $0x0012345678000000, %rax is longer
2566 ;; than movl $0x12345678, %eax; shlq $24, %rax.
;; Replace a DImode constant load by a load of the constant with its
;; trailing zero bits removed, followed by a left shift by that many bits.
;; Conditions: TARGET_64BIT, optimizing the insn for size, destination is
;; a LEGACY_INT_REG_P register, the constant is neither an
;; x86_64_immediate_operand nor an x86_64_zext_immediate_operand, the
;; constant shifted right by its ctz_hwi count has no bits set above the
;; low 32, and FLAGS_REG is dead (the shift in the replacement clobbers
;; it).
2567 (define_peephole2
2568 [(set (match_operand:DI 0 "register_operand")
2569 (match_operand:DI 1 "const_int_operand"))]
2570 "TARGET_64BIT
2571 && optimize_insn_for_size_p ()
2572 && LEGACY_INT_REG_P (operands[0])
2573 && !x86_64_immediate_operand (operands[1], DImode)
2574 && !x86_64_zext_immediate_operand (operands[1], DImode)
2575 && !((UINTVAL (operands[1]) >> ctz_hwi (UINTVAL (operands[1])))
2576 & ~(HOST_WIDE_INT) 0xffffffff)
2577 && peep2_regno_dead_p (0, FLAGS_REG)"
2578 [(set (match_dup 0) (match_dup 1))
2579 (parallel [(set (match_dup 0) (ashift:DI (match_dup 0) (match_dup 2)))
2580 (clobber (reg:CC FLAGS_REG))])]
2581 {
2582 int shift = ctz_hwi (UINTVAL (operands[1]));
2583 operands[1] = gen_int_mode (UINTVAL (operands[1]) >> shift, DImode);
2584 operands[2] = gen_int_mode (shift, QImode);
2585 })
2586
;; SImode move with 18 alternatives (0-17).  The insn condition rejects
;; memory-to-memory moves and requires ix86_hardreg_mov_ok.  The output
;; code dispatches on the "type" attribute, which is derived from the
;; alternative number:
;;   2          mmx      -> pxor
;;   3-7        mmxmov   -> movq when the "mode" attribute is DI
;;                          (alternative 3), movd when it is SI
;;   8          sselog1  -> standard_sse_constant_opcode
;;   9-13       ssemov   -> ix86_output_ssemov
;;   14-16      mskmov   -> kmovd
;;   17         msklog   -> kxord for 0, kxnord for -1
;; Any other alternative (0 and 1) is "lea" when operand 0 is a register
;; and operand 1 a pic_32bit_operand, and "imov" otherwise.  For "imov"
;; the output is lea{l} when ix86_use_lea_for_mov says so, else mov{l}.
;; "preferred_for_speed" ties alternatives 6,12 and 7,13 to
;; TARGET_INTER_UNIT_MOVES_FROM_VEC / _TO_VEC respectively.
2587 (define_insn "*movsi_internal"
2588 [(set (match_operand:SI 0 "nonimmediate_operand"
2589 "=r,m ,*y,*y,?*y,?m,?r,?*y,?v,?v,?v,m ,?r,?v,*k,*k ,*rm,*k")
2590 (match_operand:SI 1 "general_operand"
2591 "g ,re,C ,*y,Bk ,*y,*y,r ,C ,?v,Bk,?v,?v,r ,*r,*kBk,*k ,CBC"))]
2592 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
2593 && ix86_hardreg_mov_ok (operands[0], operands[1])"
2594 {
2595 switch (get_attr_type (insn))
2596 {
2597 case TYPE_SSELOG1:
2598 return standard_sse_constant_opcode (insn, operands);
2599
2600 case TYPE_MSKMOV:
2601 return "kmovd\t{%1, %0|%0, %1}";
2602
2603 case TYPE_MSKLOG:
2604 if (operands[1] == const0_rtx)
2605 return "kxord\t%0, %0, %0";
2606 else if (operands[1] == constm1_rtx)
2607 return "kxnord\t%0, %0, %0";
2608 gcc_unreachable ();
2609
2610 case TYPE_SSEMOV:
2611 return ix86_output_ssemov (insn, operands);
2612
2613 case TYPE_MMX:
2614 return "pxor\t%0, %0";
2615
2616 case TYPE_MMXMOV:
2617 switch (get_attr_mode (insn))
2618 {
2619 case MODE_DI:
2620 return "movq\t{%1, %0|%0, %1}";
2621 case MODE_SI:
2622 return "movd\t{%1, %0|%0, %1}";
2623
2624 default:
2625 gcc_unreachable ();
2626 }
2627
2628 case TYPE_LEA:
2629 return "lea{l}\t{%E1, %0|%0, %E1}";
2630
2631 case TYPE_IMOV:
2632 gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1]));
2633 if (ix86_use_lea_for_mov (insn, operands))
2634 return "lea{l}\t{%E1, %0|%0, %E1}";
2635 else
2636 return "mov{l}\t{%1, %0|%0, %1}";
2637
2638 default:
2639 gcc_unreachable ();
2640 }
2641 }
2642 [(set (attr "isa")
2643 (cond [(eq_attr "alternative" "12,13")
2644 (const_string "sse2")
2645 ]
2646 (const_string "*")))
2647 (set (attr "type")
2648 (cond [(eq_attr "alternative" "2")
2649 (const_string "mmx")
2650 (eq_attr "alternative" "3,4,5,6,7")
2651 (const_string "mmxmov")
2652 (eq_attr "alternative" "8")
2653 (const_string "sselog1")
2654 (eq_attr "alternative" "9,10,11,12,13")
2655 (const_string "ssemov")
2656 (eq_attr "alternative" "14,15,16")
2657 (const_string "mskmov")
2658 (eq_attr "alternative" "17")
2659 (const_string "msklog")
2660 (and (match_operand 0 "register_operand")
2661 (match_operand 1 "pic_32bit_operand"))
2662 (const_string "lea")
2663 ]
2664 (const_string "imov")))
2665 (set (attr "prefix")
2666 (if_then_else (eq_attr "type" "sselog1,ssemov")
2667 (const_string "maybe_vex")
2668 (const_string "orig")))
2669 (set (attr "prefix_data16")
2670 (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "SI"))
2671 (const_string "1")
2672 (const_string "*")))
2673 (set (attr "mode")
2674 (cond [(eq_attr "alternative" "2,3")
2675 (const_string "DI")
2676 (eq_attr "alternative" "8,9")
2677 (cond [(match_test "TARGET_AVX")
2678 (const_string "TI")
2679 (ior (not (match_test "TARGET_SSE2"))
2680 (match_test "optimize_function_for_size_p (cfun)"))
2681 (const_string "V4SF")
2682 ]
2683 (const_string "TI"))
2684
2685 (and (eq_attr "alternative" "10,11")
2686 (not (match_test "TARGET_SSE2")))
2687 (const_string "SF")
2688 ]
2689 (const_string "SI")))
2690 (set (attr "preferred_for_speed")
2691 (cond [(eq_attr "alternative" "6,12")
2692 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
2693 (eq_attr "alternative" "7,13")
2694 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
2695 ]
2696 (symbol_ref "true")))])
2697
2698 ;; With -Oz, transform mov $imm,reg to the shorter push $imm; pop reg.
;; Conditions: optimizing the insn for size with optimize_size > 1, the
;; constant is nonzero and within [-128, 127], the red zone is not in use
;; (!ix86_red_zone_used), and the destination is not the stack pointer.
;; The replacement is a store of the constant to a word_mode MEM addressed
;; by PRE_DEC of the stack pointer, followed by a load of the destination
;; from a word_mode MEM addressed by POST_INC of the stack pointer.  If
;; the destination is not already word_mode it is re-created as a
;; word_mode register with the same register number.
2699 (define_peephole2
2700 [(set (match_operand:SWI248 0 "general_reg_operand")
2701 (match_operand:SWI248 1 "const_int_operand"))]
2702 "optimize_insn_for_size_p () && optimize_size > 1
2703 && operands[1] != const0_rtx
2704 && IN_RANGE (INTVAL (operands[1]), -128, 127)
2705 && !ix86_red_zone_used
2706 && REGNO (operands[0]) != SP_REG"
2707 [(set (match_dup 2) (match_dup 1))
2708 (set (match_dup 0) (match_dup 3))]
2709 {
2710 if (GET_MODE (operands[0]) != word_mode)
2711 operands[0] = gen_rtx_REG (word_mode, REGNO (operands[0]));
2712
2713 operands[2] = gen_rtx_MEM (word_mode,
2714 gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx));
2715 operands[3] = gen_rtx_MEM (word_mode,
2716 gen_rtx_POST_INC (Pmode, stack_pointer_rtx));
2717 })
2718
2719 ;; With -Oz, transform mov $0,mem to the shorter and $0,mem.
2720 ;; Likewise, transform mov $-1,mem to the shorter or $-1,mem.
;; Conditions: the constant is exactly 0 or -1, the insn is optimized for
;; size with optimize_size > 1, and FLAGS_REG is dead.  The replacement is
;; the same set wrapped in a parallel with a FLAGS_REG clobber.
;; NOTE(review): presumably this is the form that the *mov<mode>_and and
;; *mov<mode>_or insns match -- confirm.
2721 (define_peephole2
2722 [(set (match_operand:SWI248 0 "memory_operand")
2723 (match_operand:SWI248 1 "const_int_operand"))]
2724 "(operands[1] == const0_rtx || operands[1] == constm1_rtx)
2725 && optimize_insn_for_size_p () && optimize_size > 1
2726 && peep2_regno_dead_p (0, FLAGS_REG)"
2727 [(parallel [(set (match_dup 0) (match_dup 1))
2728 (clobber (reg:CC FLAGS_REG))])])
2729
;; HImode move with 15 alternatives (0-14).  The insn condition rejects
;; memory-to-memory moves and requires ix86_hardreg_mov_ok.  The output
;; code dispatches on the "type" attribute:
;;   imovx    -> movz{wl|x} into the %k form of the destination
;;   mskmov   -> kmovw; alternative 4 prints the source and alternative 6
;;               the destination with the %k modifier, 5 and 7 use plain
;;               operands
;;   ssemov   -> ix86_output_ssemov
;;   sselog1  -> standard_sse_constant_opcode when operand 1 satisfies
;;               constraint "C"; otherwise %vpinsrw for an SSE register
;;               destination and %vpextrw for any other destination
;;   msklog   -> kxorw for 0, kxnorw for -1
;;   others   -> mov{l} when the "mode" attribute is SI, else mov{w}
;; The "type" attribute is assigned by the first matching clause:
;;   4-7 mskmov; 8 msklog; 13,14 ssemov under TARGET_AVX512FP16, else
;;   sselog1; 11 sselog1; 9,10,12 ssemov; imov when optimizing the
;;   function for size; imov for alternative 0 unless both
;;   TARGET_PARTIAL_REG_STALL and TARGET_HIMODE_MATH hold; imov for
;;   alternatives 1,2 with an aligned_operand source; imovx for
;;   alternatives 0,2 under TARGET_MOVX; imov otherwise.
2730 (define_insn "*movhi_internal"
2731 [(set (match_operand:HI 0 "nonimmediate_operand"
2732 "=r,r,r,m ,*k,*k ,r ,m ,*k ,?r,?*v,*v,*v,*v,m")
2733 (match_operand:HI 1 "general_operand"
2734 "r ,n,m,rn,r ,*km,*k,*k,CBC,*v,r ,C ,*v,m ,*v"))]
2735 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
2736 && ix86_hardreg_mov_ok (operands[0], operands[1])"
2737 {
2738 switch (get_attr_type (insn))
2739 {
2740 case TYPE_IMOVX:
2741 /* movzwl is faster than movw on p2 due to partial word stalls,
2742 though not as fast as an aligned movl. */
2743 return "movz{wl|x}\t{%1, %k0|%k0, %1}";
2744
2745 case TYPE_MSKMOV:
2746 switch (which_alternative)
2747 {
2748 case 4:
2749 return "kmovw\t{%k1, %0|%0, %k1}";
2750 case 6:
2751 return "kmovw\t{%1, %k0|%k0, %1}";
2752 case 5:
2753 case 7:
2754 return "kmovw\t{%1, %0|%0, %1}";
2755 default:
2756 gcc_unreachable ();
2757 }
2758
2759 case TYPE_SSEMOV:
2760 return ix86_output_ssemov (insn, operands);
2761
2762 case TYPE_SSELOG1:
2763 if (satisfies_constraint_C (operands[1]))
2764 return standard_sse_constant_opcode (insn, operands);
2765
2766 if (SSE_REG_P (operands[0]))
2767 return "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}";
2768 else
2769 return "%vpextrw\t{$0, %1, %0|%0, %1, 0}";
2770
2771 case TYPE_MSKLOG:
2772 if (operands[1] == const0_rtx)
2773 return "kxorw\t%0, %0, %0";
2774 else if (operands[1] == constm1_rtx)
2775 return "kxnorw\t%0, %0, %0";
2776 gcc_unreachable ();
2777
2778 default:
2779 if (get_attr_mode (insn) == MODE_SI)
2780 return "mov{l}\t{%k1, %k0|%k0, %k1}";
2781 else
2782 return "mov{w}\t{%1, %0|%0, %1}";
2783 }
2784 }
2785 [(set (attr "isa")
2786 (cond [(eq_attr "alternative" "9,10,11,12,13")
2787 (const_string "sse2")
2788 (eq_attr "alternative" "14")
2789 (const_string "sse4")
2790 ]
2791 (const_string "*")))
2792 (set (attr "type")
2793 (cond [(eq_attr "alternative" "4,5,6,7")
2794 (const_string "mskmov")
2795 (eq_attr "alternative" "8")
2796 (const_string "msklog")
2797 (eq_attr "alternative" "13,14")
2798 (if_then_else (match_test "TARGET_AVX512FP16")
2799 (const_string "ssemov")
2800 (const_string "sselog1"))
2801 (eq_attr "alternative" "11")
2802 (const_string "sselog1")
2803 (eq_attr "alternative" "9,10,12")
2804 (const_string "ssemov")
2805 (match_test "optimize_function_for_size_p (cfun)")
2806 (const_string "imov")
2807 (and (eq_attr "alternative" "0")
2808 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
2809 (not (match_test "TARGET_HIMODE_MATH"))))
2810 (const_string "imov")
2811 (and (eq_attr "alternative" "1,2")
2812 (match_operand:HI 1 "aligned_operand"))
2813 (const_string "imov")
2814 (and (match_test "TARGET_MOVX")
2815 (eq_attr "alternative" "0,2"))
2816 (const_string "imovx")
2817 ]
2818 (const_string "imov")))
2819 (set (attr "prefix")
2820 (cond [(eq_attr "alternative" "4,5,6,7,8")
2821 (const_string "vex")
2822 (eq_attr "alternative" "9,10,11,12,13,14")
2823 (const_string "maybe_evex")
2824 ]
2825 (const_string "orig")))
2826 (set (attr "mode")
2827 (cond [(eq_attr "alternative" "9,10")
2828 (if_then_else (match_test "TARGET_AVX512FP16")
2829 (const_string "HI")
2830 (const_string "SI"))
2831 (eq_attr "alternative" "13,14")
2832 (if_then_else (match_test "TARGET_AVX512FP16")
2833 (const_string "HI")
2834 (const_string "TI"))
2835 (eq_attr "alternative" "11")
2836 (cond [(match_test "TARGET_AVX")
2837 (const_string "TI")
2838 (ior (not (match_test "TARGET_SSE2"))
2839 (match_test "optimize_function_for_size_p (cfun)"))
2840 (const_string "V4SF")
2841 ]
2842 (const_string "TI"))
2843 (eq_attr "alternative" "12")
2844 (cond [(match_test "TARGET_AVX512FP16")
2845 (const_string "HF")
2846 (match_test "TARGET_AVX")
2847 (const_string "TI")
2848 (ior (not (match_test "TARGET_SSE2"))
2849 (match_test "optimize_function_for_size_p (cfun)"))
2850 (const_string "V4SF")
2851 ]
2852 (const_string "TI"))
2853 (eq_attr "type" "imovx")
2854 (const_string "SI")
2855 (and (eq_attr "alternative" "1,2")
2856 (match_operand:HI 1 "aligned_operand"))
2857 (const_string "SI")
2858 (and (eq_attr "alternative" "0")
2859 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
2860 (not (match_test "TARGET_HIMODE_MATH"))))
2861 (const_string "SI")
2862 ]
2863 (const_string "HI")))
2864 (set (attr "preferred_for_speed")
2865 (cond [(eq_attr "alternative" "9")
2866 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
2867 (eq_attr "alternative" "10")
2868 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
2869 ]
2870 (symbol_ref "true")))])
2871
2872 ;; The situation is quite tricky about when to choose a full sized (SImode)
2873 ;; move over a QImode move. For Q_REG -> Q_REG moves we use full size only
2874 ;; for partial register dependency machines (such as AMD Athlon), where QImode
2875 ;; moves issue an extra dependency, and for partial register stall machines
2876 ;; that don't use QImode patterns (and a QImode move causes a stall on the
2877 ;; next instruction).
2878 ;;
2879 ;; For loads of Q_REG to NONQ_REG we use full sized moves except for partial
2880 ;; register stall machines, where we use QImode instructions, since
2881 ;; a partial register stall can be caused there. Then we use movzx.
2882
;; QImode move pattern.  The C output code dispatches on the insn's "type"
;; attribute:
;;   imovx  -> zero-extending movz into the 32-bit destination register;
;;   mskmov -> kmovb/kmovw, with the suffix taken from the "mode" attribute;
;;   msklog -> kxorw/kxorb for a constant 0 source, kxnorb for constant -1;
;;   other  -> movl on the 32-bit registers when "mode" is SI, else movb.
;; Memory-to-memory moves are rejected by the insn condition.
2883 (define_insn "*movqi_internal"
2884 [(set (match_operand:QI 0 "nonimmediate_operand"
2885 "=Q,R,r,q,q,r,r ,?r,m ,*k,*k,*r,*m,*k,*k,*k")
2886 (match_operand:QI 1 "general_operand"
2887 "Q ,R,r,n,m,q,rn, m,qn,*r,*k,*k,*k,*m,C,BC"))]
2888 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
2889 && ix86_hardreg_mov_ok (operands[0], operands[1])"
2890
2891 {
2892 char buf[128];
2893 const char *ops;
2894 const char *suffix;
2895
2896 switch (get_attr_type (insn))
2897 {
2898 case TYPE_IMOVX:
2899 gcc_assert (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1]));
2900 return "movz{bl|x}\t{%1, %k0|%k0, %1}";
2901
2902 case TYPE_MSKMOV:
2903 switch (which_alternative)
2904 {
2905 case 9:
2906 ops = "kmov%s\t{%%k1, %%0|%%0, %%k1}";
2907 break;
2908 case 11:
2909 ops = "kmov%s\t{%%1, %%k0|%%k0, %%1}";
2910 break;
2911 case 12:
2912 case 13:
2913 gcc_assert (TARGET_AVX512DQ);
2914 /* FALLTHRU */
2915 case 10:
2916 ops = "kmov%s\t{%%1, %%0|%%0, %%1}";
2917 break;
2918 default:
2919 gcc_unreachable ();
2920 }
2921
2922 suffix = (get_attr_mode (insn) == MODE_HI) ? "w" : "b";
2923
2924 snprintf (buf, sizeof (buf), ops, suffix);
2925 output_asm_insn (buf, operands);
2926 return "";
2927
2928 case TYPE_MSKLOG:
2929 if (operands[1] == const0_rtx)
2930 {
2931 if (get_attr_mode (insn) == MODE_HI)
2932 return "kxorw\t%0, %0, %0";
2933 else
2934 return "kxorb\t%0, %0, %0";
2935 }
2936 else if (operands[1] == constm1_rtx)
2937 {
2938 gcc_assert (TARGET_AVX512DQ);
2939 return "kxnorb\t%0, %0, %0";
2940 }
2941 gcc_unreachable ();
2942
2943 default:
2944 if (get_attr_mode (insn) == MODE_SI)
2945 return "mov{l}\t{%k1, %k0|%k0, %k1}";
2946 else
2947 return "mov{b}\t{%1, %0|%0, %1}";
2948 }
2949 }
;; Alternatives 1 and 2 are 64-bit only; alternatives 12, 13 and 15
;; require AVX512DQ.
2950 [(set (attr "isa")
2951 (cond [(eq_attr "alternative" "1,2")
2952 (const_string "x64")
2953 (eq_attr "alternative" "12,13,15")
2954 (const_string "avx512dq")
2955 ]
2956 (const_string "*")))
;; The arms of this cond are tested in order, so the first matching arm
;; decides the type; anything that matches no arm is a plain imov.
2957 (set (attr "type")
2958 (cond [(eq_attr "alternative" "9,10,11,12,13")
2959 (const_string "mskmov")
2960 (eq_attr "alternative" "14,15")
2961 (const_string "msklog")
2962 (and (eq_attr "alternative" "7")
2963 (not (match_operand:QI 1 "aligned_operand")))
2964 (const_string "imovx")
2965 (match_test "optimize_function_for_size_p (cfun)")
2966 (const_string "imov")
2967 (and (eq_attr "alternative" "5")
2968 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
2969 (not (match_test "TARGET_QIMODE_MATH"))))
2970 (const_string "imov")
2971 (eq_attr "alternative" "5,7")
2972 (const_string "imovx")
2973 (and (match_test "TARGET_MOVX")
2974 (eq_attr "alternative" "4"))
2975 (const_string "imovx")
2976 ]
2977 (const_string "imov")))
;; Alternatives 9-15 (the ones with a "k" constraint) use a VEX prefix.
2978 (set (attr "prefix")
2979 (if_then_else (eq_attr "alternative" "9,10,11,12,13,14,15")
2980 (const_string "vex")
2981 (const_string "orig")))
2982 (set (attr "mode")
2983 (cond [(eq_attr "alternative" "5,6,7")
2984 (const_string "SI")
2985 (eq_attr "alternative" "8")
2986 (const_string "QI")
2987 (and (eq_attr "alternative" "9,10,11,14")
2988 (not (match_test "TARGET_AVX512DQ")))
2989 (const_string "HI")
2990 (eq_attr "type" "imovx")
2991 (const_string "SI")
2992 ;; For -Os, 8-bit immediates are always shorter than 32-bit
2993 ;; ones.
2994 (and (eq_attr "type" "imov")
2995 (and (eq_attr "alternative" "3")
2996 (match_test "optimize_function_for_size_p (cfun)")))
2997 (const_string "QI")
2998 ;; For -Os, movl where one or both operands are NON_Q_REGS
2999 ;; and both are LEGACY_REGS is shorter than movb.
3000 ;; Otherwise movb and movl sizes are the same, so decide purely
3001 ;; based on speed factors.
3002 (and (eq_attr "type" "imov")
3003 (and (eq_attr "alternative" "1")
3004 (match_test "optimize_function_for_size_p (cfun)")))
3005 (const_string "SI")
3006 (and (eq_attr "type" "imov")
3007 (and (eq_attr "alternative" "0,1,2,3")
3008 (and (match_test "TARGET_PARTIAL_REG_DEPENDENCY")
3009 (not (match_test "TARGET_PARTIAL_REG_STALL")))))
3010 (const_string "SI")
3011 ;; Avoid partial register stalls when not using QImode arithmetic
3012 (and (eq_attr "type" "imov")
3013 (and (eq_attr "alternative" "0,1,2,3")
3014 (and (match_test "TARGET_PARTIAL_REG_STALL")
3015 (not (match_test "TARGET_QIMODE_MATH")))))
3016 (const_string "SI")
3017 ]
3018 (const_string "QI")))])
3019
3020 /* Reload dislikes loading 0/-1 directly into mask registers.
3021 Try to tidy things up here. */
;; Matches "general register := immediate" followed by
;; "mask register := that general register", where the general register
;; is dead after the second insn, and replaces the pair with a direct
;; "mask register := immediate".  The immediate must be 0, or -1 when the
;; mode is wider than one byte or TARGET_AVX512DQ is set.
3022 (define_peephole2
3023 [(set (match_operand:SWI 0 "general_reg_operand")
3024 (match_operand:SWI 1 "immediate_operand"))
3025 (set (match_operand:SWI 2 "mask_reg_operand")
3026 (match_dup 0))]
3027 "peep2_reg_dead_p (2, operands[0])
3028 && (const0_operand (operands[1], <MODE>mode)
3029 || (constm1_operand (operands[1], <MODE>mode)
3030 && (<MODE_SIZE> > 1 || TARGET_AVX512DQ)))"
3031 [(set (match_dup 2) (match_dup 1))])
3032
3033 ;; Stores and loads of ax to arbitrary constant address.
3034 ;; We fake a second form of the instruction to force reload to load the
3035 ;; address into a register when rax is not available.
;; Store of operand 1 to the memory location whose address is operand 0.
;; Alternative 0 (immediate address, source constrained to "a") emits
;; movabs; alternative 1 (address in a register) emits an ordinary mov.
;; Enabled only for TARGET_LP64 when ix86_check_movabs accepts the insn.
;; The output code replaces operands[0] with the full MEM taken from the
;; insn pattern before printing.
3036 (define_insn "*movabs<mode>_1"
3037 [(set (mem:SWI1248x (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
3038 (match_operand:SWI1248x 1 "nonmemory_operand" "a,r<i>"))]
3039 "TARGET_LP64 && ix86_check_movabs (insn, 0)"
3040 {
3041 /* Recover the full memory rtx. */
3042 operands[0] = SET_DEST (PATTERN (insn));
3043 switch (which_alternative)
3044 {
3045 case 0:
3046 return "movabs{<imodesuffix>}\t{%1, %P0|<iptrsize> PTR [%P0], %1}";
3047 case 1:
3048 return "mov{<imodesuffix>}\t{%1, %0|%0, %1}";
3049 default:
3050 gcc_unreachable ();
3051 }
3052 }
3053 [(set_attr "type" "imov")
3054 (set_attr "modrm" "0,*")
3055 (set_attr "length_address" "8,0")
3056 (set_attr "length_immediate" "0,*")
3057 (set_attr "memory" "store")
3058 (set_attr "mode" "<MODE>")])
3059
;; Load into operand 0 from the memory location whose address is
;; operand 1.  Alternative 0 (destination constrained to "a", immediate
;; address) emits movabs; alternative 1 (address in a register) emits an
;; ordinary mov.  Enabled only for TARGET_LP64 when ix86_check_movabs
;; accepts the insn.  The output code replaces operands[1] with the full
;; MEM taken from the insn pattern before printing.
3060 (define_insn "*movabs<mode>_2"
3061 [(set (match_operand:SWI1248x 0 "register_operand" "=a,r")
3062 (mem:SWI1248x (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
3063 "TARGET_LP64 && ix86_check_movabs (insn, 1)"
3064 {
3065 /* Recover the full memory rtx. */
3066 operands[1] = SET_SRC (PATTERN (insn));
3067 switch (which_alternative)
3068 {
3069 case 0:
3070 return "movabs{<imodesuffix>}\t{%P1, %0|%0, <iptrsize> PTR [%P1]}";
3071 case 1:
3072 return "mov{<imodesuffix>}\t{%1, %0|%0, %1}";
3073 default:
3074 gcc_unreachable ();
3075 }
3076 }
3077 [(set_attr "type" "imov")
3078 (set_attr "modrm" "0,*")
3079 (set_attr "length_address" "8,0")
3080 (set_attr "length_immediate" "0")
3081 (set_attr "memory" "load")
3082 (set_attr "mode" "<MODE>")])
3083
;; Exchange the contents of two SWI48 registers with a single xchg.
;; Both operands are read and written ("+r"); the pattern is the pair of
;; sets 0 := 1 and 1 := 0.
3084 (define_insn "swap<mode>"
3085 [(set (match_operand:SWI48 0 "register_operand" "+r")
3086 (match_operand:SWI48 1 "register_operand" "+r"))
3087 (set (match_dup 1)
3088 (match_dup 0))]
3089 ""
3090 "xchg{<imodesuffix>}\t%1, %0"
3091 [(set_attr "type" "imov")
3092 (set_attr "mode" "<MODE>")
3093 (set_attr "pent_pair" "np")
3094 (set_attr "athlon_decode" "vector")
3095 (set_attr "amdfam10_decode" "double")
3096 (set_attr "bdver1_decode" "double")])
3097
;; Register exchange for the SWI12 modes.  Alternative 0 emits xchg with
;; the mode's own suffix; alternative 1 exchanges the full 32-bit
;; registers with xchgl.  Alternative 0 is marked not preferred for size,
;; and alternative 1 is preferred for speed only when
;; TARGET_PARTIAL_REG_STALL is not set.
3098 (define_insn "*swap<mode>"
3099 [(set (match_operand:SWI12 0 "register_operand" "+<r>,r")
3100 (match_operand:SWI12 1 "register_operand" "+<r>,r"))
3101 (set (match_dup 1)
3102 (match_dup 0))]
3103 ""
3104 "@
3105 xchg{<imodesuffix>}\t%1, %0
3106 xchg{l}\t%k1, %k0"
3107 [(set_attr "type" "imov")
3108 (set_attr "mode" "<MODE>,SI")
3109 (set (attr "preferred_for_size")
3110 (cond [(eq_attr "alternative" "0")
3111 (symbol_ref "false")]
3112 (symbol_ref "true")))
3113 ;; Potential partial reg stall on alternative 1.
3114 (set (attr "preferred_for_speed")
3115 (cond [(eq_attr "alternative" "1")
3116 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
3117 (symbol_ref "true")))
3118 (set_attr "pent_pair" "np")
3119 (set_attr "athlon_decode" "vector")
3120 (set_attr "amdfam10_decode" "double")
3121 (set_attr "bdver1_decode" "double")])
3122
;; Matches the three-move sequence
;;   op0 := op1;  op1 := op2;  op2 := op0
;; and, when op0 is dead after the third insn and the insn is being
;; optimized for size, replaces it with a parallel exchange of op1 and
;; op2.
3123 (define_peephole2
3124 [(set (match_operand:SWI 0 "general_reg_operand")
3125 (match_operand:SWI 1 "general_reg_operand"))
3126 (set (match_dup 1)
3127 (match_operand:SWI 2 "general_reg_operand"))
3128 (set (match_dup 2) (match_dup 0))]
3129 "peep2_reg_dead_p (3, operands[0])
3130 && optimize_insn_for_size_p ()"
3131 [(parallel [(set (match_dup 1) (match_dup 2))
3132 (set (match_dup 2) (match_dup 1))])])
3133
3134 ;; Convert moves to/from AX_REG into xchg with -Oz.
;; Applies only when optimize_size > 1, the insn is optimized for size,
;; exactly one of the two registers is AX_REG, and the source register is
;; dead after the move.  The single move is replaced by a parallel
;; exchange of the two registers.
3135 (define_peephole2
3136 [(set (match_operand:SWI48 0 "general_reg_operand")
3137 (match_operand:SWI48 1 "general_reg_operand"))]
3138 "optimize_size > 1
3139 && ((REGNO (operands[0]) == AX_REG)
3140 != (REGNO (operands[1]) == AX_REG))
3141 && optimize_insn_for_size_p ()
3142 && peep2_reg_dead_p (1, operands[1])"
3143 [(parallel [(set (match_dup 0) (match_dup 1))
3144 (set (match_dup 1) (match_dup 0))])])
3145
;; Expander for a strict_low_part store of an SWI12 value.  It asserts
;; that the destination is a SUBREG and FAILs when either
;; TARGET_PARTIAL_REG_STALL is set and the function is optimized for
;; speed, or the mode of the SUBREG's inner register is not a valid
;; integer mode.
3146 (define_expand "movstrict<mode>"
3147 [(set (strict_low_part (match_operand:SWI12 0 "register_operand"))
3148 (match_operand:SWI12 1 "general_operand"))]
3149 ""
3150 {
3151 gcc_assert (SUBREG_P (operands[0]));
3152 if ((TARGET_PARTIAL_REG_STALL && optimize_function_for_speed_p (cfun))
3153 || !VALID_INT_MODE_P (GET_MODE (SUBREG_REG (operands[0]))))
3154 FAIL;
3155 })
3156
;; Narrow mov into the low part of a register (strict_low_part
;; destination).  Available when TARGET_PARTIAL_REG_STALL is not set or
;; the function is optimized for size.
3157 (define_insn "*movstrict<mode>_1"
3158 [(set (strict_low_part
3159 (match_operand:SWI12 0 "register_operand" "+<r>"))
3160 (match_operand:SWI12 1 "general_operand" "<r>mn"))]
3161 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
3162 "mov{<imodesuffix>}\t{%1, %0|%0, %1}"
3163 [(set_attr "type" "imov")
3164 (set_attr "mode" "<MODE>")])
3165
;; Clears the low part of a register (strict_low_part destination, source
;; matching const0_operand) with a narrow xor of the register with
;; itself.  The pattern clobbers FLAGS_REG and is only matched once
;; reload_completed is set.
3166 (define_insn "*movstrict<mode>_xor"
3167 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>"))
3168 (match_operand:SWI12 1 "const0_operand"))
3169 (clobber (reg:CC FLAGS_REG))]
3170 "reload_completed"
3171 "xor{<imodesuffix>}\t%0, %0"
3172 [(set_attr "type" "alu1")
3173 (set_attr "mode" "<MODE>")
3174 (set_attr "length_immediate" "0")])
3175
;; Expander for sign_extract on SWI24 registers.  It FAILs unless both
;; operands 2 and 3 are the constant 8, i.e. only the %ah-style high-byte
;; field is handled.  When the source's register number is at most
;; LAST_VIRTUAL_REGISTER and does not satisfy QI_REGNO_P, the source is
;; first copied into a new register.
3176 (define_expand "extv<mode>"
3177 [(set (match_operand:SWI24 0 "register_operand")
3178 (sign_extract:SWI24 (match_operand:SWI24 1 "register_operand")
3179 (match_operand:SI 2 "const_int_operand")
3180 (match_operand:SI 3 "const_int_operand")))]
3181 ""
3182 {
3183 /* Handle extractions from %ah et al. */
3184 if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8)
3185 FAIL;
3186
3187 unsigned int regno = reg_or_subregno (operands[1]);
3188
3189 /* Be careful to expand only with registers having upper parts. */
3190 if (regno <= LAST_VIRTUAL_REGISTER && !QI_REGNO_P (regno))
3191 operands[1] = copy_to_reg (operands[1]);
3192 })
3193
;; Sign-extends the 8-bit field at bit 8 of a "Q"-constrained register
;; (printed as %h1) into the 32-bit form of the destination (%k0) with
;; movsbl / movsx.
3194 (define_insn "*extv<mode>"
3195 [(set (match_operand:SWI24 0 "register_operand" "=R")
3196 (sign_extract:SWI24 (match_operand:SWI24 1 "register_operand" "Q")
3197 (const_int 8)
3198 (const_int 8)))]
3199 ""
3200 "movs{bl|x}\t{%h1, %k0|%k0, %h1}"
3201 [(set_attr "type" "imovx")
3202 (set_attr "mode" "SI")])
3203
;; Expander for zero_extract on SWI248 registers.  ix86_expand_pextr is
;; tried first; if it succeeds the expansion is DONE.  Otherwise the
;; expander FAILs unless both operands 2 and 3 are the constant 8 (the
;; %ah-style high-byte field).  When the source's register number is at
;; most LAST_VIRTUAL_REGISTER and does not satisfy QI_REGNO_P, the source
;; is first copied into a new register.
3204 (define_expand "extzv<mode>"
3205 [(set (match_operand:SWI248 0 "register_operand")
3206 (zero_extract:SWI248 (match_operand:SWI248 1 "register_operand")
3207 (match_operand:SI 2 "const_int_operand")
3208 (match_operand:SI 3 "const_int_operand")))]
3209 ""
3210 {
3211 if (ix86_expand_pextr (operands))
3212 DONE;
3213
3214 /* Handle extractions from %ah et al. */
3215 if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8)
3216 FAIL;
3217
3218 unsigned int regno = reg_or_subregno (operands[1]);
3219
3220 /* Be careful to expand only with registers having upper parts. */
3221 if (regno <= LAST_VIRTUAL_REGISTER && !QI_REGNO_P (regno))
3222 operands[1] = copy_to_reg (operands[1]);
3223 })
3224
;; Stores the high byte (%h1) of a "Q"-constrained register directly to
;; a norex_memory_operand with movb.  Matched only on 64-bit targets
;; after reload.
3225 (define_insn "*extzvqi_mem_rex64"
3226 [(set (match_operand:QI 0 "norex_memory_operand" "=Bn")
3227 (subreg:QI
3228 (zero_extract:SWI248
3229 (match_operand:SWI248 1 "register_operand" "Q")
3230 (const_int 8)
3231 (const_int 8)) 0))]
3232 "TARGET_64BIT && reload_completed"
3233 "mov{b}\t{%h1, %0|%0, %h1}"
3234 [(set_attr "type" "imov")
3235 (set_attr "mode" "QI")])
3236
;; Zero-extends the 8-bit field at bit 8 of a "Q"-constrained register
;; (printed as %h1) into the 32-bit form of the destination (%k0) with
;; movzbl / movzx.
3237 (define_insn "*extzv<mode>"
3238 [(set (match_operand:SWI248 0 "register_operand" "=R")
3239 (zero_extract:SWI248 (match_operand:SWI248 1 "register_operand" "Q")
3240 (const_int 8)
3241 (const_int 8)))]
3242 ""
3243 "movz{bl|x}\t{%h1, %k0|%k0, %h1}"
3244 [(set_attr "type" "imovx")
3245 (set_attr "mode" "SI")])
3246
;; Copies the high byte (%h1) of a "Q"-constrained register into a QImode
;; destination.  The "type" attribute is imovx when the destination is a
;; register that is either not a QIreg_operand or TARGET_MOVX is set; in
;; that case movz into the 32-bit destination is emitted, otherwise a
;; plain movb.  The memory alternative (2) is restricted to nox64.
3247 (define_insn "*extzvqi"
3248 [(set (match_operand:QI 0 "nonimmediate_operand" "=QBc,?R,m")
3249 (subreg:QI
3250 (zero_extract:SWI248
3251 (match_operand:SWI248 1 "register_operand" "Q,Q,Q")
3252 (const_int 8)
3253 (const_int 8)) 0))]
3254 ""
3255 {
3256 switch (get_attr_type (insn))
3257 {
3258 case TYPE_IMOVX:
3259 return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
3260 default:
3261 return "mov{b}\t{%h1, %0|%0, %h1}";
3262 }
3263 }
3264 [(set_attr "isa" "*,*,nox64")
3265 (set (attr "type")
3266 (if_then_else (and (match_operand:QI 0 "register_operand")
3267 (ior (not (match_operand:QI 0 "QIreg_operand"))
3268 (match_test "TARGET_MOVX")))
3269 (const_string "imovx")
3270 (const_string "imov")))
3271 (set (attr "mode")
3272 (if_then_else (eq_attr "type" "imovx")
3273 (const_string "SI")
3274 (const_string "QI")))])
3275
;; Matches a high-byte extraction into a QImode register followed by a
;; store of that register to a norex_memory_operand.  On 64-bit targets,
;; when the intermediate register is dead after the store, the pair is
;; replaced by a single store of the extracted high byte to memory.
3276 (define_peephole2
3277 [(set (match_operand:QI 0 "register_operand")
3278 (subreg:QI
3279 (zero_extract:SWI248 (match_operand:SWI248 1 "register_operand")
3280 (const_int 8)
3281 (const_int 8)) 0))
3282 (set (match_operand:QI 2 "norex_memory_operand") (match_dup 0))]
3283 "TARGET_64BIT
3284 && peep2_reg_dead_p (2, operands[0])"
3285 [(set (match_dup 2)
3286 (subreg:QI
3287 (zero_extract:SWI248 (match_dup 1)
3288 (const_int 8)
3289 (const_int 8)) 0))])
3290
;; Expander for insertion into a zero_extract of an SWI248 register.
;; ix86_expand_pinsr is tried first; if it succeeds the expansion is
;; DONE.  Otherwise the expander FAILs unless both operands 1 and 2 are
;; the constant 8 (the %ah-style high-byte field).  The insertion itself
;; is emitted through gen_insv_1.  When the destination's register number
;; is at most LAST_VIRTUAL_REGISTER and does not satisfy QI_REGNO_P, the
;; insertion is done on a copy which is then moved back to operand 0.
3291 (define_expand "insv<mode>"
3292 [(set (zero_extract:SWI248 (match_operand:SWI248 0 "register_operand")
3293 (match_operand:SI 1 "const_int_operand")
3294 (match_operand:SI 2 "const_int_operand"))
3295 (match_operand:SWI248 3 "register_operand"))]
3296 ""
3297 {
3298 rtx dst;
3299
3300 if (ix86_expand_pinsr (operands))
3301 DONE;
3302
3303 /* Handle insertions to %ah et al. */
3304 if (INTVAL (operands[1]) != 8 || INTVAL (operands[2]) != 8)
3305 FAIL;
3306
3307 unsigned int regno = reg_or_subregno (operands[0]);
3308
3309 /* Be careful to expand only with registers having upper parts. */
3310 if (regno <= LAST_VIRTUAL_REGISTER && !QI_REGNO_P (regno))
3311 dst = copy_to_reg (operands[0]);
3312 else
3313 dst = operands[0];
3314
3315 emit_insn (gen_insv_1 (<MODE>mode, dst, operands[3]));
3316
3317 /* Fix up the destination if needed. */
3318 if (dst != operands[0])
3319 emit_move_insn (operands[0], dst);
3320
3321 DONE;
3322 })
3323
;; Loads a byte from a norex_memory_operand directly into the high byte
;; (%h0) of a "Q"-constrained register with movb.  Matched only on 64-bit
;; targets after reload.
3324 (define_insn "*insvqi_1_mem_rex64"
3325 [(set (zero_extract:SWI248
3326 (match_operand:SWI248 0 "register_operand" "+Q")
3327 (const_int 8)
3328 (const_int 8))
3329 (subreg:SWI248
3330 (match_operand:QI 1 "norex_memory_operand" "Bn") 0))]
3331 "TARGET_64BIT && reload_completed"
3332 "mov{b}\t{%1, %h0|%h0, %1}"
3333 [(set_attr "type" "imov")
3334 (set_attr "mode" "QI")])
3335
;; Inserts the low byte of operand 1 (printed as %b1) into the high byte
;; (%h0) of a "Q"-constrained register with movb.  A CONST_INT source is
;; regenerated in QImode with gen_int_mode before the template is
;; returned.  The memory alternative (1) is restricted to nox64.
3336 (define_insn "@insv<mode>_1"
3337 [(set (zero_extract:SWI248
3338 (match_operand:SWI248 0 "register_operand" "+Q,Q")
3339 (const_int 8)
3340 (const_int 8))
3341 (match_operand:SWI248 1 "general_operand" "QnBc,m"))]
3342 ""
3343 {
3344 if (CONST_INT_P (operands[1]))
3345 operands[1] = gen_int_mode (INTVAL (operands[1]), QImode);
3346 return "mov{b}\t{%b1, %h0|%h0, %b1}";
3347 }
3348 [(set_attr "isa" "*,nox64")
3349 (set_attr "type" "imov")
3350 (set_attr "mode" "QI")])
3351
;; Inserts a QImode operand, wrapped in a subreg of the wider mode, into
;; the high byte (%h0) of a "Q"-constrained register with movb.  The
;; memory alternative (1) is restricted to nox64.
3352 (define_insn "*insvqi_1"
3353 [(set (zero_extract:SWI248
3354 (match_operand:SWI248 0 "register_operand" "+Q,Q")
3355 (const_int 8)
3356 (const_int 8))
3357 (subreg:SWI248
3358 (match_operand:QI 1 "general_operand" "QnBc,m") 0))]
3359 ""
3360 "mov{b}\t{%1, %h0|%h0, %1}"
3361 [(set_attr "isa" "*,nox64")
3362 (set_attr "type" "imov")
3363 (set_attr "mode" "QI")])
3364
;; Matches a QImode load from a norex_memory_operand into a register
;; followed by insertion of that register into the high byte of another
;; register.  On 64-bit targets, when the intermediate register is dead
;; after the insertion, the pair is replaced by a single insertion taken
;; directly from memory.
3365 (define_peephole2
3366 [(set (match_operand:QI 0 "register_operand")
3367 (match_operand:QI 1 "norex_memory_operand"))
3368 (set (zero_extract:SWI248 (match_operand:SWI248 2 "register_operand")
3369 (const_int 8)
3370 (const_int 8))
3371 (subreg:SWI248 (match_dup 0) 0))]
3372 "TARGET_64BIT
3373 && peep2_reg_dead_p (2, operands[0])"
3374 [(set (zero_extract:SWI248 (match_dup 2)
3375 (const_int 8)
3376 (const_int 8))
3377 (subreg:SWI248 (match_dup 1) 0))])
3378
3379 ;; Eliminate redundant insv, e.g. xorl %eax,%eax; movb $0, %ah
;; Matches a flags-clobbering "register := 0" followed by an insertion of
;; 0 into bits 8..15 of the same hard register (compared by REGNO).  Only
;; the first insn is re-emitted, so the insertion is dropped.
;; NOTE(review): the replacement template names operand 0 with
;; match_operand rather than match_dup as the neighbouring peepholes do;
;; presumably equivalent in an output template -- confirm before changing.
3380 (define_peephole2
3381 [(parallel [(set (match_operand:SWI48 0 "general_reg_operand")
3382 (const_int 0))
3383 (clobber (reg:CC FLAGS_REG))])
3384 (set (zero_extract:SWI248 (match_operand:SWI248 1 "general_reg_operand")
3385 (const_int 8)
3386 (const_int 8))
3387 (const_int 0))]
3388 "REGNO (operands[0]) == REGNO (operands[1])"
3389 [(parallel [(set (match_operand:SWI48 0 "general_reg_operand")
3390 (const_int 0))
3391 (clobber (reg:CC FLAGS_REG))])])
3392
3393 ;; Combine movl followed by movb.
;; Matches "register := constant" followed by insertion of a second
;; constant into bits 8..15 of the same hard register (compared by
;; REGNO).  The pair is replaced by a single constant load whose value
;; is the first constant with bits 8..15 replaced by the low 8 bits of
;; the second constant.
3394 (define_peephole2
3395 [(set (match_operand:SWI48 0 "general_reg_operand")
3396 (match_operand:SWI48 1 "const_int_operand"))
3397 (set (zero_extract:SWI248 (match_operand:SWI248 2 "general_reg_operand")
3398 (const_int 8)
3399 (const_int 8))
3400 (match_operand:SWI248 3 "const_int_operand"))]
3401 "REGNO (operands[0]) == REGNO (operands[2])"
3402 [(set (match_operand:SWI48 0 "general_reg_operand")
3403 (match_dup 4))]
3404 {
3405 HOST_WIDE_INT tmp = INTVAL (operands[1]) & ~(HOST_WIDE_INT)0xff00;
3406 tmp |= (INTVAL (operands[3]) & 0xff) << 8;
3407 operands[4] = gen_int_mode (tmp, <SWI48:MODE>mode);
3408 })
3409
3410
;; Code iterator covering both bit-field extraction codes, sign_extract
;; and zero_extract.
3411 (define_code_iterator any_extract [sign_extract zero_extract])
3412
;; Copies the high byte of operand 1 (%h1) into the high byte of
;; operand 0 (%h0) with movb.  The source is written as a sign_extract or
;; zero_extract (any_extract) of the 8-bit field at bit 8.
3413 (define_insn "*insvqi_2"
3414 [(set (zero_extract:SWI248
3415 (match_operand:SWI248 0 "register_operand" "+Q")
3416 (const_int 8)
3417 (const_int 8))
3418 (any_extract:SWI248
3419 (match_operand:SWI248 1 "register_operand" "Q")
3420 (const_int 8)
3421 (const_int 8)))]
3422 ""
3423 "mov{b}\t{%h1, %h0|%h0, %h1}"
3424 [(set_attr "type" "imov")
3425 (set_attr "mode" "QI")])
3426
;; Copies the high byte of operand 1 (%h1) into the high byte of
;; operand 0 (%h0) with movb.  Here the source is written as operand 1
;; shifted right by 8 (any_shiftrt) rather than as an extract.
3427 (define_insn "*insvqi_3"
3428 [(set (zero_extract:SWI248
3429 (match_operand:SWI248 0 "register_operand" "+Q")
3430 (const_int 8)
3431 (const_int 8))
3432 (any_shiftrt:SWI248
3433 (match_operand:SWI248 1 "register_operand" "Q")
3434 (const_int 8)))]
3435 ""
3436 "mov{b}\t{%h1, %h0|%h0, %h1}"
3437 [(set_attr "type" "imov")
3438 (set_attr "mode" "QI")])
3439 \f
3440 ;; Floating point push instructions.
3441
;; TFmode push, available when TARGET_64BIT or TARGET_SSE.  The output
;; code always returns "#", so the insn has to be split before it is
;; output.  Alternative 1 is restricted to x64.
3442 (define_insn "*pushtf"
3443 [(set (match_operand:TF 0 "push_operand" "=<,<")
3444 (match_operand:TF 1 "general_no_elim_operand" "v,*roC"))]
3445 "TARGET_64BIT || TARGET_SSE"
3446 {
3447 /* This insn should be already split before reg-stack. */
3448 return "#";
3449 }
3450 [(set_attr "isa" "*,x64")
3451 (set_attr "type" "multi")
3452 (set_attr "unit" "sse,*")
3453 (set_attr "mode" "TF,DI")])
3454
3455 ;; %%% Kill this when call knows how to work this out.
;; After reload, with TARGET_SSE, a TFmode push of an SSE register is
;; split into an explicit stack-pointer decrement by 16 followed by a
;; store to the address in the stack pointer.  The destination MEM is
;; rebuilt with replace_equiv_address on stack_pointer_rtx.
3456 (define_split
3457 [(set (match_operand:TF 0 "push_operand")
3458 (match_operand:TF 1 "sse_reg_operand"))]
3459 "TARGET_SSE && reload_completed"
3460 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -16)))
3461 (set (match_dup 0) (match_dup 1))]
3462 {
3463 /* Preserve memory attributes. */
3464 operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
3465 })
3466
;; XFmode push.  The output code always returns "#", so the insn has to
;; be split before it is output.  Alternative 3 is restricted to nox64
;; and alternative 4 to x64.  Alternatives 1-4 get mode DI on 64-bit
;; targets and SI otherwise; alternative 0 gets mode XF.  Alternative 1
;; is marked not preferred for size.
3467 (define_insn "*pushxf"
3468 [(set (match_operand:XF 0 "push_operand" "=<,<,<,<,<")
3469 (match_operand:XF 1 "general_no_elim_operand" "f,r,*r,oF,oC"))]
3470 ""
3471 {
3472 /* This insn should be already split before reg-stack. */
3473 return "#";
3474 }
3475 [(set_attr "isa" "*,*,*,nox64,x64")
3476 (set_attr "type" "multi")
3477 (set_attr "unit" "i387,*,*,*,*")
3478 (set (attr "mode")
3479 (cond [(eq_attr "alternative" "1,2,3,4")
3480 (if_then_else (match_test "TARGET_64BIT")
3481 (const_string "DI")
3482 (const_string "SI"))
3483 ]
3484 (const_string "XF")))
3485 (set (attr "preferred_for_size")
3486 (cond [(eq_attr "alternative" "1")
3487 (symbol_ref "false")]
3488 (symbol_ref "true")))])
3489
3490 ;; %%% Kill this when call knows how to work this out.
;; After reload, an XFmode push of an fp_register_operand is split into
;; an explicit stack-pointer adjustment followed by a store to the
;; address in the stack pointer.  The adjustment (operand 2) is
;; -PUSH_ROUNDING (GET_MODE_SIZE (XFmode)).
3491 (define_split
3492 [(set (match_operand:XF 0 "push_operand")
3493 (match_operand:XF 1 "fp_register_operand"))]
3494 "reload_completed"
3495 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
3496 (set (match_dup 0) (match_dup 1))]
3497 {
3498 operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (XFmode)));
3499 /* Preserve memory attributes. */
3500 operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
3501 })
3502
;; DFmode push.  The output code always returns "#", so the insn has to
;; be split before it is output.  Alternatives 1-3 are restricted to
;; nox64, alternative 4 to x64 and alternative 5 to sse2.  Alternative 1
;; is marked not preferred for size, and is preferred for speed only when
;; TARGET_INTEGER_DFMODE_MOVES is set.
3503 (define_insn "*pushdf"
3504 [(set (match_operand:DF 0 "push_operand" "=<,<,<,<,<,<")
3505 (match_operand:DF 1 "general_no_elim_operand" "f,r,*r,oF,rmC,v"))]
3506 ""
3507 {
3508 /* This insn should be already split before reg-stack. */
3509 return "#";
3510 }
3511 [(set_attr "isa" "*,nox64,nox64,nox64,x64,sse2")
3512 (set_attr "type" "multi")
3513 (set_attr "unit" "i387,*,*,*,*,sse")
3514 (set_attr "mode" "DF,SI,SI,SI,DI,DF")
3515 (set (attr "preferred_for_size")
3516 (cond [(eq_attr "alternative" "1")
3517 (symbol_ref "false")]
3518 (symbol_ref "true")))
3519 (set (attr "preferred_for_speed")
3520 (cond [(eq_attr "alternative" "1")
3521 (symbol_ref "TARGET_INTEGER_DFMODE_MOVES")]
3522 (symbol_ref "true")))])
3523
3524 ;; %%% Kill this when call knows how to work this out.
;; After reload, a DFmode push of an any_fp_register_operand is split
;; into an explicit stack-pointer decrement by 8 followed by a store to
;; the address in the stack pointer.
3525 (define_split
3526 [(set (match_operand:DF 0 "push_operand")
3527 (match_operand:DF 1 "any_fp_register_operand"))]
3528 "reload_completed"
3529 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8)))
3530 (set (match_dup 0) (match_dup 1))]
3531 {
3532 /* Preserve memory attributes. */
3533 operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
3534 })
3535
;; Mode iterator over HF and BF, used by the push<mode> patterns below.
3536 (define_mode_iterator HFBF [HF BF])
3537
;; HF/BF push for 64-bit targets.  Only alternative 0 (source in a
;; general register) is output directly, as pushq of the 64-bit register;
;; the output code asserts that alternative 1 (source in "x", restricted
;; to sse4) never reaches it.
3538 (define_insn "*push<mode>_rex64"
3539 [(set (match_operand:HFBF 0 "push_operand" "=X,X")
3540 (match_operand:HFBF 1 "nonmemory_no_elim_operand" "r,x"))]
3541 "TARGET_64BIT"
3542 {
3543 /* Anything else should be already split before reg-stack. */
3544 gcc_assert (which_alternative == 0);
3545 return "push{q}\t%q1";
3546 }
3547 [(set_attr "isa" "*,sse4")
3548 (set_attr "type" "push,multi")
3549 (set_attr "mode" "DI,TI")])
3550
;; HF/BF push for 32-bit targets.  Only alternative 0 ("rmF" source) is
;; output directly, as pushl of the 32-bit operand; the output code
;; asserts that alternative 1 (source in "x", restricted to sse4) never
;; reaches it.
3551 (define_insn "*push<mode>"
3552 [(set (match_operand:HFBF 0 "push_operand" "=X,X")
3553 (match_operand:HFBF 1 "general_no_elim_operand" "rmF,x"))]
3554 "!TARGET_64BIT"
3555 {
3556 /* Anything else should be already split before reg-stack. */
3557 gcc_assert (which_alternative == 0);
3558 return "push{l}\t%k1";
3559 }
3560 [(set_attr "isa" "*,sse4")
3561 (set_attr "type" "push,multi")
3562 (set_attr "mode" "SI,TI")])
3563
;; SFmode push for 64-bit targets.  Alternative 1 ("rF" source) is output
;; as pushq of the 64-bit operand; alternatives 0 ("f") and 2 ("v")
;; return "#" and therefore have to be split.
3564 (define_insn "*pushsf_rex64"
3565 [(set (match_operand:SF 0 "push_operand" "=X,X,X")
3566 (match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,v"))]
3567 "TARGET_64BIT"
3568 {
3569 /* Anything else should be already split before reg-stack. */
3570 if (which_alternative != 1)
3571 return "#";
3572 return "push{q}\t%q1";
3573 }
3574 [(set_attr "type" "multi,push,multi")
3575 (set_attr "unit" "i387,*,*")
3576 (set_attr "mode" "SF,DI,SF")])
3577
;; SFmode push for 32-bit targets.  Alternative 1 ("rmF" source) is
;; output as pushl; alternatives 0 ("f") and 2 ("v") return "#" and
;; therefore have to be split.
3578 (define_insn "*pushsf"
3579 [(set (match_operand:SF 0 "push_operand" "=<,<,<")
3580 (match_operand:SF 1 "general_no_elim_operand" "f,rmF,v"))]
3581 "!TARGET_64BIT"
3582 {
3583 /* Anything else should be already split before reg-stack. */
3584 if (which_alternative != 1)
3585 return "#";
3586 return "push{l}\t%1";
3587 }
3588 [(set_attr "type" "multi,push,multi")
3589 (set_attr "unit" "i387,*,*")
3590 (set_attr "mode" "SF,SI,SF")])
3591
;; Mode iterator over SF, HF and BF, used by the push split below.
3592 (define_mode_iterator MODESH [SF HF BF])
3593 ;; %%% Kill this when call knows how to work this out.
;; After reload, an SF/HF/BF push of an any_fp_register_operand is split
;; into an explicit stack-pointer adjustment followed by a store to the
;; address in the stack pointer.  For a PRE_DEC push address the
;; adjustment is -4 (asserted to occur only on 32-bit targets); otherwise
;; it is read from the push address expression and asserted to be a
;; CONST_INT.
3594 (define_split
3595 [(set (match_operand:MODESH 0 "push_operand")
3596 (match_operand:MODESH 1 "any_fp_register_operand"))]
3597 "reload_completed"
3598 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
3599 (set (match_dup 0) (match_dup 1))]
3600 {
3601 rtx op = XEXP (operands[0], 0);
3602 if (GET_CODE (op) == PRE_DEC)
3603 {
3604 gcc_assert (!TARGET_64BIT);
3605 op = GEN_INT (-4);
3606 }
3607 else
3608 {
3609 op = XEXP (XEXP (op, 1), 1);
3610 gcc_assert (CONST_INT_P (op));
3611 }
3612 operands[2] = op;
3613 /* Preserve memory attributes. */
3614 operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
3615 })
3616
;; After reload, an SFmode push from memory for which find_constant_src
;; returns a value is rewritten to push that value (operand 2) instead of
;; the memory operand.
3617 (define_split
3618 [(set (match_operand:SF 0 "push_operand")
3619 (match_operand:SF 1 "memory_operand"))]
3620 "reload_completed
3621 && find_constant_src (insn)"
3622 [(set (match_dup 0) (match_dup 2))]
3623 "operands[2] = find_constant_src (curr_insn);")
3624
;; After reload, a push of a general_gr_operand whose destination mode is
;; TFmode, XFmode or DFmode is handed to ix86_split_long_move, which
;; emits the replacement sequence itself (the template is just
;; (const_int 0) and the preparation code ends with DONE).
3625 (define_split
3626 [(set (match_operand 0 "push_operand")
3627 (match_operand 1 "general_gr_operand"))]
3628 "reload_completed
3629 && (GET_MODE (operands[0]) == TFmode
3630 || GET_MODE (operands[0]) == XFmode
3631 || GET_MODE (operands[0]) == DFmode)"
3632 [(const_int 0)]
3633 "ix86_split_long_move (operands); DONE;")
3634 \f
3635 ;; Floating point move instructions.
3636
;; TFmode move expander, available when TARGET_64BIT or TARGET_SSE.  All
;; of the work is delegated to ix86_expand_move.
3637 (define_expand "movtf"
3638 [(set (match_operand:TF 0 "nonimmediate_operand")
3639 (match_operand:TF 1 "nonimmediate_operand"))]
3640 "TARGET_64BIT || TARGET_SSE"
3641 "ix86_expand_move (TFmode, operands); DONE;")
3642
;; Move expander for every mode in the X87MODEFH iterator.  All of the
;; work is delegated to ix86_expand_move.
3643 (define_expand "mov<mode>"
3644 [(set (match_operand:X87MODEFH 0 "nonimmediate_operand")
3645 (match_operand:X87MODEFH 1 "general_operand"))]
3646 ""
3647 "ix86_expand_move (<MODE>mode, operands); DONE;")
3648
;; TFmode move pattern.  The output code dispatches on the "type"
;; attribute, which is fixed per alternative: alternative 0 is sselog1
;; (standard_sse_constant_opcode), alternatives 1 and 2 are ssemov
;; (ix86_output_ssemov), and alternatives 3 and 4 are multi, which return
;; "#" and are restricted to x64.  Memory-to-memory moves are rejected by
;; the insn condition, which also limits when a CONST_DOUBLE source is
;; accepted before LRA/reload.
3649 (define_insn "*movtf_internal"
3650 [(set (match_operand:TF 0 "nonimmediate_operand" "=v,v ,m,?*r ,!o")
3651 (match_operand:TF 1 "general_operand" "C ,vm,v,*roF,*rC"))]
3652 "(TARGET_64BIT || TARGET_SSE)
3653 && !(MEM_P (operands[0]) && MEM_P (operands[1]))
3654 && (lra_in_progress || reload_completed
3655 || !CONST_DOUBLE_P (operands[1])
3656 || (standard_sse_constant_p (operands[1], TFmode) == 1
3657 && !memory_operand (operands[0], TFmode))
3658 || (!TARGET_MEMORY_MISMATCH_STALL
3659 && memory_operand (operands[0], TFmode)))"
3660 {
3661 switch (get_attr_type (insn))
3662 {
3663 case TYPE_SSELOG1:
3664 return standard_sse_constant_opcode (insn, operands);
3665
3666 case TYPE_SSEMOV:
3667 return ix86_output_ssemov (insn, operands);
3668
3669 case TYPE_MULTI:
3670 return "#";
3671
3672 default:
3673 gcc_unreachable ();
3674 }
3675 }
3676 [(set_attr "isa" "*,*,*,x64,x64")
3677 (set_attr "type" "sselog1,ssemov,ssemov,multi,multi")
3678 (set (attr "prefix")
3679 (if_then_else (eq_attr "type" "sselog1,ssemov")
3680 (const_string "maybe_vex")
3681 (const_string "orig")))
3682 (set (attr "mode")
3683 (cond [(eq_attr "alternative" "3,4")
3684 (const_string "DI")
3685 (match_test "TARGET_AVX")
3686 (const_string "TI")
3687 (ior (not (match_test "TARGET_SSE2"))
3688 (match_test "optimize_function_for_size_p (cfun)"))
3689 (const_string "V4SF")
3690 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3691 (const_string "V4SF")
3692 (and (eq_attr "alternative" "2")
3693 (match_test "TARGET_SSE_TYPELESS_STORES"))
3694 (const_string "V4SF")
3695 ]
3696 (const_string "TI")))])
3697
;; After reload, a TFmode move between a nonimmediate_gr_operand and a
;; general_gr_operand is handed to ix86_split_long_move, which emits the
;; replacement sequence itself.
3698 (define_split
3699 [(set (match_operand:TF 0 "nonimmediate_gr_operand")
3700 (match_operand:TF 1 "general_gr_operand"))]
3701 "reload_completed"
3702 [(const_int 0)]
3703 "ix86_split_long_move (operands); DONE;")
3704
3705 ;; Possible store forwarding (partial memory) stall
3706 ;; in alternatives 4, 6, 7 and 8.
;; XFmode move pattern.  Alternatives 0-2 have type fmov: alternative 2
;; is output through standard_80387_constant_opcode, the other two
;; through output_387_reg_move.  Alternatives 3-11 have type multi and
;; return "#".  The "enabled" attribute makes the two groups mutually
;; exclusive: alternatives 9-11 are disabled when TARGET_HARD_XF_REGS is
;; set, and alternatives 0-8 are disabled when it is not.
3707 (define_insn "*movxf_internal"
3708 [(set (match_operand:XF 0 "nonimmediate_operand"
3709 "=f,m,f,?r ,!o,?*r ,!o,!o,!o,r ,o ,o")
3710 (match_operand:XF 1 "general_operand"
3711 "fm,f,G,roF,r ,*roF,*r,F ,C ,roF,rF,rC"))]
3712 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
3713 && (lra_in_progress || reload_completed
3714 || !CONST_DOUBLE_P (operands[1])
3715 || ((optimize_function_for_size_p (cfun)
3716 || (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC))
3717 && standard_80387_constant_p (operands[1]) > 0
3718 && !memory_operand (operands[0], XFmode))
3719 || (!TARGET_MEMORY_MISMATCH_STALL
3720 && memory_operand (operands[0], XFmode))
3721 || !TARGET_HARD_XF_REGS)"
3722 {
3723 switch (get_attr_type (insn))
3724 {
3725 case TYPE_FMOV:
3726 if (which_alternative == 2)
3727 return standard_80387_constant_opcode (operands[1]);
3728 return output_387_reg_move (insn, operands);
3729
3730 case TYPE_MULTI:
3731 return "#";
3732
3733 default:
3734 gcc_unreachable ();
3735 }
3736 }
3737 [(set (attr "isa")
3738 (cond [(eq_attr "alternative" "7,10")
3739 (const_string "nox64")
3740 (eq_attr "alternative" "8,11")
3741 (const_string "x64")
3742 ]
3743 (const_string "*")))
3744 (set (attr "type")
3745 (cond [(eq_attr "alternative" "3,4,5,6,7,8,9,10,11")
3746 (const_string "multi")
3747 ]
3748 (const_string "fmov")))
3749 (set (attr "mode")
3750 (cond [(eq_attr "alternative" "3,4,5,6,7,8,9,10,11")
3751 (if_then_else (match_test "TARGET_64BIT")
3752 (const_string "DI")
3753 (const_string "SI"))
3754 ]
3755 (const_string "XF")))
3756 (set (attr "preferred_for_size")
3757 (cond [(eq_attr "alternative" "3,4")
3758 (symbol_ref "false")]
3759 (symbol_ref "true")))
3760 (set (attr "enabled")
3761 (cond [(eq_attr "alternative" "9,10,11")
3762 (if_then_else
3763 (match_test "TARGET_HARD_XF_REGS")
3764 (symbol_ref "false")
3765 (const_string "*"))
3766 (not (match_test "TARGET_HARD_XF_REGS"))
3767 (symbol_ref "false")
3768 ]
3769 (const_string "*")))])
3770
;; After reload, an XFmode move between a nonimmediate_gr_operand and a
;; general_gr_operand is handed to ix86_split_long_move, which emits the
;; replacement sequence itself.
3771 (define_split
3772 [(set (match_operand:XF 0 "nonimmediate_gr_operand")
3773 (match_operand:XF 1 "general_gr_operand"))]
3774 "reload_completed"
3775 [(const_int 0)]
3776 "ix86_split_long_move (operands); DONE;")
3777
3778 ;; Possible store forwarding (partial memory) stall in alternatives 4, 6 and 7.
;; DFmode move pattern.  The output code dispatches on the "type"
;; attribute:
;;   fmov    (alternatives 0-2)  -> x87 move, or a standard x87 constant
;;                                  for alternative 2;
;;   multi   (3-7, 22, 23)       -> "#", i.e. must be split;
;;   imov    (8-11, 24, 25)      -> movl when "mode" is SI, movabsq for
;;                                  alternative 11, otherwise movq;
;;   sselog1 (12, 16)            -> standard_sse_constant_opcode;
;;   ssemov  (all others)        -> ix86_output_ssemov.
;; The "enabled" attribute disables alternatives 22-25 when
;; TARGET_HARD_DF_REGS is set and every other alternative when it is not.
3779 (define_insn "*movdf_internal"
3780 [(set (match_operand:DF 0 "nonimmediate_operand"
3781 "=Yf*f,m ,Yf*f,?r ,!o,?*r ,!o,!o,?r,?m,?r,?r,v,v,v,m,*x,*x,*x,m ,r ,v,r ,o ,r ,m")
3782 (match_operand:DF 1 "general_operand"
3783 "Yf*fm,Yf*f,G ,roF,r ,*roF,*r,F ,rm,rC,C ,F ,C,v,m,v,C ,*x,m ,*x,v,r ,roF,rF,rmF,rC"))]
3784 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
3785 && (lra_in_progress || reload_completed
3786 || !CONST_DOUBLE_P (operands[1])
3787 || ((optimize_function_for_size_p (cfun)
3788 || (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC))
3789 && IS_STACK_MODE (DFmode)
3790 && standard_80387_constant_p (operands[1]) > 0
3791 && !memory_operand (operands[0], DFmode))
3792 || (TARGET_SSE2 && TARGET_SSE_MATH
3793 && standard_sse_constant_p (operands[1], DFmode) == 1
3794 && !memory_operand (operands[0], DFmode))
3795 || ((TARGET_64BIT || !TARGET_MEMORY_MISMATCH_STALL)
3796 && memory_operand (operands[0], DFmode))
3797 || !TARGET_HARD_DF_REGS)"
3798 {
3799 switch (get_attr_type (insn))
3800 {
3801 case TYPE_FMOV:
3802 if (which_alternative == 2)
3803 return standard_80387_constant_opcode (operands[1]);
3804 return output_387_reg_move (insn, operands);
3805
3806 case TYPE_MULTI:
3807 return "#";
3808
3809 case TYPE_IMOV:
3810 if (get_attr_mode (insn) == MODE_SI)
3811 return "mov{l}\t{%1, %k0|%k0, %1}";
3812 else if (which_alternative == 11)
3813 return "movabs{q}\t{%1, %0|%0, %1}";
3814 else
3815 return "mov{q}\t{%1, %0|%0, %1}";
3816
3817 case TYPE_SSELOG1:
3818 return standard_sse_constant_opcode (insn, operands);
3819
3820 case TYPE_SSEMOV:
3821 return ix86_output_ssemov (insn, operands);
3822
3823 default:
3824 gcc_unreachable ();
3825 }
3826 }
3827 [(set (attr "isa")
3828 (cond [(eq_attr "alternative" "3,4,5,6,7,22,23")
3829 (const_string "nox64")
3830 (eq_attr "alternative" "8,9,10,11,24,25")
3831 (const_string "x64")
3832 (eq_attr "alternative" "12,13,14,15")
3833 (const_string "sse2")
3834 (eq_attr "alternative" "20,21")
3835 (const_string "x64_sse2")
3836 ]
3837 (const_string "*")))
3838 (set (attr "type")
3839 (cond [(eq_attr "alternative" "0,1,2")
3840 (const_string "fmov")
3841 (eq_attr "alternative" "3,4,5,6,7,22,23")
3842 (const_string "multi")
3843 (eq_attr "alternative" "8,9,10,11,24,25")
3844 (const_string "imov")
3845 (eq_attr "alternative" "12,16")
3846 (const_string "sselog1")
3847 ]
3848 (const_string "ssemov")))
;; Alternative 11 is the one output as movabsq above: it gets no modrm
;; byte and an 8-byte immediate.
3849 (set (attr "modrm")
3850 (if_then_else (eq_attr "alternative" "11")
3851 (const_string "0")
3852 (const_string "*")))
3853 (set (attr "length_immediate")
3854 (if_then_else (eq_attr "alternative" "11")
3855 (const_string "8")
3856 (const_string "*")))
3857 (set (attr "prefix")
3858 (if_then_else (eq_attr "type" "sselog1,ssemov")
3859 (const_string "maybe_vex")
3860 (const_string "orig")))
3861 (set (attr "prefix_data16")
3862 (if_then_else
3863 (ior (and (eq_attr "type" "ssemov") (eq_attr "mode" "DI"))
3864 (eq_attr "mode" "V1DF"))
3865 (const_string "1")
3866 (const_string "*")))
3867 (set (attr "mode")
3868 (cond [(eq_attr "alternative" "3,4,5,6,7,10,22,23")
3869 (const_string "SI")
3870 (eq_attr "alternative" "8,9,11,20,21,24,25")
3871 (const_string "DI")
3872
3873 /* xorps is one byte shorter for non-AVX targets. */
3874 (eq_attr "alternative" "12,16")
3875 (cond [(match_test "TARGET_AVX")
3876 (const_string "V2DF")
3877 (ior (not (match_test "TARGET_SSE2"))
3878 (match_test "optimize_function_for_size_p (cfun)"))
3879 (const_string "V4SF")
3880 (match_test "TARGET_SSE_LOAD0_BY_PXOR")
3881 (const_string "TI")
3882 ]
3883 (const_string "V2DF"))
3884
3885 /* For architectures resolving dependencies on
3886 whole SSE registers use movapd to break dependency
3887 chains, otherwise use short move to avoid extra work. */
3888
3889 /* movaps is one byte shorter for non-AVX targets. */
3890 (eq_attr "alternative" "13,17")
3891 (cond [(match_test "TARGET_AVX")
3892 (const_string "DF")
3893 (ior (not (match_test "TARGET_SSE2"))
3894 (match_test "optimize_function_for_size_p (cfun)"))
3895 (const_string "V4SF")
3896 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3897 (const_string "V4SF")
3898 (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
3899 (const_string "V2DF")
3900 ]
3901 (const_string "DF"))
3902
3903 /* For architectures resolving dependencies on register
3904 parts we may avoid extra work to zero out upper part
3905 of register. */
3906 (eq_attr "alternative" "14,18")
3907 (cond [(not (match_test "TARGET_SSE2"))
3908 (const_string "V2SF")
3909 (match_test "TARGET_AVX")
3910 (const_string "DF")
3911 (match_test "TARGET_SSE_SPLIT_REGS")
3912 (const_string "V1DF")
3913 ]
3914 (const_string "DF"))
3915
3916 (and (eq_attr "alternative" "15,19")
3917 (not (match_test "TARGET_SSE2")))
3918 (const_string "V2SF")
3919 ]
3920 (const_string "DF")))
3921 (set (attr "preferred_for_size")
3922 (cond [(eq_attr "alternative" "3,4")
3923 (symbol_ref "false")]
3924 (symbol_ref "true")))
3925 (set (attr "preferred_for_speed")
3926 (cond [(eq_attr "alternative" "3,4")
3927 (symbol_ref "TARGET_INTEGER_DFMODE_MOVES")
3928 (eq_attr "alternative" "20")
3929 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
3930 (eq_attr "alternative" "21")
3931 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
3932 ]
3933 (symbol_ref "true")))
3934 (set (attr "enabled")
3935 (cond [(eq_attr "alternative" "22,23,24,25")
3936 (if_then_else
3937 (match_test "TARGET_HARD_DF_REGS")
3938 (symbol_ref "false")
3939 (const_string "*"))
3940 (not (match_test "TARGET_HARD_DF_REGS"))
3941 (symbol_ref "false")
3942 ]
3943 (const_string "*")))])
3944
;; Post-reload splitter for DFmode moves, used only when !TARGET_64BIT.
;; Both operands must satisfy the *_gr_operand predicates.  The replacement
;; template is a placeholder: the preparation code calls
;; ix86_split_long_move and finishes with DONE, so the template itself is
;; never emitted.
3945 (define_split
3946 [(set (match_operand:DF 0 "nonimmediate_gr_operand")
3947 (match_operand:DF 1 "general_gr_operand"))]
3948 "!TARGET_64BIT && reload_completed"
3949 [(const_int 0)]
3950 "ix86_split_long_move (operands); DONE;")
3951
;; SFmode move with 18 alternatives (numbered 0-17 in constraint order).
;; The insn condition rejects memory-to-memory moves and, before LRA/reload,
;; only accepts a CONST_DOUBLE source in the cases listed in the condition.
;; The assembler output is selected by the "type" attribute, which the
;; attribute table below derives from the alternative number:
;;   0,1,2 -> fmov; 3,4,16,17 -> imov; 5 -> sselog1;
;;   11-15 -> mmxmov; everything else -> ssemov.
3952 (define_insn "*movsf_internal"
3953 [(set (match_operand:SF 0 "nonimmediate_operand"
3954 "=Yf*f,m ,Yf*f,?r ,?m,v,v,v,m,?r,?v,!*y,!*y,!m,!r,!*y,r ,m")
3955 (match_operand:SF 1 "general_operand"
3956 "Yf*fm,Yf*f,G ,rmF,rF,C,v,m,v,v ,r ,*y ,m ,*y,*y,r ,rmF,rF"))]
3957 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
3958 && (lra_in_progress || reload_completed
3959 || !CONST_DOUBLE_P (operands[1])
3960 || ((optimize_function_for_size_p (cfun)
3961 || (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC))
3962 && IS_STACK_MODE (SFmode)
3963 && standard_80387_constant_p (operands[1]) > 0)
3964 || (TARGET_SSE && TARGET_SSE_MATH
3965 && standard_sse_constant_p (operands[1], SFmode) == 1)
3966 || memory_operand (operands[0], SFmode)
3967 || !TARGET_HARD_SF_REGS)"
3968 {
3969 switch (get_attr_type (insn))
3970 {
3971 case TYPE_FMOV:
/* Alternative 2 is the one whose source uses the "G" constraint; it is
   emitted via the dedicated constant opcode helper.  */
3972 if (which_alternative == 2)
3973 return standard_80387_constant_opcode (operands[1]);
3974 return output_387_reg_move (insn, operands);
3975
3976 case TYPE_IMOV:
3977 return "mov{l}\t{%1, %0|%0, %1}";
3978
3979 case TYPE_SSELOG1:
3980 return standard_sse_constant_opcode (insn, operands);
3981
3982 case TYPE_SSEMOV:
3983 return ix86_output_ssemov (insn, operands);
3984
3985 case TYPE_MMXMOV:
/* The "mode" attribute is DI only for alternative 11 and SI for the
   other mmxmov alternatives (see the mode table below).  */
3986 switch (get_attr_mode (insn))
3987 {
3988 case MODE_DI:
3989 return "movq\t{%1, %0|%0, %1}";
3990 case MODE_SI:
3991 return "movd\t{%1, %0|%0, %1}";
3992
3993 default:
3994 gcc_unreachable ();
3995 }
3996
3997 default:
3998 gcc_unreachable ();
3999 }
4000 }
4001 [(set (attr "isa")
4002 (cond [(eq_attr "alternative" "9,10")
4003 (const_string "sse2")
4004 ]
4005 (const_string "*")))
4006 (set (attr "type")
4007 (cond [(eq_attr "alternative" "0,1,2")
4008 (const_string "fmov")
4009 (eq_attr "alternative" "3,4,16,17")
4010 (const_string "imov")
4011 (eq_attr "alternative" "5")
4012 (const_string "sselog1")
4013 (eq_attr "alternative" "11,12,13,14,15")
4014 (const_string "mmxmov")
4015 ]
4016 (const_string "ssemov")))
4017 (set (attr "prefix")
4018 (if_then_else (eq_attr "type" "sselog1,ssemov")
4019 (const_string "maybe_vex")
4020 (const_string "orig")))
4021 (set (attr "prefix_data16")
4022 (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "SI"))
4023 (const_string "1")
4024 (const_string "*")))
4025 (set (attr "mode")
4026 (cond [(eq_attr "alternative" "3,4,9,10,12,13,14,15,16,17")
4027 (const_string "SI")
4028 (eq_attr "alternative" "11")
4029 (const_string "DI")
;; Alternative 5 (source constraint "C"): the mode picks the form of the
;; constant-generating instruction.
4030 (eq_attr "alternative" "5")
4031 (cond [(and (match_test "TARGET_AVX512F")
4032 (not (match_test "TARGET_PREFER_AVX256")))
4033 (const_string "V16SF")
4034 (match_test "TARGET_AVX")
4035 (const_string "V4SF")
4036 (ior (not (match_test "TARGET_SSE2"))
4037 (match_test "optimize_function_for_size_p (cfun)"))
4038 (const_string "V4SF")
4039 (match_test "TARGET_SSE_LOAD0_BY_PXOR")
4040 (const_string "TI")
4041 ]
4042 (const_string "V4SF"))
4043
4044 /* For architectures resolving dependencies on
4045 whole SSE registers use APS move to break dependency
4046 chains, otherwise use short move to avoid extra work.
4047
4048 Do the same for architectures resolving dependencies on
4049 the parts. While in DF mode it is better to always handle
4050 just register parts, the SF mode is different due to lack
4051 of instructions to load just part of the register. It is
4052 better to maintain the whole registers in single format
4053 to avoid problems on using packed logical operations. */
4054 (eq_attr "alternative" "6")
4055 (cond [(ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
4056 (match_test "TARGET_SSE_SPLIT_REGS"))
4057 (const_string "V4SF")
4058 ]
4059 (const_string "SF"))
4060 ]
4061 (const_string "SF")))
4062 (set (attr "preferred_for_speed")
4063 (cond [(eq_attr "alternative" "9,14")
4064 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
4065 (eq_attr "alternative" "10,15")
4066 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
4067 ]
4068 (symbol_ref "true")))
;; Alternatives 16 and 17 are disabled when TARGET_HARD_SF_REGS is set;
;; every other alternative is disabled when it is not set.
4069 (set (attr "enabled")
4070 (cond [(eq_attr "alternative" "16,17")
4071 (if_then_else
4072 (match_test "TARGET_HARD_SF_REGS")
4073 (symbol_ref "false")
4074 (const_string "*"))
4075 (not (match_test "TARGET_HARD_SF_REGS"))
4076 (symbol_ref "false")
4077 ]
4078 (const_string "*")))])
4079
;; Per-mode constraint fragment: "F" for HF, empty for BF.  It is spliced
;; into the source constraint of alternative 3 of *mov<mode>_internal
;; ("r<hfbfconstf>"), so only HFmode gets the extra "F" constraint there.
4080 (define_mode_attr hfbfconstf
4081 [(HF "F") (BF "")])
4082
;; Move pattern for the HFBF mode iterator, with 10 alternatives (0-9).
;; The condition rejects memory-to-memory moves and, before LRA/reload,
;; accepts a CONST_DOUBLE source only when it is a standard SSE constant
;; (with TARGET_SSE2) or the destination is memory.
;; Output is selected by the "type" attribute; anything that is not imovx,
;; ssemov or sselog1 falls into the default case and is emitted as an
;; integer mov, 32-bit or 16-bit depending on the "mode" attribute.
4083 (define_insn "*mov<mode>_internal"
4084 [(set (match_operand:HFBF 0 "nonimmediate_operand"
4085 "=?r,?r,?r,?m,v,v,?r,m,?v,v")
4086 (match_operand:HFBF 1 "general_operand"
4087 "r ,F ,m ,r<hfbfconstf>,C,v, v,v,r ,m"))]
4088 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
4089 && (lra_in_progress
4090 || reload_completed
4091 || !CONST_DOUBLE_P (operands[1])
4092 || (TARGET_SSE2
4093 && standard_sse_constant_p (operands[1], <MODE>mode) == 1)
4094 || memory_operand (operands[0], <MODE>mode))"
4095 {
4096 switch (get_attr_type (insn))
4097 {
4098 case TYPE_IMOVX:
4099 /* movzwl is faster than movw on p2 due to partial word stalls,
4100 though not as fast as an aligned movl. */
4101 return "movz{wl|x}\t{%1, %k0|%k0, %1}";
4102
4103 case TYPE_SSEMOV:
4104 return ix86_output_ssemov (insn, operands);
4105
4106 case TYPE_SSELOG1:
/* sselog1 covers alternative 4 (constant source, constraint "C") and,
   without TARGET_AVX512FP16, alternatives 7 and 9, which are emitted
   as a word insert into / extract from an SSE register.  */
4107 if (satisfies_constraint_C (operands[1]))
4108 return standard_sse_constant_opcode (insn, operands);
4109
4110 if (SSE_REG_P (operands[0]))
4111 return "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}";
4112 else
4113 return "%vpextrw\t{$0, %1, %0|%0, %1, 0}";
4114
4115 default:
4116 if (get_attr_mode (insn) == MODE_SI)
4117 return "mov{l}\t{%k1, %k0|%k0, %k1}";
4118 else
4119 return "mov{w}\t{%1, %0|%0, %1}";
4120 }
4121 }
4122 [(set (attr "isa")
4123 (cond [(eq_attr "alternative" "4,5,6,8,9")
4124 (const_string "sse2")
4125 (eq_attr "alternative" "7")
4126 (const_string "sse4")
4127 ]
4128 (const_string "*")))
;; Alternatives 4-9 get an SSE type; the remaining tests only apply to the
;; integer alternatives 0-3 and choose between imov and imovx.
;; NOTE(review): the aligned_operand tests below use HImode although the
;; operand itself is HF/BF -- confirm this is intended.
4129 (set (attr "type")
4130 (cond [(eq_attr "alternative" "4")
4131 (const_string "sselog1")
4132 (eq_attr "alternative" "5,6,8")
4133 (const_string "ssemov")
4134 (eq_attr "alternative" "7,9")
4135 (if_then_else
4136 (match_test ("TARGET_AVX512FP16"))
4137 (const_string "ssemov")
4138 (const_string "sselog1"))
4139 (match_test "optimize_function_for_size_p (cfun)")
4140 (const_string "imov")
4141 (and (eq_attr "alternative" "0")
4142 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
4143 (not (match_test "TARGET_HIMODE_MATH"))))
4144 (const_string "imov")
4145 (and (eq_attr "alternative" "1,2")
4146 (match_operand:HI 1 "aligned_operand"))
4147 (const_string "imov")
4148 (and (match_test "TARGET_MOVX")
4149 (eq_attr "alternative" "0,2"))
4150 (const_string "imovx")
4151 ]
4152 (const_string "imov")))
4153 (set (attr "prefix")
4154 (cond [(eq_attr "alternative" "4,5,6,7,8,9")
4155 (const_string "maybe_vex")
4156 ]
4157 (const_string "orig")))
4158 (set (attr "mode")
4159 (cond [(eq_attr "alternative" "4")
4160 (const_string "V4SF")
4161 (eq_attr "alternative" "6,8")
4162 (if_then_else
4163 (match_test "TARGET_AVX512FP16")
4164 (const_string "HI")
4165 (const_string "SI"))
4166 (eq_attr "alternative" "7,9")
4167 (if_then_else
4168 (match_test "TARGET_AVX512FP16")
4169 (const_string "HI")
4170 (const_string "TI"))
4171 (eq_attr "alternative" "5")
4172 (cond [(match_test "TARGET_AVX512FP16")
4173 (const_string "HF")
4174 (ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
4175 (match_test "TARGET_SSE_SPLIT_REGS"))
4176 (const_string "V4SF")
4177 ]
4178 (const_string "SF"))
4179 (eq_attr "type" "imovx")
4180 (const_string "SI")
4181 (and (eq_attr "alternative" "1,2")
4182 (match_operand:HI 1 "aligned_operand"))
4183 (const_string "SI")
4184 (and (eq_attr "alternative" "0")
4185 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
4186 (not (match_test "TARGET_HIMODE_MATH"))))
4187 (const_string "SI")
4188 ]
4189 (const_string "HI")))
;; Alternative 1 (constant source "F" into a general register) is disabled
;; for BFmode.
4190 (set (attr "enabled")
4191 (cond [(and (match_test "<MODE>mode == BFmode")
4192 (eq_attr "alternative" "1"))
4193 (symbol_ref "false")
4194 ]
4195 (const_string "*")))])
4196
;; After reload, rewrite a load from memory into an FP register (TFmode,
;; XFmode, DFmode or SFmode destination) when
;; ix86_standard_x87sse_constant_load_p accepts the insn.  The memory source
;; is replaced by whatever find_constant_src returns for the current insn
;; (presumably the constant the memory reference stands for -- confirm
;; against the helper).
4197 (define_split
4198 [(set (match_operand 0 "any_fp_register_operand")
4199 (match_operand 1 "memory_operand"))]
4200 "reload_completed
4201 && (GET_MODE (operands[0]) == TFmode
4202 || GET_MODE (operands[0]) == XFmode
4203 || GET_MODE (operands[0]) == DFmode
4204 || GET_MODE (operands[0]) == SFmode)
4205 && ix86_standard_x87sse_constant_load_p (insn, operands[0])"
4206 [(set (match_dup 0) (match_dup 2))]
4207 "operands[2] = find_constant_src (curr_insn);")
4208
;; Same rewrite as the plain FP constant-load splitter, but for a source
;; wrapped in float_extend.  Only TFmode, XFmode and DFmode destinations are
;; accepted here.  The float_extend is dropped in the replacement:
;; operand 0 is set directly from the result of find_constant_src.
4209 (define_split
4210 [(set (match_operand 0 "any_fp_register_operand")
4211 (float_extend (match_operand 1 "memory_operand")))]
4212 "reload_completed
4213 && (GET_MODE (operands[0]) == TFmode
4214 || GET_MODE (operands[0]) == XFmode
4215 || GET_MODE (operands[0]) == DFmode)
4216 && ix86_standard_x87sse_constant_load_p (insn, operands[0])"
4217 [(set (match_dup 0) (match_dup 2))]
4218 "operands[2] = find_constant_src (curr_insn);")
4219
4220 ;; Split the load of -0.0 or -1.0 into fldz;fchs or fld1;fchs sequence
;; Applies after reload when standard_80387_constant_p returns 8 or 9 for
;; the immediate (presumably its codes for -0.0 and -1.0 -- confirm against
;; the helper).  The preparation code replaces operand 1 by +0.0 when the
;; original constant is negative zero and by +1.0 otherwise; the template
;; then loads that value and negates the register in place.
4221 (define_split
4222 [(set (match_operand:X87MODEF 0 "fp_register_operand")
4223 (match_operand:X87MODEF 1 "immediate_operand"))]
4224 "reload_completed
4225 && (standard_80387_constant_p (operands[1]) == 8
4226 || standard_80387_constant_p (operands[1]) == 9)"
4227 [(set (match_dup 0)(match_dup 1))
4228 (set (match_dup 0)
4229 (neg:X87MODEF (match_dup 0)))]
4230 {
4231 if (real_isnegzero (CONST_DOUBLE_REAL_VALUE (operands[1])))
4232 operands[1] = CONST0_RTX (<MODE>mode);
4233 else
4234 operands[1] = CONST1_RTX (<MODE>mode);
4235 })
4236
;; Exchange two XFmode registers (both operands are read and written, "+f").
;; Requires TARGET_80387.  fxch is emitted naming whichever operand is not
;; the stack top: operand 1 if operand 0 is the stack top, else operand 0.
4237 (define_insn "*swapxf"
4238 [(set (match_operand:XF 0 "register_operand" "+f")
4239 (match_operand:XF 1 "register_operand" "+f"))
4240 (set (match_dup 1)
4241 (match_dup 0))]
4242 "TARGET_80387"
4243 {
4244 if (STACK_TOP_P (operands[0]))
4245 return "fxch\t%1";
4246 else
4247 return "fxch\t%0";
4248 }
4249 [(set_attr "type" "fxch")
4250 (set_attr "mode" "XF")])
4251 \f
4252
4253 ;; Zero extension instructions
4254
;; Zero-extend DImode to TImode on TARGET_64BIT.  There is no direct
;; assembly ("#"); after reload the insn is split into a DImode move of
;; operand 1 into operands[3] and a store of zero into operands[4], where
;; operands[3] and operands[4] are the two DImode parts of operand 0 filled
;; in by split_double_mode.
4255 (define_insn_and_split "zero_extendditi2"
4256 [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o")
4257 (zero_extend:TI (match_operand:DI 1 "nonimmediate_operand" "rm,r")))]
4258 "TARGET_64BIT"
4259 "#"
4260 "&& reload_completed"
4261 [(set (match_dup 3) (match_dup 1))
4262 (set (match_dup 4) (const_int 0))]
4263 "split_double_mode (TImode, &operands[0], 1, &operands[3], &operands[4]);")
4264
;; Named expander for SImode -> DImode zero extension.  It has no condition
;; and no preparation code, so it simply generates the pattern as written.
4265 (define_expand "zero_extendsidi2"
4266 [(set (match_operand:DI 0 "nonimmediate_operand")
4267 (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))])
4268
;; SImode -> DImode zero extension with 14 alternatives (0-13) covering
;; general registers, memory, MMX (*y), SSE (x/v) and mask (k) registers.
;; The insn has no condition of its own; availability per alternative is
;; controlled by the "isa" and "mmx_isa" attributes below.  Output is chosen
;; by the "type" attribute:
;;   imovx  -> 32-bit mov (or lea when ix86_use_lea_for_mov says so)
;;   multi  -> "#", i.e. no direct assembly; see the define_splits that
;;             follow this pattern
;;   mmxmov -> movd;  ssemov -> vpmovzxdq / movd;  mskmov -> kmovd
4269 (define_insn "*zero_extendsidi2"
4270 [(set (match_operand:DI 0 "nonimmediate_operand"
4271 "=r,?r,?o,r ,o,?*y,?!*y,$r,$v,$x,*x,*v,*r,*k")
4272 (zero_extend:DI
4273 (match_operand:SI 1 "x86_64_zext_operand"
4274 "0 ,rm,r ,rmWz,0,r ,m ,v ,r ,m ,*x,*v,*k,*km")))]
4275 ""
4276 {
4277 switch (get_attr_type (insn))
4278 {
4279 case TYPE_IMOVX:
4280 if (ix86_use_lea_for_mov (insn, operands))
4281 return "lea{l}\t{%E1, %k0|%k0, %E1}";
4282 else
4283 return "mov{l}\t{%1, %k0|%k0, %1}";
4284
4285 case TYPE_MULTI:
4286 return "#";
4287
4288 case TYPE_MMXMOV:
4289 return "movd\t{%1, %0|%0, %1}";
4290
4291 case TYPE_SSEMOV:
/* SSE register to SSE register: use the zero-extending vector form.
   When either register is an EXT_REX SSE register, the explicitly
   VEX/EVEX-only mnemonic with %t/%g operand modifiers is used.  */
4292 if (SSE_REG_P (operands[0]) && SSE_REG_P (operands[1]))
4293 {
4294 if (EXT_REX_SSE_REG_P (operands[0])
4295 || EXT_REX_SSE_REG_P (operands[1]))
4296 return "vpmovzxdq\t{%t1, %g0|%g0, %t1}";
4297 else
4298 return "%vpmovzxdq\t{%1, %0|%0, %1}";
4299 }
4300
4301 if (GENERAL_REG_P (operands[0]))
4302 return "%vmovd\t{%1, %k0|%k0, %1}";
4303
4304 return "%vmovd\t{%1, %0|%0, %1}";
4305
4306 case TYPE_MSKMOV:
4307 return "kmovd\t{%1, %k0|%k0, %1}";
4308
4309 default:
4310 gcc_unreachable ();
4311 }
4312 }
4313 [(set (attr "isa")
4314 (cond [(eq_attr "alternative" "0,1,2")
4315 (const_string "nox64")
4316 (eq_attr "alternative" "3")
4317 (const_string "x64")
4318 (eq_attr "alternative" "7,8,9")
4319 (const_string "sse2")
4320 (eq_attr "alternative" "10")
4321 (const_string "sse4")
4322 (eq_attr "alternative" "11")
4323 (const_string "avx512f")
4324 (eq_attr "alternative" "12")
4325 (const_string "x64_avx512bw")
4326 (eq_attr "alternative" "13")
4327 (const_string "avx512bw")
4328 ]
4329 (const_string "*")))
4330 (set (attr "mmx_isa")
4331 (if_then_else (eq_attr "alternative" "5,6")
4332 (const_string "native")
4333 (const_string "*")))
;; Alternative 7 (SSE register -> general register) is a single ssemov
;; only on TARGET_64BIT; otherwise it is "multi" and must be split.
;; Alternative 3 is the only one left to the default, imovx.
4334 (set (attr "type")
4335 (cond [(eq_attr "alternative" "0,1,2,4")
4336 (const_string "multi")
4337 (eq_attr "alternative" "5,6")
4338 (const_string "mmxmov")
4339 (eq_attr "alternative" "7")
4340 (if_then_else (match_test "TARGET_64BIT")
4341 (const_string "ssemov")
4342 (const_string "multi"))
4343 (eq_attr "alternative" "8,9,10,11")
4344 (const_string "ssemov")
4345 (eq_attr "alternative" "12,13")
4346 (const_string "mskmov")
4347 ]
4348 (const_string "imovx")))
4349 (set (attr "prefix_extra")
4350 (if_then_else (eq_attr "alternative" "10,11")
4351 (const_string "1")
4352 (const_string "*")))
4353 (set (attr "prefix")
4354 (if_then_else (eq_attr "type" "ssemov")
4355 (const_string "maybe_vex")
4356 (const_string "orig")))
4357 (set (attr "prefix_0f")
4358 (if_then_else (eq_attr "type" "imovx")
4359 (const_string "0")
4360 (const_string "*")))
4361 (set (attr "mode")
4362 (cond [(eq_attr "alternative" "5,6")
4363 (const_string "DI")
4364 (and (eq_attr "alternative" "7")
4365 (match_test "TARGET_64BIT"))
4366 (const_string "TI")
4367 (eq_attr "alternative" "8,10,11")
4368 (const_string "TI")
4369 ]
4370 (const_string "SI")))
4371 (set (attr "preferred_for_speed")
4372 (cond [(eq_attr "alternative" "7")
4373 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
4374 (eq_attr "alternative" "5,8")
4375 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
4376 ]
4377 (symbol_ref "true")))])
4378
;; After reload, a zero extension whose source and destination are both
;; memory is reduced to storing zero into operands[4], one of the two parts
;; of the DImode destination produced by split_double_mode.  The other part
;; is left untouched.  Presumably this is only reached for the "o"/"0"
;; alternative of *zero_extendsidi2, where the source is the destination's
;; own location -- confirm.
4379 (define_split
4380 [(set (match_operand:DI 0 "memory_operand")
4381 (zero_extend:DI (match_operand:SI 1 "memory_operand")))]
4382 "reload_completed"
4383 [(set (match_dup 4) (const_int 0))]
4384 "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
4385
;; !TARGET_64BIT, after reload: when source and destination general
;; registers have the same register number, nothing needs to be copied and
;; the split only stores zero into operands[4], one of the two parts of the
;; destination produced by split_double_mode.
4386 (define_split
4387 [(set (match_operand:DI 0 "general_reg_operand")
4388 (zero_extend:DI (match_operand:SI 1 "general_reg_operand")))]
4389 "!TARGET_64BIT && reload_completed
4390 && REGNO (operands[0]) == REGNO (operands[1])"
4391 [(set (match_dup 4) (const_int 0))]
4392 "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
4393
;; !TARGET_64BIT, after reload, general case (memory-to-memory excluded):
;; copy the SImode source into operands[3] and store zero into operands[4],
;; the two parts of the DImode destination produced by split_double_mode.
4394 (define_split
4395 [(set (match_operand:DI 0 "nonimmediate_gr_operand")
4396 (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))]
4397 "!TARGET_64BIT && reload_completed
4398 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4399 [(set (match_dup 3) (match_dup 1))
4400 (set (match_dup 4) (const_int 0))]
4401 "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
4402
;; Per-mode value for the "isa" attribute of the kmov (mask register)
;; alternatives in the zero_extend<mode>{di,si}2 patterns:
;; QI -> avx512dq, HI -> avx512f, SI and DI -> avx512bw.
4403 (define_mode_attr kmov_isa
4404 [(QI "avx512dq") (HI "avx512f") (SI "avx512bw") (DI "avx512bw")])
4405
;; Zero-extend a SWI12-mode value to DImode on TARGET_64BIT.
;; Alternative 0 is a movz into the %k (32-bit) name of the destination;
;; alternatives 1 and 2 are kmov forms involving mask registers, gated by
;; the <kmov_isa> isa for the source mode.
4406 (define_insn "zero_extend<mode>di2"
4407 [(set (match_operand:DI 0 "register_operand" "=r,*r,*k")
4408 (zero_extend:DI
4409 (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,*k,*km")))]
4410 "TARGET_64BIT"
4411 "@
4412 movz{<imodesuffix>l|x}\t{%1, %k0|%k0, %1}
4413 kmov<mskmodesuffix>\t{%1, %k0|%k0, %1}
4414 kmov<mskmodesuffix>\t{%1, %k0|%k0, %1}"
4415 [(set_attr "isa" "*,<kmov_isa>,<kmov_isa>")
4416 (set_attr "type" "imovx,mskmov,mskmov")
4417 (set_attr "mode" "SI,<MODE>,<MODE>")])
4418
;; Expander for SWI12 -> SImode zero extension.  When
;; TARGET_ZERO_EXTEND_WITH_AND is set and the function is optimized for
;; speed, the source is forced into a register and the flags-clobbering
;; zero_extend<mode>si2_and pattern is emitted instead.  Otherwise the
;; pattern is generated as written.
4419 (define_expand "zero_extend<mode>si2"
4420 [(set (match_operand:SI 0 "register_operand")
4421 (zero_extend:SI (match_operand:SWI12 1 "nonimmediate_operand")))]
4422 ""
4423 {
4424 if (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))
4425 {
4426 operands[1] = force_reg (<MODE>mode, operands[1]);
4427 emit_insn (gen_zero_extend<mode>si2_and (operands[0], operands[1]));
4428 DONE;
4429 }
4430 })
4431
;; Zero extension implemented with AND, valid only when
;; TARGET_ZERO_EXTEND_WITH_AND is set and the function is optimized for
;; speed.  It clobbers the flags and is always split after reload:
;;  - source is not a register, or is a different register from the
;;    destination: clear the destination, then store the source into its
;;    low part (strict_low_part), and stop (DONE);
;;  - source and destination are the same register: fall through to the
;;    template, an AND with the mode mask placed in operands[2].
4432 (define_insn_and_split "zero_extend<mode>si2_and"
4433 [(set (match_operand:SI 0 "register_operand" "=r,?&<r>")
4434 (zero_extend:SI
4435 (match_operand:SWI12 1 "nonimmediate_operand" "0,<r>m")))
4436 (clobber (reg:CC FLAGS_REG))]
4437 "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
4438 "#"
4439 "&& reload_completed"
4440 [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 2)))
4441 (clobber (reg:CC FLAGS_REG))])]
4442 {
4443 if (!REG_P (operands[1])
4444 || REGNO (operands[0]) != REGNO (operands[1]))
4445 {
4446 ix86_expand_clear (operands[0]);
4447
/* The clear-then-partial-write sequence is asserted never to be
   generated when TARGET_PARTIAL_REG_STALL is set.  */
4448 gcc_assert (!TARGET_PARTIAL_REG_STALL);
4449 emit_insn (gen_rtx_SET
4450 (gen_rtx_STRICT_LOW_PART
4451 (VOIDmode, gen_lowpart (<MODE>mode, operands[0])),
4452 operands[1]));
4453 DONE;
4454 }
4455
4456 operands[2] = GEN_INT (GET_MODE_MASK (<MODE>mode));
4457 }
4458 [(set_attr "type" "alu1")
4459 (set_attr "mode" "SI")])
4460
;; SWI12 -> SImode zero extension for the case in which the AND-based
;; variant is not used: the condition is the exact negation of the one on
;; zero_extend<mode>si2_and.  Alternative 0 is movz; alternatives 1 and 2
;; are kmov forms gated by <kmov_isa>.
4461 (define_insn "*zero_extend<mode>si2"
4462 [(set (match_operand:SI 0 "register_operand" "=r,*r,*k")
4463 (zero_extend:SI
4464 (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,*k,*km")))]
4465 "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))"
4466 "@
4467 movz{<imodesuffix>l|x}\t{%1, %0|%0, %1}
4468 kmov<mskmodesuffix>\t{%1, %0|%0, %1}
4469 kmov<mskmodesuffix>\t{%1, %0|%0, %1}"
4470 [(set_attr "isa" "*,<kmov_isa>,<kmov_isa>")
4471 (set_attr "type" "imovx,mskmov,mskmov")
4472 (set_attr "mode" "SI,<MODE>,<MODE>")])
4473
;; Expander for QImode -> HImode zero extension.  When
;; TARGET_ZERO_EXTEND_WITH_AND is set and the function is optimized for
;; speed, the source is forced into a register and zero_extendqihi2_and is
;; emitted; otherwise the pattern is generated as written.
4474 (define_expand "zero_extendqihi2"
4475 [(set (match_operand:HI 0 "register_operand")
4476 (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand")))]
4477 ""
4478 {
4479 if (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))
4480 {
4481 operands[1] = force_reg (QImode, operands[1]);
4482 emit_insn (gen_zero_extendqihi2_and (operands[0], operands[1]));
4483 DONE;
4484 }
4485 })
4486
;; AND-based QImode -> HImode zero extension; same condition and overall
;; structure as zero_extend<mode>si2_and.  Always split after reload:
;;  - source is not a register, or is a different register from the
;;    destination: clear the destination and store the source into its
;;    QImode low part, then stop (DONE);
;;  - same register: operand 0 is re-expressed in SImode so that the
;;    template's (and:SI ... 255) applies to it.
4487 (define_insn_and_split "zero_extendqihi2_and"
4488 [(set (match_operand:HI 0 "register_operand" "=r,?&q")
4489 (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "0,qm")))
4490 (clobber (reg:CC FLAGS_REG))]
4491 "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
4492 "#"
4493 "&& reload_completed"
4494 [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 255)))
4495 (clobber (reg:CC FLAGS_REG))])]
4496 {
4497 if (!REG_P (operands[1])
4498 || REGNO (operands[0]) != REGNO (operands[1]))
4499 {
4500 ix86_expand_clear (operands[0]);
4501
4502 gcc_assert (!TARGET_PARTIAL_REG_STALL);
4503 emit_insn (gen_rtx_SET
4504 (gen_rtx_STRICT_LOW_PART
4505 (VOIDmode, gen_lowpart (QImode, operands[0])),
4506 operands[1]));
4507 DONE;
4508 }
4509
/* Same-register case: switch operand 0 to SImode for the AND template.  */
4510 operands[0] = gen_lowpart (SImode, operands[0]);
4511 }
4512 [(set_attr "type" "alu1")
4513 (set_attr "mode" "SI")])
4514
4515 ; zero extend to SImode to avoid partial register stalls
;; QImode -> HImode zero extension for the case in which the AND-based
;; variant is not used (negated condition of zero_extendqihi2_and).
;; Alternative 0 is a movz into the %k (32-bit) name of the destination;
;; alternatives 1 and 2 are kmovb forms and require avx512dq.
4516 (define_insn "*zero_extendqihi2"
4517 [(set (match_operand:HI 0 "register_operand" "=r,*r,*k")
4518 (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm,*k,*km")))]
4519 "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))"
4520 "@
4521 movz{bl|x}\t{%1, %k0|%k0, %1}
4522 kmovb\t{%1, %k0|%k0, %1}
4523 kmovb\t{%1, %0|%0, %1}"
4524 [(set_attr "isa" "*,avx512dq,avx512dq")
4525 (set_attr "type" "imovx,mskmov,mskmov")
4526 (set_attr "mode" "SI,QI,QI")])
4527
4528 ;; Transform xorl; mov[bw] (set strict_low_part) into movz[bw]l.
;; Matches a flags-clobbering clear of a SWI48 general register followed by
;; a strict_low_part store of a SWI12 value into the same register
;; (checked via REGNO), and replaces both with a single zero_extend.
;; The SImode case is skipped when TARGET_ZERO_EXTEND_WITH_AND is set and
;; the function is optimized for speed, which is exactly the condition
;; under which *zero_extend<mode>si2 is not available.
4529 (define_peephole2
4530 [(parallel [(set (match_operand:SWI48 0 "general_reg_operand")
4531 (const_int 0))
4532 (clobber (reg:CC FLAGS_REG))])
4533 (set (strict_low_part (match_operand:SWI12 1 "general_reg_operand"))
4534 (match_operand:SWI12 2 "nonimmediate_operand"))]
4535 "REGNO (operands[0]) == REGNO (operands[1])
4536 && (<SWI48:MODE>mode != SImode
4537 || !TARGET_ZERO_EXTEND_WITH_AND
4538 || !optimize_function_for_speed_p (cfun))"
4539 [(set (match_dup 0) (zero_extend:SWI48 (match_dup 2)))])
4540
4541 ;; Likewise, but preserving FLAGS_REG.
;; Variant of the xor + strict_low_part peephole in which the register
;; clear is a plain (set reg 0) without a FLAGS_REG clobber.  The condition
;; and the replacement (a single zero_extend of operand 2 into operand 0)
;; are otherwise the same.
4542 (define_peephole2
4543 [(set (match_operand:SWI48 0 "general_reg_operand") (const_int 0))
4544 (set (strict_low_part (match_operand:SWI12 1 "general_reg_operand"))
4545 (match_operand:SWI12 2 "nonimmediate_operand"))]
4546 "REGNO (operands[0]) == REGNO (operands[1])
4547 && (<SWI48:MODE>mode != SImode
4548 || !TARGET_ZERO_EXTEND_WITH_AND
4549 || !optimize_function_for_speed_p (cfun))"
4550 [(set (match_dup 0) (zero_extend:SWI48 (match_dup 2)))])
4551 \f
4552 ;; Sign extension instructions
4553
;; Expander for SImode -> DImode sign extension.  On !TARGET_64BIT it emits
;; extendsidi2_1 (which carries the flags and scratch clobbers); on
;; TARGET_64BIT the pattern is generated as written.
4554 (define_expand "extendsidi2"
4555 [(set (match_operand:DI 0 "register_operand")
4556 (sign_extend:DI (match_operand:SI 1 "register_operand")))]
4557 ""
4558 {
4559 if (!TARGET_64BIT)
4560 {
4561 emit_insn (gen_extendsidi2_1 (operands[0], operands[1]));
4562 DONE;
4563 }
4564 })
4565
;; 64-bit SImode -> DImode sign extension.  Alternative 0 (destination in
;; the "a" register class, source tied to it) emits cltq/cdqe, which takes
;; no explicit operands (modrm "0"); alternative 1 emits movs with a
;; register or memory source (modrm "1").
4566 (define_insn "*extendsidi2_rex64"
4567 [(set (match_operand:DI 0 "register_operand" "=*a,r")
4568 (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "*0,rm")))]
4569 "TARGET_64BIT"
4570 "@
4571 {cltq|cdqe}
4572 movs{lq|x}\t{%1, %0|%0, %1}"
4573 [(set_attr "type" "imovx")
4574 (set_attr "mode" "DI")
4575 (set_attr "prefix_0f" "0")
4576 (set_attr "modrm" "0,1")])
4577
;; SImode -> DImode sign extension for !TARGET_64BIT.  There is no direct
;; assembly ("#"); the define_splits that follow expand it after reload.
;; The pattern clobbers the flags and an SImode scratch; the scratch is a
;; real early-clobber register ("&r") only for the memory-destination
;; alternative and "X" for the others.
4578 (define_insn "extendsidi2_1"
4579 [(set (match_operand:DI 0 "nonimmediate_operand" "=*A,r,?r,?*o")
4580 (sign_extend:DI (match_operand:SI 1 "register_operand" "0,0,r,r")))
4581 (clobber (reg:CC FLAGS_REG))
4582 (clobber (match_scratch:SI 2 "=X,X,X,&r"))]
4583 "!TARGET_64BIT"
4584 "#")
4585
4586 ;; Split the memory case. If the source register doesn't die, it will stay
4587 ;; this way, if it does die, following peephole2s take care of it.
;; Memory-destination sign extension, split after reload.  The source is
;; stored into operands[3]; the scratch register (operand 2) is loaded with
;; the source arithmetically shifted right by 31; and the scratch is then
;; stored into operands[4].  operands[3] and operands[4] are the two parts
;; of the destination produced by split_double_mode.
4588 (define_split
4589 [(set (match_operand:DI 0 "memory_operand")
4590 (sign_extend:DI (match_operand:SI 1 "register_operand")))
4591 (clobber (reg:CC FLAGS_REG))
4592 (clobber (match_operand:SI 2 "register_operand"))]
4593 "reload_completed"
4594 [(const_int 0)]
4595 {
4596 split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);
4597
4598 emit_move_insn (operands[3], operands[1]);
4599
4600 /* Generate a cltd if possible and doing so is profitable. */
4601 if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
4602 && REGNO (operands[1]) == AX_REG
4603 && REGNO (operands[2]) == DX_REG)
4604 {
4605 emit_insn (gen_ashrsi3_cvt (operands[2], operands[1], GEN_INT (31)));
4606 }
4607 else
4608 {
/* Otherwise copy the source into the scratch and shift the scratch
   in place.  */
4609 emit_move_insn (operands[2], operands[1]);
4610 emit_insn (gen_ashrsi3_cvt (operands[2], operands[2], GEN_INT (31)));
4611 }
4612 emit_move_insn (operands[4], operands[2]);
4613 DONE;
4614 })
4615
4616 ;; Peepholes for the case where the source register does die, after
4617 ;; being split with the above splitter.
;; Matches the four-insn sequence: store reg1 to memory; copy reg1 into
;; reg2; shift reg2 right arithmetically by 31; store reg2 to memory.  When
;; the peep2_reg_dead_p checks below hold for reg1 and reg2, reg1 and reg2
;; are different registers, and reg2 is not mentioned in the second memory
;; operand, the copy is dropped and the shift is done in reg1 itself.
4618 (define_peephole2
4619 [(set (match_operand:SI 0 "memory_operand")
4620 (match_operand:SI 1 "general_reg_operand"))
4621 (set (match_operand:SI 2 "general_reg_operand") (match_dup 1))
4622 (parallel [(set (match_dup 2)
4623 (ashiftrt:SI (match_dup 2) (const_int 31)))
4624 (clobber (reg:CC FLAGS_REG))])
4625 (set (match_operand:SI 3 "memory_operand") (match_dup 2))]
4626 "REGNO (operands[1]) != REGNO (operands[2])
4627 && peep2_reg_dead_p (2, operands[1])
4628 && peep2_reg_dead_p (4, operands[2])
4629 && !reg_mentioned_p (operands[2], operands[3])"
4630 [(set (match_dup 0) (match_dup 1))
4631 (parallel [(set (match_dup 1) (ashiftrt:SI (match_dup 1) (const_int 31)))
4632 (clobber (reg:CC FLAGS_REG))])
4633 (set (match_dup 3) (match_dup 1))])
4634
;; Three-insn variant of the dead-source peephole, for the case where the
;; shift already writes reg2 directly from reg1 with reg1 = AX and
;; reg2 = DX.  It is applied only when not optimizing for size (see the
;; comment inside the condition) and both peep2_reg_dead_p checks hold;
;; the shift is then done in reg1 itself and reg1 is stored instead.
4635 (define_peephole2
4636 [(set (match_operand:SI 0 "memory_operand")
4637 (match_operand:SI 1 "general_reg_operand"))
4638 (parallel [(set (match_operand:SI 2 "general_reg_operand")
4639 (ashiftrt:SI (match_dup 1) (const_int 31)))
4640 (clobber (reg:CC FLAGS_REG))])
4641 (set (match_operand:SI 3 "memory_operand") (match_dup 2))]
4642 "/* cltd is shorter than sarl $31, %eax */
4643 !optimize_function_for_size_p (cfun)
4644 && REGNO (operands[1]) == AX_REG
4645 && REGNO (operands[2]) == DX_REG
4646 && peep2_reg_dead_p (2, operands[1])
4647 && peep2_reg_dead_p (3, operands[2])
4648 && !reg_mentioned_p (operands[2], operands[3])"
4649 [(set (match_dup 0) (match_dup 1))
4650 (parallel [(set (match_dup 1) (ashiftrt:SI (match_dup 1) (const_int 31)))
4651 (clobber (reg:CC FLAGS_REG))])
4652 (set (match_dup 3) (match_dup 1))])
4653
4654 ;; Extend to register case. Optimize case where source and destination
4655 ;; registers match and cases where we can use cltd.
;; Register-destination sign extension, split after reload.  operands[3]
;; and operands[4] are the two SImode parts of the destination produced by
;; split_double_mode: operands[3] receives the source value and operands[4]
;; receives the source shifted right arithmetically by 31.  Moves are
;; skipped when the register numbers already coincide.  The scratch
;; (operand 2) is not used by this splitter.
4656 (define_split
4657 [(set (match_operand:DI 0 "register_operand")
4658 (sign_extend:DI (match_operand:SI 1 "register_operand")))
4659 (clobber (reg:CC FLAGS_REG))
4660 (clobber (match_scratch:SI 2))]
4661 "reload_completed"
4662 [(const_int 0)]
4663 {
4664 split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);
4665
4666 if (REGNO (operands[3]) != REGNO (operands[1]))
4667 emit_move_insn (operands[3], operands[1]);
4668
4669 /* Generate a cltd if possible and doing so is profitable. */
4670 if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
4671 && REGNO (operands[3]) == AX_REG
4672 && REGNO (operands[4]) == DX_REG)
4673 {
4674 emit_insn (gen_ashrsi3_cvt (operands[4], operands[3], GEN_INT (31)));
4675 DONE;
4676 }
4677
/* General case: get the source into operands[4] (unless it is already
   there) and shift it in place.  */
4678 if (REGNO (operands[4]) != REGNO (operands[1]))
4679 emit_move_insn (operands[4], operands[1]);
4680
4681 emit_insn (gen_ashrsi3_cvt (operands[4], operands[4], GEN_INT (31)));
4682 DONE;
4683 })
4684
;; Sign-extend a SWI12-mode register or memory operand to DImode with movs.
;; Only available on TARGET_64BIT.
4685 (define_insn "extend<mode>di2"
4686 [(set (match_operand:DI 0 "register_operand" "=r")
4687 (sign_extend:DI
4688 (match_operand:SWI12 1 "nonimmediate_operand" "<r>m")))]
4689 "TARGET_64BIT"
4690 "movs{<imodesuffix>q|x}\t{%1, %0|%0, %1}"
4691 [(set_attr "type" "imovx")
4692 (set_attr "mode" "DI")])
4693
;; HImode -> SImode sign extension.  The output is keyed on the "prefix_0f"
;; attribute rather than on the alternative: prefix_0f is 0 only for
;; alternative 0 (destination in the "a" class, source tied to it) when the
;; "cpu" attribute is not k6, and in that case cwtl/cwde is emitted.
;; Everything else, including alternative 0 on k6, uses movs.
4694 (define_insn "extendhisi2"
4695 [(set (match_operand:SI 0 "register_operand" "=*a,r")
4696 (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "*0,rm")))]
4697 ""
4698 {
4699 switch (get_attr_prefix_0f (insn))
4700 {
4701 case 0:
4702 return "{cwtl|cwde}";
4703 default:
4704 return "movs{wl|x}\t{%1, %0|%0, %1}";
4705 }
4706 }
4707 [(set_attr "type" "imovx")
4708 (set_attr "mode" "SI")
4709 (set (attr "prefix_0f")
4710 ;; movsx is short decodable while cwtl is vector decoded.
4711 (if_then_else (and (eq_attr "cpu" "!k6")
4712 (eq_attr "alternative" "0"))
4713 (const_string "0")
4714 (const_string "1")))
4715 (set (attr "znver1_decode")
4716 (if_then_else (eq_attr "prefix_0f" "0")
4717 (const_string "double")
4718 (const_string "direct")))
4719 (set (attr "modrm")
4720 (if_then_else (eq_attr "prefix_0f" "0")
4721 (const_string "0")
4722 (const_string "1")))])
4723
;; HImode sign-extended to SImode and then zero-extended to DImode, on
;; TARGET_64BIT.  Same prefix_0f-based selection as extendhisi2: cwtl/cwde
;; when prefix_0f is 0, otherwise movs into the %k (32-bit) name of the
;; destination.
4724 (define_insn "*extendhisi2_zext"
4725 [(set (match_operand:DI 0 "register_operand" "=*a,r")
4726 (zero_extend:DI
4727 (sign_extend:SI
4728 (match_operand:HI 1 "nonimmediate_operand" "*0,rm"))))]
4729 "TARGET_64BIT"
4730 {
4731 switch (get_attr_prefix_0f (insn))
4732 {
4733 case 0:
4734 return "{cwtl|cwde}";
4735 default:
4736 return "movs{wl|x}\t{%1, %k0|%k0, %1}";
4737 }
4738 }
4739 [(set_attr "type" "imovx")
4740 (set_attr "mode" "SI")
4741 (set (attr "prefix_0f")
4742 ;; movsx is short decodable while cwtl is vector decoded.
4743 (if_then_else (and (eq_attr "cpu" "!k6")
4744 (eq_attr "alternative" "0"))
4745 (const_string "0")
4746 (const_string "1")))
4747 (set (attr "modrm")
4748 (if_then_else (eq_attr "prefix_0f" "0")
4749 (const_string "0")
4750 (const_string "1")))])
4751
;; QImode -> SImode sign extension with movs; the source is a "q"-class
;; register or memory.  No target condition.
4752 (define_insn "extendqisi2"
4753 [(set (match_operand:SI 0 "register_operand" "=r")
4754 (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
4755 ""
4756 "movs{bl|x}\t{%1, %0|%0, %1}"
4757 [(set_attr "type" "imovx")
4758 (set_attr "mode" "SI")])
4759
;; QImode sign-extended to SImode and then zero-extended to DImode, on
;; TARGET_64BIT.  Emitted as the same movs as extendqisi2, written to the
;; %k (32-bit) name of the DImode destination.
4760 (define_insn "*extendqisi2_zext"
4761 [(set (match_operand:DI 0 "register_operand" "=r")
4762 (zero_extend:DI
4763 (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm"))))]
4764 "TARGET_64BIT"
4765 "movs{bl|x}\t{%1, %k0|%k0, %1}"
4766 [(set_attr "type" "imovx")
4767 (set_attr "mode" "SI")])
4768
;; QImode -> HImode sign extension.  As in extendhisi2, the output is keyed
;; on "prefix_0f": it is 0 only for alternative 0 (destination in the "a"
;; class, source tied to it) when the "cpu" attribute is not k6, and then
;; cbtw/cbw is emitted; otherwise movs is used.
4769 (define_insn "extendqihi2"
4770 [(set (match_operand:HI 0 "register_operand" "=*a,r")
4771 (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "*0,qm")))]
4772 ""
4773 {
4774 switch (get_attr_prefix_0f (insn))
4775 {
4776 case 0:
4777 return "{cbtw|cbw}";
4778 default:
4779 return "movs{bw|x}\t{%1, %0|%0, %1}";
4780 }
4781 }
4782 [(set_attr "type" "imovx")
4783 (set_attr "mode" "HI")
4784 (set (attr "prefix_0f")
4785 ;; movsx is short decodable while cwtl is vector decoded.
;; NOTE(review): this pattern emits cbtw/cbw, not cwtl; the comment above
;; looks carried over from extendhisi2 -- confirm it applies to cbtw too.
4786 (if_then_else (and (eq_attr "cpu" "!k6")
4787 (eq_attr "alternative" "0"))
4788 (const_string "0")
4789 (const_string "1")))
4790 (set (attr "modrm")
4791 (if_then_else (eq_attr "prefix_0f" "0")
4792 (const_string "0")
4793 (const_string "1")))])
4794 \f
4795 ;; Conversions between float and double.
4796
4797 ;; These are all no-ops in the model used for the 80387.
4798 ;; So just emit moves.
4799
4800 ;; %%% Kill these when call knows how to work out a DFmode push earlier.
;; After reload, split a push of an SFmode FP register extended to DFmode
;; into an explicit stack-pointer adjustment by -8 followed by a store of
;; the extended value at the new stack pointer.
4801 (define_split
4802 [(set (match_operand:DF 0 "push_operand")
4803 (float_extend:DF (match_operand:SF 1 "fp_register_operand")))]
4804 "reload_completed"
4805 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8)))
4806 (set (mem:DF (reg:P SP_REG)) (float_extend:DF (match_dup 1)))])
4807
;; After reload, split a push of a MODEF FP register extended to XFmode
;; into a stack-pointer adjustment followed by a store at the new stack
;; pointer.  The adjustment is computed in the preparation code as
;; -GET_MODE_SIZE (XFmode) rather than hard-coded.
4808 (define_split
4809 [(set (match_operand:XF 0 "push_operand")
4810 (float_extend:XF (match_operand:MODEF 1 "fp_register_operand")))]
4811 "reload_completed"
4812 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
4813 (set (mem:XF (reg:P SP_REG)) (float_extend:XF (match_dup 1)))]
4814 "operands[2] = GEN_INT (-GET_MODE_SIZE (XFmode));")
4815
;; Expander for SFmode -> DFmode extension, available with the 387 or with
;; SSE2 math.  A CONST_DOUBLE source is handled specially:
;;  - if it is a standard 387 constant and SSE2 is absent or mixed
;;    SSE/387 math is on, the constant is extended to DFmode at compile
;;    time and emitted as a plain move (DONE);
;;  - otherwise the SFmode constant is forced into memory and the normal
;;    pattern is generated with that memory operand.
4816 (define_expand "extendsfdf2"
4817 [(set (match_operand:DF 0 "nonimm_ssenomem_operand")
4818 (float_extend:DF (match_operand:SF 1 "general_operand")))]
4819 "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
4820 {
4821 /* ??? Needed for compress_float_constant since all fp constants
4822 are TARGET_LEGITIMATE_CONSTANT_P. */
4823 if (CONST_DOUBLE_P (operands[1]))
4824 {
4825 if ((!TARGET_SSE2 || TARGET_MIX_SSE_I387)
4826 && standard_80387_constant_p (operands[1]) > 0)
4827 {
4828 operands[1] = simplify_const_unary_operation
4829 (FLOAT_EXTEND, DFmode, operands[1], SFmode);
4830 emit_move_insn_1 (operands[0], operands[1]);
4831 DONE;
4832 }
4833 operands[1] = validize_mem (force_const_mem (SFmode, operands[1]));
4834 }
4835 })
4836
;; SFmode -> DFmode extension insn with four alternatives:
;;   0,1: x87 (f <- fm, m <- f), emitted through output_387_reg_move;
;;   2,3: SSE cvtss2sd from a register (2) or from memory (3).
;; The "enabled" attribute selects between the two families: with SSE2
;; math the x87 alternatives are enabled only under TARGET_MIX_SSE_I387 and
;; the SSE ones always; without SSE2 math only the x87 ones are enabled.
4837 (define_insn "*extendsfdf2"
4838 [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v,v")
4839 (float_extend:DF
4840 (match_operand:SF 1 "nonimmediate_operand" "fm,f,v,m")))]
4841 "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
4842 {
4843 switch (which_alternative)
4844 {
4845 case 0:
4846 case 1:
4847 return output_387_reg_move (insn, operands);
4848
4849 case 2:
4850 return "%vcvtss2sd\t{%d1, %0|%0, %d1}";
4851 case 3:
4852 return "%vcvtss2sd\t{%1, %d0|%d0, %1}";
4853
4854 default:
4855 gcc_unreachable ();
4856 }
4857 }
4858 [(set_attr "type" "fmov,fmov,ssecvt,ssecvt")
4859 (set_attr "avx_partial_xmm_update" "false,false,false,true")
4860 (set_attr "prefix" "orig,orig,maybe_vex,maybe_vex")
4861 (set_attr "mode" "SF,XF,DF,DF")
4862 (set (attr "enabled")
4863 (if_then_else
4864 (match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
4865 (if_then_else
4866 (eq_attr "alternative" "0,1")
4867 (symbol_ref "TARGET_MIX_SSE_I387")
4868 (symbol_ref "true"))
4869 (if_then_else
4870 (eq_attr "alternative" "0,1")
4871 (symbol_ref "true")
4872 (symbol_ref "false"))))])
4873
4874 /* For converting SF(xmm2) to DF(xmm1), use the following code instead of
4875 cvtss2sd:
4876 unpcklps xmm2,xmm2 ; packed conversion might crash on signaling NaNs
4877 cvtps2pd xmm2,xmm1
4878 We do the conversion post reload to avoid producing 128-bit spills
4879 that might lead to an ICE on 32-bit targets. The sequence is unlikely
4880 to combine anyway. */
;; Post-reload split, used with TARGET_USE_VECTOR_FP_CONVERTS when
;; optimizing for speed (and, for extended SSE registers, only with
;; AVX512VL). The scalar extension is replaced by a V2SF -> V2DF
;; float_extend of the low two elements of a V4SF register view:
;;   operands[2]  V2DF view of the destination;
;;   operands[3]  V4SF view holding the prepared source.
;; A register source is either copied into the destination first or
;; unpacked in place (see the inline comments), then interleaved with
;; itself; extended SSE registers use the avx512f vec_dup pattern on a
;; V16SF view instead. A memory source is loaded with vec_setv4sf_0 over
;; a zero vector.
4881 (define_split
4882 [(set (match_operand:DF 0 "sse_reg_operand")
4883 (float_extend:DF
4884 (match_operand:SF 1 "nonimmediate_operand")))]
4885 "TARGET_USE_VECTOR_FP_CONVERTS
4886 && optimize_insn_for_speed_p ()
4887 && reload_completed
4888 && (!EXT_REX_SSE_REG_P (operands[0])
4889 || TARGET_AVX512VL)"
4890 [(set (match_dup 2)
4891 (float_extend:V2DF
4892 (vec_select:V2SF
4893 (match_dup 3)
4894 (parallel [(const_int 0) (const_int 1)]))))]
4895 {
4896 operands[2] = lowpart_subreg (V2DFmode, operands[0], DFmode);
4897 operands[3] = lowpart_subreg (V4SFmode, operands[0], DFmode);
4898 /* Use movss for loading from memory, unpcklps reg, reg for registers.
4899 Try to avoid move when unpacking can be done in source. */
4900 if (REG_P (operands[1]))
4901 {
4902 /* If it is unsafe to overwrite upper half of source, we need
4903 to move to destination and unpack there. */
4904 if (REGNO (operands[0]) != REGNO (operands[1])
4905 || (EXT_REX_SSE_REG_P (operands[1])
4906 && !TARGET_AVX512VL))
4907 {
4908 rtx tmp = lowpart_subreg (SFmode, operands[0], DFmode);
4909 emit_move_insn (tmp, operands[1]);
4910 }
4911 else
4912 operands[3] = lowpart_subreg (V4SFmode, operands[1], SFmode);
4913 /* FIXME: vec_interleave_lowv4sf for AVX512VL should allow
4914 =v, v, then vbroadcastss will be only needed for AVX512F without
4915 AVX512VL. */
4916 if (!EXT_REX_SSE_REGNO_P (REGNO (operands[3])))
4917 emit_insn (gen_vec_interleave_lowv4sf (operands[3], operands[3],
4918 operands[3]));
4919 else
4920 {
4921 rtx tmp = lowpart_subreg (V16SFmode, operands[3], V4SFmode);
4922 emit_insn (gen_avx512f_vec_dupv16sf_1 (tmp, tmp));
4923 }
4924 }
4925 else
4926 emit_insn (gen_vec_setv4sf_0 (operands[3],
4927 CONST0_RTX (V4SFmode), operands[1]));
4928 })
4929
4930 ;; It's more profitable to split and then extend in the same register.
;; With TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS and when optimizing for
;; speed, replace an SF -> DF extension from memory by two insns: load
;; the SFmode value into the low part of the destination register
;; (operands[2]), then extend register-to-register.
4931 (define_peephole2
4932 [(set (match_operand:DF 0 "sse_reg_operand")
4933 (float_extend:DF
4934 (match_operand:SF 1 "memory_operand")))]
4935 "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
4936 && optimize_insn_for_speed_p ()"
4937 [(set (match_dup 2) (match_dup 1))
4938 (set (match_dup 0) (float_extend:DF (match_dup 2)))]
4939 "operands[2] = lowpart_subreg (SFmode, operands[0], DFmode);")
4940
4941 ;; Break partial SSE register dependency stall. This splitter should split
4942 ;; late in the pass sequence (after register rename pass), so allocated
4943 ;; registers won't change anymore
4944
;; Non-AVX split applied once epilogue_completed is set, when
;; TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY holds and the function is
;; optimized for speed. It is skipped when source and destination are the
;; same register. The preparation code first clears the whole V2DF view
;; of the destination, then the conversion is re-expressed as a vec_merge
;; (mask 1) of the converted value into that cleared register.
4945 (define_split
4946 [(set (match_operand:DF 0 "sse_reg_operand")
4947 (float_extend:DF
4948 (match_operand:SF 1 "nonimmediate_operand")))]
4949 "!TARGET_AVX
4950 && TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY
4951 && epilogue_completed
4952 && optimize_function_for_speed_p (cfun)
4953 && (!REG_P (operands[1])
4954 || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1])))
4955 && (!EXT_REX_SSE_REG_P (operands[0])
4956 || TARGET_AVX512VL)"
4957 [(set (match_dup 0)
4958 (vec_merge:V2DF
4959 (vec_duplicate:V2DF
4960 (float_extend:DF
4961 (match_dup 1)))
4962 (match_dup 0)
4963 (const_int 1)))]
4964 {
4965 operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode);
4966 emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
4967 })
4968
;; Expander for HFmode -> SFmode extension. With TARGET_AVX512FP16 the
;; pattern is emitted as written. Otherwise the conversion goes through
;; vectors: the source is inserted into element 0 of a zeroed V8HF
;; register, converted with vcvtph2ps (on a V8HI view of that register)
;; into a V4SF result, and the SFmode low part of the result is moved to
;; the destination.
4969 (define_expand "extendhfsf2"
4970 [(set (match_operand:SF 0 "register_operand")
4971 (float_extend:SF
4972 (match_operand:HF 1 "nonimmediate_operand")))]
4973 "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
4974 {
4975 if (!TARGET_AVX512FP16)
4976 {
4977 rtx res = gen_reg_rtx (V4SFmode);
4978 rtx tmp = gen_reg_rtx (V8HFmode);
4979 rtx zero = force_reg (V8HFmode, CONST0_RTX (V8HFmode));
4980
4981 emit_insn (gen_vec_setv8hf_0 (tmp, zero, operands[1]));
4982 emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp)));
4983 emit_move_insn (operands[0], gen_lowpart (SFmode, res));
4984 DONE;
4985 }
4986 })
4987
;; Expander for HFmode -> DFmode extension. Only available with
;; TARGET_AVX512FP16; there is no preparation code, so the pattern is
;; emitted as written.
4988 (define_expand "extendhfdf2"
4989 [(set (match_operand:DF 0 "register_operand")
4990 (float_extend:DF
4991 (match_operand:HF 1 "nonimmediate_operand")))]
4992 "TARGET_AVX512FP16")
4993
;; AVX512FP16 insn extending an HFmode register or memory operand to a
;; MODEF-mode "v" register with vcvtsh2<ssemodesuffix>. The destination
;; register is also printed as the instruction's extra source operand.
4994 (define_insn "*extendhf<mode>2"
4995 [(set (match_operand:MODEF 0 "register_operand" "=v")
4996 (float_extend:MODEF
4997 (match_operand:HF 1 "nonimmediate_operand" "vm")))]
4998 "TARGET_AVX512FP16"
4999 "vcvtsh2<ssemodesuffix>\t{%1, %0, %0|%0, %0, %1}"
5000 [(set_attr "type" "ssecvt")
5001 (set_attr "prefix" "evex")
5002 (set_attr "mode" "<MODE>")])
5003
5004
;; Expander for MODEF -> XFmode extension (x87 only). The preparation
;; code only deals with a CONST_DOUBLE source: a standard 80387 constant
;; is extended at compile time and emitted as a plain move (DONE); any
;; other constant is forced into the constant pool as a <MODE>mode memory
;; operand and the pattern is emitted with that operand.
5005 (define_expand "extend<mode>xf2"
5006 [(set (match_operand:XF 0 "nonimmediate_operand")
5007 (float_extend:XF (match_operand:MODEF 1 "general_operand")))]
5008 "TARGET_80387"
5009 {
5010 /* ??? Needed for compress_float_constant since all fp constants
5011 are TARGET_LEGITIMATE_CONSTANT_P. */
5012 if (CONST_DOUBLE_P (operands[1]))
5013 {
5014 if (standard_80387_constant_p (operands[1]) > 0)
5015 {
5016 operands[1] = simplify_const_unary_operation
5017 (FLOAT_EXTEND, XFmode, operands[1], <MODE>mode);
5018 emit_move_insn_1 (operands[0], operands[1]);
5019 DONE;
5020 }
5021 operands[1] = validize_mem (force_const_mem (<MODE>mode, operands[1]));
5022 }
5023 })
5024
;; x87 insn extending a MODEF value to XFmode. Alternative 0 targets an
;; "f" register from a register or memory source; alternative 1 stores an
;; "f" register to memory. The assembly text comes from
;; output_387_reg_move.
5025 (define_insn "*extend<mode>xf2_i387"
5026 [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m")
5027 (float_extend:XF
5028 (match_operand:MODEF 1 "nonimmediate_operand" "fm,f")))]
5029 "TARGET_80387"
5030 "* return output_387_reg_move (insn, operands);"
5031 [(set_attr "type" "fmov")
5032 (set_attr "mode" "<MODE>,XF")])
5033
5034 ;; %%% This seems like bad news.
5035 ;; This cannot output into an f-reg because there is no way to be sure
5036 ;; of truncating in that case. Otherwise this is just like a simple move
5037 ;; insn. So we pretend we can output to a reg in order to get better
5038 ;; register preferencing, but we really use a stack slot.
5039
5040 ;; Conversion from DFmode to SFmode.
5041
;; DFmode -> SFmode truncation insn with four alternatives:
;;   0     "f" register stored to memory, via output_387_reg_move;
;;   1     "f" register to "f" register, via output_387_reg_move;
;;   2, 3  cvtsd2ss from a "v" register or from memory.
;; The "enabled" attribute: with SSE2 math, alternatives 2/3 are always
;; enabled and 0/1 need TARGET_MIX_SSE_I387; without SSE2 math only 0/1
;; are candidates. In both cases alternative 1 additionally requires
;; flag_unsafe_math_optimizations.
5042 (define_insn "truncdfsf2"
5043 [(set (match_operand:SF 0 "nonimm_ssenomem_operand" "=m,f,v,v")
5044 (float_truncate:SF
5045 (match_operand:DF 1 "register_ssemem_operand" "f,f,v,m")))]
5046 "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
5047 {
5048 switch (which_alternative)
5049 {
5050 case 0:
5051 case 1:
5052 return output_387_reg_move (insn, operands);
5053
5054 case 2:
5055 return "%vcvtsd2ss\t{%d1, %0|%0, %d1}";
5056 case 3:
5057 return "%vcvtsd2ss\t{%1, %d0|%d0, %1}";
5058
5059 default:
5060 gcc_unreachable ();
5061 }
5062 }
5063 [(set_attr "type" "fmov,fmov,ssecvt,ssecvt")
5064 (set_attr "avx_partial_xmm_update" "false,false,false,true")
5065 (set_attr "mode" "SF")
5066 (set (attr "enabled")
5067 (if_then_else
5068 (match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
5069 (cond [(eq_attr "alternative" "0")
5070 (symbol_ref "TARGET_MIX_SSE_I387")
5071 (eq_attr "alternative" "1")
5072 (symbol_ref "TARGET_MIX_SSE_I387
5073 && flag_unsafe_math_optimizations")
5074 ]
5075 (symbol_ref "true"))
5076 (cond [(eq_attr "alternative" "0")
5077 (symbol_ref "true")
5078 (eq_attr "alternative" "1")
5079 (symbol_ref "flag_unsafe_math_optimizations")
5080 ]
5081 (symbol_ref "false"))))])
5082
5083 /* For converting DF(xmm2) to SF(xmm1), use the following code instead of
5084 cvtsd2ss:
5085 unpcklpd xmm2,xmm2 ; packed conversion might crash on signaling NaNs
5086 cvtpd2ps xmm2,xmm1
5087 We do the conversion post reload to avoid producing 128-bit spills
5088 that might lead to an ICE on 32-bit targets. The sequence is unlikely
5089 to combine anyway. */
;; Post-reload split, used with TARGET_USE_VECTOR_FP_CONVERTS when
;; optimizing for speed (and, for extended SSE registers, only with
;; AVX512VL). The scalar truncation is replaced by a V2DF -> V2SF
;; float_truncate concatenated with a zero V2SF into a V4SF result:
;;   operands[2]  V4SF view of the destination;
;;   operands[3]  zero V2SF upper half;
;;   operands[4]  V2DF view holding the prepared source.
;; A register source is duplicated into operands[4] with vec_dupv2df,
;; after being copied into the destination first when required (see the
;; inline comment); a memory source is concatenated with a DFmode zero
;; via vec_concatv2df.
5090 (define_split
5091 [(set (match_operand:SF 0 "sse_reg_operand")
5092 (float_truncate:SF
5093 (match_operand:DF 1 "nonimmediate_operand")))]
5094 "TARGET_USE_VECTOR_FP_CONVERTS
5095 && optimize_insn_for_speed_p ()
5096 && reload_completed
5097 && (!EXT_REX_SSE_REG_P (operands[0])
5098 || TARGET_AVX512VL)"
5099 [(set (match_dup 2)
5100 (vec_concat:V4SF
5101 (float_truncate:V2SF
5102 (match_dup 4))
5103 (match_dup 3)))]
5104 {
5105 operands[2] = lowpart_subreg (V4SFmode, operands[0], SFmode);
5106 operands[3] = CONST0_RTX (V2SFmode);
5107 operands[4] = lowpart_subreg (V2DFmode, operands[0], SFmode);
5108 /* Use movsd for loading from memory, unpcklpd for registers.
5109 Try to avoid move when unpacking can be done in source, or SSE3
5110 movddup is available. */
5111 if (REG_P (operands[1]))
5112 {
5113 if ((!TARGET_SSE3 && REGNO (operands[0]) != REGNO (operands[1]))
5114 || (EXT_REX_SSE_REG_P (operands[1]) && !TARGET_AVX512VL))
5115 {
5116 rtx tmp = lowpart_subreg (DFmode, operands[0], SFmode);
5117 emit_move_insn (tmp, operands[1]);
5118 operands[1] = tmp;
5119 }
5120 else if (!TARGET_SSE3)
5121 operands[4] = lowpart_subreg (V2DFmode, operands[1], DFmode);
5122 emit_insn (gen_vec_dupv2df (operands[4], operands[1]));
5123 }
5124 else
5125 emit_insn (gen_vec_concatv2df (operands[4], operands[1],
5126 CONST0_RTX (DFmode)));
5127 })
5128
5129 ;; It's more profitable to split and then truncate in the same register.
;; With TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS and when optimizing for
;; speed, replace a DF -> SF truncation from memory by two insns: load
;; the DFmode value into a DFmode view of the destination register
;; (operands[2]), then truncate register-to-register.
5130 (define_peephole2
5131 [(set (match_operand:SF 0 "sse_reg_operand")
5132 (float_truncate:SF
5133 (match_operand:DF 1 "memory_operand")))]
5134 "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
5135 && optimize_insn_for_speed_p ()"
5136 [(set (match_dup 2) (match_dup 1))
5137 (set (match_dup 0) (float_truncate:SF (match_dup 2)))]
5138 "operands[2] = lowpart_subreg (DFmode, operands[0], SFmode);")
5139
5140 ;; Break partial SSE register dependency stall. This splitter should split
5141 ;; late in the pass sequence (after register rename pass), so allocated
5142 ;; registers won't change anymore
5143
;; Non-AVX split applied once epilogue_completed is set, when
;; TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY holds and the function is
;; optimized for speed. It is skipped when source and destination are the
;; same register. The preparation code first clears the whole V4SF view
;; of the destination, then the truncation is re-expressed as a vec_merge
;; (mask 1) of the converted value into that cleared register.
5144 (define_split
5145 [(set (match_operand:SF 0 "sse_reg_operand")
5146 (float_truncate:SF
5147 (match_operand:DF 1 "nonimmediate_operand")))]
5148 "!TARGET_AVX
5149 && TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY
5150 && epilogue_completed
5151 && optimize_function_for_speed_p (cfun)
5152 && (!REG_P (operands[1])
5153 || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1])))
5154 && (!EXT_REX_SSE_REG_P (operands[0])
5155 || TARGET_AVX512VL)"
5156 [(set (match_dup 0)
5157 (vec_merge:V4SF
5158 (vec_duplicate:V4SF
5159 (float_truncate:SF
5160 (match_dup 1)))
5161 (match_dup 0)
5162 (const_int 1)))]
5163 {
5164 operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode);
5165 emit_move_insn (operands[0], CONST0_RTX (V4SFmode));
5166 })
5167
5168 ;; Conversion from XFmode to {SF,DF}mode
5169
;; x87 insn truncating an XFmode "f" register to a MODEF value, output by
;; output_387_reg_move. Alternative 0 stores to memory and is always
;; enabled; alternative 1 (register destination) is enabled only with
;; flag_unsafe_math_optimizations.
5170 (define_insn "truncxf<mode>2"
5171 [(set (match_operand:MODEF 0 "nonimmediate_operand" "=m,f")
5172 (float_truncate:MODEF
5173 (match_operand:XF 1 "register_operand" "f,f")))]
5174 "TARGET_80387"
5175 "* return output_387_reg_move (insn, operands);"
5176 [(set_attr "type" "fmov")
5177 (set_attr "mode" "<MODE>")
5178 (set (attr "enabled")
5179 (cond [(eq_attr "alternative" "1")
5180 (symbol_ref "flag_unsafe_math_optimizations")
5181 ]
5182 (symbol_ref "true")))])
5183
5184 ;; Conversion from {SF,DF}mode to HFmode.
5185
;; Expander for SFmode -> HFmode truncation. With TARGET_AVX512FP16 the
;; pattern is emitted as written. Otherwise the conversion goes through
;; vectors: the source is inserted into element 0 of a zeroed V4SF
;; register, converted with vcvtps2ph into a V8HI view of a V8HF result,
;; and the HFmode low part of the result is moved to the destination.
;; NOTE(review): the immediate 4 passed to vcvtps2ph presumably selects
;; the rounding control -- confirm against the vcvtps2ph pattern.
5186 (define_expand "truncsfhf2"
5187 [(set (match_operand:HF 0 "register_operand")
5188 (float_truncate:HF
5189 (match_operand:SF 1 "nonimmediate_operand")))]
5190 "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
5191 {
5192 if (!TARGET_AVX512FP16)
5193 {
5194 rtx res = gen_reg_rtx (V8HFmode);
5195 rtx tmp = gen_reg_rtx (V4SFmode);
5196 rtx zero = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
5197
5198 emit_insn (gen_vec_setv4sf_0 (tmp, zero, operands[1]));
5199 emit_insn (gen_vcvtps2ph (gen_lowpart (V8HImode, res), tmp, GEN_INT (4)));
5200 emit_move_insn (operands[0], gen_lowpart (HFmode, res));
5201 DONE;
5202 }
5203 })
5204
;; Expander for DFmode -> HFmode truncation. Only available with
;; TARGET_AVX512FP16; there is no preparation code, so the pattern is
;; emitted as written.
5205 (define_expand "truncdfhf2"
5206 [(set (match_operand:HF 0 "register_operand")
5207 (float_truncate:HF
5208 (match_operand:DF 1 "nonimmediate_operand")))]
5209 "TARGET_AVX512FP16")
5210
;; AVX512FP16 insn truncating a MODEF register or memory operand to an
;; HFmode "v" register with vcvt<ssemodesuffix>2sh.
5211 (define_insn "*trunc<mode>hf2"
5212 [(set (match_operand:HF 0 "register_operand" "=v")
5213 (float_truncate:HF
5214 (match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
5215 "TARGET_AVX512FP16"
5216 "vcvt<ssemodesuffix>2sh\t{%1, %d0|%d0, %1}"
5217 [(set_attr "type" "ssecvt")
5218 (set_attr "prefix" "evex")
5219 (set_attr "mode" "HF")])
5220 \f
5221 ;; Signed conversion to DImode.
5222
;; Expander for XFmode -> signed DImode conversion (x87 only). With
;; TARGET_FISTTP the fisttp pattern is emitted directly; otherwise the
;; parallel below (conversion plus FLAGS_REG clobber) is emitted as
;; written.
5223 (define_expand "fix_truncxfdi2"
5224 [(parallel [(set (match_operand:DI 0 "nonimmediate_operand")
5225 (fix:DI (match_operand:XF 1 "register_operand")))
5226 (clobber (reg:CC FLAGS_REG))])]
5227 "TARGET_80387"
5228 {
5229 if (TARGET_FISTTP)
5230 {
5231 emit_insn (gen_fix_truncdi_i387_fisttp (operands[0], operands[1]));
5232 DONE;
5233 }
5234 })
5235
;; Expander for MODEF -> signed DImode conversion. Selection order:
;;  1. TARGET_FISTTP, unless this is a 64-bit target doing SSE math in
;;     this mode: emit the fisttp pattern.
;;  2. 64-bit target and an SSE float mode: emit the _sse pattern. That
;;     pattern needs a register destination, so a non-register operand 0
;;     goes through a fresh DImode pseudo that is then moved into place.
;;  3. Otherwise the parallel below is emitted as written.
5236 (define_expand "fix_trunc<mode>di2"
5237 [(parallel [(set (match_operand:DI 0 "nonimmediate_operand")
5238 (fix:DI (match_operand:MODEF 1 "register_operand")))
5239 (clobber (reg:CC FLAGS_REG))])]
5240 "TARGET_80387 || (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode))"
5241 {
5242 if (TARGET_FISTTP
5243 && !(TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
5244 {
5245 emit_insn (gen_fix_truncdi_i387_fisttp (operands[0], operands[1]));
5246 DONE;
5247 }
5248 if (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode))
5249 {
5250 rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (DImode);
5251 emit_insn (gen_fix_trunc<mode>di_sse (out, operands[1]));
5252 if (out != operands[0])
5253 emit_move_insn (operands[0], out);
5254 DONE;
5255 }
5256 })
5257
;; AVX512FP16 insn converting an HFmode register or memory operand to an
;; SWI48-mode general register with vcvttsh2<fixsuffix>si. The any_fix
;; iterator makes this pattern cover both fix and unsigned_fix.
5258 (define_insn "fix<fixunssuffix>_trunchf<mode>2"
5259 [(set (match_operand:SWI48 0 "register_operand" "=r")
5260 (any_fix:SWI48
5261 (match_operand:HF 1 "nonimmediate_operand" "vm")))]
5262 "TARGET_AVX512FP16"
5263 "vcvttsh2<fixsuffix>si\t{%1, %0|%0, %1}"
5264 [(set_attr "type" "sseicvt")
5265 (set_attr "prefix" "evex")
5266 (set_attr "mode" "<MODE>")])
5267
5268 ;; Signed conversion to SImode.
5269
;; Expander for XFmode -> signed SImode conversion (x87 only). With
;; TARGET_FISTTP the fisttp pattern is emitted directly; otherwise the
;; parallel below (conversion plus FLAGS_REG clobber) is emitted as
;; written.
5270 (define_expand "fix_truncxfsi2"
5271 [(parallel [(set (match_operand:SI 0 "nonimmediate_operand")
5272 (fix:SI (match_operand:XF 1 "register_operand")))
5273 (clobber (reg:CC FLAGS_REG))])]
5274 "TARGET_80387"
5275 {
5276 if (TARGET_FISTTP)
5277 {
5278 emit_insn (gen_fix_truncsi_i387_fisttp (operands[0], operands[1]));
5279 DONE;
5280 }
5281 })
5282
;; Expander for MODEF -> signed SImode conversion. Selection order:
;;  1. TARGET_FISTTP, unless SSE math is used for this mode: emit the
;;     fisttp pattern.
;;  2. An SSE float mode: emit the _sse pattern. That pattern needs a
;;     register destination, so a non-register operand 0 goes through a
;;     fresh SImode pseudo that is then moved into place.
;;  3. Otherwise the parallel below is emitted as written.
5283 (define_expand "fix_trunc<mode>si2"
5284 [(parallel [(set (match_operand:SI 0 "nonimmediate_operand")
5285 (fix:SI (match_operand:MODEF 1 "register_operand")))
5286 (clobber (reg:CC FLAGS_REG))])]
5287 "TARGET_80387 || SSE_FLOAT_MODE_P (<MODE>mode)"
5288 {
5289 if (TARGET_FISTTP
5290 && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
5291 {
5292 emit_insn (gen_fix_truncsi_i387_fisttp (operands[0], operands[1]));
5293 DONE;
5294 }
5295 if (SSE_FLOAT_MODE_P (<MODE>mode))
5296 {
5297 rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SImode);
5298 emit_insn (gen_fix_trunc<mode>si_sse (out, operands[1]));
5299 if (out != operands[0])
5300 emit_move_insn (operands[0], out);
5301 DONE;
5302 }
5303 })
5304
5305 ;; Signed conversion to HImode.
5306
;; Expander for X87MODEF -> signed HImode conversion on the x87. It is
;; disabled for SSE float modes when fisttp is unavailable or SSE math is
;; in use. With TARGET_FISTTP the fisttp pattern is emitted directly;
;; otherwise the parallel below is emitted as written.
5307 (define_expand "fix_trunc<mode>hi2"
5308 [(parallel [(set (match_operand:HI 0 "nonimmediate_operand")
5309 (fix:HI (match_operand:X87MODEF 1 "register_operand")))
5310 (clobber (reg:CC FLAGS_REG))])]
5311 "TARGET_80387
5312 && !(SSE_FLOAT_MODE_P (<MODE>mode) && (!TARGET_FISTTP || TARGET_SSE_MATH))"
5313 {
5314 if (TARGET_FISTTP)
5315 {
5316 emit_insn (gen_fix_trunchi_i387_fisttp (operands[0], operands[1]));
5317 DONE;
5318 }
5319 })
5320
5321 ;; Unsigned conversion to DImode
5322
;; MODEF -> unsigned DImode conversion into a general register with
;; vcvtt<ssemodesuffix>2usi. Requires a 64-bit target, AVX512F and SSE
;; math.
5323 (define_insn "fixuns_trunc<mode>di2"
5324 [(set (match_operand:DI 0 "register_operand" "=r")
5325 (unsigned_fix:DI
5326 (match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
5327 "TARGET_64BIT && TARGET_AVX512F && TARGET_SSE_MATH"
5328 "vcvtt<ssemodesuffix>2usi\t{%1, %0|%0, %1}"
5329 [(set_attr "type" "sseicvt")
5330 (set_attr "prefix" "evex")
5331 (set_attr "mode" "DI")])
5332
5333 ;; Unsigned conversion to SImode.
5334
;; Expander for MODEF -> unsigned SImode conversion with SSE2 math
;; (32-bit targets, or any target with AVX512F).
;;  - With TARGET_AVX512F the single-instruction _avx512f pattern is
;;    emitted and expansion is DONE.
;;  - Otherwise expansion FAILs when optimizing for size.
;;  - Otherwise operands[2] is set to a register holding a vector constant
;;    built from 2^31 (real_ldexp of 1.0 by 31) in <ssevecmode>, and the
;;    parallel below, with its two vector scratch clobbers, is emitted.
5335 (define_expand "fixuns_trunc<mode>si2"
5336 [(parallel
5337 [(set (match_operand:SI 0 "register_operand")
5338 (unsigned_fix:SI
5339 (match_operand:MODEF 1 "nonimmediate_operand")))
5340 (use (match_dup 2))
5341 (clobber (scratch:<ssevecmode>))
5342 (clobber (scratch:<ssevecmode>))])]
5343 "(!TARGET_64BIT || TARGET_AVX512F) && TARGET_SSE2 && TARGET_SSE_MATH"
5344 {
5345 machine_mode mode = <MODE>mode;
5346 machine_mode vecmode = <ssevecmode>mode;
5347 REAL_VALUE_TYPE TWO31r;
5348 rtx two31;
5349
5350 if (TARGET_AVX512F)
5351 {
5352 emit_insn (gen_fixuns_trunc<mode>si2_avx512f (operands[0], operands[1]));
5353 DONE;
5354 }
5355
5356 if (optimize_insn_for_size_p ())
5357 FAIL;
5358
5359 real_ldexp (&TWO31r, &dconst1, 31);
5360 two31 = const_double_from_real_value (TWO31r, mode);
5361 two31 = ix86_build_const_vector (vecmode, true, two31);
5362 operands[2] = force_reg (vecmode, two31);
5363 })
5364
;; MODEF -> unsigned SImode conversion into a general register with
;; vcvtt<ssemodesuffix>2usi. Requires AVX512F and SSE math.
5365 (define_insn "fixuns_trunc<mode>si2_avx512f"
5366 [(set (match_operand:SI 0 "register_operand" "=r")
5367 (unsigned_fix:SI
5368 (match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
5369 "TARGET_AVX512F && TARGET_SSE_MATH"
5370 "vcvtt<ssemodesuffix>2usi\t{%1, %0|%0, %1}"
5371 [(set_attr "type" "sseicvt")
5372 (set_attr "prefix" "evex")
5373 (set_attr "mode" "SI")])
5374
;; 64-bit AVX512FP16 insn matching an HFmode -> unsigned SImode
;; conversion whose result is zero-extended to DImode. A single
;; vcvttsh2usi is emitted, with the destination printed through the %k
;; operand modifier.
5375 (define_insn "*fixuns_trunchfsi2zext"
5376 [(set (match_operand:DI 0 "register_operand" "=r")
5377 (zero_extend:DI
5378 (unsigned_fix:SI
5379 (match_operand:HF 1 "nonimmediate_operand" "vm"))))]
5380 "TARGET_64BIT && TARGET_AVX512FP16"
5381 "vcvttsh2usi\t{%1, %k0|%k0, %1}"
5382 [(set_attr "type" "sseicvt")
5383 (set_attr "prefix" "evex")
5384 (set_attr "mode" "SI")])
5385
;; 64-bit AVX512F insn matching a MODEF -> unsigned SImode conversion
;; whose result is zero-extended to DImode. A single
;; vcvtt<ssemodesuffix>2usi is emitted, with the destination printed
;; through the %k operand modifier.
5386 (define_insn "*fixuns_trunc<mode>si2_avx512f_zext"
5387 [(set (match_operand:DI 0 "register_operand" "=r")
5388 (zero_extend:DI
5389 (unsigned_fix:SI
5390 (match_operand:MODEF 1 "nonimmediate_operand" "vm"))))]
5391 "TARGET_64BIT && TARGET_AVX512F && TARGET_SSE_MATH"
5392 "vcvtt<ssemodesuffix>2usi\t{%1, %k0|%k0, %1}"
5393 [(set_attr "type" "sseicvt")
5394 (set_attr "prefix" "evex")
5395 (set_attr "mode" "SI")])
5396
;; 32-bit SSE2 placeholder insn for MODEF -> unsigned SImode conversion,
;; used when the function is optimized for speed. It carries the vector
;; constant (operand 4, memory or register) and two vector scratch
;; registers through register allocation, outputs "#", and after reload
;; is replaced by whatever ix86_split_convert_uns_si_sse emits.
5397 (define_insn_and_split "*fixuns_trunc<mode>_1"
5398 [(set (match_operand:SI 0 "register_operand" "=&x,&x")
5399 (unsigned_fix:SI
5400 (match_operand:MODEF 3 "nonimmediate_operand" "xm,xm")))
5401 (use (match_operand:<ssevecmode> 4 "nonimmediate_operand" "m,x"))
5402 (clobber (match_scratch:<ssevecmode> 1 "=x,&x"))
5403 (clobber (match_scratch:<ssevecmode> 2 "=x,x"))]
5404 "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
5405 && optimize_function_for_speed_p (cfun)"
5406 "#"
5407 "&& reload_completed"
5408 [(const_int 0)]
5409 {
5410 ix86_split_convert_uns_si_sse (operands);
5411 DONE;
5412 })
5413
5414 ;; Unsigned conversion to HImode.
5415 ;; Without these patterns, we'll try the unsigned SI conversion which
5416 ;; is complex for SSE, rather than the signed SI conversion, which isn't.
5417
;; AVX512FP16 expander for HFmode -> unsigned HImode: do a signed
;; conversion to a fresh SImode pseudo (operands[2]) and then copy its
;; low HImode subreg to the destination.
5418 (define_expand "fixuns_trunchfhi2"
5419 [(set (match_dup 2)
5420 (fix:SI (match_operand:HF 1 "nonimmediate_operand")))
5421 (set (match_operand:HI 0 "nonimmediate_operand")
5422 (subreg:HI (match_dup 2) 0))]
5423 "TARGET_AVX512FP16"
5424 "operands[2] = gen_reg_rtx (SImode);")
5425
;; SSE-math expander for MODEF -> unsigned HImode: do a signed conversion
;; to a fresh SImode pseudo (operands[2]) and then copy its low HImode
;; subreg to the destination.
5426 (define_expand "fixuns_trunc<mode>hi2"
5427 [(set (match_dup 2)
5428 (fix:SI (match_operand:MODEF 1 "nonimmediate_operand")))
5429 (set (match_operand:HI 0 "nonimmediate_operand")
5430 (subreg:HI (match_dup 2) 0))]
5431 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
5432 "operands[2] = gen_reg_rtx (SImode);")
5433
5434 ;; When SSE is available, it is always faster to use it!
;; SSE insn converting a MODEF value ("v" register or memory) to a signed
;; SWI48 integer in a general register with cvtt<suffix>2si. It is
;; available for SSE float modes unless fisttp is present and SSE math is
;; off. The prefix_rex attribute is 1 when the integer mode is DImode.
5435 (define_insn "fix_trunc<MODEF:mode><SWI48:mode>_sse"
5436 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5437 (fix:SWI48 (match_operand:MODEF 1 "nonimmediate_operand" "v,m")))]
5438 "SSE_FLOAT_MODE_P (<MODEF:MODE>mode)
5439 && (!TARGET_FISTTP || TARGET_SSE_MATH)"
5440 "%vcvtt<MODEF:ssemodesuffix>2si<SWI48:rex64suffix>\t{%1, %0|%0, %1}"
5441 [(set_attr "type" "sseicvt")
5442 (set_attr "prefix" "maybe_vex")
5443 (set (attr "prefix_rex")
5444 (if_then_else
5445 (match_test "<SWI48:MODE>mode == DImode")
5446 (const_string "1")
5447 (const_string "*")))
5448 (set_attr "mode" "<MODEF:MODE>")
5449 (set_attr "athlon_decode" "double,vector")
5450 (set_attr "amdfam10_decode" "double,double")
5451 (set_attr "bdver1_decode" "double,double")])
5452
5453 ;; Avoid vector decoded forms of the instruction.
;; With TARGET_AVOID_VECTOR_DECODE, for SSE float modes when optimizing
;; for speed: if an "x" scratch register is available, replace a
;; conversion from memory by a load into that scratch followed by a
;; register-source conversion.
5454 (define_peephole2
5455 [(match_scratch:MODEF 2 "x")
5456 (set (match_operand:SWI48 0 "register_operand")
5457 (fix:SWI48 (match_operand:MODEF 1 "memory_operand")))]
5458 "TARGET_AVOID_VECTOR_DECODE
5459 && SSE_FLOAT_MODE_P (<MODEF:MODE>mode)
5460 && optimize_insn_for_speed_p ()"
5461 [(set (match_dup 2) (match_dup 1))
5462 (set (match_dup 0) (fix:SWI48 (match_dup 2)))])
5463
;; x87 fisttp insn: convert an "f" register (any x87 float mode, taken
;; from the operand itself) to a SWI248x integer stored in memory, with an
;; XFmode "f" scratch clobbered. It is rejected when SSE math handles the
;; conversion, i.e. for an SSE float mode where the target is 64-bit or
;; the integer mode is not DImode. Output comes from output_fix_trunc
;; with its third argument true.
5464 (define_insn "fix_trunc<mode>_i387_fisttp"
5465 [(set (match_operand:SWI248x 0 "nonimmediate_operand" "=m")
5466 (fix:SWI248x (match_operand 1 "register_operand" "f")))
5467 (clobber (match_scratch:XF 2 "=&f"))]
5468 "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
5469 && TARGET_FISTTP
5470 && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
5471 && (TARGET_64BIT || <MODE>mode != DImode))
5472 && TARGET_SSE_MATH)"
5473 "* return output_fix_trunc (insn, operands, true);"
5474 [(set_attr "type" "fisttp")
5475 (set_attr "mode" "<MODE>")])
5476
5477 ;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description
5478 ;; of the machinery. Please note the clobber of FLAGS_REG. In i387 control
5479 ;; word calculation (inserted by LCM in mode switching pass) a FLAGS_REG
5480 ;; clobbering insns can be used. Look at emit_i387_cw_initialization ()
5481 ;; function in i386.cc.
;; Pre-reload placeholder for the x87 float -> integer conversion without
;; fisttp. It is split unconditionally ("&& 1"): the split sets
;; ix86_optimize_mode_switching[I387_TRUNC], obtains two HImode stack
;; slots (SLOT_CW_STORED and SLOT_CW_TRUNC) and emits the
;; fix_trunc<mode>_i387 insn with those slots as its extra operands.
5482 (define_insn_and_split "*fix_trunc<mode>_i387_1"
5483 [(set (match_operand:SWI248x 0 "nonimmediate_operand")
5484 (fix:SWI248x (match_operand 1 "register_operand")))
5485 (clobber (reg:CC FLAGS_REG))]
5486 "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
5487 && !TARGET_FISTTP
5488 && !(SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
5489 && (TARGET_64BIT || <MODE>mode != DImode))
5490 && ix86_pre_reload_split ()"
5491 "#"
5492 "&& 1"
5493 [(const_int 0)]
5494 {
5495 ix86_optimize_mode_switching[I387_TRUNC] = 1;
5496
5497 operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
5498 operands[3] = assign_386_stack_local (HImode, SLOT_CW_TRUNC);
5499
5500 emit_insn (gen_fix_trunc<mode>_i387 (operands[0], operands[1],
5501 operands[2], operands[3]));
5502 DONE;
5503 }
5504 [(set_attr "type" "fistp")
5505 (set_attr "i387_cw" "trunc")
5506 (set_attr "mode" "<MODE>")])
5507
;; x87 insn (no fisttp) converting an "f" register to a DImode value
;; stored in memory. Operands 2 and 3 are HImode memory slots that are
;; only "used" by the pattern, and an XFmode "f" scratch is clobbered.
;; Output comes from output_fix_trunc with its third argument false.
5508 (define_insn "fix_truncdi_i387"
5509 [(set (match_operand:DI 0 "nonimmediate_operand" "=m")
5510 (fix:DI (match_operand 1 "register_operand" "f")))
5511 (use (match_operand:HI 2 "memory_operand" "m"))
5512 (use (match_operand:HI 3 "memory_operand" "m"))
5513 (clobber (match_scratch:XF 4 "=&f"))]
5514 "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
5515 && !TARGET_FISTTP
5516 && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))"
5517 "* return output_fix_trunc (insn, operands, false);"
5518 [(set_attr "type" "fistp")
5519 (set_attr "i387_cw" "trunc")
5520 (set_attr "mode" "DI")])
5521
;; x87 insn (no fisttp) converting an "f" register to a SWI24 integer
;; stored in memory. Operands 2 and 3 are HImode memory slots that are
;; only "used" by the pattern. It is not available for SSE float modes.
;; Output comes from output_fix_trunc with its third argument false.
5522 (define_insn "fix_trunc<mode>_i387"
5523 [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m")
5524 (fix:SWI24 (match_operand 1 "register_operand" "f")))
5525 (use (match_operand:HI 2 "memory_operand" "m"))
5526 (use (match_operand:HI 3 "memory_operand" "m"))]
5527 "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
5528 && !TARGET_FISTTP
5529 && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
5530 "* return output_fix_trunc (insn, operands, false);"
5531 [(set_attr "type" "fistp")
5532 (set_attr "i387_cw" "trunc")
5533 (set_attr "mode" "<MODE>")])
5534
;; fnstcw insn: sets an HImode memory operand from an UNSPEC_FSTCW
;; unspec. The length attribute is computed as the default address length
;; of the insn plus 2.
5535 (define_insn "x86_fnstcw_1"
5536 [(set (match_operand:HI 0 "memory_operand" "=m")
5537 (unspec:HI [(const_int 0)] UNSPEC_FSTCW))]
5538 "TARGET_80387"
5539 "fnstcw\t%0"
5540 [(set (attr "length")
5541 (symbol_ref "ix86_attr_length_address_default (insn) + 2"))
5542 (set_attr "mode" "HI")
5543 (set_attr "unit" "i387")
5544 (set_attr "bdver1_decode" "vector")])
5545 \f
5546 ;; Conversion between fixed point and floating point.
5547
5548 ;; Even though we only accept memory inputs, the backend _really_
5549 ;; wants to be able to do this between registers. Thankfully, LRA
5550 ;; will fix this up for us during register allocation.
5551
;; x87 insn converting a signed HImode memory operand to an X87MODEF
;; value in an "f" register with fild. It is available unless SSE math
;; handles the mode, except that TARGET_MIX_SSE_I387 re-enables it. The
;; constraint only allows memory although the predicate is
;; nonimmediate_operand.
5552 (define_insn "floathi<mode>2"
5553 [(set (match_operand:X87MODEF 0 "register_operand" "=f")
5554 (float:X87MODEF (match_operand:HI 1 "nonimmediate_operand" "m")))]
5555 "TARGET_80387
5556 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
5557 || TARGET_MIX_SSE_I387)"
5558 "fild%Z1\t%1"
5559 [(set_attr "type" "fmov")
5560 (set_attr "mode" "<MODE>")
5561 (set_attr "znver1_decode" "double")
5562 (set_attr "fp_int_src" "true")])
5563
;; x87 insn converting a signed SWI48x integer in memory to XFmode in an
;; "f" register with fild.
5564 (define_insn "float<SWI48x:mode>xf2"
5565 [(set (match_operand:XF 0 "register_operand" "=f")
5566 (float:XF (match_operand:SWI48x 1 "nonimmediate_operand" "m")))]
5567 "TARGET_80387"
5568 "fild%Z1\t%1"
5569 [(set_attr "type" "fmov")
5570 (set_attr "mode" "XF")
5571 (set_attr "znver1_decode" "double")
5572 (set_attr "fp_int_src" "true")])
5573
;; Expander for signed SWI48x integer -> MODEF conversion. It is
;; available either on the x87 (when X87_ENABLE_FLOAT allows the mode
;; pair) or with SSE math; the SSE path accepts a DImode source only on
;; 64-bit targets. There is no preparation code.
5574 (define_expand "float<SWI48x:mode><MODEF:mode>2"
5575 [(set (match_operand:MODEF 0 "register_operand")
5576 (float:MODEF (match_operand:SWI48x 1 "nonimmediate_operand")))]
5577 "(TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI48x:MODE>mode))
5578 || (SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
5579 && ((<SWI48x:MODE>mode != DImode) || TARGET_64BIT))")
5580
;; Signed SWI48 integer -> MODEF conversion insn with three alternatives:
;;   0  fild from memory into an "f" register;
;;   1  cvtsi2<suffix> from a general register into a "v" register;
;;   2  cvtsi2<suffix> from memory into a "v" register.
;; "enabled": with SSE math for this mode, alternatives 1/2 are enabled
;; and alternative 0 needs TARGET_MIX_SSE_I387 plus X87_ENABLE_FLOAT;
;; without SSE math only alternative 0 is enabled.
;; "preferred_for_speed": alternative 1 is preferred only with
;; TARGET_INTER_UNIT_CONVERSIONS.
5581 (define_insn "*float<SWI48:mode><MODEF:mode>2"
5582 [(set (match_operand:MODEF 0 "register_operand" "=f,v,v")
5583 (float:MODEF
5584 (match_operand:SWI48 1 "nonimmediate_operand" "m,r,m")))]
5585 "(TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI48:MODE>mode))
5586 || (SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)"
5587 "@
5588 fild%Z1\t%1
5589 %vcvtsi2<MODEF:ssemodesuffix><SWI48:rex64suffix>\t{%1, %d0|%d0, %1}
5590 %vcvtsi2<MODEF:ssemodesuffix><SWI48:rex64suffix>\t{%1, %d0|%d0, %1}"
5591 [(set_attr "type" "fmov,sseicvt,sseicvt")
5592 (set_attr "avx_partial_xmm_update" "false,true,true")
5593 (set_attr "prefix" "orig,maybe_vex,maybe_vex")
5594 (set_attr "mode" "<MODEF:MODE>")
5595 (set (attr "prefix_rex")
5596 (if_then_else
5597 (and (eq_attr "prefix" "maybe_vex")
5598 (match_test "<SWI48:MODE>mode == DImode"))
5599 (const_string "1")
5600 (const_string "*")))
5601 (set_attr "unit" "i387,*,*")
5602 (set_attr "athlon_decode" "*,double,direct")
5603 (set_attr "amdfam10_decode" "*,vector,double")
5604 (set_attr "bdver1_decode" "*,double,direct")
5605 (set_attr "znver1_decode" "double,*,*")
5606 (set_attr "fp_int_src" "true")
5607 (set (attr "enabled")
5608 (if_then_else
5609 (match_test ("SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH"))
5610 (if_then_else
5611 (eq_attr "alternative" "0")
5612 (symbol_ref "TARGET_MIX_SSE_I387
5613 && X87_ENABLE_FLOAT (<MODEF:MODE>mode,
5614 <SWI48:MODE>mode)")
5615 (symbol_ref "true"))
5616 (if_then_else
5617 (eq_attr "alternative" "0")
5618 (symbol_ref "true")
5619 (symbol_ref "false"))))
5620 (set (attr "preferred_for_speed")
5621 (cond [(eq_attr "alternative" "1")
5622 (symbol_ref "TARGET_INTER_UNIT_CONVERSIONS")]
5623 (symbol_ref "true")))])
5624
;; AVX512FP16 insn converting a SWI48 integer (general register or
;; memory) to HFmode in a "v" register with vcvt<floatsuffix>si2sh. The
;; any_float iterator makes this pattern cover both float and
;; unsigned_float.
5625 (define_insn "float<floatunssuffix><mode>hf2"
5626 [(set (match_operand:HF 0 "register_operand" "=v")
5627 (any_float:HF
5628 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))]
5629 "TARGET_AVX512FP16"
5630 "vcvt<floatsuffix>si2sh<rex64suffix>\t{%1, %d0|%d0, %1}"
5631 [(set_attr "type" "sseicvt")
5632 (set_attr "prefix" "evex")
5633 (set_attr "mode" "HF")])
5634
;; 32-bit x87 insn converting a signed DImode memory operand to a MODEF
;; value in an "f" register with fild, when X87_ENABLE_FLOAT allows it.
5635 (define_insn "*floatdi<MODEF:mode>2_i387"
5636 [(set (match_operand:MODEF 0 "register_operand" "=f")
5637 (float:MODEF (match_operand:DI 1 "nonimmediate_operand" "m")))]
5638 "!TARGET_64BIT
5639 && TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, DImode)"
5640 "fild%Z1\t%1"
5641 [(set_attr "type" "fmov")
5642 (set_attr "mode" "<MODEF:MODE>")
5643 (set_attr "znver1_decode" "double")
5644 (set_attr "fp_int_src" "true")])
5645
5646 ;; Try TARGET_USE_VECTOR_CONVERTS, but not so hard as to require extra memory
5647 ;; slots when !TARGET_INTER_UNIT_MOVES_TO_VEC disables the general_regs
5648 ;; alternative in sse2_loadld.
;; Post-reload split for SImode -> MODEF conversion into an SSE register,
;; used with TARGET_USE_VECTOR_CONVERTS when optimizing for speed. The
;; source must be in memory unless TARGET_INTER_UNIT_MOVES_TO_VEC allows
;; a register source. The integer is loaded into a V4SI view of the
;; destination over a zero vector with sse2_loadld, then converted in
;; place with floatv4siv4sf2 (V4SF vector mode) or sse2_cvtdq2pd
;; (otherwise).
5649 (define_split
5650 [(set (match_operand:MODEF 0 "sse_reg_operand")
5651 (float:MODEF (match_operand:SI 1 "nonimmediate_operand")))]
5652 "TARGET_SSE2
5653 && TARGET_USE_VECTOR_CONVERTS
5654 && optimize_function_for_speed_p (cfun)
5655 && reload_completed
5656 && (MEM_P (operands[1]) || TARGET_INTER_UNIT_MOVES_TO_VEC)
5657 && (!EXT_REX_SSE_REG_P (operands[0])
5658 || TARGET_AVX512VL)"
5659 [(const_int 0)]
5660 {
5661 operands[3] = lowpart_subreg (<ssevecmode>mode, operands[0], <MODE>mode);
5662 operands[4] = lowpart_subreg (V4SImode, operands[0], <MODE>mode);
5663
5664 emit_insn (gen_sse2_loadld (operands[4],
5665 CONST0_RTX (V4SImode), operands[1]));
5666
5667 if (<ssevecmode>mode == V4SFmode)
5668 emit_insn (gen_floatv4siv4sf2 (operands[3], operands[4]));
5669 else
5670 emit_insn (gen_sse2_cvtdq2pd (operands[3], operands[4]));
5671 DONE;
5672 })
5673
5674 ;; Avoid store forwarding (partial memory) stall penalty
5675 ;; by passing the DImode value through XMM registers.
5676
;; 32-bit split, applied while pseudos can still be created, for a DImode
;; register being converted to an x87 float mode when SSE2 and
;; inter-unit moves to vector registers are available and the function is
;; optimized for speed. It obtains the SLOT_FLOATxFDI_387 DImode stack
;; slot and emits the floatdi<mode>2_i387_with_xmm pattern with it.
5677 (define_split
5678 [(set (match_operand:X87MODEF 0 "register_operand")
5679 (float:X87MODEF
5680 (match_operand:DI 1 "register_operand")))]
5681 "!TARGET_64BIT && TARGET_INTER_UNIT_MOVES_TO_VEC
5682 && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
5683 && TARGET_SSE2 && optimize_function_for_speed_p (cfun)
5684 && can_create_pseudo_p ()"
5685 [(const_int 0)]
5686 {
5687 rtx s = assign_386_stack_local (DImode, SLOT_FLOATxFDI_387);
5688 emit_insn (gen_floatdi<mode>2_i387_with_xmm (operands[0], operands[1], s));
5689 DONE;
5690 })
5691
;; 32-bit placeholder insn converting a DImode value held in general
;; registers to an x87 float mode, going through an XMM register and the
;; DImode memory slot in operand 2. It outputs "#" and is split after
;; reload:
;;  - the low SImode half is loaded into V4SI scratch operand 3;
;;  - the high half is added with pinsrd (SSE4.1 alternative), or loaded
;;    into scratch operand 4 and interleaved into operand 3;
;;  - operand 3, viewed as DImode, is stored to the memory slot, and the
;;    slot is then converted by the second set of the split pattern.
5692 (define_insn_and_split "floatdi<X87MODEF:mode>2_i387_with_xmm"
5693 [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
5694 (float:X87MODEF
5695 (match_operand:DI 1 "register_operand" "r,r")))
5696 (clobber (match_operand:DI 2 "memory_operand" "=m,m"))
5697 (clobber (match_scratch:V4SI 3 "=x,x"))
5698 (clobber (match_scratch:V4SI 4 "=X,x"))]
5699 "!TARGET_64BIT && TARGET_INTER_UNIT_MOVES_TO_VEC
5700 && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
5701 && TARGET_SSE2 && optimize_function_for_speed_p (cfun)"
5702 "#"
5703 "&& reload_completed"
5704 [(set (match_dup 2) (match_dup 3))
5705 (set (match_dup 0) (float:X87MODEF (match_dup 2)))]
5706 {
5707 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
5708 Assemble the 64-bit DImode value in an xmm register. */
5709 emit_insn (gen_sse2_loadld (operands[3], CONST0_RTX (V4SImode),
5710 gen_lowpart (SImode, operands[1])));
5711 if (TARGET_SSE4_1)
5712 emit_insn (gen_sse4_1_pinsrd (operands[3], operands[3],
5713 gen_highpart (SImode, operands[1]),
5714 GEN_INT (2)));
5715 else
5716 {
5717 emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode),
5718 gen_highpart (SImode, operands[1])));
5719 emit_insn (gen_vec_interleave_lowv4si (operands[3], operands[3],
5720 operands[4]));
5721 }
5722 operands[3] = gen_lowpart (DImode, operands[3]);
5723 }
5724 [(set_attr "isa" "sse4,*")
5725 (set_attr "type" "multi")
5726 (set_attr "mode" "<X87MODEF:MODE>")
5727 (set_attr "unit" "i387")
5728 (set_attr "fp_int_src" "true")])
5729
5730 ;; Break partial SSE register dependency stall. This splitter should split
5731 ;; late in the pass sequence (after register rename pass), so allocated
5732 ;; registers won't change anymore
5733
;; Non-AVX split applied once epilogue_completed is set, when
;; TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY holds and the function is
;; optimized for speed. The preparation code first clears the whole
;; vector view of the destination, then the integer -> float conversion
;; is re-expressed as a vec_merge (mask 1) of the converted value into
;; that cleared register.
5734 (define_split
5735 [(set (match_operand:MODEF 0 "sse_reg_operand")
5736 (float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))]
5737 "!TARGET_AVX
5738 && TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY
5739 && epilogue_completed
5740 && optimize_function_for_speed_p (cfun)
5741 && (!EXT_REX_SSE_REG_P (operands[0])
5742 || TARGET_AVX512VL)"
5743 [(set (match_dup 0)
5744 (vec_merge:<MODEF:ssevecmode>
5745 (vec_duplicate:<MODEF:ssevecmode>
5746 (float:MODEF
5747 (match_dup 1)))
5748 (match_dup 0)
5749 (const_int 1)))]
5750 {
5751 const machine_mode vmode = <MODEF:ssevecmode>mode;
5752
5753 operands[0] = lowpart_subreg (vmode, operands[0], <MODEF:MODE>mode);
5754 emit_move_insn (operands[0], CONST0_RTX (vmode));
5755 })
5756
;; Unsigned conversion of an SWI12-mode integer to a MODEF float, enabled for
;; 32-bit targets doing SSE math in the destination mode.  The source is
;; first converted to SImode and then handed to the signed SImode expander.
5757 (define_expand "floatuns<SWI12:mode><MODEF:mode>2"
5758 [(set (match_operand:MODEF 0 "register_operand")
5759 (unsigned_float:MODEF
5760 (match_operand:SWI12 1 "nonimmediate_operand")))]
5761 "!TARGET_64BIT
5762 && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH"
5763 {
/* The final argument (1) asks convert_to_mode for an unsigned conversion. */
5764 operands[1] = convert_to_mode (SImode, operands[1], 1);
5765 emit_insn (gen_floatsi<MODEF:mode>2 (operands[0], operands[1]));
5766 DONE;
5767 })
5768
;; Unsigned integer to float conversion emitted as a single vcvtusi2*
;; instruction; available when TARGET_AVX512F and TARGET_SSE_MATH hold.
;; The destination register also appears as a source in the template
;; (%0 is printed twice), matching the "avx_partial_xmm_update" attribute.
5769 (define_insn "*floatuns<SWI48:mode><MODEF:mode>2_avx512"
5770 [(set (match_operand:MODEF 0 "register_operand" "=v")
5771 (unsigned_float:MODEF
5772 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))]
5773 "TARGET_AVX512F && TARGET_SSE_MATH"
5774 "vcvtusi2<MODEF:ssemodesuffix><SWI48:rex64suffix>\t{%1, %0, %0|%0, %0, %1}"
5775 [(set_attr "type" "sseicvt")
5776 (set_attr "avx_partial_xmm_update" "true")
5777 (set_attr "prefix" "evex")
5778 (set_attr "mode" "<MODEF:MODE>")])
5779
5780 ;; Avoid store forwarding (partial memory) stall penalty by extending
5781 ;; SImode value to DImode through XMM register instead of pushing two
5782 ;; SImode values to stack. Also note that fild loads from memory only.
5783
;; Unsigned SImode to x87 float conversion for 32-bit targets.  Emitted as
;; "#" and split after reload into three steps: zero-extend the source into
;; the DImode SSE scratch (operand 3), store that to the DImode memory slot
;; (operand 2), and do a signed DImode conversion from the slot.
5784 (define_insn_and_split "floatunssi<mode>2_i387_with_xmm"
5785 [(set (match_operand:X87MODEF 0 "register_operand" "=f")
5786 (unsigned_float:X87MODEF
5787 (match_operand:SI 1 "nonimmediate_operand" "rm")))
5788 (clobber (match_operand:DI 2 "memory_operand" "=m"))
5789 (clobber (match_scratch:DI 3 "=x"))]
5790 "!TARGET_64BIT
5791 && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
5792 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
5793 "#"
5794 "&& reload_completed"
5795 [(set (match_dup 3) (zero_extend:DI (match_dup 1)))
5796 (set (match_dup 2) (match_dup 3))
5797 (set (match_dup 0)
5798 (float:X87MODEF (match_dup 2)))]
5799 ""
5800 [(set_attr "type" "multi")
5801 (set_attr "mode" "<MODE>")])
5802
;; Expander for unsigned SImode to float conversion.  It picks one of three
;; strategies: the x87 pattern that goes through an SSE register and a
;; stack slot, an SSE helper sequence, or (with AVX512F) the plain
;; unsigned_float pattern from the template.
5803 (define_expand "floatunssi<mode>2"
5804 [(set (match_operand:X87MODEF 0 "register_operand")
5805 (unsigned_float:X87MODEF
5806 (match_operand:SI 1 "nonimmediate_operand")))]
5807 "(!TARGET_64BIT
5808 && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
5809 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC)
5810 || ((!TARGET_64BIT || TARGET_AVX512F)
5811 && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
5812 {
/* Not doing SSE math in this mode: use the x87 pattern, giving it a
   DImode stack temporary as the memory operand. */
5813 if (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
5814 {
5815 emit_insn (gen_floatunssi<mode>2_i387_with_xmm
5816 (operands[0], operands[1],
5817 assign_386_stack_local (DImode, SLOT_TEMP)));
5818 DONE;
5819 }
/* SSE math without AVX512F: expand through the helper.  With AVX512F
   neither branch is taken and the template above is emitted. */
5820 if (!TARGET_AVX512F)
5821 {
5822 ix86_expand_convert_uns_si<mode>_sse (operands[0], operands[1]);
5823 DONE;
5824 }
5825 })
5826
;; Unsigned DImode to SFmode conversion for 64-bit SSE math.  Without
;; AVX512F the conversion is expanded by x86_emit_floatuns; with AVX512F
;; the unsigned_float template itself is emitted.
5827 (define_expand "floatunsdisf2"
5828 [(set (match_operand:SF 0 "register_operand")
5829 (unsigned_float:SF
5830 (match_operand:DI 1 "nonimmediate_operand")))]
5831 "TARGET_64BIT && TARGET_SSE && TARGET_SSE_MATH"
5832 {
5833 if (!TARGET_AVX512F)
5834 {
5835 x86_emit_floatuns (operands);
5836 DONE;
5837 }
5838 })
5839
;; Unsigned DImode to DFmode conversion for SSE2 math.  Enabled either for
;; 64-bit AVX512F targets or when TARGET_KEEPS_VECTOR_ALIGNED_STACK holds.
;; 32-bit targets use the SSE helper, 64-bit targets without AVX512F use
;; x86_emit_floatuns, and otherwise the template itself is emitted.
5840 (define_expand "floatunsdidf2"
5841 [(set (match_operand:DF 0 "register_operand")
5842 (unsigned_float:DF
5843 (match_operand:DI 1 "nonimmediate_operand")))]
5844 "((TARGET_64BIT && TARGET_AVX512F)
5845 || TARGET_KEEPS_VECTOR_ALIGNED_STACK)
5846 && TARGET_SSE2 && TARGET_SSE_MATH"
5847 {
5848 if (!TARGET_64BIT)
5849 {
5850 ix86_expand_convert_uns_didf_sse (operands[0], operands[1]);
5851 DONE;
5852 }
5853 if (!TARGET_AVX512F)
5854 {
5855 x86_emit_floatuns (operands);
5856 DONE;
5857 }
5858 })
5859 \f
5860 ;; Load effective address instructions
5861
;; Load an effective address into a general register.  When operand 1
;; satisfies SImode_address_operand the 32-bit form of lea is printed and
;; the "mode" attribute is SI; the code asserts this only happens with
;; TARGET_64BIT.  Otherwise the insn's own mode suffix is used.
5862 (define_insn "*lea<mode>"
5863 [(set (match_operand:SWI48 0 "register_operand" "=r")
5864 (match_operand:SWI48 1 "address_no_seg_operand" "Ts"))]
5865 "ix86_hardreg_mov_ok (operands[0], operands[1])"
5866 {
5867 if (SImode_address_operand (operands[1], VOIDmode))
5868 {
5869 gcc_assert (TARGET_64BIT);
/* %k0 prints the destination in its SImode form. */
5870 return "lea{l}\t{%E1, %k0|%k0, %E1}";
5871 }
5872 else
5873 return "lea{<imodesuffix>}\t{%E1, %0|%0, %E1}";
5874 }
5875 [(set_attr "type" "lea")
5876 (set (attr "mode")
5877 (if_then_else
5878 (match_operand 1 "SImode_address_operand")
5879 (const_string "SI")
5880 (const_string "<MODE>")))])
5881
;; Replace an address load by the sequence that ix86_split_lea_for_addr
;; emits.  This applies only when the flags register is dead at this insn
;; and ix86_avoid_lea_for_addr accepts the insn.
5882 (define_peephole2
5883 [(set (match_operand:SWI48 0 "register_operand")
5884 (match_operand:SWI48 1 "address_no_seg_operand"))]
5885 "ix86_hardreg_mov_ok (operands[0], operands[1])
5886 && peep2_regno_dead_p (0, FLAGS_REG)
5887 && ix86_avoid_lea_for_addr (peep2_next_insn (0), operands)"
5888 [(const_int 0)]
5889 {
5890 machine_mode mode = <MODE>mode;
5891
5892 /* Emit all operations in SImode for zero-extended addresses. */
5893 if (SImode_address_operand (operands[1], VOIDmode))
5894 mode = SImode;
5895
5896 ix86_split_lea_for_addr (peep2_next_insn (0), operands, mode);
5897
5898 /* Zero-extend return register to DImode for zero-extended addresses. */
5899 if (mode != <MODE>mode)
5900 emit_insn (gen_zero_extendsidi2 (operands[0],
5901 gen_lowpart (mode, operands[0])));
5902
5903 DONE;
5904 })
5905
5906 ;; ix86_split_lea_for_addr emits the shifts as MULT to prevent them from
5907 ;; being peephole2 optimized back into a lea. Split that into the shift
5908 ;; during the following split pass.
5909 (define_split
5910 [(set (match_operand:SWI48 0 "general_reg_operand")
5911 (mult:SWI48 (match_dup 0) (match_operand:SWI48 1 "const1248_operand")))
5912 (clobber (reg:CC FLAGS_REG))]
5913 "reload_completed"
5914 [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))
5915 (clobber (reg:CC FLAGS_REG))])]
;; Turn the multiplier constant into the equivalent shift count.
5916 "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
5917 \f
5918 ;; Add instructions
5919
;; Named expander for integer addition in every SDWIM mode.  It is always
;; enabled (empty condition) and delegates all of the work to
;; ix86_expand_binary_operator, so the template is never emitted directly.
5920 (define_expand "add<mode>3"
5921 [(set (match_operand:SDWIM 0 "nonimmediate_operand")
5922 (plus:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")
5923 (match_operand:SDWIM 2 "<general_hilo_operand>")))]
5924 ""
5925 "ix86_expand_binary_operator (PLUS, <MODE>mode, operands); DONE;")
5926
;; Double-word addition.  Emitted as "#" and split after reload into an add
;; of the low halves that also sets the carry (CCC compare), followed by an
;; add of the high halves plus that carry.
5927 (define_insn_and_split "*add<dwi>3_doubleword"
5928 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
5929 (plus:<DWI>
5930 (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0")
5931 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o")))
5932 (clobber (reg:CC FLAGS_REG))]
5933 "ix86_binary_operator_ok (PLUS, <DWI>mode, operands)"
5934 "#"
5935 "&& reload_completed"
5936 [(parallel [(set (reg:CCC FLAGS_REG)
5937 (compare:CCC
5938 (plus:DWIH (match_dup 1) (match_dup 2))
5939 (match_dup 1)))
5940 (set (match_dup 0)
5941 (plus:DWIH (match_dup 1) (match_dup 2)))])
5942 (parallel [(set (match_dup 3)
5943 (plus:DWIH
5944 (plus:DWIH
5945 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
5946 (match_dup 4))
5947 (match_dup 5)))
5948 (clobber (reg:CC FLAGS_REG))])]
5949 {
/* After this call operands[0..2] are the low halves and operands[3..5]
   the high halves of the three original operands. */
5950 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
/* The low half of the addend is zero, so the two-insn template is not
   used; only the high half is handled: add, plain move, or nothing. */
5951 if (operands[2] == const0_rtx)
5952 {
5953 if (operands[5] != const0_rtx)
5954 ix86_expand_binary_operator (PLUS, <MODE>mode, &operands[3]);
5955 else if (!rtx_equal_p (operands[3], operands[4]))
5956 emit_move_insn (operands[3], operands[4]);
5957 else
5958 emit_note (NOTE_INSN_DELETED);
5959 DONE;
5960 }
5961 })
5962
;; Double-word addition where one addend (operand 2) is a zero-extended
;; single-word value.  After reload it splits into a low-half add that sets
;; the carry and a high-half add of carry plus constant zero.
5963 (define_insn_and_split "*add<dwi>3_doubleword_zext"
5964 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o")
5965 (plus:<DWI>
5966 (zero_extend:<DWI>
5967 (match_operand:DWIH 2 "nonimmediate_operand" "rm,r"))
5968 (match_operand:<DWI> 1 "nonimmediate_operand" "0,0")))
5969 (clobber (reg:CC FLAGS_REG))]
5970 "ix86_binary_operator_ok (UNKNOWN, <DWI>mode, operands)"
5971 "#"
5972 "&& reload_completed"
5973 [(parallel [(set (reg:CCC FLAGS_REG)
5974 (compare:CCC
5975 (plus:DWIH (match_dup 1) (match_dup 2))
5976 (match_dup 1)))
5977 (set (match_dup 0)
5978 (plus:DWIH (match_dup 1) (match_dup 2)))])
5979 (parallel [(set (match_dup 3)
5980 (plus:DWIH
5981 (plus:DWIH
5982 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
5983 (match_dup 4))
5984 (const_int 0)))
5985 (clobber (reg:CC FLAGS_REG))])]
;; Only operands 0 and 1 are double-word and get split (count 2); operand 2
;; is already a single word and is used unchanged in the low-half add.
5986 "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[3]);")
5987
;; Basic SImode/DImode add that clobbers the flags.  Alternative 3 is typed
;; "lea" and returns "#".  An operand 2 that matches incdec_operand is
;; printed as inc/dec.  Everything else is printed as add, or as sub when
;; x86_maybe_negate_const_int reports that it negated the constant.
5988 (define_insn "*add<mode>_1"
5989 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r")
5990 (plus:SWI48
5991 (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r")
5992 (match_operand:SWI48 2 "x86_64_general_operand" "re,BM,0,le")))
5993 (clobber (reg:CC FLAGS_REG))]
5994 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
5995 {
5996 switch (get_attr_type (insn))
5997 {
5998 case TYPE_LEA:
5999 return "#";
6000
6001 case TYPE_INCDEC:
6002 gcc_assert (rtx_equal_p (operands[0], operands[1]));
6003 if (operands[2] == const1_rtx)
6004 return "inc{<imodesuffix>}\t%0";
6005 else
6006 {
6007 gcc_assert (operands[2] == constm1_rtx);
6008 return "dec{<imodesuffix>}\t%0";
6009 }
6010
6011 default:
6012 /* For most processors, ADD is faster than LEA. This alternative
6013 was added to use ADD as much as possible. */
/* In alternative 2 the destination is tied to operand 2 (constraint
   "0"), so swap to make operand 1 the one that equals operand 0. */
6014 if (which_alternative == 2)
6015 std::swap (operands[1], operands[2]);
6016
6017 gcc_assert (rtx_equal_p (operands[0], operands[1]));
6018 if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
6019 return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
6020
6021 return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
6022 }
6023 }
6024 [(set (attr "type")
6025 (cond [(eq_attr "alternative" "3")
6026 (const_string "lea")
6027 (match_operand:SWI48 2 "incdec_operand")
6028 (const_string "incdec")
6029 ]
6030 (const_string "alu")))
6031 (set (attr "length_immediate")
6032 (if_then_else
6033 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6034 (const_string "1")
6035 (const_string "*")))
6036 (set_attr "mode" "<MODE>")])
6037
6038 ;; It may seem that nonimmediate operand is proper one for operand 1.
6039 ;; The addsi_1 pattern allows nonimmediate operand at that place and
6040 ;; we take care in ix86_binary_operator_ok to not allow two memory
6041 ;; operands so proper swapping will be done in reload. This allows
6042 ;; patterns constructed from addsi_1 to match.
;; 64-bit only: SImode add whose result is zero-extended into a DImode
;; register.  All templates operate on the SImode view of the destination
;; (%k0).  Alternative 2 is typed "lea" and returns "#".
6043
6044 (define_insn "addsi_1_zext"
6045 [(set (match_operand:DI 0 "register_operand" "=r,r,r")
6046 (zero_extend:DI
6047 (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r,r")
6048 (match_operand:SI 2 "x86_64_general_operand" "rBMe,0,le"))))
6049 (clobber (reg:CC FLAGS_REG))]
6050 "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
6051 {
6052 switch (get_attr_type (insn))
6053 {
6054 case TYPE_LEA:
6055 return "#";
6056
6057 case TYPE_INCDEC:
6058 if (operands[2] == const1_rtx)
6059 return "inc{l}\t%k0";
6060 else
6061 {
6062 gcc_assert (operands[2] == constm1_rtx);
6063 return "dec{l}\t%k0";
6064 }
6065
6066 default:
6067 /* For most processors, ADD is faster than LEA. This alternative
6068 was added to use ADD as much as possible. */
6069 if (which_alternative == 1)
6070 std::swap (operands[1], operands[2]);
6071
6072 if (x86_maybe_negate_const_int (&operands[2], SImode))
6073 return "sub{l}\t{%2, %k0|%k0, %2}";
6074
6075 return "add{l}\t{%2, %k0|%k0, %2}";
6076 }
6077 }
6078 [(set (attr "type")
6079 (cond [(eq_attr "alternative" "2")
6080 (const_string "lea")
6081 (match_operand:SI 2 "incdec_operand")
6082 (const_string "incdec")
6083 ]
6084 (const_string "alu")))
6085 (set (attr "length_immediate")
6086 (if_then_else
6087 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6088 (const_string "1")
6089 (const_string "*")))
6090 (set_attr "mode" "SI")])
6091
;; HImode add that clobbers the flags.  Alternatives 0-2 are printed as
;; 16-bit inc/dec/add/sub.  Alternative 3 is typed "lea", returns "#" and
;; has mode attribute SI.
6092 (define_insn "*addhi_1"
6093 [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r,Yp")
6094 (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,Yp")
6095 (match_operand:HI 2 "general_operand" "rn,m,0,ln")))
6096 (clobber (reg:CC FLAGS_REG))]
6097 "ix86_binary_operator_ok (PLUS, HImode, operands)"
6098 {
6099 switch (get_attr_type (insn))
6100 {
6101 case TYPE_LEA:
6102 return "#";
6103
6104 case TYPE_INCDEC:
6105 gcc_assert (rtx_equal_p (operands[0], operands[1]));
6106 if (operands[2] == const1_rtx)
6107 return "inc{w}\t%0";
6108 else
6109 {
6110 gcc_assert (operands[2] == constm1_rtx);
6111 return "dec{w}\t%0";
6112 }
6113
6114 default:
6115 /* For most processors, ADD is faster than LEA. This alternative
6116 was added to use ADD as much as possible. */
/* Alternative 2 ties the destination to operand 2; swap so that
   operand 1 is the one equal to operand 0. */
6117 if (which_alternative == 2)
6118 std::swap (operands[1], operands[2]);
6119
6120 gcc_assert (rtx_equal_p (operands[0], operands[1]));
6121 if (x86_maybe_negate_const_int (&operands[2], HImode))
6122 return "sub{w}\t{%2, %0|%0, %2}";
6123
6124 return "add{w}\t{%2, %0|%0, %2}";
6125 }
6126 }
6127 [(set (attr "type")
6128 (cond [(eq_attr "alternative" "3")
6129 (const_string "lea")
6130 (match_operand:HI 2 "incdec_operand")
6131 (const_string "incdec")
6132 ]
6133 (const_string "alu")))
6134 (set (attr "length_immediate")
6135 (if_then_else
6136 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6137 (const_string "1")
6138 (const_string "*")))
6139 (set_attr "mode" "HI,HI,HI,SI")])
6140
;; QImode add that clobbers the flags.  Alternatives 0-2 use byte
;; instructions.  Alternatives 3 and 4 have mode attribute SI and are
;; printed as 32-bit instructions on the %k form of the operands.
;; Alternative 5 is typed "lea" and returns "#".
6141 (define_insn "*addqi_1"
6142 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,q,r,r,Yp")
6143 (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,Yp")
6144 (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln")))
6145 (clobber (reg:CC FLAGS_REG))]
6146 "ix86_binary_operator_ok (PLUS, QImode, operands)"
6147 {
/* True for the alternatives whose "mode" attribute is not QI; those
   are printed with the {l} suffix below. */
6148 bool widen = (get_attr_mode (insn) != MODE_QI);
6149
6150 switch (get_attr_type (insn))
6151 {
6152 case TYPE_LEA:
6153 return "#";
6154
6155 case TYPE_INCDEC:
6156 gcc_assert (rtx_equal_p (operands[0], operands[1]));
6157 if (operands[2] == const1_rtx)
6158 return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
6159 else
6160 {
6161 gcc_assert (operands[2] == constm1_rtx);
6162 return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
6163 }
6164
6165 default:
6166 /* For most processors, ADD is faster than LEA. These alternatives
6167 were added to use ADD as much as possible. */
/* Alternatives 2 and 4 tie the destination to operand 2. */
6168 if (which_alternative == 2 || which_alternative == 4)
6169 std::swap (operands[1], operands[2]);
6170
6171 gcc_assert (rtx_equal_p (operands[0], operands[1]));
6172 if (x86_maybe_negate_const_int (&operands[2], QImode))
6173 {
6174 if (widen)
6175 return "sub{l}\t{%2, %k0|%k0, %2}";
6176 else
6177 return "sub{b}\t{%2, %0|%0, %2}";
6178 }
6179 if (widen)
6180 return "add{l}\t{%k2, %k0|%k0, %k2}";
6181 else
6182 return "add{b}\t{%2, %0|%0, %2}";
6183 }
6184 }
6185 [(set (attr "type")
6186 (cond [(eq_attr "alternative" "5")
6187 (const_string "lea")
6188 (match_operand:QI 2 "incdec_operand")
6189 (const_string "incdec")
6190 ]
6191 (const_string "alu")))
6192 (set (attr "length_immediate")
6193 (if_then_else
6194 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6195 (const_string "1")
6196 (const_string "*")))
6197 (set_attr "mode" "QI,QI,QI,SI,SI,SI")
6198 ;; Potential partial reg stall on alternatives 3 and 4.
6199 (set (attr "preferred_for_speed")
6200 (cond [(eq_attr "alternative" "3,4")
6201 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
6202 (symbol_ref "true")))])
6203
6204 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Add into the low part (strict_low_part) of a register, for QImode and
;; HImode (SWI12).  Alternative 0 is printed directly as inc/dec/add/sub.
;; Alternative 1 returns "#" and is split after reload into a
;; strict_low_part move of operand 1 into the destination followed by the
;; in-place add of operand 2.
;; The constant-negation helper and the "type" attribute test use the
;; pattern's own mode rather than a hard-coded QImode, as the sibling
;; *add<mode>_N patterns do, so the HImode instance is handled in HImode.
6205 (define_insn_and_split "*add<mode>_1_slp"
6206 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
6207 (plus:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "%0,!<r>")
6208 (match_operand:SWI12 2 "general_operand" "<r>mn,<r>mn")))
6209 (clobber (reg:CC FLAGS_REG))]
6210 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
6211 {
/* Any alternative other than 0 has to be split first. */
6212 if (which_alternative)
6213 return "#";
6214
6215 switch (get_attr_type (insn))
6216 {
6217 case TYPE_INCDEC:
6218 if (operands[2] == const1_rtx)
6219 return "inc{<imodesuffix>}\t%0";
6220 else
6221 {
6222 gcc_assert (operands[2] == constm1_rtx);
6223 return "dec{<imodesuffix>}\t%0";
6224 }
6225
6226 default:
/* Pass the mode the operands really have (QImode or HImode). */
6227 if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
6228 return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
6229
6230 return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
6231 }
6232 }
6233 "&& reload_completed"
6234 [(set (strict_low_part (match_dup 0)) (match_dup 1))
6235 (parallel
6236 [(set (strict_low_part (match_dup 0))
6237 (plus:SWI12 (match_dup 0) (match_dup 2)))
6238 (clobber (reg:CC FLAGS_REG))])]
6239 ""
6240 [(set (attr "type")
6241 (if_then_else (match_operand:SWI12 2 "incdec_operand")
6242 (const_string "incdec")
6243 (const_string "alu")))
6244 (set_attr "mode" "<MODE>")])
6245
6246 ;; Split non destructive adds if we cannot use lea.
;; After reload, when ix86_avoid_lea_for_add accepts the insn, the
;; three-operand add becomes a move of operand 1 into the destination
;; followed by an in-place add of operand 2.
6247 (define_split
6248 [(set (match_operand:SWI48 0 "register_operand")
6249 (plus:SWI48 (match_operand:SWI48 1 "register_operand")
6250 (match_operand:SWI48 2 "x86_64_nonmemory_operand")))
6251 (clobber (reg:CC FLAGS_REG))]
6252 "reload_completed && ix86_avoid_lea_for_add (insn, operands)"
6253 [(set (match_dup 0) (match_dup 1))
6254 (parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 2)))
6255 (clobber (reg:CC FLAGS_REG))])])
6256
6257 ;; Split non destructive adds if we cannot use lea.
;; Zero-extending 64-bit variant: operand 1 is first moved into the SImode
;; low part of the destination (operand 3), then the zero-extended add is
;; done in place on that low part.
6258 (define_split
6259 [(set (match_operand:DI 0 "register_operand")
6260 (zero_extend:DI
6261 (plus:SI (match_operand:SI 1 "register_operand")
6262 (match_operand:SI 2 "x86_64_nonmemory_operand"))))
6263 (clobber (reg:CC FLAGS_REG))]
6264 "TARGET_64BIT
6265 && reload_completed && ix86_avoid_lea_for_add (insn, operands)"
6266 [(set (match_dup 3) (match_dup 1))
6267 (parallel [(set (match_dup 0)
6268 (zero_extend:DI (plus:SI (match_dup 3) (match_dup 2))))
6269 (clobber (reg:CC FLAGS_REG))])]
6270 "operands[3] = gen_lowpart (SImode, operands[0]);")
6271
6272 ;; Convert add to the lea pattern to avoid flags dependency.
;; After reload, when ix86_lea_for_add_ok accepts the insn, the add with
;; its flags clobber is replaced by a plain plus in <LEAMODE>.  If that
;; mode differs from the insn's mode, all three operands are re-expressed
;; as their <LEAMODE> low parts.
6273 (define_split
6274 [(set (match_operand:SWI 0 "register_operand")
6275 (plus:SWI (match_operand:SWI 1 "register_operand")
6276 (match_operand:SWI 2 "<nonmemory_operand>")))
6277 (clobber (reg:CC FLAGS_REG))]
6278 "reload_completed && ix86_lea_for_add_ok (insn, operands)"
6279 [(set (match_dup 0)
6280 (plus:<LEAMODE> (match_dup 1) (match_dup 2)))]
6281 {
6282 if (<MODE>mode != <LEAMODE>mode)
6283 {
6284 operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]);
6285 operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]);
6286 operands[2] = gen_lowpart (<LEAMODE>mode, operands[2]);
6287 }
6288 })
6289
6290 ;; Convert add to the lea pattern to avoid flags dependency.
;; Zero-extending 64-bit variant: the replacement is the same expression
;; with the flags clobber dropped.
6291 (define_split
6292 [(set (match_operand:DI 0 "register_operand")
6293 (zero_extend:DI
6294 (plus:SI (match_operand:SI 1 "register_operand")
6295 (match_operand:SI 2 "x86_64_nonmemory_operand"))))
6296 (clobber (reg:CC FLAGS_REG))]
6297 "TARGET_64BIT && reload_completed && ix86_lea_for_add_ok (insn, operands)"
6298 [(set (match_dup 0)
6299 (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))])
6300
;; Add that both stores the sum and sets the flags from comparing the sum
;; with zero.  It matches only when ix86_match_ccmode accepts CCGOCmode for
;; the insn.
6301 (define_insn "*add<mode>_2"
6302 [(set (reg FLAGS_REG)
6303 (compare
6304 (plus:SWI
6305 (match_operand:SWI 1 "nonimmediate_operand" "%0,0,<r>")
6306 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,0"))
6307 (const_int 0)))
6308 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,<r>")
6309 (plus:SWI (match_dup 1) (match_dup 2)))]
6310 "ix86_match_ccmode (insn, CCGOCmode)
6311 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
6312 {
6313 switch (get_attr_type (insn))
6314 {
6315 case TYPE_INCDEC:
6316 if (operands[2] == const1_rtx)
6317 return "inc{<imodesuffix>}\t%0";
6318 else
6319 {
6320 gcc_assert (operands[2] == constm1_rtx);
6321 return "dec{<imodesuffix>}\t%0";
6322 }
6323
6324 default:
/* Alternative 2 ties the destination to operand 2; swap so that
   operand 1 is the one equal to operand 0. */
6325 if (which_alternative == 2)
6326 std::swap (operands[1], operands[2]);
6327
6328 gcc_assert (rtx_equal_p (operands[0], operands[1]));
6329 if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
6330 return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
6331
6332 return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
6333 }
6334 }
6335 [(set (attr "type")
6336 (if_then_else (match_operand:SWI 2 "incdec_operand")
6337 (const_string "incdec")
6338 (const_string "alu")))
6339 (set (attr "length_immediate")
6340 (if_then_else
6341 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6342 (const_string "1")
6343 (const_string "*")))
6344 (set_attr "mode" "<MODE>")])
6345
6346 ;; See comment for addsi_1_zext why we do use nonimmediate_operand
;; 64-bit only: sets the flags from comparing the SImode sum with zero
;; (CCGOCmode) and stores the zero-extended sum in a DImode register.
6347 (define_insn "*addsi_2_zext"
6348 [(set (reg FLAGS_REG)
6349 (compare
6350 (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r")
6351 (match_operand:SI 2 "x86_64_general_operand" "rBMe,0"))
6352 (const_int 0)))
6353 (set (match_operand:DI 0 "register_operand" "=r,r")
6354 (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
6355 "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
6356 && ix86_binary_operator_ok (PLUS, SImode, operands)"
6357 {
6358 switch (get_attr_type (insn))
6359 {
6360 case TYPE_INCDEC:
6361 if (operands[2] == const1_rtx)
6362 return "inc{l}\t%k0";
6363 else
6364 {
6365 gcc_assert (operands[2] == constm1_rtx);
6366 return "dec{l}\t%k0";
6367 }
6368
6369 default:
/* Alternative 1 ties the destination to operand 2. */
6370 if (which_alternative == 1)
6371 std::swap (operands[1], operands[2]);
6372
6373 if (x86_maybe_negate_const_int (&operands[2], SImode))
6374 return "sub{l}\t{%2, %k0|%k0, %2}";
6375
6376 return "add{l}\t{%2, %k0|%k0, %2}";
6377 }
6378 }
6379 [(set (attr "type")
6380 (if_then_else (match_operand:SI 2 "incdec_operand")
6381 (const_string "incdec")
6382 (const_string "alu")))
6383 (set (attr "length_immediate")
6384 (if_then_else
6385 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6386 (const_string "1")
6387 (const_string "*")))
6388 (set_attr "mode" "SI")])
6389
;; Add used only for its flags: the pattern compares the negation of
;; operand 2 with operand 1, and the sum lands in a scratch register.  It
;; matches only when ix86_match_ccmode accepts CCZmode and the two inputs
;; are not both in memory.
6390 (define_insn "*add<mode>_3"
6391 [(set (reg FLAGS_REG)
6392 (compare
6393 (neg:SWI (match_operand:SWI 2 "<general_operand>" "<g>,0"))
6394 (match_operand:SWI 1 "nonimmediate_operand" "%0,<r>")))
6395 (clobber (match_scratch:SWI 0 "=<r>,<r>"))]
6396 "ix86_match_ccmode (insn, CCZmode)
6397 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
6398 {
6399 switch (get_attr_type (insn))
6400 {
6401 case TYPE_INCDEC:
6402 if (operands[2] == const1_rtx)
6403 return "inc{<imodesuffix>}\t%0";
6404 else
6405 {
6406 gcc_assert (operands[2] == constm1_rtx);
6407 return "dec{<imodesuffix>}\t%0";
6408 }
6409
6410 default:
/* Alternative 1 ties the scratch to operand 2. */
6411 if (which_alternative == 1)
6412 std::swap (operands[1], operands[2]);
6413
6414 gcc_assert (rtx_equal_p (operands[0], operands[1]));
6415 if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
6416 return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
6417
6418 return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
6419 }
6420 }
6421 [(set (attr "type")
6422 (if_then_else (match_operand:SWI 2 "incdec_operand")
6423 (const_string "incdec")
6424 (const_string "alu")))
6425 (set (attr "length_immediate")
6426 (if_then_else
6427 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6428 (const_string "1")
6429 (const_string "*")))
6430 (set_attr "mode" "<MODE>")])
6431
6432 ;; See comment for addsi_1_zext why we do use nonimmediate_operand
;; 64-bit only: sets the flags from comparing the negation of operand 2
;; with operand 1 (CCZmode) and stores the zero-extended SImode sum in a
;; DImode register.
6433 (define_insn "*addsi_3_zext"
6434 [(set (reg FLAGS_REG)
6435 (compare
6436 (neg:SI (match_operand:SI 2 "x86_64_general_operand" "rBMe,0"))
6437 (match_operand:SI 1 "nonimmediate_operand" "%0,r")))
6438 (set (match_operand:DI 0 "register_operand" "=r,r")
6439 (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
6440 "TARGET_64BIT && ix86_match_ccmode (insn, CCZmode)
6441 && ix86_binary_operator_ok (PLUS, SImode, operands)"
6442 {
6443 switch (get_attr_type (insn))
6444 {
6445 case TYPE_INCDEC:
6446 if (operands[2] == const1_rtx)
6447 return "inc{l}\t%k0";
6448 else
6449 {
6450 gcc_assert (operands[2] == constm1_rtx);
6451 return "dec{l}\t%k0";
6452 }
6453
6454 default:
/* Alternative 1 ties the destination to operand 2. */
6455 if (which_alternative == 1)
6456 std::swap (operands[1], operands[2]);
6457
6458 if (x86_maybe_negate_const_int (&operands[2], SImode))
6459 return "sub{l}\t{%2, %k0|%k0, %2}";
6460
6461 return "add{l}\t{%2, %k0|%k0, %2}";
6462 }
6463 }
6464 [(set (attr "type")
6465 (if_then_else (match_operand:SI 2 "incdec_operand")
6466 (const_string "incdec")
6467 (const_string "alu")))
6468 (set (attr "length_immediate")
6469 (if_then_else
6470 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6471 (const_string "1")
6472 (const_string "*")))
6473 (set_attr "mode" "SI")])
6474
6475 ; For comparisons against 1, -1 and 128, we may generate better code
6476 ; by converting cmp to add, inc or dec as done by peephole2. This pattern
6477 ; is matched then. We can't accept general immediate, because for
6478 ; case of overflows, the result is messed up.
6479 ; Also carry flag is reversed compared to cmp, so this conversion is valid
6480 ; only for comparisons not depending on it.
6481
;; 64-bit compare of operand 1 against an immediate, carried out as an
;; arithmetic instruction whose result goes to a scratch register tied to
;; operand 1.  It matches only when ix86_match_ccmode accepts CCGCmode.
6482 (define_insn "*adddi_4"
6483 [(set (reg FLAGS_REG)
6484 (compare
6485 (match_operand:DI 1 "nonimmediate_operand" "0")
6486 (match_operand:DI 2 "x86_64_immediate_operand" "e")))
6487 (clobber (match_scratch:DI 0 "=r"))]
6488 "TARGET_64BIT
6489 && ix86_match_ccmode (insn, CCGCmode)"
6490 {
6491 switch (get_attr_type (insn))
6492 {
/* The RTL is a compare against operand 2, so the mnemonics are the
   reverse of the plain add patterns: -1 gives inc, 1 gives dec, and the
   default is sub unless the constant was negated. */
6493 case TYPE_INCDEC:
6494 if (operands[2] == constm1_rtx)
6495 return "inc{q}\t%0";
6496 else
6497 {
6498 gcc_assert (operands[2] == const1_rtx);
6499 return "dec{q}\t%0";
6500 }
6501
6502 default:
6503 if (x86_maybe_negate_const_int (&operands[2], DImode))
6504 return "add{q}\t{%2, %0|%0, %2}";
6505
6506 return "sub{q}\t{%2, %0|%0, %2}";
6507 }
6508 }
6509 [(set (attr "type")
6510 (if_then_else (match_operand:DI 2 "incdec_operand")
6511 (const_string "incdec")
6512 (const_string "alu")))
6513 (set (attr "length_immediate")
6514 (if_then_else
6515 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6516 (const_string "1")
6517 (const_string "*")))
6518 (set_attr "mode" "DI")])
6519
6520 ; For comparisons against 1, -1 and 128, we may generate better code
6521 ; by converting cmp to add, inc or dec as done by peephole2. This pattern
6522 ; is matched then. We can't accept general immediate, because for
6523 ; case of overflows, the result is messed up.
6524 ; Also carry flag is reversed compared to cmp, so this conversion is valid
6525 ; only for comparisons not depending on it.
6526
;; QImode/HImode/SImode counterpart of *adddi_4: a compare of operand 1
;; against a const_int, carried out as inc/dec/add/sub into a scratch
;; register tied to operand 1.  Matches only for CCGCmode.
6527 (define_insn "*add<mode>_4"
6528 [(set (reg FLAGS_REG)
6529 (compare
6530 (match_operand:SWI124 1 "nonimmediate_operand" "0")
6531 (match_operand:SWI124 2 "const_int_operand")))
6532 (clobber (match_scratch:SWI124 0 "=<r>"))]
6533 "ix86_match_ccmode (insn, CCGCmode)"
6534 {
6535 switch (get_attr_type (insn))
6536 {
/* Compare semantics: -1 gives inc, 1 gives dec, and the default is
   sub unless the constant was negated. */
6537 case TYPE_INCDEC:
6538 if (operands[2] == constm1_rtx)
6539 return "inc{<imodesuffix>}\t%0";
6540 else
6541 {
6542 gcc_assert (operands[2] == const1_rtx);
6543 return "dec{<imodesuffix>}\t%0";
6544 }
6545
6546 default:
6547 if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
6548 return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
6549
6550 return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
6551 }
6552 }
6553 [(set (attr "type")
6554 (if_then_else (match_operand:<MODE> 2 "incdec_operand")
6555 (const_string "incdec")
6556 (const_string "alu")))
6557 (set (attr "length_immediate")
6558 (if_then_else
6559 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6560 (const_string "1")
6561 (const_string "*")))
6562 (set_attr "mode" "<MODE>")])
6563
;; Add used only for its flags: compares the sum with zero (CCGOCmode) and
;; leaves the sum in a scratch register.  The two inputs must not both be
;; in memory.
6564 (define_insn "*add<mode>_5"
6565 [(set (reg FLAGS_REG)
6566 (compare
6567 (plus:SWI
6568 (match_operand:SWI 1 "nonimmediate_operand" "%0,<r>")
6569 (match_operand:SWI 2 "<general_operand>" "<g>,0"))
6570 (const_int 0)))
6571 (clobber (match_scratch:SWI 0 "=<r>,<r>"))]
6572 "ix86_match_ccmode (insn, CCGOCmode)
6573 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
6574 {
6575 switch (get_attr_type (insn))
6576 {
6577 case TYPE_INCDEC:
6578 if (operands[2] == const1_rtx)
6579 return "inc{<imodesuffix>}\t%0";
6580 else
6581 {
6582 gcc_assert (operands[2] == constm1_rtx);
6583 return "dec{<imodesuffix>}\t%0";
6584 }
6585
6586 default:
/* Alternative 1 ties the scratch to operand 2. */
6587 if (which_alternative == 1)
6588 std::swap (operands[1], operands[2]);
6589
6590 gcc_assert (rtx_equal_p (operands[0], operands[1]));
6591 if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
6592 return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
6593
6594 return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
6595 }
6596 }
6597 [(set (attr "type")
6598 (if_then_else (match_operand:SWI 2 "incdec_operand")
6599 (const_string "incdec")
6600 (const_string "alu")))
6601 (set (attr "length_immediate")
6602 (if_then_else
6603 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6604 (const_string "1")
6605 (const_string "*")))
6606 (set_attr "mode" "<MODE>")])
6607
;; Expander that adds a constant (operand 2) to the 8-bit field at bit
;; position 8 of HImode operand 1 and writes the result to the same field
;; of operand 0.  It has no condition or preparation code; it only builds
;; the parallel below, which includes the flags clobber.
6608 (define_expand "addqi_ext_1"
6609 [(parallel
6610 [(set (zero_extract:HI (match_operand:HI 0 "register_operand")
6611 (const_int 8)
6612 (const_int 8))
6613 (subreg:HI
6614 (plus:QI
6615 (subreg:QI
6616 (zero_extract:HI (match_operand:HI 1 "register_operand")
6617 (const_int 8)
6618 (const_int 8)) 0)
6619 (match_operand:QI 2 "const_int_operand")) 0))
6620 (clobber (reg:CC FLAGS_REG))])])
6621
;; Adds QImode operand 2 to the 8-bit field at bit position 8 of operand 0
;; (the %h0 form in the templates).  Operands 0 and 1 must be the same
;; register.  Alternative 1, which takes operand 2 from memory, is tagged
;; isa "nox64".
6622 (define_insn "*addqi_ext<mode>_1"
6623 [(set (zero_extract:SWI248
6624 (match_operand:SWI248 0 "register_operand" "+Q,Q")
6625 (const_int 8)
6626 (const_int 8))
6627 (subreg:SWI248
6628 (plus:QI
6629 (subreg:QI
6630 (zero_extract:SWI248
6631 (match_operand:SWI248 1 "register_operand" "0,0")
6632 (const_int 8)
6633 (const_int 8)) 0)
6634 (match_operand:QI 2 "general_operand" "QnBc,m")) 0))
6635 (clobber (reg:CC FLAGS_REG))]
6636 "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
6637 rtx_equal_p (operands[0], operands[1])"
6638 {
6639 switch (get_attr_type (insn))
6640 {
6641 case TYPE_INCDEC:
6642 if (operands[2] == const1_rtx)
6643 return "inc{b}\t%h0";
6644 else
6645 {
6646 gcc_assert (operands[2] == constm1_rtx);
6647 return "dec{b}\t%h0";
6648 }
6649
6650 default:
6651 return "add{b}\t{%2, %h0|%h0, %2}";
6652 }
6653 }
6654 [(set_attr "isa" "*,nox64")
6655 (set (attr "type")
6656 (if_then_else (match_operand:QI 2 "incdec_operand")
6657 (const_string "incdec")
6658 (const_string "alu")))
6659 (set_attr "mode" "QI")])
6660
;; Adds the 8-bit field at bit position 8 of operand 2 to the same field
;; of operand 0; both are printed in their %h form.  The destination must
;; be the same register as operand 1 or operand 2.
6661 (define_insn "*addqi_ext<mode>_2"
6662 [(set (zero_extract:SWI248
6663 (match_operand:SWI248 0 "register_operand" "+Q")
6664 (const_int 8)
6665 (const_int 8))
6666 (subreg:SWI248
6667 (plus:QI
6668 (subreg:QI
6669 (zero_extract:SWI248
6670 (match_operand:SWI248 1 "register_operand" "%0")
6671 (const_int 8)
6672 (const_int 8)) 0)
6673 (subreg:QI
6674 (zero_extract:SWI248
6675 (match_operand:SWI248 2 "register_operand" "Q")
6676 (const_int 8)
6677 (const_int 8)) 0)) 0))
6678 (clobber (reg:CC FLAGS_REG))]
6679 "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
6680 rtx_equal_p (operands[0], operands[1])
6681 || rtx_equal_p (operands[0], operands[2])"
6682 "add{b}\t{%h2, %h0|%h0, %h2}"
6683 [(set_attr "type" "alu")
6684 (set_attr "mode" "QI")])
6685
6686 ;; Like DWI, but use POImode instead of OImode.
;; Maps QI, HI, SI, DI and TI to HI, SI, DI, TI and POI respectively; the
;; addv<mode>4 expander uses it as the mode of its sign-extended operands.
6687 (define_mode_attr DPWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI") (TI "POI")])
6688
6689 ;; Add with jump on overflow.
;; Operand 0 is the result, operands 1 and 2 the addends and operand 3 the
;; jump label.  The CCO flags value compares the sum of the sign-extended
;; addends, computed in the wider <DPWI> mode, with the sign-extension of
;; the sum computed in the operand mode.
6690 (define_expand "addv<mode>4"
6691 [(parallel [(set (reg:CCO FLAGS_REG)
6692 (eq:CCO
6693 (plus:<DPWI>
6694 (sign_extend:<DPWI>
6695 (match_operand:SWIDWI 1 "nonimmediate_operand"))
6696 (match_dup 4))
6697 (sign_extend:<DPWI>
6698 (plus:SWIDWI (match_dup 1)
6699 (match_operand:SWIDWI 2
6700 "<general_hilo_operand>")))))
6701 (set (match_operand:SWIDWI 0 "register_operand")
6702 (plus:SWIDWI (match_dup 1) (match_dup 2)))])
6703 (set (pc) (if_then_else
6704 (eq (reg:CCO FLAGS_REG) (const_int 0))
6705 (label_ref (match_operand 3))
6706 (pc)))]
6707 ""
6708 {
6709 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
/* Operand 4 is the wide-mode form of the second addend: a constant is
   used as is, anything else is wrapped in a sign_extend. */
6710 if (CONST_SCALAR_INT_P (operands[2]))
6711 operands[4] = operands[2];
6712 else
6713 operands[4] = gen_rtx_SIGN_EXTEND (<DPWI>mode, operands[2]);
6714 })
6715
;; Single-instruction add that also produces the CCO overflow flags.
;; Operand 0 receives operand 1 + operand 2; the flags are set from the
;; comparison of the <DWI> sum of the sign-extended operands with the sign
;; extension of the <MODE> sum.  Operand 1 is tied to operand 0 and the
;; template is a plain add of operand 2 into operand 0.
6716 (define_insn "*addv<mode>4"
6717 [(set (reg:CCO FLAGS_REG)
6718 (eq:CCO (plus:<DWI>
6719 (sign_extend:<DWI>
6720 (match_operand:SWI 1 "nonimmediate_operand" "%0,0"))
6721 (sign_extend:<DWI>
6722 (match_operand:SWI 2 "<general_sext_operand>" "<r>We,m")))
6723 (sign_extend:<DWI>
6724 (plus:SWI (match_dup 1) (match_dup 2)))))
6725 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
6726 (plus:SWI (match_dup 1) (match_dup 2)))]
6727 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
6728 "add{<imodesuffix>}\t{%2, %0|%0, %2}"
6729 [(set_attr "type" "alu")
6730 (set_attr "mode" "<MODE>")])
6731
;; Overflow-checking add of an immediate.  Operand 2 is the <MODE>
;; immediate added to operand 1 (tied to operand 0); operand 3 is the
;; constant used in the <DWI> side of the overflow comparison.  The insn
;; condition requires operand 2 to be a CONST_INT whose INTVAL equals that
;; of operand 3.
;; length_immediate is 1 when the immediate lies in [-128, 127], 4 when
;; <MODE_SIZE> is 8, and <MODE_SIZE> otherwise.
6732 (define_insn "addv<mode>4_1"
6733 [(set (reg:CCO FLAGS_REG)
6734 (eq:CCO (plus:<DWI>
6735 (sign_extend:<DWI>
6736 (match_operand:SWI 1 "nonimmediate_operand" "0"))
6737 (match_operand:<DWI> 3 "const_int_operand"))
6738 (sign_extend:<DWI>
6739 (plus:SWI
6740 (match_dup 1)
6741 (match_operand:SWI 2 "x86_64_immediate_operand" "<i>")))))
6742 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
6743 (plus:SWI (match_dup 1) (match_dup 2)))]
6744 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
6745 && CONST_INT_P (operands[2])
6746 && INTVAL (operands[2]) == INTVAL (operands[3])"
6747 "add{<imodesuffix>}\t{%2, %0|%0, %2}"
6748 [(set_attr "type" "alu")
6749 (set_attr "mode" "<MODE>")
6750 (set (attr "length_immediate")
6751 (cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
6752 (const_string "1")
6753 (match_test "<MODE_SIZE> == 8")
6754 (const_string "4")]
6755 (const_string "<MODE_SIZE>")))])
6756
6757 ;; Quad word integer modes as mode attribute.
;; Maps SI->TI and DI->POI.  The doubleword addv/subv patterns use it as
;; the mode of the widened computation in their overflow comparison.
6758 (define_mode_attr QPWI [(SI "TI") (DI "POI")])
6759
;; Overflow-checking add of two <DWI> (doubleword) values.  The insn is
;; output as "#" and split after reload into two parallels:
;;  1. a DWIH add of operands 1 and 2 into operand 0 that sets CCC by
;;     comparing the sum with operand 1;
;;  2. a DWIH add of operands 4 and 5 plus (ltu (reg:CC FLAGS_REG) 0) into
;;     operand 3 that sets the CCO flags.
;; The preparation code calls split_double_mode on three operands with
;; &operands[0] and &operands[3] as output arrays, so operands 0-2 are
;; presumably the low halves and operands 3-5 the high halves.
6760 (define_insn_and_split "*addv<dwi>4_doubleword"
6761 [(set (reg:CCO FLAGS_REG)
6762 (eq:CCO
6763 (plus:<QPWI>
6764 (sign_extend:<QPWI>
6765 (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0"))
6766 (sign_extend:<QPWI>
6767 (match_operand:<DWI> 2 "nonimmediate_operand" "r,o")))
6768 (sign_extend:<QPWI>
6769 (plus:<DWI> (match_dup 1) (match_dup 2)))))
6770 (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
6771 (plus:<DWI> (match_dup 1) (match_dup 2)))]
6772 "ix86_binary_operator_ok (PLUS, <DWI>mode, operands)"
6773 "#"
6774 "&& reload_completed"
6775 [(parallel [(set (reg:CCC FLAGS_REG)
6776 (compare:CCC
6777 (plus:DWIH (match_dup 1) (match_dup 2))
6778 (match_dup 1)))
6779 (set (match_dup 0)
6780 (plus:DWIH (match_dup 1) (match_dup 2)))])
6781 (parallel [(set (reg:CCO FLAGS_REG)
6782 (eq:CCO
6783 (plus:<DWI>
6784 (plus:<DWI>
6785 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))
6786 (sign_extend:<DWI> (match_dup 4)))
6787 (sign_extend:<DWI> (match_dup 5)))
6788 (sign_extend:<DWI>
6789 (plus:DWIH
6790 (plus:DWIH
6791 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
6792 (match_dup 4))
6793 (match_dup 5)))))
6794 (set (match_dup 3)
6795 (plus:DWIH
6796 (plus:DWIH
6797 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
6798 (match_dup 4))
6799 (match_dup 5)))])]
6800 {
6801 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
6802 })
6803
;; Overflow-checking doubleword add of a constant.  Operand 2 is the <DWI>
;; constant added to operand 1 and operand 3 is the <QPWI> constant used in
;; the overflow comparison; the insn condition requires operand 2 to be a
;; constant scalar integer that is rtx_equal_p to operand 3.
;; After reload the insn is split like *addv<dwi>4_doubleword, except that
;; operand 5 is used directly (without sign_extend) in the <DWI> side of the
;; second comparison.  After split_double_mode, if operand 2 is const0_rtx
;; the preparation code instead emits addv<mode>4_1 on operands 3, 4, 5, 5
;; and finishes with DONE.
6804 (define_insn_and_split "*addv<dwi>4_doubleword_1"
6805 [(set (reg:CCO FLAGS_REG)
6806 (eq:CCO
6807 (plus:<QPWI>
6808 (sign_extend:<QPWI>
6809 (match_operand:<DWI> 1 "nonimmediate_operand" "%0"))
6810 (match_operand:<QPWI> 3 "const_scalar_int_operand" "n"))
6811 (sign_extend:<QPWI>
6812 (plus:<DWI>
6813 (match_dup 1)
6814 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "<di>")))))
6815 (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
6816 (plus:<DWI> (match_dup 1) (match_dup 2)))]
6817 "ix86_binary_operator_ok (PLUS, <DWI>mode, operands)
6818 && CONST_SCALAR_INT_P (operands[2])
6819 && rtx_equal_p (operands[2], operands[3])"
6820 "#"
6821 "&& reload_completed"
6822 [(parallel [(set (reg:CCC FLAGS_REG)
6823 (compare:CCC
6824 (plus:DWIH (match_dup 1) (match_dup 2))
6825 (match_dup 1)))
6826 (set (match_dup 0)
6827 (plus:DWIH (match_dup 1) (match_dup 2)))])
6828 (parallel [(set (reg:CCO FLAGS_REG)
6829 (eq:CCO
6830 (plus:<DWI>
6831 (plus:<DWI>
6832 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))
6833 (sign_extend:<DWI> (match_dup 4)))
6834 (match_dup 5))
6835 (sign_extend:<DWI>
6836 (plus:DWIH
6837 (plus:DWIH
6838 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
6839 (match_dup 4))
6840 (match_dup 5)))))
6841 (set (match_dup 3)
6842 (plus:DWIH
6843 (plus:DWIH
6844 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
6845 (match_dup 4))
6846 (match_dup 5)))])]
6847 {
6848 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
6849 if (operands[2] == const0_rtx)
6850 {
6851 emit_insn (gen_addv<mode>4_1 (operands[3], operands[4], operands[5],
6852 operands[5]));
6853 DONE;
6854 }
6855 })
6856
;; Add with carry-in that also produces the CCO overflow flags.  Operand 0
;; receives carry + operand 1 + operand 2, where the carry is the value of
;; operator 5 applied to flags operand 3; operator 4 is the same carry test
;; in <DWI> for the widened side of the overflow comparison.  Operand 1 is
;; tied to operand 0 and the template is an adc of operand 2 into operand 0.
6857 (define_insn "*addv<mode>4_overflow_1"
6858 [(set (reg:CCO FLAGS_REG)
6859 (eq:CCO
6860 (plus:<DWI>
6861 (plus:<DWI>
6862 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
6863 [(match_operand 3 "flags_reg_operand") (const_int 0)])
6864 (sign_extend:<DWI>
6865 (match_operand:SWI 1 "nonimmediate_operand" "%0,0")))
6866 (sign_extend:<DWI>
6867 (match_operand:SWI 2 "<general_sext_operand>" "rWe,m")))
6868 (sign_extend:<DWI>
6869 (plus:SWI
6870 (plus:SWI
6871 (match_operator:SWI 5 "ix86_carry_flag_operator"
6872 [(match_dup 3) (const_int 0)])
6873 (match_dup 1))
6874 (match_dup 2)))))
6875 (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r")
6876 (plus:SWI
6877 (plus:SWI
6878 (match_op_dup 5 [(match_dup 3) (const_int 0)])
6879 (match_dup 1))
6880 (match_dup 2)))]
6881 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
6882 "adc{<imodesuffix>}\t{%2, %0|%0, %2}"
6883 [(set_attr "type" "alu")
6884 (set_attr "mode" "<MODE>")])
6885
;; Add with carry-in of an immediate that also produces the CCO overflow
;; flags.  Operand 2 is the <MODE> immediate and operand 6 the constant used
;; in the <DWI> side of the overflow comparison; the insn condition requires
;; operand 2 to be a CONST_INT whose INTVAL equals that of operand 6.
;; Operand 1 is tied to operand 0 and the template is an adc of operand 2.
;; length_immediate follows the same rule as addv<mode>4_1: 1 byte for an
;; immediate in [-128, 127], 4 bytes when <MODE_SIZE> is 8, and <MODE_SIZE>
;; bytes otherwise, so that an HImode immediate is counted as 2 bytes
;; rather than 4.
6886 (define_insn "*addv<mode>4_overflow_2"
6887 [(set (reg:CCO FLAGS_REG)
6888 (eq:CCO
6889 (plus:<DWI>
6890 (plus:<DWI>
6891 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
6892 [(match_operand 3 "flags_reg_operand") (const_int 0)])
6893 (sign_extend:<DWI>
6894 (match_operand:SWI 1 "nonimmediate_operand" "%0")))
6895 (match_operand:<DWI> 6 "const_int_operand" "n"))
6896 (sign_extend:<DWI>
6897 (plus:SWI
6898 (plus:SWI
6899 (match_operator:SWI 5 "ix86_carry_flag_operator"
6900 [(match_dup 3) (const_int 0)])
6901 (match_dup 1))
6902 (match_operand:SWI 2 "x86_64_immediate_operand" "e")))))
6903 (set (match_operand:SWI 0 "nonimmediate_operand" "=rm")
6904 (plus:SWI
6905 (plus:SWI
6906 (match_op_dup 5 [(match_dup 3) (const_int 0)])
6907 (match_dup 1))
6908 (match_dup 2)))]
6909 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
6910 && CONST_INT_P (operands[2])
6911 && INTVAL (operands[2]) == INTVAL (operands[6])"
6912 "adc{<imodesuffix>}\t{%2, %0|%0, %2}"
6913 [(set_attr "type" "alu")
6914 (set_attr "mode" "<MODE>")
6915 (set (attr "length_immediate")
6916 (cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
6917 (const_string "1")
(match_test "<MODE_SIZE> == 8")
(const_string "4")]
6918 (const_string "<MODE_SIZE>")))])
6919
;; Unsigned add with jump on carry.  Operands: 0 = result register,
;; 1 and 2 = addends, 3 = jump label.  The parallel stores the sum in
;; operand 0 and sets the CCC flags by comparing the sum with operand 1;
;; the jump to label 3 is taken on (ltu (reg:CCC FLAGS_REG) (const_int 0)).
;; The preparation statement calls ix86_fixup_binary_operands_no_copy.
6920 (define_expand "uaddv<mode>4"
6921 [(parallel [(set (reg:CCC FLAGS_REG)
6922 (compare:CCC
6923 (plus:SWIDWI
6924 (match_operand:SWIDWI 1 "nonimmediate_operand")
6925 (match_operand:SWIDWI 2 "<general_hilo_operand>"))
6926 (match_dup 1)))
6927 (set (match_operand:SWIDWI 0 "register_operand")
6928 (plus:SWIDWI (match_dup 1) (match_dup 2)))])
6929 (set (pc) (if_then_else
6930 (ltu (reg:CCC FLAGS_REG) (const_int 0))
6931 (label_ref (match_operand 3))
6932 (pc)))]
6933 ""
6934 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
6935
6936 ;; The lea patterns for modes less than 32 bits need to be matched by
6937 ;; several insns converted to real lea by splitters.
6938
;; SWI12 form "index register + register + immediate".  It is enabled when
;; !TARGET_PARTIAL_REG_STALL or when optimizing the function for size, is
;; output as "#", and is split after reload into the same sum in SImode
;; with operands 0-3 each rewritten through gen_lowpart (SImode, ...).
6939 (define_insn_and_split "*lea<mode>_general_1"
6940 [(set (match_operand:SWI12 0 "register_operand" "=r")
6941 (plus:SWI12
6942 (plus:SWI12 (match_operand:SWI12 1 "index_register_operand" "l")
6943 (match_operand:SWI12 2 "register_operand" "r"))
6944 (match_operand:SWI12 3 "immediate_operand" "i")))]
6945 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
6946 "#"
6947 "&& reload_completed"
6948 [(set (match_dup 0)
6949 (plus:SI
6950 (plus:SI (match_dup 1) (match_dup 2))
6951 (match_dup 3)))]
6952 {
6953 operands[0] = gen_lowpart (SImode, operands[0]);
6954 operands[1] = gen_lowpart (SImode, operands[1]);
6955 operands[2] = gen_lowpart (SImode, operands[2]);
6956 operands[3] = gen_lowpart (SImode, operands[3]);
6957 }
6958 [(set_attr "type" "lea")
6959 (set_attr "mode" "SI")])
6960
;; SWI12 form "index register * scale + base", where the scale (operand 2)
;; must satisfy const248_operand and the base (operand 3) is a register or
;; immediate.  Split after reload into the same expression in SImode;
;; operands 0, 1 and 3 go through gen_lowpart, operand 2 is reused as is.
6961 (define_insn_and_split "*lea<mode>_general_2"
6962 [(set (match_operand:SWI12 0 "register_operand" "=r")
6963 (plus:SWI12
6964 (mult:SWI12 (match_operand:SWI12 1 "index_register_operand" "l")
6965 (match_operand 2 "const248_operand" "n"))
6966 (match_operand:SWI12 3 "nonmemory_operand" "ri")))]
6967 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
6968 "#"
6969 "&& reload_completed"
6970 [(set (match_dup 0)
6971 (plus:SI
6972 (mult:SI (match_dup 1) (match_dup 2))
6973 (match_dup 3)))]
6974 {
6975 operands[0] = gen_lowpart (SImode, operands[0]);
6976 operands[1] = gen_lowpart (SImode, operands[1]);
6977 operands[3] = gen_lowpart (SImode, operands[3]);
6978 }
6979 [(set_attr "type" "lea")
6980 (set_attr "mode" "SI")])
6981
;; Same as *lea<mode>_general_2 but with the scaling written as a left
;; shift: operand 1 shifted by operand 2 (const123_operand) plus operand 3.
;; Split after reload into the SImode ashift/plus form; operands 0, 1 and 3
;; go through gen_lowpart, the shift count is reused as is.
6982 (define_insn_and_split "*lea<mode>_general_2b"
6983 [(set (match_operand:SWI12 0 "register_operand" "=r")
6984 (plus:SWI12
6985 (ashift:SWI12 (match_operand:SWI12 1 "index_register_operand" "l")
6986 (match_operand 2 "const123_operand" "n"))
6987 (match_operand:SWI12 3 "nonmemory_operand" "ri")))]
6988 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
6989 "#"
6990 "&& reload_completed"
6991 [(set (match_dup 0)
6992 (plus:SI
6993 (ashift:SI (match_dup 1) (match_dup 2))
6994 (match_dup 3)))]
6995 {
6996 operands[0] = gen_lowpart (SImode, operands[0]);
6997 operands[1] = gen_lowpart (SImode, operands[1]);
6998 operands[3] = gen_lowpart (SImode, operands[3]);
6999 }
7000 [(set_attr "type" "lea")
7001 (set_attr "mode" "SI")])
7002
;; SWI12 form "index register * scale + register + immediate": operand 1
;; times operand 2 (const248_operand), plus register operand 3, plus
;; immediate operand 4.  Split after reload into the same expression in
;; SImode; operands 0, 1, 3 and 4 go through gen_lowpart.
7003 (define_insn_and_split "*lea<mode>_general_3"
7004 [(set (match_operand:SWI12 0 "register_operand" "=r")
7005 (plus:SWI12
7006 (plus:SWI12
7007 (mult:SWI12 (match_operand:SWI12 1 "index_register_operand" "l")
7008 (match_operand 2 "const248_operand" "n"))
7009 (match_operand:SWI12 3 "register_operand" "r"))
7010 (match_operand:SWI12 4 "immediate_operand" "i")))]
7011 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
7012 "#"
7013 "&& reload_completed"
7014 [(set (match_dup 0)
7015 (plus:SI
7016 (plus:SI
7017 (mult:SI (match_dup 1) (match_dup 2))
7018 (match_dup 3))
7019 (match_dup 4)))]
7020 {
7021 operands[0] = gen_lowpart (SImode, operands[0]);
7022 operands[1] = gen_lowpart (SImode, operands[1]);
7023 operands[3] = gen_lowpart (SImode, operands[3]);
7024 operands[4] = gen_lowpart (SImode, operands[4]);
7025 }
7026 [(set_attr "type" "lea")
7027 (set_attr "mode" "SI")])
7028
;; Same as *lea<mode>_general_3 but with the scaling written as a left
;; shift of operand 1 by operand 2 (const123_operand).  Split after reload
;; into the SImode ashift/plus/plus form; operands 0, 1, 3 and 4 go through
;; gen_lowpart.
7029 (define_insn_and_split "*lea<mode>_general_3b"
7030 [(set (match_operand:SWI12 0 "register_operand" "=r")
7031 (plus:SWI12
7032 (plus:SWI12
7033 (ashift:SWI12 (match_operand:SWI12 1 "index_register_operand" "l")
7034 (match_operand 2 "const123_operand" "n"))
7035 (match_operand:SWI12 3 "register_operand" "r"))
7036 (match_operand:SWI12 4 "immediate_operand" "i")))]
7037 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
7038 "#"
7039 "&& reload_completed"
7040 [(set (match_dup 0)
7041 (plus:SI
7042 (plus:SI
7043 (ashift:SI (match_dup 1) (match_dup 2))
7044 (match_dup 3))
7045 (match_dup 4)))]
7046 {
7047 operands[0] = gen_lowpart (SImode, operands[0]);
7048 operands[1] = gen_lowpart (SImode, operands[1]);
7049 operands[3] = gen_lowpart (SImode, operands[3]);
7050 operands[4] = gen_lowpart (SImode, operands[4]);
7051 }
7052 [(set_attr "type" "lea")
7053 (set_attr "mode" "SI")])
7054
;; SWI12 any_or of (operand 1 << operand 2) with a constant operand 3,
;; where the shift count is 0..3.  The insn condition requires operand 3,
;; taken as an unsigned value, to be smaller than 1 << operand 2.  After
;; reload the insn is split into the SImode sum
;; operand 1 * (1 << operand 2) + operand 3: operands 0 and 1 go through
;; gen_lowpart and operand 2 is replaced by the corresponding multiplier.
7055 (define_insn_and_split "*lea<mode>_general_4"
7056 [(set (match_operand:SWI12 0 "register_operand" "=r")
7057 (any_or:SWI12
7058 (ashift:SWI12
7059 (match_operand:SWI12 1 "index_register_operand" "l")
7060 (match_operand 2 "const_0_to_3_operand"))
7061 (match_operand 3 "const_int_operand")))]
7062 "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
7063 && ((unsigned HOST_WIDE_INT) INTVAL (operands[3])
7064 < (HOST_WIDE_INT_1U << INTVAL (operands[2])))"
7065 "#"
7066 "&& reload_completed"
7067 [(set (match_dup 0)
7068 (plus:SI
7069 (mult:SI (match_dup 1) (match_dup 2))
7070 (match_dup 3)))]
7071 {
7072 operands[0] = gen_lowpart (SImode, operands[0]);
7073 operands[1] = gen_lowpart (SImode, operands[1]);
7074 operands[2] = GEN_INT (1 << INTVAL (operands[2]));
7075 }
7076 [(set_attr "type" "lea")
7077 (set_attr "mode" "SI")])
7078
;; SWI48 counterpart of the SWI12 pattern of the same name: any_or of
;; (operand 1 << operand 2) with a constant operand 3 that, taken as an
;; unsigned value, is smaller than 1 << operand 2.  After reload the insn
;; is split into operand 1 * (1 << operand 2) + operand 3 in the same mode;
;; only operand 2 is rewritten (shift count to multiplier).
7079 (define_insn_and_split "*lea<mode>_general_4"
7080 [(set (match_operand:SWI48 0 "register_operand" "=r")
7081 (any_or:SWI48
7082 (ashift:SWI48
7083 (match_operand:SWI48 1 "index_register_operand" "l")
7084 (match_operand 2 "const_0_to_3_operand"))
7085 (match_operand 3 "const_int_operand")))]
7086 "(unsigned HOST_WIDE_INT) INTVAL (operands[3])
7087 < (HOST_WIDE_INT_1U << INTVAL (operands[2]))"
7088 "#"
7089 "&& reload_completed"
7090 [(set (match_dup 0)
7091 (plus:SWI48
7092 (mult:SWI48 (match_dup 1) (match_dup 2))
7093 (match_dup 3)))]
7094 "operands[2] = GEN_INT (1 << INTVAL (operands[2]));"
7095 [(set_attr "type" "lea")
7096 (set_attr "mode" "<MODE>")])
7097 \f
7098 ;; Subtract instructions
7099
;; Named subtraction expander: operand 0 = operand 1 - operand 2.  The RTL
;; template is not emitted directly; the preparation statement hands all
;; work to ix86_expand_binary_operator (MINUS, ...) and ends with DONE.
7100 (define_expand "sub<mode>3"
7101 [(set (match_operand:SDWIM 0 "nonimmediate_operand")
7102 (minus:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")
7103 (match_operand:SDWIM 2 "<general_hilo_operand>")))]
7104 ""
7105 "ix86_expand_binary_operator (MINUS, <MODE>mode, operands); DONE;")
7106
;; Doubleword (<DWI>) subtraction, output as "#" and split after reload
;; into two parallels:
;;  1. a DWIH subtraction of operand 2 from operand 1 into operand 0 that
;;     also sets the CC flags from comparing operand 1 with operand 2;
;;  2. operand 3 = operand 4 - (ltu (reg:CC FLAGS_REG) 0) - operand 5, with
;;     the flags clobbered.
;; split_double_mode is called on three operands with &operands[0] and
;; &operands[3] as output arrays (presumably low and high halves).  If
;; operand 2 is then const0_rtx, the preparation code instead calls
;; ix86_expand_binary_operator (MINUS, ...) on &operands[3] and ends with
;; DONE.
7107 (define_insn_and_split "*sub<dwi>3_doubleword"
7108 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
7109 (minus:<DWI>
7110 (match_operand:<DWI> 1 "nonimmediate_operand" "0,0")
7111 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o")))
7112 (clobber (reg:CC FLAGS_REG))]
7113 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
7114 "#"
7115 "&& reload_completed"
7116 [(parallel [(set (reg:CC FLAGS_REG)
7117 (compare:CC (match_dup 1) (match_dup 2)))
7118 (set (match_dup 0)
7119 (minus:DWIH (match_dup 1) (match_dup 2)))])
7120 (parallel [(set (match_dup 3)
7121 (minus:DWIH
7122 (minus:DWIH
7123 (match_dup 4)
7124 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
7125 (match_dup 5)))
7126 (clobber (reg:CC FLAGS_REG))])]
7127 {
7128 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
7129 if (operands[2] == const0_rtx)
7130 {
7131 ix86_expand_binary_operator (MINUS, <MODE>mode, &operands[3]);
7132 DONE;
7133 }
7134 })
7135
;; Doubleword subtraction of a zero-extended DWIH operand 2 from operand 1.
;; Split after reload into a DWIH compare-and-subtract of operand 2 from
;; operand 1 into operand 0, followed by
;; operand 3 = operand 4 - (ltu (reg:CC FLAGS_REG) 0) - 0 with the flags
;; clobbered.  split_double_mode is called for two operands only (0 and 1),
;; so operand 2 is used unsplit.
7136 (define_insn_and_split "*sub<dwi>3_doubleword_zext"
7137 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o")
7138 (minus:<DWI>
7139 (match_operand:<DWI> 1 "nonimmediate_operand" "0,0")
7140 (zero_extend:<DWI>
7141 (match_operand:DWIH 2 "nonimmediate_operand" "rm,r"))))
7142 (clobber (reg:CC FLAGS_REG))]
7143 "ix86_binary_operator_ok (UNKNOWN, <DWI>mode, operands)"
7144 "#"
7145 "&& reload_completed"
7146 [(parallel [(set (reg:CC FLAGS_REG)
7147 (compare:CC (match_dup 1) (match_dup 2)))
7148 (set (match_dup 0)
7149 (minus:DWIH (match_dup 1) (match_dup 2)))])
7150 (parallel [(set (match_dup 3)
7151 (minus:DWIH
7152 (minus:DWIH
7153 (match_dup 4)
7154 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
7155 (const_int 0)))
7156 (clobber (reg:CC FLAGS_REG))])]
7157 "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[3]);")
7158
;; Basic subtraction: operand 0 = operand 1 - operand 2 with the flags
;; clobbered.  Operand 1 is tied to operand 0 in both alternatives and the
;; template is a sub of operand 2 from operand 0.
7159 (define_insn "*sub<mode>_1"
7160 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
7161 (minus:SWI
7162 (match_operand:SWI 1 "nonimmediate_operand" "0,0")
7163 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>")))
7164 (clobber (reg:CC FLAGS_REG))]
7165 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
7166 "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
7167 [(set_attr "type" "alu")
7168 (set_attr "mode" "<MODE>")])
7169
;; SImode subtraction whose result is zero-extended into a DImode register
;; (TARGET_64BIT only).  Operand 1 is tied to operand 0 and the template
;; subtracts operand 2 from the %k form of operand 0.
7170 (define_insn "*subsi_1_zext"
7171 [(set (match_operand:DI 0 "register_operand" "=r")
7172 (zero_extend:DI
7173 (minus:SI (match_operand:SI 1 "register_operand" "0")
7174 (match_operand:SI 2 "x86_64_general_operand" "rBMe"))))
7175 (clobber (reg:CC FLAGS_REG))]
7176 "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)"
7177 "sub{l}\t{%2, %k0|%k0, %2}"
7178 [(set_attr "type" "alu")
7179 (set_attr "mode" "SI")])
7180
7181 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Subtract operand 2 from operand 1 into the strict_low_part of operand 0,
;; clobbering the flags.  Alternative 0 ties operand 1 to operand 0 and
;; emits the sub directly.  Alternative 1 is output as "#" and split after
;; reload into a strict_low_part move of operand 1 into operand 0 followed
;; by the subtraction of operand 2 from operand 0 in place.
7182 (define_insn_and_split "*sub<mode>_1_slp"
7183 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
7184 (minus:SWI12 (match_operand:SWI12 1 "register_operand" "0,!<r>")
7185 (match_operand:SWI12 2 "general_operand" "<r>mn,<r>mn")))
7186 (clobber (reg:CC FLAGS_REG))]
7187 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
7188 "@
7189 sub{<imodesuffix>}\t{%2, %0|%0, %2}
7190 #"
7191 "&& reload_completed"
7192 [(set (strict_low_part (match_dup 0)) (match_dup 1))
7193 (parallel
7194 [(set (strict_low_part (match_dup 0))
7195 (minus:SWI12 (match_dup 0) (match_dup 2)))
7196 (clobber (reg:CC FLAGS_REG))])]
7197 ""
7198 [(set_attr "type" "alu")
7199 (set_attr "mode" "<MODE>")])
7200
;; Subtraction that also sets the flags from comparing the difference
;; operand 1 - operand 2 with zero.  Accepted only when
;; ix86_match_ccmode (insn, CCGOCmode) holds; operand 1 is tied to
;; operand 0 and the template is a plain sub.
7201 (define_insn "*sub<mode>_2"
7202 [(set (reg FLAGS_REG)
7203 (compare
7204 (minus:SWI
7205 (match_operand:SWI 1 "nonimmediate_operand" "0,0")
7206 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>"))
7207 (const_int 0)))
7208 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
7209 (minus:SWI (match_dup 1) (match_dup 2)))]
7210 "ix86_match_ccmode (insn, CCGOCmode)
7211 && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
7212 "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
7213 [(set_attr "type" "alu")
7214 (set_attr "mode" "<MODE>")])
7215
;; Zero-extending variant of *sub<mode>_2 for TARGET_64BIT: the flags are
;; set from comparing the SImode difference with zero and the DImode
;; operand 0 receives the zero-extended difference.  The template subtracts
;; operand 2 from the %k form of operand 0.
7216 (define_insn "*subsi_2_zext"
7217 [(set (reg FLAGS_REG)
7218 (compare
7219 (minus:SI (match_operand:SI 1 "register_operand" "0")
7220 (match_operand:SI 2 "x86_64_general_operand" "rBMe"))
7221 (const_int 0)))
7222 (set (match_operand:DI 0 "register_operand" "=r")
7223 (zero_extend:DI
7224 (minus:SI (match_dup 1)
7225 (match_dup 2))))]
7226 "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
7227 && ix86_binary_operator_ok (MINUS, SImode, operands)"
7228 "sub{l}\t{%2, %k0|%k0, %2}"
7229 [(set_attr "type" "alu")
7230 (set_attr "mode" "SI")])
7231
;; Subtract the 8-bit field at bit position 8 of operand 2 from the same
;; field of operand 1 and store the QImode difference into that field of
;; operand 0 (each field is a zero_extract of size 8 at position 8).  The
;; flags register is clobbered.  The insn condition only accepts the
;; pattern when operand 0 is the same rtx as operand 1; the template emits
;; a byte sub using the %h forms of operands 0 and 2.
7232 (define_insn "*subqi_ext<mode>_2"
7233 [(set (zero_extract:SWI248
7234 (match_operand:SWI248 0 "register_operand" "+Q")
7235 (const_int 8)
7236 (const_int 8))
7237 (subreg:SWI248
7238 (minus:QI
7239 (subreg:QI
7240 (zero_extract:SWI248
7241 (match_operand:SWI248 1 "register_operand" "0")
7242 (const_int 8)
7243 (const_int 8)) 0)
7244 (subreg:QI
7245 (zero_extract:SWI248
7246 (match_operand:SWI248 2 "register_operand" "Q")
7247 (const_int 8)
7248 (const_int 8)) 0)) 0))
7249 (clobber (reg:CC FLAGS_REG))]
7250 "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
7251 rtx_equal_p (operands[0], operands[1])"
7252 "sub{b}\t{%h2, %h0|%h0, %h2}"
7253 [(set_attr "type" "alu")
7254 (set_attr "mode" "QI")])
7255
7256 ;; Subtract with jump on overflow.
;; Operands: 0 = result register, 1 = minuend, 2 = subtrahend, 3 = jump
;; label.  The first parallel stores operand 1 - operand 2 in operand 0 and
;; sets the CCO flags from an (eq ...) between the difference computed in
;; <DPWI> on the widened inputs and the sign extension of the <MODE>
;; difference.  The second insn jumps to label 3 on
;; (eq (reg:CCO FLAGS_REG) (const_int 0)).
;; The preparation code first calls ix86_fixup_binary_operands_no_copy, then
;; sets operand 4 to operand 2 itself when it is a constant scalar integer
;; and to (sign_extend:<DPWI> operand 2) otherwise.
7257 (define_expand "subv<mode>4"
7258 [(parallel [(set (reg:CCO FLAGS_REG)
7259 (eq:CCO
7260 (minus:<DPWI>
7261 (sign_extend:<DPWI>
7262 (match_operand:SWIDWI 1 "nonimmediate_operand"))
7263 (match_dup 4))
7264 (sign_extend:<DPWI>
7265 (minus:SWIDWI (match_dup 1)
7266 (match_operand:SWIDWI 2
7267 "<general_hilo_operand>")))))
7268 (set (match_operand:SWIDWI 0 "register_operand")
7269 (minus:SWIDWI (match_dup 1) (match_dup 2)))])
7270 (set (pc) (if_then_else
7271 (eq (reg:CCO FLAGS_REG) (const_int 0))
7272 (label_ref (match_operand 3))
7273 (pc)))]
7274 ""
7275 {
7276 ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);
7277 if (CONST_SCALAR_INT_P (operands[2]))
7278 operands[4] = operands[2];
7279 else
7280 operands[4] = gen_rtx_SIGN_EXTEND (<DPWI>mode, operands[2]);
7281 })
7282
;; Single-instruction subtract that also produces the CCO overflow flags.
;; Operand 0 receives operand 1 - operand 2; the flags are set from the
;; comparison of the <DWI> difference of the sign-extended operands with
;; the sign extension of the <MODE> difference.  Operand 1 is tied to
;; operand 0 and the template is a plain sub.
7283 (define_insn "*subv<mode>4"
7284 [(set (reg:CCO FLAGS_REG)
7285 (eq:CCO (minus:<DWI>
7286 (sign_extend:<DWI>
7287 (match_operand:SWI 1 "nonimmediate_operand" "0,0"))
7288 (sign_extend:<DWI>
7289 (match_operand:SWI 2 "<general_sext_operand>" "<r>We,m")))
7290 (sign_extend:<DWI>
7291 (minus:SWI (match_dup 1) (match_dup 2)))))
7292 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
7293 (minus:SWI (match_dup 1) (match_dup 2)))]
7294 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
7295 "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
7296 [(set_attr "type" "alu")
7297 (set_attr "mode" "<MODE>")])
7298
;; Overflow-checking subtract of an immediate.  Operand 2 is the <MODE>
;; immediate subtracted from operand 1 (tied to operand 0); operand 3 is
;; the constant used in the <DWI> side of the overflow comparison.  The
;; insn condition requires operand 2 to be a CONST_INT whose INTVAL equals
;; that of operand 3.
;; length_immediate is 1 when the immediate lies in [-128, 127], 4 when
;; <MODE_SIZE> is 8, and <MODE_SIZE> otherwise.
7299 (define_insn "subv<mode>4_1"
7300 [(set (reg:CCO FLAGS_REG)
7301 (eq:CCO (minus:<DWI>
7302 (sign_extend:<DWI>
7303 (match_operand:SWI 1 "nonimmediate_operand" "0"))
7304 (match_operand:<DWI> 3 "const_int_operand"))
7305 (sign_extend:<DWI>
7306 (minus:SWI
7307 (match_dup 1)
7308 (match_operand:SWI 2 "x86_64_immediate_operand" "<i>")))))
7309 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
7310 (minus:SWI (match_dup 1) (match_dup 2)))]
7311 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
7312 && CONST_INT_P (operands[2])
7313 && INTVAL (operands[2]) == INTVAL (operands[3])"
7314 "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
7315 [(set_attr "type" "alu")
7316 (set_attr "mode" "<MODE>")
7317 (set (attr "length_immediate")
7318 (cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
7319 (const_string "1")
7320 (match_test "<MODE_SIZE> == 8")
7321 (const_string "4")]
7322 (const_string "<MODE_SIZE>")))])
7323
;; Overflow-checking subtract of two <DWI> (doubleword) values.  The insn
;; is output as "#" and split after reload into two parallels:
;;  1. a DWIH subtraction of operand 2 from operand 1 into operand 0 that
;;     sets the CC flags from comparing operand 1 with operand 2;
;;  2. operand 3 = operand 4 - (ltu (reg:CC FLAGS_REG) 0) - operand 5,
;;     setting the CCO flags.
;; The preparation code calls split_double_mode on three operands with
;; &operands[0] and &operands[3] as output arrays, so operands 0-2 are
;; presumably the low halves and operands 3-5 the high halves.
7324 (define_insn_and_split "*subv<dwi>4_doubleword"
7325 [(set (reg:CCO FLAGS_REG)
7326 (eq:CCO
7327 (minus:<QPWI>
7328 (sign_extend:<QPWI>
7329 (match_operand:<DWI> 1 "nonimmediate_operand" "0,0"))
7330 (sign_extend:<QPWI>
7331 (match_operand:<DWI> 2 "nonimmediate_operand" "r,o")))
7332 (sign_extend:<QPWI>
7333 (minus:<DWI> (match_dup 1) (match_dup 2)))))
7334 (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
7335 (minus:<DWI> (match_dup 1) (match_dup 2)))]
7336 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
7337 "#"
7338 "&& reload_completed"
7339 [(parallel [(set (reg:CC FLAGS_REG)
7340 (compare:CC (match_dup 1) (match_dup 2)))
7341 (set (match_dup 0)
7342 (minus:DWIH (match_dup 1) (match_dup 2)))])
7343 (parallel [(set (reg:CCO FLAGS_REG)
7344 (eq:CCO
7345 (minus:<DWI>
7346 (minus:<DWI>
7347 (sign_extend:<DWI> (match_dup 4))
7348 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0)))
7349 (sign_extend:<DWI> (match_dup 5)))
7350 (sign_extend:<DWI>
7351 (minus:DWIH
7352 (minus:DWIH
7353 (match_dup 4)
7354 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
7355 (match_dup 5)))))
7356 (set (match_dup 3)
7357 (minus:DWIH
7358 (minus:DWIH
7359 (match_dup 4)
7360 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
7361 (match_dup 5)))])]
7362 {
7363 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
7364 })
7365
;; Overflow-checking doubleword subtract of a constant.  Operand 2 is the
;; <DWI> constant subtracted from operand 1 and operand 3 is the <QPWI>
;; constant used in the overflow comparison; the insn condition requires
;; operand 2 to be a constant scalar integer that is rtx_equal_p to
;; operand 3.
;; After reload the insn is split like *subv<dwi>4_doubleword, except that
;; operand 5 is used directly (without sign_extend) in the <DWI> side of the
;; second comparison.  After split_double_mode, if operand 2 is const0_rtx
;; the preparation code instead emits subv<mode>4_1 on operands 3, 4, 5, 5
;; and finishes with DONE.
7366 (define_insn_and_split "*subv<dwi>4_doubleword_1"
7367 [(set (reg:CCO FLAGS_REG)
7368 (eq:CCO
7369 (minus:<QPWI>
7370 (sign_extend:<QPWI>
7371 (match_operand:<DWI> 1 "nonimmediate_operand" "0"))
7372 (match_operand:<QPWI> 3 "const_scalar_int_operand"))
7373 (sign_extend:<QPWI>
7374 (minus:<DWI>
7375 (match_dup 1)
7376 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "<di>")))))
7377 (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
7378 (minus:<DWI> (match_dup 1) (match_dup 2)))]
7379 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
7380 && CONST_SCALAR_INT_P (operands[2])
7381 && rtx_equal_p (operands[2], operands[3])"
7382 "#"
7383 "&& reload_completed"
7384 [(parallel [(set (reg:CC FLAGS_REG)
7385 (compare:CC (match_dup 1) (match_dup 2)))
7386 (set (match_dup 0)
7387 (minus:DWIH (match_dup 1) (match_dup 2)))])
7388 (parallel [(set (reg:CCO FLAGS_REG)
7389 (eq:CCO
7390 (minus:<DWI>
7391 (minus:<DWI>
7392 (sign_extend:<DWI> (match_dup 4))
7393 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0)))
7394 (match_dup 5))
7395 (sign_extend:<DWI>
7396 (minus:DWIH
7397 (minus:DWIH
7398 (match_dup 4)
7399 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
7400 (match_dup 5)))))
7401 (set (match_dup 3)
7402 (minus:DWIH
7403 (minus:DWIH
7404 (match_dup 4)
7405 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
7406 (match_dup 5)))])]
7407 {
7408 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
7409 if (operands[2] == const0_rtx)
7410 {
7411 emit_insn (gen_subv<mode>4_1 (operands[3], operands[4], operands[5],
7412 operands[5]));
7413 DONE;
7414 }
7415 })
7416
;; Subtract with borrow-in that also produces the CCO overflow flags.
;; Operand 0 receives operand 1 - borrow - operand 2, where the borrow is
;; the value of operator 5 applied to flags operand 3; operator 4 is the
;; same borrow test in <DWI> for the widened side of the overflow
;; comparison.  Operand 1 is tied to operand 0 and the template is an sbb
;; of operand 2 from operand 0.
;; Operand 1 deliberately carries no '%' constraint modifier: minus is not
;; commutative, so operands 1 and 2 must never be interchanged (this
;; matches the constraints of *subv<mode>4).
7417 (define_insn "*subv<mode>4_overflow_1"
7418 [(set (reg:CCO FLAGS_REG)
7419 (eq:CCO
7420 (minus:<DWI>
7421 (minus:<DWI>
7422 (sign_extend:<DWI>
7423 (match_operand:SWI 1 "nonimmediate_operand" "0,0"))
7424 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
7425 [(match_operand 3 "flags_reg_operand") (const_int 0)]))
7426 (sign_extend:<DWI>
7427 (match_operand:SWI 2 "<general_sext_operand>" "rWe,m")))
7428 (sign_extend:<DWI>
7429 (minus:SWI
7430 (minus:SWI
7431 (match_dup 1)
7432 (match_operator:SWI 5 "ix86_carry_flag_operator"
7433 [(match_dup 3) (const_int 0)]))
7434 (match_dup 2)))))
7435 (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r")
7436 (minus:SWI
7437 (minus:SWI
7438 (match_dup 1)
7439 (match_op_dup 5 [(match_dup 3) (const_int 0)]))
7440 (match_dup 2)))]
7441 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
7442 "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
7443 [(set_attr "type" "alu")
7444 (set_attr "mode" "<MODE>")])
7445
;; Subtract with borrow-in of an immediate that also produces the CCO
;; overflow flags.  Operand 2 is the <MODE> immediate and operand 6 the
;; constant used in the <DWI> side of the overflow comparison; the insn
;; condition requires operand 2 to be a CONST_INT whose INTVAL equals that
;; of operand 6.  Operand 1 is tied to operand 0 and the template is an sbb
;; of operand 2.
;; Operand 1 deliberately carries no '%' constraint modifier: minus is not
;; commutative, so operands 1 and 2 must never be interchanged.
;; length_immediate follows the same rule as subv<mode>4_1: 1 byte for an
;; immediate in [-128, 127], 4 bytes when <MODE_SIZE> is 8, and <MODE_SIZE>
;; bytes otherwise, so that an HImode immediate is counted as 2 bytes
;; rather than 4.
7446 (define_insn "*subv<mode>4_overflow_2"
7447 [(set (reg:CCO FLAGS_REG)
7448 (eq:CCO
7449 (minus:<DWI>
7450 (minus:<DWI>
7451 (sign_extend:<DWI>
7452 (match_operand:SWI 1 "nonimmediate_operand" "0"))
7453 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
7454 [(match_operand 3 "flags_reg_operand") (const_int 0)]))
7455 (match_operand:<DWI> 6 "const_int_operand" "n"))
7456 (sign_extend:<DWI>
7457 (minus:SWI
7458 (minus:SWI
7459 (match_dup 1)
7460 (match_operator:SWI 5 "ix86_carry_flag_operator"
7461 [(match_dup 3) (const_int 0)]))
7462 (match_operand:SWI 2 "x86_64_immediate_operand" "e")))))
7463 (set (match_operand:SWI 0 "nonimmediate_operand" "=rm")
7464 (minus:SWI
7465 (minus:SWI
7466 (match_dup 1)
7467 (match_op_dup 5 [(match_dup 3) (const_int 0)]))
7468 (match_dup 2)))]
7469 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
7470 && CONST_INT_P (operands[2])
7471 && INTVAL (operands[2]) == INTVAL (operands[6])"
7472 "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
7473 [(set_attr "type" "alu")
7474 (set_attr "mode" "<MODE>")
7475 (set (attr "length_immediate")
7476 (cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
7477 (const_string "1")
(match_test "<MODE_SIZE> == 8")
(const_string "4")]
7478 (const_string "<MODE_SIZE>")))])
7479
;; Unsigned subtract with jump on borrow.  Operands: 0 = result register,
;; 1 = minuend, 2 = subtrahend, 3 = jump label.  The parallel stores the
;; difference in operand 0 and sets the CC flags from comparing operand 1
;; with operand 2; the jump to label 3 is taken on
;; (ltu (reg:CC FLAGS_REG) (const_int 0)).
;; The preparation statement calls ix86_fixup_binary_operands_no_copy.
7480 (define_expand "usubv<mode>4"
7481 [(parallel [(set (reg:CC FLAGS_REG)
7482 (compare:CC
7483 (match_operand:SWI 1 "nonimmediate_operand")
7484 (match_operand:SWI 2 "<general_operand>")))
7485 (set (match_operand:SWI 0 "register_operand")
7486 (minus:SWI (match_dup 1) (match_dup 2)))])
7487 (set (pc) (if_then_else
7488 (ltu (reg:CC FLAGS_REG) (const_int 0))
7489 (label_ref (match_operand 3))
7490 (pc)))]
7491 ""
7492 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
7493
;; Subtraction that also sets the flags from a direct comparison of
;; operand 1 with operand 2 (rather than of the difference with zero, as in
;; *sub<mode>_2).  Accepted only when ix86_match_ccmode (insn, CCmode)
;; holds; operand 1 is tied to operand 0 and the template is a plain sub.
7494 (define_insn "*sub<mode>_3"
7495 [(set (reg FLAGS_REG)
7496 (compare (match_operand:SWI 1 "nonimmediate_operand" "0,0")
7497 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>")))
7498 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
7499 (minus:SWI (match_dup 1) (match_dup 2)))]
7500 "ix86_match_ccmode (insn, CCmode)
7501 && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
7502 "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
7503 [(set_attr "type" "alu")
7504 (set_attr "mode" "<MODE>")])
7505
;; Replace a compare-and-subtract parallel (flags from comparing operand 0
;; with operand 1, operand 0 -= operand 1) by the compare alone when the
;; matched insn carries a REG_UNUSED note for the register of operand 0.
7506 (define_peephole2
7507 [(parallel
7508 [(set (reg:CC FLAGS_REG)
7509 (compare:CC (match_operand:SWI 0 "general_reg_operand")
7510 (match_operand:SWI 1 "general_gr_operand")))
7511 (set (match_dup 0)
7512 (minus:SWI (match_dup 0) (match_dup 1)))])]
7513 "find_regno_note (peep2_next_insn (0), REG_UNUSED, REGNO (operands[0])) != 0"
7514 [(set (reg:CC FLAGS_REG)
7515 (compare:CC (match_dup 0) (match_dup 1)))])
7516
7517 ;; decl %eax; cmpl $-1, %eax; jne .Lxx; can be optimized into
7518 ;; subl $1, %eax; jnc .Lxx;
;; Matched sequence: operand 0 += -1 (flags clobbered), a CCZ compare of
;; operand 0 with -1, and a conditional jump to operand 2 whose test is
;; operator 1 (bt_comparison_operator) on the CCZ flags.  The condition
;; requires the flags register to be dead after the third insn.
;; Replacement: a parallel that compares operand 0 with 1 and subtracts 1
;; from it, followed by a jump to operand 2 on operand 3.  The preparation
;; code builds operand 3 on the CCmode flags register as GEU when operator 1
;; has code NE and as LTU otherwise.
7519 (define_peephole2
7520 [(parallel
7521 [(set (match_operand:SWI 0 "general_reg_operand")
7522 (plus:SWI (match_dup 0) (const_int -1)))
7523 (clobber (reg FLAGS_REG))])
7524 (set (reg:CCZ FLAGS_REG)
7525 (compare:CCZ (match_dup 0) (const_int -1)))
7526 (set (pc)
7527 (if_then_else (match_operator 1 "bt_comparison_operator"
7528 [(reg:CCZ FLAGS_REG) (const_int 0)])
7529 (match_operand 2)
7530 (pc)))]
7531 "peep2_regno_dead_p (3, FLAGS_REG)"
7532 [(parallel
7533 [(set (reg:CC FLAGS_REG)
7534 (compare:CC (match_dup 0) (const_int 1)))
7535 (set (match_dup 0)
7536 (minus:SWI (match_dup 0) (const_int 1)))])
7537 (set (pc)
7538 (if_then_else (match_dup 3)
7539 (match_dup 2)
7540 (pc)))]
7541 {
7542 rtx cc = gen_rtx_REG (CCmode, FLAGS_REG);
7543 operands[3] = gen_rtx_fmt_ee (GET_CODE (operands[1]) == NE
7544 ? GEU : LTU, VOIDmode, cc, const0_rtx);
7545 })
7546
7547 ;; Help combine use borrow flag to test for -1 after dec (add $-1).
;; Conditional select between operand 2 - 1 and operand 3, where the test
;; is operator 1 (bt_comparison_operator) comparing operand 2 with zero.
;; Requires TARGET_CMOVE; operand 2 is tied to operand 0.  After reload the
;; insn is split into a parallel that compares operand 2 with 1 and stores
;; operand 2 - 1 in operand 0, followed by a conditional move that keeps
;; operand 0 or takes operand 3 depending on operand 4.  The preparation
;; code builds operand 4 on the CCCmode flags register as GEU when
;; operator 1 has code NE and as LTU otherwise.
7548 (define_insn_and_split "*dec_cmov<mode>"
7549 [(set (match_operand:SWI248 0 "register_operand" "=r")
7550 (if_then_else:SWI248
7551 (match_operator 1 "bt_comparison_operator"
7552 [(match_operand:SWI248 2 "register_operand" "0") (const_int 0)])
7553 (plus:SWI248 (match_dup 2) (const_int -1))
7554 (match_operand:SWI248 3 "nonimmediate_operand" "rm")))
7555 (clobber (reg:CC FLAGS_REG))]
7556 "TARGET_CMOVE"
7557 "#"
7558 "&& reload_completed"
7559 [(parallel [(set (reg:CC FLAGS_REG)
7560 (compare:CC (match_dup 2) (const_int 1)))
7561 (set (match_dup 0) (minus:SWI248 (match_dup 2) (const_int 1)))])
7562 (set (match_dup 0)
7563 (if_then_else:SWI248 (match_dup 4) (match_dup 0) (match_dup 3)))]
7564 {
7565 rtx cc = gen_rtx_REG (CCCmode, FLAGS_REG);
7566 operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[1]) == NE
7567 ? GEU : LTU, VOIDmode, cc, const0_rtx);
7568 })
7569
;; Zero-extending variant of *sub<mode>_3 for TARGET_64BIT: the flags are
;; set from a direct comparison of operand 1 with operand 2 and the DImode
;; operand 0 receives the zero-extended SImode difference.  Accepted only
;; when ix86_match_ccmode (insn, CCmode) holds.
;; The template names the destination as %k0, like *subsi_1_zext and
;; *subsi_2_zext; operand 1 is tied to operand 0 by its "0" constraint, so
;; this is the same register that %1 would print.
7570 (define_insn "*subsi_3_zext"
7571 [(set (reg FLAGS_REG)
7572 (compare (match_operand:SI 1 "register_operand" "0")
7573 (match_operand:SI 2 "x86_64_general_operand" "rBMe")))
7574 (set (match_operand:DI 0 "register_operand" "=r")
7575 (zero_extend:DI
7576 (minus:SI (match_dup 1)
7577 (match_dup 2))))]
7578 "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)
7579 && ix86_binary_operator_ok (MINUS, SImode, operands)"
7580 "sub{l}\t{%2, %k0|%k0, %2}"
7581 [(set_attr "type" "alu")
7582 (set_attr "mode" "SI")])
7583 \f
7584 ;; Add with carry and subtract with borrow
7585
;; Add with carry-in: operand 0 = carry + operand 1 + operand 2, where the
;; carry is operator 4 (ix86_carry_flag_operator) applied to flags
;; operand 3.  The flags are clobbered.  Operand 1 is tied to operand 0 and
;; the template is an adc of operand 2 into operand 0.
7586 (define_insn "@add<mode>3_carry"
7587 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
7588 (plus:SWI
7589 (plus:SWI
7590 (match_operator:SWI 4 "ix86_carry_flag_operator"
7591 [(match_operand 3 "flags_reg_operand") (const_int 0)])
7592 (match_operand:SWI 1 "nonimmediate_operand" "%0,0"))
7593 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>")))
7594 (clobber (reg:CC FLAGS_REG))]
7595 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
7596 "adc{<imodesuffix>}\t{%2, %0|%0, %2}"
7597 [(set_attr "type" "alu")
7598 (set_attr "use_carry" "1")
7599 (set_attr "pent_pair" "pu")
7600 (set_attr "mode" "<MODE>")])
7601
;; Add only the carry to operand 1: operand 0 = carry + operand 1, where
;; the carry is operator 2 (ix86_carry_flag_operator) on the flags
;; register.  The insn condition requires operand 0 either not to be memory
;; or to be the same rtx as operand 1.  Emitted as an adc of the constant 0.
7602 (define_insn "*add<mode>3_carry_0"
7603 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
7604 (plus:SWI
7605 (match_operator:SWI 2 "ix86_carry_flag_operator"
7606 [(reg FLAGS_REG) (const_int 0)])
7607 (match_operand:SWI 1 "nonimmediate_operand" "0")))
7608 (clobber (reg:CC FLAGS_REG))]
7609 "!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[1])"
7610 "adc{<imodesuffix>}\t{$0, %0|%0, 0}"
7611 [(set_attr "type" "alu")
7612 (set_attr "use_carry" "1")
7613 (set_attr "pent_pair" "pu")
7614 (set_attr "mode" "<MODE>")])
7615
;; Like *add<mode>3_carry_0, but operator 2 is matched by
;; ix86_carry_flag_unset_operator instead of ix86_carry_flag_operator.
;; Operand 0 = operator 2 value + operand 1; emitted as an sbb of the
;; constant -1.
7616 (define_insn "*add<mode>3_carry_0r"
7617 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
7618 (plus:SWI
7619 (match_operator:SWI 2 "ix86_carry_flag_unset_operator"
7620 [(reg FLAGS_REG) (const_int 0)])
7621 (match_operand:SWI 1 "nonimmediate_operand" "0")))
7622 (clobber (reg:CC FLAGS_REG))]
7623 "!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[1])"
7624 "sbb{<imodesuffix>}\t{$-1, %0|%0, -1}"
7625 [(set_attr "type" "alu")
7626 (set_attr "use_carry" "1")
7627 (set_attr "pent_pair" "pu")
7628 (set_attr "mode" "<MODE>")])
7629
;; SImode add with carry whose result is zero-extended into a DImode
;; register: operand 0 = zero_extend ((carry + operand 1) + operand 2).
;; Only enabled for TARGET_64BIT.  Emitted as ADC{l} with operand 0
;; printed through the %k modifier.
7630 (define_insn "*addsi3_carry_zext"
7631 [(set (match_operand:DI 0 "register_operand" "=r")
7632 (zero_extend:DI
7633 (plus:SI
7634 (plus:SI (match_operator:SI 3 "ix86_carry_flag_operator"
7635 [(reg FLAGS_REG) (const_int 0)])
7636 (match_operand:SI 1 "register_operand" "%0"))
7637 (match_operand:SI 2 "x86_64_general_operand" "rBMe"))))
7638 (clobber (reg:CC FLAGS_REG))]
7639 "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
7640 "adc{l}\t{%2, %k0|%k0, %2}"
7641 [(set_attr "type" "alu")
7642 (set_attr "use_carry" "1")
7643 (set_attr "pent_pair" "pu")
7644 (set_attr "mode" "SI")])
7645
;; Zero-extending SImode variant of "add only the carry":
;;   operand 0 = zero_extend (carry + operand 1)
;; Only enabled for TARGET_64BIT.  Emitted as ADC{l} with immediate 0.
7646 (define_insn "*addsi3_carry_zext_0"
7647 [(set (match_operand:DI 0 "register_operand" "=r")
7648 (zero_extend:DI
7649 (plus:SI (match_operator:SI 2 "ix86_carry_flag_operator"
7650 [(reg FLAGS_REG) (const_int 0)])
7651 (match_operand:SI 1 "register_operand" "0"))))
7652 (clobber (reg:CC FLAGS_REG))]
7653 "TARGET_64BIT"
7654 "adc{l}\t{$0, %k0|%k0, 0}"
7655 [(set_attr "type" "alu")
7656 (set_attr "use_carry" "1")
7657 (set_attr "pent_pair" "pu")
7658 (set_attr "mode" "SI")])
7659
;; Zero-extending SImode variant using ix86_carry_flag_unset_operator.
;; Only enabled for TARGET_64BIT.  Emitted as SBB{l} with immediate -1,
;; i.e. operand 1 + (1 - CF) in SImode.
;; NOTE(review): operator 2 presumably evaluates to (1 - CF); the
;; predicate is defined elsewhere -- confirm there.
7660 (define_insn "*addsi3_carry_zext_0r"
7661 [(set (match_operand:DI 0 "register_operand" "=r")
7662 (zero_extend:DI
7663 (plus:SI (match_operator:SI 2 "ix86_carry_flag_unset_operator"
7664 [(reg FLAGS_REG) (const_int 0)])
7665 (match_operand:SI 1 "register_operand" "0"))))
7666 (clobber (reg:CC FLAGS_REG))]
7667 "TARGET_64BIT"
7668 "sbb{l}\t{$-1, %k0|%k0, -1}"
7669 [(set_attr "type" "alu")
7670 (set_attr "use_carry" "1")
7671 (set_attr "pent_pair" "pu")
7672 (set_attr "mode" "SI")])
7673
7674 ;; There is no point in generating the ADCX instruction: ADC is shorter and faster.
7675
;; Add with carry-in and carry-out for SWI48.  Two effects in parallel:
;;  - FLAGS_REG (CCC mode) is set by comparing, in <DWI> mode,
;;    zero_extend ((carry + operand 1) + operand 2) against
;;    zero_extend (operand 2) + carry;
;;  - operand 0 receives (carry + operand 1) + operand 2.
;; Operators 5 and 4 are the carry test in SWI48 and <DWI> mode
;; respectively, both on flags operand 3.  Emitted as a single ADC.
7676 (define_insn "addcarry<mode>"
7677 [(set (reg:CCC FLAGS_REG)
7678 (compare:CCC
7679 (zero_extend:<DWI>
7680 (plus:SWI48
7681 (plus:SWI48
7682 (match_operator:SWI48 5 "ix86_carry_flag_operator"
7683 [(match_operand 3 "flags_reg_operand") (const_int 0)])
7684 (match_operand:SWI48 1 "nonimmediate_operand" "%0,0"))
7685 (match_operand:SWI48 2 "nonimmediate_operand" "r,rm")))
7686 (plus:<DWI>
7687 (zero_extend:<DWI> (match_dup 2))
7688 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
7689 [(match_dup 3) (const_int 0)]))))
7690 (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
7691 (plus:SWI48 (plus:SWI48 (match_op_dup 5
7692 [(match_dup 3) (const_int 0)])
7693 (match_dup 1))
7694 (match_dup 2)))]
7695 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
7696 "adc{<imodesuffix>}\t{%2, %0|%0, %2}"
7697 [(set_attr "type" "alu")
7698 (set_attr "use_carry" "1")
7699 (set_attr "pent_pair" "pu")
7700 (set_attr "mode" "<MODE>")])
7701
;; Expander for an addition with carry-out but no carry-in:
;; FLAGS_REG (CCC mode) is set by comparing operand 1 + operand 2 against
;; operand 1, and operand 0 receives operand 1 + operand 2.
7702 (define_expand "addcarry<mode>_0"
7703 [(parallel
7704 [(set (reg:CCC FLAGS_REG)
7705 (compare:CCC
7706 (plus:SWI48
7707 (match_operand:SWI48 1 "nonimmediate_operand")
7708 (match_operand:SWI48 2 "x86_64_general_operand"))
7709 (match_dup 1)))
7710 (set (match_operand:SWI48 0 "nonimmediate_operand")
7711 (plus:SWI48 (match_dup 1) (match_dup 2)))])]
7712 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)")
7713
;; Immediate form of addcarry<mode>: operand 2 is a CONST_INT and the
;; <DWI>-mode comparison uses constant operand 6 in place of
;; zero_extend (operand 2).  The insn condition verifies that operand 6
;; really is operand 2 zero-extended, handling both the CONST_INT and the
;; two-element CONST_WIDE_INT representation.
;; length_immediate is 1 when operand 2 lies in [-128, 127], otherwise 4.
7714 (define_insn "*addcarry<mode>_1"
7715 [(set (reg:CCC FLAGS_REG)
7716 (compare:CCC
7717 (zero_extend:<DWI>
7718 (plus:SWI48
7719 (plus:SWI48
7720 (match_operator:SWI48 5 "ix86_carry_flag_operator"
7721 [(match_operand 3 "flags_reg_operand") (const_int 0)])
7722 (match_operand:SWI48 1 "nonimmediate_operand" "%0"))
7723 (match_operand:SWI48 2 "x86_64_immediate_operand" "e")))
7724 (plus:<DWI>
7725 (match_operand:<DWI> 6 "const_scalar_int_operand")
7726 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
7727 [(match_dup 3) (const_int 0)]))))
7728 (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm")
7729 (plus:SWI48 (plus:SWI48 (match_op_dup 5
7730 [(match_dup 3) (const_int 0)])
7731 (match_dup 1))
7732 (match_dup 2)))]
7733 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
7734 && CONST_INT_P (operands[2])
7735 /* Check that operands[6] is operands[2] zero extended from
7736 <MODE>mode to <DWI>mode. */
7737 && ((<MODE>mode == SImode || INTVAL (operands[2]) >= 0)
7738 ? (CONST_INT_P (operands[6])
7739 && UINTVAL (operands[6]) == (UINTVAL (operands[2])
7740 & GET_MODE_MASK (<MODE>mode)))
7741 : (CONST_WIDE_INT_P (operands[6])
7742 && CONST_WIDE_INT_NUNITS (operands[6]) == 2
7743 && ((unsigned HOST_WIDE_INT) CONST_WIDE_INT_ELT (operands[6], 0)
7744 == UINTVAL (operands[2]))
7745 && CONST_WIDE_INT_ELT (operands[6], 1) == 0))"
7746 "adc{<imodesuffix>}\t{%2, %0|%0, %2}"
7747 [(set_attr "type" "alu")
7748 (set_attr "use_carry" "1")
7749 (set_attr "pent_pair" "pu")
7750 (set_attr "mode" "<MODE>")
7751 (set (attr "length_immediate")
7752 (if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
7753 (const_string "1")
7754 (const_string "4")))])
7755
;; Subtract with borrow for the SWI modes:
;;   operand 0 = (operand 1 - carry) - operand 2
;; The carry is read from flags operand 3 through carry operator 4.
;; Emitted as a single SBB; FLAGS_REG is clobbered.
7756 (define_insn "@sub<mode>3_carry"
7757 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
7758 (minus:SWI
7759 (minus:SWI
7760 (match_operand:SWI 1 "nonimmediate_operand" "0,0")
7761 (match_operator:SWI 4 "ix86_carry_flag_operator"
7762 [(match_operand 3 "flags_reg_operand") (const_int 0)]))
7763 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>")))
7764 (clobber (reg:CC FLAGS_REG))]
7765 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
7766 "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
7767 [(set_attr "type" "alu")
7768 (set_attr "use_carry" "1")
7769 (set_attr "pent_pair" "pu")
7770 (set_attr "mode" "<MODE>")])
7771
;; Subtract only the carry flag from operand 1:
;;   operand 0 = operand 1 - carry
;; Emitted as SBB with an immediate 0.  A memory destination is accepted
;; only when it is the same rtx as operand 1.
7772 (define_insn "*sub<mode>3_carry_0"
7773 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
7774 (minus:SWI
7775 (match_operand:SWI 1 "nonimmediate_operand" "0")
7776 (match_operator:SWI 2 "ix86_carry_flag_operator"
7777 [(reg FLAGS_REG) (const_int 0)])))
7778 (clobber (reg:CC FLAGS_REG))]
7779 "!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[1])"
7780 "sbb{<imodesuffix>}\t{$0, %0|%0, 0}"
7781 [(set_attr "type" "alu")
7782 (set_attr "use_carry" "1")
7783 (set_attr "pent_pair" "pu")
7784 (set_attr "mode" "<MODE>")])
7785
;; Same shape as *sub<mode>3_carry_0, but operator 2 is matched by
;; ix86_carry_flag_unset_operator.  Emitted as ADC with immediate -1,
;; i.e. operand 1 + (-1) + CF = operand 1 - (1 - CF).
;; NOTE(review): operator 2 presumably evaluates to (1 - CF); the
;; predicate is defined elsewhere -- confirm there.
7786 (define_insn "*sub<mode>3_carry_0r"
7787 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
7788 (minus:SWI
7789 (match_operand:SWI 1 "nonimmediate_operand" "0")
7790 (match_operator:SWI 2 "ix86_carry_flag_unset_operator"
7791 [(reg FLAGS_REG) (const_int 0)])))
7792 (clobber (reg:CC FLAGS_REG))]
7793 "!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[1])"
7794 "adc{<imodesuffix>}\t{$-1, %0|%0, -1}"
7795 [(set_attr "type" "alu")
7796 (set_attr "use_carry" "1")
7797 (set_attr "pent_pair" "pu")
7798 (set_attr "mode" "<MODE>")])
7799
;; SImode subtract with borrow whose result is zero-extended into a DImode
;; register: operand 0 = zero_extend ((operand 1 - carry) - operand 2).
;; Only enabled for TARGET_64BIT.  Emitted as SBB{l} with operand 0
;; printed through the %k modifier.
7800 (define_insn "*subsi3_carry_zext"
7801 [(set (match_operand:DI 0 "register_operand" "=r")
7802 (zero_extend:DI
7803 (minus:SI
7804 (minus:SI
7805 (match_operand:SI 1 "register_operand" "0")
7806 (match_operator:SI 3 "ix86_carry_flag_operator"
7807 [(reg FLAGS_REG) (const_int 0)]))
7808 (match_operand:SI 2 "x86_64_general_operand" "rBMe"))))
7809 (clobber (reg:CC FLAGS_REG))]
7810 "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)"
7811 "sbb{l}\t{%2, %k0|%k0, %2}"
7812 [(set_attr "type" "alu")
7813 (set_attr "use_carry" "1")
7814 (set_attr "pent_pair" "pu")
7815 (set_attr "mode" "SI")])
7816
;; Zero-extending SImode variant of "subtract only the carry":
;;   operand 0 = zero_extend (operand 1 - carry)
;; Only enabled for TARGET_64BIT.  Emitted as SBB{l} with immediate 0.
7817 (define_insn "*subsi3_carry_zext_0"
7818 [(set (match_operand:DI 0 "register_operand" "=r")
7819 (zero_extend:DI
7820 (minus:SI
7821 (match_operand:SI 1 "register_operand" "0")
7822 (match_operator:SI 2 "ix86_carry_flag_operator"
7823 [(reg FLAGS_REG) (const_int 0)]))))
7824 (clobber (reg:CC FLAGS_REG))]
7825 "TARGET_64BIT"
7826 "sbb{l}\t{$0, %k0|%k0, 0}"
7827 [(set_attr "type" "alu")
7828 (set_attr "use_carry" "1")
7829 (set_attr "pent_pair" "pu")
7830 (set_attr "mode" "SI")])
7831
;; Zero-extending SImode variant using ix86_carry_flag_unset_operator.
;; Only enabled for TARGET_64BIT.  Emitted as ADC{l} with immediate -1,
;; i.e. operand 1 - (1 - CF) in SImode.
;; NOTE(review): operator 2 presumably evaluates to (1 - CF); the
;; predicate is defined elsewhere -- confirm there.
7832 (define_insn "*subsi3_carry_zext_0r"
7833 [(set (match_operand:DI 0 "register_operand" "=r")
7834 (zero_extend:DI
7835 (minus:SI
7836 (match_operand:SI 1 "register_operand" "0")
7837 (match_operator:SI 2 "ix86_carry_flag_unset_operator"
7838 [(reg FLAGS_REG) (const_int 0)]))))
7839 (clobber (reg:CC FLAGS_REG))]
7840 "TARGET_64BIT"
7841 "adc{l}\t{$-1, %k0|%k0, -1}"
7842 [(set_attr "type" "alu")
7843 (set_attr "use_carry" "1")
7844 (set_attr "pent_pair" "pu")
7845 (set_attr "mode" "SI")])
7846
;; Flags-only subtract with borrow: FLAGS_REG (CCC mode) is set by
;; comparing, in <DWI> mode, zero_extend (operand 1) against
;; carry + zero_extend (operand 2).  The arithmetic result is not kept:
;; operand 0 is only a clobbered scratch register, which operand 1 is
;; tied to by its "0" constraint.  Emitted as SBB.
7847 (define_insn "@sub<mode>3_carry_ccc"
7848 [(set (reg:CCC FLAGS_REG)
7849 (compare:CCC
7850 (zero_extend:<DWI> (match_operand:DWIH 1 "register_operand" "0"))
7851 (plus:<DWI>
7852 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))
7853 (zero_extend:<DWI>
7854 (match_operand:DWIH 2 "x86_64_sext_operand" "rmWe")))))
7855 (clobber (match_scratch:DWIH 0 "=r"))]
7856 ""
7857 "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
7858 [(set_attr "type" "alu")
7859 (set_attr "mode" "<MODE>")])
7860
;; Immediate form of the flags-only SBB above: operand 2 is a <DWI>-mode
;; immediate accepted by x86_64_dwzext_immediate_operand rather than a
;; zero_extend of a <MODE> operand.  The output code takes the <MODE>-mode
;; subreg of operand 2 at byte offset 0 into operands[3] and prints that
;; as the SBB source.
7861 (define_insn "*sub<mode>3_carry_ccc_1"
7862 [(set (reg:CCC FLAGS_REG)
7863 (compare:CCC
7864 (zero_extend:<DWI> (match_operand:DWIH 1 "register_operand" "0"))
7865 (plus:<DWI>
7866 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))
7867 (match_operand:<DWI> 2 "x86_64_dwzext_immediate_operand" "Wf"))))
7868 (clobber (match_scratch:DWIH 0 "=r"))]
7869 ""
7870 {
7871 operands[3] = simplify_subreg (<MODE>mode, operands[2], <DWI>mode, 0);
7872 return "sbb{<imodesuffix>}\t{%3, %0|%0, %3}";
7873 }
7874 [(set_attr "type" "alu")
7875 (set_attr "mode" "<MODE>")])
7876
7877 ;; The sign flag is set from the
7878 ;; (compare (match_dup 1) (plus:DWIH (ltu:DWIH ...) (match_dup 2)))
7879 ;; result, the overflow flag likewise, but the overflow flag is also
7880 ;; set if the (plus:DWIH (ltu:DWIH ...) (match_dup 2)) overflows.
;; Flags-only SBB producing CCGZ-mode flags, modelled as an UNSPEC_SBB of
;; operand 1, operand 2 and the incoming carry.  The arithmetic result
;; goes to a clobbered scratch register (operand 0).
7881 (define_insn "@sub<mode>3_carry_ccgz"
7882 [(set (reg:CCGZ FLAGS_REG)
7883 (unspec:CCGZ [(match_operand:DWIH 1 "register_operand" "0")
7884 (match_operand:DWIH 2 "x86_64_general_operand" "rBMe")
7885 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))]
7886 UNSPEC_SBB))
7887 (clobber (match_scratch:DWIH 0 "=r"))]
7888 ""
7889 "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
7890 [(set_attr "type" "alu")
7891 (set_attr "mode" "<MODE>")])
7892
;; Subtract with borrow-in and borrow-out for SWI48.  Two effects in
;; parallel:
;;  - FLAGS_REG (CCC mode) is set by comparing, in <DWI> mode,
;;    zero_extend (operand 1) against carry + zero_extend (operand 2);
;;  - operand 0 receives (operand 1 - carry) - operand 2.
;; Operators 4 and 5 are the carry test in <DWI> and SWI48 mode
;; respectively, both on flags operand 3.  Emitted as a single SBB.
7893 (define_insn "subborrow<mode>"
7894 [(set (reg:CCC FLAGS_REG)
7895 (compare:CCC
7896 (zero_extend:<DWI>
7897 (match_operand:SWI48 1 "nonimmediate_operand" "0"))
7898 (plus:<DWI>
7899 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
7900 [(match_operand 3 "flags_reg_operand") (const_int 0)])
7901 (zero_extend:<DWI>
7902 (match_operand:SWI48 2 "nonimmediate_operand" "rm")))))
7903 (set (match_operand:SWI48 0 "register_operand" "=r")
7904 (minus:SWI48 (minus:SWI48
7905 (match_dup 1)
7906 (match_operator:SWI48 5 "ix86_carry_flag_operator"
7907 [(match_dup 3) (const_int 0)]))
7908 (match_dup 2)))]
7909 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
7910 "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
7911 [(set_attr "type" "alu")
7912 (set_attr "use_carry" "1")
7913 (set_attr "pent_pair" "pu")
7914 (set_attr "mode" "<MODE>")])
7915
;; Expander for a subtraction with borrow-out but no borrow-in:
;; FLAGS_REG (CC mode) is set by comparing operand 1 with operand 2, and
;; operand 0 receives operand 1 - operand 2.
7916 (define_expand "subborrow<mode>_0"
7917 [(parallel
7918 [(set (reg:CC FLAGS_REG)
7919 (compare:CC
7920 (match_operand:SWI48 1 "nonimmediate_operand")
7921 (match_operand:SWI48 2 "<general_operand>")))
7922 (set (match_operand:SWI48 0 "register_operand")
7923 (minus:SWI48 (match_dup 1) (match_dup 2)))])]
7924 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)")
7925
;; Iterates over the CC and CCC flag modes; used below as the mode of the
;; flags register inside the geu/ltu tests.
7926 (define_mode_iterator CC_CCC [CC CCC])
7927
7928 ;; Pre-reload splitter to optimize
7929 ;; *setcc_qi followed by *addqi3_cconly_overflow_1 with the same QI
7930 ;; operand and no intervening flags modifications into nothing.
;; Matches a CCC-mode flags set that compares (neg (geu flags 0)) with
;; (ltu flags 0) in QImode, for flags in either CC or CCC mode.  It only
;; matches while ix86_pre_reload_split () holds, and its replacement
;; template is just (const_int 0).
7931 (define_insn_and_split "*setcc_qi_addqi3_cconly_overflow_1_<mode>"
7932 [(set (reg:CCC FLAGS_REG)
7933 (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
7934 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))))]
7935 "ix86_pre_reload_split ()"
7936 "#"
7937 "&& 1"
7938 [(const_int 0)])
7939 \f
7940 ;; Overflow setting add instructions
7941
;; Expander for a QImode addition performed only for its carry-out:
;; FLAGS_REG (CCC mode) is set by comparing operand 0 + operand 1 against
;; operand 0; the sum itself goes to a QImode scratch.  Rejected when both
;; operands are memory.
7942 (define_expand "addqi3_cconly_overflow"
7943 [(parallel
7944 [(set (reg:CCC FLAGS_REG)
7945 (compare:CCC
7946 (plus:QI
7947 (match_operand:QI 0 "nonimmediate_operand")
7948 (match_operand:QI 1 "general_operand"))
7949 (match_dup 0)))
7950 (clobber (scratch:QI))])]
7951 "!(MEM_P (operands[0]) && MEM_P (operands[1]))")
7952
;; Flags-only ADD: FLAGS_REG (CCC mode) is set by comparing
;; operand 1 + operand 2 against operand 1.  The sum lands in scratch
;; register operand 0.  Rejected when both inputs are memory.
7953 (define_insn "*add<mode>3_cconly_overflow_1"
7954 [(set (reg:CCC FLAGS_REG)
7955 (compare:CCC
7956 (plus:SWI
7957 (match_operand:SWI 1 "nonimmediate_operand" "%0")
7958 (match_operand:SWI 2 "<general_operand>" "<g>"))
7959 (match_dup 1)))
7960 (clobber (match_scratch:SWI 0 "=<r>"))]
7961 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
7962 "add{<imodesuffix>}\t{%2, %0|%0, %2}"
7963 [(set_attr "type" "alu")
7964 (set_attr "mode" "<MODE>")])
7965
;; ADD that keeps both the sum and the carry-out: FLAGS_REG (CCC mode) is
;; set by comparing operand 1 + operand 2 against operand 1, and operand 0
;; receives the sum.
7966 (define_insn "*add<mode>3_cc_overflow_1"
7967 [(set (reg:CCC FLAGS_REG)
7968 (compare:CCC
7969 (plus:SWI
7970 (match_operand:SWI 1 "nonimmediate_operand" "%0,0")
7971 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>"))
7972 (match_dup 1)))
7973 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
7974 (plus:SWI (match_dup 1) (match_dup 2)))]
7975 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
7976 "add{<imodesuffix>}\t{%2, %0|%0, %2}"
7977 [(set_attr "type" "alu")
7978 (set_attr "mode" "<MODE>")])
7979
;; Peephole: "reg += mem (setting CCC)" followed by "mem = reg" becomes a
;; single "mem += reg (setting CCC)".  Applied only when
;; TARGET_READ_MODIFY_WRITE is set or the insn is optimized for size, the
;; register is dead after the second insn, and the register does not
;; overlap anything mentioned in the memory operand.
7980 (define_peephole2
7981 [(parallel [(set (reg:CCC FLAGS_REG)
7982 (compare:CCC
7983 (plus:SWI (match_operand:SWI 0 "general_reg_operand")
7984 (match_operand:SWI 1 "memory_operand"))
7985 (match_dup 0)))
7986 (set (match_dup 0) (plus:SWI (match_dup 0) (match_dup 1)))])
7987 (set (match_dup 1) (match_dup 0))]
7988 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
7989 && peep2_reg_dead_p (2, operands[0])
7990 && !reg_overlap_mentioned_p (operands[0], operands[1])"
7991 [(parallel [(set (reg:CCC FLAGS_REG)
7992 (compare:CCC
7993 (plus:SWI (match_dup 1) (match_dup 0))
7994 (match_dup 1)))
7995 (set (match_dup 1) (plus:SWI (match_dup 1) (match_dup 0)))])])
7996
;; SImode ADD with carry-out whose sum is zero-extended into a DImode
;; register.  The flags compare operand 1 + operand 2 against operand 1.
;; Only enabled for TARGET_64BIT.
7997 (define_insn "*addsi3_zext_cc_overflow_1"
7998 [(set (reg:CCC FLAGS_REG)
7999 (compare:CCC
8000 (plus:SI
8001 (match_operand:SI 1 "nonimmediate_operand" "%0")
8002 (match_operand:SI 2 "x86_64_general_operand" "rBMe"))
8003 (match_dup 1)))
8004 (set (match_operand:DI 0 "register_operand" "=r")
8005 (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
8006 "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
8007 "add{l}\t{%2, %k0|%k0, %2}"
8008 [(set_attr "type" "alu")
8009 (set_attr "mode" "SI")])
8010
;; Flags-only ADD like *add<mode>3_cconly_overflow_1, except that the sum
;; is compared against operand 2 instead of operand 1.
8011 (define_insn "*add<mode>3_cconly_overflow_2"
8012 [(set (reg:CCC FLAGS_REG)
8013 (compare:CCC
8014 (plus:SWI
8015 (match_operand:SWI 1 "nonimmediate_operand" "%0")
8016 (match_operand:SWI 2 "<general_operand>" "<g>"))
8017 (match_dup 2)))
8018 (clobber (match_scratch:SWI 0 "=<r>"))]
8019 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
8020 "add{<imodesuffix>}\t{%2, %0|%0, %2}"
8021 [(set_attr "type" "alu")
8022 (set_attr "mode" "<MODE>")])
8023
;; ADD keeping both sum and carry-out like *add<mode>3_cc_overflow_1,
;; except that the sum is compared against operand 2 instead of operand 1.
8024 (define_insn "*add<mode>3_cc_overflow_2"
8025 [(set (reg:CCC FLAGS_REG)
8026 (compare:CCC
8027 (plus:SWI
8028 (match_operand:SWI 1 "nonimmediate_operand" "%0,0")
8029 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>"))
8030 (match_dup 2)))
8031 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
8032 (plus:SWI (match_dup 1) (match_dup 2)))]
8033 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
8034 "add{<imodesuffix>}\t{%2, %0|%0, %2}"
8035 [(set_attr "type" "alu")
8036 (set_attr "mode" "<MODE>")])
8037
;; Zero-extending SImode ADD with carry-out like
;; *addsi3_zext_cc_overflow_1, except that the sum is compared against
;; operand 2 instead of operand 1.  Only enabled for TARGET_64BIT.
8038 (define_insn "*addsi3_zext_cc_overflow_2"
8039 [(set (reg:CCC FLAGS_REG)
8040 (compare:CCC
8041 (plus:SI
8042 (match_operand:SI 1 "nonimmediate_operand" "%0")
8043 (match_operand:SI 2 "x86_64_general_operand" "rBMe"))
8044 (match_dup 2)))
8045 (set (match_operand:DI 0 "register_operand" "=r")
8046 (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
8047 "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
8048 "add{l}\t{%2, %k0|%k0, %2}"
8049 [(set_attr "type" "alu")
8050 (set_attr "mode" "SI")])
8051
;; Double-word ADD with carry-out.  After reload it is split into two
;; word-sized insns: an ADD on operands 0-2 that sets CCC, then an
;; add-with-carry on operands 3-5 that sets CCC again and writes operand 3.
;; The preparation code calls split_double_mode to rewrite operands[0..2]
;; in place and fill operands[3..5].
;; NOTE(review): presumably operands[0..2] become the low halves and
;; operands[3..5] the high halves -- confirm against split_double_mode.
;; If operands[2] is const0_rtx after the split, only
;; gen_addcarry<mode>_0 on operands 3-5 is emitted.  Otherwise operands[6]
;; is the <DWI>-mode zero extension of operands[5], folded to a constant
;; when operands[5] is a CONST_INT.
8052 (define_insn_and_split "*add<dwi>3_doubleword_cc_overflow_1"
8053 [(set (reg:CCC FLAGS_REG)
8054 (compare:CCC
8055 (plus:<DWI>
8056 (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0")
8057 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o"))
8058 (match_dup 1)))
8059 (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
8060 (plus:<DWI> (match_dup 1) (match_dup 2)))]
8061 "ix86_binary_operator_ok (PLUS, <DWI>mode, operands)"
8062 "#"
8063 "&& reload_completed"
8064 [(parallel [(set (reg:CCC FLAGS_REG)
8065 (compare:CCC
8066 (plus:DWIH (match_dup 1) (match_dup 2))
8067 (match_dup 1)))
8068 (set (match_dup 0)
8069 (plus:DWIH (match_dup 1) (match_dup 2)))])
8070 (parallel [(set (reg:CCC FLAGS_REG)
8071 (compare:CCC
8072 (zero_extend:<DWI>
8073 (plus:DWIH
8074 (plus:DWIH
8075 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
8076 (match_dup 4))
8077 (match_dup 5)))
8078 (plus:<DWI>
8079 (match_dup 6)
8080 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0)))))
8081 (set (match_dup 3)
8082 (plus:DWIH
8083 (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
8084 (match_dup 4))
8085 (match_dup 5)))])]
8086 {
8087 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
8088 if (operands[2] == const0_rtx)
8089 {
8090 emit_insn (gen_addcarry<mode>_0 (operands[3], operands[4], operands[5]));
8091 DONE;
8092 }
8093 if (CONST_INT_P (operands[5]))
8094 operands[6] = simplify_unary_operation (ZERO_EXTEND, <DWI>mode,
8095 operands[5], <MODE>mode);
8096 else
8097 operands[6] = gen_rtx_ZERO_EXTEND (<DWI>mode, operands[5]);
8098 })
8099
8100 ;; x == 0 with zero flag test can be done also as x < 1U with carry flag
8101 ;; test, where the latter is preferable if we have some carry consuming
8102 ;; instruction.
8103 ;; For x != 0, we need to use x < 1U with negation of carry, i.e.
8104 ;; + (1 - CF).
;; Pre-reload splitter for (operand 3 == 0) + operand 1 + operand 2.
;; Split into a compare of operand 3 with 1, followed by
;; carry + operand 1 + operand 2 with the carry read via ltu on FLAGS_REG.
8105 (define_insn_and_split "*add<mode>3_eq"
8106 [(set (match_operand:SWI 0 "nonimmediate_operand")
8107 (plus:SWI
8108 (plus:SWI
8109 (eq:SWI (match_operand 3 "int_nonimmediate_operand") (const_int 0))
8110 (match_operand:SWI 1 "nonimmediate_operand"))
8111 (match_operand:SWI 2 "<general_operand>")))
8112 (clobber (reg:CC FLAGS_REG))]
8113 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
8114 && ix86_pre_reload_split ()"
8115 "#"
8116 "&& 1"
8117 [(set (reg:CC FLAGS_REG)
8118 (compare:CC (match_dup 3) (const_int 1)))
8119 (parallel [(set (match_dup 0)
8120 (plus:SWI
8121 (plus:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))
8122 (match_dup 1))
8123 (match_dup 2)))
8124 (clobber (reg:CC FLAGS_REG))])])
8125
;; Pre-reload splitter for (operand 3 != 0) + operand 1 + constant
;; operand 2.  Split into a compare of operand 3 with 1, followed by
;; operand 1 - carry - C, where the preparation code replaces operand 2
;; with C = ~operand 2 (since 1 + x == -(~x)).
;; Operand 2 must be a CONST_INT and, in DImode, must not be -0x80000000.
;; For DImode the new constant is generated in SImode.
8126 (define_insn_and_split "*add<mode>3_ne"
8127 [(set (match_operand:SWI 0 "nonimmediate_operand")
8128 (plus:SWI
8129 (plus:SWI
8130 (ne:SWI (match_operand 3 "int_nonimmediate_operand") (const_int 0))
8131 (match_operand:SWI 1 "nonimmediate_operand"))
8132 (match_operand:SWI 2 "<immediate_operand>")))
8133 (clobber (reg:CC FLAGS_REG))]
8134 "CONST_INT_P (operands[2])
8135 && (<MODE>mode != DImode
8136 || INTVAL (operands[2]) != HOST_WIDE_INT_C (-0x80000000))
8137 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
8138 && ix86_pre_reload_split ()"
8139 "#"
8140 "&& 1"
8141 [(set (reg:CC FLAGS_REG)
8142 (compare:CC (match_dup 3) (const_int 1)))
8143 (parallel [(set (match_dup 0)
8144 (minus:SWI
8145 (minus:SWI (match_dup 1)
8146 (ltu:SWI (reg:CC FLAGS_REG) (const_int 0)))
8147 (match_dup 2)))
8148 (clobber (reg:CC FLAGS_REG))])]
8149 {
8150 operands[2] = gen_int_mode (~INTVAL (operands[2]),
8151 <MODE>mode == DImode ? SImode : <MODE>mode);
8152 })
8153
;; Pre-reload splitter for (operand 2 == 0) + operand 1.
;; Split into a compare of operand 2 with 1, followed by
;; carry + operand 1.  Operand 1 is first forced into a register if it is
;; not a nonimmediate_operand.
8154 (define_insn_and_split "*add<mode>3_eq_0"
8155 [(set (match_operand:SWI 0 "nonimmediate_operand")
8156 (plus:SWI
8157 (eq:SWI (match_operand 2 "int_nonimmediate_operand") (const_int 0))
8158 (match_operand:SWI 1 "<general_operand>")))
8159 (clobber (reg:CC FLAGS_REG))]
8160 "ix86_unary_operator_ok (PLUS, <MODE>mode, operands)
8161 && ix86_pre_reload_split ()"
8162 "#"
8163 "&& 1"
8164 [(set (reg:CC FLAGS_REG)
8165 (compare:CC (match_dup 2) (const_int 1)))
8166 (parallel [(set (match_dup 0)
8167 (plus:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))
8168 (match_dup 1)))
8169 (clobber (reg:CC FLAGS_REG))])]
8170 {
8171 if (!nonimmediate_operand (operands[1], <MODE>mode))
8172 operands[1] = force_reg (<MODE>mode, operands[1]);
8173 })
8174
;; Pre-reload splitter for (operand 2 != 0) + operand 1.
;; Split into a compare of operand 2 with 1, followed by
;; operand 1 - carry - (-1).  Operand 1 is first forced into a register
;; if it is not a nonimmediate_operand.
8175 (define_insn_and_split "*add<mode>3_ne_0"
8176 [(set (match_operand:SWI 0 "nonimmediate_operand")
8177 (plus:SWI
8178 (ne:SWI (match_operand 2 "int_nonimmediate_operand") (const_int 0))
8179 (match_operand:SWI 1 "<general_operand>")))
8180 (clobber (reg:CC FLAGS_REG))]
8181 "ix86_unary_operator_ok (PLUS, <MODE>mode, operands)
8182 && ix86_pre_reload_split ()"
8183 "#"
8184 "&& 1"
8185 [(set (reg:CC FLAGS_REG)
8186 (compare:CC (match_dup 2) (const_int 1)))
8187 (parallel [(set (match_dup 0)
8188 (minus:SWI (minus:SWI
8189 (match_dup 1)
8190 (ltu:SWI (reg:CC FLAGS_REG) (const_int 0)))
8191 (const_int -1)))
8192 (clobber (reg:CC FLAGS_REG))])]
8193 {
8194 if (!nonimmediate_operand (operands[1], <MODE>mode))
8195 operands[1] = force_reg (<MODE>mode, operands[1]);
8196 })
8197
;; Pre-reload splitter for operand 1 - (operand 3 == 0) - operand 2.
;; Split into a compare of operand 3 with 1, followed by
;; operand 1 - carry - operand 2 with the carry read via ltu on FLAGS_REG.
8198 (define_insn_and_split "*sub<mode>3_eq"
8199 [(set (match_operand:SWI 0 "nonimmediate_operand")
8200 (minus:SWI
8201 (minus:SWI
8202 (match_operand:SWI 1 "nonimmediate_operand")
8203 (eq:SWI (match_operand 3 "int_nonimmediate_operand")
8204 (const_int 0)))
8205 (match_operand:SWI 2 "<general_operand>")))
8206 (clobber (reg:CC FLAGS_REG))]
8207 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
8208 && ix86_pre_reload_split ()"
8209 "#"
8210 "&& 1"
8211 [(set (reg:CC FLAGS_REG)
8212 (compare:CC (match_dup 3) (const_int 1)))
8213 (parallel [(set (match_dup 0)
8214 (minus:SWI
8215 (minus:SWI (match_dup 1)
8216 (ltu:SWI (reg:CC FLAGS_REG) (const_int 0)))
8217 (match_dup 2)))
8218 (clobber (reg:CC FLAGS_REG))])])
8219
;; Pre-reload splitter for operand 1 - (operand 3 != 0) + constant
;; operand 2.  Split into a compare of operand 3 with 1, followed by
;; carry + operand 1 + C, where the preparation code replaces operand 2
;; with C = operand 2 - 1.
;; Operand 2 must be a CONST_INT and, in DImode, must not be -0x80000000.
;; For DImode the new constant is generated in SImode.
8220 (define_insn_and_split "*sub<mode>3_ne"
8221 [(set (match_operand:SWI 0 "nonimmediate_operand")
8222 (plus:SWI
8223 (minus:SWI
8224 (match_operand:SWI 1 "nonimmediate_operand")
8225 (ne:SWI (match_operand 3 "int_nonimmediate_operand")
8226 (const_int 0)))
8227 (match_operand:SWI 2 "<immediate_operand>")))
8228 (clobber (reg:CC FLAGS_REG))]
8229 "CONST_INT_P (operands[2])
8230 && (<MODE>mode != DImode
8231 || INTVAL (operands[2]) != HOST_WIDE_INT_C (-0x80000000))
8232 && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
8233 && ix86_pre_reload_split ()"
8234 "#"
8235 "&& 1"
8236 [(set (reg:CC FLAGS_REG)
8237 (compare:CC (match_dup 3) (const_int 1)))
8238 (parallel [(set (match_dup 0)
8239 (plus:SWI
8240 (plus:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))
8241 (match_dup 1))
8242 (match_dup 2)))
8243 (clobber (reg:CC FLAGS_REG))])]
8244 {
8245 operands[2] = gen_int_mode (INTVAL (operands[2]) - 1,
8246 <MODE>mode == DImode ? SImode : <MODE>mode);
8247 })
8248
;; Pre-reload splitter for operand 1 - (operand 3 == 0) + constant
;; operand 2.  Split into a compare of operand 3 with 1, followed by
;; operand 1 - carry - C, where the preparation code replaces operand 2
;; with C = -operand 2.
;; Operand 2 must be a CONST_INT and, in DImode, must not be -0x80000000.
;; For DImode the new constant is generated in SImode.
8249 (define_insn_and_split "*sub<mode>3_eq_1"
8250 [(set (match_operand:SWI 0 "nonimmediate_operand")
8251 (plus:SWI
8252 (minus:SWI
8253 (match_operand:SWI 1 "nonimmediate_operand")
8254 (eq:SWI (match_operand 3 "int_nonimmediate_operand")
8255 (const_int 0)))
8256 (match_operand:SWI 2 "<immediate_operand>")))
8257 (clobber (reg:CC FLAGS_REG))]
8258 "CONST_INT_P (operands[2])
8259 && (<MODE>mode != DImode
8260 || INTVAL (operands[2]) != HOST_WIDE_INT_C (-0x80000000))
8261 && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
8262 && ix86_pre_reload_split ()"
8263 "#"
8264 "&& 1"
8265 [(set (reg:CC FLAGS_REG)
8266 (compare:CC (match_dup 3) (const_int 1)))
8267 (parallel [(set (match_dup 0)
8268 (minus:SWI
8269 (minus:SWI (match_dup 1)
8270 (ltu:SWI (reg:CC FLAGS_REG) (const_int 0)))
8271 (match_dup 2)))
8272 (clobber (reg:CC FLAGS_REG))])]
8273 {
8274 operands[2] = gen_int_mode (-INTVAL (operands[2]),
8275 <MODE>mode == DImode ? SImode : <MODE>mode);
8276 })
8277
;; Pre-reload splitter for operand 1 - (operand 2 == 0).
;; Split into a compare of operand 2 with 1, followed by
;; operand 1 - carry.  Operand 1 is first forced into a register if it is
;; not a nonimmediate_operand.
8278 (define_insn_and_split "*sub<mode>3_eq_0"
8279 [(set (match_operand:SWI 0 "nonimmediate_operand")
8280 (minus:SWI
8281 (match_operand:SWI 1 "<general_operand>")
8282 (eq:SWI (match_operand 2 "int_nonimmediate_operand") (const_int 0))))
8283 (clobber (reg:CC FLAGS_REG))]
8284 "ix86_unary_operator_ok (MINUS, <MODE>mode, operands)
8285 && ix86_pre_reload_split ()"
8286 "#"
8287 "&& 1"
8288 [(set (reg:CC FLAGS_REG)
8289 (compare:CC (match_dup 2) (const_int 1)))
8290 (parallel [(set (match_dup 0)
8291 (minus:SWI (match_dup 1)
8292 (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))))
8293 (clobber (reg:CC FLAGS_REG))])]
8294 {
8295 if (!nonimmediate_operand (operands[1], <MODE>mode))
8296 operands[1] = force_reg (<MODE>mode, operands[1]);
8297 })
8298
;; Pre-reload splitter for operand 1 - (operand 2 != 0).
;; Split into a compare of operand 2 with 1, followed by
;; carry + operand 1 + (-1).  Operand 1 is first forced into a register
;; if it is not a nonimmediate_operand.
8299 (define_insn_and_split "*sub<mode>3_ne_0"
8300 [(set (match_operand:SWI 0 "nonimmediate_operand")
8301 (minus:SWI
8302 (match_operand:SWI 1 "<general_operand>")
8303 (ne:SWI (match_operand 2 "int_nonimmediate_operand") (const_int 0))))
8304 (clobber (reg:CC FLAGS_REG))]
8305 "ix86_unary_operator_ok (MINUS, <MODE>mode, operands)
8306 && ix86_pre_reload_split ()"
8307 "#"
8308 "&& 1"
8309 [(set (reg:CC FLAGS_REG)
8310 (compare:CC (match_dup 2) (const_int 1)))
8311 (parallel [(set (match_dup 0)
8312 (plus:SWI (plus:SWI
8313 (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))
8314 (match_dup 1))
8315 (const_int -1)))
8316 (clobber (reg:CC FLAGS_REG))])]
8317 {
8318 if (!nonimmediate_operand (operands[1], <MODE>mode))
8319 operands[1] = force_reg (<MODE>mode, operands[1]);
8320 })
8321
8322 ;; The patterns that match these are at the end of this file.
8323
;; XFmode add/subtract expander (plusminus iterator); all three operands
;; must be registers.  Requires TARGET_80387.
8324 (define_expand "<insn>xf3"
8325 [(set (match_operand:XF 0 "register_operand")
8326 (plusminus:XF
8327 (match_operand:XF 1 "register_operand")
8328 (match_operand:XF 2 "register_operand")))]
8329 "TARGET_80387")
8330
;; HFmode add/subtract expander (plusminus iterator); operand 2 may be a
;; register or memory.  Requires TARGET_AVX512FP16.
8331 (define_expand "<insn>hf3"
8332 [(set (match_operand:HF 0 "register_operand")
8333 (plusminus:HF
8334 (match_operand:HF 1 "register_operand")
8335 (match_operand:HF 2 "nonimmediate_operand")))]
8336 "TARGET_AVX512FP16")
8337
;; Add/subtract expander for the MODEF modes (plusminus iterator).
;; Enabled when x87 arithmetic is allowed for the mode, or when the mode
;; is an SSE float mode and TARGET_SSE_MATH is set.
8338 (define_expand "<insn><mode>3"
8339 [(set (match_operand:MODEF 0 "register_operand")
8340 (plusminus:MODEF
8341 (match_operand:MODEF 1 "register_operand")
8342 (match_operand:MODEF 2 "nonimmediate_operand")))]
8343 "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode))
8344 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)")
8345 \f
8346 ;; Multiply instructions
8347
;; Integer multiply expander for the SWIM248 modes:
;; operand 0 = operand 1 * operand 2, with FLAGS_REG clobbered.
;; It has no enabling condition of its own.
8348 (define_expand "mul<mode>3"
8349 [(parallel [(set (match_operand:SWIM248 0 "register_operand")
8350 (mult:SWIM248
8351 (match_operand:SWIM248 1 "register_operand")
8352 (match_operand:SWIM248 2 "<general_operand>")))
8353 (clobber (reg:CC FLAGS_REG))])])
8354
;; QImode multiply expander: operand 0 = operand 1 * operand 2, with
;; FLAGS_REG clobbered.  Requires TARGET_QIMODE_MATH.
8355 (define_expand "mulqi3"
8356 [(parallel [(set (match_operand:QI 0 "register_operand")
8357 (mult:QI
8358 (match_operand:QI 1 "register_operand")
8359 (match_operand:QI 2 "nonimmediate_operand")))
8360 (clobber (reg:CC FLAGS_REG))])]
8361 "TARGET_QIMODE_MATH")
8362
8363 ;; On AMDFAM10
8364 ;; IMUL reg32/64, reg32/64, imm8 Direct
8365 ;; IMUL reg32/64, mem32/64, imm8 VectorPath
8366 ;; IMUL reg32/64, reg32/64, imm32 Direct
8367 ;; IMUL reg32/64, mem32/64, imm32 VectorPath
8368 ;; IMUL reg32/64, reg32/64 Direct
8369 ;; IMUL reg32/64, mem32/64 Direct
8370 ;;
8371 ;; On BDVER1, all above IMULs use DirectPath
8372 ;;
8373 ;; On AMDFAM10
8374 ;; IMUL reg16, reg16, imm8 VectorPath
8375 ;; IMUL reg16, mem16, imm8 VectorPath
8376 ;; IMUL reg16, reg16, imm16 VectorPath
8377 ;; IMUL reg16, mem16, imm16 VectorPath
8378 ;; IMUL reg16, reg16 Direct
8379 ;; IMUL reg16, mem16 Direct
8380 ;;
8381 ;; On BDVER1, all HI MULs use DoublePath
8382
;; IMUL for the SWIM248 modes, with three alternatives:
;;   0, 1: three-operand form, operand 2 matched by constraint "K" or "<i>";
;;   2:    two-operand form, operand 1 tied to operand 0.
;; Rejected when operands 1 and 2 are both memory.  The athlon_decode,
;; amdfam10_decode and bdver1_decode attributes depend on the CPU, the
;; alternative, HImode, and whether operand 1 is memory.
8383 (define_insn "*mul<mode>3_1"
8384 [(set (match_operand:SWIM248 0 "register_operand" "=r,r,r")
8385 (mult:SWIM248
8386 (match_operand:SWIM248 1 "nonimmediate_operand" "%rm,rm,0")
8387 (match_operand:SWIM248 2 "<general_operand>" "K,<i>,<m>r")))
8388 (clobber (reg:CC FLAGS_REG))]
8389 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
8390 "@
8391 imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
8392 imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
8393 imul{<imodesuffix>}\t{%2, %0|%0, %2}"
8394 [(set_attr "type" "imul")
8395 (set_attr "prefix_0f" "0,0,1")
8396 (set (attr "athlon_decode")
8397 (cond [(eq_attr "cpu" "athlon")
8398 (const_string "vector")
8399 (eq_attr "alternative" "1")
8400 (const_string "vector")
8401 (and (eq_attr "alternative" "2")
8402 (ior (match_test "<MODE>mode == HImode")
8403 (match_operand 1 "memory_operand")))
8404 (const_string "vector")]
8405 (const_string "direct")))
8406 (set (attr "amdfam10_decode")
8407 (cond [(and (eq_attr "alternative" "0,1")
8408 (ior (match_test "<MODE>mode == HImode")
8409 (match_operand 1 "memory_operand")))
8410 (const_string "vector")]
8411 (const_string "direct")))
8412 (set (attr "bdver1_decode")
8413 (if_then_else
8414 (match_test "<MODE>mode == HImode")
8415 (const_string "double")
8416 (const_string "direct")))
8417 (set_attr "mode" "<MODE>")])
8418
;; SImode IMUL whose product is zero-extended into a DImode register.
;; Same three alternatives as *mul<mode>3_1 (two three-operand immediate
;; forms and one two-operand form).  Only enabled for TARGET_64BIT and
;; rejected when operands 1 and 2 are both memory.
8419 (define_insn "*mulsi3_1_zext"
8420 [(set (match_operand:DI 0 "register_operand" "=r,r,r")
8421 (zero_extend:DI
8422 (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0")
8423 (match_operand:SI 2 "x86_64_general_operand" "K,e,BMr"))))
8424 (clobber (reg:CC FLAGS_REG))]
8425 "TARGET_64BIT
8426 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8427 "@
8428 imul{l}\t{%2, %1, %k0|%k0, %1, %2}
8429 imul{l}\t{%2, %1, %k0|%k0, %1, %2}
8430 imul{l}\t{%2, %k0|%k0, %2}"
8431 [(set_attr "type" "imul")
8432 (set_attr "prefix_0f" "0,0,1")
8433 (set (attr "athlon_decode")
8434 (cond [(eq_attr "cpu" "athlon")
8435 (const_string "vector")
8436 (eq_attr "alternative" "1")
8437 (const_string "vector")
8438 (and (eq_attr "alternative" "2")
8439 (match_operand 1 "memory_operand"))
8440 (const_string "vector")]
8441 (const_string "direct")))
8442 (set (attr "amdfam10_decode")
8443 (cond [(and (eq_attr "alternative" "0,1")
8444 (match_operand 1 "memory_operand"))
8445 (const_string "vector")]
8446 (const_string "direct")))
8447 (set_attr "bdver1_decode" "direct")
8448 (set_attr "mode" "SI")])
8449
8450 ;;On AMDFAM10 and BDVER1
8451 ;; MUL reg8 Direct
8452 ;; MUL mem8 Direct
8453
;; QImode multiply.  The result uses the "a" register constraint and
;; operand 1 is tied to it, so the emitted MUL{b} names only operand 2.
;; Requires TARGET_QIMODE_MATH and rejects two memory inputs.
8454 (define_insn "*mulqi3_1"
8455 [(set (match_operand:QI 0 "register_operand" "=a")
8456 (mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
8457 (match_operand:QI 2 "nonimmediate_operand" "qm")))
8458 (clobber (reg:CC FLAGS_REG))]
8459 "TARGET_QIMODE_MATH
8460 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8461 "mul{b}\t%2"
8462 [(set_attr "type" "imul")
8463 (set_attr "length_immediate" "0")
8464 (set (attr "athlon_decode")
8465 (if_then_else (eq_attr "cpu" "athlon")
8466 (const_string "vector")
8467 (const_string "direct")))
8468 (set_attr "amdfam10_decode" "direct")
8469 (set_attr "bdver1_decode" "direct")
8470 (set_attr "mode" "QI")])
8471
8472 ;; Multiply with jump on overflow.
;; Expands to a multiply that sets FLAGS_REG in CCO mode plus a
;; conditional jump to label operand 3 on (eq (reg:CCO FLAGS_REG) 0).
;; The CCO value compares the <DWI>-mode product of sign-extended
;; operand 1 and operand 4 with the sign extension of the <MODE>-mode
;; product.  The preparation code sets operand 4 to operand 2 itself when
;; it is a CONST_INT, otherwise to (sign_extend:<DWI> operand 2).
8473 (define_expand "mulv<mode>4"
8474 [(parallel [(set (reg:CCO FLAGS_REG)
8475 (eq:CCO (mult:<DWI>
8476 (sign_extend:<DWI>
8477 (match_operand:SWI248 1 "register_operand"))
8478 (match_dup 4))
8479 (sign_extend:<DWI>
8480 (mult:SWI248 (match_dup 1)
8481 (match_operand:SWI248 2
8482 "<general_operand>")))))
8483 (set (match_operand:SWI248 0 "register_operand")
8484 (mult:SWI248 (match_dup 1) (match_dup 2)))])
8485 (set (pc) (if_then_else
8486 (eq (reg:CCO FLAGS_REG) (const_int 0))
8487 (label_ref (match_operand 3))
8488 (pc)))]
8489 ""
8490 {
8491 if (CONST_INT_P (operands[2]))
8492 operands[4] = operands[2];
8493 else
8494 operands[4] = gen_rtx_SIGN_EXTEND (<DWI>mode, operands[2]);
8495 })
8496
;; Overflow-checking IMUL for SWI48: sets FLAGS_REG (CCO mode) from the
;; comparison of the <DWI>-mode product of the sign-extended operands with
;; the sign-extended <MODE>-mode product, and stores the <MODE>-mode
;; product in operand 0.  Alternative 0 is the three-operand form,
;; alternative 1 the two-operand form with operand 1 tied to operand 0.
8497 (define_insn "*mulv<mode>4"
8498 [(set (reg:CCO FLAGS_REG)
8499 (eq:CCO (mult:<DWI>
8500 (sign_extend:<DWI>
8501 (match_operand:SWI48 1 "nonimmediate_operand" "%rm,0"))
8502 (sign_extend:<DWI>
8503 (match_operand:SWI48 2 "x86_64_sext_operand" "We,mr")))
8504 (sign_extend:<DWI>
8505 (mult:SWI48 (match_dup 1) (match_dup 2)))))
8506 (set (match_operand:SWI48 0 "register_operand" "=r,r")
8507 (mult:SWI48 (match_dup 1) (match_dup 2)))]
8508 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
8509 "@
8510 imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
8511 imul{<imodesuffix>}\t{%2, %0|%0, %2}"
8512 [(set_attr "type" "imul")
8513 (set_attr "prefix_0f" "0,1")
8514 (set (attr "athlon_decode")
8515 (cond [(eq_attr "cpu" "athlon")
8516 (const_string "vector")
8517 (eq_attr "alternative" "0")
8518 (const_string "vector")
8519 (and (eq_attr "alternative" "1")
8520 (match_operand 1 "memory_operand"))
8521 (const_string "vector")]
8522 (const_string "direct")))
8523 (set (attr "amdfam10_decode")
8524 (cond [(and (eq_attr "alternative" "1")
8525 (match_operand 1 "memory_operand"))
8526 (const_string "vector")]
8527 (const_string "direct")))
8528 (set_attr "bdver1_decode" "direct")
8529 (set_attr "mode" "<MODE>")])
8530
;; HImode signed multiply that also sets the CCO flags; the wide comparison
;; is done in SImode.  Operand 1 is tied to operand 0 and the two-operand
;; imul{w} template is emitted.  Rejected when operands 1 and 2 are both
;; MEM.
8531 (define_insn "*mulvhi4"
8532 [(set (reg:CCO FLAGS_REG)
8533 (eq:CCO (mult:SI
8534 (sign_extend:SI
8535 (match_operand:HI 1 "nonimmediate_operand" "%0"))
8536 (sign_extend:SI
8537 (match_operand:HI 2 "nonimmediate_operand" "mr")))
8538 (sign_extend:SI
8539 (mult:HI (match_dup 1) (match_dup 2)))))
8540 (set (match_operand:HI 0 "register_operand" "=r")
8541 (mult:HI (match_dup 1) (match_dup 2)))]
8542 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
8543 "imul{w}\t{%2, %0|%0, %2}"
8544 [(set_attr "type" "imul")
8545 (set_attr "prefix_0f" "1")
8546 (set_attr "athlon_decode" "vector")
8547 (set_attr "amdfam10_decode" "direct")
8548 (set_attr "bdver1_decode" "double")
8549 (set_attr "mode" "HI")])
8550
;; Signed multiply by a constant that also sets the CCO flags (SWI248).
;; Operand 3 is the <DWI> const_int used in the wide product and operand 2
;; is the narrow immediate; the insn condition requires operand 2 to be a
;; CONST_INT with the same INTVAL as operand 3.  The three-operand imul
;; template is used for both alternatives.  length_immediate is 1 for
;; alternative 0, 4 when <MODE_SIZE> is 8, and <MODE_SIZE> otherwise.
8551 (define_insn "*mulv<mode>4_1"
8552 [(set (reg:CCO FLAGS_REG)
8553 (eq:CCO (mult:<DWI>
8554 (sign_extend:<DWI>
8555 (match_operand:SWI248 1 "nonimmediate_operand" "rm,rm"))
8556 (match_operand:<DWI> 3 "const_int_operand" "K,i"))
8557 (sign_extend:<DWI>
8558 (mult:SWI248 (match_dup 1)
8559 (match_operand:SWI248 2
8560 "<immediate_operand>" "K,<i>")))))
8561 (set (match_operand:SWI248 0 "register_operand" "=r,r")
8562 (mult:SWI248 (match_dup 1) (match_dup 2)))]
8563 "!(MEM_P (operands[1]) && MEM_P (operands[2]))
8564 && CONST_INT_P (operands[2])
8565 && INTVAL (operands[2]) == INTVAL (operands[3])"
8566 "imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
8567 [(set_attr "type" "imul")
8568 (set (attr "prefix_0f")
8569 (if_then_else
8570 (match_test "<MODE>mode == HImode")
8571 (const_string "0")
8572 (const_string "*")))
8573 (set (attr "athlon_decode")
8574 (cond [(eq_attr "cpu" "athlon")
8575 (const_string "vector")
8576 (eq_attr "alternative" "1")
8577 (const_string "vector")]
8578 (const_string "direct")))
8579 (set (attr "amdfam10_decode")
8580 (cond [(ior (match_test "<MODE>mode == HImode")
8581 (match_operand 1 "memory_operand"))
8582 (const_string "vector")]
8583 (const_string "direct")))
8584 (set (attr "bdver1_decode")
8585 (if_then_else
8586 (match_test "<MODE>mode == HImode")
8587 (const_string "double")
8588 (const_string "direct")))
8589 (set_attr "mode" "<MODE>")
8590 (set (attr "length_immediate")
8591 (cond [(eq_attr "alternative" "0")
8592 (const_string "1")
8593 (match_test "<MODE_SIZE> == 8")
8594 (const_string "4")]
8595 (const_string "<MODE_SIZE>")))])
8596
;; Unsigned counterpart of the multiply-with-CCO-check expander (SWI248).
;; Both multiplicands are zero-extended to <DWI>, the narrow product goes
;; to operand 0, and the parallel additionally clobbers a scratch of the
;; narrow mode.  The trailing set branches to the label in operand 3 when
;; the CCO flags register compares equal to 0.
8597 (define_expand "umulv<mode>4"
8598 [(parallel [(set (reg:CCO FLAGS_REG)
8599 (eq:CCO (mult:<DWI>
8600 (zero_extend:<DWI>
8601 (match_operand:SWI248 1
8602 "nonimmediate_operand"))
8603 (zero_extend:<DWI>
8604 (match_operand:SWI248 2
8605 "nonimmediate_operand")))
8606 (zero_extend:<DWI>
8607 (mult:SWI248 (match_dup 1) (match_dup 2)))))
8608 (set (match_operand:SWI248 0 "register_operand")
8609 (mult:SWI248 (match_dup 1) (match_dup 2)))
8610 (clobber (scratch:SWI248))])
8611 (set (pc) (if_then_else
8612 (eq (reg:CCO FLAGS_REG) (const_int 0))
8613 (label_ref (match_operand 3))
8614 (pc)))]
8615 ""
8616 {
/* Never hand the insn two memory inputs: load operand 1 into a register
   when both operands are MEM.  */
8617 if (MEM_P (operands[1]) && MEM_P (operands[2]))
8618 operands[1] = force_reg (<MODE>mode, operands[1]);
8619 })
8620
;; Unsigned multiply that also sets the CCO flags (SWI248).  Emits the
;; one-operand mul template on operand 2; operand 0 uses constraint "a",
;; operand 1 is tied to operand 0, and a scratch with constraint "d" is
;; clobbered.  Rejected when operands 1 and 2 are both MEM.
8621 (define_insn "*umulv<mode>4"
8622 [(set (reg:CCO FLAGS_REG)
8623 (eq:CCO (mult:<DWI>
8624 (zero_extend:<DWI>
8625 (match_operand:SWI248 1 "nonimmediate_operand" "%0"))
8626 (zero_extend:<DWI>
8627 (match_operand:SWI248 2 "nonimmediate_operand" "rm")))
8628 (zero_extend:<DWI>
8629 (mult:SWI248 (match_dup 1) (match_dup 2)))))
8630 (set (match_operand:SWI248 0 "register_operand" "=a")
8631 (mult:SWI248 (match_dup 1) (match_dup 2)))
8632 (clobber (match_scratch:SWI248 3 "=d"))]
8633 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
8634 "mul{<imodesuffix>}\t%2"
8635 [(set_attr "type" "imul")
8636 (set_attr "length_immediate" "0")
8637 (set (attr "athlon_decode")
8638 (if_then_else (eq_attr "cpu" "athlon")
8639 (const_string "vector")
8640 (const_string "double")))
8641 (set_attr "amdfam10_decode" "double")
8642 (set_attr "bdver1_decode" "direct")
8643 (set_attr "mode" "<MODE>")])
8644
;; QImode multiply-with-CCO-check expander, iterated over any_extend so it
;; yields both the signed and the unsigned named pattern.  The wide
;; comparison is done in HImode.  Only available under TARGET_QIMODE_MATH.
;; The trailing set branches to the label in operand 3 when the CCO flags
;; register compares equal to 0.
8645 (define_expand "<u>mulvqi4"
8646 [(parallel [(set (reg:CCO FLAGS_REG)
8647 (eq:CCO (mult:HI
8648 (any_extend:HI
8649 (match_operand:QI 1 "nonimmediate_operand"))
8650 (any_extend:HI
8651 (match_operand:QI 2 "nonimmediate_operand")))
8652 (any_extend:HI
8653 (mult:QI (match_dup 1) (match_dup 2)))))
8654 (set (match_operand:QI 0 "register_operand")
8655 (mult:QI (match_dup 1) (match_dup 2)))])
8656 (set (pc) (if_then_else
8657 (eq (reg:CCO FLAGS_REG) (const_int 0))
8658 (label_ref (match_operand 3))
8659 (pc)))]
8660 "TARGET_QIMODE_MATH"
8661 {
/* Never hand the insn two memory inputs: load operand 1 into a register
   when both operands are MEM.  */
8662 if (MEM_P (operands[1]) && MEM_P (operands[2]))
8663 operands[1] = force_reg (QImode, operands[1]);
8664 })
8665
;; QImode signed/unsigned multiply that also sets the CCO flags.  Emits the
;; one-operand <sgnprefix>mul{b} template on operand 2; operand 0 uses
;; constraint "a" and operand 1 is tied to operand 0.  Requires
;; TARGET_QIMODE_MATH and rejects two MEM inputs.
8666 (define_insn "*<u>mulvqi4"
8667 [(set (reg:CCO FLAGS_REG)
8668 (eq:CCO (mult:HI
8669 (any_extend:HI
8670 (match_operand:QI 1 "nonimmediate_operand" "%0"))
8671 (any_extend:HI
8672 (match_operand:QI 2 "nonimmediate_operand" "qm")))
8673 (any_extend:HI
8674 (mult:QI (match_dup 1) (match_dup 2)))))
8675 (set (match_operand:QI 0 "register_operand" "=a")
8676 (mult:QI (match_dup 1) (match_dup 2)))]
8677 "TARGET_QIMODE_MATH
8678 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8679 "<sgnprefix>mul{b}\t%2"
8680 [(set_attr "type" "imul")
8681 (set_attr "length_immediate" "0")
8682 (set (attr "athlon_decode")
8683 (if_then_else (eq_attr "cpu" "athlon")
8684 (const_string "vector")
8685 (const_string "direct")))
8686 (set_attr "amdfam10_decode" "direct")
8687 (set_attr "bdver1_decode" "direct")
8688 (set_attr "mode" "QI")])
8689
;; Widening multiply expander: operand 0 (<DWI>) receives the product of
;; operands 1 and 2 (DWIH), each extended with any_extend.  The parallel
;; clobbers the flags register.  There is no enabling condition and no
;; preparation code.
8690 (define_expand "<u>mul<mode><dwi>3"
8691 [(parallel [(set (match_operand:<DWI> 0 "register_operand")
8692 (mult:<DWI>
8693 (any_extend:<DWI>
8694 (match_operand:DWIH 1 "nonimmediate_operand"))
8695 (any_extend:<DWI>
8696 (match_operand:DWIH 2 "register_operand"))))
8697 (clobber (reg:CC FLAGS_REG))])])
8698
;; QImode-to-HImode widening multiply expander (signed and unsigned through
;; any_extend).  The parallel clobbers the flags register.  Only available
;; under TARGET_QIMODE_MATH.
8699 (define_expand "<u>mulqihi3"
8700 [(parallel [(set (match_operand:HI 0 "register_operand")
8701 (mult:HI
8702 (any_extend:HI
8703 (match_operand:QI 1 "nonimmediate_operand"))
8704 (any_extend:HI
8705 (match_operand:QI 2 "register_operand"))))
8706 (clobber (reg:CC FLAGS_REG))])]
8707 "TARGET_QIMODE_MATH")
8708
;; Unsigned widening multiply through mulx, written as two narrow sets.
;; Operand 0 receives the DWIH-mode product of operands 2 and 3; operand 1
;; receives the truncation of the zero-extended <DWI> product shifted right
;; by operand 4.  The condition requires TARGET_BMI2, operand 4 equal to
;; <MODE_SIZE> * BITS_PER_UNIT, and not both inputs MEM.  Operand 2 uses
;; constraint "d".  The pattern contains no flags clobber.
8709 (define_insn "*bmi2_umul<mode><dwi>3_1"
8710 [(set (match_operand:DWIH 0 "register_operand" "=r")
8711 (mult:DWIH
8712 (match_operand:DWIH 2 "nonimmediate_operand" "%d")
8713 (match_operand:DWIH 3 "nonimmediate_operand" "rm")))
8714 (set (match_operand:DWIH 1 "register_operand" "=r")
8715 (truncate:DWIH
8716 (lshiftrt:<DWI>
8717 (mult:<DWI> (zero_extend:<DWI> (match_dup 2))
8718 (zero_extend:<DWI> (match_dup 3)))
8719 (match_operand:QI 4 "const_int_operand"))))]
8720 "TARGET_BMI2 && INTVAL (operands[4]) == <MODE_SIZE> * BITS_PER_UNIT
8721 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
8722 "mulx\t{%3, %0, %1|%1, %0, %3}"
8723 [(set_attr "type" "imulx")
8724 (set_attr "prefix" "vex")
8725 (set_attr "mode" "<MODE>")])
8726
;; Unsigned widening multiply producing a <DWI> result, with two
;; alternatives.  Alternative 0 (isa "bmi2", type imulx) has output
;; template "#", so it is never emitted directly and must be split.
;; Alternative 1 emits the one-operand mul template on operand 2, with
;; operand 0 using constraint "A" and operand 1 tied to operand 0.
;; Rejected when operands 1 and 2 are both MEM.
8727 (define_insn "*umul<mode><dwi>3_1"
8728 [(set (match_operand:<DWI> 0 "register_operand" "=r,A")
8729 (mult:<DWI>
8730 (zero_extend:<DWI>
8731 (match_operand:DWIH 1 "nonimmediate_operand" "%d,0"))
8732 (zero_extend:<DWI>
8733 (match_operand:DWIH 2 "nonimmediate_operand" "rm,rm"))))
8734 (clobber (reg:CC FLAGS_REG))]
8735 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
8736 "@
8737 #
8738 mul{<imodesuffix>}\t%2"
8739 [(set_attr "isa" "bmi2,*")
8740 (set_attr "type" "imulx,imul")
8741 (set_attr "length_immediate" "*,0")
8742 (set (attr "athlon_decode")
8743 (cond [(eq_attr "alternative" "1")
8744 (if_then_else (eq_attr "cpu" "athlon")
8745 (const_string "vector")
8746 (const_string "double"))]
8747 (const_string "*")))
8748 (set_attr "amdfam10_decode" "*,double")
8749 (set_attr "bdver1_decode" "*,direct")
8750 (set_attr "prefix" "vex,orig")
8751 (set_attr "mode" "<MODE>")])
8752
8753 ;; Convert mul to the mulx pattern to avoid flags dependency.
;; Applies after reload under TARGET_BMI2 when operand 1 is in DX_REG.  The
;; single <DWI> set with a flags clobber is rewritten into a parallel of
;; two DWIH sets (operands 3 and 4) without a flags clobber.
8754 (define_split
8755 [(set (match_operand:<DWI> 0 "register_operand")
8756 (mult:<DWI>
8757 (zero_extend:<DWI>
8758 (match_operand:DWIH 1 "register_operand"))
8759 (zero_extend:<DWI>
8760 (match_operand:DWIH 2 "nonimmediate_operand"))))
8761 (clobber (reg:CC FLAGS_REG))]
8762 "TARGET_BMI2 && reload_completed
8763 && REGNO (operands[1]) == DX_REG"
8764 [(parallel [(set (match_dup 3)
8765 (mult:DWIH (match_dup 1) (match_dup 2)))
8766 (set (match_dup 4)
8767 (truncate:DWIH
8768 (lshiftrt:<DWI>
8769 (mult:<DWI> (zero_extend:<DWI> (match_dup 1))
8770 (zero_extend:<DWI> (match_dup 2)))
8771 (match_dup 5))))])]
8772 {
/* Fill operands 3 and 4 from the double-mode operand 0.
   NOTE(review): presumably its low and high halves respectively --
   confirm against split_double_mode.  */
8773 split_double_mode (<DWI>mode, &operands[0], 1, &operands[3], &operands[4]);
8774
/* Shift count: the width in bits of the narrow mode.  */
8775 operands[5] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
8776 })
8777
;; Signed widening multiply producing a <DWI> result.  Emits the
;; one-operand imul template on operand 2; operand 0 uses constraint "A"
;; and operand 1 is tied to operand 0.  Clobbers the flags register and
;; rejects two MEM inputs.
8778 (define_insn "*mul<mode><dwi>3_1"
8779 [(set (match_operand:<DWI> 0 "register_operand" "=A")
8780 (mult:<DWI>
8781 (sign_extend:<DWI>
8782 (match_operand:DWIH 1 "nonimmediate_operand" "%0"))
8783 (sign_extend:<DWI>
8784 (match_operand:DWIH 2 "nonimmediate_operand" "rm"))))
8785 (clobber (reg:CC FLAGS_REG))]
8786 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
8787 "imul{<imodesuffix>}\t%2"
8788 [(set_attr "type" "imul")
8789 (set_attr "length_immediate" "0")
8790 (set (attr "athlon_decode")
8791 (if_then_else (eq_attr "cpu" "athlon")
8792 (const_string "vector")
8793 (const_string "double")))
8794 (set_attr "amdfam10_decode" "double")
8795 (set_attr "bdver1_decode" "direct")
8796 (set_attr "mode" "<MODE>")])
8797
;; QImode-to-HImode widening multiply, signed or unsigned through
;; any_extend.  Emits the one-operand <sgnprefix>mul{b} template on
;; operand 2; the HImode result uses constraint "a" and operand 1 is tied
;; to operand 0.  Requires TARGET_QIMODE_MATH and rejects two MEM inputs.
8798 (define_insn "*<u>mulqihi3_1"
8799 [(set (match_operand:HI 0 "register_operand" "=a")
8800 (mult:HI
8801 (any_extend:HI
8802 (match_operand:QI 1 "nonimmediate_operand" "%0"))
8803 (any_extend:HI
8804 (match_operand:QI 2 "nonimmediate_operand" "qm"))))
8805 (clobber (reg:CC FLAGS_REG))]
8806 "TARGET_QIMODE_MATH
8807 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8808 "<sgnprefix>mul{b}\t%2"
8809 [(set_attr "type" "imul")
8810 (set_attr "length_immediate" "0")
8811 (set (attr "athlon_decode")
8812 (if_then_else (eq_attr "cpu" "athlon")
8813 (const_string "vector")
8814 (const_string "direct")))
8815 (set_attr "amdfam10_decode" "direct")
8816 (set_attr "bdver1_decode" "direct")
8817 (set_attr "mode" "QI")])
8818
8819 ;; Highpart multiplication patterns
;; Named highpart multiply for DWIH modes, expressed with the
;; any_mul_highpart code iterator.  The result uses constraint "d",
;; operand 1 uses "%a", and a scratch tied to operand 1 ("=1") is
;; clobbered along with the flags register.  Emits the one-operand
;; <sgnprefix>mul template on operand 2.
8820 (define_insn "<s>mul<mode>3_highpart"
8821 [(set (match_operand:DWIH 0 "register_operand" "=d")
8822 (any_mul_highpart:DWIH
8823 (match_operand:DWIH 1 "register_operand" "%a")
8824 (match_operand:DWIH 2 "nonimmediate_operand" "rm")))
8825 (clobber (match_scratch:DWIH 3 "=1"))
8826 (clobber (reg:CC FLAGS_REG))]
8827 ""
8828 "<sgnprefix>mul{<imodesuffix>}\t%2"
8829 [(set_attr "type" "imul")
8830 (set_attr "length_immediate" "0")
8831 (set (attr "athlon_decode")
8832 (if_then_else (eq_attr "cpu" "athlon")
8833 (const_string "vector")
8834 (const_string "double")))
8835 (set_attr "amdfam10_decode" "double")
8836 (set_attr "bdver1_decode" "direct")
8837 (set_attr "mode" "<MODE>")])
8838
;; SImode highpart multiply (any_mul_highpart form) whose result is
;; zero-extended into a DImode destination.  Only for TARGET_64BIT.  Same
;; constraints as the plain highpart pattern: result "d", operand 1 "%a",
;; scratch tied to operand 1.  Emits <sgnprefix>mul{l} on operand 2.
8839 (define_insn "*<s>mulsi3_highpart_zext"
8840 [(set (match_operand:DI 0 "register_operand" "=d")
8841 (zero_extend:DI
8842 (any_mul_highpart:SI
8843 (match_operand:SI 1 "register_operand" "%a")
8844 (match_operand:SI 2 "nonimmediate_operand" "rm"))))
8845 (clobber (match_scratch:SI 3 "=1"))
8846 (clobber (reg:CC FLAGS_REG))]
8847 "TARGET_64BIT"
8848 "<sgnprefix>mul{l}\t%2"
8849 [(set_attr "type" "imul")
8850 (set_attr "length_immediate" "0")
8851 (set (attr "athlon_decode")
8852 (if_then_else (eq_attr "cpu" "athlon")
8853 (const_string "vector")
8854 (const_string "double")))
8855 (set_attr "amdfam10_decode" "double")
8856 (set_attr "bdver1_decode" "direct")
8857 (set_attr "mode" "SI")])
8858
;; DImode highpart multiply written out explicitly: both inputs are
;; extended to TImode with any_extend, multiplied, shifted right by 64 and
;; truncated back to DImode.  Only for TARGET_64BIT; rejects two MEM
;; inputs.  Result uses constraint "d", operand 1 "%a", and a scratch tied
;; to operand 1 is clobbered.  Emits <sgnprefix>mul{q} on operand 2.
8859 (define_insn "*<s>muldi3_highpart_1"
8860 [(set (match_operand:DI 0 "register_operand" "=d")
8861 (truncate:DI
8862 (lshiftrt:TI
8863 (mult:TI
8864 (any_extend:TI
8865 (match_operand:DI 1 "nonimmediate_operand" "%a"))
8866 (any_extend:TI
8867 (match_operand:DI 2 "nonimmediate_operand" "rm")))
8868 (const_int 64))))
8869 (clobber (match_scratch:DI 3 "=1"))
8870 (clobber (reg:CC FLAGS_REG))]
8871 "TARGET_64BIT
8872 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8873 "<sgnprefix>mul{q}\t%2"
8874 [(set_attr "type" "imul")
8875 (set_attr "length_immediate" "0")
8876 (set (attr "athlon_decode")
8877 (if_then_else (eq_attr "cpu" "athlon")
8878 (const_string "vector")
8879 (const_string "double")))
8880 (set_attr "amdfam10_decode" "double")
8881 (set_attr "bdver1_decode" "direct")
8882 (set_attr "mode" "DI")])
8883
;; SImode highpart multiply written out explicitly (extend to DImode,
;; multiply, shift right by 32, truncate to SImode) with the result
;; zero-extended into a DImode destination.  Only for TARGET_64BIT; rejects
;; two MEM inputs.  Emits <sgnprefix>mul{l} on operand 2.
;; NOTE(review): this pattern has the same name as the
;; any_mul_highpart-based "*<s>mulsi3_highpart_zext" earlier in this file.
8884 (define_insn "*<s>mulsi3_highpart_zext"
8885 [(set (match_operand:DI 0 "register_operand" "=d")
8886 (zero_extend:DI (truncate:SI
8887 (lshiftrt:DI
8888 (mult:DI (any_extend:DI
8889 (match_operand:SI 1 "nonimmediate_operand" "%a"))
8890 (any_extend:DI
8891 (match_operand:SI 2 "nonimmediate_operand" "rm")))
8892 (const_int 32)))))
8893 (clobber (match_scratch:SI 3 "=1"))
8894 (clobber (reg:CC FLAGS_REG))]
8895 "TARGET_64BIT
8896 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8897 "<sgnprefix>mul{l}\t%2"
8898 [(set_attr "type" "imul")
8899 (set_attr "length_immediate" "0")
8900 (set (attr "athlon_decode")
8901 (if_then_else (eq_attr "cpu" "athlon")
8902 (const_string "vector")
8903 (const_string "double")))
8904 (set_attr "amdfam10_decode" "double")
8905 (set_attr "bdver1_decode" "direct")
8906 (set_attr "mode" "SI")])
8907
;; SImode highpart multiply written out explicitly: both inputs are
;; extended to DImode with any_extend, multiplied, shifted right by 32 and
;; truncated back to SImode.  No target condition beyond rejecting two MEM
;; inputs.  Emits <sgnprefix>mul{l} on operand 2.
8908 (define_insn "*<s>mulsi3_highpart_1"
8909 [(set (match_operand:SI 0 "register_operand" "=d")
8910 (truncate:SI
8911 (lshiftrt:DI
8912 (mult:DI
8913 (any_extend:DI
8914 (match_operand:SI 1 "nonimmediate_operand" "%a"))
8915 (any_extend:DI
8916 (match_operand:SI 2 "nonimmediate_operand" "rm")))
8917 (const_int 32))))
8918 (clobber (match_scratch:SI 3 "=1"))
8919 (clobber (reg:CC FLAGS_REG))]
8920 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
8921 "<sgnprefix>mul{l}\t%2"
8922 [(set_attr "type" "imul")
8923 (set_attr "length_immediate" "0")
8924 (set (attr "athlon_decode")
8925 (if_then_else (eq_attr "cpu" "athlon")
8926 (const_string "vector")
8927 (const_string "double")))
8928 (set_attr "amdfam10_decode" "double")
8929 (set_attr "bdver1_decode" "direct")
8930 (set_attr "mode" "SI")])
8931
8932 ;; Highpart multiplication peephole2s to tweak register allocation.
8933 ;; mov imm,%rdx; mov %rdi,%rax; imulq %rdx -> mov imm,%rax; imulq %rdi
;; Matched sequence: operand 0 = immediate operand 1; operand 2 = operand
;; 3; then a highpart multiply of operand 2 by operand 0 into operand 4
;; that clobbers operand 2.  The replacement loads the immediate directly
;; into operand 2 and multiplies by operand 3, dropping the register copy.
;; Conditions: operand 3 is not AX_REG, operand 0 differs from operands 2
;; and 3, and operand 0 is either the multiply destination or reported
;; dead by peep2_reg_dead_p (3, ...).
8934 (define_peephole2
8935 [(set (match_operand:SWI48 0 "general_reg_operand")
8936 (match_operand:SWI48 1 "immediate_operand"))
8937 (set (match_operand:SWI48 2 "general_reg_operand")
8938 (match_operand:SWI48 3 "general_reg_operand"))
8939 (parallel [(set (match_operand:SWI48 4 "general_reg_operand")
8940 (any_mul_highpart:SWI48 (match_dup 2) (match_dup 0)))
8941 (clobber (match_dup 2))
8942 (clobber (reg:CC FLAGS_REG))])]
8943 "REGNO (operands[3]) != AX_REG
8944 && REGNO (operands[0]) != REGNO (operands[2])
8945 && REGNO (operands[0]) != REGNO (operands[3])
8946 && (REGNO (operands[0]) == REGNO (operands[4])
8947 || peep2_reg_dead_p (3, operands[0]))"
8948 [(set (match_dup 2) (match_dup 1))
8949 (parallel [(set (match_dup 4)
8950 (any_mul_highpart:SWI48 (match_dup 2) (match_dup 3)))
8951 (clobber (match_dup 2))
8952 (clobber (reg:CC FLAGS_REG))])])
8953
;; Peephole for the SImode highpart multiply whose result is zero-extended
;; into a DImode operand 4: an immediate load into operand 0, a copy of
;; operand 3 into operand 2, then the multiply of operand 2 by operand 0.
;; The replacement loads the immediate directly into operand 2 and
;; multiplies by operand 3.  Only for TARGET_64BIT.  Conditions: operand 3
;; is not AX_REG, operands 0, 2 and 3 are pairwise distinct registers, and
;; operand 0 is either the multiply destination or reported dead by
;; peep2_reg_dead_p (3, ...).
8954 (define_peephole2
8955 [(set (match_operand:SI 0 "general_reg_operand")
8956 (match_operand:SI 1 "immediate_operand"))
8957 (set (match_operand:SI 2 "general_reg_operand")
8958 (match_operand:SI 3 "general_reg_operand"))
8959 (parallel [(set (match_operand:DI 4 "general_reg_operand")
8960 (zero_extend:DI
8961 (any_mul_highpart:SI (match_dup 2) (match_dup 0))))
8962 (clobber (match_dup 2))
8963 (clobber (reg:CC FLAGS_REG))])]
8964 "TARGET_64BIT
8965 && REGNO (operands[3]) != AX_REG
8966 && REGNO (operands[0]) != REGNO (operands[2])
8967 && REGNO (operands[2]) != REGNO (operands[3])
8968 && REGNO (operands[0]) != REGNO (operands[3])
8969 && (REGNO (operands[0]) == REGNO (operands[4])
8970 || peep2_reg_dead_p (3, operands[0]))"
8971 [(set (match_dup 2) (match_dup 1))
8972 (parallel [(set (match_dup 4)
8973 (zero_extend:DI
8974 (any_mul_highpart:SI (match_dup 2) (match_dup 3))))
8975 (clobber (match_dup 2))
8976 (clobber (reg:CC FLAGS_REG))])])
8977
8978 ;; The patterns that match these are at the end of this file.
8979
;; XFmode multiply expander; all three operands are register operands.
;; Enabled under TARGET_80387.  No preparation code.
8980 (define_expand "mulxf3"
8981 [(set (match_operand:XF 0 "register_operand")
8982 (mult:XF (match_operand:XF 1 "register_operand")
8983 (match_operand:XF 2 "register_operand")))]
8984 "TARGET_80387")
8985
;; HFmode multiply expander; operand 2 may be a register or memory.
;; Enabled under TARGET_AVX512FP16.  No preparation code.
8986 (define_expand "mulhf3"
8987 [(set (match_operand:HF 0 "register_operand")
8988 (mult:HF (match_operand:HF 1 "register_operand")
8989 (match_operand:HF 2 "nonimmediate_operand")))]
8990 "TARGET_AVX512FP16")
8991
;; Multiply expander for the MODEF modes.  Enabled when either x87
;; arithmetic is enabled for the mode (TARGET_80387 with X87_ENABLE_ARITH)
;; or the mode is an SSE float mode and TARGET_SSE_MATH is set.
8992 (define_expand "mul<mode>3"
8993 [(set (match_operand:MODEF 0 "register_operand")
8994 (mult:MODEF (match_operand:MODEF 1 "register_operand")
8995 (match_operand:MODEF 2 "nonimmediate_operand")))]
8996 "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode))
8997 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)")
8998 \f
8999 ;; Divide instructions
9000
9001 ;; The patterns that match these are at the end of this file.
9002
;; XFmode divide expander; all three operands are register operands.
;; Enabled under TARGET_80387.  No preparation code.
9003 (define_expand "divxf3"
9004 [(set (match_operand:XF 0 "register_operand")
9005 (div:XF (match_operand:XF 1 "register_operand")
9006 (match_operand:XF 2 "register_operand")))]
9007 "TARGET_80387")
9008
9009 /* There is no more precision loss than Newton-Raphson approximation
9010 when using HFmode rcp/rsqrt, so do the transformation directly under
9011 TARGET_RECIP_DIV and fast-math. */
;; HFmode divide expander, enabled under TARGET_AVX512FP16.  When the
;; conditions tested in the preparation code hold, the division is emitted
;; as a reciprocal (gen_rcphf2) followed by a multiply (gen_mulhf3) and
;; expansion finishes with DONE; otherwise the div:HF template is used.
9012 (define_expand "divhf3"
9013 [(set (match_operand:HF 0 "register_operand")
9014 (div:HF (match_operand:HF 1 "register_operand")
9015 (match_operand:HF 2 "nonimmediate_operand")))]
9016 "TARGET_AVX512FP16"
9017 {
9018 if (TARGET_RECIP_DIV
9019 && optimize_insn_for_speed_p ()
9020 && flag_finite_math_only && !flag_trapping_math
9021 && flag_unsafe_math_optimizations)
9022 {
/* op holds the reciprocal of operand 2, which is first forced into a
   register.  */
9023 rtx op = gen_reg_rtx (HFmode);
9024 operands[2] = force_reg (HFmode, operands[2]);
9025 emit_insn (gen_rcphf2 (op, operands[2]));
9026 emit_insn (gen_mulhf3 (operands[0], operands[1], op));
9027 DONE;
9028 }
9029 })
9030
;; Divide expander for the MODEF modes, enabled under the same x87 / SSE
;; math condition as the MODEF multiply expander.  For SFmode with SSE
;; math, TARGET_RECIP_DIV, optimization for speed and the fast-math flags
;; tested below, the division is delegated to ix86_emit_swdivsf and
;; expansion finishes with DONE; otherwise the div:MODEF template is used.
9031 (define_expand "div<mode>3"
9032 [(set (match_operand:MODEF 0 "register_operand")
9033 (div:MODEF (match_operand:MODEF 1 "register_operand")
9034 (match_operand:MODEF 2 "nonimmediate_operand")))]
9035 "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode))
9036 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
9037 {
9038 if (<MODE>mode == SFmode
9039 && TARGET_SSE && TARGET_SSE_MATH
9040 && TARGET_RECIP_DIV
9041 && optimize_insn_for_speed_p ()
9042 && flag_finite_math_only && !flag_trapping_math
9043 && flag_unsafe_math_optimizations)
9044 {
9045 ix86_emit_swdivsf (operands[0], operands[1],
9046 operands[2], SFmode);
9047 DONE;
9048 }
9049 })
9050 \f
9051 ;; Divmod instructions.
9052
;; any_div iterates over the signed and unsigned division codes;
;; paired_mod maps each of them to the matching remainder code.
9053 (define_code_iterator any_div [div udiv])
9054 (define_code_attr paired_mod [(div "mod") (udiv "umod")])
9055
;; Combined divide/modulo expander for SWIM248 modes, signed and unsigned
;; through any_div.  Operand 0 receives the quotient of operand 1 by
;; operand 2 and operand 3 the paired remainder; the parallel clobbers the
;; flags register.  No enabling condition and no preparation code.
9056 (define_expand "<u>divmod<mode>4"
9057 [(parallel [(set (match_operand:SWIM248 0 "register_operand")
9058 (any_div:SWIM248
9059 (match_operand:SWIM248 1 "register_operand")
9060 (match_operand:SWIM248 2 "nonimmediate_operand")))
9061 (set (match_operand:SWIM248 3 "register_operand")
9062 (<paired_mod>:SWIM248 (match_dup 1) (match_dup 2)))
9063 (clobber (reg:CC FLAGS_REG))])])
9064
9065 ;; Split with 8bit unsigned divide:
9066 ;; if (dividend and divisor are in [0-255])
9067 ;; use 8bit unsigned integer divide
9068 ;; else
9069 ;; use original integer divide
;; Splitter for the SWI48 divide/modulo parallel (quotient in operand 0,
;; remainder in operand 1).  It applies under TARGET_USE_8BIT_IDIV and
;; TARGET_QIMODE_MATH, while pseudos can still be created and the insn is
;; not optimized for size.  All code generation is delegated to
;; ix86_split_idivmod; the (const_int 0) replacement template is a
;; placeholder because the preparation code ends with DONE.
9070 (define_split
9071 [(set (match_operand:SWI48 0 "register_operand")
9072 (any_div:SWI48 (match_operand:SWI48 2 "register_operand")
9073 (match_operand:SWI48 3 "nonimmediate_operand")))
9074 (set (match_operand:SWI48 1 "register_operand")
9075 (<paired_mod>:SWI48 (match_dup 2) (match_dup 3)))
9076 (clobber (reg:CC FLAGS_REG))]
9077 "TARGET_USE_8BIT_IDIV
9078 && TARGET_QIMODE_MATH
9079 && can_create_pseudo_p ()
9080 && !optimize_insn_for_size_p ()"
9081 [(const_int 0)]
9082 "ix86_split_idivmod (<MODE>mode, operands, <u_bool>); DONE;")
9083
;; Same 8-bit-divide splitter for the SImode form whose quotient is
;; zero-extended into a DImode operand 0.  Additionally requires
;; TARGET_64BIT.  Delegates to ix86_split_idivmod with SImode.
9084 (define_split
9085 [(set (match_operand:DI 0 "register_operand")
9086 (zero_extend:DI
9087 (any_div:SI (match_operand:SI 2 "register_operand")
9088 (match_operand:SI 3 "nonimmediate_operand"))))
9089 (set (match_operand:SI 1 "register_operand")
9090 (<paired_mod>:SI (match_dup 2) (match_dup 3)))
9091 (clobber (reg:CC FLAGS_REG))]
9092 "TARGET_64BIT
9093 && TARGET_USE_8BIT_IDIV
9094 && TARGET_QIMODE_MATH
9095 && can_create_pseudo_p ()
9096 && !optimize_insn_for_size_p ()"
9097 [(const_int 0)]
9098 "ix86_split_idivmod (SImode, operands, <u_bool>); DONE;")
9099
;; Same 8-bit-divide splitter for the SImode form whose remainder is
;; zero-extended into a DImode operand 1; the quotient stays in SImode
;; operand 0.  Additionally requires TARGET_64BIT.  Delegates to
;; ix86_split_idivmod with SImode.
9100 (define_split
9101 [(set (match_operand:DI 1 "register_operand")
9102 (zero_extend:DI
9103 (<paired_mod>:SI (match_operand:SI 2 "register_operand")
9104 (match_operand:SI 3 "nonimmediate_operand"))))
9105 (set (match_operand:SI 0 "register_operand")
9106 (any_div:SI (match_dup 2) (match_dup 3)))
9107 (clobber (reg:CC FLAGS_REG))]
9108 "TARGET_64BIT
9109 && TARGET_USE_8BIT_IDIV
9110 && TARGET_QIMODE_MATH
9111 && can_create_pseudo_p ()
9112 && !optimize_insn_for_size_p ()"
9113 [(const_int 0)]
9114 "ix86_split_idivmod (SImode, operands, <u_bool>); DONE;")
9115
;; Signed divide/modulo for SWI48 modes whose parallel carries an
;; UNSPEC_DIV_ALREADY_SPLIT marker.  Quotient is in operand 0 ("a",
;; with operand 2 tied to it) and remainder in operand 1 (earlyclobber
;; "d").  The output template is "#"; after reload the insn is split into
;; an arithmetic right shift into operand 1 followed by the div/mod
;; parallel carrying a (use operand 1).
9116 (define_insn_and_split "divmod<mode>4_1"
9117 [(set (match_operand:SWI48 0 "register_operand" "=a")
9118 (div:SWI48 (match_operand:SWI48 2 "register_operand" "0")
9119 (match_operand:SWI48 3 "nonimmediate_operand" "rm")))
9120 (set (match_operand:SWI48 1 "register_operand" "=&d")
9121 (mod:SWI48 (match_dup 2) (match_dup 3)))
9122 (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
9123 (clobber (reg:CC FLAGS_REG))]
9124 ""
9125 "#"
9126 "reload_completed"
9127 [(parallel [(set (match_dup 1)
9128 (ashiftrt:SWI48 (match_dup 4) (match_dup 5)))
9129 (clobber (reg:CC FLAGS_REG))])
9130 (parallel [(set (match_dup 0)
9131 (div:SWI48 (match_dup 2) (match_dup 3)))
9132 (set (match_dup 1)
9133 (mod:SWI48 (match_dup 2) (match_dup 3)))
9134 (use (match_dup 1))
9135 (clobber (reg:CC FLAGS_REG))])]
9136 {
/* Shift count: mode width in bits minus one.  */
9137 operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
9138
/* Operand 4 is the shift source: operand 2 directly, or operand 1 after
   operand 2 has been copied into it.  */
9139 if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
9140 operands[4] = operands[2];
9141 else
9142 {
9143 /* Avoid use of cltd in favor of a mov+shift. */
9144 emit_move_insn (operands[1], operands[2]);
9145 operands[4] = operands[1];
9146 }
9147 }
9148 [(set_attr "type" "multi")
9149 (set_attr "mode" "<MODE>")])
9150
;; Unsigned divide/modulo for SWI48 modes whose parallel carries an
;; UNSPEC_DIV_ALREADY_SPLIT marker.  The output template is "#"; after
;; reload the insn is split into a set of operand 1 to zero followed by
;; the udiv/umod parallel carrying a (use operand 1).  No preparation
;; code.
9151 (define_insn_and_split "udivmod<mode>4_1"
9152 [(set (match_operand:SWI48 0 "register_operand" "=a")
9153 (udiv:SWI48 (match_operand:SWI48 2 "register_operand" "0")
9154 (match_operand:SWI48 3 "nonimmediate_operand" "rm")))
9155 (set (match_operand:SWI48 1 "register_operand" "=&d")
9156 (umod:SWI48 (match_dup 2) (match_dup 3)))
9157 (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
9158 (clobber (reg:CC FLAGS_REG))]
9159 ""
9160 "#"
9161 "reload_completed"
9162 [(set (match_dup 1) (const_int 0))
9163 (parallel [(set (match_dup 0)
9164 (udiv:SWI48 (match_dup 2) (match_dup 3)))
9165 (set (match_dup 1)
9166 (umod:SWI48 (match_dup 2) (match_dup 3)))
9167 (use (match_dup 1))
9168 (clobber (reg:CC FLAGS_REG))])]
9169 ""
9170 [(set_attr "type" "multi")
9171 (set_attr "mode" "<MODE>")])
9172
;; Signed SImode divide/modulo with the quotient zero-extended into a
;; DImode operand 0 and an UNSPEC_DIV_ALREADY_SPLIT marker in the
;; parallel.  Only for TARGET_64BIT.  The output template is "#"; after
;; reload the insn is split into an SImode arithmetic right shift into
;; operand 1 followed by the div/mod parallel carrying a (use operand 1).
9173 (define_insn_and_split "divmodsi4_zext_1"
9174 [(set (match_operand:DI 0 "register_operand" "=a")
9175 (zero_extend:DI
9176 (div:SI (match_operand:SI 2 "register_operand" "0")
9177 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
9178 (set (match_operand:SI 1 "register_operand" "=&d")
9179 (mod:SI (match_dup 2) (match_dup 3)))
9180 (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
9181 (clobber (reg:CC FLAGS_REG))]
9182 "TARGET_64BIT"
9183 "#"
9184 "&& reload_completed"
9185 [(parallel [(set (match_dup 1)
9186 (ashiftrt:SI (match_dup 4) (match_dup 5)))
9187 (clobber (reg:CC FLAGS_REG))])
9188 (parallel [(set (match_dup 0)
9189 (zero_extend:DI (div:SI (match_dup 2) (match_dup 3))))
9190 (set (match_dup 1)
9191 (mod:SI (match_dup 2) (match_dup 3)))
9192 (use (match_dup 1))
9193 (clobber (reg:CC FLAGS_REG))])]
9194 {
/* Shift count: SImode width in bits minus one.  */
9195 operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
9196
/* Operand 4 is the shift source: operand 2 directly, or operand 1 after
   operand 2 has been copied into it.  */
9197 if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
9198 operands[4] = operands[2];
9199 else
9200 {
9201 /* Avoid use of cltd in favor of a mov+shift. */
9202 emit_move_insn (operands[1], operands[2]);
9203 operands[4] = operands[1];
9204 }
9205 }
9206 [(set_attr "type" "multi")
9207 (set_attr "mode" "SI")])
9208
;; Unsigned SImode divide/modulo with the quotient zero-extended into a
;; DImode operand 0 and an UNSPEC_DIV_ALREADY_SPLIT marker in the
;; parallel.  Only for TARGET_64BIT.  After reload it is split into a set
;; of operand 1 to zero followed by the udiv/umod parallel carrying a
;; (use operand 1).  No preparation code.
9209 (define_insn_and_split "udivmodsi4_zext_1"
9210 [(set (match_operand:DI 0 "register_operand" "=a")
9211 (zero_extend:DI
9212 (udiv:SI (match_operand:SI 2 "register_operand" "0")
9213 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
9214 (set (match_operand:SI 1 "register_operand" "=&d")
9215 (umod:SI (match_dup 2) (match_dup 3)))
9216 (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
9217 (clobber (reg:CC FLAGS_REG))]
9218 "TARGET_64BIT"
9219 "#"
9220 "&& reload_completed"
9221 [(set (match_dup 1) (const_int 0))
9222 (parallel [(set (match_dup 0)
9223 (zero_extend:DI (udiv:SI (match_dup 2) (match_dup 3))))
9224 (set (match_dup 1)
9225 (umod:SI (match_dup 2) (match_dup 3)))
9226 (use (match_dup 1))
9227 (clobber (reg:CC FLAGS_REG))])]
9228 ""
9229 [(set_attr "type" "multi")
9230 (set_attr "mode" "SI")])
9231
;; Signed SImode divide/modulo with the remainder zero-extended into a
;; DImode operand 1 and an UNSPEC_DIV_ALREADY_SPLIT marker in the
;; parallel; the quotient stays in SImode operand 0.  Only for
;; TARGET_64BIT.  After reload it is split into an SImode arithmetic right
;; shift into operand 6 (the SImode lowpart of operand 1) followed by the
;; mod/div parallel carrying a (use operand 6).
9232 (define_insn_and_split "divmodsi4_zext_2"
9233 [(set (match_operand:DI 1 "register_operand" "=&d")
9234 (zero_extend:DI
9235 (mod:SI (match_operand:SI 2 "register_operand" "0")
9236 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
9237 (set (match_operand:SI 0 "register_operand" "=a")
9238 (div:SI (match_dup 2) (match_dup 3)))
9239 (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
9240 (clobber (reg:CC FLAGS_REG))]
9241 "TARGET_64BIT"
9242 "#"
9243 "&& reload_completed"
9244 [(parallel [(set (match_dup 6)
9245 (ashiftrt:SI (match_dup 4) (match_dup 5)))
9246 (clobber (reg:CC FLAGS_REG))])
9247 (parallel [(set (match_dup 1)
9248 (zero_extend:DI (mod:SI (match_dup 2) (match_dup 3))))
9249 (set (match_dup 0)
9250 (div:SI (match_dup 2) (match_dup 3)))
9251 (use (match_dup 6))
9252 (clobber (reg:CC FLAGS_REG))])]
9253 {
/* Operand 5 is the shift count; operand 6 is the SImode view of the
   DImode remainder register used by the shift.  */
9254 operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
9255 operands[6] = gen_lowpart (SImode, operands[1]);
9256
/* Operand 4 is the shift source: operand 2 directly, or operand 6 after
   operand 2 has been copied into it.  */
9257 if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
9258 operands[4] = operands[2];
9259 else
9260 {
9261 /* Avoid use of cltd in favor of a mov+shift. */
9262 emit_move_insn (operands[6], operands[2]);
9263 operands[4] = operands[6];
9264 }
9265 }
9266 [(set_attr "type" "multi")
9267 (set_attr "mode" "SI")])
9268
;; Unsigned SImode divide/modulo with the remainder zero-extended into a
;; DImode operand 1 and an UNSPEC_DIV_ALREADY_SPLIT marker in the
;; parallel.  Only for TARGET_64BIT.  After reload it is split into a set
;; of operand 4 to zero followed by the umod/udiv parallel carrying a
;; (use operand 4).  The preparation code sets operand 4 to the SImode
;; lowpart of operand 1.
9269 (define_insn_and_split "udivmodsi4_zext_2"
9270 [(set (match_operand:DI 1 "register_operand" "=&d")
9271 (zero_extend:DI
9272 (umod:SI (match_operand:SI 2 "register_operand" "0")
9273 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
9274 (set (match_operand:SI 0 "register_operand" "=a")
9275 (udiv:SI (match_dup 2) (match_dup 3)))
9276 (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
9277 (clobber (reg:CC FLAGS_REG))]
9278 "TARGET_64BIT"
9279 "#"
9280 "&& reload_completed"
9281 [(set (match_dup 4) (const_int 0))
9282 (parallel [(set (match_dup 1)
9283 (zero_extend:DI (umod:SI (match_dup 2) (match_dup 3))))
9284 (set (match_dup 0)
9285 (udiv:SI (match_dup 2) (match_dup 3)))
9286 (use (match_dup 4))
9287 (clobber (reg:CC FLAGS_REG))])]
9288 "operands[4] = gen_lowpart (SImode, operands[1]);"
9289 [(set_attr "type" "multi")
9290 (set_attr "mode" "SI")])
9291
;; Signed divide/modulo for SWIM248 modes, without the
;; UNSPEC_DIV_ALREADY_SPLIT marker.  The output template is "#"; after
;; reload the insn is split into an arithmetic right shift into operand 1
;; followed by the div/mod parallel carrying a (use operand 1).  For
;; HImode the preparation code always takes the copy-then-shift path.
9292 (define_insn_and_split "*divmod<mode>4"
9293 [(set (match_operand:SWIM248 0 "register_operand" "=a")
9294 (div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
9295 (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
9296 (set (match_operand:SWIM248 1 "register_operand" "=&d")
9297 (mod:SWIM248 (match_dup 2) (match_dup 3)))
9298 (clobber (reg:CC FLAGS_REG))]
9299 ""
9300 "#"
9301 "reload_completed"
9302 [(parallel [(set (match_dup 1)
9303 (ashiftrt:SWIM248 (match_dup 4) (match_dup 5)))
9304 (clobber (reg:CC FLAGS_REG))])
9305 (parallel [(set (match_dup 0)
9306 (div:SWIM248 (match_dup 2) (match_dup 3)))
9307 (set (match_dup 1)
9308 (mod:SWIM248 (match_dup 2) (match_dup 3)))
9309 (use (match_dup 1))
9310 (clobber (reg:CC FLAGS_REG))])]
9311 {
/* Shift count: mode width in bits minus one.  */
9312 operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
9313
/* Operand 4 is the shift source.  Operand 2 is shifted directly only for
   non-HImode when optimizing for size or TARGET_USE_CLTD is set.  */
9314 if (<MODE>mode != HImode
9315 && (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD))
9316 operands[4] = operands[2];
9317 else
9318 {
9319 /* Avoid use of cltd in favor of a mov+shift. */
9320 emit_move_insn (operands[1], operands[2]);
9321 operands[4] = operands[1];
9322 }
9323 }
9324 [(set_attr "type" "multi")
9325 (set_attr "mode" "<MODE>")])
9326
;; Unsigned divide/modulo for SWIM248 modes, without the
;; UNSPEC_DIV_ALREADY_SPLIT marker.  The output template is "#"; after
;; reload the insn is split into a set of operand 1 to zero followed by
;; the udiv/umod parallel carrying a (use operand 1).  No preparation
;; code.
9327 (define_insn_and_split "*udivmod<mode>4"
9328 [(set (match_operand:SWIM248 0 "register_operand" "=a")
9329 (udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
9330 (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
9331 (set (match_operand:SWIM248 1 "register_operand" "=&d")
9332 (umod:SWIM248 (match_dup 2) (match_dup 3)))
9333 (clobber (reg:CC FLAGS_REG))]
9334 ""
9335 "#"
9336 "reload_completed"
9337 [(set (match_dup 1) (const_int 0))
9338 (parallel [(set (match_dup 0)
9339 (udiv:SWIM248 (match_dup 2) (match_dup 3)))
9340 (set (match_dup 1)
9341 (umod:SWIM248 (match_dup 2) (match_dup 3)))
9342 (use (match_dup 1))
9343 (clobber (reg:CC FLAGS_REG))])]
9344 ""
9345 [(set_attr "type" "multi")
9346 (set_attr "mode" "<MODE>")])
9347
9348 ;; Optimize division or modulo by constant power of 2, if the constant
9349 ;; materializes only after expansion.
;; Matches unsigned divide/modulo (SWI48) by a const_int whose exact_log2
;; lies in the range 1..31.  After reload it is split into three steps:
;; copy operand 2 into operand 1, logical right shift of operand 2 by the
;; log2 value into operand 0 (operand 2 is tied to operand 0), and an AND
;; of operand 1 with the constant minus one.
9350 (define_insn_and_split "*udivmod<mode>4_pow2"
9351 [(set (match_operand:SWI48 0 "register_operand" "=r")
9352 (udiv:SWI48 (match_operand:SWI48 2 "register_operand" "0")
9353 (match_operand:SWI48 3 "const_int_operand")))
9354 (set (match_operand:SWI48 1 "register_operand" "=r")
9355 (umod:SWI48 (match_dup 2) (match_dup 3)))
9356 (clobber (reg:CC FLAGS_REG))]
9357 "IN_RANGE (exact_log2 (UINTVAL (operands[3])), 1, 31)"
9358 "#"
9359 "&& reload_completed"
9360 [(set (match_dup 1) (match_dup 2))
9361 (parallel [(set (match_dup 0) (lshiftrt:<MODE> (match_dup 2) (match_dup 4)))
9362 (clobber (reg:CC FLAGS_REG))])
9363 (parallel [(set (match_dup 1) (and:<MODE> (match_dup 1) (match_dup 5)))
9364 (clobber (reg:CC FLAGS_REG))])]
9365 {
/* v is the shift count (operand 4); operand 5 is the remainder mask
   (1 << v) - 1.  */
9366 int v = exact_log2 (UINTVAL (operands[3]));
9367 operands[4] = GEN_INT (v);
9368 operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1);
9369 }
9370 [(set_attr "type" "multi")
9371 (set_attr "mode" "<MODE>")])
9372
;; Signed SImode divide/modulo with the quotient zero-extended into a
;; DImode operand 0, without the UNSPEC_DIV_ALREADY_SPLIT marker.  Only
;; for TARGET_64BIT.  After reload it is split into an SImode arithmetic
;; right shift into operand 1 followed by the div/mod parallel carrying a
;; (use operand 1).
9373 (define_insn_and_split "*divmodsi4_zext_1"
9374 [(set (match_operand:DI 0 "register_operand" "=a")
9375 (zero_extend:DI
9376 (div:SI (match_operand:SI 2 "register_operand" "0")
9377 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
9378 (set (match_operand:SI 1 "register_operand" "=&d")
9379 (mod:SI (match_dup 2) (match_dup 3)))
9380 (clobber (reg:CC FLAGS_REG))]
9381 "TARGET_64BIT"
9382 "#"
9383 "&& reload_completed"
9384 [(parallel [(set (match_dup 1)
9385 (ashiftrt:SI (match_dup 4) (match_dup 5)))
9386 (clobber (reg:CC FLAGS_REG))])
9387 (parallel [(set (match_dup 0)
9388 (zero_extend:DI (div:SI (match_dup 2) (match_dup 3))))
9389 (set (match_dup 1)
9390 (mod:SI (match_dup 2) (match_dup 3)))
9391 (use (match_dup 1))
9392 (clobber (reg:CC FLAGS_REG))])]
9393 {
/* Shift count: SImode width in bits minus one.  */
9394 operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
9395
/* Operand 4 is the shift source: operand 2 directly, or operand 1 after
   operand 2 has been copied into it.  */
9396 if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
9397 operands[4] = operands[2];
9398 else
9399 {
9400 /* Avoid use of cltd in favor of a mov+shift. */
9401 emit_move_insn (operands[1], operands[2]);
9402 operands[4] = operands[1];
9403 }
9404 }
9405 [(set_attr "type" "multi")
9406 (set_attr "mode" "SI")])
9407
;; Unsigned SImode divide/modulo with the quotient zero-extended into a
;; DImode operand 0, without the UNSPEC_DIV_ALREADY_SPLIT marker.  Only
;; for TARGET_64BIT.  After reload it is split into a set of operand 1 to
;; zero followed by the udiv/umod parallel carrying a (use operand 1).
;; No preparation code.
9408 (define_insn_and_split "*udivmodsi4_zext_1"
9409 [(set (match_operand:DI 0 "register_operand" "=a")
9410 (zero_extend:DI
9411 (udiv:SI (match_operand:SI 2 "register_operand" "0")
9412 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
9413 (set (match_operand:SI 1 "register_operand" "=&d")
9414 (umod:SI (match_dup 2) (match_dup 3)))
9415 (clobber (reg:CC FLAGS_REG))]
9416 "TARGET_64BIT"
9417 "#"
9418 "&& reload_completed"
9419 [(set (match_dup 1) (const_int 0))
9420 (parallel [(set (match_dup 0)
9421 (zero_extend:DI (udiv:SI (match_dup 2) (match_dup 3))))
9422 (set (match_dup 1)
9423 (umod:SI (match_dup 2) (match_dup 3)))
9424 (use (match_dup 1))
9425 (clobber (reg:CC FLAGS_REG))])]
9426 ""
9427 [(set_attr "type" "multi")
9428 (set_attr "mode" "SI")])
9429
;; Unsigned SImode divide/modulo by a constant whose exact_log2 is in the
;; range 1..31, with the quotient (operand 0) zero-extended to DImode;
;; TARGET_64BIT only.  Split after reload into:
;;   1. operand 1 = operand 2 (copy of the dividend);
;;   2. operand 0 = zero_extend (operand 2 >> log2 (divisor));
;;   3. operand 1 = operand 1 & (divisor - 1).
9430 (define_insn_and_split "*udivmodsi4_pow2_zext_1"
9431 [(set (match_operand:DI 0 "register_operand" "=r")
9432 (zero_extend:DI
9433 (udiv:SI (match_operand:SI 2 "register_operand" "0")
9434 (match_operand:SI 3 "const_int_operand"))))
9435 (set (match_operand:SI 1 "register_operand" "=r")
9436 (umod:SI (match_dup 2) (match_dup 3)))
9437 (clobber (reg:CC FLAGS_REG))]
9438 "TARGET_64BIT
9439 && IN_RANGE (exact_log2 (UINTVAL (operands[3])), 1, 31)"
9440 "#"
9441 "&& reload_completed"
9442 [(set (match_dup 1) (match_dup 2))
9443 (parallel [(set (match_dup 0)
9444 (zero_extend:DI (lshiftrt:SI (match_dup 2) (match_dup 4))))
9445 (clobber (reg:CC FLAGS_REG))])
9446 (parallel [(set (match_dup 1) (and:SI (match_dup 1) (match_dup 5)))
9447 (clobber (reg:CC FLAGS_REG))])]
9448 {
/* operands[4] is the shift count, operands[5] the remainder mask.  */
9449 int v = exact_log2 (UINTVAL (operands[3]));
9450 operands[4] = GEN_INT (v);
9451 operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1);
9452 }
9453 [(set_attr "type" "multi")
9454 (set_attr "mode" "SI")])
9455
;; Signed SImode divide/modulo whose remainder (operand 1) is zero-extended
;; to DImode; only enabled for TARGET_64BIT.  Always split ("#") after
;; reload.  Operand 6 is the SImode lowpart of DImode operand 1; it receives
;; operand 4 arithmetically shifted right by operand 5 and is then (use)d
;; by the divide/modulo parallel.
9456 (define_insn_and_split "*divmodsi4_zext_2"
9457 [(set (match_operand:DI 1 "register_operand" "=&d")
9458 (zero_extend:DI
9459 (mod:SI (match_operand:SI 2 "register_operand" "0")
9460 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
9461 (set (match_operand:SI 0 "register_operand" "=a")
9462 (div:SI (match_dup 2) (match_dup 3)))
9463 (clobber (reg:CC FLAGS_REG))]
9464 "TARGET_64BIT"
9465 "#"
9466 "&& reload_completed"
9467 [(parallel [(set (match_dup 6)
9468 (ashiftrt:SI (match_dup 4) (match_dup 5)))
9469 (clobber (reg:CC FLAGS_REG))])
9470 (parallel [(set (match_dup 1)
9471 (zero_extend:DI (mod:SI (match_dup 2) (match_dup 3))))
9472 (set (match_dup 0)
9473 (div:SI (match_dup 2) (match_dup 3)))
9474 (use (match_dup 6))
9475 (clobber (reg:CC FLAGS_REG))])]
9476 {
/* Shift count: number of bits in SImode minus one.  */
9477 operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
/* SImode view of the DImode remainder register.  */
9478 operands[6] = gen_lowpart (SImode, operands[1]);
9479
9480 if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
9481 operands[4] = operands[2];
9482 else
9483 {
9484 /* Avoid use of cltd in favor of a mov+shift. */
9485 emit_move_insn (operands[6], operands[2]);
9486 operands[4] = operands[6];
9487 }
9488 }
9489 [(set_attr "type" "multi")
9490 (set_attr "mode" "SI")])
9491
;; Unsigned SImode divide/modulo whose remainder (operand 1) is
;; zero-extended to DImode; only enabled for TARGET_64BIT.  Always split
;; ("#") after reload: operand 4, the SImode lowpart of operand 1, is set to
;; constant 0 and then (use)d by the divide/modulo parallel.
9492 (define_insn_and_split "*udivmodsi4_zext_2"
9493 [(set (match_operand:DI 1 "register_operand" "=&d")
9494 (zero_extend:DI
9495 (umod:SI (match_operand:SI 2 "register_operand" "0")
9496 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
9497 (set (match_operand:SI 0 "register_operand" "=a")
9498 (udiv:SI (match_dup 2) (match_dup 3)))
9499 (clobber (reg:CC FLAGS_REG))]
9500 "TARGET_64BIT"
9501 "#"
9502 "&& reload_completed"
9503 [(set (match_dup 4) (const_int 0))
9504 (parallel [(set (match_dup 1)
9505 (zero_extend:DI (umod:SI (match_dup 2) (match_dup 3))))
9506 (set (match_dup 0)
9507 (udiv:SI (match_dup 2) (match_dup 3)))
9508 (use (match_dup 4))
9509 (clobber (reg:CC FLAGS_REG))])]
9510 "operands[4] = gen_lowpart (SImode, operands[1]);"
9511 [(set_attr "type" "multi")
9512 (set_attr "mode" "SI")])
9513
;; Unsigned SImode divide/modulo by a constant whose exact_log2 is in the
;; range 1..31, with the remainder (operand 1) zero-extended to DImode;
;; TARGET_64BIT only.  Split after reload into:
;;   1. operand 6 = operand 2 (copy of the dividend);
;;   2. operand 0 = operand 2 >> log2 (divisor);
;;   3. operand 1 = zero_extend (operand 6 & (divisor - 1)).
;; Operand 1 is DImode while the dividend and the AND are SImode, so the
;; SImode steps go through operand 6, the SImode lowpart of operand 1, the
;; same way the other *_zext_2 splitters handle their DImode remainder.
9514 (define_insn_and_split "*udivmodsi4_pow2_zext_2"
9515 [(set (match_operand:DI 1 "register_operand" "=r")
9516 (zero_extend:DI
9517 (umod:SI (match_operand:SI 2 "register_operand" "0")
9518 (match_operand:SI 3 "const_int_operand"))))
9519 (set (match_operand:SI 0 "register_operand" "=r")
9520 (udiv:SI (match_dup 2) (match_dup 3)))
9521 (clobber (reg:CC FLAGS_REG))]
9522 "TARGET_64BIT
9523 && IN_RANGE (exact_log2 (UINTVAL (operands[3])), 1, 31)"
9524 "#"
9525 "&& reload_completed"
9526 [(set (match_dup 6) (match_dup 2))
9527 (parallel [(set (match_dup 0) (lshiftrt:SI (match_dup 2) (match_dup 4)))
9528 (clobber (reg:CC FLAGS_REG))])
9529 (parallel [(set (match_dup 1)
9530 (zero_extend:DI (and:SI (match_dup 6) (match_dup 5))))
9531 (clobber (reg:CC FLAGS_REG))])]
9532 {
/* operands[4] is the shift count, operands[5] the remainder mask.  */
9533 int v = exact_log2 (UINTVAL (operands[3]));
9534 operands[4] = GEN_INT (v);
9535 operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1);
/* SImode view of the DImode remainder register, so that the copy and
   the AND above have matching modes.  */
operands[6] = gen_lowpart (SImode, operands[1]);
9536 }
9537 [(set_attr "type" "multi")
9538 (set_attr "mode" "SI")])
9539
;; The divide instruction itself for the SWIM248 modes, iterated over
;; any_div.  Operand 0 (constraint "a") receives the any_div result and
;; operand 1 (constraint "d") the paired modulo.  Operand 4 is only (use)d
;; and is tied to operand 1 through its "1" constraint.  Emits
;; "<sgnprefix>div" on operand 3.
9540 (define_insn "*<u>divmod<mode>4_noext"
9541 [(set (match_operand:SWIM248 0 "register_operand" "=a")
9542 (any_div:SWIM248
9543 (match_operand:SWIM248 2 "register_operand" "0")
9544 (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
9545 (set (match_operand:SWIM248 1 "register_operand" "=d")
9546 (<paired_mod>:SWIM248 (match_dup 2) (match_dup 3)))
9547 (use (match_operand:SWIM248 4 "register_operand" "1"))
9548 (clobber (reg:CC FLAGS_REG))]
9549 ""
9550 "<sgnprefix>div{<imodesuffix>}\t%3"
9551 [(set_attr "type" "idiv")
9552 (set_attr "mode" "<MODE>")])
9553
;; SImode divide instruction (iterated over any_div) whose quotient
;; (operand 0) is zero-extended to DImode; TARGET_64BIT only.  Operand 4 is
;; only (use)d and is tied to operand 1 through its "1" constraint.
9554 (define_insn "*<u>divmodsi4_noext_zext_1"
9555 [(set (match_operand:DI 0 "register_operand" "=a")
9556 (zero_extend:DI
9557 (any_div:SI (match_operand:SI 2 "register_operand" "0")
9558 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
9559 (set (match_operand:SI 1 "register_operand" "=d")
9560 (<paired_mod>:SI (match_dup 2) (match_dup 3)))
9561 (use (match_operand:SI 4 "register_operand" "1"))
9562 (clobber (reg:CC FLAGS_REG))]
9563 "TARGET_64BIT"
9564 "<sgnprefix>div{l}\t%3"
9565 [(set_attr "type" "idiv")
9566 (set_attr "mode" "SI")])
9567
;; SImode divide instruction (iterated over any_div) whose remainder
;; (operand 1) is zero-extended to DImode; TARGET_64BIT only.  Operand 4 is
;; an SImode register that is only (use)d; its "1" constraint ties it to
;; the DImode operand 1.
9568 (define_insn "*<u>divmodsi4_noext_zext_2"
9569 [(set (match_operand:DI 1 "register_operand" "=d")
9570 (zero_extend:DI
9571 (<paired_mod>:SI (match_operand:SI 2 "register_operand" "0")
9572 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
9573 (set (match_operand:SI 0 "register_operand" "=a")
9574 (any_div:SI (match_dup 2) (match_dup 3)))
9575 (use (match_operand:SI 4 "register_operand" "1"))
9576 (clobber (reg:CC FLAGS_REG))]
9577 "TARGET_64BIT"
9578 "<sgnprefix>div{l}\t%3"
9579 [(set_attr "type" "idiv")
9580 (set_attr "mode" "SI")])
9581
9582 ;; Avoid sign-extension (using cdq) for constant numerators.
;; Only enabled when the function is not optimized for size.  Split after
;; reload into: operand 0 = the constant numerator (operand 2), operand 1 =
;; operand 4, then the divide/modulo parallel with operand 0 as dividend
;; and a (use) of operand 1.  Operand 4 is -1 when the numerator is
;; negative and 0 otherwise.
9583 (define_insn_and_split "*divmodsi4_const"
9584 [(set (match_operand:SI 0 "register_operand" "=&a")
9585 (div:SI (match_operand:SI 2 "const_int_operand")
9586 (match_operand:SI 3 "nonimmediate_operand" "rm")))
9587 (set (match_operand:SI 1 "register_operand" "=&d")
9588 (mod:SI (match_dup 2) (match_dup 3)))
9589 (clobber (reg:CC FLAGS_REG))]
9590 "!optimize_function_for_size_p (cfun)"
9591 "#"
9592 "&& reload_completed"
9593 [(set (match_dup 0) (match_dup 2))
9594 (set (match_dup 1) (match_dup 4))
9595 (parallel [(set (match_dup 0)
9596 (div:SI (match_dup 0) (match_dup 3)))
9597 (set (match_dup 1)
9598 (mod:SI (match_dup 0) (match_dup 3)))
9599 (use (match_dup 1))
9600 (clobber (reg:CC FLAGS_REG))])]
9601 {
9602 operands[4] = INTVAL (operands[2]) < 0 ? constm1_rtx : const0_rtx;
9603 }
9604 [(set_attr "type" "multi")
9605 (set_attr "mode" "SI")])
9606
;; Expander for signed QImode divide/modulo, enabled by TARGET_QIMODE_MATH.
;; Operand 0 receives the quotient and operand 3 the remainder of
;; operand 1 by operand 2.  The preparation code emits everything itself
;; and finishes with DONE, so the RTL template above it is not emitted.
9607 (define_expand "divmodqi4"
9608 [(parallel [(set (match_operand:QI 0 "register_operand")
9609 (div:QI
9610 (match_operand:QI 1 "register_operand")
9611 (match_operand:QI 2 "nonimmediate_operand")))
9612 (set (match_operand:QI 3 "register_operand")
9613 (mod:QI (match_dup 1) (match_dup 2)))
9614 (clobber (reg:CC FLAGS_REG))])]
9615 "TARGET_QIMODE_MATH"
9616 {
9617 rtx div, mod;
9618 rtx tmp0, tmp1;
9619
/* tmp0 holds the combined HImode result, tmp1 the extended dividend.  */
9620 tmp0 = gen_reg_rtx (HImode);
9621 tmp1 = gen_reg_rtx (HImode);
9622
9623 /* Extend operands[1] to HImode. Generate 8bit divide. Result is in AX. */
9624 emit_insn (gen_extendqihi2 (tmp1, operands[1]));
9625 emit_insn (gen_divmodhiqi3 (tmp0, tmp1, operands[2]));
9626
9627 /* Extract remainder from AH. */
9628 tmp1 = gen_rtx_ZERO_EXTRACT (HImode, tmp0, GEN_INT (8), GEN_INT (8));
9629 tmp1 = lowpart_subreg (QImode, tmp1, HImode);
9630 rtx_insn *insn = emit_move_insn (operands[3], tmp1);
9631
/* Attach a REG_EQUAL note describing the move as the QImode MOD.  */
9632 mod = gen_rtx_MOD (QImode, operands[1], operands[2]);
9633 set_unique_reg_note (insn, REG_EQUAL, mod);
9634
9635 /* Extract quotient from AL. */
9636 insn = emit_move_insn (operands[0], gen_lowpart (QImode, tmp0));
9637
/* Attach a REG_EQUAL note describing the move as the QImode DIV.  */
9638 div = gen_rtx_DIV (QImode, operands[1], operands[2]);
9639 set_unique_reg_note (insn, REG_EQUAL, div);
9640
9641 DONE;
9642 })
9643
;; Expander for unsigned QImode divide/modulo, enabled by
;; TARGET_QIMODE_MATH.  Operand 0 receives the quotient and operand 3 the
;; remainder of operand 1 by operand 2.  The preparation code emits
;; everything itself and finishes with DONE, so the RTL template above it
;; is not emitted.
9644 (define_expand "udivmodqi4"
9645 [(parallel [(set (match_operand:QI 0 "register_operand")
9646 (udiv:QI
9647 (match_operand:QI 1 "register_operand")
9648 (match_operand:QI 2 "nonimmediate_operand")))
9649 (set (match_operand:QI 3 "register_operand")
9650 (umod:QI (match_dup 1) (match_dup 2)))
9651 (clobber (reg:CC FLAGS_REG))])]
9652 "TARGET_QIMODE_MATH"
9653 {
9654 rtx div, mod;
9655 rtx tmp0, tmp1;
9656
/* tmp0 holds the combined HImode result, tmp1 the extended dividend.  */
9657 tmp0 = gen_reg_rtx (HImode);
9658 tmp1 = gen_reg_rtx (HImode);
9659
9660 /* Extend operands[1] to HImode. Generate 8bit divide. Result is in AX. */
9661 emit_insn (gen_zero_extendqihi2 (tmp1, operands[1]));
9662 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, operands[2]));
9663
9664 /* Extract remainder from AH. */
9665 tmp1 = gen_rtx_ZERO_EXTRACT (HImode, tmp0, GEN_INT (8), GEN_INT (8));
9666 tmp1 = lowpart_subreg (QImode, tmp1, HImode);
9667 rtx_insn *insn = emit_move_insn (operands[3], tmp1);
9668
/* Attach a REG_EQUAL note describing the move as the QImode UMOD.  */
9669 mod = gen_rtx_UMOD (QImode, operands[1], operands[2]);
9670 set_unique_reg_note (insn, REG_EQUAL, mod);
9671
9672 /* Extract quotient from AL. */
9673 insn = emit_move_insn (operands[0], gen_lowpart (QImode, tmp0));
9674
/* Attach a REG_EQUAL note describing the move as the QImode UDIV.  */
9675 div = gen_rtx_UDIV (QImode, operands[1], operands[2]);
9676 set_unique_reg_note (insn, REG_EQUAL, div);
9677
9678 DONE;
9679 })
9680
9681 ;; Divide AX by r/m8, with result stored in
9682 ;; AL <- Quotient
9683 ;; AH <- Remainder
9684 ;; Change div/mod to HImode and extend the second argument to HImode
9685 ;; so that mode of div/mod matches with mode of arguments. Otherwise
9686 ;; combine may fail.
;; The pattern is iterated over any_extend; the HImode result is described
;; as (truncated remainder << 8) | truncated quotient.
;; NOTE(review): only the extension of operand 2 and the mnemonic prefix
;; vary with the iterator; the RTL codes stay mod:HI and div:HI in every
;; instantiation, including the zero_extend one.  Confirm that this is
;; intended for the unsigned variant.
9687 (define_insn "<u>divmodhiqi3"
9688 [(set (match_operand:HI 0 "register_operand" "=a")
9689 (ior:HI
9690 (ashift:HI
9691 (zero_extend:HI
9692 (truncate:QI
9693 (mod:HI (match_operand:HI 1 "register_operand" "0")
9694 (any_extend:HI
9695 (match_operand:QI 2 "nonimmediate_operand" "qm")))))
9696 (const_int 8))
9697 (zero_extend:HI
9698 (truncate:QI
9699 (div:HI (match_dup 1) (any_extend:HI (match_dup 2)))))))
9700 (clobber (reg:CC FLAGS_REG))]
9701 "TARGET_QIMODE_MATH"
9702 "<sgnprefix>div{b}\t%2"
9703 [(set_attr "type" "idiv")
9704 (set_attr "mode" "QI")])
9705
9706 ;; We cannot use div/idiv for double division, because it causes
9707 ;; "division by zero" on the overflow and that's not what we expect
9708 ;; from truncate. Because true (non truncating) double division is
9709 ;; never generated, we can't create this insn anyway.
9710 ;
9711 ;(define_insn ""
9712 ; [(set (match_operand:SI 0 "register_operand" "=a")
9713 ; (truncate:SI
9714 ; (udiv:DI (match_operand:DI 1 "register_operand" "A")
9715 ; (zero_extend:DI
9716 ; (match_operand:SI 2 "nonimmediate_operand" "rm")))))
9717 ; (set (match_operand:SI 3 "register_operand" "=d")
9718 ; (truncate:SI
9719 ; (umod:DI (match_dup 1) (zero_extend:DI (match_dup 2)))))
9720 ; (clobber (reg:CC FLAGS_REG))]
9721 ; ""
9722 ; "div{l}\t{%2, %0|%0, %2}"
9723 ; [(set_attr "type" "idiv")])
9724 \f
9725 ;;- Logical AND instructions
9726
9727 ;; On Pentium, "test imm, reg" is pairable only with eax, ax, and al.
9728 ;; Note that this excludes ah.
9729
;; Expander (SWI48 modes) that sets FLAGS_REG in CCNOmode from comparing
;; the AND of operands 0 and 1 against zero.  It has no condition and no
;; preparation statements.
9730 (define_expand "@test<mode>_ccno_1"
9731 [(set (reg:CCNO FLAGS_REG)
9732 (compare:CCNO
9733 (and:SWI48
9734 (match_operand:SWI48 0 "nonimmediate_operand")
9735 (match_operand:SWI48 1 "<nonmemory_szext_operand>"))
9736 (const_int 0)))])
9737
;; Expander that sets FLAGS_REG in CCZmode from comparing the QImode AND of
;; operands 0 and 1 against zero.  It has no condition and no preparation
;; statements.
9738 (define_expand "testqi_ccz_1"
9739 [(set (reg:CCZ FLAGS_REG)
9740 (compare:CCZ
9741 (and:QI
9742 (match_operand:QI 0 "nonimmediate_operand")
9743 (match_operand:QI 1 "nonmemory_operand"))
9744 (const_int 0)))])
9745
;; DImode test: FLAGS_REG is set from comparing the AND of operands 0 and 1
;; against zero; TARGET_64BIT only.  Alternative 0 (operand 1 constraint
;; "Z") is emitted as testl on the %k (SImode) forms of the operands,
;; alternative 1 as testq.  The CC mode accepted depends on operand 1, as
;; explained in the condition below.
9746 (define_insn "*testdi_1"
9747 [(set (reg FLAGS_REG)
9748 (compare
9749 (and:DI
9750 (match_operand:DI 0 "nonimmediate_operand" "%r,rm")
9751 (match_operand:DI 1 "x86_64_szext_nonmemory_operand" "Z,re"))
9752 (const_int 0)))]
9753 "TARGET_64BIT
9754 && ix86_match_ccmode
9755 (insn,
9756 /* If we are going to emit testl instead of testq, and the operands[1]
9757 constant might have the SImode sign bit set, make sure the sign
9758 flag isn't tested, because the instruction will set the sign flag
9759 based on bit 31 rather than bit 63. If it isn't CONST_INT,
9760 conservatively assume it might have bit 31 set. */
9761 (satisfies_constraint_Z (operands[1])
9762 && (!CONST_INT_P (operands[1])
9763 || val_signbit_known_set_p (SImode, INTVAL (operands[1]))))
9764 ? CCZmode : CCNOmode)"
9765 "@
9766 test{l}\t{%k1, %k0|%k0, %k1}
9767 test{q}\t{%1, %0|%0, %1}"
9768 [(set_attr "type" "test")
9769 (set_attr "mode" "SI,DI")])
9770
;; QImode test that is emitted as testl when the "mode" attribute evaluates
;; to SI, and as testb otherwise.  The attribute is SI for alternative 2,
;; and also when optimizing the insn for size with operand 0 matching
;; ext_QIreg_operand and operand 1 matching const_0_to_127_operand.
;; The CC mode accepted is CCNOmode when operand 1 is a non-negative
;; CONST_INT and CCZmode otherwise.
9771 (define_insn "*testqi_1_maybe_si"
9772 [(set (reg FLAGS_REG)
9773 (compare
9774 (and:QI
9775 (match_operand:QI 0 "nonimmediate_operand" "%qm,qm,r")
9776 (match_operand:QI 1 "nonmemory_operand" "q,n,n"))
9777 (const_int 0)))]
9778 "ix86_match_ccmode (insn,
9779 CONST_INT_P (operands[1])
9780 && INTVAL (operands[1]) >= 0 ? CCNOmode : CCZmode)"
9781 {
9782 if (get_attr_mode (insn) == MODE_SI)
9783 {
/* For the testl form, keep only the low 8 bits of a negative
   constant.  */
9784 if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) < 0)
9785 operands[1] = GEN_INT (INTVAL (operands[1]) & 0xff);
9786 return "test{l}\t{%1, %k0|%k0, %1}";
9787 }
9788 return "test{b}\t{%1, %0|%0, %1}";
9789 }
9790 [(set_attr "type" "test")
9791 (set (attr "mode")
9792 (cond [(eq_attr "alternative" "2")
9793 (const_string "SI")
9794 (and (match_test "optimize_insn_for_size_p ()")
9795 (and (match_operand 0 "ext_QIreg_operand")
9796 (match_operand 1 "const_0_to_127_operand")))
9797 (const_string "SI")
9798 ]
9799 (const_string "QI")))
9800 (set_attr "pent_pair" "uv,np,np")])
9801
;; Test insn for the SWI124 modes: FLAGS_REG is set from comparing the AND
;; of operands 0 and 1 against zero.  Requires that the insn matches
;; CCNOmode.  All three alternatives use the same output template.
9802 (define_insn "*test<mode>_1"
9803 [(set (reg FLAGS_REG)
9804 (compare
9805 (and:SWI124
9806 (match_operand:SWI124 0 "nonimmediate_operand" "%<r>m,*a,<r>m")
9807 (match_operand:SWI124 1 "<nonmemory_szext_operand>" "<r>,<i>,<i>"))
9808 (const_int 0)))]
9809 "ix86_match_ccmode (insn, CCNOmode)"
9810 "test{<imodesuffix>}\t{%1, %0|%0, %1}"
9811 [(set_attr "type" "test")
9812 (set_attr "mode" "<MODE>")
9813 (set_attr "pent_pair" "uv,uv,np")])
9814
;; Expander that sets FLAGS_REG in CCNOmode from comparing against zero the
;; AND of constant operand 1 with the QImode subreg of an 8-bit
;; zero_extract at bit position 8 of HImode operand 0.
9815 (define_expand "testqi_ext_1_ccno"
9816 [(set (reg:CCNO FLAGS_REG)
9817 (compare:CCNO
9818 (and:QI
9819 (subreg:QI
9820 (zero_extract:HI
9821 (match_operand:HI 0 "register_operand")
9822 (const_int 8)
9823 (const_int 8)) 0)
9824 (match_operand:QI 1 "const_int_operand"))
9825 (const_int 0)))])
9826
;; testb of the 8-bit field at bit position 8 of operand 0 (printed with
;; the %h modifier) against operand 1.  Iterated over SWI248 for the mode
;; of the zero_extract.  The second alternative, with a memory operand 1,
;; carries the "nox64" isa attribute.
9827 (define_insn "*testqi_ext<mode>_1"
9828 [(set (reg FLAGS_REG)
9829 (compare
9830 (and:QI
9831 (subreg:QI
9832 (zero_extract:SWI248
9833 (match_operand:SWI248 0 "register_operand" "Q,Q")
9834 (const_int 8)
9835 (const_int 8)) 0)
9836 (match_operand:QI 1 "general_operand" "QnBc,m"))
9837 (const_int 0)))]
9838 "ix86_match_ccmode (insn, CCNOmode)"
9839 "test{b}\t{%1, %h0|%h0, %1}"
9840 [(set_attr "isa" "*,nox64")
9841 (set_attr "type" "test")
9842 (set_attr "mode" "QI")])
9843
;; testb between the 8-bit fields at bit position 8 of operands 0 and 1
;; (both printed with the %h modifier).  Iterated over SWI248 for the mode
;; of the zero_extracts.
9844 (define_insn "*testqi_ext<mode>_2"
9845 [(set (reg FLAGS_REG)
9846 (compare
9847 (and:QI
9848 (subreg:QI
9849 (zero_extract:SWI248
9850 (match_operand:SWI248 0 "register_operand" "Q")
9851 (const_int 8)
9852 (const_int 8)) 0)
9853 (subreg:QI
9854 (zero_extract:SWI248
9855 (match_operand:SWI248 1 "register_operand" "Q")
9856 (const_int 8)
9857 (const_int 8)) 0))
9858 (const_int 0)))]
9859 "ix86_match_ccmode (insn, CCNOmode)"
9860 "test{b}\t{%h1, %h0|%h0, %h1}"
9861 [(set_attr "type" "test")
9862 (set_attr "mode" "QI")])
9863
9864 ;; Provide a *testti instruction that STV can implement using ptest.
9865 ;; This pattern splits into *andti3_doubleword and *cmpti_doubleword.
;; Only valid for TARGET_64BIT while ix86_pre_reload_split () holds; the
;; split condition is "&& 1", so it splits whenever the insn matches.
9866 (define_insn_and_split "*testti_doubleword"
9867 [(set (reg:CCZ FLAGS_REG)
9868 (compare:CCZ
9869 (and:TI (match_operand:TI 0 "register_operand")
9870 (match_operand:TI 1 "general_operand"))
9871 (const_int 0)))]
9872 "TARGET_64BIT
9873 && ix86_pre_reload_split ()"
9874 "#"
9875 "&& 1"
9876 [(parallel [(set (match_dup 2) (and:TI (match_dup 0) (match_dup 1)))
9877 (clobber (reg:CC FLAGS_REG))])
9878 (set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 2) (const_int 0)))]
9879 {
/* Fresh TImode pseudo for the AND result; operand 1 is forced into a
   register unless it satisfies x86_64_hilo_general_operand.  */
9880 operands[2] = gen_reg_rtx (TImode);
9881 if (!x86_64_hilo_general_operand (operands[1], TImode))
9882 operands[1] = force_reg (TImode, operands[1]);
9883 })
9884
9885 ;; Combine likes to form bit extractions for some tests. Humor it.
;; Matches a comparison against zero of a zero_extract of operand 2 with
;; length operand 3 and position operand 4.  The split (condition "&& 1")
;; rewrites it as a comparison against zero of an AND of the value with a
;; mask of `len' bits starting at bit `pos', possibly in a narrower or
;; wider mode than operand 2 as decided by the preparation code.
9886 (define_insn_and_split "*testqi_ext_3"
9887 [(set (match_operand 0 "flags_reg_operand")
9888 (match_operator 1 "compare_operator"
9889 [(zero_extract:SWI248
9890 (match_operand 2 "int_nonimmediate_operand" "rm")
9891 (match_operand 3 "const_int_operand")
9892 (match_operand 4 "const_int_operand"))
9893 (const_int 0)]))]
9894 "/* Ensure that resulting mask is zero or sign extended operand. */
9895 INTVAL (operands[4]) >= 0
9896 && ((INTVAL (operands[3]) > 0
9897 && INTVAL (operands[3]) + INTVAL (operands[4]) <= 32)
9898 || (<MODE>mode == DImode
9899 && INTVAL (operands[3]) > 32
9900 && INTVAL (operands[3]) + INTVAL (operands[4]) == 64))
9901 && ix86_match_ccmode (insn,
9902 /* If zero_extract mode precision is the same
9903 as len, the SF of the zero_extract
9904 comparison will be the most significant
9905 extracted bit, but this could be matched
9906 after splitting only for pos 0 len all bits
9907 trivial extractions. Require CCZmode. */
9908 (GET_MODE_PRECISION (<MODE>mode)
9909 == INTVAL (operands[3]))
9910 /* Otherwise, require CCZmode if we'd use a mask
9911 with the most significant bit set and can't
9912 widen it to wider mode. *testdi_1 also
9913 requires CCZmode if the mask has bit
9914 31 set and all bits above it clear. */
9915 || (INTVAL (operands[3]) + INTVAL (operands[4])
9916 >= 32)
9917 /* We can't widen also if val is not a REG. */
9918 || (INTVAL (operands[3]) + INTVAL (operands[4])
9919 == GET_MODE_PRECISION (GET_MODE (operands[2]))
9920 && !register_operand (operands[2],
9921 GET_MODE (operands[2])))
9922 /* And we shouldn't widen if
9923 TARGET_PARTIAL_REG_STALL. */
9924 || (TARGET_PARTIAL_REG_STALL
9925 && (INTVAL (operands[3]) + INTVAL (operands[4])
9926 >= (paradoxical_subreg_p (operands[2])
9927 && (GET_MODE_CLASS
9928 (GET_MODE (SUBREG_REG (operands[2])))
9929 == MODE_INT)
9930 ? GET_MODE_PRECISION
9931 (GET_MODE (SUBREG_REG (operands[2])))
9932 : GET_MODE_PRECISION
9933 (GET_MODE (operands[2])))))
9934 ? CCZmode : CCNOmode)"
9935 "#"
9936 "&& 1"
9937 [(set (match_dup 0) (match_op_dup 1 [(match_dup 2) (const_int 0)]))]
9938 {
/* val/mode track the value actually tested and the mode of the test;
   len and pos describe the extracted bit-field.  */
9939 rtx val = operands[2];
9940 HOST_WIDE_INT len = INTVAL (operands[3]);
9941 HOST_WIDE_INT pos = INTVAL (operands[4]);
9942 machine_mode mode = GET_MODE (val);
9943
9944 if (SUBREG_P (val))
9945 {
9946 machine_mode submode = GET_MODE (SUBREG_REG (val));
9947
9948 /* Narrow paradoxical subregs to prevent partial register stalls. */
9949 if (GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (submode)
9950 && GET_MODE_CLASS (submode) == MODE_INT
9951 && (GET_MODE (operands[0]) == CCZmode
9952 || pos + len < GET_MODE_PRECISION (submode)
9953 || REG_P (SUBREG_REG (val))))
9954 {
9955 val = SUBREG_REG (val);
9956 mode = submode;
9957 }
9958 }
9959
9960 /* Small HImode tests can be converted to QImode. */
9961 if (pos + len <= 8
9962 && register_operand (val, HImode))
9963 {
9964 rtx nval = gen_lowpart (QImode, val);
9965 if (!MEM_P (nval)
9966 || GET_MODE (operands[0]) == CCZmode
9967 || pos + len < 8)
9968 {
9969 val = nval;
9970 mode = QImode;
9971 }
9972 }
9973
9974 gcc_assert (pos + len <= GET_MODE_PRECISION (mode));
9975
9976 /* If the mask is going to have the sign bit set in the mode
9977 we want to do the comparison in and user isn't interested just
9978 in the zero flag, then we must widen the target mode. */
9979 if (pos + len == GET_MODE_PRECISION (mode)
9980 && GET_MODE (operands[0]) != CCZmode)
9981 {
9982 gcc_assert (pos + len < 32 && !MEM_P (val));
9983 mode = SImode;
9984 val = gen_lowpart (mode, val);
9985 }
9986
/* Build the mask for the chosen mode and replace operand 2 with the
   AND that the split pattern compares against zero.  */
9987 wide_int mask
9988 = wi::shifted_mask (pos, len, false, GET_MODE_PRECISION (mode));
9989
9990 operands[2] = gen_rtx_AND (mode, val, immed_wide_int_const (mask, mode));
9991 })
9992
9993 ;; Split and;cmp (as optimized by combine) into not;test
9994 ;; Except when TARGET_BMI provides andn (*andn_<mode>_ccno).
;; Valid only while ix86_pre_reload_split () holds, and only when
;; TARGET_BMI is off or operand 1 is not a REG.  The split computes the NOT
;; of operand 0 into a fresh pseudo (operand 2) and then compares the AND
;; of that pseudo with operand 1 against zero in CCZmode.
9995 (define_insn_and_split "*test<mode>_not"
9996 [(set (reg:CCZ FLAGS_REG)
9997 (compare:CCZ
9998 (and:SWI
9999 (not:SWI (match_operand:SWI 0 "register_operand"))
10000 (match_operand:SWI 1 "<nonmemory_szext_operand>"))
10001 (const_int 0)))]
10002 "ix86_pre_reload_split ()
10003 && (!TARGET_BMI || !REG_P (operands[1]))"
10004 "#"
10005 "&& 1"
10006 [(set (match_dup 2) (not:SWI (match_dup 0)))
10007 (set (reg:CCZ FLAGS_REG)
10008 (compare:CCZ (and:SWI (match_dup 2) (match_dup 1))
10009 (const_int 0)))]
10010 "operands[2] = gen_reg_rtx (<MODE>mode);")
10011
10012 ;; Split and;cmp (as optimized by combine) into andn;cmp $0
;; Double-word (DWI) variant, valid only while ix86_pre_reload_split ()
;; holds.  The split forces operand 0 into a register, computes
;; (~operand 0 & operand 1) into a fresh pseudo (operand 2) and compares
;; that pseudo against zero in CCZmode.
10013 (define_insn_and_split "*test<mode>_not_doubleword"
10014 [(set (reg:CCZ FLAGS_REG)
10015 (compare:CCZ
10016 (and:DWI
10017 (not:DWI (match_operand:DWI 0 "nonimmediate_operand"))
10018 (match_operand:DWI 1 "nonimmediate_operand"))
10019 (const_int 0)))]
10020 "ix86_pre_reload_split ()"
10021 "#"
10022 "&& 1"
10023 [(parallel
10024 [(set (match_dup 2) (and:DWI (not:DWI (match_dup 0)) (match_dup 1)))
10025 (clobber (reg:CC FLAGS_REG))])
10026 (set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 2) (const_int 0)))]
10027 {
10028 operands[0] = force_reg (<MODE>mode, operands[0]);
10029 operands[2] = gen_reg_rtx (<MODE>mode);
10030 })
10031
10032 ;; Convert HImode/SImode test instructions with immediate to QImode ones.
10033 ;; i386 cannot encode test with an 8-bit sign-extended immediate, so
10034 ;; this is a relatively important trick.
10035 ;; Do the conversion only post-reload to avoid restricting the register class
10036 ;; to QI regs.
;; Post-reload splitter for a non-QImode test of a QIreg_operand against a
;; constant that only has bits within 8..15 set (bits 8..14 when the insn
;; matches CCNOmode rather than CCZmode).  The test is rewritten as a
;; QImode AND of the 8-bit field at position 8 of the SImode lowpart of
;; operand 2 with the constant shifted right by 8.
10037 (define_split
10038 [(set (match_operand 0 "flags_reg_operand")
10039 (match_operator 1 "compare_operator"
10040 [(and (match_operand 2 "QIreg_operand")
10041 (match_operand 3 "const_int_operand"))
10042 (const_int 0)]))]
10043 "reload_completed
10044 && GET_MODE (operands[2]) != QImode
10045 && ((ix86_match_ccmode (insn, CCZmode)
10046 && !(INTVAL (operands[3]) & ~(255 << 8)))
10047 || (ix86_match_ccmode (insn, CCNOmode)
10048 && !(INTVAL (operands[3]) & ~(127 << 8))))"
10049 [(set (match_dup 0)
10050 (match_op_dup 1
10051 [(and:QI
10052 (subreg:QI
10053 (zero_extract:SI (match_dup 2)
10054 (const_int 8)
10055 (const_int 8)) 0)
10056 (match_dup 3))
10057 (const_int 0)]))]
10058 {
10059 operands[2] = gen_lowpart (SImode, operands[2]);
10060 operands[3] = gen_int_mode (INTVAL (operands[3]) >> 8, QImode);
10061 })
10062
;; Post-reload splitter for a non-QImode test of operand 2 against a
;; constant that only has bits within 0..7 set (bits 0..6 when the insn
;; matches CCNOmode rather than CCZmode).  Operand 2 must be either not a
;; REG or satisfy ANY_QI_REG_P.  The test is rewritten as a QImode AND of
;; the QImode lowpart of operand 2 with the constant.
10063 (define_split
10064 [(set (match_operand 0 "flags_reg_operand")
10065 (match_operator 1 "compare_operator"
10066 [(and (match_operand 2 "nonimmediate_operand")
10067 (match_operand 3 "const_int_operand"))
10068 (const_int 0)]))]
10069 "reload_completed
10070 && GET_MODE (operands[2]) != QImode
10071 && (!REG_P (operands[2]) || ANY_QI_REG_P (operands[2]))
10072 && ((ix86_match_ccmode (insn, CCZmode)
10073 && !(INTVAL (operands[3]) & ~255))
10074 || (ix86_match_ccmode (insn, CCNOmode)
10075 && !(INTVAL (operands[3]) & ~127)))"
10076 [(set (match_dup 0)
10077 (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3))
10078 (const_int 0)]))]
10079 {
10080 operands[2] = gen_lowpart (QImode, operands[2]);
10081 operands[3] = gen_int_mode (INTVAL (operands[3]), QImode);
10082 })
10083
10084 ;; %%% This used to optimize known byte-wide and operations to memory,
10085 ;; and sometimes to QImode registers. If this is considered useful,
10086 ;; it should be done with splitters.
10087
;; Expander for AND in the SDWIM modes.  The preparation code emits
;; everything itself and finishes with DONE:
;;  - for modes wider than a word, operand 2 is forced into a register
;;    unless it satisfies x86_64_hilo_general_operand;
;;  - for modes up to a word, an AND of a register destination with a
;;    constant equal to the SImode, HImode or QImode mask is emitted as an
;;    unsigned extension from that narrower mode, unless
;;    TARGET_ZERO_EXTEND_WITH_AND is set and the function is optimized for
;;    speed;
;;  - everything else goes through ix86_expand_binary_operator.
10088 (define_expand "and<mode>3"
10089 [(set (match_operand:SDWIM 0 "nonimmediate_operand")
10090 (and:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")
10091 (match_operand:SDWIM 2 "<general_szext_operand>")))]
10092 ""
10093 {
/* Stays <MODE>mode unless the AND turns out to be a zero-extension.  */
10094 machine_mode mode = <MODE>mode;
10095
10096 if (GET_MODE_SIZE (<MODE>mode) > UNITS_PER_WORD
10097 && !x86_64_hilo_general_operand (operands[2], <MODE>mode))
10098 operands[2] = force_reg (<MODE>mode, operands[2]);
10099
10100 if (GET_MODE_SIZE (<MODE>mode) <= UNITS_PER_WORD
10101 && const_int_operand (operands[2], <MODE>mode)
10102 && register_operand (operands[0], <MODE>mode)
10103 && !(TARGET_ZERO_EXTEND_WITH_AND
10104 && optimize_function_for_speed_p (cfun)))
10105 {
10106 unsigned HOST_WIDE_INT ival = UINTVAL (operands[2]);
10107
10108 if (ival == GET_MODE_MASK (SImode))
10109 mode = SImode;
10110 else if (ival == GET_MODE_MASK (HImode))
10111 mode = HImode;
10112 else if (ival == GET_MODE_MASK (QImode))
10113 mode = QImode;
10114 }
10115
10116 if (mode != <MODE>mode)
10117 emit_insn (gen_extend_insn
10118 (operands[0], gen_lowpart (mode, operands[1]),
10119 <MODE>mode, mode, 1));
10120 else
10121 ix86_expand_binary_operator (AND, <MODE>mode, operands);
10122
10123 DONE;
10124 })
10125
;; Double-word AND, always split ("#") after reload into per-half
;; operations.  For each half: an AND with constant 0 becomes a move of 0,
;; an AND with constant -1 emits nothing, and anything else is expanded
;; with ix86_expand_binary_operator.
10126 (define_insn_and_split "*and<dwi>3_doubleword"
10127 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
10128 (and:<DWI>
10129 (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0")
10130 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o")))
10131 (clobber (reg:CC FLAGS_REG))]
10132 "ix86_binary_operator_ok (AND, <DWI>mode, operands)"
10133 "#"
10134 "&& reload_completed"
10135 [(const_int:DWIH 0)]
10136 {
/* Set when the first half needed no insn (AND with -1).  */
10137 bool emit_insn_deleted_note_p = false;
10138
/* Presumably leaves the low halves in operands[0..2] and the high
   halves in operands[3..5] -- confirm against split_double_mode.  */
10139 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
10140
10141 if (operands[2] == const0_rtx)
10142 emit_move_insn (operands[0], const0_rtx);
10143 else if (operands[2] == constm1_rtx)
10144 emit_insn_deleted_note_p = true;
10145 else
10146 ix86_expand_binary_operator (AND, <MODE>mode, &operands[0]);
10147
10148 if (operands[5] == const0_rtx)
10149 emit_move_insn (operands[3], const0_rtx);
10150 else if (operands[5] == constm1_rtx)
10151 {
/* Neither half emitted an insn; emit a deleted-insn note instead.  */
10152 if (emit_insn_deleted_note_p)
10153 emit_note (NOTE_INSN_DELETED);
10154 }
10155 else
10156 ix86_expand_binary_operator (AND, <MODE>mode, &operands[3]);
10157
10158 DONE;
10159 })
10160
;; DImode AND; TARGET_64BIT only.  Five alternatives:
;;   0: operand 2 constraint "Z", emitted as andl on the %k forms;
;;   1, 2: emitted as andq;
;;   3: operand 2 constraint "L", type imovx, output "#";
;;   4: "k" operands, type msklog, isa avx512bw, output "#".
;; The prefix_rex attribute is forced to 1 for the imovx alternative when
;; operand 2 is 0xff and operand 1 matches ext_QIreg_operand.
10161 (define_insn "*anddi_1"
10162 [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r,?k")
10163 (and:DI
10164 (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm,k")
10165 (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,m,L,k")))
10166 (clobber (reg:CC FLAGS_REG))]
10167 "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)"
10168 "@
10169 and{l}\t{%k2, %k0|%k0, %k2}
10170 and{q}\t{%2, %0|%0, %2}
10171 and{q}\t{%2, %0|%0, %2}
10172 #
10173 #"
10174 [(set_attr "isa" "x64,x64,x64,x64,avx512bw")
10175 (set_attr "type" "alu,alu,alu,imovx,msklog")
10176 (set_attr "length_immediate" "*,*,*,0,*")
10177 (set (attr "prefix_rex")
10178 (if_then_else
10179 (and (eq_attr "type" "imovx")
10180 (and (match_test "INTVAL (operands[2]) == 0xff")
10181 (match_operand 1 "ext_QIreg_operand")))
10182 (const_string "1")
10183 (const_string "*")))
10184 (set_attr "mode" "SI,DI,DI,SI,DI")])
10185
;; DImode AND with a constant whose complement is a single bit in
;; positions 31..63; requires TARGET_64BIT and TARGET_USE_BT.  Always split
;; ("#") after reload into a store of 0 to the one-bit zero_extract of
;; operand 0 at that bit position (operand 3).
10186 (define_insn_and_split "*anddi_1_btr"
10187 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
10188 (and:DI
10189 (match_operand:DI 1 "nonimmediate_operand" "%0")
10190 (match_operand:DI 2 "const_int_operand" "n")))
10191 (clobber (reg:CC FLAGS_REG))]
10192 "TARGET_64BIT && TARGET_USE_BT
10193 && ix86_binary_operator_ok (AND, DImode, operands)
10194 && IN_RANGE (exact_log2 (~INTVAL (operands[2])), 31, 63)"
10195 "#"
10196 "&& reload_completed"
10197 [(parallel [(set (zero_extract:DI (match_dup 0)
10198 (const_int 1)
10199 (match_dup 3))
10200 (const_int 0))
10201 (clobber (reg:CC FLAGS_REG))])]
10202 "operands[3] = GEN_INT (exact_log2 (~INTVAL (operands[2])));"
10203 [(set_attr "type" "alu1")
10204 (set_attr "prefix_0f" "1")
10205 (set_attr "znver1_decode" "double")
10206 (set_attr "mode" "DI")])
10207
10208 ;; Turn *anddi_1 into *andsi_1_zext if possible.
;; Applies to a DImode AND of a DImode subreg of an SImode register with an
;; x86_64_zext_immediate_operand; TARGET_64BIT only.  The preparation code
;; converts operand 2 to SImode: SYMBOL_REF, LABEL_REF and CONST are copied
;; and have their mode changed, anything else goes through gen_lowpart.
10209 (define_split
10210 [(set (match_operand:DI 0 "register_operand")
10211 (and:DI (subreg:DI (match_operand:SI 1 "register_operand") 0)
10212 (match_operand:DI 2 "x86_64_zext_immediate_operand")))
10213 (clobber (reg:CC FLAGS_REG))]
10214 "TARGET_64BIT"
10215 [(parallel [(set (match_dup 0)
10216 (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))
10217 (clobber (reg:CC FLAGS_REG))])]
10218 {
10219 if (GET_CODE (operands[2]) == SYMBOL_REF
10220 || GET_CODE (operands[2]) == LABEL_REF)
10221 {
10222 operands[2] = shallow_copy_rtx (operands[2]);
10223 PUT_MODE (operands[2], SImode);
10224 }
10225 else if (GET_CODE (operands[2]) == CONST)
10226 {
10227 /* (const:DI (plus:DI (symbol_ref:DI ("...")) (const_int N))) */
10228 operands[2] = copy_rtx (operands[2]);
10229 PUT_MODE (operands[2], SImode);
10230 PUT_MODE (XEXP (operands[2], 0), SImode);
10231 PUT_MODE (XEXP (XEXP (operands[2], 0), 0), SImode);
10232 }
10233 else
10234 operands[2] = gen_lowpart (SImode, operands[2]);
10235 })
10236
10237 ;; See comment for addsi_1_zext why we do use nonimmediate_operand
;; SImode AND whose result is zero-extended into DImode operand 0;
;; TARGET_64BIT only.  Emitted as andl with the destination printed in its
;; %k (SImode) form.
10238 (define_insn "*andsi_1_zext"
10239 [(set (match_operand:DI 0 "register_operand" "=r")
10240 (zero_extend:DI
10241 (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
10242 (match_operand:SI 2 "x86_64_general_operand" "rBMe"))))
10243 (clobber (reg:CC FLAGS_REG))]
10244 "TARGET_64BIT && ix86_binary_operator_ok (AND, SImode, operands)"
10245 "and{l}\t{%2, %k0|%k0, %2}"
10246 [(set_attr "type" "alu")
10247 (set_attr "mode" "SI")])
10248
;; AND for the SWI24 modes.  Four alternatives:
;;   0, 1: emitted as a plain and;
;;   2: operand 2 constraint "L", type imovx, output "#";
;;   3: "k" operands, type msklog, output "#"; its isa attribute is
;;      avx512bw when the mode attribute is SI and avx512f otherwise.
;; The prefix_rex attribute is forced to 1 for the imovx alternative when
;; operand 2 is 0xff and operand 1 matches ext_QIreg_operand.
10249 (define_insn "*and<mode>_1"
10250 [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,r,Ya,?k")
10251 (and:SWI24 (match_operand:SWI24 1 "nonimmediate_operand" "%0,0,qm,k")
10252 (match_operand:SWI24 2 "<general_operand>" "r<i>,<m>,L,k")))
10253 (clobber (reg:CC FLAGS_REG))]
10254 "ix86_binary_operator_ok (AND, <MODE>mode, operands)"
10255 "@
10256 and{<imodesuffix>}\t{%2, %0|%0, %2}
10257 and{<imodesuffix>}\t{%2, %0|%0, %2}
10258 #
10259 #"
10260 [(set (attr "isa")
10261 (cond [(eq_attr "alternative" "3")
10262 (if_then_else (eq_attr "mode" "SI")
10263 (const_string "avx512bw")
10264 (const_string "avx512f"))
10265 ]
10266 (const_string "*")))
10267 (set_attr "type" "alu,alu,imovx,msklog")
10268 (set_attr "length_immediate" "*,*,0,*")
10269 (set (attr "prefix_rex")
10270 (if_then_else
10271 (and (eq_attr "type" "imovx")
10272 (and (match_test "INTVAL (operands[2]) == 0xff")
10273 (match_operand 1 "ext_QIreg_operand")))
10274 (const_string "1")
10275 (const_string "*")))
10276 (set_attr "mode" "<MODE>,<MODE>,SI,<MODE>")])
10277
;; QImode AND.  Alternatives 0 and 1 are emitted as andb, alternative 2 as
;; andl on the %k forms of the operands, and alternative 3 ("k" operands,
;; type msklog) outputs "#".  The mode attribute is SI for alternative 2,
;; HI for alternative 3 when TARGET_AVX512DQ is off, and QI otherwise.
10278 (define_insn "*andqi_1"
10279 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,?k")
10280 (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k")
10281 (match_operand:QI 2 "general_operand" "qn,m,rn,k")))
10282 (clobber (reg:CC FLAGS_REG))]
10283 "ix86_binary_operator_ok (AND, QImode, operands)"
10284 "@
10285 and{b}\t{%2, %0|%0, %2}
10286 and{b}\t{%2, %0|%0, %2}
10287 and{l}\t{%k2, %k0|%k0, %k2}
10288 #"
10289 [(set_attr "type" "alu,alu,alu,msklog")
10290 (set (attr "mode")
10291 (cond [(eq_attr "alternative" "2")
10292 (const_string "SI")
10293 (and (eq_attr "alternative" "3")
10294 (match_test "!TARGET_AVX512DQ"))
10295 (const_string "HI")
10296 ]
10297 (const_string "QI")))
10298 ;; Potential partial reg stall on alternative 2.
10299 (set (attr "preferred_for_speed")
10300 (cond [(eq_attr "alternative" "2")
10301 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
10302 (symbol_ref "true")))])
10303
10304 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; AND into the strict_low_part of operand 0 for the SWI12 modes; enabled
;; when TARGET_PARTIAL_REG_STALL is off or the function is optimized for
;; size.  Alternative 0 is emitted as a plain and.  Alternative 1 outputs
;; "#" and is split after reload into a strict_low_part move of operand 1
;; into operand 0 followed by the AND of operand 0 with operand 2.
10305 (define_insn_and_split "*and<mode>_1_slp"
10306 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
10307 (and:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "%0,!<r>")
10308 (match_operand:SWI12 2 "general_operand" "<r>mn,<r>mn")))
10309 (clobber (reg:CC FLAGS_REG))]
10310 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
10311 "@
10312 and{<imodesuffix>}\t{%2, %0|%0, %2}
10313 #"
10314 "&& reload_completed"
10315 [(set (strict_low_part (match_dup 0)) (match_dup 1))
10316 (parallel
10317 [(set (strict_low_part (match_dup 0))
10318 (and:SWI12 (match_dup 0) (match_dup 2)))
10319 (clobber (reg:CC FLAGS_REG))])]
10320 ""
10321 [(set_attr "type" "alu")
10322 (set_attr "mode" "<MODE>")])
10323
;; After reload, turn an AND of operand 1 with a constant mask into a zero
;; extension when operand 1 is not a register or is a different register
;; from operand 0.  The preparation code accepts only constants equal to
;; GET_MODE_MASK of SImode, HImode or QImode (anything else reaches
;; gcc_unreachable), widens a narrower-than-SImode destination to SImode,
;; and emits the extension from the low part of operand 1 taken in the
;; mode selected by the mask.
10324 (define_split
10325 [(set (match_operand:SWI248 0 "register_operand")
10326 (and:SWI248 (match_operand:SWI248 1 "nonimmediate_operand")
10327 (match_operand:SWI248 2 "const_int_operand")))
10328 (clobber (reg:CC FLAGS_REG))]
10329 "reload_completed
10330 && (!REG_P (operands[1])
10331 || REGNO (operands[0]) != REGNO (operands[1]))"
10332 [(const_int 0)]
10333 {
10334 unsigned HOST_WIDE_INT ival = UINTVAL (operands[2]);
10335 machine_mode mode;
10336
10337 if (ival == GET_MODE_MASK (SImode))
10338 mode = SImode;
10339 else if (ival == GET_MODE_MASK (HImode))
10340 mode = HImode;
10341 else if (ival == GET_MODE_MASK (QImode))
10342 mode = QImode;
10343 else
10344 gcc_unreachable ();
10345
10346 /* Zero extend to SImode to avoid partial register stalls. */
10347 if (<MODE_SIZE> < GET_MODE_SIZE (SImode))
10348 operands[0] = gen_lowpart (SImode, operands[0]);
10349
10350 emit_insn (gen_extend_insn
10351 (operands[0], gen_lowpart (mode, operands[1]),
10352 GET_MODE (operands[0]), mode, 1));
10353 DONE;
10354 })
10355
;; AND of a SWI48 register with -65536 (all bits set except the low 16):
;; rewritten as a store of zero into the HImode low part of the register.
;; Applies when (TARGET_FAST_PREFIX && !TARGET_PARTIAL_REG_STALL) or when
;; optimizing the function for size.
10356 (define_split
10357 [(set (match_operand:SWI48 0 "register_operand")
10358 (and:SWI48 (match_dup 0)
10359 (const_int -65536)))
10360 (clobber (reg:CC FLAGS_REG))]
10361 "(TARGET_FAST_PREFIX && !TARGET_PARTIAL_REG_STALL)
10362 || optimize_function_for_size_p (cfun)"
10363 [(set (strict_low_part (match_dup 1)) (const_int 0))]
10364 "operands[1] = gen_lowpart (HImode, operands[0]);")
10365
;; AND of an any_QIreg_operand register with -256 (all bits set except the
;; low 8): after reload, rewritten as a store of zero into the QImode low
;; part of the register.  Applies when !TARGET_PARTIAL_REG_STALL or when
;; optimizing the function for size.
10366 (define_split
10367 [(set (match_operand:SWI248 0 "any_QIreg_operand")
10368 (and:SWI248 (match_dup 0)
10369 (const_int -256)))
10370 (clobber (reg:CC FLAGS_REG))]
10371 "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
10372 && reload_completed"
10373 [(set (strict_low_part (match_dup 1)) (const_int 0))]
10374 "operands[1] = gen_lowpart (QImode, operands[0]);")
10375
;; AND of a QIreg_operand register with -65281 (all bits set except bits
;; 8..15): after reload, rewritten as an XOR of the 8-bit field at
;; position 8 with itself, expressed on the SImode low part of the
;; register.  Applies when !TARGET_PARTIAL_REG_STALL or when optimizing the
;; function for size.
10376 (define_split
10377 [(set (match_operand:SWI248 0 "QIreg_operand")
10378 (and:SWI248 (match_dup 0)
10379 (const_int -65281)))
10380 (clobber (reg:CC FLAGS_REG))]
10381 "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
10382 && reload_completed"
10383 [(parallel
10384 [(set (zero_extract:SI (match_dup 0)
10385 (const_int 8)
10386 (const_int 8))
10387 (subreg:SI
10388 (xor:QI
10389 (subreg:QI
10390 (zero_extract:SI (match_dup 0)
10391 (const_int 8)
10392 (const_int 8)) 0)
10393 (subreg:QI
10394 (zero_extract:SI (match_dup 0)
10395 (const_int 8)
10396 (const_int 8)) 0)) 0))
10397 (clobber (reg:CC FLAGS_REG))])]
10398 "operands[0] = gen_lowpart (SImode, operands[0]);")
10399
;; DImode AND that stores its result in operand 0 and also sets FLAGS_REG
;; from comparing that result with zero (TARGET_64BIT only).  Alternative 0
;; (constraint "Z") emits and{l} on the %k forms of the operands and has
;; mode SI; the other two alternatives emit and{q}.  The condition selects
;; the CC mode passed to ix86_match_ccmode as explained in its embedded
;; comment.
10400 (define_insn "*anddi_2"
10401 [(set (reg FLAGS_REG)
10402 (compare
10403 (and:DI
10404 (match_operand:DI 1 "nonimmediate_operand" "%0,0,0")
10405 (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,m"))
10406 (const_int 0)))
10407 (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r")
10408 (and:DI (match_dup 1) (match_dup 2)))]
10409 "TARGET_64BIT
10410 && ix86_match_ccmode
10411 (insn,
10412 /* If we are going to emit andl instead of andq, and the operands[2]
10413 constant might have the SImode sign bit set, make sure the sign
10414 flag isn't tested, because the instruction will set the sign flag
10415 based on bit 31 rather than bit 63. If it isn't CONST_INT,
10416 conservatively assume it might have bit 31 set. */
10417 (satisfies_constraint_Z (operands[2])
10418 && (!CONST_INT_P (operands[2])
10419 || val_signbit_known_set_p (SImode, INTVAL (operands[2]))))
10420 ? CCZmode : CCNOmode)
10421 && ix86_binary_operator_ok (AND, DImode, operands)"
10422 "@
10423 and{l}\t{%k2, %k0|%k0, %k2}
10424 and{q}\t{%2, %0|%0, %2}
10425 and{q}\t{%2, %0|%0, %2}"
10426 [(set_attr "type" "alu")
10427 (set_attr "mode" "SI,DI,DI")])
10428
;; SImode AND that sets FLAGS_REG (CCNOmode) from comparing the result with
;; zero and stores the zero-extended result in DImode operand 0; emitted as
;; and{l} on the %k form of operand 0.  TARGET_64BIT only.
10429 ;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
10430 (define_insn "*andsi_2_zext"
10431 [(set (reg FLAGS_REG)
10432 (compare (and:SI
10433 (match_operand:SI 1 "nonimmediate_operand" "%0")
10434 (match_operand:SI 2 "x86_64_general_operand" "rBMe"))
10435 (const_int 0)))
10436 (set (match_operand:DI 0 "register_operand" "=r")
10437 (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))]
10438 "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
10439 && ix86_binary_operator_ok (AND, SImode, operands)"
10440 "and{l}\t{%2, %k0|%k0, %2}"
10441 [(set_attr "type" "alu")
10442 (set_attr "mode" "SI")])
10443
;; QImode AND that stores its result in operand 0 and also sets FLAGS_REG
;; from comparing that result with zero.  The CC mode required is CCNOmode
;; when operand 2 is a non-negative CONST_INT and CCZmode otherwise.  When
;; the "mode" attribute evaluates to SI the insn is emitted as and{l} on
;; the %k form of operand 0, and a negative constant operand 2 is first
;; masked with 0xff; otherwise and{b} is emitted.
10444 (define_insn "*andqi_2_maybe_si"
10445 [(set (reg FLAGS_REG)
10446 (compare (and:QI
10447 (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
10448 (match_operand:QI 2 "general_operand" "qn,m,n"))
10449 (const_int 0)))
10450 (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r")
10451 (and:QI (match_dup 1) (match_dup 2)))]
10452 "ix86_binary_operator_ok (AND, QImode, operands)
10453 && ix86_match_ccmode (insn,
10454 CONST_INT_P (operands[2])
10455 && INTVAL (operands[2]) >= 0 ? CCNOmode : CCZmode)"
10456 {
10457 if (get_attr_mode (insn) == MODE_SI)
10458 {
10459 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0)
10460 operands[2] = GEN_INT (INTVAL (operands[2]) & 0xff);
10461 return "and{l}\t{%2, %k0|%k0, %2}";
10462 }
10463 return "and{b}\t{%2, %0|%0, %2}";
10464 }
10465 [(set_attr "type" "alu")
;; Mode is SI for alternative 2, and also when optimizing the insn for
;; size with operand 0 matching ext_QIreg_operand and operand 2 matching
;; const_0_to_127_operand; otherwise QI.
10466 (set (attr "mode")
10467 (cond [(eq_attr "alternative" "2")
10468 (const_string "SI")
10469 (and (match_test "optimize_insn_for_size_p ()")
10470 (and (match_operand 0 "ext_QIreg_operand")
10471 (match_operand 2 "const_0_to_127_operand")))
10472 (const_string "SI")
10473 ]
10474 (const_string "QI")))
10475 ;; Potential partial reg stall on alternative 2.
10476 (set (attr "preferred_for_speed")
10477 (cond [(eq_attr "alternative" "2")
10478 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
10479 (symbol_ref "true")))])
10480
;; AND for the SWI124 modes that stores its result in operand 0 and also
;; sets FLAGS_REG (CCNOmode) from comparing that result with zero.
10481 (define_insn "*and<mode>_2"
10482 [(set (reg FLAGS_REG)
10483 (compare (and:SWI124
10484 (match_operand:SWI124 1 "nonimmediate_operand" "%0,0")
10485 (match_operand:SWI124 2 "<general_operand>" "<r><i>,<m>"))
10486 (const_int 0)))
10487 (set (match_operand:SWI124 0 "nonimmediate_operand" "=<r>m,<r>")
10488 (and:SWI124 (match_dup 1) (match_dup 2)))]
10489 "ix86_match_ccmode (insn, CCNOmode)
10490 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
10491 "and{<imodesuffix>}\t{%2, %0|%0, %2}"
10492 [(set_attr "type" "alu")
10493 (set_attr "mode" "<MODE>")])
10494
;; Named expander: QImode AND of the 8-bit field at position 8 of HImode
;; operand 1 with constant operand 2, stored into the same field of HImode
;; operand 0; FLAGS_REG is clobbered.
10495 (define_expand "andqi_ext_1"
10496 [(parallel
10497 [(set (zero_extract:HI (match_operand:HI 0 "register_operand")
10498 (const_int 8)
10499 (const_int 8))
10500 (subreg:HI
10501 (and:QI
10502 (subreg:QI
10503 (zero_extract:HI (match_operand:HI 1 "register_operand")
10504 (const_int 8)
10505 (const_int 8)) 0)
10506 (match_operand:QI 2 "const_int_operand")) 0))
10507 (clobber (reg:CC FLAGS_REG))])])
10508
;; QImode AND of the 8-bit field at position 8 of operand 1 with operand 2,
;; stored into the same field of operand 0 (printed with the %h modifier).
;; Operands 0 and 1 must be rtx_equal_p.  The memory alternative for
;; operand 2 is restricted to isa nox64.
10509 (define_insn "*andqi_ext<mode>_1"
10510 [(set (zero_extract:SWI248
10511 (match_operand:SWI248 0 "register_operand" "+Q,Q")
10512 (const_int 8)
10513 (const_int 8))
10514 (subreg:SWI248
10515 (and:QI
10516 (subreg:QI
10517 (zero_extract:SWI248
10518 (match_operand:SWI248 1 "register_operand" "0,0")
10519 (const_int 8)
10520 (const_int 8)) 0)
10521 (match_operand:QI 2 "general_operand" "QnBc,m")) 0))
10522 (clobber (reg:CC FLAGS_REG))]
10523 "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
10524 rtx_equal_p (operands[0], operands[1])"
10525 "and{b}\t{%2, %h0|%h0, %2}"
10526 [(set_attr "isa" "*,nox64")
10527 (set_attr "type" "alu")
10528 (set_attr "mode" "QI")])
10529
;; Flag-setting form of the high-byte AND: FLAGS_REG (CCNOmode) is set from
;; comparing the AND result with zero, and the same result is stored into
;; the 8-bit field at position 8 of operand 0.  Operands 0 and 1 must be
;; rtx_equal_p.  The memory alternative for operand 2 is restricted to isa
;; nox64.
10530 ;; Generated by the peephole that translates test to and.  This shows up
10531 ;; often in fp comparisons.
10532 (define_insn "*andqi_ext<mode>_1_cc"
10533 [(set (reg FLAGS_REG)
10534 (compare
10535 (and:QI
10536 (subreg:QI
10537 (zero_extract:SWI248
10538 (match_operand:SWI248 1 "register_operand" "0,0")
10539 (const_int 8)
10540 (const_int 8)) 0)
10541 (match_operand:QI 2 "general_operand" "QnBc,m"))
10542 (const_int 0)))
10543 (set (zero_extract:SWI248
10544 (match_operand:SWI248 0 "register_operand" "+Q,Q")
10545 (const_int 8)
10546 (const_int 8))
10547 (subreg:SWI248
10548 (and:QI
10549 (subreg:QI
10550 (zero_extract:SWI248
10551 (match_dup 1)
10552 (const_int 8)
10553 (const_int 8)) 0)
10554 (match_dup 2)) 0))]
10555 "ix86_match_ccmode (insn, CCNOmode)
10556 /* FIXME: without this LRA can't reload this pattern, see PR82524. */
10557 && rtx_equal_p (operands[0], operands[1])"
10558 "and{b}\t{%2, %h0|%h0, %2}"
10559 [(set_attr "isa" "*,nox64")
10560 (set_attr "type" "alu")
10561 (set_attr "mode" "QI")])
10562
;; QImode AND where both inputs are the 8-bit fields at position 8 of
;; operands 1 and 2, stored into the same field of operand 0 (all printed
;; with the %h modifier).  Operand 0 must be rtx_equal_p to operand 1 or to
;; operand 2.
10563 (define_insn "*andqi_ext<mode>_2"
10564 [(set (zero_extract:SWI248
10565 (match_operand:SWI248 0 "register_operand" "+Q")
10566 (const_int 8)
10567 (const_int 8))
10568 (subreg:SWI248
10569 (and:QI
10570 (subreg:QI
10571 (zero_extract:SWI248
10572 (match_operand:SWI248 1 "register_operand" "%0")
10573 (const_int 8)
10574 (const_int 8)) 0)
10575 (subreg:QI
10576 (zero_extract:SWI248
10577 (match_operand:SWI248 2 "register_operand" "Q")
10578 (const_int 8)
10579 (const_int 8)) 0)) 0))
10580 (clobber (reg:CC FLAGS_REG))]
10581 "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
10582 rtx_equal_p (operands[0], operands[1])
10583 || rtx_equal_p (operands[0], operands[2])"
10584 "and{b}\t{%h2, %h0|%h0, %h2}"
10585 [(set_attr "type" "alu")
10586 (set_attr "mode" "QI")])
10587
10588 ;; Convert wide AND instructions with immediate operand to shorter QImode
10589 ;; equivalents when possible.
10590 ;; Don't do the splitting with memory operands, since it introduces risk
10591 ;; of memory mismatch stalls. We may want to do the splitting for optimizing
10592 ;; for size, but that can (should?) be handled by generic code instead.
;; This split handles constants whose bits outside 8..15 are all set
;; (the "~(255 << 8)" test): after reload the AND becomes a QImode AND on
;; the 8-bit field at position 8, using the SImode low parts of operands 0
;; and 1 and the constant shifted right by 8 and reduced to QImode.
10593 (define_split
10594 [(set (match_operand:SWI248 0 "QIreg_operand")
10595 (and:SWI248 (match_operand:SWI248 1 "register_operand")
10596 (match_operand:SWI248 2 "const_int_operand")))
10597 (clobber (reg:CC FLAGS_REG))]
10598 "reload_completed
10599 && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
10600 && !(~INTVAL (operands[2]) & ~(255 << 8))"
10601 [(parallel
10602 [(set (zero_extract:SI (match_dup 0)
10603 (const_int 8)
10604 (const_int 8))
10605 (subreg:SI
10606 (and:QI
10607 (subreg:QI
10608 (zero_extract:SI (match_dup 1)
10609 (const_int 8)
10610 (const_int 8)) 0)
10611 (match_dup 2)) 0))
10612 (clobber (reg:CC FLAGS_REG))])]
10613 {
10614 operands[0] = gen_lowpart (SImode, operands[0]);
10615 operands[1] = gen_lowpart (SImode, operands[1]);
10616 operands[2] = gen_int_mode (INTVAL (operands[2]) >> 8, QImode);
10617 })
10618
10619 ;; Since AND can be encoded with sign extended immediate, this is only
10620 ;; profitable when 7th bit is not set.
;; This split handles constants whose bits above bit 7 are all set (the
;; "~255" test) and whose bit 7 (value 128) is clear: after reload the AND
;; becomes a strict_low_part QImode AND on the QImode low parts of
;; operands 0 and 1 with the constant reduced to QImode.
10621 (define_split
10622 [(set (match_operand:SWI248 0 "any_QIreg_operand")
10623 (and:SWI248 (match_operand:SWI248 1 "general_operand")
10624 (match_operand:SWI248 2 "const_int_operand")))
10625 (clobber (reg:CC FLAGS_REG))]
10626 "reload_completed
10627 && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
10628 && !(~INTVAL (operands[2]) & ~255)
10629 && !(INTVAL (operands[2]) & 128)"
10630 [(parallel [(set (strict_low_part (match_dup 0))
10631 (and:QI (match_dup 1)
10632 (match_dup 2)))
10633 (clobber (reg:CC FLAGS_REG))])]
10634 {
10635 operands[0] = gen_lowpart (QImode, operands[0]);
10636 operands[1] = gen_lowpart (QImode, operands[1]);
10637 operands[2] = gen_int_mode (INTVAL (operands[2]), QImode);
10638 })
10639
;; Double-word and-not (~operand 1 & operand 2) for TARGET_BMI.  The insn
;; always outputs "#" and is split after reload into two DWIH-mode and-not
;; operations, one on operands 0-2 and one on operands 3-5, which
;; split_double_mode fills in from the three double-word operands.
10640 (define_insn_and_split "*andn<dwi>3_doubleword_bmi"
10641 [(set (match_operand:<DWI> 0 "register_operand" "=&r,r,r")
10642 (and:<DWI>
10643 (not:<DWI> (match_operand:<DWI> 1 "register_operand" "r,0,r"))
10644 (match_operand:<DWI> 2 "nonimmediate_operand" "ro,ro,0")))
10645 (clobber (reg:CC FLAGS_REG))]
10646 "TARGET_BMI"
10647 "#"
10648 "&& reload_completed"
10649 [(parallel [(set (match_dup 0)
10650 (and:DWIH (not:DWIH (match_dup 1)) (match_dup 2)))
10651 (clobber (reg:CC FLAGS_REG))])
10652 (parallel [(set (match_dup 3)
10653 (and:DWIH (not:DWIH (match_dup 4)) (match_dup 5)))
10654 (clobber (reg:CC FLAGS_REG))])]
10655 "split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);")
10656
;; Double-word and-not when BMI is not available.  Matches only while
;; ix86_pre_reload_split () holds and is split unconditionally ("&& 1")
;; into a NOT of operand 1 into a fresh pseudo (operand 3) followed by an
;; AND of that pseudo with operand 2.
10657 (define_insn_and_split "*andn<mode>3_doubleword"
10658 [(set (match_operand:DWI 0 "register_operand")
10659 (and:DWI
10660 (not:DWI (match_operand:DWI 1 "register_operand"))
10661 (match_operand:DWI 2 "nonimmediate_operand")))
10662 (clobber (reg:CC FLAGS_REG))]
10663 "!TARGET_BMI
10664 && ix86_pre_reload_split ()"
10665 "#"
10666 "&& 1"
10667 [(set (match_dup 3) (not:DWI (match_dup 1)))
10668 (parallel [(set (match_dup 0)
10669 (and:DWI (match_dup 3) (match_dup 2)))
10670 (clobber (reg:CC FLAGS_REG))])]
10671 "operands[3] = gen_reg_rtx (<MODE>mode);")
10672
;; and-not (~operand 1 & operand 2) for the SWI48 modes.  Alternatives 0-1
;; (isa bmi) emit andn with a register or memory operand 2; alternative 2
;; (all "k" constraints, isa avx512bw) outputs "#" and has type msklog.
10673 (define_insn "*andn<mode>_1"
10674 [(set (match_operand:SWI48 0 "register_operand" "=r,r,?k")
10675 (and:SWI48
10676 (not:SWI48 (match_operand:SWI48 1 "register_operand" "r,r,k"))
10677 (match_operand:SWI48 2 "nonimmediate_operand" "r,m,k")))
10678 (clobber (reg:CC FLAGS_REG))]
10679 "TARGET_BMI || TARGET_AVX512BW"
10680 "@
10681 andn\t{%2, %1, %0|%0, %1, %2}
10682 andn\t{%2, %1, %0|%0, %1, %2}
10683 #"
10684 [(set_attr "isa" "bmi,bmi,avx512bw")
10685 (set_attr "type" "bitmanip,bitmanip,msklog")
10686 (set_attr "btver2_decode" "direct, double,*")
10687 (set_attr "mode" "<MODE>")])
10688
;; and-not (~operand 1 & operand 2) for the SWI12 modes.  Alternative 0
;; (isa bmi) emits andn on the %k forms of the operands; alternative 1
;; (all "k" constraints, isa avx512f) outputs "#" and has type msklog.
;; NOTE(review): the insn condition is TARGET_BMI || TARGET_AVX512BW while
;; alternative 1 is tagged isa avx512f -- confirm the condition is meant to
;; be this strict.
10689 (define_insn "*andn<mode>_1"
10690 [(set (match_operand:SWI12 0 "register_operand" "=r,?k")
10691 (and:SWI12
10692 (not:SWI12 (match_operand:SWI12 1 "register_operand" "r,k"))
10693 (match_operand:SWI12 2 "register_operand" "r,k")))
10694 (clobber (reg:CC FLAGS_REG))]
10695 "TARGET_BMI || TARGET_AVX512BW"
10696 "@
10697 andn\t{%k2, %k1, %k0|%k0, %k1, %k2}
10698 #"
10699 [(set_attr "isa" "bmi,avx512f")
10700 (set_attr "type" "bitmanip,msklog")
10701 (set_attr "btver2_decode" "direct,*")
;; Mode is SI for alternative 0, HI for alternative 1 when
;; !TARGET_AVX512DQ, and <MODE> otherwise.
10702 (set (attr "mode")
10703 (cond [(eq_attr "alternative" "0")
10704 (const_string "SI")
10705 (and (eq_attr "alternative" "1")
10706 (match_test "!TARGET_AVX512DQ"))
10707 (const_string "HI")
10708 ]
10709 (const_string "<MODE>")))])
10710
;; and-not used only for its effect on FLAGS_REG (CCNOmode): the result of
;; ~operand 1 & operand 2 is compared with zero, and the value itself is
;; written to a scratch register (operand 0).  Requires TARGET_BMI.
10711 (define_insn "*andn_<mode>_ccno"
10712 [(set (reg FLAGS_REG)
10713 (compare
10714 (and:SWI48
10715 (not:SWI48 (match_operand:SWI48 1 "register_operand" "r,r"))
10716 (match_operand:SWI48 2 "nonimmediate_operand" "r,m"))
10717 (const_int 0)))
10718 (clobber (match_scratch:SWI48 0 "=r,r"))]
10719 "TARGET_BMI && ix86_match_ccmode (insn, CCNOmode)"
10720 "andn\t{%2, %1, %0|%0, %1, %2}"
10721 [(set_attr "type" "bitmanip")
10722 (set_attr "btver2_decode" "direct, double")
10723 (set_attr "mode" "<MODE>")])
10724
10725 ;; Split *andnsi_1 after reload with -Oz when not;and is shorter.
;; Applies when optimizing the insn for size with optimize_size > 1,
;; operands 0 and 1 are the same register and it is a legacy integer
;; register, and operand 2 is not a REX integer register and does not
;; overlap operand 0.  The result is a NOT of the register followed by an
;; AND with operand 2.
10726 (define_split
10727 [(set (match_operand:SI 0 "register_operand")
10728 (and:SI (not:SI (match_operand:SI 1 "register_operand"))
10729 (match_operand:SI 2 "nonimmediate_operand")))
10730 (clobber (reg:CC FLAGS_REG))]
10731 "reload_completed
10732 && optimize_insn_for_size_p () && optimize_size > 1
10733 && REGNO (operands[0]) == REGNO (operands[1])
10734 && LEGACY_INT_REG_P (operands[0])
10735 && !REX_INT_REG_P (operands[2])
10736 && !reg_overlap_mentioned_p (operands[0], operands[2])"
10737 [(set (match_dup 0) (not:SI (match_dup 1)))
10738 (parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 2)))
10739 (clobber (reg:CC FLAGS_REG))])])
10740
10741 ;; Split *andn_si_ccno with -Oz when not;test is shorter.
;; The matched insn clobbers operand 2, so it is inverted in place and the
;; flags are then set from comparing (operand 3 & operand 2) with zero using
;; the original compare operator.  Applies after reload when optimizing the
;; insn for size with optimize_size > 1, operand 2 is a legacy integer
;; register, and operand 3 is not a REX integer register and does not
;; overlap operand 2.
10742 (define_split
10743 [(set (match_operand 0 "flags_reg_operand")
10744 (match_operator 1 "compare_operator"
10745 [(and:SI (not:SI (match_operand:SI 2 "general_reg_operand"))
10746 (match_operand:SI 3 "nonimmediate_operand"))
10747 (const_int 0)]))
10748 (clobber (match_dup 2))]
10749 "reload_completed
10750 && optimize_insn_for_size_p () && optimize_size > 1
10751 && LEGACY_INT_REG_P (operands[2])
10752 && !REX_INT_REG_P (operands[3])
10753 && !reg_overlap_mentioned_p (operands[2], operands[3])"
10754 [(set (match_dup 2) (not:SI (match_dup 2)))
10755 (set (match_dup 0) (match_op_dup 1
10756 [(and:SI (match_dup 3) (match_dup 2))
10757 (const_int 0)]))])
10758
10759 ;; Variant 1 of 4: Split ((A | B) ^ A) ^ C as (B & ~A) ^ C.
;; Here A is operand 1, B is operand 2 and C is operand 3.  Requires
;; TARGET_BMI; the and-not result goes to a fresh pseudo (operand 4), which
;; is then XORed with operand 3 into operand 0.
10760 (define_split
10761 [(set (match_operand:SWI48 0 "register_operand")
10762 (xor:SWI48
10763 (xor:SWI48
10764 (ior:SWI48 (match_operand:SWI48 1 "register_operand")
10765 (match_operand:SWI48 2 "nonimmediate_operand"))
10766 (match_dup 1))
10767 (match_operand:SWI48 3 "nonimmediate_operand")))
10768 (clobber (reg:CC FLAGS_REG))]
10769 "TARGET_BMI"
10770 [(parallel
10771 [(set (match_dup 4) (and:SWI48 (not:SWI48 (match_dup 1)) (match_dup 2)))
10772 (clobber (reg:CC FLAGS_REG))])
10773 (parallel
10774 [(set (match_dup 0) (xor:SWI48 (match_dup 4) (match_dup 3)))
10775 (clobber (reg:CC FLAGS_REG))])]
10776 "operands[4] = gen_reg_rtx (<MODE>mode);")
10777
10778 ;; Variant 2 of 4: Split ((A | B) ^ B) ^ C as (A & ~B) ^ C.
;; Here A is operand 1, B is operand 2 (a register in this variant, since
;; it is the inverted input) and C is operand 3.  Requires TARGET_BMI; the
;; and-not result goes to a fresh pseudo (operand 4), which is then XORed
;; with operand 3 into operand 0.
10779 (define_split
10780 [(set (match_operand:SWI48 0 "register_operand")
10781 (xor:SWI48
10782 (xor:SWI48
10783 (ior:SWI48 (match_operand:SWI48 1 "register_operand")
10784 (match_operand:SWI48 2 "register_operand"))
10785 (match_dup 2))
10786 (match_operand:SWI48 3 "nonimmediate_operand")))
10787 (clobber (reg:CC FLAGS_REG))]
10788 "TARGET_BMI"
10789 [(parallel
10790 [(set (match_dup 4) (and:SWI48 (not:SWI48 (match_dup 2)) (match_dup 1)))
10791 (clobber (reg:CC FLAGS_REG))])
10792 (parallel
10793 [(set (match_dup 0) (xor:SWI48 (match_dup 4) (match_dup 3)))
10794 (clobber (reg:CC FLAGS_REG))])]
10795 "operands[4] = gen_reg_rtx (<MODE>mode);")
10796
10797 ;; Variant 3 of 4: Split ((A | B) ^ C) ^ A as (B & ~A) ^ C.
;; Here A is operand 1, B is operand 2 and C is operand 3.  Requires
;; TARGET_BMI; the and-not result goes to a fresh pseudo (operand 4), which
;; is then XORed with operand 3 into operand 0.
10798 (define_split
10799 [(set (match_operand:SWI48 0 "register_operand")
10800 (xor:SWI48
10801 (xor:SWI48
10802 (ior:SWI48 (match_operand:SWI48 1 "register_operand")
10803 (match_operand:SWI48 2 "nonimmediate_operand"))
10804 (match_operand:SWI48 3 "nonimmediate_operand"))
10805 (match_dup 1)))
10806 (clobber (reg:CC FLAGS_REG))]
10807 "TARGET_BMI"
10808 [(parallel
10809 [(set (match_dup 4) (and:SWI48 (not:SWI48 (match_dup 1)) (match_dup 2)))
10810 (clobber (reg:CC FLAGS_REG))])
10811 (parallel
10812 [(set (match_dup 0) (xor:SWI48 (match_dup 4) (match_dup 3)))
10813 (clobber (reg:CC FLAGS_REG))])]
10814 "operands[4] = gen_reg_rtx (<MODE>mode);")
10815
10816 ;; Variant 4 of 4: Split ((A | B) ^ C) ^ B as (A & ~B) ^ C.
;; Here A is operand 1, B is operand 2 (a register in this variant, since
;; it is the inverted input) and C is operand 3.  Requires TARGET_BMI; the
;; and-not result goes to a fresh pseudo (operand 4), which is then XORed
;; with operand 3 into operand 0.
10817 (define_split
10818 [(set (match_operand:SWI48 0 "register_operand")
10819 (xor:SWI48
10820 (xor:SWI48
10821 (ior:SWI48 (match_operand:SWI48 1 "register_operand")
10822 (match_operand:SWI48 2 "register_operand"))
10823 (match_operand:SWI48 3 "nonimmediate_operand"))
10824 (match_dup 2)))
10825 (clobber (reg:CC FLAGS_REG))]
10826 "TARGET_BMI"
10827 [(parallel
10828 [(set (match_dup 4) (and:SWI48 (not:SWI48 (match_dup 2)) (match_dup 1)))
10829 (clobber (reg:CC FLAGS_REG))])
10830 (parallel
10831 [(set (match_dup 0) (xor:SWI48 (match_dup 4) (match_dup 3)))
10832 (clobber (reg:CC FLAGS_REG))])]
10833 "operands[4] = gen_reg_rtx (<MODE>mode);")
10834 \f
10835 ;; Logical inclusive and exclusive OR instructions
10836
10837 ;; %%% This used to optimize known byte-wide and operations to memory.
10838 ;; If this is considered useful, it should be done with splitters.
10839
;; Named expander for the any_or operations in the SDWIM modes.  When the
;; mode is wider than UNITS_PER_WORD and operand 2 does not satisfy
;; x86_64_hilo_general_operand, operand 2 is first forced into a register;
;; the actual expansion is delegated to ix86_expand_binary_operator.
10840 (define_expand "<code><mode>3"
10841 [(set (match_operand:SDWIM 0 "nonimmediate_operand")
10842 (any_or:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")
10843 (match_operand:SDWIM 2 "<general_operand>")))]
10844 ""
10845 {
10846 if (GET_MODE_SIZE (<MODE>mode) > UNITS_PER_WORD
10847 && !x86_64_hilo_general_operand (operands[2], <MODE>mode))
10848 operands[2] = force_reg (<MODE>mode, operands[2]);
10849
10850 ix86_expand_binary_operator (<CODE>, <MODE>mode, operands);
10851 DONE;
10852 })
10853
;; Double-word any_or operation.  The insn always outputs "#" and is split
;; after reload: split_double_mode breaks the three operands into two
;; groups of word-sized operands (operands 0-2 and operands 3-5), and each
;; group is expanded separately.  For each group, a second operand of
;; const0_rtx emits nothing, a second operand of constm1_rtx emits a move
;; of -1 when <CODE> is IOR and a NOT otherwise, and anything else goes
;; through ix86_expand_binary_operator.  If both groups emit nothing, a
;; NOTE_INSN_DELETED note is emitted instead.
10854 (define_insn_and_split "*<code><dwi>3_doubleword"
10855 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
10856 (any_or:<DWI>
10857 (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0")
10858 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o")))
10859 (clobber (reg:CC FLAGS_REG))]
10860 "ix86_binary_operator_ok (<CODE>, <DWI>mode, operands)"
10861 "#"
10862 "&& reload_completed"
10863 [(const_int:DWIH 0)]
10864 {
10865 /* This insn may disappear completely when operands[2] == const0_rtx
10866 and operands[0] == operands[1], which requires a NOTE_INSN_DELETED. */
10867 bool emit_insn_deleted_note_p = false;
10868
10869 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
10870
10871 if (operands[2] == const0_rtx)
10872 emit_insn_deleted_note_p = true;
10873 else if (operands[2] == constm1_rtx)
10874 {
10875 if (<CODE> == IOR)
10876 emit_move_insn (operands[0], constm1_rtx);
10877 else
10878 ix86_expand_unary_operator (NOT, <MODE>mode, &operands[0]);
10879 }
10880 else
10881 ix86_expand_binary_operator (<CODE>, <MODE>mode, &operands[0]);
10882
10883 if (operands[5] == const0_rtx)
10884 {
10885 if (emit_insn_deleted_note_p)
10886 emit_note (NOTE_INSN_DELETED);
10887 }
10888 else if (operands[5] == constm1_rtx)
10889 {
10890 if (<CODE> == IOR)
10891 emit_move_insn (operands[3], constm1_rtx);
10892 else
10893 ix86_expand_unary_operator (NOT, <MODE>mode, &operands[3]);
10894 }
10895 else
10896 ix86_expand_binary_operator (<CODE>, <MODE>mode, &operands[3]);
10897
10898 DONE;
10899 })
10900
;; any_or operation for the SWI248 modes; FLAGS_REG is clobbered.
;; Alternatives 0-1 emit the <logic> instruction directly; alternative 2
;; (all "k" constraints) outputs "#" and has type msklog.
10901 (define_insn "*<code><mode>_1"
10902 [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,?k")
10903 (any_or:SWI248
10904 (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,k")
10905 (match_operand:SWI248 2 "<general_operand>" "r<i>,<m>,k")))
10906 (clobber (reg:CC FLAGS_REG))]
10907 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10908 "@
10909 <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
10910 <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
10911 #"
;; Alternative 2 needs isa avx512bw in the SI and DI modes and isa avx512f
;; otherwise; the other alternatives are unrestricted.
10912 [(set (attr "isa")
10913 (cond [(eq_attr "alternative" "2")
10914 (if_then_else (eq_attr "mode" "SI,DI")
10915 (const_string "avx512bw")
10916 (const_string "avx512f"))
10917 ]
10918 (const_string "*")))
10919 (set_attr "type" "alu, alu, msklog")
10920 (set_attr "mode" "<MODE>")])
10921
;; NOT of an XOR for the SWI248 modes.  The insn always outputs "#" and is
;; split after reload.  When operand 0 is a mask register (MASK_REG_P) the
;; preparation code emits a single gen_kxnor<mode> insn and finishes;
;; otherwise the split template is used: an XOR into operand 0 followed by
;; a NOT of operand 0.
10922 (define_insn_and_split "*notxor<mode>_1"
10923 [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,?k")
10924 (not:SWI248
10925 (xor:SWI248
10926 (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,k")
10927 (match_operand:SWI248 2 "<general_operand>" "r<i>,<m>,k"))))
10928 (clobber (reg:CC FLAGS_REG))]
10929 "ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
10930 "#"
10931 "&& reload_completed"
10932 [(parallel
10933 [(set (match_dup 0)
10934 (xor:SWI248 (match_dup 1) (match_dup 2)))
10935 (clobber (reg:CC FLAGS_REG))])
10936 (set (match_dup 0)
10937 (not:SWI248 (match_dup 0)))]
10938 {
10939 if (MASK_REG_P (operands[0]))
10940 {
10941 emit_insn (gen_kxnor<mode> (operands[0], operands[1], operands[2]));
10942 DONE;
10943 }
10944 }
;; Alternative 2 needs isa avx512bw in the SI and DI modes and isa avx512f
;; otherwise; the other alternatives are unrestricted.
10945 [(set (attr "isa")
10946 (cond [(eq_attr "alternative" "2")
10947 (if_then_else (eq_attr "mode" "SI,DI")
10948 (const_string "avx512bw")
10949 (const_string "avx512f"))
10950 ]
10951 (const_string "*")))
10952 (set_attr "type" "alu, alu, msklog")
10953 (set_attr "mode" "<MODE>")])
10954
;; DImode IOR with a constant whose exact_log2 lies in 31..63, for
;; TARGET_64BIT && TARGET_USE_BT.  The insn always outputs "#" and is split
;; after reload into setting the one-bit zero_extract of operand 0 at that
;; bit position (operand 3) to 1.
10955 (define_insn_and_split "*iordi_1_bts"
10956 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
10957 (ior:DI
10958 (match_operand:DI 1 "nonimmediate_operand" "%0")
10959 (match_operand:DI 2 "const_int_operand" "n")))
10960 (clobber (reg:CC FLAGS_REG))]
10961 "TARGET_64BIT && TARGET_USE_BT
10962 && ix86_binary_operator_ok (IOR, DImode, operands)
10963 && IN_RANGE (exact_log2 (INTVAL (operands[2])), 31, 63)"
10964 "#"
10965 "&& reload_completed"
10966 [(parallel [(set (zero_extract:DI (match_dup 0)
10967 (const_int 1)
10968 (match_dup 3))
10969 (const_int 1))
10970 (clobber (reg:CC FLAGS_REG))])]
10971 "operands[3] = GEN_INT (exact_log2 (INTVAL (operands[2])));"
10972 [(set_attr "type" "alu1")
10973 (set_attr "prefix_0f" "1")
10974 (set_attr "znver1_decode" "double")
10975 (set_attr "mode" "DI")])
10976
;; DImode XOR with a constant whose exact_log2 lies in 31..63, for
;; TARGET_64BIT && TARGET_USE_BT.  The insn always outputs "#" and is split
;; after reload into replacing the one-bit zero_extract of operand 0 at
;; that bit position (operand 3) with its complement.
10977 (define_insn_and_split "*xordi_1_btc"
10978 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
10979 (xor:DI
10980 (match_operand:DI 1 "nonimmediate_operand" "%0")
10981 (match_operand:DI 2 "const_int_operand" "n")))
10982 (clobber (reg:CC FLAGS_REG))]
10983 "TARGET_64BIT && TARGET_USE_BT
10984 && ix86_binary_operator_ok (XOR, DImode, operands)
10985 && IN_RANGE (exact_log2 (INTVAL (operands[2])), 31, 63)"
10986 "#"
10987 "&& reload_completed"
10988 [(parallel [(set (zero_extract:DI (match_dup 0)
10989 (const_int 1)
10990 (match_dup 3))
10991 (not:DI (zero_extract:DI (match_dup 0)
10992 (const_int 1)
10993 (match_dup 3))))
10994 (clobber (reg:CC FLAGS_REG))])]
10995 "operands[3] = GEN_INT (exact_log2 (INTVAL (operands[2])));"
10996 [(set_attr "type" "alu1")
10997 (set_attr "prefix_0f" "1")
10998 (set_attr "znver1_decode" "double")
10999 (set_attr "mode" "DI")])
11000
11001 ;; Optimize a ^ ((a ^ b) & mask) to (~mask & a) | (b & mask)
;; Here a is operand 1, b is operand 2 and mask is operand 3.  Requires
;; TARGET_BMI and matches only while ix86_pre_reload_split () holds; it is
;; split unconditionally ("&& 1").  The preparation code forces operands 1
;; and 3 into registers and allocates fresh pseudos for the two
;; intermediate results (operands 4 and 5), which are then IORed into
;; operand 0.
11002 (define_insn_and_split "*xor2andn"
11003 [(set (match_operand:SWI248 0 "register_operand")
11004 (xor:SWI248
11005 (and:SWI248
11006 (xor:SWI248
11007 (match_operand:SWI248 1 "nonimmediate_operand")
11008 (match_operand:SWI248 2 "nonimmediate_operand"))
11009 (match_operand:SWI248 3 "nonimmediate_operand"))
11010 (match_dup 1)))
11011 (clobber (reg:CC FLAGS_REG))]
11012 "TARGET_BMI && ix86_pre_reload_split ()"
11013 "#"
11014 "&& 1"
11015 [(parallel [(set (match_dup 4)
11016 (and:SWI248
11017 (not:SWI248
11018 (match_dup 3))
11019 (match_dup 1)))
11020 (clobber (reg:CC FLAGS_REG))])
11021 (parallel [(set (match_dup 5)
11022 (and:SWI248
11023 (match_dup 3)
11024 (match_dup 2)))
11025 (clobber (reg:CC FLAGS_REG))])
11026 (parallel [(set (match_dup 0)
11027 (ior:SWI248
11028 (match_dup 4)
11029 (match_dup 5)))
11030 (clobber (reg:CC FLAGS_REG))])]
11031 {
11032 operands[1] = force_reg (<MODE>mode, operands[1]);
11033 operands[3] = force_reg (<MODE>mode, operands[3]);
11034 operands[4] = gen_reg_rtx (<MODE>mode);
11035 operands[5] = gen_reg_rtx (<MODE>mode);
11036 })
11037
;; SImode any_or operation whose result is zero-extended into DImode
;; operand 0; emitted as the 32-bit <logic>{l} instruction on the %k form
;; of operand 0.  TARGET_64BIT only; FLAGS_REG is clobbered.
11038 ;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
11039 (define_insn "*<code>si_1_zext"
11040 [(set (match_operand:DI 0 "register_operand" "=r")
11041 (zero_extend:DI
11042 (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
11043 (match_operand:SI 2 "x86_64_general_operand" "rBMe"))))
11044 (clobber (reg:CC FLAGS_REG))]
11045 "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
11046 "<logic>{l}\t{%2, %k0|%k0, %2}"
11047 [(set_attr "type" "alu")
11048 (set_attr "mode" "SI")])
11049
;; DImode any_or operation of a zero-extended SImode register (operand 1)
;; with an immediate matching x86_64_zext_immediate_operand (constraint
;; "Z"); emitted as the 32-bit <logic>{l} instruction on the %k form of
;; operand 0.  TARGET_64BIT only; FLAGS_REG is clobbered.
11050 (define_insn "*<code>si_1_zext_imm"
11051 [(set (match_operand:DI 0 "register_operand" "=r")
11052 (any_or:DI
11053 (zero_extend:DI (match_operand:SI 1 "register_operand" "%0"))
11054 (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z")))
11055 (clobber (reg:CC FLAGS_REG))]
11056 "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
11057 "<logic>{l}\t{%2, %k0|%k0, %2}"
11058 [(set_attr "type" "alu")
11059 (set_attr "mode" "SI")])
11060
;; QImode any_or operation; FLAGS_REG is clobbered.  Alternatives 0-1 emit
;; <logic>{b}; alternative 2 emits <logic>{l} on the %k forms of the
;; operands; alternative 3 (all "k" constraints, isa avx512f) outputs "#"
;; and has type msklog.
11061 (define_insn "*<code>qi_1"
11062 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,?k")
11063 (any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k")
11064 (match_operand:QI 2 "general_operand" "qn,m,rn,k")))
11065 (clobber (reg:CC FLAGS_REG))]
11066 "ix86_binary_operator_ok (<CODE>, QImode, operands)"
11067 "@
11068 <logic>{b}\t{%2, %0|%0, %2}
11069 <logic>{b}\t{%2, %0|%0, %2}
11070 <logic>{l}\t{%k2, %k0|%k0, %k2}
11071 #"
11072 [(set_attr "isa" "*,*,*,avx512f")
11073 (set_attr "type" "alu,alu,alu,msklog")
;; Mode is SI for alternative 2, HI for alternative 3 when
;; !TARGET_AVX512DQ, and QI otherwise.
11074 (set (attr "mode")
11075 (cond [(eq_attr "alternative" "2")
11076 (const_string "SI")
11077 (and (eq_attr "alternative" "3")
11078 (match_test "!TARGET_AVX512DQ"))
11079 (const_string "HI")
11080 ]
11081 (const_string "QI")))
11082 ;; Potential partial reg stall on alternative 2.
11083 (set (attr "preferred_for_speed")
11084 (cond [(eq_attr "alternative" "2")
11085 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
11086 (symbol_ref "true")))])
11087
;; QImode NOT of an XOR.  The insn always outputs "#" and is split after
;; reload.  When operand 0 satisfies mask_reg_operand the preparation code
;; emits a single gen_kxnorqi insn and finishes; otherwise the split
;; template is used: an XOR into operand 0 followed by a NOT of operand 0.
11088 (define_insn_and_split "*notxorqi_1"
11089 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,?k")
11090 (not:QI
11091 (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k")
11092 (match_operand:QI 2 "general_operand" "qn,m,rn,k"))))
11093 (clobber (reg:CC FLAGS_REG))]
11094 "ix86_binary_operator_ok (XOR, QImode, operands)"
11095 "#"
11096 "&& reload_completed"
11097 [(parallel
11098 [(set (match_dup 0)
11099 (xor:QI (match_dup 1) (match_dup 2)))
11100 (clobber (reg:CC FLAGS_REG))])
11101 (set (match_dup 0)
11102 (not:QI (match_dup 0)))]
11103 {
11104 if (mask_reg_operand (operands[0], QImode))
11105 {
11106 emit_insn (gen_kxnorqi (operands[0], operands[1], operands[2]));
11107 DONE;
11108 }
11109 }
11110 [(set_attr "isa" "*,*,*,avx512f")
11111 (set_attr "type" "alu,alu,alu,msklog")
;; Mode is SI for alternative 2, HI for alternative 3 when
;; !TARGET_AVX512DQ, and QI otherwise.
11112 (set (attr "mode")
11113 (cond [(eq_attr "alternative" "2")
11114 (const_string "SI")
11115 (and (eq_attr "alternative" "3")
11116 (match_test "!TARGET_AVX512DQ"))
11117 (const_string "HI")
11118 ]
11119 (const_string "QI")))
11120 ;; Potential partial reg stall on alternative 2.
11121 (set (attr "preferred_for_speed")
11122 (cond [(eq_attr "alternative" "2")
11123 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
11124 (symbol_ref "true")))])
11125
;; any_or operation into the low part of operand 0 (strict_low_part) for
;; the SWI12 modes, enabled when !TARGET_PARTIAL_REG_STALL or when
;; optimizing the function for size.  Alternative 0 ties operand 1 to
;; operand 0 and emits a single instruction.  Alternative 1 outputs "#" and
;; is split after reload into a strict_low_part copy of operand 1 into
;; operand 0 followed by the operation on operand 0 and operand 2.
11126 ;; Alternative 1 is needed to work around an LRA limitation; see PR82524.
11127 (define_insn_and_split "*<code><mode>_1_slp"
11128 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
11129 (any_or:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "%0,!<r>")
11130 (match_operand:SWI12 2 "general_operand" "<r>mn,<r>mn")))
11131 (clobber (reg:CC FLAGS_REG))]
11132 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
11133 "@
11134 <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
11135 #"
11136 "&& reload_completed"
11137 [(set (strict_low_part (match_dup 0)) (match_dup 1))
11138 (parallel
11139 [(set (strict_low_part (match_dup 0))
11140 (any_or:SWI12 (match_dup 0) (match_dup 2)))
11141 (clobber (reg:CC FLAGS_REG))])]
11142 ""
11143 [(set_attr "type" "alu")
11144 (set_attr "mode" "<MODE>")])
11145
11146 ;; Convert (sign_extend:WIDE (any_logic:NARROW (memory, immediate)))
11147 ;; to (any_logic:WIDE (sign_extend (memory)), (sign_extend (immediate))).
11148 ;; This eliminates sign extension after logic operation.
11149
;; QImode source extended to a SWI248 mode: the memory operand is
;; sign-extended into a fresh pseudo (operand 3) and the logic operation is
;; then done in the wide mode with the same constant operand 2.
11150 (define_split
11151 [(set (match_operand:SWI248 0 "register_operand")
11152 (sign_extend:SWI248
11153 (any_logic:QI (match_operand:QI 1 "memory_operand")
11154 (match_operand:QI 2 "const_int_operand"))))]
11155 ""
11156 [(set (match_dup 3) (sign_extend:SWI248 (match_dup 1)))
11157 (set (match_dup 0) (any_logic:SWI248 (match_dup 3) (match_dup 2)))]
11158 "operands[3] = gen_reg_rtx (<MODE>mode);")
11159
;; Sign extension of an HImode logic operation on memory and a constant,
;; extended to a SWI48 mode: the memory operand is sign-extended into a
;; fresh pseudo (operand 3) and the logic operation is then done in the
;; wide mode with the same constant operand 2.
11160 (define_split
11161 [(set (match_operand:SWI48 0 "register_operand")
11162 (sign_extend:SWI48
11163 (any_logic:HI (match_operand:HI 1 "memory_operand")
11164 (match_operand:HI 2 "const_int_operand"))))]
11165 ""
11166 [(set (match_dup 3) (sign_extend:SWI48 (match_dup 1)))
11167 (set (match_dup 0) (any_logic:SWI48 (match_dup 3) (match_dup 2)))]
11168 "operands[3] = gen_reg_rtx (<MODE>mode);")
11169
;; Sign extension of an SImode logic operation on memory and a constant,
;; extended to DImode (TARGET_64BIT only): the memory operand is
;; sign-extended into a fresh DImode pseudo (operand 3) and the logic
;; operation is then done in DImode with the same constant operand 2.
11170 (define_split
11171 [(set (match_operand:DI 0 "register_operand")
11172 (sign_extend:DI
11173 (any_logic:SI (match_operand:SI 1 "memory_operand")
11174 (match_operand:SI 2 "const_int_operand"))))]
11175 "TARGET_64BIT"
11176 [(set (match_dup 3) (sign_extend:DI (match_dup 1)))
11177 (set (match_dup 0) (any_logic:DI (match_dup 3) (match_dup 2)))]
11178 "operands[3] = gen_reg_rtx (DImode);")
11179
;; any_or operation for the SWI modes that stores its result in operand 0
;; and also sets FLAGS_REG (CCNOmode) from comparing that result with zero.
11180 (define_insn "*<code><mode>_2"
11181 [(set (reg FLAGS_REG)
11182 (compare (any_or:SWI
11183 (match_operand:SWI 1 "nonimmediate_operand" "%0,0")
11184 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>"))
11185 (const_int 0)))
11186 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
11187 (any_or:SWI (match_dup 1) (match_dup 2)))]
11188 "ix86_match_ccmode (insn, CCNOmode)
11189 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11190 "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
11191 [(set_attr "type" "alu")
11192 (set_attr "mode" "<MODE>")])
11193
11194 ;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
11195 ;; ??? Special case for immediate operand is missing - it is tricky.
;; 64-bit only.  SImode any_or operation whose result is written to a
;; DImode register as a zero extension, while FLAGS_REG is set from
;; comparing the SImode result with zero.
11196 (define_insn "*<code>si_2_zext"
11197 [(set (reg FLAGS_REG)
11198 (compare (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
11199 (match_operand:SI 2 "x86_64_general_operand" "rBMe"))
11200 (const_int 0)))
11201 (set (match_operand:DI 0 "register_operand" "=r")
11202 (zero_extend:DI (any_or:SI (match_dup 1) (match_dup 2))))]
11203 "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
11204 && ix86_binary_operator_ok (<CODE>, SImode, operands)"
11205 "<logic>{l}\t{%2, %k0|%k0, %2}"
11206 [(set_attr "type" "alu")
11207 (set_attr "mode" "SI")])
11208
;; 64-bit only.  Immediate form of the flag-setting, zero-extending SImode
;; any_or operation.  Operand 2 must satisfy
;; x86_64_zext_immediate_operand; the DImode result is written here as
;; the any_or of the zero-extended operand 1 with that immediate.
11209 (define_insn "*<code>si_2_zext_imm"
11210 [(set (reg FLAGS_REG)
11211 (compare (any_or:SI
11212 (match_operand:SI 1 "nonimmediate_operand" "%0")
11213 (match_operand:SI 2 "x86_64_zext_immediate_operand" "Z"))
11214 (const_int 0)))
11215 (set (match_operand:DI 0 "register_operand" "=r")
11216 (any_or:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
11217 "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
11218 && ix86_binary_operator_ok (<CODE>, SImode, operands)"
11219 "<logic>{l}\t{%2, %k0|%k0, %2}"
11220 [(set_attr "type" "alu")
11221 (set_attr "mode" "SI")])
11222
;; any_or operation performed only for its effect on FLAGS_REG: the
;; arithmetic result lands in a scratch register (operand 0) that is
;; merely clobbered.  The condition rejects the case where both inputs
;; are memory references.
11223 (define_insn "*<code><mode>_3"
11224 [(set (reg FLAGS_REG)
11225 (compare (any_or:SWI
11226 (match_operand:SWI 1 "nonimmediate_operand" "%0")
11227 (match_operand:SWI 2 "<general_operand>" "<g>"))
11228 (const_int 0)))
11229 (clobber (match_scratch:SWI 0 "=<r>"))]
11230 "ix86_match_ccmode (insn, CCNOmode)
11231 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11232 "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
11233 [(set_attr "type" "alu")
11234 (set_attr "mode" "<MODE>")])
11235
;; QImode any_or operation on the 8-bit field at bit position 8 (the
;; zero_extract with size 8, position 8) of a register, combined with a
;; general QImode operand 2.  The result is written back to the same field
;; of operand 0.  The second alternative (memory operand 2) carries the
;; "nox64" isa attribute.
11236 (define_insn "*<code>qi_ext<mode>_1"
11237 [(set (zero_extract:SWI248
11238 (match_operand:SWI248 0 "register_operand" "+Q,Q")
11239 (const_int 8)
11240 (const_int 8))
11241 (subreg:SWI248
11242 (any_or:QI
11243 (subreg:QI
11244 (zero_extract:SWI248
11245 (match_operand:SWI248 1 "register_operand" "0,0")
11246 (const_int 8)
11247 (const_int 8)) 0)
11248 (match_operand:QI 2 "general_operand" "QnBc,m")) 0))
11249 (clobber (reg:CC FLAGS_REG))]
;; Enabled unless partial register stalls matter and we are not optimizing
;; for size; operands 0 and 1 must already be the same rtx.
11250 "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
11251 /* FIXME: without this LRA can't reload this pattern, see PR82524. */
11252 && rtx_equal_p (operands[0], operands[1])"
11253 "<logic>{b}\t{%2, %h0|%h0, %2}"
11254 [(set_attr "isa" "*,nox64")
11255 (set_attr "type" "alu")
11256 (set_attr "mode" "QI")])
11257
;; QImode any_or operation where both inputs are the 8-bit field at bit
;; position 8 of a register (operands 1 and 2), with the result written to
;; the same field of operand 0.  Operand 0 must be the same rtx as either
;; operand 1 or operand 2.
11258 (define_insn "*<code>qi_ext<mode>_2"
11259 [(set (zero_extract:SWI248
11260 (match_operand:SWI248 0 "register_operand" "+Q")
11261 (const_int 8)
11262 (const_int 8))
11263 (subreg:SWI248
11264 (any_or:QI
11265 (subreg:QI
11266 (zero_extract:SWI248
11267 (match_operand:SWI248 1 "register_operand" "%0")
11268 (const_int 8)
11269 (const_int 8)) 0)
11270 (subreg:QI
11271 (zero_extract:SWI248
11272 (match_operand:SWI248 2 "register_operand" "Q")
11273 (const_int 8)
11274 (const_int 8)) 0)) 0))
11275 (clobber (reg:CC FLAGS_REG))]
11276 "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
11277 /* FIXME: without this LRA can't reload this pattern, see PR82524. */
11278 && (rtx_equal_p (operands[0], operands[1])
11279 || rtx_equal_p (operands[0], operands[2]))"
11280 "<logic>{b}\t{%h2, %h0|%h0, %h2}"
11281 [(set_attr "type" "alu")
11282 (set_attr "mode" "QI")])
11283
11284 ;; Convert wide OR instructions with immediate operand to shorter QImode
11285 ;; equivalents when possible.
11286 ;; Don't do the splitting with memory operands, since it introduces risk
11287 ;; of memory mismatch stalls. We may want to do the splitting for optimizing
11288 ;; for size, but that can (should?) be handled by generic code instead.
;;
;; This split runs after reload and applies when the constant has no bits
;; set outside bits 8..15 (the test against ~(255 << 8)).  The operation
;; is rewritten as a QImode operation on the 8-bit field at bit position 8
;; of the SImode low parts of operands 0 and 1, using the constant shifted
;; right by 8 as a QImode immediate.
11289 (define_split
11290 [(set (match_operand:SWI248 0 "QIreg_operand")
11291 (any_or:SWI248 (match_operand:SWI248 1 "register_operand")
11292 (match_operand:SWI248 2 "const_int_operand")))
11293 (clobber (reg:CC FLAGS_REG))]
11294 "reload_completed
11295 && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
11296 && !(INTVAL (operands[2]) & ~(255 << 8))"
11297 [(parallel
11298 [(set (zero_extract:SI (match_dup 0)
11299 (const_int 8)
11300 (const_int 8))
11301 (subreg:SI
11302 (any_or:QI
11303 (subreg:QI
11304 (zero_extract:SI (match_dup 1)
11305 (const_int 8)
11306 (const_int 8)) 0)
11307 (match_dup 2)) 0))
11308 (clobber (reg:CC FLAGS_REG))])]
11309 {
11310 /* Handle the case where INTVAL (operands[2]) == 0. */
11311 if (operands[2] == const0_rtx)
11312 {
11313 if (!rtx_equal_p (operands[0], operands[1]))
11314 emit_move_insn (operands[0], operands[1]);
11315 else
11316 emit_note (NOTE_INSN_DELETED);
11317 DONE;
11318 }
11319 operands[0] = gen_lowpart (SImode, operands[0]);
11320 operands[1] = gen_lowpart (SImode, operands[1]);
11321 operands[2] = gen_int_mode (INTVAL (operands[2]) >> 8, QImode);
11322 })
11323
11324 ;; Since OR can be encoded with sign extended immediate, this is only
11325 ;; profitable when 7th bit is set.
;;
;; This split runs after reload and applies when the constant has no bits
;; set above bit 7 and has bit 7 (value 128) set.  The operation is
;; rewritten as a QImode operation on the strict_low_part of the QImode
;; low part of operand 0, with operands 1 and 2 narrowed to QImode.
11326 (define_split
11327 [(set (match_operand:SWI248 0 "any_QIreg_operand")
11328 (any_or:SWI248 (match_operand:SWI248 1 "general_operand")
11329 (match_operand:SWI248 2 "const_int_operand")))
11330 (clobber (reg:CC FLAGS_REG))]
11331 "reload_completed
11332 && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
11333 && !(INTVAL (operands[2]) & ~255)
11334 && (INTVAL (operands[2]) & 128)"
11335 [(parallel [(set (strict_low_part (match_dup 0))
11336 (any_or:QI (match_dup 1)
11337 (match_dup 2)))
11338 (clobber (reg:CC FLAGS_REG))])]
11339 {
11340 operands[0] = gen_lowpart (QImode, operands[0]);
11341 operands[1] = gen_lowpart (QImode, operands[1]);
11342 operands[2] = gen_int_mode (INTVAL (operands[2]), QImode);
11343 })
11344
;; Named expander for a QImode XOR of the 8-bit field at bit position 8
;; of an HImode register (operand 1) with a constant (operand 2).  It sets
;; FLAGS_REG in CCNOmode from the XOR result compared with zero and
;; stores the result in the same field of operand 0.
11345 (define_expand "xorqi_ext_1_cc"
11346 [(parallel
11347 [(set (reg:CCNO FLAGS_REG)
11348 (compare:CCNO
11349 (xor:QI
11350 (subreg:QI
11351 (zero_extract:HI (match_operand:HI 1 "register_operand")
11352 (const_int 8)
11353 (const_int 8)) 0)
11354 (match_operand:QI 2 "const_int_operand"))
11355 (const_int 0)))
11356 (set (zero_extract:HI (match_operand:HI 0 "register_operand")
11357 (const_int 8)
11358 (const_int 8))
11359 (subreg:HI
11360 (xor:QI
11361 (subreg:QI
11362 (zero_extract:HI (match_dup 1)
11363 (const_int 8)
11364 (const_int 8)) 0)
11365 (match_dup 2)) 0))])])
11366
;; Flag-setting QImode XOR of the 8-bit field at bit position 8 of a
;; register with a general QImode operand 2.  FLAGS_REG is set from the
;; XOR result compared with zero and the result is stored back into the
;; same field of operand 0.  The second alternative (memory operand 2)
;; carries the "nox64" isa attribute.
11367 (define_insn "*xorqi_ext<mode>_1_cc"
11368 [(set (reg FLAGS_REG)
11369 (compare
11370 (xor:QI
11371 (subreg:QI
11372 (zero_extract:SWI248
11373 (match_operand:SWI248 1 "register_operand" "0,0")
11374 (const_int 8)
11375 (const_int 8)) 0)
11376 (match_operand:QI 2 "general_operand" "QnBc,m"))
11377 (const_int 0)))
11378 (set (zero_extract:SWI248
11379 (match_operand:SWI248 0 "register_operand" "+Q,Q")
11380 (const_int 8)
11381 (const_int 8))
11382 (subreg:SWI248
11383 (xor:QI
11384 (subreg:QI
11385 (zero_extract:SWI248
11386 (match_dup 1)
11387 (const_int 8)
11388 (const_int 8)) 0)
11389 (match_dup 2)) 0))]
11390 "ix86_match_ccmode (insn, CCNOmode)
11391 /* FIXME: without this LRA can't reload this pattern, see PR82524. */
11392 && rtx_equal_p (operands[0], operands[1])"
11393 "xor{b}\t{%2, %h0|%h0, %2}"
11394 [(set_attr "isa" "*,nox64")
11395 (set_attr "type" "alu")
11396 (set_attr "mode" "QI")])
11397
11398 ;; Split DST = (HI<<32)|LO early to minimize register usage.
;; The any_or_plus code iterator covers plus, ior and xor.
11399 (define_code_iterator any_or_plus [plus ior xor])
;; Variant 1: a double-word register (operand 1) shifted left by the bit
;; width of the half-word mode (<MODE_SIZE> * BITS_PER_UNIT), combined
;; with a zero-extended half-word register (operand 3).  The insn outputs
;; "#" and is split after reload by split_double_concat, which receives
;; the destination, operand 3 and the low part of operand 1.
11400 (define_insn_and_split "*concat<mode><dwi>3_1"
11401 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
11402 (any_or_plus:<DWI>
11403 (ashift:<DWI> (match_operand:<DWI> 1 "register_operand" "r")
11404 (match_operand:<DWI> 2 "const_int_operand"))
11405 (zero_extend:<DWI> (match_operand:DWIH 3 "register_operand" "r"))))]
11406 "INTVAL (operands[2]) == <MODE_SIZE> * BITS_PER_UNIT"
11407 "#"
11408 "&& reload_completed"
11409 [(clobber (const_int 0))]
11410 {
11411 split_double_concat (<DWI>mode, operands[0], operands[3],
11412 gen_lowpart (<MODE>mode, operands[1]));
11413 DONE;
11414 })
11415
;; Variant 2 of the double-word concatenation: same as variant 1 with the
;; two arms of the any_or_plus operation swapped.  The zero-extended
;; half-word register is operand 1 and the shifted double-word register is
;; operand 2; the shift count (operand 3) must equal the bit width of the
;; half-word mode.  Split after reload by split_double_concat.
11416 (define_insn_and_split "*concat<mode><dwi>3_2"
11417 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
11418 (any_or_plus:<DWI>
11419 (zero_extend:<DWI> (match_operand:DWIH 1 "register_operand" "r"))
11420 (ashift:<DWI> (match_operand:<DWI> 2 "register_operand" "r")
11421 (match_operand:<DWI> 3 "const_int_operand"))))]
11422 "INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
11423 "#"
11424 "&& reload_completed"
11425 [(clobber (const_int 0))]
11426 {
11427 split_double_concat (<DWI>mode, operands[0], operands[1],
11428 gen_lowpart (<MODE>mode, operands[2]));
11429 DONE;
11430 })
11431
;; Variant 3 of the double-word concatenation: the shifted value is itself
;; a zero-extended half-word register (operand 1), so no gen_lowpart is
;; needed.  It is combined with the zero-extended half-word register
;; operand 3; the shift count (operand 2) must equal the bit width of the
;; half-word mode.  Split after reload by split_double_concat.
11432 (define_insn_and_split "*concat<mode><dwi>3_3"
11433 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
11434 (any_or_plus:<DWI>
11435 (ashift:<DWI>
11436 (zero_extend:<DWI> (match_operand:DWIH 1 "register_operand" "r"))
11437 (match_operand:<DWI> 2 "const_int_operand"))
11438 (zero_extend:<DWI> (match_operand:DWIH 3 "register_operand" "r"))))]
11439 "INTVAL (operands[2]) == <MODE_SIZE> * BITS_PER_UNIT"
11440 "#"
11441 "&& reload_completed"
11442 [(clobber (const_int 0))]
11443 {
11444 split_double_concat (<DWI>mode, operands[0], operands[3], operands[1]);
11445 DONE;
11446 })
11447
;; Variant 4 of the double-word concatenation: same as variant 3 with the
;; two arms of the any_or_plus operation swapped.  Operand 1 is the
;; unshifted zero-extended half-word register and operand 2 is the
;; zero-extended half-word register that is shifted; the shift count
;; (operand 3) must equal the bit width of the half-word mode.  Split
;; after reload by split_double_concat.
11448 (define_insn_and_split "*concat<mode><dwi>3_4"
11449 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
11450 (any_or_plus:<DWI>
11451 (zero_extend:<DWI> (match_operand:DWIH 1 "register_operand" "r"))
11452 (ashift:<DWI>
11453 (zero_extend:<DWI> (match_operand:DWIH 2 "register_operand" "r"))
11454 (match_operand:<DWI> 3 "const_int_operand"))))]
11455 "INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
11456 "#"
11457 "&& reload_completed"
11458 [(clobber (const_int 0))]
11459 {
11460 split_double_concat (<DWI>mode, operands[0], operands[1], operands[2]);
11461 DONE;
11462 })
11463 \f
11464 ;; Negation instructions
11465
;; Named expander for integer negation over the SDWIM modes.  All work is
;; delegated to ix86_expand_unary_operator; the RTL template itself is
;; never emitted because the preparation statement ends with DONE.
11466 (define_expand "neg<mode>2"
11467 [(set (match_operand:SDWIM 0 "nonimmediate_operand")
11468 (neg:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")))]
11469 ""
11470 "ix86_expand_unary_operator (NEG, <MODE>mode, operands); DONE;")
11471
;; Double-word negation.  Outputs "#" and is split after reload into
;; three half-word insns: a negate of one half that also sets the CCC
;; flags, an add of the carry (plus constant 0) into the other half, and
;; a negate of that other half.
;; NOTE(review): split_double_mode is called with operands[0] as both
;; source and first output array and operands[2] as second output array;
;; presumably operands 0-1 end up as the low halves and operands 2-3 as
;; the high halves -- confirm against split_double_mode.
11472 (define_insn_and_split "*neg<dwi>2_doubleword"
11473 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
11474 (neg:<DWI> (match_operand:<DWI> 1 "nonimmediate_operand" "0")))
11475 (clobber (reg:CC FLAGS_REG))]
11476 "ix86_unary_operator_ok (NEG, <DWI>mode, operands)"
11477 "#"
11478 "&& reload_completed"
11479 [(parallel
11480 [(set (reg:CCC FLAGS_REG)
11481 (unspec:CCC [(match_dup 1) (const_int 0)] UNSPEC_CC_NE))
11482 (set (match_dup 0) (neg:DWIH (match_dup 1)))])
11483 (parallel
11484 [(set (match_dup 2)
11485 (plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
11486 (match_dup 3))
11487 (const_int 0)))
11488 (clobber (reg:CC FLAGS_REG))])
11489 (parallel
11490 [(set (match_dup 2)
11491 (neg:DWIH (match_dup 2)))
11492 (clobber (reg:CC FLAGS_REG))])]
11493 "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);")
11494
11495 ;; Convert:
11496 ;; mov %esi, %edx
11497 ;; negl %eax
11498 ;; adcl $0, %edx
11499 ;; negl %edx
11500 ;; to:
11501 ;; xorl %edx, %edx
11502 ;; negl %eax
11503 ;; sbbl %esi, %edx
11504
;; Matches four consecutive insns: a move of operand 1 into register
;; operand 0, a CCC-setting negate of register operand 2, an add of the
;; carry into operand 0, and a negate of operand 0.  The replacement keeps
;; the CCC-setting negate and computes operand 0 as
;; (operand 0 - carry) - operand 1.  The preparation statement calls
;; ix86_expand_clear on operand 0 before the replacement insns.
;; The condition requires operands 0 and 2 to be different registers and
;; neither of them to be mentioned in operand 1.
11505 (define_peephole2
11506 [(set (match_operand:SWI48 0 "general_reg_operand")
11507 (match_operand:SWI48 1 "nonimmediate_gr_operand"))
11508 (parallel
11509 [(set (reg:CCC FLAGS_REG)
11510 (unspec:CCC [(match_operand:SWI48 2 "general_reg_operand")
11511 (const_int 0)] UNSPEC_CC_NE))
11512 (set (match_dup 2) (neg:SWI48 (match_dup 2)))])
11513 (parallel
11514 [(set (match_dup 0)
11515 (plus:SWI48 (plus:SWI48
11516 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0))
11517 (match_dup 0))
11518 (const_int 0)))
11519 (clobber (reg:CC FLAGS_REG))])
11520 (parallel
11521 [(set (match_dup 0)
11522 (neg:SWI48 (match_dup 0)))
11523 (clobber (reg:CC FLAGS_REG))])]
11524 "REGNO (operands[0]) != REGNO (operands[2])
11525 && !reg_mentioned_p (operands[0], operands[1])
11526 && !reg_mentioned_p (operands[2], operands[1])"
11527 [(parallel
11528 [(set (reg:CCC FLAGS_REG)
11529 (unspec:CCC [(match_dup 2) (const_int 0)] UNSPEC_CC_NE))
11530 (set (match_dup 2) (neg:SWI48 (match_dup 2)))])
11531 (parallel
11532 [(set (match_dup 0)
11533 (minus:SWI48 (minus:SWI48
11534 (match_dup 0)
11535 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0)))
11536 (match_dup 1)))
11537 (clobber (reg:CC FLAGS_REG))])]
11538 "ix86_expand_clear (operands[0]);")
11539
11540 ;; Convert:
11541 ;; xorl %edx, %edx
11542 ;; negl %eax
11543 ;; adcl $0, %edx
11544 ;; negl %edx
11545 ;; to:
11546 ;; negl %eax
11547 ;; sbbl %edx, %edx // *x86_mov<mode>cc_0_m1
11548
;; Matches four consecutive insns: a clear of register operand 0 (set to
;; zero with a flags clobber), a CCC-setting negate of register operand 1,
;; an add of the carry into operand 0, and a negate of operand 0.  The
;; replacement keeps the CCC-setting negate and sets operand 0 to -1 when
;; the carry condition holds and to 0 otherwise.  Operands 0 and 1 must be
;; different registers.
11549 (define_peephole2
11550 [(parallel
11551 [(set (match_operand:SWI48 0 "general_reg_operand") (const_int 0))
11552 (clobber (reg:CC FLAGS_REG))])
11553 (parallel
11554 [(set (reg:CCC FLAGS_REG)
11555 (unspec:CCC [(match_operand:SWI48 1 "general_reg_operand")
11556 (const_int 0)] UNSPEC_CC_NE))
11557 (set (match_dup 1) (neg:SWI48 (match_dup 1)))])
11558 (parallel
11559 [(set (match_dup 0)
11560 (plus:SWI48 (plus:SWI48
11561 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0))
11562 (match_dup 0))
11563 (const_int 0)))
11564 (clobber (reg:CC FLAGS_REG))])
11565 (parallel
11566 [(set (match_dup 0)
11567 (neg:SWI48 (match_dup 0)))
11568 (clobber (reg:CC FLAGS_REG))])]
11569 "REGNO (operands[0]) != REGNO (operands[1])"
11570 [(parallel
11571 [(set (reg:CCC FLAGS_REG)
11572 (unspec:CCC [(match_dup 1) (const_int 0)] UNSPEC_CC_NE))
11573 (set (match_dup 1) (neg:SWI48 (match_dup 1)))])
11574 (parallel
11575 [(set (match_dup 0)
11576 (if_then_else:SWI48 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0))
11577 (const_int -1)
11578 (const_int 0)))
11579 (clobber (reg:CC FLAGS_REG))])])
11580
;; Plain integer negation for the SWI modes.  The input is tied to the
;; output (constraint "0") and FLAGS_REG is clobbered.
11581 (define_insn "*neg<mode>_1"
11582 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
11583 (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")))
11584 (clobber (reg:CC FLAGS_REG))]
11585 "ix86_unary_operator_ok (NEG, <MODE>mode, operands)"
11586 "neg{<imodesuffix>}\t%0"
11587 [(set_attr "type" "negnot")
11588 (set_attr "mode" "<MODE>")])
11589
;; 64-bit only.  SImode negation whose result is written to a DImode
;; register as a zero extension.  FLAGS_REG is clobbered.
11590 (define_insn "*negsi_1_zext"
11591 [(set (match_operand:DI 0 "register_operand" "=r")
11592 (zero_extend:DI
11593 (neg:SI (match_operand:SI 1 "register_operand" "0"))))
11594 (clobber (reg:CC FLAGS_REG))]
11595 "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands)"
11596 "neg{l}\t%k0"
11597 [(set_attr "type" "negnot")
11598 (set_attr "mode" "SI")])
11599
11600 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Negation into the strict_low_part of a register (SWI12 modes).
;; Alternative 0 has the input tied to the output and emits the negate
;; directly.  Alternative 1 outputs "#" and is split after reload into a
;; move of operand 1 into the strict_low_part of operand 0 followed by an
;; in-place negate of operand 0.
11601 (define_insn_and_split "*neg<mode>_1_slp"
11602 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
11603 (neg:SWI12 (match_operand:SWI12 1 "register_operand" "0,!<r>")))
11604 (clobber (reg:CC FLAGS_REG))]
11605 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
11606 "@
11607 neg{<imodesuffix>}\t%0
11608 #"
11609 "&& reload_completed"
11610 [(set (strict_low_part (match_dup 0)) (match_dup 1))
11611 (parallel
11612 [(set (strict_low_part (match_dup 0))
11613 (neg:SWI12 (match_dup 0)))
11614 (clobber (reg:CC FLAGS_REG))])]
11615 ""
11616 [(set_attr "type" "negnot")
11617 (set_attr "mode" "<MODE>")])
11618
;; Flag-setting integer negation: FLAGS_REG is set from comparing the
;; negated value with zero and the negated value is stored in operand 0.
;; Recognized only when ix86_match_ccmode accepts CCGOCmode for the insn.
11619 (define_insn "*neg<mode>_2"
11620 [(set (reg FLAGS_REG)
11621 (compare
11622 (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0"))
11623 (const_int 0)))
11624 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
11625 (neg:SWI (match_dup 1)))]
11626 "ix86_match_ccmode (insn, CCGOCmode)
11627 && ix86_unary_operator_ok (NEG, <MODE>mode, operands)"
11628 "neg{<imodesuffix>}\t%0"
11629 [(set_attr "type" "negnot")
11630 (set_attr "mode" "<MODE>")])
11631
;; 64-bit only.  Flag-setting SImode negation whose result is written to
;; a DImode register as a zero extension.  FLAGS_REG is set from comparing
;; the SImode negated value with zero (CCGOCmode must be acceptable).
11632 (define_insn "*negsi_2_zext"
11633 [(set (reg FLAGS_REG)
11634 (compare
11635 (neg:SI (match_operand:SI 1 "register_operand" "0"))
11636 (const_int 0)))
11637 (set (match_operand:DI 0 "register_operand" "=r")
11638 (zero_extend:DI
11639 (neg:SI (match_dup 1))))]
11640 "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
11641 && ix86_unary_operator_ok (NEG, SImode, operands)"
11642 "neg{l}\t%k0"
11643 [(set_attr "type" "negnot")
11644 (set_attr "mode" "SI")])
11645
;; Negation that sets FLAGS_REG in CCCmode, expressed as an UNSPEC_CC_NE
;; of the input and constant 0, and stores the negated value in
;; operand 0.  This is the form used for the first half of the double-word
;; negation sequences in this file.
11646 (define_insn "*neg<mode>_ccc_1"
11647 [(set (reg:CCC FLAGS_REG)
11648 (unspec:CCC
11649 [(match_operand:SWI 1 "nonimmediate_operand" "0")
11650 (const_int 0)] UNSPEC_CC_NE))
11651 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
11652 (neg:SWI (match_dup 1)))]
11653 ""
11654 "neg{<imodesuffix>}\t%0"
11655 [(set_attr "type" "negnot")
11656 (set_attr "mode" "<MODE>")])
11657
;; Same CCCmode flag computation as *neg<mode>_ccc_1, but the negated
;; value is not kept: operand 0 is only a clobbered scratch register tied
;; to the input.
11658 (define_insn "*neg<mode>_ccc_2"
11659 [(set (reg:CCC FLAGS_REG)
11660 (unspec:CCC
11661 [(match_operand:SWI 1 "nonimmediate_operand" "0")
11662 (const_int 0)] UNSPEC_CC_NE))
11663 (clobber (match_scratch:SWI 0 "=<r>"))]
11664 ""
11665 "neg{<imodesuffix>}\t%0"
11666 [(set_attr "type" "negnot")
11667 (set_attr "mode" "<MODE>")])
11668
;; Named expander (SWI48 modes, register operands only) producing the
;; CCC-setting negation: FLAGS_REG gets the UNSPEC_CC_NE of operand 1 and
;; constant 0, and operand 0 gets the negated value.
11669 (define_expand "x86_neg<mode>_ccc"
11670 [(parallel
11671 [(set (reg:CCC FLAGS_REG)
11672 (unspec:CCC [(match_operand:SWI48 1 "register_operand")
11673 (const_int 0)] UNSPEC_CC_NE))
11674 (set (match_operand:SWI48 0 "register_operand")
11675 (neg:SWI48 (match_dup 1)))])])
11676
;; QImode negation of the 8-bit field at bit position 8 of a register,
;; written back to the same field of operand 0.  Operands 0 and 1 must
;; already be the same rtx.  FLAGS_REG is clobbered.
11677 (define_insn "*negqi_ext<mode>_2"
11678 [(set (zero_extract:SWI248
11679 (match_operand:SWI248 0 "register_operand" "+Q")
11680 (const_int 8)
11681 (const_int 8))
11682 (subreg:SWI248
11683 (neg:QI
11684 (subreg:QI
11685 (zero_extract:SWI248
11686 (match_operand:SWI248 1 "register_operand" "0")
11687 (const_int 8)
11688 (const_int 8)) 0)) 0))
11689 (clobber (reg:CC FLAGS_REG))]
11690 "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
11691 rtx_equal_p (operands[0], operands[1])"
11692 "neg{b}\t%h0"
11693 [(set_attr "type" "negnot")
11694 (set_attr "mode" "QI")])
11695
11696 ;; Negate with jump on overflow.
;; The expander emits a negate that sets FLAGS_REG in CCOmode from an
;; UNSPEC_CC_NE of operand 1 and operand 3, followed by a conditional jump
;; to the label in operand 2.  Operand 3 is built in the preparation
;; statement as the constant with only the top bit of the mode set.
11697 (define_expand "negv<mode>3"
11698 [(parallel [(set (reg:CCO FLAGS_REG)
11699 (unspec:CCO
11700 [(match_operand:SWI 1 "register_operand")
11701 (match_dup 3)] UNSPEC_CC_NE))
11702 (set (match_operand:SWI 0 "register_operand")
11703 (neg:SWI (match_dup 1)))])
11704 (set (pc) (if_then_else
11705 (eq (reg:CCO FLAGS_REG) (const_int 0))
11706 (label_ref (match_operand 2))
11707 (pc)))]
11708 ""
11709 {
11710 operands[3]
11711 = gen_int_mode (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (<MODE>mode) - 1),
11712 <MODE>mode);
11713 })
11714
;; Insn matching the negate part of negv<mode>3: FLAGS_REG is set in
;; CCOmode and operand 0 receives the negated value.  The constant
;; operand 2 must be the sign-bit constant of the mode (mode_signbit_p).
11715 (define_insn "*negv<mode>3"
11716 [(set (reg:CCO FLAGS_REG)
11717 (unspec:CCO [(match_operand:SWI 1 "nonimmediate_operand" "0")
11718 (match_operand:SWI 2 "const_int_operand")]
11719 UNSPEC_CC_NE))
11720 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
11721 (neg:SWI (match_dup 1)))]
11722 "ix86_unary_operator_ok (NEG, <MODE>mode, operands)
11723 && mode_signbit_p (<MODE>mode, operands[2])"
11724 "neg{<imodesuffix>}\t%0"
11725 [(set_attr "type" "negnot")
11726 (set_attr "mode" "<MODE>")])
11727
11728 ;; Optimize *negsi_1 followed by *cmpsi_ccno_1 (PR target/91384)
;; Matches a register copy (operand 1 into operand 0), a negate of
;; operand 0 that clobbers the flags, and a CCZmode compare of operand 1
;; with zero.  The replacement keeps the copy and uses a single negate of
;; operand 0 that also sets the CCZ flags from the negated value, so the
;; separate compare disappears.
11729 (define_peephole2
11730 [(set (match_operand:SWI 0 "general_reg_operand")
11731 (match_operand:SWI 1 "general_reg_operand"))
11732 (parallel [(set (match_dup 0) (neg:SWI (match_dup 0)))
11733 (clobber (reg:CC FLAGS_REG))])
11734 (set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 1) (const_int 0)))]
11735 ""
11736 [(set (match_dup 0) (match_dup 1))
11737 (parallel [(set (reg:CCZ FLAGS_REG)
11738 (compare:CCZ (neg:SWI (match_dup 0)) (const_int 0)))
11739 (set (match_dup 0) (neg:SWI (match_dup 0)))])])
11740
11741 ;; Special expand pattern to handle integer mode abs
11742
;; Available when TARGET_CMOVE holds and, for QImode, only when
;; TARGET_PARTIAL_REG_STALL is off.  With TARGET_EXPAND_ABS the expander
;; emits the shift/xor/subtract sequence described in the C comment below
;; and finishes with DONE.  Otherwise the preparation statement does
;; nothing and the (abs ...) template with its flags clobber is emitted.
11743 (define_expand "abs<mode>2"
11744 [(parallel
11745 [(set (match_operand:SDWIM 0 "register_operand")
11746 (abs:SDWIM
11747 (match_operand:SDWIM 1 "general_operand")))
11748 (clobber (reg:CC FLAGS_REG))])]
11749 "TARGET_CMOVE
11750 && (<MODE>mode != QImode || !TARGET_PARTIAL_REG_STALL)"
11751 {
11752 if (TARGET_EXPAND_ABS)
11753 {
11754 machine_mode mode = <MODE>mode;
11755 operands[1] = force_reg (mode, operands[1]);
11756
11757 /* Generate rtx abs using:
11758 abs (x) = (((signed) x >> (W-1)) ^ x) - ((signed) x >> (W-1)) */
11759
11760 rtx shift_amount = gen_int_mode (GET_MODE_PRECISION (mode) - 1, QImode);
11761 rtx shift_dst = expand_simple_binop (mode, ASHIFTRT, operands[1],
11762 shift_amount, NULL_RTX,
11763 0, OPTAB_DIRECT);
11764 rtx xor_dst = expand_simple_binop (mode, XOR, shift_dst, operands[1],
11765 operands[0], 0, OPTAB_DIRECT);
11766 rtx minus_dst = expand_simple_binop (mode, MINUS, xor_dst, shift_dst,
11767 operands[0], 0, OPTAB_DIRECT);
11768 if (!rtx_equal_p (minus_dst, operands[0]))
11769 emit_move_insn (operands[0], minus_dst);
11770 DONE;
11771 }
11772 })
11773
;; Double-word integer abs.  Matches only while ix86_pre_reload_split ()
;; holds and is always split ("&& 1").  The split negates the input into
;; a new double-word register (operand 2) using the neg / add-carry / neg
;; sequence, with the last negate setting the CCGOC flags, and then picks
;; for each half either the negated value (when the flags satisfy "ge")
;; or the original value.
;; NOTE(review): from the way the operands are used below, split_double_mode
;; presumably leaves operands 0-2 as one half (dest, src, tmp) and fills
;; operands 3-5 with the other half -- confirm against split_double_mode.
11774 (define_insn_and_split "*abs<dwi>2_doubleword"
11775 [(set (match_operand:<DWI> 0 "register_operand")
11776 (abs:<DWI>
11777 (match_operand:<DWI> 1 "general_operand")))
11778 (clobber (reg:CC FLAGS_REG))]
11779 "TARGET_CMOVE
11780 && ix86_pre_reload_split ()"
11781 "#"
11782 "&& 1"
11783 [(parallel
11784 [(set (reg:CCC FLAGS_REG)
11785 (unspec:CCC [(match_dup 1) (const_int 0)] UNSPEC_CC_NE))
11786 (set (match_dup 2) (neg:DWIH (match_dup 1)))])
11787 (parallel
11788 [(set (match_dup 5)
11789 (plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
11790 (match_dup 4))
11791 (const_int 0)))
11792 (clobber (reg:CC FLAGS_REG))])
11793 (parallel
11794 [(set (reg:CCGOC FLAGS_REG)
11795 (compare:CCGOC
11796 (neg:DWIH (match_dup 5))
11797 (const_int 0)))
11798 (set (match_dup 5)
11799 (neg:DWIH (match_dup 5)))])
11800 (set (match_dup 0)
11801 (if_then_else:DWIH
11802 (ge (reg:CCGOC FLAGS_REG) (const_int 0))
11803 (match_dup 2)
11804 (match_dup 1)))
11805 (set (match_dup 3)
11806 (if_then_else:DWIH
11807 (ge (reg:CCGOC FLAGS_REG) (const_int 0))
11808 (match_dup 5)
11809 (match_dup 4)))]
11810 {
11811 operands[1] = force_reg (<DWI>mode, operands[1]);
11812 operands[2] = gen_reg_rtx (<DWI>mode);
11813
11814 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
11815 })
11816
;; Double-word negated abs, i.e. (neg (abs x)).  Identical in structure to
;; *abs<dwi>2_doubleword except that the two conditional selections use
;; "lt" instead of "ge", so the negated value is chosen in the opposite
;; case.  Matches only while ix86_pre_reload_split () holds and is always
;; split.
;; NOTE(review): operands 0-2 / 3-5 presumably hold the two halves of
;; (dest, src, tmp) after split_double_mode -- confirm.
11817 (define_insn_and_split "*nabs<dwi>2_doubleword"
11818 [(set (match_operand:<DWI> 0 "register_operand")
11819 (neg:<DWI>
11820 (abs:<DWI>
11821 (match_operand:<DWI> 1 "general_operand"))))
11822 (clobber (reg:CC FLAGS_REG))]
11823 "TARGET_CMOVE
11824 && ix86_pre_reload_split ()"
11825 "#"
11826 "&& 1"
11827 [(parallel
11828 [(set (reg:CCC FLAGS_REG)
11829 (unspec:CCC [(match_dup 1) (const_int 0)] UNSPEC_CC_NE))
11830 (set (match_dup 2) (neg:DWIH (match_dup 1)))])
11831 (parallel
11832 [(set (match_dup 5)
11833 (plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
11834 (match_dup 4))
11835 (const_int 0)))
11836 (clobber (reg:CC FLAGS_REG))])
11837 (parallel
11838 [(set (reg:CCGOC FLAGS_REG)
11839 (compare:CCGOC
11840 (neg:DWIH (match_dup 5))
11841 (const_int 0)))
11842 (set (match_dup 5)
11843 (neg:DWIH (match_dup 5)))])
11844 (set (match_dup 0)
11845 (if_then_else:DWIH
11846 (lt (reg:CCGOC FLAGS_REG) (const_int 0))
11847 (match_dup 2)
11848 (match_dup 1)))
11849 (set (match_dup 3)
11850 (if_then_else:DWIH
11851 (lt (reg:CCGOC FLAGS_REG) (const_int 0))
11852 (match_dup 5)
11853 (match_dup 4)))]
11854 {
11855 operands[1] = force_reg (<DWI>mode, operands[1]);
11856 operands[2] = gen_reg_rtx (<DWI>mode);
11857
11858 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
11859 })
11860
;; Single-word integer abs.  Matches only while ix86_pre_reload_split ()
;; holds and is always split ("&& 1").  The split forces the input into a
;; register, negates it into a new register (operand 2) while setting the
;; CCGOC flags, and then selects the negated value when the flags satisfy
;; "ge" and the original value otherwise.
11861 (define_insn_and_split "*abs<mode>2_1"
11862 [(set (match_operand:SWI 0 "register_operand")
11863 (abs:SWI
11864 (match_operand:SWI 1 "general_operand")))
11865 (clobber (reg:CC FLAGS_REG))]
11866 "TARGET_CMOVE
11867 && (<MODE>mode != QImode || !TARGET_PARTIAL_REG_STALL)
11868 && ix86_pre_reload_split ()"
11869 "#"
11870 "&& 1"
11871 [(parallel
11872 [(set (reg:CCGOC FLAGS_REG)
11873 (compare:CCGOC
11874 (neg:SWI (match_dup 1))
11875 (const_int 0)))
11876 (set (match_dup 2)
11877 (neg:SWI (match_dup 1)))])
11878 (set (match_dup 0)
11879 (if_then_else:SWI
11880 (ge (reg:CCGOC FLAGS_REG) (const_int 0))
11881 (match_dup 2)
11882 (match_dup 1)))]
11883 {
11884 operands[1] = force_reg (<MODE>mode, operands[1]);
11885 operands[2] = gen_reg_rtx (<MODE>mode);
11886 })
11887
;; Single-word negated abs, i.e. (neg (abs x)).  Same split as
;; *abs<mode>2_1 except that the selection uses "lt" instead of "ge", so
;; the negated value is chosen in the opposite case.
11888 (define_insn_and_split "*nabs<mode>2_1"
11889 [(set (match_operand:SWI 0 "register_operand")
11890 (neg:SWI
11891 (abs:SWI
11892 (match_operand:SWI 1 "general_operand"))))
11893 (clobber (reg:CC FLAGS_REG))]
11894 "TARGET_CMOVE
11895 && (<MODE>mode != QImode || !TARGET_PARTIAL_REG_STALL)
11896 && ix86_pre_reload_split ()"
11897 "#"
11898 "&& 1"
11899 [(parallel
11900 [(set (reg:CCGOC FLAGS_REG)
11901 (compare:CCGOC
11902 (neg:SWI (match_dup 1))
11903 (const_int 0)))
11904 (set (match_dup 2)
11905 (neg:SWI (match_dup 1)))])
11906 (set (match_dup 0)
11907 (if_then_else:SWI
11908 (lt (reg:CCGOC FLAGS_REG) (const_int 0))
11909 (match_dup 2)
11910 (match_dup 1)))]
11911 {
11912 operands[1] = force_reg (<MODE>mode, operands[1]);
11913 operands[2] = gen_reg_rtx (<MODE>mode);
11914 })
11915
;; Named expander for the absneg operations on TFmode, available with
;; TARGET_SSE.  All work is delegated to ix86_expand_fp_absneg_operator.
11916 (define_expand "<code>tf2"
11917 [(set (match_operand:TF 0 "register_operand")
11918 (absneg:TF (match_operand:TF 1 "register_operand")))]
11919 "TARGET_SSE"
11920 "ix86_expand_fp_absneg_operator (<CODE>, TFmode, operands); DONE;")
11921
;; TFmode absneg with an extra "use"d vector operand 2 (presumably the
;; sign-bit mask prepared by the expander -- confirm against
;; ix86_expand_fp_absneg_operator).  Outputs "#" and is split after reload
;; into a single <absneg_op> of operands 1 and 2.  Before emitting, the
;; two inputs are swapped when, with AVX, operand 1 is a memory reference,
;; or, without AVX, operand 2 matches the destination.
11922 (define_insn_and_split "*<code>tf2_1"
11923 [(set (match_operand:TF 0 "register_operand" "=x,x,Yv,Yv")
11924 (absneg:TF
11925 (match_operand:TF 1 "vector_operand" "0,xBm,Yv,m")))
11926 (use (match_operand:TF 2 "vector_operand" "xBm,0,Yvm,Yv"))]
11927 "TARGET_SSE"
11928 "#"
11929 "&& reload_completed"
11930 [(set (match_dup 0)
11931 (<absneg_op>:TF (match_dup 1) (match_dup 2)))]
11932 {
11933 if (TARGET_AVX)
11934 {
11935 if (MEM_P (operands[1]))
11936 std::swap (operands[1], operands[2]);
11937 }
11938 else
11939 {
11940 if (operands_match_p (operands[0], operands[2]))
11941 std::swap (operands[1], operands[2]);
11942 }
11943 }
11944 [(set_attr "isa" "noavx,noavx,avx,avx")])
11945
;; TFmode negated abs, i.e. (neg (abs x)), with an extra "use"d vector
;; operand 2.  Outputs "#" and is split after reload into an IOR of
;; operands 1 and 2.  The operand swapping rules are the same as in
;; *<code>tf2_1: with AVX swap when operand 1 is a memory reference,
;; without AVX swap when operand 2 matches the destination.
11946 (define_insn_and_split "*nabstf2_1"
11947 [(set (match_operand:TF 0 "register_operand" "=x,x,Yv,Yv")
11948 (neg:TF
11949 (abs:TF
11950 (match_operand:TF 1 "vector_operand" "0,xBm,Yv,m"))))
11951 (use (match_operand:TF 2 "vector_operand" "xBm,0,Yvm,Yv"))]
11952 "TARGET_SSE"
11953 "#"
11954 "&& reload_completed"
11955 [(set (match_dup 0)
11956 (ior:TF (match_dup 1) (match_dup 2)))]
11957 {
11958 if (TARGET_AVX)
11959 {
11960 if (MEM_P (operands[1]))
11961 std::swap (operands[1], operands[2]);
11962 }
11963 else
11964 {
11965 if (operands_match_p (operands[0], operands[2]))
11966 std::swap (operands[1], operands[2]);
11967 }
11968 }
11969 [(set_attr "isa" "noavx,noavx,avx,avx")])
11970
;; Named expander for the absneg operations on HFmode, available with
;; TARGET_AVX512FP16.  All work is delegated to
;; ix86_expand_fp_absneg_operator.
11971 (define_expand "<code>hf2"
11972 [(set (match_operand:HF 0 "register_operand")
11973 (absneg:HF (match_operand:HF 1 "register_operand")))]
11974 "TARGET_AVX512FP16"
11975 "ix86_expand_fp_absneg_operator (<CODE>, HFmode, operands); DONE;")
11976
;; Named expander for the absneg operations on the X87MODEF modes.
;; Available with TARGET_80387, or when the mode is an SSE float mode and
;; TARGET_SSE_MATH is on.  All work is delegated to
;; ix86_expand_fp_absneg_operator.
11977 (define_expand "<code><mode>2"
11978 [(set (match_operand:X87MODEF 0 "register_operand")
11979 (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand")))]
11980 "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
11981 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
11982
11983 ;; Changing of sign for FP values is doable using integer unit too.
;; Placeholder insn for absneg when the mode is not handled by SSE math.
;; It has two alternatives ("f" and the discouraged "!r"), always outputs
;; "#", and carries a flags clobber; the define_splits that follow turn
;; it into real instructions after reload.
11984 (define_insn "*<code><mode>2_i387_1"
11985 [(set (match_operand:X87MODEF 0 "register_operand" "=f,!r")
11986 (absneg:X87MODEF
11987 (match_operand:X87MODEF 1 "register_operand" "0,0")))
11988 (clobber (reg:CC FLAGS_REG))]
11989 "TARGET_80387 && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
11990 "#")
11991
;; After reload, when both operands satisfy fp_register_operand, drop the
;; flags clobber and keep the bare absneg set.
11992 (define_split
11993 [(set (match_operand:X87MODEF 0 "fp_register_operand")
11994 (absneg:X87MODEF (match_operand:X87MODEF 1 "fp_register_operand")))
11995 (clobber (reg:CC FLAGS_REG))]
11996 "TARGET_80387 && reload_completed"
11997 [(set (match_dup 0) (absneg:X87MODEF (match_dup 1)))])
11998
;; After reload, when both operands satisfy general_reg_operand, hand the
;; operation to ix86_split_fp_absneg_operator, which emits the replacement
;; itself (the preparation statement ends with DONE).
11999 (define_split
12000 [(set (match_operand:X87MODEF 0 "general_reg_operand")
12001 (absneg:X87MODEF (match_operand:X87MODEF 1 "general_reg_operand")))
12002 (clobber (reg:CC FLAGS_REG))]
12003 "TARGET_80387 && reload_completed"
12004 [(const_int 0)]
12005 "ix86_split_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
12006
;; HFmode absneg with a "use"d V8HF vector operand 2 and a flags clobber.
;; Outputs "#" and is split after reload into a V8HF <absneg_op> of
;; operands 1 and 2, with operands 0 and 1 re-expressed as V8HF lowpart
;; subregs of the HFmode registers.
12007 (define_insn_and_split "*<code>hf2_1"
12008 [(set (match_operand:HF 0 "register_operand" "=Yv")
12009 (absneg:HF
12010 (match_operand:HF 1 "register_operand" "Yv")))
12011 (use (match_operand:V8HF 2 "vector_operand" "Yvm"))
12012 (clobber (reg:CC FLAGS_REG))]
12013 "TARGET_AVX512FP16"
12014 "#"
12015 "&& reload_completed"
12016 [(set (match_dup 0)
12017 (<absneg_op>:V8HF (match_dup 1) (match_dup 2)))]
12018 {
12019 operands[0] = lowpart_subreg (V8HFmode, operands[0], HFmode);
12020 operands[1] = lowpart_subreg (V8HFmode, operands[1], HFmode);
12021 })
12022
;; Placeholder insn for absneg on the MODEF modes with a "use"d vector
;; operand 2 and a flags clobber.  It always outputs "#"; the
;; define_splits that follow turn it into real instructions after reload.
;; Alternatives 0-2 use the "x"/"Yv" constraints together with operand 2;
;; alternatives 3 and 4 use "f" and "!r" and ignore operand 2 ("X").
12023 (define_insn "*<code><mode>2_1"
12024 [(set (match_operand:MODEF 0 "register_operand" "=x,x,Yv,f,!r")
12025 (absneg:MODEF
12026 (match_operand:MODEF 1 "register_operand" "0,x,Yv,0,0")))
12027 (use (match_operand:<ssevecmode> 2 "vector_operand" "xBm,0,Yvm,X,X"))
12028 (clobber (reg:CC FLAGS_REG))]
12029 "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
12030 "#"
;; The "enabled" attribute: when the mode is handled by SSE math,
;; alternatives 3 and 4 are enabled only with TARGET_MIX_SSE_I387 and the
;; others keep the default ("*"); otherwise only alternatives 3 and 4 are
;; enabled.
12031 [(set_attr "isa" "noavx,noavx,avx,*,*")
12032 (set (attr "enabled")
12033 (if_then_else
12034 (match_test ("SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"))
12035 (if_then_else
12036 (eq_attr "alternative" "3,4")
12037 (symbol_ref "TARGET_MIX_SSE_I387")
12038 (const_string "*"))
12039 (if_then_else
12040 (eq_attr "alternative" "3,4")
12041 (symbol_ref "true")
12042 (symbol_ref "false"))))])
12043
;; After reload, when both operands satisfy sse_reg_operand and the mode
;; is handled by SSE math, rewrite the absneg as a vector-mode
;; <absneg_op> of operands 1 and 2.  Operands 0 and 1 are re-expressed as
;; lowpart subregs in the vector mode; without AVX the two inputs are
;; swapped when operand 2 matches the destination.
12044 (define_split
12045 [(set (match_operand:MODEF 0 "sse_reg_operand")
12046 (absneg:MODEF
12047 (match_operand:MODEF 1 "sse_reg_operand")))
12048 (use (match_operand:<ssevecmodef> 2 "vector_operand"))
12049 (clobber (reg:CC FLAGS_REG))]
12050 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
12051 && reload_completed"
12052 [(set (match_dup 0)
12053 (<absneg_op>:<ssevecmodef> (match_dup 1) (match_dup 2)))]
12054 {
12055 machine_mode mode = <MODE>mode;
12056 machine_mode vmode = <ssevecmodef>mode;
12057
12058 operands[0] = lowpart_subreg (vmode, operands[0], mode);
12059 operands[1] = lowpart_subreg (vmode, operands[1], mode);
12060
12061 if (!TARGET_AVX && operands_match_p (operands[0], operands[2]))
12062 std::swap (operands[1], operands[2]);
12063 })
12064
;; After reload, when both operands satisfy fp_register_operand, drop the
;; "use" of operand 2 and the flags clobber and keep the bare absneg set.
12065 (define_split
12066 [(set (match_operand:MODEF 0 "fp_register_operand")
12067 (absneg:MODEF (match_operand:MODEF 1 "fp_register_operand")))
12068 (use (match_operand 2))
12069 (clobber (reg:CC FLAGS_REG))]
12070 "TARGET_80387 && reload_completed"
12071 [(set (match_dup 0) (absneg:MODEF (match_dup 1)))])
12072
;; After reload, when both operands satisfy general_reg_operand, hand the
;; operation to ix86_split_fp_absneg_operator, which emits the replacement
;; itself (the preparation statement ends with DONE).
12073 (define_split
12074 [(set (match_operand:MODEF 0 "general_reg_operand")
12075 (absneg:MODEF (match_operand:MODEF 1 "general_reg_operand")))
12076 (use (match_operand 2))
12077 (clobber (reg:CC FLAGS_REG))]
12078 "TARGET_80387 && reload_completed"
12079 [(const_int 0)]
12080 "ix86_split_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
12081
;; Negated abs, i.e. (neg (abs x)), on the MODEF modes when handled by
;; SSE math, with a "use"d vector operand 2.  Outputs "#" and is split
;; after reload into a vector-mode IOR of operands 1 and 2.  Operands 0
;; and 1 are re-expressed as lowpart subregs in the vector mode; without
;; AVX the two inputs are swapped when operand 2 matches the destination.
12082 (define_insn_and_split "*nabs<mode>2_1"
12083 [(set (match_operand:MODEF 0 "register_operand" "=x,x,Yv")
12084 (neg:MODEF
12085 (abs:MODEF
12086 (match_operand:MODEF 1 "register_operand" "0,x,Yv"))))
12087 (use (match_operand:<ssevecmode> 2 "vector_operand" "xBm,0,Yvm"))]
12088 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
12089 "#"
12090 "&& reload_completed"
12091 [(set (match_dup 0)
12092 (ior:<ssevecmodef> (match_dup 1) (match_dup 2)))]
12093 {
12094 machine_mode mode = <MODE>mode;
12095 machine_mode vmode = <ssevecmodef>mode;
12096
12097 operands[0] = lowpart_subreg (vmode, operands[0], mode);
12098 operands[1] = lowpart_subreg (vmode, operands[1], mode);
12099
12100 if (!TARGET_AVX && operands_match_p (operands[0], operands[2]))
12101 std::swap (operands[1], operands[2]);
12102 }
12103 [(set_attr "isa" "noavx,noavx,avx")])
12104
12105 ;; Conditionalize these after reload. If they match before reload, we
12106 ;; lose the clobber and ability to use integer instructions.
12107
;; abs/neg on an "f"-class register with the input tied to the output.
;; Only recognized once reload_completed is true; the assembler output is
;; the <absneg_mnemonic> attribute of the iterated code.
12108 (define_insn "*<code><mode>2_i387"
12109 [(set (match_operand:X87MODEF 0 "register_operand" "=f")
12110 (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand" "0")))]
12111 "TARGET_80387 && reload_completed"
12112 "<absneg_mnemonic>"
12113 [(set_attr "type" "fsgn")
12114 (set_attr "mode" "<MODE>")])
12115
12116 ;; Copysign instructions
12117
;; copysign expander for the SSEMODEF modes.  Operand 0 is the destination
;; register, operand 1 may be any nonmemory operand and operand 2 must be
;; a register.  Enabled when the mode is an SSE float mode under SSE math,
;; or is TFmode with TARGET_SSE, or is HFmode with TARGET_AVX512FP16.  All
;; code generation is delegated to ix86_expand_copysign.
12118 (define_expand "copysign<mode>3"
12119 [(match_operand:SSEMODEF 0 "register_operand")
12120 (match_operand:SSEMODEF 1 "nonmemory_operand")
12121 (match_operand:SSEMODEF 2 "register_operand")]
12122 "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
12123 || (TARGET_SSE && (<MODE>mode == TFmode))
12124 || (TARGET_AVX512FP16 && (<MODE>mode == HFmode))"
12125 "ix86_expand_copysign (operands); DONE;")
12126
;; xorsign expander for the MODEFH modes; all three operands are registers.
;; When operands 1 and 2 are the same rtx the result is produced with
;; abs<mode>2 on operand 1; otherwise ix86_expand_xorsign does the work.
12127 (define_expand "xorsign<mode>3"
12128 [(match_operand:MODEFH 0 "register_operand")
12129 (match_operand:MODEFH 1 "register_operand")
12130 (match_operand:MODEFH 2 "register_operand")]
12131 "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
12132 || <MODE>mode == HFmode"
12133 {
12134 if (rtx_equal_p (operands[1], operands[2]))
12135 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
12136 else
12137 ix86_expand_xorsign (operands);
12138 DONE;
12139 })
12140 \f
12141 ;; One's complement instructions
12142
;; Bitwise NOT expander for the SDWIM modes.  Always enabled (empty
;; condition); expansion is delegated to ix86_expand_unary_operator.
12143 (define_expand "one_cmpl<mode>2"
12144 [(set (match_operand:SDWIM 0 "nonimmediate_operand")
12145 (not:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")))]
12146 ""
12147 "ix86_expand_unary_operator (NOT, <MODE>mode, operands); DONE;")
12148
;; Double-word NOT with the input tied to the output.  Emitted as "#" and
;; split after reload into two DWIH-mode NOTs.  split_double_mode
;; overwrites operands 0..3 in place with the halves of the original
;; operands 0 and 1, so that operand 1 pairs with operand 0 and operand 3
;; pairs with operand 2 in the replacement template.
12149 (define_insn_and_split "*one_cmpl<dwi>2_doubleword"
12150 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
12151 (not:<DWI> (match_operand:<DWI> 1 "nonimmediate_operand" "0")))]
12152 "ix86_unary_operator_ok (NOT, <DWI>mode, operands)"
12153 "#"
12154 "&& reload_completed"
12155 [(set (match_dup 0)
12156 (not:DWIH (match_dup 1)))
12157 (set (match_dup 2)
12158 (not:DWIH (match_dup 3)))]
12159 "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);")
12160
;; NOT for the SWI248 modes.  Alternative 0 prints a "not" instruction on
;; a register/memory operand tied to the input.  Alternative 1 works on
;; "k" registers, is typed msklog and is output as "#" (split elsewhere).
;; The isa attribute for alternative 1 is avx512bw when the insn mode is
;; SI or DI and avx512f otherwise; alternative 0 has no isa restriction.
12161 (define_insn "*one_cmpl<mode>2_1"
12162 [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,?k")
12163 (not:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "0,k")))]
12164 "ix86_unary_operator_ok (NOT, <MODE>mode, operands)"
12165 "@
12166 not{<imodesuffix>}\t%0
12167 #"
12168 [(set (attr "isa")
12169 (cond [(eq_attr "alternative" "1")
12170 (if_then_else (eq_attr "mode" "SI,DI")
12171 (const_string "avx512bw")
12172 (const_string "avx512f"))
12173 ]
12174 (const_string "*")))
12175 (set_attr "type" "negnot,msklog")
12176 (set_attr "mode" "<MODE>")])
12177
;; SImode NOT whose result is zero-extended to DImode (64-bit only).
;; Alternative 0 prints "not{l}" on the %k (32-bit) view of operand 0;
;; alternative 1 uses "k" registers, requires avx512bw and is output as "#".
12178 (define_insn "*one_cmplsi2_1_zext"
12179 [(set (match_operand:DI 0 "register_operand" "=r,?k")
12180 (zero_extend:DI
12181 (not:SI (match_operand:SI 1 "register_operand" "0,k"))))]
12182 "TARGET_64BIT && ix86_unary_operator_ok (NOT, SImode, operands)"
12183 "@
12184 not{l}\t%k0
12185 #"
12186 [(set_attr "isa" "x64,avx512bw")
12187 (set_attr "type" "negnot,msklog")
12188 (set_attr "mode" "SI,SI")])
12189
;; QImode NOT with three alternatives:
;;   0: "not{b}" on a "q" register or memory;
;;   1: "not{l}" on the %k (32-bit) view of any "r" register, mode SI;
;;   2: "k" register form, requires avx512f, output as "#"; its mode
;;      attribute is HI when TARGET_AVX512DQ is not available, else QI.
;; Alternative 1 is only preferred for speed when the target does not
;; have TARGET_PARTIAL_REG_STALL.
12190 (define_insn "*one_cmplqi2_1"
12191 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,?k")
12192 (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,k")))]
12193 "ix86_unary_operator_ok (NOT, QImode, operands)"
12194 "@
12195 not{b}\t%0
12196 not{l}\t%k0
12197 #"
12198 [(set_attr "isa" "*,*,avx512f")
12199 (set_attr "type" "negnot,negnot,msklog")
12200 (set (attr "mode")
12201 (cond [(eq_attr "alternative" "1")
12202 (const_string "SI")
12203 (and (eq_attr "alternative" "2")
12204 (match_test "!TARGET_AVX512DQ"))
12205 (const_string "HI")
12206 ]
12207 (const_string "QI")))
12208 ;; Potential partial reg stall on alternative 1.
12209 (set (attr "preferred_for_speed")
12210 (cond [(eq_attr "alternative" "1")
12211 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
12212 (symbol_ref "true")))])
12213
12214 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; NOT into the strict_low_part of a register for the SWI12 modes.
;; Enabled when the target has no partial-register stall or the function
;; is optimized for size.  Alternative 0 has the input tied to the output
;; and prints "not" directly.  Alternative 1 (earlyclobber output, distinct
;; input) is output as "#" and split after reload into a strict_low_part
;; copy of operand 1 into operand 0 followed by an in-place NOT.
12215 (define_insn_and_split "*one_cmpl<mode>_1_slp"
12216 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
12217 (not:SWI12 (match_operand:SWI12 1 "register_operand" "0,!<r>")))]
12218 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
12219 "@
12220 not{<imodesuffix>}\t%0
12221 #"
12222 "&& reload_completed"
12223 [(set (strict_low_part (match_dup 0)) (match_dup 1))
12224 (set (strict_low_part (match_dup 0))
12225 (not:SWI12 (match_dup 0)))]
12226 ""
12227 [(set_attr "type" "negnot")
12228 (set_attr "mode" "<MODE>")])
12229
;; NOT that also sets FLAGS_REG from a comparison of the result with zero.
;; Requires the flags user to be satisfied by CCNOmode.  Always output as
;; "#"; the define_split that follows this insn in the file rewrites the
;; NOT as an XOR with -1.
12230 (define_insn "*one_cmpl<mode>2_2"
12231 [(set (reg FLAGS_REG)
12232 (compare (not:SWI (match_operand:SWI 1 "nonimmediate_operand" "0"))
12233 (const_int 0)))
12234 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
12235 (not:SWI (match_dup 1)))]
12236 "ix86_match_ccmode (insn, CCNOmode)
12237 && ix86_unary_operator_ok (NOT, <MODE>mode, operands)"
12238 "#"
12239 [(set_attr "type" "alu1")
12240 (set_attr "mode" "<MODE>")])
12241
;; Rewrite a flag-setting NOT as a flag-setting XOR with -1.  The compare
;; operator (operand 2) is preserved via match_op_dup; both the flags set
;; and the value set are rebuilt around (xor X -1) in one parallel.
12242 (define_split
12243 [(set (match_operand 0 "flags_reg_operand")
12244 (match_operator 2 "compare_operator"
12245 [(not:SWI (match_operand:SWI 3 "nonimmediate_operand"))
12246 (const_int 0)]))
12247 (set (match_operand:SWI 1 "nonimmediate_operand")
12248 (not:SWI (match_dup 3)))]
12249 "ix86_match_ccmode (insn, CCNOmode)"
12250 [(parallel [(set (match_dup 0)
12251 (match_op_dup 2 [(xor:SWI (match_dup 3) (const_int -1))
12252 (const_int 0)]))
12253 (set (match_dup 1)
12254 (xor:SWI (match_dup 3) (const_int -1)))])])
12255
;; 64-bit only: SImode NOT that sets FLAGS_REG from a comparison of the
;; SImode result with zero and stores the zero-extended result in a DImode
;; register.  Always output as "#".
12256 (define_insn "*one_cmplsi2_2_zext"
12257 [(set (reg FLAGS_REG)
12258 (compare (not:SI (match_operand:SI 1 "register_operand" "0"))
12259 (const_int 0)))
12260 (set (match_operand:DI 0 "register_operand" "=r")
12261 (zero_extend:DI (not:SI (match_dup 1))))]
12262 "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
12263 && ix86_unary_operator_ok (NOT, SImode, operands)"
12264 "#"
12265 [(set_attr "type" "alu1")
12266 (set_attr "mode" "SI")])
12267
;; Zero-extending counterpart of the NOT -> XOR -1 split: the flag-setting
;; SImode NOT with a DImode zero-extended result becomes a flag-setting
;; SImode XOR with -1 whose result is zero-extended to DImode.
12268 (define_split
12269 [(set (match_operand 0 "flags_reg_operand")
12270 (match_operator 2 "compare_operator"
12271 [(not:SI (match_operand:SI 3 "register_operand"))
12272 (const_int 0)]))
12273 (set (match_operand:DI 1 "register_operand")
12274 (zero_extend:DI (not:SI (match_dup 3))))]
12275 "ix86_match_ccmode (insn, CCNOmode)"
12276 [(parallel [(set (match_dup 0)
12277 (match_op_dup 2 [(xor:SI (match_dup 3) (const_int -1))
12278 (const_int 0)]))
12279 (set (match_dup 1)
12280 (zero_extend:DI (xor:SI (match_dup 3) (const_int -1))))])])
12281 \f
12282 ;; Shift instructions
12283
12284 ;; DImode shifts are implemented using the i386 "shift double" opcode,
12285 ;; which is written as "sh[lr]d[lw] imm,reg,reg/mem". If the shift count
12286 ;; is variable, then the count is in %cl and the "imm" operand is dropped
12287 ;; from the assembler input.
12288 ;;
12289 ;; This instruction shifts the target reg/mem as usual, but instead of
12290 ;; shifting in zeros, bits are shifted in from reg operand. If the insn
12291 ;; is a left shift double, bits are taken from the high order bits of
12292 ;; reg, else if the insn is a shift right double, bits are taken from the
12293 ;; low order bits of reg. So if %eax is "1234" and %edx is "5678",
12294 ;; "shldl $8,%edx,%eax" leaves %edx unchanged and sets %eax to "2345".
12295 ;;
12296 ;; Since sh[lr]d does not change the `reg' operand, that is done
12297 ;; separately, making all shifts emit pairs of shift double and normal
12298 ;; shift. Since sh[lr]d does not shift more than 31 bits, and we wish to
12299 ;; support a 63 bit shift, each shift where the count is in a reg expands
12300 ;; to a pair of shifts, a branch, a shift by 32 and a label.
12301 ;;
12302 ;; If the shift count is a constant, we need never emit more than one
12303 ;; shift pair, instead using moves and sign extension for counts greater
12304 ;; than 31.
12305
;; Left-shift expander for the SDWIM modes with a QImode shift count.
;; The operand predicates come from the <shift_operand> and
;; <ashl_input_operand> mode attributes.  Always enabled; expansion is
;; delegated to ix86_expand_binary_operator.
12306 (define_expand "ashl<mode>3"
12307 [(set (match_operand:SDWIM 0 "<shift_operand>")
12308 (ashift:SDWIM (match_operand:SDWIM 1 "<ashl_input_operand>")
12309 (match_operand:QI 2 "nonmemory_operand")))]
12310 ""
12311 "ix86_expand_binary_operator (ASHIFT, <MODE>mode, operands); DONE;")
12312
;; Double-word left shift whose count is (subreg:QI (and REG CONST) 0).
;; Matched only before reload (ix86_pre_reload_split), for an integer-mode
;; count register of 2 to 4 (8 on 64-bit) bytes, and only when the AND
;; constant either has the (<MODE_SIZE> * BITS_PER_UNIT) bit clear or has
;; all of its low (2 * <MODE_SIZE> * BITS_PER_UNIT - 1) bits set.
;;
;; The split proceeds as follows:
;;  - If that bit is set in the constant, the mask is dropped: a plain
;;    ashl<dwi>3_doubleword is emitted on the QImode lowpart of the count
;;    and the split is DONE.
;;  - Otherwise split_double_mode fills operands 4..7 from operands 0 and 1
;;    (presumably 4/5 = low halves, 6/7 = high halves -- confirm against
;;    split_double_mode), operand 8 is set to bits - 1 and operand 9 to
;;    bits.  If the constant does not cover all of the low bits - 1 bits,
;;    an explicit AND of the count with the reduced constant is emitted
;;    into a fresh register.  The count is then taken as a QImode lowpart,
;;    operand 7 is copied into operand 6 if they differ, and the template
;;    emits a double-shift-shaped IOR on operand 6 followed by a
;;    single-word shift of operand 5 into operand 4.
12313 (define_insn_and_split "*ashl<dwi>3_doubleword_mask"
12314 [(set (match_operand:<DWI> 0 "register_operand")
12315 (ashift:<DWI>
12316 (match_operand:<DWI> 1 "register_operand")
12317 (subreg:QI
12318 (and
12319 (match_operand 2 "register_operand" "c")
12320 (match_operand 3 "const_int_operand")) 0)))
12321 (clobber (reg:CC FLAGS_REG))]
12322 "((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0
12323 || ((INTVAL (operands[3]) & (2 * <MODE_SIZE> * BITS_PER_UNIT - 1))
12324 == (2 * <MODE_SIZE> * BITS_PER_UNIT - 1)))
12325 && GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT
12326 && IN_RANGE (GET_MODE_SIZE (GET_MODE (operands[2])), 2,
12327 4 << (TARGET_64BIT ? 1 : 0))
12328 && ix86_pre_reload_split ()"
12329 "#"
12330 "&& 1"
12331 [(parallel
12332 [(set (match_dup 6)
12333 (ior:DWIH (ashift:DWIH (match_dup 6)
12334 (and:QI (match_dup 2) (match_dup 8)))
12335 (subreg:DWIH
12336 (lshiftrt:<DWI> (zero_extend:<DWI> (match_dup 5))
12337 (minus:QI (match_dup 9)
12338 (and:QI (match_dup 2) (match_dup 8)))) 0)))
12339 (clobber (reg:CC FLAGS_REG))])
12340 (parallel
12341 [(set (match_dup 4)
12342 (ashift:DWIH (match_dup 5) (match_dup 2)))
12343 (clobber (reg:CC FLAGS_REG))])]
12344 {
12345 if ((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) != 0)
12346 {
12347 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
12348 operands[2] = gen_lowpart (QImode, operands[2]);
12349 emit_insn (gen_ashl<dwi>3_doubleword (operands[0], operands[1],
12350 operands[2]));
12351 DONE;
12352 }
12353
12354 split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);
12355
12356 operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
12357 operands[9] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
12358
12359 if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
12360 != ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
12361 {
12362 rtx xops[3];
12363 xops[0] = gen_reg_rtx (GET_MODE (operands[2]));
12364 xops[1] = operands[2];
12365 xops[2] = GEN_INT (INTVAL (operands[3])
12366 & ((<MODE_SIZE> * BITS_PER_UNIT) - 1));
12367 ix86_expand_binary_operator (AND, GET_MODE (operands[2]), xops);
12368 operands[2] = xops[0];
12369 }
12370
12371 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
12372 operands[2] = gen_lowpart (QImode, operands[2]);
12373
12374 if (!rtx_equal_p (operands[6], operands[7]))
12375 emit_move_insn (operands[6], operands[7]);
12376 })
12377
;; Variant of the masked double-word left shift in which the count is
;; already a QImode (and:QI REG CONST) rather than a subreg of a wider AND.
;; Same acceptance test on the AND constant, matched only before reload.
;;
;; The split emits a plain ashl<dwi>3_doubleword when the constant has the
;; (<MODE_SIZE> * BITS_PER_UNIT) bit set.  Otherwise it splits operands 0
;; and 1 into operands 4..7, sets operand 8 = bits - 1 and operand 9 =
;; bits, and, when the constant does not cover all of the low bits - 1
;; bits, emits an andqi3 of the count with operand 3 into a fresh QImode
;; register.  Operand 7 is copied into operand 6 if they differ before the
;; two-parallel template is emitted.
12378 (define_insn_and_split "*ashl<dwi>3_doubleword_mask_1"
12379 [(set (match_operand:<DWI> 0 "register_operand")
12380 (ashift:<DWI>
12381 (match_operand:<DWI> 1 "register_operand")
12382 (and:QI
12383 (match_operand:QI 2 "register_operand" "c")
12384 (match_operand:QI 3 "const_int_operand"))))
12385 (clobber (reg:CC FLAGS_REG))]
12386 "((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0
12387 || ((INTVAL (operands[3]) & (2 * <MODE_SIZE> * BITS_PER_UNIT - 1))
12388 == (2 * <MODE_SIZE> * BITS_PER_UNIT - 1)))
12389 && ix86_pre_reload_split ()"
12390 "#"
12391 "&& 1"
12392 [(parallel
12393 [(set (match_dup 6)
12394 (ior:DWIH (ashift:DWIH (match_dup 6)
12395 (and:QI (match_dup 2) (match_dup 8)))
12396 (subreg:DWIH
12397 (lshiftrt:<DWI> (zero_extend:<DWI> (match_dup 5))
12398 (minus:QI (match_dup 9)
12399 (and:QI (match_dup 2) (match_dup 8)))) 0)))
12400 (clobber (reg:CC FLAGS_REG))])
12401 (parallel
12402 [(set (match_dup 4)
12403 (ashift:DWIH (match_dup 5) (match_dup 2)))
12404 (clobber (reg:CC FLAGS_REG))])]
12405 {
12406 if ((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) != 0)
12407 {
12408 emit_insn (gen_ashl<dwi>3_doubleword (operands[0], operands[1],
12409 operands[2]));
12410 DONE;
12411 }
12412
12413 split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);
12414
12415 operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
12416 operands[9] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
12417
12418 if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
12419 != ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
12420 {
12421 rtx tem = gen_reg_rtx (QImode);
12422 emit_insn (gen_andqi3 (tem, operands[2], operands[3]));
12423 operands[2] = tem;
12424 }
12425
12426 if (!rtx_equal_p (operands[6], operands[7]))
12427 emit_move_insn (operands[6], operands[7]);
12428 })
12429
;; Placeholder insn for a double-word (DWI) left shift.  The destination is
;; an earlyclobber register; the input is tied to it or is a constant
;; accepted by reg_or_pm1_operand.  Always enabled and always output as
;; "#", so it must be split before final output.
12430 (define_insn "ashl<mode>3_doubleword"
12431 [(set (match_operand:DWI 0 "register_operand" "=&r")
12432 (ashift:DWI (match_operand:DWI 1 "reg_or_pm1_operand" "0n")
12433 (match_operand:QI 2 "nonmemory_operand" "<S>c")))
12434 (clobber (reg:CC FLAGS_REG))]
12435 ""
12436 "#"
12437 [(set_attr "type" "multi")])
12438
;; Split a double-word left shift once epilogue_completed is true.  The
;; replacement template is empty; ix86_split_ashl emits the sequence and is
;; called here with no scratch register (NULL_RTX).
12439 (define_split
12440 [(set (match_operand:DWI 0 "register_operand")
12441 (ashift:DWI (match_operand:DWI 1 "nonmemory_operand")
12442 (match_operand:QI 2 "nonmemory_operand")))
12443 (clobber (reg:CC FLAGS_REG))]
12444 "epilogue_completed"
12445 [(const_int 0)]
12446 "ix86_split_ashl (operands, NULL_RTX, <MODE>mode); DONE;")
12447
12448 ;; By default we don't ask for a scratch register, because when DWImode
12449 ;; values are manipulated, registers are already at a premium. But if
12450 ;; we have one handy, we won't turn it away.
12451
;; Peephole for a double-word left shift when a DWIH-mode scratch register
;; (operand 3) is available across the insn.  With TARGET_CMOVE the shift
;; is split by ix86_split_ashl, which is handed the scratch register.
12452 (define_peephole2
12453 [(match_scratch:DWIH 3 "r")
12454 (parallel [(set (match_operand:<DWI> 0 "register_operand")
12455 (ashift:<DWI>
12456 (match_operand:<DWI> 1 "nonmemory_operand")
12457 (match_operand:QI 2 "nonmemory_operand")))
12458 (clobber (reg:CC FLAGS_REG))])
12459 (match_dup 3)]
12460 "TARGET_CMOVE"
12461 [(const_int 0)]
12462 "ix86_split_ashl (operands, operands[3], <DWI>mode); DONE;")
12463
;; DImode shld (64-bit only).  Operand 0 is read and written: it is
;; shifted left by (operand 2 & 63) and ORed with the DImode lowpart of
;; the zero-extended operand 1 shifted right, in TImode, by
;; 64 - (operand 2 & 63).  Prints "shld{q}"; the count operand has
;; constraints "Jc".
12464 (define_insn "x86_64_shld"
12465 [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
12466 (ior:DI (ashift:DI (match_dup 0)
12467 (and:QI (match_operand:QI 2 "nonmemory_operand" "Jc")
12468 (const_int 63)))
12469 (subreg:DI
12470 (lshiftrt:TI
12471 (zero_extend:TI
12472 (match_operand:DI 1 "register_operand" "r"))
12473 (minus:QI (const_int 64)
12474 (and:QI (match_dup 2) (const_int 63)))) 0)))
12475 (clobber (reg:CC FLAGS_REG))]
12476 "TARGET_64BIT"
12477 "shld{q}\t{%s2%1, %0|%0, %1, %2}"
12478 [(set_attr "type" "ishift")
12479 (set_attr "prefix_0f" "1")
12480 (set_attr "mode" "DI")
12481 (set_attr "athlon_decode" "vector")
12482 (set_attr "amdfam10_decode" "vector")
12483 (set_attr "bdver1_decode" "vector")])
12484
;; DImode shld with constant counts (64-bit only).  Operand 2 is the left
;; shift count (0..63) and operand 3 the complementary right shift count;
;; the insn condition requires operand 3 == 64 - operand 2.  Only operand 2
;; is printed as the immediate.
12485 (define_insn "x86_64_shld_1"
12486 [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
12487 (ior:DI (ashift:DI (match_dup 0)
12488 (match_operand:QI 2 "const_0_to_63_operand"))
12489 (subreg:DI
12490 (lshiftrt:TI
12491 (zero_extend:TI
12492 (match_operand:DI 1 "register_operand" "r"))
12493 (match_operand:QI 3 "const_0_to_255_operand")) 0)))
12494 (clobber (reg:CC FLAGS_REG))]
12495 "TARGET_64BIT
12496 && INTVAL (operands[3]) == 64 - INTVAL (operands[2])"
12497 "shld{q}\t{%2, %1, %0|%0, %1, %2}"
12498 [(set_attr "type" "ishift")
12499 (set_attr "prefix_0f" "1")
12500 (set_attr "mode" "DI")
12501 (set_attr "length_immediate" "1")
12502 (set_attr "athlon_decode" "vector")
12503 (set_attr "amdfam10_decode" "vector")
12504 (set_attr "bdver1_decode" "vector")])
12505
;; Pre-reload pattern for (op4 << c2) | (op1 >> c3) in DImode with constant
;; counts satisfying c3 == 64 - c2.  The split picks the double-shift form
;; by which input already equals the destination:
;;  - operand 4 equals operand 0: shld_1 in place, operand 1 forced to a
;;    register;
;;  - operand 1 equals operand 0: shrd_1 in place, operand 4 forced to a
;;    register (note the swapped count arguments);
;;  - otherwise: copy operand 4 to a fresh pseudo, shld_1 on the pseudo,
;;    then move the pseudo to operand 0.
12506 (define_insn_and_split "*x86_64_shld_shrd_1_nozext"
12507 [(set (match_operand:DI 0 "nonimmediate_operand")
12508 (ior:DI (ashift:DI (match_operand:DI 4 "nonimmediate_operand")
12509 (match_operand:QI 2 "const_0_to_63_operand"))
12510 (lshiftrt:DI
12511 (match_operand:DI 1 "nonimmediate_operand")
12512 (match_operand:QI 3 "const_0_to_63_operand"))))
12513 (clobber (reg:CC FLAGS_REG))]
12514 "TARGET_64BIT
12515 && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
12516 && ix86_pre_reload_split ()"
12517 "#"
12518 "&& 1"
12519 [(const_int 0)]
12520 {
12521 if (rtx_equal_p (operands[4], operands[0]))
12522 {
12523 operands[1] = force_reg (DImode, operands[1]);
12524 emit_insn (gen_x86_64_shld_1 (operands[0], operands[1], operands[2], operands[3]));
12525 }
12526 else if (rtx_equal_p (operands[1], operands[0]))
12527 {
12528 operands[4] = force_reg (DImode, operands[4]);
12529 emit_insn (gen_x86_64_shrd_1 (operands[0], operands[4], operands[3], operands[2]));
12530 }
12531 else
12532 {
12533 operands[1] = force_reg (DImode, operands[1]);
12534 rtx tmp = gen_reg_rtx (DImode);
12535 emit_move_insn (tmp, operands[4]);
12536 emit_insn (gen_x86_64_shld_1 (tmp, operands[1], operands[2], operands[3]));
12537 emit_move_insn (operands[0], tmp);
12538 }
12539 DONE;
12540 })
12541
;; Pre-reload pattern for (op0 << op2) | (op1 >> (64 - op2)) in DImode
;; with an unmasked count.  It is split unconditionally ("&& 1") into the
;; RTL shape of x86_64_shld: the count is masked with 63 and operand 1 is
;; zero-extended to TImode before the right shift.
12542 (define_insn_and_split "*x86_64_shld_2"
12543 [(set (match_operand:DI 0 "nonimmediate_operand")
12544 (ior:DI (ashift:DI (match_dup 0)
12545 (match_operand:QI 2 "nonmemory_operand"))
12546 (lshiftrt:DI (match_operand:DI 1 "register_operand")
12547 (minus:QI (const_int 64) (match_dup 2)))))
12548 (clobber (reg:CC FLAGS_REG))]
12549 "TARGET_64BIT && ix86_pre_reload_split ()"
12550 "#"
12551 "&& 1"
12552 [(parallel [(set (match_dup 0)
12553 (ior:DI (ashift:DI (match_dup 0)
12554 (and:QI (match_dup 2) (const_int 63)))
12555 (subreg:DI
12556 (lshiftrt:TI
12557 (zero_extend:TI (match_dup 1))
12558 (minus:QI (const_int 64)
12559 (and:QI (match_dup 2)
12560 (const_int 63)))) 0)))
12561 (clobber (reg:CC FLAGS_REG))])])
12562
;; SImode shld, enabled on all targets (empty condition).  Operand 0 is
;; read and written: it is shifted left by (operand 2 & 31) and ORed with
;; the SImode lowpart of the zero-extended operand 1 shifted right, in
;; DImode, by 32 - (operand 2 & 31).  Prints "shld{l}"; the count operand
;; has constraints "Ic".
12563 (define_insn "x86_shld"
12564 [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
12565 (ior:SI (ashift:SI (match_dup 0)
12566 (and:QI (match_operand:QI 2 "nonmemory_operand" "Ic")
12567 (const_int 31)))
12568 (subreg:SI
12569 (lshiftrt:DI
12570 (zero_extend:DI
12571 (match_operand:SI 1 "register_operand" "r"))
12572 (minus:QI (const_int 32)
12573 (and:QI (match_dup 2) (const_int 31)))) 0)))
12574 (clobber (reg:CC FLAGS_REG))]
12575 ""
12576 "shld{l}\t{%s2%1, %0|%0, %1, %2}"
12577 [(set_attr "type" "ishift")
12578 (set_attr "prefix_0f" "1")
12579 (set_attr "mode" "SI")
12580 (set_attr "pent_pair" "np")
12581 (set_attr "athlon_decode" "vector")
12582 (set_attr "amdfam10_decode" "vector")
12583 (set_attr "bdver1_decode" "vector")])
12584
;; SImode shld with constant counts.  Operand 2 is the left shift count
;; (0..31) and operand 3 the complementary right shift count; the insn
;; condition requires operand 3 == 32 - operand 2.  Only operand 2 is
;; printed as the immediate.
12585 (define_insn "x86_shld_1"
12586 [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
12587 (ior:SI (ashift:SI (match_dup 0)
12588 (match_operand:QI 2 "const_0_to_31_operand"))
12589 (subreg:SI
12590 (lshiftrt:DI
12591 (zero_extend:DI
12592 (match_operand:SI 1 "register_operand" "r"))
12593 (match_operand:QI 3 "const_0_to_63_operand")) 0)))
12594 (clobber (reg:CC FLAGS_REG))]
12595 "INTVAL (operands[3]) == 32 - INTVAL (operands[2])"
12596 "shld{l}\t{%2, %1, %0|%0, %1, %2}"
12597 [(set_attr "type" "ishift")
12598 (set_attr "prefix_0f" "1")
12599 (set_attr "length_immediate" "1")
12600 (set_attr "mode" "SI")
12601 (set_attr "pent_pair" "np")
12602 (set_attr "athlon_decode" "vector")
12603 (set_attr "amdfam10_decode" "vector")
12604 (set_attr "bdver1_decode" "vector")])
12605
;; Pre-reload pattern for (op4 << c2) | (op1 >> c3) in SImode with constant
;; counts satisfying c3 == 32 - c2.  The split picks the double-shift form
;; by which input already equals the destination:
;;  - operand 4 equals operand 0: shld_1 in place, operand 1 forced to a
;;    register;
;;  - operand 1 equals operand 0: shrd_1 in place, operand 4 forced to a
;;    register (note the swapped count arguments);
;;  - otherwise: copy operand 4 to a fresh pseudo, shld_1 on the pseudo,
;;    then move the pseudo to operand 0.
12606 (define_insn_and_split "*x86_shld_shrd_1_nozext"
12607 [(set (match_operand:SI 0 "nonimmediate_operand")
12608 (ior:SI (ashift:SI (match_operand:SI 4 "nonimmediate_operand")
12609 (match_operand:QI 2 "const_0_to_31_operand"))
12610 (lshiftrt:SI
12611 (match_operand:SI 1 "nonimmediate_operand")
12612 (match_operand:QI 3 "const_0_to_31_operand"))))
12613 (clobber (reg:CC FLAGS_REG))]
12614 "INTVAL (operands[3]) == 32 - INTVAL (operands[2])
12615 && ix86_pre_reload_split ()"
12616 "#"
12617 "&& 1"
12618 [(const_int 0)]
12619 {
12620 if (rtx_equal_p (operands[4], operands[0]))
12621 {
12622 operands[1] = force_reg (SImode, operands[1]);
12623 emit_insn (gen_x86_shld_1 (operands[0], operands[1], operands[2], operands[3]));
12624 }
12625 else if (rtx_equal_p (operands[1], operands[0]))
12626 {
12627 operands[4] = force_reg (SImode, operands[4]);
12628 emit_insn (gen_x86_shrd_1 (operands[0], operands[4], operands[3], operands[2]));
12629 }
12630 else
12631 {
12632 operands[1] = force_reg (SImode, operands[1]);
12633 rtx tmp = gen_reg_rtx (SImode);
12634 emit_move_insn (tmp, operands[4]);
12635 emit_insn (gen_x86_shld_1 (tmp, operands[1], operands[2], operands[3]));
12636 emit_move_insn (operands[0], tmp);
12637 }
12638 DONE;
12639 })
12640
;; Pre-reload pattern for (op0 << op2) | (op1 >> (32 - op2)) in SImode
;; with an unmasked count.  It is split unconditionally ("&& 1") into the
;; RTL shape of x86_shld: the count is masked with 31 and operand 1 is
;; zero-extended to DImode before the right shift.
;; NOTE(review): this SImode pattern is gated on TARGET_64BIT, although the
;; x86_shld insn it splits to has an empty condition and
;; *x86_shld_shrd_1_nozext has no such gate.  Possibly copied from
;; *x86_64_shld_2; confirm whether the restriction is intentional.
12641 (define_insn_and_split "*x86_shld_2"
12642 [(set (match_operand:SI 0 "nonimmediate_operand")
12643 (ior:SI (ashift:SI (match_dup 0)
12644 (match_operand:QI 2 "nonmemory_operand"))
12645 (lshiftrt:SI (match_operand:SI 1 "register_operand")
12646 (minus:QI (const_int 32) (match_dup 2)))))
12647 (clobber (reg:CC FLAGS_REG))]
12648 "TARGET_64BIT && ix86_pre_reload_split ()"
12649 "#"
12650 "&& 1"
12651 [(parallel [(set (match_dup 0)
12652 (ior:SI (ashift:SI (match_dup 0)
12653 (and:QI (match_dup 2) (const_int 31)))
12654 (subreg:SI
12655 (lshiftrt:DI
12656 (zero_extend:DI (match_dup 1))
12657 (minus:QI (const_int 32)
12658 (and:QI (match_dup 2)
12659 (const_int 31)))) 0)))
12660 (clobber (reg:CC FLAGS_REG))])])
12661
;; Branch-free shift adjustment (requires TARGET_CMOVE).  Operand 4 is set
;; by the preparation statement to the bit size of <MODE>.  The expansion
;; tests (operand 2 & operand 4) and, when the result is nonzero,
;; conditionally moves operand 1 into operand 0 and then operand 3 into
;; operand 1; otherwise both registers keep their values.
12662 (define_expand "@x86_shift<mode>_adj_1"
12663 [(set (reg:CCZ FLAGS_REG)
12664 (compare:CCZ (and:QI (match_operand:QI 2 "register_operand")
12665 (match_dup 4))
12666 (const_int 0)))
12667 (set (match_operand:SWI48 0 "register_operand")
12668 (if_then_else:SWI48 (ne (reg:CCZ FLAGS_REG) (const_int 0))
12669 (match_operand:SWI48 1 "register_operand")
12670 (match_dup 0)))
12671 (set (match_dup 1)
12672 (if_then_else:SWI48 (ne (reg:CCZ FLAGS_REG) (const_int 0))
12673 (match_operand:SWI48 3 "register_operand")
12674 (match_dup 1)))]
12675 "TARGET_CMOVE"
12676 "operands[4] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));")
12677
;; Branching shift adjustment (always enabled).  Emits a test of operand 2
;; against the bit size of <MODE> and a conditional jump over the
;; adjustment when the test result is zero.  On the fall-through path
;; operand 1 is moved into operand 0 and operand 1 is then cleared.  The
;; label is emitted after the adjustment and its use count is set to 1.
12678 (define_expand "@x86_shift<mode>_adj_2"
12679 [(use (match_operand:SWI48 0 "register_operand"))
12680 (use (match_operand:SWI48 1 "register_operand"))
12681 (use (match_operand:QI 2 "register_operand"))]
12682 ""
12683 {
12684 rtx_code_label *label = gen_label_rtx ();
12685 rtx tmp;
12686
12687 emit_insn (gen_testqi_ccz_1 (operands[2],
12688 GEN_INT (GET_MODE_BITSIZE (<MODE>mode))));
12689
12690 tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
12691 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
12692 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12693 gen_rtx_LABEL_REF (VOIDmode, label),
12694 pc_rtx);
12695 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
12696 JUMP_LABEL (tmp) = label;
12697
12698 emit_move_insn (operands[0], operands[1]);
12699 ix86_expand_clear (operands[1]);
12700
12701 emit_label (label);
12702 LABEL_NUSES (label) = 1;
12703
12704 DONE;
12705 })
12706
12707 ;; Avoid useless masking of count operand.
;; Single-word (SWI48) left shift whose count is
;; (subreg:QI (and REG CONST) 0).  Matched before reload when the constant
;; has all of the low GET_MODE_BITSIZE (<MODE>mode) - 1 bits set and the
;; count register is an integer mode of 2 to 4 (8 on 64-bit) bytes.  The
;; split drops the AND and shifts by the QImode lowpart of the register.
12708 (define_insn_and_split "*ashl<mode>3_mask"
12709 [(set (match_operand:SWI48 0 "nonimmediate_operand")
12710 (ashift:SWI48
12711 (match_operand:SWI48 1 "nonimmediate_operand")
12712 (subreg:QI
12713 (and
12714 (match_operand 2 "register_operand" "c,r")
12715 (match_operand 3 "const_int_operand")) 0)))
12716 (clobber (reg:CC FLAGS_REG))]
12717 "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)
12718 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
12719 == GET_MODE_BITSIZE (<MODE>mode)-1
12720 && GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT
12721 && IN_RANGE (GET_MODE_SIZE (GET_MODE (operands[2])), 2,
12722 4 << (TARGET_64BIT ? 1 : 0))
12723 && ix86_pre_reload_split ()"
12724 "#"
12725 "&& 1"
12726 [(parallel
12727 [(set (match_dup 0)
12728 (ashift:SWI48 (match_dup 1)
12729 (match_dup 2)))
12730 (clobber (reg:CC FLAGS_REG))])]
12731 {
12732 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
12733 operands[2] = gen_lowpart (QImode, operands[2]);
12734 }
12735 [(set_attr "isa" "*,bmi2")])
12736
;; Variant of the masked single-word left shift in which the count is
;; already a QImode (and:QI REG CONST).  Same requirement on the constant;
;; the split drops the AND and uses operand 2 directly as the count (no
;; preparation code is needed).
12737 (define_insn_and_split "*ashl<mode>3_mask_1"
12738 [(set (match_operand:SWI48 0 "nonimmediate_operand")
12739 (ashift:SWI48
12740 (match_operand:SWI48 1 "nonimmediate_operand")
12741 (and:QI
12742 (match_operand:QI 2 "register_operand" "c,r")
12743 (match_operand:QI 3 "const_int_operand"))))
12744 (clobber (reg:CC FLAGS_REG))]
12745 "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)
12746 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
12747 == GET_MODE_BITSIZE (<MODE>mode)-1
12748 && ix86_pre_reload_split ()"
12749 "#"
12750 "&& 1"
12751 [(parallel
12752 [(set (match_dup 0)
12753 (ashift:SWI48 (match_dup 1)
12754 (match_dup 2)))
12755 (clobber (reg:CC FLAGS_REG))])]
12756 ""
12757 [(set_attr "isa" "*,bmi2")])
12758
;; BMI2 three-operand left shift (shlx) for the SWI48 modes.  The count is
;; a register in the same mode as the data, the source may be a register or
;; memory, and the pattern has no FLAGS_REG clobber.
12759 (define_insn "*bmi2_ashl<mode>3_1"
12760 [(set (match_operand:SWI48 0 "register_operand" "=r")
12761 (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
12762 (match_operand:SWI48 2 "register_operand" "r")))]
12763 "TARGET_BMI2"
12764 "shlx\t{%2, %1, %0|%0, %1, %2}"
12765 [(set_attr "type" "ishiftx")
12766 (set_attr "mode" "<MODE>")])
12767
;; Main SWI48 left-shift insn with four alternatives.  The type attribute
;; selects the output:
;;   lea     (alternative 1), ishiftx (alternative 2, bmi2) and
;;   msklog  (alternative 3, avx512bw): output as "#", split elsewhere;
;;   alu     (TARGET_DOUBLE_WITH_ADD, register destination, count 1):
;;           printed as "add %0, %0";
;;   ishift  (everything else): "sal", using the one-operand form when the
;;           count is 1 and TARGET_SHIFT1 or optimizing for size.
;; In the type cond the alu test is evaluated before the alternative-3
;; test.  length_immediate is 0 for the add form and for the one-operand
;; sal form.
12768 (define_insn "*ashl<mode>3_1"
12769 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k")
12770 (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k")
12771 (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r,<KS>")))
12772 (clobber (reg:CC FLAGS_REG))]
12773 "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"
12774 {
12775 switch (get_attr_type (insn))
12776 {
12777 case TYPE_LEA:
12778 case TYPE_ISHIFTX:
12779 case TYPE_MSKLOG:
12780 return "#";
12781
12782 case TYPE_ALU:
12783 gcc_assert (operands[2] == const1_rtx);
12784 gcc_assert (rtx_equal_p (operands[0], operands[1]));
12785 return "add{<imodesuffix>}\t%0, %0";
12786
12787 default:
12788 if (operands[2] == const1_rtx
12789 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
12790 return "sal{<imodesuffix>}\t%0";
12791 else
12792 return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
12793 }
12794 }
12795 [(set_attr "isa" "*,*,bmi2,avx512bw")
12796 (set (attr "type")
12797 (cond [(eq_attr "alternative" "1")
12798 (const_string "lea")
12799 (eq_attr "alternative" "2")
12800 (const_string "ishiftx")
12801 (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
12802 (match_operand 0 "register_operand"))
12803 (match_operand 2 "const1_operand"))
12804 (const_string "alu")
12805 (eq_attr "alternative" "3")
12806 (const_string "msklog")
12807 ]
12808 (const_string "ishift")))
12809 (set (attr "length_immediate")
12810 (if_then_else
12811 (ior (eq_attr "type" "alu")
12812 (and (eq_attr "type" "ishift")
12813 (and (match_operand 2 "const1_operand")
12814 (ior (match_test "TARGET_SHIFT1")
12815 (match_test "optimize_function_for_size_p (cfun)")))))
12816 (const_string "0")
12817 (const_string "*")))
12818 (set_attr "mode" "<MODE>")])
12819
12820 ;; Convert shift to the shiftx pattern to avoid flags dependency.
;; After reload with BMI2, a register-count SWI48 left shift into a
;; register loses its FLAGS_REG clobber.  The QImode count is re-viewed in
;; <MODE>mode via gen_lowpart so the result has a same-mode count operand,
;; as *bmi2_ashl<mode>3_1 requires.
12821 (define_split
12822 [(set (match_operand:SWI48 0 "register_operand")
12823 (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
12824 (match_operand:QI 2 "register_operand")))
12825 (clobber (reg:CC FLAGS_REG))]
12826 "TARGET_BMI2 && reload_completed"
12827 [(set (match_dup 0)
12828 (ashift:SWI48 (match_dup 1) (match_dup 2)))]
12829 "operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
12830
;; 64-bit BMI2 shlx on SImode operands whose result is zero-extended into a
;; DImode register; the destination is printed through its %k (32-bit)
;; view.  No FLAGS_REG clobber.
12831 (define_insn "*bmi2_ashlsi3_1_zext"
12832 [(set (match_operand:DI 0 "register_operand" "=r")
12833 (zero_extend:DI
12834 (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
12835 (match_operand:SI 2 "register_operand" "r"))))]
12836 "TARGET_64BIT && TARGET_BMI2"
12837 "shlx\t{%2, %1, %k0|%k0, %1, %2}"
12838 [(set_attr "type" "ishiftx")
12839 (set_attr "mode" "SI")])
12840
;; 64-bit SImode left shift with the result zero-extended to DImode.
;; Three alternatives; the type attribute selects the output:
;;   lea (alternative 1) and ishiftx (alternative 2, bmi2): output as "#";
;;   alu (TARGET_DOUBLE_WITH_ADD and count 1): "add{l} %k0, %k0";
;;   ishift otherwise: "sal{l}", one-operand form when the count is 1 and
;;   TARGET_SHIFT1 or optimizing for size.
;; All forms operate on the %k (32-bit) view of operand 0.
12841 (define_insn "*ashlsi3_1_zext"
12842 [(set (match_operand:DI 0 "register_operand" "=r,r,r")
12843 (zero_extend:DI
12844 (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l,rm")
12845 (match_operand:QI 2 "nonmemory_operand" "cI,M,r"))))
12846 (clobber (reg:CC FLAGS_REG))]
12847 "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
12848 {
12849 switch (get_attr_type (insn))
12850 {
12851 case TYPE_LEA:
12852 case TYPE_ISHIFTX:
12853 return "#";
12854
12855 case TYPE_ALU:
12856 gcc_assert (operands[2] == const1_rtx);
12857 return "add{l}\t%k0, %k0";
12858
12859 default:
12860 if (operands[2] == const1_rtx
12861 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
12862 return "sal{l}\t%k0";
12863 else
12864 return "sal{l}\t{%2, %k0|%k0, %2}";
12865 }
12866 }
12867 [(set_attr "isa" "*,*,bmi2")
12868 (set (attr "type")
12869 (cond [(eq_attr "alternative" "1")
12870 (const_string "lea")
12871 (eq_attr "alternative" "2")
12872 (const_string "ishiftx")
12873 (and (match_test "TARGET_DOUBLE_WITH_ADD")
12874 (match_operand 2 "const1_operand"))
12875 (const_string "alu")
12876 ]
12877 (const_string "ishift")))
12878 (set (attr "length_immediate")
12879 (if_then_else
12880 (ior (eq_attr "type" "alu")
12881 (and (eq_attr "type" "ishift")
12882 (and (match_operand 2 "const1_operand")
12883 (ior (match_test "TARGET_SHIFT1")
12884 (match_test "optimize_function_for_size_p (cfun)")))))
12885 (const_string "0")
12886 (const_string "*")))
12887 (set_attr "mode" "SI")])
12888
12889 ;; Convert shift to the shiftx pattern to avoid flags dependency.
;; Zero-extending counterpart of the shlx split: after reload on 64-bit
;; BMI2 targets the FLAGS_REG clobber is dropped and the QImode count is
;; re-viewed in SImode via gen_lowpart.
12890 (define_split
12891 [(set (match_operand:DI 0 "register_operand")
12892 (zero_extend:DI
12893 (ashift:SI (match_operand:SI 1 "nonimmediate_operand")
12894 (match_operand:QI 2 "register_operand"))))
12895 (clobber (reg:CC FLAGS_REG))]
12896 "TARGET_64BIT && TARGET_BMI2 && reload_completed"
12897 [(set (match_dup 0)
12898 (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
12899 "operands[2] = gen_lowpart (SImode, operands[2]);")
12900
;; HImode left shift with three alternatives.  The type attribute selects
;; the output:
;;   lea (alternative 1, mode SI) and msklog (alternative 2, avx512f):
;;   output as "#";
;;   alu (TARGET_DOUBLE_WITH_ADD, register destination, count 1):
;;   "add{w} %0, %0";
;;   ishift otherwise: "sal{w}", one-operand form when the count is 1 and
;;   TARGET_SHIFT1 or optimizing for size.
12901 (define_insn "*ashlhi3_1"
12902 [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k")
12903 (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k")
12904 (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww")))
12905 (clobber (reg:CC FLAGS_REG))]
12906 "ix86_binary_operator_ok (ASHIFT, HImode, operands)"
12907 {
12908 switch (get_attr_type (insn))
12909 {
12910 case TYPE_LEA:
12911 case TYPE_MSKLOG:
12912 return "#";
12913
12914 case TYPE_ALU:
12915 gcc_assert (operands[2] == const1_rtx);
12916 return "add{w}\t%0, %0";
12917
12918 default:
12919 if (operands[2] == const1_rtx
12920 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
12921 return "sal{w}\t%0";
12922 else
12923 return "sal{w}\t{%2, %0|%0, %2}";
12924 }
12925 }
12926 [(set_attr "isa" "*,*,avx512f")
12927 (set (attr "type")
12928 (cond [(eq_attr "alternative" "1")
12929 (const_string "lea")
12930 (eq_attr "alternative" "2")
12931 (const_string "msklog")
12932 (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
12933 (match_operand 0 "register_operand"))
12934 (match_operand 2 "const1_operand"))
12935 (const_string "alu")
12936 ]
12937 (const_string "ishift")))
12938 (set (attr "length_immediate")
12939 (if_then_else
12940 (ior (eq_attr "type" "alu")
12941 (and (eq_attr "type" "ishift")
12942 (and (match_operand 2 "const1_operand")
12943 (ior (match_test "TARGET_SHIFT1")
12944 (match_test "optimize_function_for_size_p (cfun)")))))
12945 (const_string "0")
12946 (const_string "*")))
12947 (set_attr "mode" "HI,SI,HI")])
12948
;; QImode left shift with four alternatives (modes QI, SI, SI, QI).  The
;; type attribute selects the output:
;;   lea (alternative 2) and msklog (alternative 3, avx512dq): "#";
;;   alu (TARGET_DOUBLE_WITH_ADD, register destination, count 1): an add
;;   of the operand to itself, done as "add{l}" on the %k view when
;;   operand 1 is a register that is not ANY_QI_REGNO_P, else "add{b}";
;;   ishift otherwise: "sal", in the 32-bit form when the insn mode
;;   attribute is SI and the byte form otherwise, using the one-operand
;;   encoding when the count is 1 and TARGET_SHIFT1 or optimizing for size.
;; Alternative 1 (32-bit operation on any "r" register) is preferred for
;; speed only without TARGET_PARTIAL_REG_STALL.
12949 (define_insn "*ashlqi3_1"
12950 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k")
12951 (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k")
12952 (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb")))
12953 (clobber (reg:CC FLAGS_REG))]
12954 "ix86_binary_operator_ok (ASHIFT, QImode, operands)"
12955 {
12956 switch (get_attr_type (insn))
12957 {
12958 case TYPE_LEA:
12959 case TYPE_MSKLOG:
12960 return "#";
12961
12962 case TYPE_ALU:
12963 gcc_assert (operands[2] == const1_rtx);
12964 if (REG_P (operands[1]) && !ANY_QI_REGNO_P (REGNO (operands[1])))
12965 return "add{l}\t%k0, %k0";
12966 else
12967 return "add{b}\t%0, %0";
12968
12969 default:
12970 if (operands[2] == const1_rtx
12971 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
12972 {
12973 if (get_attr_mode (insn) == MODE_SI)
12974 return "sal{l}\t%k0";
12975 else
12976 return "sal{b}\t%0";
12977 }
12978 else
12979 {
12980 if (get_attr_mode (insn) == MODE_SI)
12981 return "sal{l}\t{%2, %k0|%k0, %2}";
12982 else
12983 return "sal{b}\t{%2, %0|%0, %2}";
12984 }
12985 }
12986 }
12987 [(set_attr "isa" "*,*,*,avx512dq")
12988 (set (attr "type")
12989 (cond [(eq_attr "alternative" "2")
12990 (const_string "lea")
12991 (eq_attr "alternative" "3")
12992 (const_string "msklog")
12993 (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
12994 (match_operand 0 "register_operand"))
12995 (match_operand 2 "const1_operand"))
12996 (const_string "alu")
12997 ]
12998 (const_string "ishift")))
12999 (set (attr "length_immediate")
13000 (if_then_else
13001 (ior (eq_attr "type" "alu")
13002 (and (eq_attr "type" "ishift")
13003 (and (match_operand 2 "const1_operand")
13004 (ior (match_test "TARGET_SHIFT1")
13005 (match_test "optimize_function_for_size_p (cfun)")))))
13006 (const_string "0")
13007 (const_string "*")))
13008 (set_attr "mode" "QI,SI,SI,QI")
13009 ;; Potential partial reg stall on alternative 1.
13010 (set (attr "preferred_for_speed")
13011 (cond [(eq_attr "alternative" "1")
13012 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
13013 (symbol_ref "true")))])
13014
13015 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Left shift (SWI12 modes) that writes only the strict_low_part of
;; operand 0 and clobbers the flags.  Alternative 0 shifts in place.
;; Alternative 1 (early-clobbered output, separate input register) outputs
;; "#" and is split after reload into a strict_low_part copy of operand 1
;; into operand 0 followed by the in-place shift.
13016 (define_insn_and_split "*ashl<mode>3_1_slp"
13017 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
13018 (ashift:SWI12 (match_operand:SWI12 1 "register_operand" "0,!<r>")
13019 (match_operand:QI 2 "nonmemory_operand" "cI,cI")))
13020 (clobber (reg:CC FLAGS_REG))]
13021 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
13022 {
13023 if (which_alternative)
13024 return "#";
13025
13026 switch (get_attr_type (insn))
13027 {
13028 case TYPE_ALU:
13029 gcc_assert (operands[2] == const1_rtx);
13030 return "add{<imodesuffix>}\t%0, %0";
13031
13032 default:
13033 if (operands[2] == const1_rtx
13034 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
13035 return "sal{<imodesuffix>}\t%0";
13036 else
13037 return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
13038 }
13039 }
13040 "&& reload_completed"
13041 [(set (strict_low_part (match_dup 0)) (match_dup 1))
13042 (parallel
13043 [(set (strict_low_part (match_dup 0))
13044 (ashift:SWI12 (match_dup 0) (match_dup 2)))
13045 (clobber (reg:CC FLAGS_REG))])]
13046 ""
13047 [(set (attr "type")
13048 (cond [(and (match_test "TARGET_DOUBLE_WITH_ADD")
13049 (match_operand 2 "const1_operand"))
13050 (const_string "alu")
13051 ]
13052 (const_string "ishift")))
13053 (set (attr "length_immediate")
13054 (if_then_else
13055 (ior (eq_attr "type" "alu")
13056 (and (eq_attr "type" "ishift")
13057 (and (match_operand 2 "const1_operand")
13058 (ior (match_test "TARGET_SHIFT1")
13059 (match_test "optimize_function_for_size_p (cfun)")))))
13060 (const_string "0")
13061 (const_string "*")))
13062 (set_attr "mode" "<MODE>")])
13063
13064 ;; Convert ashift to the lea pattern to avoid flags dependency.
;; Applies after reload when the destination and source are different
;; registers and the count satisfies const_0_to_3_operand.  The shift is
;; rewritten as a multiplication by (1 << count) in <LEAMODE>, and the
;; replacement has no flags clobber.
13065 (define_split
13066 [(set (match_operand:SWI 0 "register_operand")
13067 (ashift:SWI (match_operand:SWI 1 "index_register_operand")
13068 (match_operand 2 "const_0_to_3_operand")))
13069 (clobber (reg:CC FLAGS_REG))]
13070 "reload_completed
13071 && REGNO (operands[0]) != REGNO (operands[1])"
13072 [(set (match_dup 0)
13073 (mult:<LEAMODE> (match_dup 1) (match_dup 2)))]
13074 {
/* When the insn mode differs from <LEAMODE>, use the <LEAMODE> lowparts
   of both registers.  */
13075 if (<MODE>mode != <LEAMODE>mode)
13076 {
13077 operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]);
13078 operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]);
13079 }
/* Turn the shift count into the equivalent multiplier.  */
13080 operands[2] = GEN_INT (1 << INTVAL (operands[2]));
13081 })
13082
13083 ;; Convert ashift to the lea pattern to avoid flags dependency.
;; Variant for an SImode shift whose result is zero-extended to DImode.
;; It requires TARGET_64BIT, reload_completed and distinct destination and
;; source registers; the shift becomes an SImode multiplication by
;; (1 << count) under the zero_extend, without a flags clobber.
13084 (define_split
13085 [(set (match_operand:DI 0 "register_operand")
13086 (zero_extend:DI
13087 (ashift:SI (match_operand:SI 1 "index_register_operand")
13088 (match_operand 2 "const_0_to_3_operand"))))
13089 (clobber (reg:CC FLAGS_REG))]
13090 "TARGET_64BIT && reload_completed
13091 && REGNO (operands[0]) != REGNO (operands[1])"
13092 [(set (match_dup 0)
13093 (zero_extend:DI (mult:SI (match_dup 1) (match_dup 2))))]
13094 {
13095 operands[1] = gen_lowpart (SImode, operands[1]);
13096 operands[2] = GEN_INT (1 << INTVAL (operands[2]));
13097 })
13098
13099 ;; This pattern can't accept a variable shift count, since shifts by
13100 ;; zero don't affect the flags. We assume that shifts by constant
13101 ;; zero are optimized away.
;; Left shift that sets the flags register from a comparison of the
;; shifted value with zero and also stores the shifted value in operand 0.
;; The insn condition requires ix86_match_ccmode (insn, CCGOCmode) and,
;; unless optimizing for size or !TARGET_PARTIAL_FLAG_REG_STALL, accepts
;; only a shift by 1 under TARGET_SHIFT1, or under TARGET_DOUBLE_WITH_ADD
;; with a register destination.
13102 (define_insn "*ashl<mode>3_cmp"
13103 [(set (reg FLAGS_REG)
13104 (compare
13105 (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")
13106 (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
13107 (const_int 0)))
13108 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
13109 (ashift:SWI (match_dup 1) (match_dup 2)))]
13110 "(optimize_function_for_size_p (cfun)
13111 || !TARGET_PARTIAL_FLAG_REG_STALL
13112 || (operands[2] == const1_rtx
13113 && (TARGET_SHIFT1
13114 || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))
13115 && ix86_match_ccmode (insn, CCGOCmode)
13116 && ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"
13117 {
13118 switch (get_attr_type (insn))
13119 {
13120 case TYPE_ALU:
13121 gcc_assert (operands[2] == const1_rtx);
13122 return "add{<imodesuffix>}\t%0, %0";
13123
13124 default:
13125 if (operands[2] == const1_rtx
13126 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
13127 return "sal{<imodesuffix>}\t%0";
13128 else
13129 return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
13130 }
13131 }
13132 [(set (attr "type")
13133 (cond [(and (and (match_test "TARGET_DOUBLE_WITH_ADD")
13134 (match_operand 0 "register_operand"))
13135 (match_operand 2 "const1_operand"))
13136 (const_string "alu")
13137 ]
13138 (const_string "ishift")))
13139 (set (attr "length_immediate")
13140 (if_then_else
13141 (ior (eq_attr "type" "alu")
13142 (and (eq_attr "type" "ishift")
13143 (and (match_operand 2 "const1_operand")
13144 (ior (match_test "TARGET_SHIFT1")
13145 (match_test "optimize_function_for_size_p (cfun)")))))
13146 (const_string "0")
13147 (const_string "*")))
13148 (set_attr "mode" "<MODE>")])
13149
;; SImode left shift by a constant accepted by const_1_to_31_operand that
;; sets the flags register from a comparison of the SImode result with
;; zero and stores that result zero-extended in a DImode register.
;; Requires TARGET_64BIT and ix86_match_ccmode (insn, CCGOCmode); output
;; uses the %k0 operand form with the {l} suffix.
13150 (define_insn "*ashlsi3_cmp_zext"
13151 [(set (reg FLAGS_REG)
13152 (compare
13153 (ashift:SI (match_operand:SI 1 "register_operand" "0")
13154 (match_operand:QI 2 "const_1_to_31_operand"))
13155 (const_int 0)))
13156 (set (match_operand:DI 0 "register_operand" "=r")
13157 (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
13158 "TARGET_64BIT
13159 && (optimize_function_for_size_p (cfun)
13160 || !TARGET_PARTIAL_FLAG_REG_STALL
13161 || (operands[2] == const1_rtx
13162 && (TARGET_SHIFT1
13163 || TARGET_DOUBLE_WITH_ADD)))
13164 && ix86_match_ccmode (insn, CCGOCmode)
13165 && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
13166 {
13167 switch (get_attr_type (insn))
13168 {
13169 case TYPE_ALU:
13170 gcc_assert (operands[2] == const1_rtx);
13171 return "add{l}\t%k0, %k0";
13172
13173 default:
13174 if (operands[2] == const1_rtx
13175 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
13176 return "sal{l}\t%k0";
13177 else
13178 return "sal{l}\t{%2, %k0|%k0, %2}";
13179 }
13180 }
13181 [(set (attr "type")
13182 (cond [(and (match_test "TARGET_DOUBLE_WITH_ADD")
13183 (match_operand 2 "const1_operand"))
13184 (const_string "alu")
13185 ]
13186 (const_string "ishift")))
13187 (set (attr "length_immediate")
13188 (if_then_else
13189 (ior (eq_attr "type" "alu")
13190 (and (eq_attr "type" "ishift")
13191 (and (match_operand 2 "const1_operand")
13192 (ior (match_test "TARGET_SHIFT1")
13193 (match_test "optimize_function_for_size_p (cfun)")))))
13194 (const_string "0")
13195 (const_string "*")))
13196 (set_attr "mode" "SI")])
13197
;; Left shift used only for its effect on the flags register: the flags
;; are set from a comparison of the shifted value with zero, and operand 0
;; is a clobbered match_scratch rather than a stored result.  The input
;; operand 1 is tied to operand 0 by the "0" constraint.
13198 (define_insn "*ashl<mode>3_cconly"
13199 [(set (reg FLAGS_REG)
13200 (compare
13201 (ashift:SWI (match_operand:SWI 1 "register_operand" "0")
13202 (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
13203 (const_int 0)))
13204 (clobber (match_scratch:SWI 0 "=<r>"))]
13205 "(optimize_function_for_size_p (cfun)
13206 || !TARGET_PARTIAL_FLAG_REG_STALL
13207 || (operands[2] == const1_rtx
13208 && (TARGET_SHIFT1
13209 || TARGET_DOUBLE_WITH_ADD)))
13210 && ix86_match_ccmode (insn, CCGOCmode)"
13211 {
13212 switch (get_attr_type (insn))
13213 {
13214 case TYPE_ALU:
13215 gcc_assert (operands[2] == const1_rtx);
13216 return "add{<imodesuffix>}\t%0, %0";
13217
13218 default:
13219 if (operands[2] == const1_rtx
13220 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
13221 return "sal{<imodesuffix>}\t%0";
13222 else
13223 return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
13224 }
13225 }
13226 [(set (attr "type")
13227 (cond [(and (and (match_test "TARGET_DOUBLE_WITH_ADD")
13228 (match_operand 0 "register_operand"))
13229 (match_operand 2 "const1_operand"))
13230 (const_string "alu")
13231 ]
13232 (const_string "ishift")))
13233 (set (attr "length_immediate")
13234 (if_then_else
13235 (ior (eq_attr "type" "alu")
13236 (and (eq_attr "type" "ishift")
13237 (and (match_operand 2 "const1_operand")
13238 (ior (match_test "TARGET_SHIFT1")
13239 (match_test "optimize_function_for_size_p (cfun)")))))
13240 (const_string "0")
13241 (const_string "*")))
13242 (set_attr "mode" "<MODE>")])
13243
;; QImode left shift applied in place to the 8-bit field at bit position 8
;; of a "Q"-class register (zero_extract of size 8 at position 8 on both
;; the source and the destination).  The insn condition requires operands
;; 0 and 1 to be the same rtx; output uses the %h0 operand form.
13244 (define_insn "*ashlqi_ext<mode>_2"
13245 [(set (zero_extract:SWI248
13246 (match_operand:SWI248 0 "register_operand" "+Q")
13247 (const_int 8)
13248 (const_int 8))
13249 (subreg:SWI248
13250 (ashift:QI
13251 (subreg:QI
13252 (zero_extract:SWI248
13253 (match_operand:SWI248 1 "register_operand" "0")
13254 (const_int 8)
13255 (const_int 8)) 0)
13256 (match_operand:QI 2 "nonmemory_operand" "cI")) 0))
13257 (clobber (reg:CC FLAGS_REG))]
13258 "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
13259 rtx_equal_p (operands[0], operands[1])"
13260 {
13261 switch (get_attr_type (insn))
13262 {
13263 case TYPE_ALU:
13264 gcc_assert (operands[2] == const1_rtx);
13265 return "add{b}\t%h0, %h0";
13266
13267 default:
13268 if (operands[2] == const1_rtx
13269 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
13270 return "sal{b}\t%h0";
13271 else
13272 return "sal{b}\t{%2, %h0|%h0, %2}";
13273 }
13274 }
13275 [(set (attr "type")
13276 (cond [(and (match_test "TARGET_DOUBLE_WITH_ADD")
13277 (match_operand 2 "const1_operand"))
13278 (const_string "alu")
13279 ]
13280 (const_string "ishift")))
13281 (set (attr "length_immediate")
13282 (if_then_else
13283 (ior (eq_attr "type" "alu")
13284 (and (eq_attr "type" "ishift")
13285 (and (match_operand 2 "const1_operand")
13286 (ior (match_test "TARGET_SHIFT1")
13287 (match_test "optimize_function_for_size_p (cfun)")))))
13288 (const_string "0")
13289 (const_string "*")))
13290 (set_attr "mode" "QI")])
13291
13292 ;; See comment above `ashl<mode>3' about how this works.
13293
;; Named expander for the right shifts covered by the any_shiftrt code
;; iterator, in SDWIM modes with a QImode count.  Expansion is delegated
;; entirely to ix86_expand_binary_operator.
13294 (define_expand "<insn><mode>3"
13295 [(set (match_operand:SDWIM 0 "<shift_operand>")
13296 (any_shiftrt:SDWIM (match_operand:SDWIM 1 "<shift_operand>")
13297 (match_operand:QI 2 "nonmemory_operand")))]
13298 ""
13299 "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
13300
13301 ;; Avoid useless masking of count operand.
;; Matches, before reload only (ix86_pre_reload_split), a right shift whose
;; count is the QImode subreg of (and reg const), where the constant has
;; every bit of GET_MODE_BITSIZE (<MODE>mode) - 1 set and the register is
;; an integer-class mode of size 2 up to 4 (8 when TARGET_64BIT).  It is
;; split into the plain shift, using the QImode lowpart of the unmasked
;; register as the count.
13302 (define_insn_and_split "*<insn><mode>3_mask"
13303 [(set (match_operand:SWI48 0 "nonimmediate_operand")
13304 (any_shiftrt:SWI48
13305 (match_operand:SWI48 1 "nonimmediate_operand")
13306 (subreg:QI
13307 (and
13308 (match_operand 2 "register_operand" "c,r")
13309 (match_operand 3 "const_int_operand")) 0)))
13310 (clobber (reg:CC FLAGS_REG))]
13311 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
13312 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
13313 == GET_MODE_BITSIZE (<MODE>mode)-1
13314 && GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT
13315 && IN_RANGE (GET_MODE_SIZE (GET_MODE (operands[2])), 2,
13316 4 << (TARGET_64BIT ? 1 : 0))
13317 && ix86_pre_reload_split ()"
13318 "#"
13319 "&& 1"
13320 [(parallel
13321 [(set (match_dup 0)
13322 (any_shiftrt:SWI48 (match_dup 1)
13323 (match_dup 2)))
13324 (clobber (reg:CC FLAGS_REG))])]
13325 {
13326 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
13327 operands[2] = gen_lowpart (QImode, operands[2]);
13328 }
13329 [(set_attr "isa" "*,bmi2")])
13330
;; Same masking removal for a count that is already a QImode (and reg
;; const): when the constant has every bit of
;; GET_MODE_BITSIZE (<MODE>mode) - 1 set, the pre-reload split drops the
;; AND and shifts by the QImode register directly.
13331 (define_insn_and_split "*<insn><mode>3_mask_1"
13332 [(set (match_operand:SWI48 0 "nonimmediate_operand")
13333 (any_shiftrt:SWI48
13334 (match_operand:SWI48 1 "nonimmediate_operand")
13335 (and:QI
13336 (match_operand:QI 2 "register_operand" "c,r")
13337 (match_operand:QI 3 "const_int_operand"))))
13338 (clobber (reg:CC FLAGS_REG))]
13339 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
13340 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
13341 == GET_MODE_BITSIZE (<MODE>mode)-1
13342 && ix86_pre_reload_split ()"
13343 "#"
13344 "&& 1"
13345 [(parallel
13346 [(set (match_dup 0)
13347 (any_shiftrt:SWI48 (match_dup 1)
13348 (match_dup 2)))
13349 (clobber (reg:CC FLAGS_REG))])]
13350 ""
13351 [(set_attr "isa" "*,bmi2")])
13352
;; Double-word (<DWI>) right shift whose count is the QImode subreg of
;; (and reg const).  Accepted before reload when the constant either has
;; the (<MODE_SIZE> * BITS_PER_UNIT) bit clear, or has all of the low
;; (2 * <MODE_SIZE> * BITS_PER_UNIT - 1) bits set.
;; In the second case the mask is dropped and the generic
;; <insn><dwi>3_doubleword pattern is emitted.  In the first case the
;; shift is split into two DWIH operations: a funnel-style ior into
;; operands[4] and a plain shift of operands[7] into operands[6].
13353 (define_insn_and_split "*<insn><dwi>3_doubleword_mask"
13354 [(set (match_operand:<DWI> 0 "register_operand")
13355 (any_shiftrt:<DWI>
13356 (match_operand:<DWI> 1 "register_operand")
13357 (subreg:QI
13358 (and
13359 (match_operand 2 "register_operand" "c")
13360 (match_operand 3 "const_int_operand")) 0)))
13361 (clobber (reg:CC FLAGS_REG))]
13362 "((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0
13363 || ((INTVAL (operands[3]) & (2 * <MODE_SIZE> * BITS_PER_UNIT - 1))
13364 == (2 * <MODE_SIZE> * BITS_PER_UNIT - 1)))
13365 && GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT
13366 && IN_RANGE (GET_MODE_SIZE (GET_MODE (operands[2])), 2,
13367 4 << (TARGET_64BIT ? 1 : 0))
13368 && ix86_pre_reload_split ()"
13369 "#"
13370 "&& 1"
13371 [(parallel
13372 [(set (match_dup 4)
13373 (ior:DWIH (lshiftrt:DWIH (match_dup 4)
13374 (and:QI (match_dup 2) (match_dup 8)))
13375 (subreg:DWIH
13376 (ashift:<DWI> (zero_extend:<DWI> (match_dup 7))
13377 (minus:QI (match_dup 9)
13378 (and:QI (match_dup 2) (match_dup 8)))) 0)))
13379 (clobber (reg:CC FLAGS_REG))])
13380 (parallel
13381 [(set (match_dup 6)
13382 (any_shiftrt:DWIH (match_dup 7) (match_dup 2)))
13383 (clobber (reg:CC FLAGS_REG))])]
13384 {
/* The mask keeps the word-size bit: emit the unmasked double-word shift
   and finish.  */
13385 if ((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) != 0)
13386 {
13387 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
13388 operands[2] = gen_lowpart (QImode, operands[2]);
13389 emit_insn (gen_<insn><dwi>3_doubleword (operands[0], operands[1],
13390 operands[2]));
13391 DONE;
13392 }
13393
/* Fills operands[4..7] from operands[0..1]; presumably [4]/[5] are the
   low halves and [6]/[7] the high halves -- confirm against
   split_double_mode.  */
13394 split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);
13395
13396 operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
13397 operands[9] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
13398
/* The mask does not keep every bit below the word size, so the AND with
   those low mask bits must still be performed explicitly.  */
13399 if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
13400 != ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
13401 {
13402 rtx xops[3];
13403 xops[0] = gen_reg_rtx (GET_MODE (operands[2]));
13404 xops[1] = operands[2];
13405 xops[2] = GEN_INT (INTVAL (operands[3])
13406 & ((<MODE_SIZE> * BITS_PER_UNIT) - 1));
13407 ix86_expand_binary_operator (AND, GET_MODE (operands[2]), xops);
13408 operands[2] = xops[0];
13409 }
13410
13411 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
13412 operands[2] = gen_lowpart (QImode, operands[2]);
13413
13414 if (!rtx_equal_p (operands[4], operands[5]))
13415 emit_move_insn (operands[4], operands[5]);
13416 })
13417
;; Double-word (<DWI>) right shift whose count is a QImode (and reg const).
;; The acceptance test on the constant and the two-insn replacement are
;; the same as in *<insn><dwi>3_doubleword_mask; here the count register
;; is already QImode, so no lowpart conversion is performed.
13418 (define_insn_and_split "*<insn><dwi>3_doubleword_mask_1"
13419 [(set (match_operand:<DWI> 0 "register_operand")
13420 (any_shiftrt:<DWI>
13421 (match_operand:<DWI> 1 "register_operand")
13422 (and:QI
13423 (match_operand:QI 2 "register_operand" "c")
13424 (match_operand:QI 3 "const_int_operand"))))
13425 (clobber (reg:CC FLAGS_REG))]
13426 "((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0
13427 || ((INTVAL (operands[3]) & (2 * <MODE_SIZE> * BITS_PER_UNIT - 1))
13428 == (2 * <MODE_SIZE> * BITS_PER_UNIT - 1)))
13429 && ix86_pre_reload_split ()"
13430 "#"
13431 "&& 1"
13432 [(parallel
13433 [(set (match_dup 4)
13434 (ior:DWIH (lshiftrt:DWIH (match_dup 4)
13435 (and:QI (match_dup 2) (match_dup 8)))
13436 (subreg:DWIH
13437 (ashift:<DWI> (zero_extend:<DWI> (match_dup 7))
13438 (minus:QI (match_dup 9)
13439 (and:QI (match_dup 2) (match_dup 8)))) 0)))
13440 (clobber (reg:CC FLAGS_REG))])
13441 (parallel
13442 [(set (match_dup 6)
13443 (any_shiftrt:DWIH (match_dup 7) (match_dup 2)))
13444 (clobber (reg:CC FLAGS_REG))])]
13445 {
/* The mask keeps the word-size bit: emit the unmasked double-word shift
   and finish.  */
13446 if ((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) != 0)
13447 {
13448 emit_insn (gen_<insn><dwi>3_doubleword (operands[0], operands[1],
13449 operands[2]));
13450 DONE;
13451 }
13452
13453 split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);
13454
13455 operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
13456 operands[9] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
13457
/* The mask does not keep every bit below the word size: apply the AND
   into a fresh QImode register and shift by that instead.  */
13458 if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
13459 != ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
13460 {
13461 rtx tem = gen_reg_rtx (QImode);
13462 emit_insn (gen_andqi3 (tem, operands[2], operands[3]));
13463 operands[2] = tem;
13464 }
13465
13466 if (!rtx_equal_p (operands[4], operands[5]))
13467 emit_move_insn (operands[4], operands[5]);
13468 })
13469
;; Generic double-word (DWI) right shift with a flags clobber.  The output
;; template is "#"; once epilogue_completed holds, the insn is split by
;; calling ix86_split_<insn> with NULL_RTX as the scratch argument.
13470 (define_insn_and_split "<insn><mode>3_doubleword"
13471 [(set (match_operand:DWI 0 "register_operand" "=&r")
13472 (any_shiftrt:DWI (match_operand:DWI 1 "register_operand" "0")
13473 (match_operand:QI 2 "nonmemory_operand" "<S>c")))
13474 (clobber (reg:CC FLAGS_REG))]
13475 ""
13476 "#"
13477 "epilogue_completed"
13478 [(const_int 0)]
13479 "ix86_split_<insn> (operands, NULL_RTX, <MODE>mode); DONE;"
13480 [(set_attr "type" "multi")])
13481
13482 ;; By default we don't ask for a scratch register, because when DWImode
13483 ;; values are manipulated, registers are already at a premium. But if
13484 ;; we have one handy, we won't turn it away.
13485
;; When TARGET_CMOVE holds and a DWIH "r" scratch (operand 3) is available
;; around a double-word right shift, split the shift right here by calling
;; ix86_split_<insn> with that scratch register.
13486 (define_peephole2
13487 [(match_scratch:DWIH 3 "r")
13488 (parallel [(set (match_operand:<DWI> 0 "register_operand")
13489 (any_shiftrt:<DWI>
13490 (match_operand:<DWI> 1 "register_operand")
13491 (match_operand:QI 2 "nonmemory_operand")))
13492 (clobber (reg:CC FLAGS_REG))])
13493 (match_dup 3)]
13494 "TARGET_CMOVE"
13495 [(const_int 0)]
13496 "ix86_split_<insn> (operands, operands[3], <DWI>mode); DONE;")
13497
;; 64-bit shrd with a count from the "J" or "c" constraint.  Operand 0 is
;; read and written ("+"): it is shifted right logically by (count & 63)
;; and ior-ed with the DImode subreg of operand 1 zero-extended to TImode
;; and shifted left by 64 - (count & 63).  Requires TARGET_64BIT.
13498 (define_insn "x86_64_shrd"
13499 [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
13500 (ior:DI (lshiftrt:DI (match_dup 0)
13501 (and:QI (match_operand:QI 2 "nonmemory_operand" "Jc")
13502 (const_int 63)))
13503 (subreg:DI
13504 (ashift:TI
13505 (zero_extend:TI
13506 (match_operand:DI 1 "register_operand" "r"))
13507 (minus:QI (const_int 64)
13508 (and:QI (match_dup 2) (const_int 63)))) 0)))
13509 (clobber (reg:CC FLAGS_REG))]
13510 "TARGET_64BIT"
13511 "shrd{q}\t{%s2%1, %0|%0, %1, %2}"
13512 [(set_attr "type" "ishift")
13513 (set_attr "prefix_0f" "1")
13514 (set_attr "mode" "DI")
13515 (set_attr "athlon_decode" "vector")
13516 (set_attr "amdfam10_decode" "vector")
13517 (set_attr "bdver1_decode" "vector")])
13518
;; 64-bit shrd with constant counts.  Operand 2 is the right-shift count
;; applied to operand 0 and operand 3 the left-shift count applied to the
;; zero-extended operand 1; the insn condition requires
;; operand 3 == 64 - operand 2.  Only operand 2 appears in the output.
13519 (define_insn "x86_64_shrd_1"
13520 [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
13521 (ior:DI (lshiftrt:DI (match_dup 0)
13522 (match_operand:QI 2 "const_0_to_63_operand"))
13523 (subreg:DI
13524 (ashift:TI
13525 (zero_extend:TI
13526 (match_operand:DI 1 "register_operand" "r"))
13527 (match_operand:QI 3 "const_0_to_255_operand")) 0)))
13528 (clobber (reg:CC FLAGS_REG))]
13529 "TARGET_64BIT
13530 && INTVAL (operands[3]) == 64 - INTVAL (operands[2])"
13531 "shrd{q}\t{%2, %1, %0|%0, %1, %2}"
13532 [(set_attr "type" "ishift")
13533 (set_attr "prefix_0f" "1")
13534 (set_attr "length_immediate" "1")
13535 (set_attr "mode" "DI")
13536 (set_attr "athlon_decode" "vector")
13537 (set_attr "amdfam10_decode" "vector")
13538 (set_attr "bdver1_decode" "vector")])
13539
;; DImode ior of (operand 4 >> operand 2) and (operand 1 << operand 3)
;; with constant counts satisfying operand 3 == 64 - operand 2, written
;; without the zero_extend/subreg form.  Before reload it is split into
;; x86_64_shrd_1 or x86_64_shld_1 depending on which input equals the
;; destination; if neither does, a temporary register is used.
13540 (define_insn_and_split "*x86_64_shrd_shld_1_nozext"
13541 [(set (match_operand:DI 0 "nonimmediate_operand")
13542 (ior:DI (lshiftrt:DI (match_operand:DI 4 "nonimmediate_operand")
13543 (match_operand:QI 2 "const_0_to_63_operand"))
13544 (ashift:DI
13545 (match_operand:DI 1 "nonimmediate_operand")
13546 (match_operand:QI 3 "const_0_to_63_operand"))))
13547 (clobber (reg:CC FLAGS_REG))]
13548 "TARGET_64BIT
13549 && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
13550 && ix86_pre_reload_split ()"
13551 "#"
13552 "&& 1"
13553 [(const_int 0)]
13554 {
/* The right-shifted input is the destination: shrd in place.  */
13555 if (rtx_equal_p (operands[4], operands[0]))
13556 {
13557 operands[1] = force_reg (DImode, operands[1]);
13558 emit_insn (gen_x86_64_shrd_1 (operands[0], operands[1], operands[2], operands[3]));
13559 }
/* The left-shifted input is the destination: shld in place, with the two
   counts passed in swapped order.  */
13560 else if (rtx_equal_p (operands[1], operands[0]))
13561 {
13562 operands[4] = force_reg (DImode, operands[4]);
13563 emit_insn (gen_x86_64_shld_1 (operands[0], operands[4], operands[3], operands[2]));
13564 }
/* Neither input is the destination: copy operand 4 to a new pseudo, shrd
   on that, then move the result to the destination.  */
13565 else
13566 {
13567 operands[1] = force_reg (DImode, operands[1]);
13568 rtx tmp = gen_reg_rtx (DImode);
13569 emit_move_insn (tmp, operands[4]);
13570 emit_insn (gen_x86_64_shrd_1 (tmp, operands[1], operands[2], operands[3]));
13571 emit_move_insn (operands[0], tmp);
13572 }
13573 DONE;
13574 })
13575
;; DImode ior of (operand 0 >> operand 2) and (operand 1 << (64 - operand 2))
;; with a non-constant-restricted count and no explicit masking.  Before
;; reload it is rewritten into the RTL shape of x86_64_shrd, i.e. with the
;; count and-ed with 63 and the left shift done on the zero-extended TImode
;; value.
13576 (define_insn_and_split "*x86_64_shrd_2"
13577 [(set (match_operand:DI 0 "nonimmediate_operand")
13578 (ior:DI (lshiftrt:DI (match_dup 0)
13579 (match_operand:QI 2 "nonmemory_operand"))
13580 (ashift:DI (match_operand:DI 1 "register_operand")
13581 (minus:QI (const_int 64) (match_dup 2)))))
13582 (clobber (reg:CC FLAGS_REG))]
13583 "TARGET_64BIT && ix86_pre_reload_split ()"
13584 "#"
13585 "&& 1"
13586 [(parallel [(set (match_dup 0)
13587 (ior:DI (lshiftrt:DI (match_dup 0)
13588 (and:QI (match_dup 2) (const_int 63)))
13589 (subreg:DI
13590 (ashift:TI
13591 (zero_extend:TI (match_dup 1))
13592 (minus:QI (const_int 64)
13593 (and:QI (match_dup 2)
13594 (const_int 63)))) 0)))
13595 (clobber (reg:CC FLAGS_REG))])])
13596
;; 32-bit shrd with a count from the "I" or "c" constraint.  Operand 0 is
;; read and written ("+"): it is shifted right logically by (count & 31)
;; and ior-ed with the SImode subreg of operand 1 zero-extended to DImode
;; and shifted left by 32 - (count & 31).  The insn condition is empty.
13597 (define_insn "x86_shrd"
13598 [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
13599 (ior:SI (lshiftrt:SI (match_dup 0)
13600 (and:QI (match_operand:QI 2 "nonmemory_operand" "Ic")
13601 (const_int 31)))
13602 (subreg:SI
13603 (ashift:DI
13604 (zero_extend:DI
13605 (match_operand:SI 1 "register_operand" "r"))
13606 (minus:QI (const_int 32)
13607 (and:QI (match_dup 2) (const_int 31)))) 0)))
13608 (clobber (reg:CC FLAGS_REG))]
13609 ""
13610 "shrd{l}\t{%s2%1, %0|%0, %1, %2}"
13611 [(set_attr "type" "ishift")
13612 (set_attr "prefix_0f" "1")
13613 (set_attr "mode" "SI")
13614 (set_attr "pent_pair" "np")
13615 (set_attr "athlon_decode" "vector")
13616 (set_attr "amdfam10_decode" "vector")
13617 (set_attr "bdver1_decode" "vector")])
13618
;; 32-bit shrd with constant counts.  Operand 2 is the right-shift count
;; applied to operand 0 and operand 3 the left-shift count applied to the
;; zero-extended operand 1; the insn condition requires
;; operand 3 == 32 - operand 2.  Only operand 2 appears in the output.
13619 (define_insn "x86_shrd_1"
13620 [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
13621 (ior:SI (lshiftrt:SI (match_dup 0)
13622 (match_operand:QI 2 "const_0_to_31_operand"))
13623 (subreg:SI
13624 (ashift:DI
13625 (zero_extend:DI
13626 (match_operand:SI 1 "register_operand" "r"))
13627 (match_operand:QI 3 "const_0_to_63_operand")) 0)))
13628 (clobber (reg:CC FLAGS_REG))]
13629 "INTVAL (operands[3]) == 32 - INTVAL (operands[2])"
13630 "shrd{l}\t{%2, %1, %0|%0, %1, %2}"
13631 [(set_attr "type" "ishift")
13632 (set_attr "prefix_0f" "1")
13633 (set_attr "length_immediate" "1")
13634 (set_attr "mode" "SI")
13635 (set_attr "pent_pair" "np")
13636 (set_attr "athlon_decode" "vector")
13637 (set_attr "amdfam10_decode" "vector")
13638 (set_attr "bdver1_decode" "vector")])
13639
;; SImode ior of (operand 4 >> operand 2) and (operand 1 << operand 3)
;; with constant counts satisfying operand 3 == 32 - operand 2, written
;; without the zero_extend/subreg form.  Before reload it is split into
;; x86_shrd_1 or x86_shld_1 depending on which input equals the
;; destination; if neither does, a temporary register is used.
13640 (define_insn_and_split "*x86_shrd_shld_1_nozext"
13641 [(set (match_operand:SI 0 "nonimmediate_operand")
13642 (ior:SI (lshiftrt:SI (match_operand:SI 4 "nonimmediate_operand")
13643 (match_operand:QI 2 "const_0_to_31_operand"))
13644 (ashift:SI
13645 (match_operand:SI 1 "nonimmediate_operand")
13646 (match_operand:QI 3 "const_0_to_31_operand"))))
13647 (clobber (reg:CC FLAGS_REG))]
13648 "INTVAL (operands[3]) == 32 - INTVAL (operands[2])
13649 && ix86_pre_reload_split ()"
13650 "#"
13651 "&& 1"
13652 [(const_int 0)]
13653 {
/* The right-shifted input is the destination: shrd in place.  */
13654 if (rtx_equal_p (operands[4], operands[0]))
13655 {
13656 operands[1] = force_reg (SImode, operands[1]);
13657 emit_insn (gen_x86_shrd_1 (operands[0], operands[1], operands[2], operands[3]));
13658 }
/* The left-shifted input is the destination: shld in place, with the two
   counts passed in swapped order.  */
13659 else if (rtx_equal_p (operands[1], operands[0]))
13660 {
13661 operands[4] = force_reg (SImode, operands[4]);
13662 emit_insn (gen_x86_shld_1 (operands[0], operands[4], operands[3], operands[2]));
13663 }
/* Neither input is the destination: copy operand 4 to a new pseudo, shrd
   on that, then move the result to the destination.  */
13664 else
13665 {
13666 operands[1] = force_reg (SImode, operands[1]);
13667 rtx tmp = gen_reg_rtx (SImode);
13668 emit_move_insn (tmp, operands[4]);
13669 emit_insn (gen_x86_shrd_1 (tmp, operands[1], operands[2], operands[3]));
13670 emit_move_insn (operands[0], tmp);
13671 }
13672 DONE;
13673 })
13674
;; SImode ior of (operand 0 >> operand 2) and (operand 1 << (32 - operand 2))
;; with no explicit masking of the count.  Before reload it is rewritten
;; into the RTL shape of x86_shrd, i.e. with the count and-ed with 31 and
;; the left shift done on the zero-extended DImode value.
;; NOTE(review): this pattern is gated on TARGET_64BIT although the
;; SImode x86_shrd it rewrites into has an empty condition -- confirm
;; whether the 64-bit-only restriction is intended.
13675 (define_insn_and_split "*x86_shrd_2"
13676 [(set (match_operand:SI 0 "nonimmediate_operand")
13677 (ior:SI (lshiftrt:SI (match_dup 0)
13678 (match_operand:QI 2 "nonmemory_operand"))
13679 (ashift:SI (match_operand:SI 1 "register_operand")
13680 (minus:QI (const_int 32) (match_dup 2)))))
13681 (clobber (reg:CC FLAGS_REG))]
13682 "TARGET_64BIT && ix86_pre_reload_split ()"
13683 "#"
13684 "&& 1"
13685 [(parallel [(set (match_dup 0)
13686 (ior:SI (lshiftrt:SI (match_dup 0)
13687 (and:QI (match_dup 2) (const_int 31)))
13688 (subreg:SI
13689 (ashift:DI
13690 (zero_extend:DI (match_dup 1))
13691 (minus:QI (const_int 32)
13692 (and:QI (match_dup 2)
13693 (const_int 31)))) 0)))
13694 (clobber (reg:CC FLAGS_REG))])])
13695
13696 ;; Base name for insn mnemonic.
;; Maps SI to "{cltd|cdq}" and DI to "{cqto|cqo}"; it is used as the whole
;; output template of alternative 0 of ashr<mode>3_cvt.
13697 (define_mode_attr cvt_mnemonic
13698 [(SI "{cltd|cdq}") (DI "{cqto|cqo}")])
13699
;; Arithmetic right shift by exactly GET_MODE_BITSIZE (<MODE>mode) - 1,
;; accepted when TARGET_USE_CLTD or when optimizing for size.
;; Alternative 0 (input "*a", output "*d") emits the <cvt_mnemonic>
;; instruction; alternative 1 emits an in-place sar with the constant.
13700 (define_insn "ashr<mode>3_cvt"
13701 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=*d,rm")
13702 (ashiftrt:SWI48
13703 (match_operand:SWI48 1 "nonimmediate_operand" "*a,0")
13704 (match_operand:QI 2 "const_int_operand")))
13705 (clobber (reg:CC FLAGS_REG))]
13706 "INTVAL (operands[2]) == GET_MODE_BITSIZE (<MODE>mode)-1
13707 && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
13708 && ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands)"
13709 "@
13710 <cvt_mnemonic>
13711 sar{<imodesuffix>}\t{%2, %0|%0, %2}"
13712 [(set_attr "type" "imovx,ishift")
13713 (set_attr "prefix_0f" "0,*")
13714 (set_attr "length_immediate" "0,*")
13715 (set_attr "modrm" "0,1")
13716 (set_attr "mode" "<MODE>")])
13717
;; SImode arithmetic right shift by exactly 31 whose result is
;; zero-extended into a DImode register; requires TARGET_64BIT and either
;; TARGET_USE_CLTD or optimizing for size.  Alternative 0 (input "*a",
;; output "*d") emits {cltd|cdq}; alternative 1 emits sar{l} on %k0.
13718 (define_insn "*ashrsi3_cvt_zext"
13719 [(set (match_operand:DI 0 "register_operand" "=*d,r")
13720 (zero_extend:DI
13721 (ashiftrt:SI (match_operand:SI 1 "register_operand" "*a,0")
13722 (match_operand:QI 2 "const_int_operand"))))
13723 (clobber (reg:CC FLAGS_REG))]
13724 "TARGET_64BIT && INTVAL (operands[2]) == 31
13725 && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
13726 && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
13727 "@
13728 {cltd|cdq}
13729 sar{l}\t{%2, %k0|%k0, %2}"
13730 [(set_attr "type" "imovx,ishift")
13731 (set_attr "prefix_0f" "0,*")
13732 (set_attr "length_immediate" "0,*")
13733 (set_attr "modrm" "0,1")
13734 (set_attr "mode" "SI")])
13735
;; Expander taking two SWI48 registers (operands 0 and 1) and a QImode
;; count register (operand 2).  It emits a test of the count against
;; GET_MODE_BITSIZE (<MODE>mode) and a conditional jump over the
;; adjustment when the tested bit is zero.  Otherwise operand 1 is copied
;; to operand 0 and operand 1 is then shifted right arithmetically by
;; GET_MODE_BITSIZE (<MODE>mode) - 1.
13736 (define_expand "@x86_shift<mode>_adj_3"
13737 [(use (match_operand:SWI48 0 "register_operand"))
13738 (use (match_operand:SWI48 1 "register_operand"))
13739 (use (match_operand:QI 2 "register_operand"))]
13740 ""
13741 {
13742 rtx_code_label *label = gen_label_rtx ();
13743 rtx tmp;
13744
13745 emit_insn (gen_testqi_ccz_1 (operands[2],
13746 GEN_INT (GET_MODE_BITSIZE (<MODE>mode))));
13747
/* Build "if (flags == 0) goto label" on the CCZmode flags register.  */
13748 tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
13749 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13750 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13751 gen_rtx_LABEL_REF (VOIDmode, label),
13752 pc_rtx);
13753 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
13754 JUMP_LABEL (tmp) = label;
13755
13756 emit_move_insn (operands[0], operands[1]);
13757 emit_insn (gen_ashr<mode>3_cvt (operands[1], operands[1],
13758 GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1)));
13759 emit_label (label);
13760 LABEL_NUSES (label) = 1;
13761
13762 DONE;
13763 })
13764
;; TARGET_BMI2 three-operand right shift ("<shift>x"): register
;; destination, register-or-memory source and a register count in the
;; same SWI48 mode.  The pattern has no flags clobber.
13765 (define_insn "*bmi2_<insn><mode>3_1"
13766 [(set (match_operand:SWI48 0 "register_operand" "=r")
13767 (any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
13768 (match_operand:SWI48 2 "register_operand" "r")))]
13769 "TARGET_BMI2"
13770 "<shift>x\t{%2, %1, %0|%0, %1, %2}"
13771 [(set_attr "type" "ishiftx")
13772 (set_attr "mode" "<MODE>")])
13773
;; Arithmetic right shift in SWI48 modes with a flags clobber.
;; Alternative 0 is the in-place sar; alternative 1 is typed "ishiftx",
;; enabled for isa bmi2, and outputs "#".  A shift by 1 uses the
;; one-operand sar form when TARGET_SHIFT1 or optimizing for size.
13774 (define_insn "*ashr<mode>3_1"
13775 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
13776 (ashiftrt:SWI48
13777 (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
13778 (match_operand:QI 2 "nonmemory_operand" "c<S>,r")))
13779 (clobber (reg:CC FLAGS_REG))]
13780 "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands)"
13781 {
13782 switch (get_attr_type (insn))
13783 {
13784 case TYPE_ISHIFTX:
13785 return "#";
13786
13787 default:
13788 if (operands[2] == const1_rtx
13789 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
13790 return "sar{<imodesuffix>}\t%0";
13791 else
13792 return "sar{<imodesuffix>}\t{%2, %0|%0, %2}";
13793 }
13794 }
13795 [(set_attr "isa" "*,bmi2")
13796 (set_attr "type" "ishift,ishiftx")
13797 (set (attr "length_immediate")
13798 (if_then_else
13799 (and (match_operand 2 "const1_operand")
13800 (ior (match_test "TARGET_SHIFT1")
13801 (match_test "optimize_function_for_size_p (cfun)")))
13802 (const_string "0")
13803 (const_string "*")))
13804 (set_attr "mode" "<MODE>")])
13805
13806 ;; Specialization of *lshr<mode>3_1 below that extracts the SImode
13807 ;; high part of a DImode register, allowing the source to be clobbered.
;; Requires TARGET_64BIT; the output template is "#" and the split runs
;; after reload.  When operand 0 is an SSE register the split emits
;; gen_sse_shufps_v4si on the V4SImode view of that register; otherwise
;; operand 0 is re-expressed as a DImode register with the same REGNO and
;; the replacement is a DImode logical right shift by 32.
13808 (define_insn_and_split "*highpartdisi2"
13809 [(set (subreg:DI (match_operand:SI 0 "register_operand" "=r,x,?k") 0)
13810 (lshiftrt:DI (match_operand:DI 1 "register_operand" "0,0,k")
13811 (const_int 32)))
13812 (clobber (reg:CC FLAGS_REG))]
13813 "TARGET_64BIT"
13814 "#"
13815 "&& reload_completed"
13816 [(parallel
13817 [(set (match_dup 0) (lshiftrt:DI (match_dup 1) (const_int 32)))
13818 (clobber (reg:CC FLAGS_REG))])]
13819 {
13820 if (SSE_REG_P (operands[0]))
13821 {
13822 rtx tmp = gen_rtx_REG (V4SImode, REGNO (operands[0]));
13823 emit_insn (gen_sse_shufps_v4si (tmp, tmp, tmp,
13824 const1_rtx, const1_rtx,
13825 GEN_INT (5), GEN_INT (5)));
13826 DONE;
13827 }
13828 operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
13829 })
13830
;; Logical right shift in SWI48 modes with a flags clobber.  Alternative 0
;; is the in-place shr; alternative 1 is typed "ishiftx" (isa bmi2) and
;; alternative 2 is the mask-register form typed "msklog" (isa avx512bw);
;; both of the latter output "#".  The zero length_immediate case is
;; restricted to alternative 0.
13831 (define_insn "*lshr<mode>3_1"
13832 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,?k")
13833 (lshiftrt:SWI48
13834 (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,k")
13835 (match_operand:QI 2 "nonmemory_operand" "c<S>,r,<KS>")))
13836 (clobber (reg:CC FLAGS_REG))]
13837 "ix86_binary_operator_ok (LSHIFTRT, <MODE>mode, operands)"
13838 {
13839 switch (get_attr_type (insn))
13840 {
13841 case TYPE_ISHIFTX:
13842 case TYPE_MSKLOG:
13843 return "#";
13844
13845 default:
13846 if (operands[2] == const1_rtx
13847 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
13848 return "shr{<imodesuffix>}\t%0";
13849 else
13850 return "shr{<imodesuffix>}\t{%2, %0|%0, %2}";
13851 }
13852 }
13853 [(set_attr "isa" "*,bmi2,avx512bw")
13854 (set_attr "type" "ishift,ishiftx,msklog")
13855 (set (attr "length_immediate")
13856 (if_then_else
13857 (and (and (match_operand 2 "const1_operand")
13858 (eq_attr "alternative" "0"))
13859 (ior (match_test "TARGET_SHIFT1")
13860 (match_test "optimize_function_for_size_p (cfun)")))
13861 (const_string "0")
13862 (const_string "*")))
13863 (set_attr "mode" "<MODE>")])
13864
13865 ;; Convert shift to the shiftx pattern to avoid flags dependency.
;; Applies after reload under TARGET_BMI2 to a right shift with a register
;; destination and a QImode register count.  The replacement drops the
;; flags clobber and uses the <MODE>mode lowpart of the count register.
13866 (define_split
13867 [(set (match_operand:SWI48 0 "register_operand")
13868 (any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
13869 (match_operand:QI 2 "register_operand")))
13870 (clobber (reg:CC FLAGS_REG))]
13871 "TARGET_BMI2 && reload_completed"
13872 [(set (match_dup 0)
13873 (any_shiftrt:SWI48 (match_dup 1) (match_dup 2)))]
13874 "operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
13875
;; TARGET_64BIT && TARGET_BMI2 three-operand SImode right shift
;; ("<shift>x") whose result is zero-extended into a DImode register; the
;; destination is printed with the %k0 operand form.  No flags clobber.
13876 (define_insn "*bmi2_<insn>si3_1_zext"
13877 [(set (match_operand:DI 0 "register_operand" "=r")
13878 (zero_extend:DI
13879 (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
13880 (match_operand:SI 2 "register_operand" "r"))))]
13881 "TARGET_64BIT && TARGET_BMI2"
13882 "<shift>x\t{%2, %1, %k0|%k0, %1, %2}"
13883 [(set_attr "type" "ishiftx")
13884 (set_attr "mode" "SI")])
13885
;; SImode right shift zero-extended to DImode, clobbering FLAGS_REG
;; (TARGET_64BIT only).  Alternative 0 shifts in place (type ishift);
;; alternative 1 is the bmi2 alternative (type ishiftx) and outputs "#".
;; For ishift the count is omitted from the assembly when operand 2 is
;; const1_rtx and TARGET_SHIFT1 is set or the function is optimized for
;; size; length_immediate is 0 under the matching attribute condition.
13886 (define_insn "*<insn>si3_1_zext"
13887 [(set (match_operand:DI 0 "register_operand" "=r,r")
13888 (zero_extend:DI
13889 (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
13890 (match_operand:QI 2 "nonmemory_operand" "cI,r"))))
13891 (clobber (reg:CC FLAGS_REG))]
13892 "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
13893 {
13894 switch (get_attr_type (insn))
13895 {
13896 case TYPE_ISHIFTX:
13897 return "#";
13898
13899 default:
13900 if (operands[2] == const1_rtx
13901 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
13902 return "<shift>{l}\t%k0";
13903 else
13904 return "<shift>{l}\t{%2, %k0|%k0, %2}";
13905 }
13906 }
13907 [(set_attr "isa" "*,bmi2")
13908 (set_attr "type" "ishift,ishiftx")
13909 (set (attr "length_immediate")
13910 (if_then_else
13911 (and (match_operand 2 "const1_operand")
13912 (ior (match_test "TARGET_SHIFT1")
13913 (match_test "optimize_function_for_size_p (cfun)")))
13914 (const_string "0")
13915 (const_string "*")))
13916 (set_attr "mode" "SI")])
13917
13918 ;; Convert shift to the shiftx pattern to avoid flags dependency.
;; Zero-extended SImode counterpart of the register-count shift split.
;; After reload, with TARGET_64BIT and TARGET_BMI2, the FLAGS_REG clobber
;; is dropped and the QImode count register is replaced by its SImode
;; lowpart.
13919 (define_split
13920 [(set (match_operand:DI 0 "register_operand")
13921 (zero_extend:DI
13922 (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand")
13923 (match_operand:QI 2 "register_operand"))))
13924 (clobber (reg:CC FLAGS_REG))]
13925 "TARGET_64BIT && TARGET_BMI2 && reload_completed"
13926 [(set (match_dup 0)
13927 (zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
13928 "operands[2] = gen_lowpart (SImode, operands[2]);")
13929
;; Arithmetic right shift for the SWI12 modes, in place (operand 1 is tied
;; to operand 0), clobbering FLAGS_REG.  Emits sar; the count is omitted
;; from the assembly when operand 2 is const1_rtx and TARGET_SHIFT1 is set
;; or the function is optimized for size, and length_immediate is 0 under
;; the matching attribute condition.
13930 (define_insn "*ashr<mode>3_1"
13931 [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")
13932 (ashiftrt:SWI12
13933 (match_operand:SWI12 1 "nonimmediate_operand" "0")
13934 (match_operand:QI 2 "nonmemory_operand" "c<S>")))
13935 (clobber (reg:CC FLAGS_REG))]
13936 "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands)"
13937 {
13938 if (operands[2] == const1_rtx
13939 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
13940 return "sar{<imodesuffix>}\t%0";
13941 else
13942 return "sar{<imodesuffix>}\t{%2, %0|%0, %2}";
13943 }
13944 [(set_attr "type" "ishift")
13945 (set (attr "length_immediate")
13946 (if_then_else
13947 (and (match_operand 2 "const1_operand")
13948 (ior (match_test "TARGET_SHIFT1")
13949 (match_test "optimize_function_for_size_p (cfun)")))
13950 (const_string "0")
13951 (const_string "*")))
13952 (set_attr "mode" "<MODE>")])
13953
;; QImode logical right shift, clobbering FLAGS_REG.  Alternative 0 shifts
;; in place (type ishift) and emits shr{b}; alternative 1 uses the "k"
;; register constraint (type msklog, isa avx512dq) and outputs "#".
;; Any other insn type is treated as unreachable.  The one-operand shr
;; form is used when the count is const1_rtx and TARGET_SHIFT1 is set or
;; the function is optimized for size.
13954 (define_insn "*lshrqi3_1"
13955 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,?k")
13956 (lshiftrt:QI
13957 (match_operand:QI 1 "nonimmediate_operand" "0, k")
13958 (match_operand:QI 2 "nonmemory_operand" "cI,Wb")))
13959 (clobber (reg:CC FLAGS_REG))]
13960 "ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
13961 {
13962 switch (get_attr_type (insn))
13963 {
13964 case TYPE_ISHIFT:
13965 if (operands[2] == const1_rtx
13966 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
13967 return "shr{b}\t%0";
13968 else
13969 return "shr{b}\t{%2, %0|%0, %2}";
13970 case TYPE_MSKLOG:
13971 return "#";
13972 default:
13973 gcc_unreachable ();
13974 }
13975 }
13976 [(set_attr "isa" "*,avx512dq")
13977 (set_attr "type" "ishift,msklog")
13978 (set (attr "length_immediate")
13979 (if_then_else
13980 (and (and (match_operand 2 "const1_operand")
13981 (eq_attr "alternative" "0"))
13982 (ior (match_test "TARGET_SHIFT1")
13983 (match_test "optimize_function_for_size_p (cfun)")))
13984 (const_string "0")
13985 (const_string "*")))
13986 (set_attr "mode" "QI")])
13987
;; HImode logical right shift, clobbering FLAGS_REG.  Alternative 0 shifts
;; in place (type ishift) and emits shr{w}; alternative 1 uses the "k"
;; register constraint (type msklog, isa avx512f) and outputs "#".
;; Any other insn type is treated as unreachable.  The one-operand shr
;; form is used when the count is const1_rtx and TARGET_SHIFT1 is set or
;; the function is optimized for size.
13988 (define_insn "*lshrhi3_1"
13989 [(set (match_operand:HI 0 "nonimmediate_operand" "=rm, ?k")
13990 (lshiftrt:HI
13991 (match_operand:HI 1 "nonimmediate_operand" "0, k")
13992 (match_operand:QI 2 "nonmemory_operand" "cI, Ww")))
13993 (clobber (reg:CC FLAGS_REG))]
13994 "ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
13995 {
13996 switch (get_attr_type (insn))
13997 {
13998 case TYPE_ISHIFT:
13999 if (operands[2] == const1_rtx
14000 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
14001 return "shr{w}\t%0";
14002 else
14003 return "shr{w}\t{%2, %0|%0, %2}";
14004 case TYPE_MSKLOG:
14005 return "#";
14006 default:
14007 gcc_unreachable ();
14008 }
14009 }
14010 [(set_attr "isa" "*, avx512f")
14011 (set_attr "type" "ishift,msklog")
14012 (set (attr "length_immediate")
14013 (if_then_else
14014 (and (and (match_operand 2 "const1_operand")
14015 (eq_attr "alternative" "0"))
14016 (ior (match_test "TARGET_SHIFT1")
14017 (match_test "optimize_function_for_size_p (cfun)")))
14018 (const_string "0")
14019 (const_string "*")))
14020 (set_attr "mode" "HI")])
14021
14022 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Right shift of an SWI12 value written through strict_low_part of
;; operand 0.  Enabled when !TARGET_PARTIAL_REG_STALL or when optimizing
;; for size.  Alternative 0 (source tied to destination) prints the shift
;; directly.  Alternative 1 outputs "#" and is split after reload into a
;; strict_low_part move of operand 1 into operand 0 followed by the
;; in-place shift.
14023 (define_insn_and_split "*<insn><mode>3_1_slp"
14024 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
14025 (any_shiftrt:SWI12 (match_operand:SWI12 1 "register_operand" "0,!<r>")
14026 (match_operand:QI 2 "nonmemory_operand" "cI,cI")))
14027 (clobber (reg:CC FLAGS_REG))]
14028 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
14029 {
14030 if (which_alternative)
14031 return "#";
14032
14033 if (operands[2] == const1_rtx
14034 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
14035 return "<shift>{<imodesuffix>}\t%0";
14036 else
14037 return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
14038 }
14039 "&& reload_completed"
14040 [(set (strict_low_part (match_dup 0)) (match_dup 1))
14041 (parallel
14042 [(set (strict_low_part (match_dup 0))
14043 (any_shiftrt:SWI12 (match_dup 0) (match_dup 2)))
14044 (clobber (reg:CC FLAGS_REG))])]
14045 ""
14046 [(set_attr "type" "ishift")
14047 (set (attr "length_immediate")
14048 (if_then_else
14049 (and (match_operand 2 "const1_operand")
14050 (ior (match_test "TARGET_SHIFT1")
14051 (match_test "optimize_function_for_size_p (cfun)")))
14052 (const_string "0")
14053 (const_string "*")))
14054 (set_attr "mode" "<MODE>")])
14055
14056 ;; This pattern can't accept a variable shift count, since shifts by
14057 ;; zero don't affect the flags. We assume that shifts by constant
14058 ;; zero are optimized away.
;; Right shift by an immediate that both stores the result in operand 0
;; and sets FLAGS_REG from comparing that result with zero.
;; Enabled when optimizing for size, when !TARGET_PARTIAL_FLAG_REG_STALL,
;; or when the count is const1_rtx and TARGET_SHIFT1 is set; additionally
;; ix86_match_ccmode (insn, CCGOCmode) and ix86_binary_operator_ok must
;; hold.
14059 (define_insn "*<insn><mode>3_cmp"
14060 [(set (reg FLAGS_REG)
14061 (compare
14062 (any_shiftrt:SWI
14063 (match_operand:SWI 1 "nonimmediate_operand" "0")
14064 (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
14065 (const_int 0)))
14066 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
14067 (any_shiftrt:SWI (match_dup 1) (match_dup 2)))]
14068 "(optimize_function_for_size_p (cfun)
14069 || !TARGET_PARTIAL_FLAG_REG_STALL
14070 || (operands[2] == const1_rtx
14071 && TARGET_SHIFT1))
14072 && ix86_match_ccmode (insn, CCGOCmode)
14073 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
14074 {
14075 if (operands[2] == const1_rtx
14076 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
14077 return "<shift>{<imodesuffix>}\t%0";
14078 else
14079 return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
14080 }
14081 [(set_attr "type" "ishift")
14082 (set (attr "length_immediate")
14083 (if_then_else
14084 (and (match_operand 2 "const1_operand")
14085 (ior (match_test "TARGET_SHIFT1")
14086 (match_test "optimize_function_for_size_p (cfun)")))
14087 (const_string "0")
14088 (const_string "*")))
14089 (set_attr "mode" "<MODE>")])
14090
;; SImode right shift by a const_1_to_31_operand count that sets FLAGS_REG
;; from comparing the SImode result with zero and stores the zero-extended
;; result in a DImode register.  TARGET_64BIT only; the remaining enabling
;; conditions are the same flag-stall, ix86_match_ccmode and
;; ix86_binary_operator_ok checks spelled out in the condition string.
14091 (define_insn "*<insn>si3_cmp_zext"
14092 [(set (reg FLAGS_REG)
14093 (compare
14094 (any_shiftrt:SI (match_operand:SI 1 "register_operand" "0")
14095 (match_operand:QI 2 "const_1_to_31_operand"))
14096 (const_int 0)))
14097 (set (match_operand:DI 0 "register_operand" "=r")
14098 (zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
14099 "TARGET_64BIT
14100 && (optimize_function_for_size_p (cfun)
14101 || !TARGET_PARTIAL_FLAG_REG_STALL
14102 || (operands[2] == const1_rtx
14103 && TARGET_SHIFT1))
14104 && ix86_match_ccmode (insn, CCGOCmode)
14105 && ix86_binary_operator_ok (<CODE>, SImode, operands)"
14106 {
14107 if (operands[2] == const1_rtx
14108 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
14109 return "<shift>{l}\t%k0";
14110 else
14111 return "<shift>{l}\t{%2, %k0|%k0, %2}";
14112 }
14113 [(set_attr "type" "ishift")
14114 (set (attr "length_immediate")
14115 (if_then_else
14116 (and (match_operand 2 "const1_operand")
14117 (ior (match_test "TARGET_SHIFT1")
14118 (match_test "optimize_function_for_size_p (cfun)")))
14119 (const_string "0")
14120 (const_string "*")))
14121 (set_attr "mode" "SI")])
14122
;; Right shift by an immediate where only the flags are wanted: FLAGS_REG
;; is set from comparing the shift result with zero, and operand 0 is just
;; a clobbered scratch.  Operand 1 is tied to that scratch ("0"), so the
;; shift is still printed on operand 0.  Enabled under the same
;; flag-stall and ix86_match_ccmode conditions written in the condition
;; string.
14123 (define_insn "*<insn><mode>3_cconly"
14124 [(set (reg FLAGS_REG)
14125 (compare
14126 (any_shiftrt:SWI
14127 (match_operand:SWI 1 "register_operand" "0")
14128 (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
14129 (const_int 0)))
14130 (clobber (match_scratch:SWI 0 "=<r>"))]
14131 "(optimize_function_for_size_p (cfun)
14132 || !TARGET_PARTIAL_FLAG_REG_STALL
14133 || (operands[2] == const1_rtx
14134 && TARGET_SHIFT1))
14135 && ix86_match_ccmode (insn, CCGOCmode)"
14136 {
14137 if (operands[2] == const1_rtx
14138 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
14139 return "<shift>{<imodesuffix>}\t%0";
14140 else
14141 return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
14142 }
14143 [(set_attr "type" "ishift")
14144 (set (attr "length_immediate")
14145 (if_then_else
14146 (and (match_operand 2 "const1_operand")
14147 (ior (match_test "TARGET_SHIFT1")
14148 (match_test "optimize_function_for_size_p (cfun)")))
14149 (const_string "0")
14150 (const_string "*")))
14151 (set_attr "mode" "<MODE>")])
14152
;; QImode right shift applied to the 8-bit field at bit offset 8 of
;; operand 1 (zero_extract with size 8, position 8), with the result
;; written back to the same field of operand 0.  The condition requires
;; operands 0 and 1 to be rtx_equal_p.  The shift is printed on %h0 with
;; a "b" suffix and FLAGS_REG is clobbered.
14153 (define_insn "*<insn>qi_ext<mode>_2"
14154 [(set (zero_extract:SWI248
14155 (match_operand:SWI248 0 "register_operand" "+Q")
14156 (const_int 8)
14157 (const_int 8))
14158 (subreg:SWI248
14159 (any_shiftrt:QI
14160 (subreg:QI
14161 (zero_extract:SWI248
14162 (match_operand:SWI248 1 "register_operand" "0")
14163 (const_int 8)
14164 (const_int 8)) 0)
14165 (match_operand:QI 2 "nonmemory_operand" "cI")) 0))
14166 (clobber (reg:CC FLAGS_REG))]
14167 "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
14168 rtx_equal_p (operands[0], operands[1])"
14169 {
14170 if (operands[2] == const1_rtx
14171 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
14172 return "<shift>{b}\t%h0";
14173 else
14174 return "<shift>{b}\t{%2, %h0|%h0, %2}";
14175 }
14176 [(set_attr "type" "ishift")
14177 (set (attr "length_immediate")
14178 (if_then_else
14179 (and (match_operand 2 "const1_operand")
14180 (ior (match_test "TARGET_SHIFT1")
14181 (match_test "optimize_function_for_size_p (cfun)")))
14182 (const_string "0")
14183 (const_string "*")))
14184 (set_attr "mode" "QI")])
14185 \f
14186 ;; Rotate instructions
14187
;; Expand a TImode rotate (TARGET_64BIT only).  Three cases:
;;  - a count accepted by const_1_to_63_operand uses the
;;    ix86_<insn>ti3_doubleword pattern;
;;  - a constant count of exactly 64 uses <insn>64ti2_doubleword;
;;  - anything else is expanded inline from two DImode double-shift insns
;;    plus a call to gen_x86_shiftdi_adj_1.
14188 (define_expand "<insn>ti3"
14189 [(set (match_operand:TI 0 "register_operand")
14190 (any_rotate:TI (match_operand:TI 1 "register_operand")
14191 (match_operand:QI 2 "nonmemory_operand")))]
14192 "TARGET_64BIT"
14193 {
14194 if (const_1_to_63_operand (operands[2], VOIDmode))
14195 emit_insn (gen_ix86_<insn>ti3_doubleword
14196 (operands[0], operands[1], operands[2]));
14197 else if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 64)
14198 emit_insn (gen_<insn>64ti2_doubleword (operands[0], operands[1]));
14199 else
14200 {
/* General case: put the count in a QImode register and work on copies
   of the two DImode halves of the source.  */
14201 rtx amount = force_reg (QImode, operands[2]);
14202 rtx src_lo = gen_lowpart (DImode, operands[1]);
14203 rtx src_hi = gen_highpart (DImode, operands[1]);
14204 rtx tmp_lo = gen_reg_rtx (DImode);
14205 rtx tmp_hi = gen_reg_rtx (DImode);
14206 emit_move_insn (tmp_lo, src_lo);
14207 emit_move_insn (tmp_hi, src_hi);
/* Rotate left uses shld, rotate right uses shrd; each half is shifted
   with the other (original) half supplying the incoming bits.  */
14208 rtx (*shiftd) (rtx, rtx, rtx)
14209 = (<CODE> == ROTATE) ? gen_x86_64_shld : gen_x86_64_shrd;
14210 emit_insn (shiftd (tmp_lo, src_hi, amount));
14211 emit_insn (shiftd (tmp_hi, src_lo, amount));
14212 rtx dst_lo = gen_lowpart (DImode, operands[0]);
14213 rtx dst_hi = gen_highpart (DImode, operands[0]);
14214 emit_move_insn (dst_lo, tmp_lo);
14215 emit_move_insn (dst_hi, tmp_hi);
/* NOTE(review): presumably this fixes up the halves when the count is
   64 or more - confirm against the x86_shiftdi_adj_1 expander.  */
14216 emit_insn (gen_x86_shiftdi_adj_1 (dst_lo, dst_hi, amount, tmp_lo));
14217 }
14218 DONE;
14219 })
14220
;; Expand a DImode rotate.  With TARGET_64BIT it goes through
;; ix86_expand_binary_operator.  Otherwise a count accepted by
;; const_1_to_31_operand uses the ix86_<insn>di3_doubleword pattern, a
;; constant count of exactly 32 uses <insn>32di2_doubleword, and any other
;; count makes the expander FAIL.
14221 (define_expand "<insn>di3"
14222 [(set (match_operand:DI 0 "shiftdi_operand")
14223 (any_rotate:DI (match_operand:DI 1 "shiftdi_operand")
14224 (match_operand:QI 2 "nonmemory_operand")))]
14225 ""
14226 {
14227 if (TARGET_64BIT)
14228 ix86_expand_binary_operator (<CODE>, DImode, operands);
14229 else if (const_1_to_31_operand (operands[2], VOIDmode))
14230 emit_insn (gen_ix86_<insn>di3_doubleword
14231 (operands[0], operands[1], operands[2]));
14232 else if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 32)
14233 emit_insn (gen_<insn>32di2_doubleword (operands[0], operands[1]));
14234 else
14235 FAIL;
14236
14237 DONE;
14238 })
14239
;; Expand a rotate for the SWIM124 modes by handing the operands to
;; ix86_expand_binary_operator; always available (empty condition).
14240 (define_expand "<insn><mode>3"
14241 [(set (match_operand:SWIM124 0 "nonimmediate_operand")
14242 (any_rotate:SWIM124 (match_operand:SWIM124 1 "nonimmediate_operand")
14243 (match_operand:QI 2 "nonmemory_operand")))]
14244 ""
14245 "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
14246
14247 ;; Avoid useless masking of count operand.
;; Rotate whose count is the QImode subreg of (and reg const).  The
;; condition requires that the constant has every bit of
;; GET_MODE_BITSIZE (<MODE>mode)-1 set, that operand 2 has an integer-class
;; mode whose size is between 2 and 4 bytes (8 with TARGET_64BIT), and that
;; ix86_pre_reload_split () holds.  The insn outputs "#" and is split
;; unconditionally ("&& 1") into a plain rotate whose count is the QImode
;; lowpart of operand 2, i.e. without the AND.
14248 (define_insn_and_split "*<insn><mode>3_mask"
14249 [(set (match_operand:SWI 0 "nonimmediate_operand")
14250 (any_rotate:SWI
14251 (match_operand:SWI 1 "nonimmediate_operand")
14252 (subreg:QI
14253 (and
14254 (match_operand 2 "register_operand" "c")
14255 (match_operand 3 "const_int_operand")) 0)))
14256 (clobber (reg:CC FLAGS_REG))]
14257 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
14258 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
14259 == GET_MODE_BITSIZE (<MODE>mode)-1
14260 && GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT
14261 && IN_RANGE (GET_MODE_SIZE (GET_MODE (operands[2])), 2,
14262 4 << (TARGET_64BIT ? 1 : 0))
14263 && ix86_pre_reload_split ()"
14264 "#"
14265 "&& 1"
14266 [(parallel
14267 [(set (match_dup 0)
14268 (any_rotate:SWI (match_dup 1)
14269 (match_dup 2)))
14270 (clobber (reg:CC FLAGS_REG))])]
14271 {
14272 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
14273 operands[2] = gen_lowpart (QImode, operands[2]);
14274 })
14275
;; Masked-count rotate of a constant value (operand 1 is a const_int), with
;; no FLAGS_REG clobber in the matched pattern.  When the mask constant
;; covers all bits of GET_MODE_BITSIZE (<MODE>mode) - 1 and operand 2 has
;; an integer-class mode of 2 to 4 bytes (8 with TARGET_64BIT), the
;; constant is first moved into a new <MODE>mode pseudo (operand 4) and the
;; rotate then takes the QImode subreg of operand 2 as its count, without
;; the AND.
14276 (define_split
14277 [(set (match_operand:SWI 0 "register_operand")
14278 (any_rotate:SWI
14279 (match_operand:SWI 1 "const_int_operand")
14280 (subreg:QI
14281 (and
14282 (match_operand 2 "register_operand")
14283 (match_operand 3 "const_int_operand")) 0)))]
14284 "(INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode) - 1))
14285 == GET_MODE_BITSIZE (<MODE>mode) - 1
14286 && GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT
14287 && IN_RANGE (GET_MODE_SIZE (GET_MODE (operands[2])), 2,
14288 4 << (TARGET_64BIT ? 1 : 0))"
14289 [(set (match_dup 4) (match_dup 1))
14290 (set (match_dup 0)
14291 (any_rotate:SWI (match_dup 4)
14292 (subreg:QI (match_dup 2) 0)))]
14293 "operands[4] = gen_reg_rtx (<MODE>mode);")
14294
;; Variant of the masked-count rotate where the AND is already in QImode
;; (and:QI reg const) rather than a subreg of a wider AND.  When the
;; constant has every bit of GET_MODE_BITSIZE (<MODE>mode)-1 set and
;; ix86_pre_reload_split () holds, the insn is split unconditionally into
;; a rotate that uses operand 2 directly as the count.
14295 (define_insn_and_split "*<insn><mode>3_mask_1"
14296 [(set (match_operand:SWI 0 "nonimmediate_operand")
14297 (any_rotate:SWI
14298 (match_operand:SWI 1 "nonimmediate_operand")
14299 (and:QI
14300 (match_operand:QI 2 "register_operand" "c")
14301 (match_operand:QI 3 "const_int_operand"))))
14302 (clobber (reg:CC FLAGS_REG))]
14303 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
14304 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
14305 == GET_MODE_BITSIZE (<MODE>mode)-1
14306 && ix86_pre_reload_split ()"
14307 "#"
14308 "&& 1"
14309 [(parallel
14310 [(set (match_dup 0)
14311 (any_rotate:SWI (match_dup 1)
14312 (match_dup 2)))
14313 (clobber (reg:CC FLAGS_REG))])])
14314
;; Rotate of a constant value by an (and:QI reg const) count, with no
;; FLAGS_REG clobber in the matched pattern.  When the mask constant covers
;; all bits of GET_MODE_BITSIZE (<MODE>mode) - 1, the constant is moved
;; into a new <MODE>mode pseudo (operand 4) and the rotate uses operand 2
;; directly as the count.
14315 (define_split
14316 [(set (match_operand:SWI 0 "register_operand")
14317 (any_rotate:SWI
14318 (match_operand:SWI 1 "const_int_operand")
14319 (and:QI
14320 (match_operand:QI 2 "register_operand")
14321 (match_operand:QI 3 "const_int_operand"))))]
14322 "(INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode) - 1))
14323 == GET_MODE_BITSIZE (<MODE>mode) - 1"
14324 [(set (match_dup 4) (match_dup 1))
14325 (set (match_dup 0)
14326 (any_rotate:SWI (match_dup 4) (match_dup 2)))]
14327 "operands[4] = gen_reg_rtx (<MODE>mode);")
14328
14329 ;; Implement rotation using two double-precision
14330 ;; shift instructions and a scratch register.
14331
;; Doubleword rotate left by an immediate, in place, with a FLAGS_REG
;; clobber and an early-clobber scratch (operand 3).  Always output as "#"
;; and split after reload.  split_double_mode breaks operand 0 into the
;; two halves operands[4] and operands[5].  The split sequence is:
;;   1. copy operands[4] into the scratch;
;;   2. operands[4] = (operands[4] << (count & operands[6]))
;;      | bits taken from operands[5] shifted right by
;;        operands[7] - (count & operands[6]);
;;   3. the same for operands[5], taking the incoming bits from the
;;      scratch (the saved old operands[4]).
;; operands[6] is the bit size of <MODE>mode minus one and operands[7] is
;; the bit size itself.
14332 (define_insn_and_split "ix86_rotl<dwi>3_doubleword"
14333 [(set (match_operand:<DWI> 0 "register_operand" "=r")
14334 (rotate:<DWI> (match_operand:<DWI> 1 "register_operand" "0")
14335 (match_operand:QI 2 "<shift_immediate_operand>" "<S>")))
14336 (clobber (reg:CC FLAGS_REG))
14337 (clobber (match_scratch:DWIH 3 "=&r"))]
14338 ""
14339 "#"
14340 "reload_completed"
14341 [(set (match_dup 3) (match_dup 4))
14342 (parallel
14343 [(set (match_dup 4)
14344 (ior:DWIH (ashift:DWIH (match_dup 4)
14345 (and:QI (match_dup 2) (match_dup 6)))
14346 (subreg:DWIH
14347 (lshiftrt:<DWI> (zero_extend:<DWI> (match_dup 5))
14348 (minus:QI (match_dup 7)
14349 (and:QI (match_dup 2)
14350 (match_dup 6)))) 0)))
14351 (clobber (reg:CC FLAGS_REG))])
14352 (parallel
14353 [(set (match_dup 5)
14354 (ior:DWIH (ashift:DWIH (match_dup 5)
14355 (and:QI (match_dup 2) (match_dup 6)))
14356 (subreg:DWIH
14357 (lshiftrt:<DWI> (zero_extend:<DWI> (match_dup 3))
14358 (minus:QI (match_dup 7)
14359 (and:QI (match_dup 2)
14360 (match_dup 6)))) 0)))
14361 (clobber (reg:CC FLAGS_REG))])]
14362 {
14363 operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - 1);
14364 operands[7] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
14365
14366 split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
14367 })
14368
;; Doubleword rotate right by an immediate; mirror image of the rotate-left
;; doubleword pattern.  Always output as "#" and split after reload.
;; split_double_mode breaks operand 0 into operands[4] and operands[5].
;; The scratch (operand 3) saves the old operands[4]; each half is then
;; set to itself shifted right by (count & operands[6]) IORed with bits
;; from the other half shifted left by operands[7] - (count & operands[6]).
;; operands[6] is the bit size of <MODE>mode minus one and operands[7] is
;; the bit size itself.
14369 (define_insn_and_split "ix86_rotr<dwi>3_doubleword"
14370 [(set (match_operand:<DWI> 0 "register_operand" "=r")
14371 (rotatert:<DWI> (match_operand:<DWI> 1 "register_operand" "0")
14372 (match_operand:QI 2 "<shift_immediate_operand>" "<S>")))
14373 (clobber (reg:CC FLAGS_REG))
14374 (clobber (match_scratch:DWIH 3 "=&r"))]
14375 ""
14376 "#"
14377 "reload_completed"
14378 [(set (match_dup 3) (match_dup 4))
14379 (parallel
14380 [(set (match_dup 4)
14381 (ior:DWIH (lshiftrt:DWIH (match_dup 4)
14382 (and:QI (match_dup 2) (match_dup 6)))
14383 (subreg:DWIH
14384 (ashift:<DWI> (zero_extend:<DWI> (match_dup 5))
14385 (minus:QI (match_dup 7)
14386 (and:QI (match_dup 2)
14387 (match_dup 6)))) 0)))
14388 (clobber (reg:CC FLAGS_REG))])
14389 (parallel
14390 [(set (match_dup 5)
14391 (ior:DWIH (lshiftrt:DWIH (match_dup 5)
14392 (and:QI (match_dup 2) (match_dup 6)))
14393 (subreg:DWIH
14394 (ashift:<DWI> (zero_extend:<DWI> (match_dup 3))
14395 (minus:QI (match_dup 7)
14396 (and:QI (match_dup 2)
14397 (match_dup 6)))) 0)))
14398 (clobber (reg:CC FLAGS_REG))])]
14399 {
14400 operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - 1);
14401 operands[7] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
14402
14403 split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
14404 })
14405
;; DImode rotate by exactly 32 for !TARGET_64BIT; there is no FLAGS_REG
;; clobber.  Always output as "#" and split after reload into two moves
;; between the halves produced by split_double_mode, which is called for
;; two operands and fills operands[0..1] and operands[2..3].
;; NOTE(review): presumably operands[0]/[1] become the low halves and
;; operands[2]/[3] the high halves of the destination and source, so the
;; two moves cross the halves - confirm against split_double_mode.
;; When the resulting operands[0] and operands[1] are equal, a single
;; swapsi of operands[0] and operands[2] is emitted instead.
14406 (define_insn_and_split "<insn>32di2_doubleword"
14407 [(set (match_operand:DI 0 "register_operand" "=r,r,r")
14408 (any_rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0,r,o")
14409 (const_int 32)))]
14410 "!TARGET_64BIT"
14411 "#"
14412 "&& reload_completed"
14413 [(set (match_dup 0) (match_dup 3))
14414 (set (match_dup 2) (match_dup 1))]
14415 {
14416 split_double_mode (DImode, &operands[0], 2, &operands[0], &operands[2]);
14417 if (rtx_equal_p (operands[0], operands[1]))
14418 {
14419 emit_insn (gen_swapsi (operands[0], operands[2]));
14420 DONE;
14421 }
14422 })
14423
;; TImode rotate by exactly 64 for TARGET_64BIT; there is no FLAGS_REG
;; clobber.  Always output as "#" and split after reload into two moves
;; between the halves produced by split_double_mode (called for two
;; operands, filling operands[0..1] and operands[2..3]).
;; When the resulting operands[0] and operands[1] are equal, a single
;; swapdi of operands[0] and operands[2] is emitted instead.
14424 (define_insn_and_split "<insn>64ti2_doubleword"
14425 [(set (match_operand:TI 0 "register_operand" "=r,r,r")
14426 (any_rotate:TI (match_operand:TI 1 "nonimmediate_operand" "0,r,o")
14427 (const_int 64)))]
14428 "TARGET_64BIT"
14429 "#"
14430 "&& reload_completed"
14431 [(set (match_dup 0) (match_dup 3))
14432 (set (match_dup 2) (match_dup 1))]
14433 {
14434 split_double_mode (TImode, &operands[0], 2, &operands[0], &operands[2]);
14435 if (rtx_equal_p (operands[0], operands[1]))
14436 {
14437 emit_insn (gen_swapdi (operands[0], operands[2]));
14438 DONE;
14439 }
14440 })
14441
;; Per-mode predicate name for the rorx immediate count: SImode uses
;; const_0_to_31_operand and DImode uses const_0_to_63_operand.
14442 (define_mode_attr rorx_immediate_operand
14443 [(SI "const_0_to_31_operand")
14444 (DI "const_0_to_63_operand")])
14445
;; Rotate right by an immediate for the SWI48 modes using rorx.  Source
;; and destination are separate operands and there is no FLAGS_REG
;; clobber.  Enabled with TARGET_BMI2 when the function is not optimized
;; for size.
14446 (define_insn "*bmi2_rorx<mode>3_1"
14447 [(set (match_operand:SWI48 0 "register_operand" "=r")
14448 (rotatert:SWI48
14449 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
14450 (match_operand:QI 2 "<rorx_immediate_operand>" "<S>")))]
14451 "TARGET_BMI2 && !optimize_function_for_size_p (cfun)"
14452 "rorx\t{%2, %1, %0|%0, %1, %2}"
14453 [(set_attr "type" "rotatex")
14454 (set_attr "mode" "<MODE>")])
14455
;; Rotate (left or right) for the SWI48 modes, clobbering FLAGS_REG.
;; Alternative 0 rotates in place (type rotate); alternative 1 is the bmi2
;; alternative with an immediate count (type rotatex) and outputs "#".
;; preferred_for_size is true only for alternative 0.  For type rotate the
;; count is omitted from the assembly when operand 2 is const1_rtx and
;; TARGET_SHIFT1 is set or the function is optimized for size;
;; length_immediate is 0 under the matching attribute condition.
14456 (define_insn "*<insn><mode>3_1"
14457 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
14458 (any_rotate:SWI48
14459 (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
14460 (match_operand:QI 2 "nonmemory_operand" "c<S>,<S>")))
14461 (clobber (reg:CC FLAGS_REG))]
14462 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
14463 {
14464 switch (get_attr_type (insn))
14465 {
14466 case TYPE_ROTATEX:
14467 return "#";
14468
14469 default:
14470 if (operands[2] == const1_rtx
14471 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
14472 return "<rotate>{<imodesuffix>}\t%0";
14473 else
14474 return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
14475 }
14476 }
14477 [(set_attr "isa" "*,bmi2")
14478 (set_attr "type" "rotate,rotatex")
14479 (set (attr "preferred_for_size")
14480 (cond [(eq_attr "alternative" "0")
14481 (symbol_ref "true")]
14482 (symbol_ref "false")))
14483 (set (attr "length_immediate")
14484 (if_then_else
14485 (and (eq_attr "type" "rotate")
14486 (and (match_operand 2 "const1_operand")
14487 (ior (match_test "TARGET_SHIFT1")
14488 (match_test "optimize_function_for_size_p (cfun)"))))
14489 (const_string "0")
14490 (const_string "*")))
14491 (set_attr "mode" "<MODE>")])
14492
14493 ;; Convert rotate to the rotatex pattern to avoid flags dependency.
;; After reload, with TARGET_BMI2 and when not optimizing for size, a
;; rotate left of a register destination by a constant (with a FLAGS_REG
;; clobber) becomes a rotate right without the clobber.  The new count is
;; (bitsize - old count) % bitsize, where bitsize is the bit size of
;; <MODE>mode.
14494 (define_split
14495 [(set (match_operand:SWI48 0 "register_operand")
14496 (rotate:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
14497 (match_operand:QI 2 "const_int_operand")))
14498 (clobber (reg:CC FLAGS_REG))]
14499 "TARGET_BMI2 && reload_completed && !optimize_function_for_size_p (cfun)"
14500 [(set (match_dup 0)
14501 (rotatert:SWI48 (match_dup 1) (match_dup 2)))]
14502 {
14503 int bitsize = GET_MODE_BITSIZE (<MODE>mode);
14504
14505 operands[2] = GEN_INT ((bitsize - INTVAL (operands[2])) % bitsize);
14506 })
14507
;; After reload, with TARGET_BMI2 and when not optimizing for size, a
;; rotate right of a register destination by a constant keeps its operands
;; unchanged and only loses the FLAGS_REG clobber.
14508 (define_split
14509 [(set (match_operand:SWI48 0 "register_operand")
14510 (rotatert:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
14511 (match_operand:QI 2 "const_int_operand")))
14512 (clobber (reg:CC FLAGS_REG))]
14513 "TARGET_BMI2 && reload_completed && !optimize_function_for_size_p (cfun)"
14514 [(set (match_dup 0)
14515 (rotatert:SWI48 (match_dup 1) (match_dup 2)))])
14516
;; SImode rotate right by a const_0_to_31_operand count, zero-extended
;; into a DImode register, using rorx on the 32-bit view of operand 0
;; (%k0).  No FLAGS_REG clobber.  Enabled with TARGET_64BIT and
;; TARGET_BMI2 when the function is not optimized for size.
14517 (define_insn "*bmi2_rorxsi3_1_zext"
14518 [(set (match_operand:DI 0 "register_operand" "=r")
14519 (zero_extend:DI
14520 (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
14521 (match_operand:QI 2 "const_0_to_31_operand"))))]
14522 "TARGET_64BIT && TARGET_BMI2 && !optimize_function_for_size_p (cfun)"
14523 "rorx\t{%2, %1, %k0|%k0, %1, %2}"
14524 [(set_attr "type" "rotatex")
14525 (set_attr "mode" "SI")])
14526
;; SImode rotate zero-extended to DImode, clobbering FLAGS_REG
;; (TARGET_64BIT only).  Alternative 0 rotates in place (type rotate);
;; alternative 1 is the bmi2 alternative with an immediate count (type
;; rotatex) and outputs "#".  preferred_for_size is true only for
;; alternative 0.  For type rotate the count is omitted from the assembly
;; when operand 2 is const1_rtx and TARGET_SHIFT1 is set or the function
;; is optimized for size.
14527 (define_insn "*<insn>si3_1_zext"
14528 [(set (match_operand:DI 0 "register_operand" "=r,r")
14529 (zero_extend:DI
14530 (any_rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
14531 (match_operand:QI 2 "nonmemory_operand" "cI,I"))))
14532 (clobber (reg:CC FLAGS_REG))]
14533 "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
14534 {
14535 switch (get_attr_type (insn))
14536 {
14537 case TYPE_ROTATEX:
14538 return "#";
14539
14540 default:
14541 if (operands[2] == const1_rtx
14542 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
14543 return "<rotate>{l}\t%k0";
14544 else
14545 return "<rotate>{l}\t{%2, %k0|%k0, %2}";
14546 }
14547 }
14548 [(set_attr "isa" "*,bmi2")
14549 (set_attr "type" "rotate,rotatex")
14550 (set (attr "preferred_for_size")
14551 (cond [(eq_attr "alternative" "0")
14552 (symbol_ref "true")]
14553 (symbol_ref "false")))
14554 (set (attr "length_immediate")
14555 (if_then_else
14556 (and (eq_attr "type" "rotate")
14557 (and (match_operand 2 "const1_operand")
14558 (ior (match_test "TARGET_SHIFT1")
14559 (match_test "optimize_function_for_size_p (cfun)"))))
14560 (const_string "0")
14561 (const_string "*")))
14562 (set_attr "mode" "SI")])
14563
14564 ;; Convert rotate to the rotatex pattern to avoid flags dependency.
;; Zero-extended SImode counterpart of the rotate-left split.  After
;; reload, with TARGET_64BIT and TARGET_BMI2 and when not optimizing for
;; size, the rotate left by a constant becomes a rotate right by
;; (32 - count) % 32 and the FLAGS_REG clobber is dropped.
14565 (define_split
14566 [(set (match_operand:DI 0 "register_operand")
14567 (zero_extend:DI
14568 (rotate:SI (match_operand:SI 1 "nonimmediate_operand")
14569 (match_operand:QI 2 "const_int_operand"))))
14570 (clobber (reg:CC FLAGS_REG))]
14571 "TARGET_64BIT && TARGET_BMI2 && reload_completed
14572 && !optimize_function_for_size_p (cfun)"
14573 [(set (match_dup 0)
14574 (zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))]
14575 {
14576 int bitsize = GET_MODE_BITSIZE (SImode);
14577
14578 operands[2] = GEN_INT ((bitsize - INTVAL (operands[2])) % bitsize);
14579 })
14580
;; Zero-extended SImode counterpart of the rotate-right split.  After
;; reload, with TARGET_64BIT and TARGET_BMI2 and when not optimizing for
;; size, the operands are kept and only the FLAGS_REG clobber is dropped.
14581 (define_split
14582 [(set (match_operand:DI 0 "register_operand")
14583 (zero_extend:DI
14584 (rotatert:SI (match_operand:SI 1 "nonimmediate_operand")
14585 (match_operand:QI 2 "const_int_operand"))))
14586 (clobber (reg:CC FLAGS_REG))]
14587 "TARGET_64BIT && TARGET_BMI2 && reload_completed
14588 && !optimize_function_for_size_p (cfun)"
14589 [(set (match_dup 0)
14590 (zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))])
14591
;; Rotate (left or right) for the SWI12 modes, in place (operand 1 is tied
;; to operand 0), clobbering FLAGS_REG.  The count is omitted from the
;; assembly when operand 2 is const1_rtx and TARGET_SHIFT1 is set or the
;; function is optimized for size; length_immediate is 0 under the
;; matching attribute condition.
14592 (define_insn "*<insn><mode>3_1"
14593 [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")
14594 (any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0")
14595 (match_operand:QI 2 "nonmemory_operand" "c<S>")))
14596 (clobber (reg:CC FLAGS_REG))]
14597 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
14598 {
14599 if (operands[2] == const1_rtx
14600 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
14601 return "<rotate>{<imodesuffix>}\t%0";
14602 else
14603 return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
14604 }
14605 [(set_attr "type" "rotate")
14606 (set (attr "length_immediate")
14607 (if_then_else
14608 (and (match_operand 2 "const1_operand")
14609 (ior (match_test "TARGET_SHIFT1")
14610 (match_test "optimize_function_for_size_p (cfun)")))
14611 (const_string "0")
14612 (const_string "*")))
14613 (set_attr "mode" "<MODE>")])
14614
14615 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Rotate of an SWI12 value written through strict_low_part of operand 0.
;; Enabled when !TARGET_PARTIAL_REG_STALL or when optimizing for size.
;; Alternative 0 (source tied to destination) prints the rotate directly.
;; Alternative 1 outputs "#" and is split after reload into a
;; strict_low_part move of operand 1 into operand 0 followed by the
;; in-place rotate.
14616 (define_insn_and_split "*<insn><mode>3_1_slp"
14617 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
14618 (any_rotate:SWI12 (match_operand:SWI12 1 "register_operand" "0,!<r>")
14619 (match_operand:QI 2 "nonmemory_operand" "cI,cI")))
14620 (clobber (reg:CC FLAGS_REG))]
14621 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
14622 {
14623 if (which_alternative)
14624 return "#";
14625
14626 if (operands[2] == const1_rtx
14627 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
14628 return "<rotate>{<imodesuffix>}\t%0";
14629 else
14630 return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
14631 }
14632 "&& reload_completed"
14633 [(set (strict_low_part (match_dup 0)) (match_dup 1))
14634 (parallel
14635 [(set (strict_low_part (match_dup 0))
14636 (any_rotate:SWI12 (match_dup 0) (match_dup 2)))
14637 (clobber (reg:CC FLAGS_REG))])]
14638 ""
14639 [(set_attr "type" "rotate")
14640 (set (attr "length_immediate")
14641 (if_then_else
14642 (and (match_operand 2 "const1_operand")
14643 (ior (match_test "TARGET_SHIFT1")
14644 (match_test "optimize_function_for_size_p (cfun)")))
14645 (const_string "0")
14646 (const_string "*")))
14647 (set_attr "mode" "<MODE>")])
14648
;; After reload, an in-place HImode rotate by 8 (either direction) of a
;; QIreg_operand register is replaced by a strict_low_part bswap:HI of the
;; same register, still clobbering FLAGS_REG.  Applies when
;; TARGET_USE_XCHGB is set or the function is optimized for size.
14649 (define_split
14650 [(set (match_operand:HI 0 "QIreg_operand")
14651 (any_rotate:HI (match_dup 0) (const_int 8)))
14652 (clobber (reg:CC FLAGS_REG))]
14653 "reload_completed
14654 && (TARGET_USE_XCHGB || optimize_function_for_size_p (cfun))"
14655 [(parallel [(set (strict_low_part (match_dup 0))
14656 (bswap:HI (match_dup 0)))
14657 (clobber (reg:CC FLAGS_REG))])])
14658 \f
14659 ;; Bit set / bit test instructions
14660
14661 ;; %%% bts, btr, btc
14662
14663 ;; These instructions are *slow* when applied to memory.
14664
;; Mnemonic selected by the RTL code: ior maps to "bts", xor maps to "btc".
14665 (define_code_attr btsc [(ior "bts") (xor "btc")])
14666
;; IOR or XOR of operand 1 with (1 << operand 2), i.e. setting (bts) or
;; toggling (btc) the bit selected by the QImode register operand 2, for
;; the SWI48 modes.  Works in place (operand 1 tied to operand 0),
;; clobbers FLAGS_REG and is enabled by TARGET_USE_BT.
14667 (define_insn "*<btsc><mode>"
14668 [(set (match_operand:SWI48 0 "register_operand" "=r")
14669 (any_or:SWI48
14670 (ashift:SWI48 (const_int 1)
14671 (match_operand:QI 2 "register_operand" "r"))
14672 (match_operand:SWI48 1 "register_operand" "0")))
14673 (clobber (reg:CC FLAGS_REG))]
14674 "TARGET_USE_BT"
14675 "<btsc>{<imodesuffix>}\t{%<k>2, %0|%0, %<k>2}"
14676 [(set_attr "type" "alu1")
14677 (set_attr "prefix_0f" "1")
14678 (set_attr "znver1_decode" "double")
14679 (set_attr "mode" "<MODE>")])
14680
14681 ;; Avoid useless masking of count operand.
;; bts/btc form whose bit index is the QImode subreg of (and reg const).
;; The condition requires TARGET_USE_BT, a constant with every bit of
;; GET_MODE_BITSIZE (<MODE>mode)-1 set, an integer-class mode for operand 1
;; of 2 to 4 bytes (8 with TARGET_64BIT), and ix86_pre_reload_split ().
;; The insn is split unconditionally into the unmasked form, using the
;; QImode lowpart of operand 1 as the bit index.
14682 (define_insn_and_split "*<btsc><mode>_mask"
14683 [(set (match_operand:SWI48 0 "register_operand")
14684 (any_or:SWI48
14685 (ashift:SWI48
14686 (const_int 1)
14687 (subreg:QI
14688 (and
14689 (match_operand 1 "register_operand")
14690 (match_operand 2 "const_int_operand")) 0))
14691 (match_operand:SWI48 3 "register_operand")))
14692 (clobber (reg:CC FLAGS_REG))]
14693 "TARGET_USE_BT
14694 && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
14695 == GET_MODE_BITSIZE (<MODE>mode)-1
14696 && GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14697 && IN_RANGE (GET_MODE_SIZE (GET_MODE (operands[1])), 2,
14698 4 << (TARGET_64BIT ? 1 : 0))
14699 && ix86_pre_reload_split ()"
14700 "#"
14701 "&& 1"
14702 [(parallel
14703 [(set (match_dup 0)
14704 (any_or:SWI48
14705 (ashift:SWI48 (const_int 1)
14706 (match_dup 1))
14707 (match_dup 3)))
14708 (clobber (reg:CC FLAGS_REG))])]
14709 {
14710 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
14711 operands[1] = gen_lowpart (QImode, operands[1]);
14712 })
14713
;; bts/btc form whose bit index is an (and:QI reg const).  With
;; TARGET_USE_BT, a constant that has every bit of
;; GET_MODE_BITSIZE (<MODE>mode)-1 set, and ix86_pre_reload_split (), the
;; insn is split unconditionally into the unmasked form using operand 1
;; directly as the bit index.
14714 (define_insn_and_split "*<btsc><mode>_mask_1"
14715 [(set (match_operand:SWI48 0 "register_operand")
14716 (any_or:SWI48
14717 (ashift:SWI48
14718 (const_int 1)
14719 (and:QI
14720 (match_operand:QI 1 "register_operand")
14721 (match_operand:QI 2 "const_int_operand")))
14722 (match_operand:SWI48 3 "register_operand")))
14723 (clobber (reg:CC FLAGS_REG))]
14724 "TARGET_USE_BT
14725 && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
14726 == GET_MODE_BITSIZE (<MODE>mode)-1
14727 && ix86_pre_reload_split ()"
14728 "#"
14729 "&& 1"
14730 [(parallel
14731 [(set (match_dup 0)
14732 (any_or:SWI48
14733 (ashift:SWI48 (const_int 1)
14734 (match_dup 1))
14735 (match_dup 3)))
14736 (clobber (reg:CC FLAGS_REG))])])
14737
;; AND of operand 1 with the constant -2 rotated left by operand 2.
;; -2 has every bit set except bit 0, so the rotated value has every bit
;; set except the one selected by operand 2, and the AND clears that bit
;; (btr).  Works in place for the SWI48 modes, clobbers FLAGS_REG and is
;; enabled by TARGET_USE_BT.
14738 (define_insn "*btr<mode>"
14739 [(set (match_operand:SWI48 0 "register_operand" "=r")
14740 (and:SWI48
14741 (rotate:SWI48 (const_int -2)
14742 (match_operand:QI 2 "register_operand" "r"))
14743 (match_operand:SWI48 1 "register_operand" "0")))
14744 (clobber (reg:CC FLAGS_REG))]
14745 "TARGET_USE_BT"
14746 "btr{<imodesuffix>}\t{%<k>2, %0|%0, %<k>2}"
14747 [(set_attr "type" "alu1")
14748 (set_attr "prefix_0f" "1")
14749 (set_attr "znver1_decode" "double")
14750 (set_attr "mode" "<MODE>")])
14751
14752 ;; Avoid useless masking of count operand.
;; btr form whose bit index is the QImode subreg of (and reg const).  The
;; condition requires TARGET_USE_BT, a constant with every bit of
;; GET_MODE_BITSIZE (<MODE>mode)-1 set, an integer-class mode for operand 1
;; of 2 to 4 bytes (8 with TARGET_64BIT), and ix86_pre_reload_split ().
;; The insn is split unconditionally into the unmasked form, using the
;; QImode lowpart of operand 1 as the bit index.
14753 (define_insn_and_split "*btr<mode>_mask"
14754 [(set (match_operand:SWI48 0 "register_operand")
14755 (and:SWI48
14756 (rotate:SWI48
14757 (const_int -2)
14758 (subreg:QI
14759 (and
14760 (match_operand 1 "register_operand")
14761 (match_operand 2 "const_int_operand")) 0))
14762 (match_operand:SWI48 3 "register_operand")))
14763 (clobber (reg:CC FLAGS_REG))]
14764 "TARGET_USE_BT
14765 && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
14766 == GET_MODE_BITSIZE (<MODE>mode)-1
14767 && GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14768 && IN_RANGE (GET_MODE_SIZE (GET_MODE (operands[1])), 2,
14769 4 << (TARGET_64BIT ? 1 : 0))
14770 && ix86_pre_reload_split ()"
14771 "#"
14772 "&& 1"
14773 [(parallel
14774 [(set (match_dup 0)
14775 (and:SWI48
14776 (rotate:SWI48 (const_int -2)
14777 (match_dup 1))
14778 (match_dup 3)))
14779 (clobber (reg:CC FLAGS_REG))])]
14780 {
14781 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
14782 operands[1] = gen_lowpart (QImode, operands[1]);
14783 })
14784
;; Variant of the masked bit clear where the AND on the count is done
;; directly in QImode.  The constant must keep every bit of
;; GET_MODE_BITSIZE (<MODE>mode)-1; the split then uses operand 1 itself
;; as the rotate count and drops the AND.
14785 (define_insn_and_split "*btr<mode>_mask_1"
14786 [(set (match_operand:SWI48 0 "register_operand")
14787 (and:SWI48
14788 (rotate:SWI48
14789 (const_int -2)
14790 (and:QI
14791 (match_operand:QI 1 "register_operand")
14792 (match_operand:QI 2 "const_int_operand")))
14793 (match_operand:SWI48 3 "register_operand")))
14794 (clobber (reg:CC FLAGS_REG))]
14795 "TARGET_USE_BT
14796 && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
14797 == GET_MODE_BITSIZE (<MODE>mode)-1
14798 && ix86_pre_reload_split ()"
14799 "#"
14800 "&& 1"
14801 [(parallel
14802 [(set (match_dup 0)
14803 (and:SWI48
14804 (rotate:SWI48 (const_int -2)
14805 (match_dup 1))
14806 (match_dup 3)))
14807 (clobber (reg:CC FLAGS_REG))])])
14808
;; Bit clear with an SWI12 destination: the rotated -2 mask is computed in
;; SImode and narrowed through a subreg before the AND.  The split does the
;; whole and/rotate in SImode on lowpart subregs of operands 0 and 1, after
;; forcing operand 1 (a nonimmediate_operand) into a register.
14809 (define_insn_and_split "*btr<mode>_1"
14810 [(set (match_operand:SWI12 0 "register_operand")
14811 (and:SWI12
14812 (subreg:SWI12
14813 (rotate:SI (const_int -2)
14814 (match_operand:QI 2 "register_operand")) 0)
14815 (match_operand:SWI12 1 "nonimmediate_operand")))
14816 (clobber (reg:CC FLAGS_REG))]
14817 "TARGET_USE_BT && ix86_pre_reload_split ()"
14818 "#"
14819 "&& 1"
14820 [(parallel
14821 [(set (match_dup 0)
14822 (and:SI (rotate:SI (const_int -2) (match_dup 2))
14823 (match_dup 1)))
14824 (clobber (reg:CC FLAGS_REG))])]
14825 {
14826 operands[0] = lowpart_subreg (SImode, operands[0], <MODE>mode);
14827 operands[1] = force_reg (<MODE>mode, operands[1]);
14828 operands[1] = lowpart_subreg (SImode, operands[1], <MODE>mode);
14829 })
14830
;; Store of zero into a one-bit field of operand 0 whose position is the
;; zero-extended QImode operand 1.  The split condition only holds when
;; operand 0 is a MEM: the value is copied into a new pseudo (operand 2),
;; an SImode and/rotate on its lowpart writes the lowpart of a second new
;; pseudo (operand 5), and that pseudo is stored back to operand 0.
14831 (define_insn_and_split "*btr<mode>_2"
14832 [(set (zero_extract:HI
14833 (match_operand:SWI12 0 "nonimmediate_operand")
14834 (const_int 1)
14835 (zero_extend:SI (match_operand:QI 1 "register_operand")))
14836 (const_int 0))
14837 (clobber (reg:CC FLAGS_REG))]
14838 "TARGET_USE_BT && ix86_pre_reload_split ()"
14839 "#"
14840 "&& MEM_P (operands[0])"
14841 [(set (match_dup 2) (match_dup 0))
14842 (parallel
14843 [(set (match_dup 3)
14844 (and:SI (rotate:SI (const_int -2) (match_dup 1))
14845 (match_dup 4)))
14846 (clobber (reg:CC FLAGS_REG))])
14847 (set (match_dup 0) (match_dup 5))]
14848 {
14849 operands[2] = gen_reg_rtx (<MODE>mode);
14850 operands[5] = gen_reg_rtx (<MODE>mode);
14851 operands[3] = lowpart_subreg (SImode, operands[5], <MODE>mode);
14852 operands[4] = lowpart_subreg (SImode, operands[2], <MODE>mode);
14853 })
14854
;; Store of zero into a one-bit field of a register operand 0, positioned
;; by the zero-extended QImode operand 1.  It is rewritten as an SImode
;; and/rotate whose source (operand 2) and destination (operand 0) are
;; both the SImode lowpart of the original operand 0.
14855 (define_split
14856 [(set (zero_extract:HI
14857 (match_operand:SWI12 0 "register_operand")
14858 (const_int 1)
14859 (zero_extend:SI (match_operand:QI 1 "register_operand")))
14860 (const_int 0))
14861 (clobber (reg:CC FLAGS_REG))]
14862 "TARGET_USE_BT && ix86_pre_reload_split ()"
14863 [(parallel
14864 [(set (match_dup 0)
14865 (and:SI (rotate:SI (const_int -2) (match_dup 1))
14866 (match_dup 2)))
14867 (clobber (reg:CC FLAGS_REG))])]
14868 {
14869 operands[2] = lowpart_subreg (SImode, operands[0], <MODE>mode);
14870 operands[0] = lowpart_subreg (SImode, operands[0], <MODE>mode);
14871 })
14872
14873 ;; These instructions are never faster than the corresponding
14874 ;; and/ior/xor operations when using immediate operand, so with
14875 ;; 32-bit there's no point. But in 64-bit, we can't hold the
14876 ;; relevant immediates within the instruction itself, so operating
14877 ;; on bits in the high 32-bits of a register becomes easier.
14878 ;;
14879 ;; These are slow on Nocona, but fast on Athlon64. We do require the use
14880 ;; of btrq and btcq for corner cases of post-reload expansion of absdf and
14881 ;; negdf respectively, so they can never be disabled entirely.
14882
;; Set to 1 the single bit of DImode operand 0 (register or memory) at the
;; constant position in operand 1, using "bts{q}".  Enabled on TARGET_64BIT
;; when TARGET_USE_BT is set or once reload has completed.
14883 (define_insn "*btsq_imm"
14884 [(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm")
14885 (const_int 1)
14886 (match_operand 1 "const_0_to_63_operand"))
14887 (const_int 1))
14888 (clobber (reg:CC FLAGS_REG))]
14889 "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
14890 "bts{q}\t{%1, %0|%0, %1}"
14891 [(set_attr "type" "alu1")
14892 (set_attr "prefix_0f" "1")
14893 (set_attr "znver1_decode" "double")
14894 (set_attr "mode" "DI")])
14895
;; Set to 0 the single bit of DImode operand 0 (register or memory) at the
;; constant position in operand 1, using "btr{q}".  Enabled on TARGET_64BIT
;; when TARGET_USE_BT is set or once reload has completed.
14896 (define_insn "*btrq_imm"
14897 [(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm")
14898 (const_int 1)
14899 (match_operand 1 "const_0_to_63_operand"))
14900 (const_int 0))
14901 (clobber (reg:CC FLAGS_REG))]
14902 "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
14903 "btr{q}\t{%1, %0|%0, %1}"
14904 [(set_attr "type" "alu1")
14905 (set_attr "prefix_0f" "1")
14906 (set_attr "znver1_decode" "double")
14907 (set_attr "mode" "DI")])
14908
;; Complement the single bit of DImode operand 0 (register or memory) at
;; the constant position in operand 1: the field is set to the NOT of its
;; own current value.  Output is "btc{q}".  Enabled on TARGET_64BIT when
;; TARGET_USE_BT is set or once reload has completed.
14909 (define_insn "*btcq_imm"
14910 [(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm")
14911 (const_int 1)
14912 (match_operand 1 "const_0_to_63_operand"))
14913 (not:DI (zero_extract:DI (match_dup 0) (const_int 1) (match_dup 1))))
14914 (clobber (reg:CC FLAGS_REG))]
14915 "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
14916 "btc{q}\t{%1, %0|%0, %1}"
14917 [(set_attr "type" "alu1")
14918 (set_attr "prefix_0f" "1")
14919 (set_attr "znver1_decode" "double")
14920 (set_attr "mode" "DI")])
14921
14922 ;; Allow Nocona to avoid these instructions if a register is available.
14923
;; Bit set at a constant position: when a DImode scratch register
;; (operand 2) can be allocated and TARGET_USE_BT is off, use an IOR with
;; the mask 1 << position instead.  If that mask is not an
;; x86_64_immediate_operand it is first moved into the scratch register.
14924 (define_peephole2
14925 [(match_scratch:DI 2 "r")
14926 (parallel [(set (zero_extract:DI
14927 (match_operand:DI 0 "nonimmediate_operand")
14928 (const_int 1)
14929 (match_operand 1 "const_0_to_63_operand"))
14930 (const_int 1))
14931 (clobber (reg:CC FLAGS_REG))])]
14932 "TARGET_64BIT && !TARGET_USE_BT"
14933 [(parallel [(set (match_dup 0)
14934 (ior:DI (match_dup 0) (match_dup 3)))
14935 (clobber (reg:CC FLAGS_REG))])]
14936 {
14937 int i = INTVAL (operands[1]);
14938
14939 operands[3] = gen_int_mode (HOST_WIDE_INT_1U << i, DImode);
14940
14941 if (!x86_64_immediate_operand (operands[3], DImode))
14942 {
14943 emit_move_insn (operands[2], operands[3]);
14944 operands[3] = operands[2];
14945 }
14946 })
14947
;; Bit clear at a constant position: when a DImode scratch register
;; (operand 2) can be allocated and TARGET_USE_BT is off, use an AND with
;; the mask ~(1 << position) instead.  If that mask is not an
;; x86_64_immediate_operand it is first moved into the scratch register.
14948 (define_peephole2
14949 [(match_scratch:DI 2 "r")
14950 (parallel [(set (zero_extract:DI
14951 (match_operand:DI 0 "nonimmediate_operand")
14952 (const_int 1)
14953 (match_operand 1 "const_0_to_63_operand"))
14954 (const_int 0))
14955 (clobber (reg:CC FLAGS_REG))])]
14956 "TARGET_64BIT && !TARGET_USE_BT"
14957 [(parallel [(set (match_dup 0)
14958 (and:DI (match_dup 0) (match_dup 3)))
14959 (clobber (reg:CC FLAGS_REG))])]
14960 {
14961 int i = INTVAL (operands[1]);
14962
14963 operands[3] = gen_int_mode (~(HOST_WIDE_INT_1U << i), DImode);
14964
14965 if (!x86_64_immediate_operand (operands[3], DImode))
14966 {
14967 emit_move_insn (operands[2], operands[3]);
14968 operands[3] = operands[2];
14969 }
14970 })
14971
;; Bit complement at a constant position: when a DImode scratch register
;; (operand 2) can be allocated and TARGET_USE_BT is off, use an XOR with
;; the mask 1 << position instead.  If that mask is not an
;; x86_64_immediate_operand it is first moved into the scratch register.
14972 (define_peephole2
14973 [(match_scratch:DI 2 "r")
14974 (parallel [(set (zero_extract:DI
14975 (match_operand:DI 0 "nonimmediate_operand")
14976 (const_int 1)
14977 (match_operand 1 "const_0_to_63_operand"))
14978 (not:DI (zero_extract:DI
14979 (match_dup 0) (const_int 1) (match_dup 1))))
14980 (clobber (reg:CC FLAGS_REG))])]
14981 "TARGET_64BIT && !TARGET_USE_BT"
14982 [(parallel [(set (match_dup 0)
14983 (xor:DI (match_dup 0) (match_dup 3)))
14984 (clobber (reg:CC FLAGS_REG))])]
14985 {
14986 int i = INTVAL (operands[1]);
14987
14988 operands[3] = gen_int_mode (HOST_WIDE_INT_1U << i, DImode);
14989
14990 if (!x86_64_immediate_operand (operands[3], DImode))
14991 {
14992 emit_move_insn (operands[2], operands[3]);
14993 operands[3] = operands[2];
14994 }
14995 })
14996
14997 ;; %%% bt
14998
;; Bit test: compare the one-bit zero_extract of operand 0 at position
;; operand 1 against zero, setting the flags register in CCCmode.  The
;; "mode" attribute is SI when the position is a CONST_INT below 32 and
;; <MODE> otherwise; the output code switches on it to pick the "bt{l}"
;; or "bt{q}" template.
14999 (define_insn "*bt<mode>"
15000 [(set (reg:CCC FLAGS_REG)
15001 (compare:CCC
15002 (zero_extract:SWI48
15003 (match_operand:SWI48 0 "nonimmediate_operand" "r,m")
15004 (const_int 1)
15005 (match_operand:SI 1 "nonmemory_operand" "r<S>,<S>"))
15006 (const_int 0)))]
15007 ""
15008 {
15009 switch (get_attr_mode (insn))
15010 {
15011 case MODE_SI:
15012 return "bt{l}\t{%1, %k0|%k0, %1}";
15013
15014 case MODE_DI:
15015 return "bt{q}\t{%q1, %0|%0, %q1}";
15016
15017 default:
15018 gcc_unreachable ();
15019 }
15020 }
15021 [(set_attr "type" "alu1")
15022 (set_attr "prefix_0f" "1")
15023 (set (attr "mode")
15024 (if_then_else
15025 (and (match_test "CONST_INT_P (operands[1])")
15026 (match_test "INTVAL (operands[1]) < 32"))
15027 (const_string "SI")
15028 (const_string "<MODE>")))])
15029
;; Conditional jump on a single tested bit.  Accepted before reload when
;; TARGET_USE_BT is set or the function is optimized for size.  A constant
;; position must be below the mode's bit size and at least 32 (8 when
;; optimizing for size); a non-constant position requires operand 1 not to
;; be a memory operand.  The split emits the CCCmode bit-test compare and
;; then a jump on the flags, with the comparison code reversed on a
;; shallow copy of operand 0.
15030 (define_insn_and_split "*jcc_bt<mode>"
15031 [(set (pc)
15032 (if_then_else (match_operator 0 "bt_comparison_operator"
15033 [(zero_extract:SWI48
15034 (match_operand:SWI48 1 "nonimmediate_operand")
15035 (const_int 1)
15036 (match_operand:SI 2 "nonmemory_operand"))
15037 (const_int 0)])
15038 (label_ref (match_operand 3))
15039 (pc)))
15040 (clobber (reg:CC FLAGS_REG))]
15041 "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
15042 && (CONST_INT_P (operands[2])
15043 ? (INTVAL (operands[2]) < GET_MODE_BITSIZE (<MODE>mode)
15044 && INTVAL (operands[2])
15045 >= (optimize_function_for_size_p (cfun) ? 8 : 32))
15046 : !memory_operand (operands[1], <MODE>mode))
15047 && ix86_pre_reload_split ()"
15048 "#"
15049 "&& 1"
15050 [(set (reg:CCC FLAGS_REG)
15051 (compare:CCC
15052 (zero_extract:SWI48
15053 (match_dup 1)
15054 (const_int 1)
15055 (match_dup 2))
15056 (const_int 0)))
15057 (set (pc)
15058 (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
15059 (label_ref (match_dup 3))
15060 (pc)))]
15061 {
15062 operands[0] = shallow_copy_rtx (operands[0]);
15063 PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
15064 })
15065
;; Conditional jump on a tested bit of a register, where the bit position
;; is a zero-extended QImode register.  The split replaces the position
;; with the SImode lowpart subreg of operand 2, emits the CCCmode bit-test
;; compare, and jumps on the flags with the comparison code reversed.
15066 (define_insn_and_split "*jcc_bt<mode>_1"
15067 [(set (pc)
15068 (if_then_else (match_operator 0 "bt_comparison_operator"
15069 [(zero_extract:SWI48
15070 (match_operand:SWI48 1 "register_operand")
15071 (const_int 1)
15072 (zero_extend:SI
15073 (match_operand:QI 2 "register_operand")))
15074 (const_int 0)])
15075 (label_ref (match_operand 3))
15076 (pc)))
15077 (clobber (reg:CC FLAGS_REG))]
15078 "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
15079 && ix86_pre_reload_split ()"
15080 "#"
15081 "&& 1"
15082 [(set (reg:CCC FLAGS_REG)
15083 (compare:CCC
15084 (zero_extract:SWI48
15085 (match_dup 1)
15086 (const_int 1)
15087 (match_dup 2))
15088 (const_int 0)))
15089 (set (pc)
15090 (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
15091 (label_ref (match_dup 3))
15092 (pc)))]
15093 {
15094 operands[2] = lowpart_subreg (SImode, operands[2], QImode);
15095 operands[0] = shallow_copy_rtx (operands[0]);
15096 PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
15097 })
15098
15099 ;; Avoid useless masking of bit offset operand.
;; Conditional jump on a tested bit whose position is (and:SI reg const).
;; The constant must keep every bit of GET_MODE_BITSIZE (<MODE>mode)-1, so
;; the split tests with the unmasked register (operand 2) and jumps on the
;; flags with the comparison code reversed.
;; NOTE(review): the operand vector of the match_operator lists only the
;; zero_extract and no (const_int 0) second element, unlike
;; "*jcc_bt<mode>" -- confirm this is intended.
15100 (define_insn_and_split "*jcc_bt<mode>_mask"
15101 [(set (pc)
15102 (if_then_else (match_operator 0 "bt_comparison_operator"
15103 [(zero_extract:SWI48
15104 (match_operand:SWI48 1 "register_operand")
15105 (const_int 1)
15106 (and:SI
15107 (match_operand:SI 2 "register_operand")
15108 (match_operand 3 "const_int_operand")))])
15109 (label_ref (match_operand 4))
15110 (pc)))
15111 (clobber (reg:CC FLAGS_REG))]
15112 "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
15113 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
15114 == GET_MODE_BITSIZE (<MODE>mode)-1
15115 && ix86_pre_reload_split ()"
15116 "#"
15117 "&& 1"
15118 [(set (reg:CCC FLAGS_REG)
15119 (compare:CCC
15120 (zero_extract:SWI48
15121 (match_dup 1)
15122 (const_int 1)
15123 (match_dup 2))
15124 (const_int 0)))
15125 (set (pc)
15126 (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
15127 (label_ref (match_dup 4))
15128 (pc)))]
15129 {
15130 operands[0] = shallow_copy_rtx (operands[0]);
15131 PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
15132 })
15133
;; Conditional jump on a tested bit whose position is the zero-extended
;; QImode subreg of (and reg const).  The constant must keep every bit of
;; GET_MODE_BITSIZE (<MODE>mode)-1 and operand 2 must have a MODE_INT mode
;; 2 to 4 bytes wide (up to 8 with TARGET_64BIT).  The split forces
;; operand 2 into a register, uses its SImode low part as the position,
;; and jumps on the flags with the comparison code reversed.
;; NOTE(review): the operand vector of the match_operator lists only the
;; zero_extract and no (const_int 0) second element -- confirm this is
;; intended.
15134 (define_insn_and_split "*jcc_bt<mode>_mask_1"
15135 [(set (pc)
15136 (if_then_else (match_operator 0 "bt_comparison_operator"
15137 [(zero_extract:SWI48
15138 (match_operand:SWI48 1 "register_operand")
15139 (const_int 1)
15140 (zero_extend:SI
15141 (subreg:QI
15142 (and
15143 (match_operand 2 "register_operand")
15144 (match_operand 3 "const_int_operand")) 0)))])
15145 (label_ref (match_operand 4))
15146 (pc)))
15147 (clobber (reg:CC FLAGS_REG))]
15148 "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
15149 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
15150 == GET_MODE_BITSIZE (<MODE>mode)-1
15151 && GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT
15152 && IN_RANGE (GET_MODE_SIZE (GET_MODE (operands[2])), 2,
15153 4 << (TARGET_64BIT ? 1 : 0))
15154 && ix86_pre_reload_split ()"
15155 "#"
15156 "&& 1"
15157 [(set (reg:CCC FLAGS_REG)
15158 (compare:CCC
15159 (zero_extract:SWI48
15160 (match_dup 1)
15161 (const_int 1)
15162 (match_dup 2))
15163 (const_int 0)))
15164 (set (pc)
15165 (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
15166 (label_ref (match_dup 4))
15167 (pc)))]
15168 {
15169 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
15170 operands[2] = gen_lowpart (SImode, operands[2]);
15171 operands[0] = shallow_copy_rtx (operands[0]);
15172 PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
15173 })
15174
15175 ;; Help combine recognize bt followed by cmov
;; Select between operands 3 and 4 on a tested bit of operand 1 whose
;; position is the zero-extended QImode operand 2.  Rewritten as the
;; CCCmode bit-test compare followed by an if_then_else on (eq flags 0);
;; operands 3 and 4 are swapped when the original operator is EQ.
;; Requires TARGET_USE_BT and TARGET_CMOVE, and operands 3 and 4 must not
;; both be in memory.
15176 (define_split
15177 [(set (match_operand:SWI248 0 "register_operand")
15178 (if_then_else:SWI248
15179 (match_operator 5 "bt_comparison_operator"
15180 [(zero_extract:SWI48
15181 (match_operand:SWI48 1 "register_operand")
15182 (const_int 1)
15183 (zero_extend:SI (match_operand:QI 2 "register_operand")))
15184 (const_int 0)])
15185 (match_operand:SWI248 3 "nonimmediate_operand")
15186 (match_operand:SWI248 4 "nonimmediate_operand")))]
15187 "TARGET_USE_BT && TARGET_CMOVE
15188 && !(MEM_P (operands[3]) && MEM_P (operands[4]))
15189 && ix86_pre_reload_split ()"
15190 [(set (reg:CCC FLAGS_REG)
15191 (compare:CCC
15192 (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
15193 (const_int 0)))
15194 (set (match_dup 0)
15195 (if_then_else:SWI248 (eq (reg:CCC FLAGS_REG) (const_int 0))
15196 (match_dup 3)
15197 (match_dup 4)))]
15198 {
15199 if (GET_CODE (operands[5]) == EQ)
15200 std::swap (operands[3], operands[4]);
15201 operands[2] = lowpart_subreg (SImode, operands[2], QImode);
15202 })
15203
15204 ;; Help combine recognize bt followed by setc
;; A one-bit zero_extract of operand 1, positioned by the zero-extended
;; QImode operand 2, stored into QImode operand 0 through an SWI48 subreg.
;; Split into the CCCmode bit-test compare followed by a QImode set of
;; operand 0 from (eq flags 0); the position becomes the SImode lowpart
;; subreg of operand 2.
15205 (define_insn_and_split "*bt<mode>_setcqi"
15206 [(set (subreg:SWI48 (match_operand:QI 0 "register_operand") 0)
15207 (zero_extract:SWI48
15208 (match_operand:SWI48 1 "register_operand")
15209 (const_int 1)
15210 (zero_extend:SI (match_operand:QI 2 "register_operand"))))
15211 (clobber (reg:CC FLAGS_REG))]
15212 "TARGET_USE_BT && ix86_pre_reload_split ()"
15213 "#"
15214 "&& 1"
15215 [(set (reg:CCC FLAGS_REG)
15216 (compare:CCC
15217 (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
15218 (const_int 0)))
15219 (set (match_dup 0)
15220 (eq:QI (reg:CCC FLAGS_REG) (const_int 0)))]
15221 {
15222 operands[2] = lowpart_subreg (SImode, operands[2], QImode);
15223 })
15224
15225 ;; Help combine recognize bt followed by setnc
;; QImode result of ((not (operand 1 >> operand 2)) & 1), where the shift
;; is done in the SWI48 mode and narrowed with a subreg.  Split into the
;; CCCmode bit-test compare followed by a QImode set of operand 0 from
;; (ne flags 0); the position becomes the SImode lowpart subreg of
;; operand 2.
15226 (define_insn_and_split "*bt<mode>_setncqi"
15227 [(set (match_operand:QI 0 "register_operand")
15228 (and:QI
15229 (not:QI
15230 (subreg:QI
15231 (lshiftrt:SWI48 (match_operand:SWI48 1 "register_operand")
15232 (match_operand:QI 2 "register_operand")) 0))
15233 (const_int 1)))
15234 (clobber (reg:CC FLAGS_REG))]
15235 "TARGET_USE_BT && ix86_pre_reload_split ()"
15236 "#"
15237 "&& 1"
15238 [(set (reg:CCC FLAGS_REG)
15239 (compare:CCC
15240 (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
15241 (const_int 0)))
15242 (set (match_dup 0)
15243 (ne:QI (reg:CCC FLAGS_REG) (const_int 0)))]
15244 {
15245 operands[2] = lowpart_subreg (SImode, operands[2], QImode);
15246 })
15247
;; Full-width result of ((not (operand 1 >> operand 2)) & 1).  Split into
;; the CCCmode bit-test compare, a set of a new QImode pseudo (operand 3)
;; from (ne flags 0), and a zero extension of that pseudo into operand 0.
15248 (define_insn_and_split "*bt<mode>_setnc<mode>"
15249 [(set (match_operand:SWI48 0 "register_operand")
15250 (and:SWI48
15251 (not:SWI48
15252 (lshiftrt:SWI48 (match_operand:SWI48 1 "register_operand")
15253 (match_operand:QI 2 "register_operand")))
15254 (const_int 1)))
15255 (clobber (reg:CC FLAGS_REG))]
15256 "TARGET_USE_BT && ix86_pre_reload_split ()"
15257 "#"
15258 "&& 1"
15259 [(set (reg:CCC FLAGS_REG)
15260 (compare:CCC
15261 (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
15262 (const_int 0)))
15263 (set (match_dup 3)
15264 (ne:QI (reg:CCC FLAGS_REG) (const_int 0)))
15265 (set (match_dup 0) (zero_extend:SWI48 (match_dup 3)))]
15266 {
15267 operands[2] = lowpart_subreg (SImode, operands[2], QImode);
15268 operands[3] = gen_reg_rtx (QImode);
15269 })
15270 \f
15271 ;; Store-flag instructions.
15272
;; Store-flag of an add_comparison_operator that compares (not operand 2)
;; with operand 3.  Rewritten as a CCCmode compare of
;; (operand 2 + operand 3) against operand 2, followed by the same
;; operator applied to the flags register and zero.
15273 (define_split
15274 [(set (match_operand:QI 0 "nonimmediate_operand")
15275 (match_operator:QI 1 "add_comparison_operator"
15276 [(not:SWI (match_operand:SWI 2 "register_operand"))
15277 (match_operand:SWI 3 "nonimmediate_operand")]))]
15278 ""
15279 [(set (reg:CCC FLAGS_REG)
15280 (compare:CCC
15281 (plus:SWI (match_dup 2) (match_dup 3))
15282 (match_dup 2)))
15283 (set (match_dup 0)
15284 (match_op_dup 1 [(reg:CCC FLAGS_REG) (const_int 0)]))])
15285
;; Store-flag of a DImode register compared (shr_comparison_operator) with
;; a constant C for which exact_log2 (C + 1) lies in 32..63.  Rewritten as
;; a CCZmode compare of the register shifted right by that exponent
;; (operand 4) against zero; on a shallow copy of the operator GTU becomes
;; NE and LEU becomes EQ.  Any other code hits gcc_unreachable.
15286 (define_split
15287 [(set (match_operand:QI 0 "nonimmediate_operand")
15288 (match_operator:QI 1 "shr_comparison_operator"
15289 [(match_operand:DI 2 "register_operand")
15290 (match_operand 3 "const_int_operand")]))]
15291 "TARGET_64BIT
15292 && IN_RANGE (exact_log2 (UINTVAL (operands[3]) + 1), 32, 63)"
15293 [(set (reg:CCZ FLAGS_REG)
15294 (compare:CCZ
15295 (lshiftrt:DI (match_dup 2) (match_dup 4))
15296 (const_int 0)))
15297 (set (match_dup 0)
15298 (match_op_dup 1 [(reg:CCZ FLAGS_REG) (const_int 0)]))]
15299 {
15300 enum rtx_code new_code;
15301
15302 operands[1] = shallow_copy_rtx (operands[1]);
15303 switch (GET_CODE (operands[1]))
15304 {
15305 case GTU: new_code = NE; break;
15306 case LEU: new_code = EQ; break;
15307 default: gcc_unreachable ();
15308 }
15309 PUT_CODE (operands[1], new_code);
15310
15311 operands[4] = GEN_INT (exact_log2 (UINTVAL (operands[3]) + 1));
15312 })
15313
15314 ;; For all sCOND expanders, also expand the compare or test insn that
15315 ;; generates cc0. Generate an equality comparison if `seq' or `sne'.
15316
;; DImode store-flag from a comparison of the flags register with zero,
;; for TARGET_64BIT without TARGET_PARTIAL_REG_STALL.  After reload it is
;; split into a QImode set of the low part of operand 0 from a QImode copy
;; of the comparison, followed by a zero extension to DImode.
15317 (define_insn_and_split "*setcc_di_1"
15318 [(set (match_operand:DI 0 "register_operand" "=q")
15319 (match_operator:DI 1 "ix86_comparison_operator"
15320 [(reg FLAGS_REG) (const_int 0)]))]
15321 "TARGET_64BIT && !TARGET_PARTIAL_REG_STALL"
15322 "#"
15323 "&& reload_completed"
15324 [(set (match_dup 2) (match_dup 1))
15325 (set (match_dup 0) (zero_extend:DI (match_dup 2)))]
15326 {
15327 operands[1] = shallow_copy_rtx (operands[1]);
15328 PUT_MODE (operands[1], QImode);
15329 operands[2] = gen_lowpart (QImode, operands[0]);
15330 })
15331
;; SWI24 store-flag with a flags clobber, used when
;; TARGET_ZERO_EXTEND_WITH_AND holds and the function is optimized for
;; speed (and there is no TARGET_PARTIAL_REG_STALL).  After reload it is
;; split into a QImode set of the low part of operand 0 followed by a
;; zero extension that is paired with the flags clobber.
15332 (define_insn_and_split "*setcc_<mode>_1_and"
15333 [(set (match_operand:SWI24 0 "register_operand" "=q")
15334 (match_operator:SWI24 1 "ix86_comparison_operator"
15335 [(reg FLAGS_REG) (const_int 0)]))
15336 (clobber (reg:CC FLAGS_REG))]
15337 "!TARGET_PARTIAL_REG_STALL
15338 && TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
15339 "#"
15340 "&& reload_completed"
15341 [(set (match_dup 2) (match_dup 1))
15342 (parallel [(set (match_dup 0) (zero_extend:SWI24 (match_dup 2)))
15343 (clobber (reg:CC FLAGS_REG))])]
15344 {
15345 operands[1] = shallow_copy_rtx (operands[1]);
15346 PUT_MODE (operands[1], QImode);
15347 operands[2] = gen_lowpart (QImode, operands[0]);
15348 })
15349
;; SWI24 store-flag without a flags clobber, used when
;; TARGET_ZERO_EXTEND_WITH_AND is off or the function is optimized for
;; size (and there is no TARGET_PARTIAL_REG_STALL).  After reload it is
;; split into a QImode set of the low part of operand 0 followed by a
;; plain zero extension.
15350 (define_insn_and_split "*setcc_<mode>_1_movzbl"
15351 [(set (match_operand:SWI24 0 "register_operand" "=q")
15352 (match_operator:SWI24 1 "ix86_comparison_operator"
15353 [(reg FLAGS_REG) (const_int 0)]))]
15354 "!TARGET_PARTIAL_REG_STALL
15355 && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))"
15356 "#"
15357 "&& reload_completed"
15358 [(set (match_dup 2) (match_dup 1))
15359 (set (match_dup 0) (zero_extend:SWI24 (match_dup 2)))]
15360 {
15361 operands[1] = shallow_copy_rtx (operands[1]);
15362 PUT_MODE (operands[1], QImode);
15363 operands[2] = gen_lowpart (QImode, operands[0]);
15364 })
15365
;; QImode store-flag: set a byte register or memory operand from a
;; comparison of the flags register with zero, emitted as "set%C1".
15366 (define_insn "*setcc_qi"
15367 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
15368 (match_operator:QI 1 "ix86_comparison_operator"
15369 [(reg FLAGS_REG) (const_int 0)]))]
15370 ""
15371 "set%C1\t%0"
15372 [(set_attr "type" "setcc")
15373 (set_attr "mode" "QI")])
15374
;; QImode store-flag into the strict_low_part of a byte register
;; (read-write "+q" operand), emitted with the same "set%C1" template.
15375 (define_insn "*setcc_qi_slp"
15376 [(set (strict_low_part (match_operand:QI 0 "register_operand" "+q"))
15377 (match_operator:QI 1 "ix86_comparison_operator"
15378 [(reg FLAGS_REG) (const_int 0)]))]
15379 ""
15380 "set%C1\t%0"
15381 [(set_attr "type" "setcc")
15382 (set_attr "mode" "QI")])
15383
15384 ;; In general it is not safe to assume too much about CCmode registers,
15385 ;; so simplify-rtx stops when it sees a second one. Under certain
15386 ;; conditions this is safe on x86, so help combine not create
15387 ;;
15388 ;; seta %al
15389 ;; testb %al, %al
15390 ;; sete %al
15391
;; (ne:QI (comparison of flags with 0) 0) stored to operand 0 is replaced
;; by a direct set of operand 0 from a shallow copy of the inner
;; comparison whose mode has been changed to QImode.
15392 (define_split
15393 [(set (match_operand:QI 0 "nonimmediate_operand")
15394 (ne:QI (match_operator 1 "ix86_comparison_operator"
15395 [(reg FLAGS_REG) (const_int 0)])
15396 (const_int 0)))]
15397 ""
15398 [(set (match_dup 0) (match_dup 1))]
15399 {
15400 operands[1] = shallow_copy_rtx (operands[1]);
15401 PUT_MODE (operands[1], QImode);
15402 })
15403
;; strict_low_part form of the (ne:QI (comparison of flags with 0) 0)
;; simplification: the replacement sets operand 0 from a shallow copy of
;; the inner comparison whose mode has been changed to QImode.
15404 (define_split
15405 [(set (strict_low_part (match_operand:QI 0 "register_operand"))
15406 (ne:QI (match_operator 1 "ix86_comparison_operator"
15407 [(reg FLAGS_REG) (const_int 0)])
15408 (const_int 0)))]
15409 ""
15410 [(set (match_dup 0) (match_dup 1))]
15411 {
15412 operands[1] = shallow_copy_rtx (operands[1]);
15413 PUT_MODE (operands[1], QImode);
15414 })
15415
;; (eq:QI (comparison of flags with 0) 0) stored to operand 0 is replaced
;; by a direct set from the reversed comparison.  A shallow copy of the
;; inner comparison is given QImode and its code is replaced by
;; ix86_reverse_condition of the old code and the mode of its first
;; operand; the split FAILs if the result is no longer accepted by
;; ix86_comparison_operator.
15416 (define_split
15417 [(set (match_operand:QI 0 "nonimmediate_operand")
15418 (eq:QI (match_operator 1 "ix86_comparison_operator"
15419 [(reg FLAGS_REG) (const_int 0)])
15420 (const_int 0)))]
15421 ""
15422 [(set (match_dup 0) (match_dup 1))]
15423 {
15424 operands[1] = shallow_copy_rtx (operands[1]);
15425 PUT_MODE (operands[1], QImode);
15426 PUT_CODE (operands[1],
15427 ix86_reverse_condition (GET_CODE (operands[1]),
15428 GET_MODE (XEXP (operands[1], 0))));
15429
15430 /* Make sure that (a) the CCmode we have for the flags is strong
15431 enough for the reversed compare or (b) we have a valid FP compare. */
15432 if (! ix86_comparison_operator (operands[1], VOIDmode))
15433 FAIL;
15434 })
15435
;; strict_low_part form of the (eq:QI (comparison of flags with 0) 0)
;; simplification.  A shallow copy of the inner comparison is given QImode
;; and its code is replaced by ix86_reverse_condition of the old code and
;; the mode of its first operand; the split FAILs if the result is no
;; longer accepted by ix86_comparison_operator.
15436 (define_split
15437 [(set (strict_low_part (match_operand:QI 0 "register_operand"))
15438 (eq:QI (match_operator 1 "ix86_comparison_operator"
15439 [(reg FLAGS_REG) (const_int 0)])
15440 (const_int 0)))]
15441 ""
15442 [(set (match_dup 0) (match_dup 1))]
15443 {
15444 operands[1] = shallow_copy_rtx (operands[1]);
15445 PUT_MODE (operands[1], QImode);
15446 PUT_CODE (operands[1],
15447 ix86_reverse_condition (GET_CODE (operands[1]),
15448 GET_MODE (XEXP (operands[1], 0))));
15449
15450 /* Make sure that (a) the CCmode we have for the flags is strong
15451 enough for the reversed compare or (b) we have a valid FP compare. */
15452 if (! ix86_comparison_operator (operands[1], VOIDmode))
15453 FAIL;
15454 })
15455
15456 ;; The SSE store flag instructions save 0 or 0xffffffff to the result.
15457 ;; Subsequent logical operations are used to imitate conditional moves.
15458 ;; 0xffffffff is NaN, but not in normalized form, so we can't represent
15459 ;; it directly.
15460
;; SSE comparison of two MODEF values that writes its result to an "x"
;; register.  Two alternatives: the "noavx" one ties operand 1 to the
;; destination and emits "cmp%D3...", the "avx" one emits the
;; three-operand "vcmp%D3..." form.  Enabled when
;; SSE_FLOAT_MODE_P (<MODE>mode) holds.
15461 (define_insn "setcc_<mode>_sse"
15462 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
15463 (match_operator:MODEF 3 "sse_comparison_operator"
15464 [(match_operand:MODEF 1 "register_operand" "0,x")
15465 (match_operand:MODEF 2 "nonimmediate_operand" "xm,xm")]))]
15466 "SSE_FLOAT_MODE_P (<MODE>mode)"
15467 "@
15468 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
15469 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15470 [(set_attr "isa" "noavx,avx")
15471 (set_attr "type" "ssecmp")
15472 (set_attr "length_immediate" "1")
15473 (set_attr "prefix" "orig,vex")
15474 (set_attr "mode" "<MODE>")])
15475
;; HFmode comparison expressed as an UNSPEC_PCMP of operands 1 and 2 with
;; the SImode constant operand 3, producing a QImode result in a "k"
;; register.  Emitted as "vcmpsh" under TARGET_AVX512FP16.
15476 (define_insn "setcc_hf_mask"
15477 [(set (match_operand:QI 0 "register_operand" "=k")
15478 (unspec:QI
15479 [(match_operand:HF 1 "register_operand" "v")
15480 (match_operand:HF 2 "nonimmediate_operand" "vm")
15481 (match_operand:SI 3 "const_0_to_31_operand")]
15482 UNSPEC_PCMP))]
15483 "TARGET_AVX512FP16"
15484 "vcmpsh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15485 [(set_attr "type" "ssecmp")
15486 (set_attr "prefix" "evex")
15487 (set_attr "mode" "HF")])
15488
15489 \f
15490 ;; Basic conditional jump instructions.
15491
;; Conditional jump on an add_comparison_operator that compares
;; (not operand 2) with operand 3.  Rewritten as a CCCmode compare of
;; (operand 2 + operand 3) against operand 2, followed by a jump that
;; applies the same operator to the flags register and zero.
15492 (define_split
15493 [(set (pc)
15494 (if_then_else
15495 (match_operator 1 "add_comparison_operator"
15496 [(not:SWI (match_operand:SWI 2 "register_operand"))
15497 (match_operand:SWI 3 "nonimmediate_operand")])
15498 (label_ref (match_operand 0))
15499 (pc)))]
15500 ""
15501 [(set (reg:CCC FLAGS_REG)
15502 (compare:CCC
15503 (plus:SWI (match_dup 2) (match_dup 3))
15504 (match_dup 2)))
15505 (set (pc)
15506 (if_then_else (match_op_dup 1 [(reg:CCC FLAGS_REG) (const_int 0)])
15507 (label_ref (match_operand 0))
15508 (pc)))])
15509
;; Conditional jump on a DImode register compared
;; (shr_comparison_operator) with a constant C for which
;; exact_log2 (C + 1) lies in 32..63.  Rewritten as a CCZmode compare of
;; the register shifted right by that exponent (operand 4) against zero;
;; on a shallow copy of the operator GTU becomes NE and LEU becomes EQ.
;; Any other code hits gcc_unreachable.
15510 (define_split
15511 [(set (pc)
15512 (if_then_else
15513 (match_operator 1 "shr_comparison_operator"
15514 [(match_operand:DI 2 "register_operand")
15515 (match_operand 3 "const_int_operand")])
15516 (label_ref (match_operand 0))
15517 (pc)))]
15518 "TARGET_64BIT
15519 && IN_RANGE (exact_log2 (UINTVAL (operands[3]) + 1), 32, 63)"
15520 [(set (reg:CCZ FLAGS_REG)
15521 (compare:CCZ
15522 (lshiftrt:DI (match_dup 2) (match_dup 4))
15523 (const_int 0)))
15524 (set (pc)
15525 (if_then_else (match_op_dup 1 [(reg:CCZ FLAGS_REG) (const_int 0)])
15526 (label_ref (match_operand 0))
15527 (pc)))]
15528 {
15529 enum rtx_code new_code;
15530
15531 operands[1] = shallow_copy_rtx (operands[1]);
15532 switch (GET_CODE (operands[1]))
15533 {
15534 case GTU: new_code = NE; break;
15535 case LEU: new_code = EQ; break;
15536 default: gcc_unreachable ();
15537 }
15538 PUT_CODE (operands[1], new_code);
15539
15540 operands[4] = GEN_INT (exact_log2 (UINTVAL (operands[3]) + 1));
15541 })
15542
15543 ;; We ignore the overflow flag for signed branch instructions.
15544
;; Conditional jump to label operand 0 on a comparison of the flags
;; register with zero, emitted as "j%C1".  The "length" attribute is 2
;; when (target - pc) lies in [-126, 128) and 6 otherwise.
15545 (define_insn "*jcc"
15546 [(set (pc)
15547 (if_then_else (match_operator 1 "ix86_comparison_operator"
15548 [(reg FLAGS_REG) (const_int 0)])
15549 (label_ref (match_operand 0))
15550 (pc)))]
15551 ""
15552 "%!%+j%C1\t%l0"
15553 [(set_attr "type" "ibr")
15554 (set_attr "modrm" "0")
15555 (set (attr "length")
15556 (if_then_else
15557 (and (ge (minus (match_dup 0) (pc))
15558 (const_int -126))
15559 (lt (minus (match_dup 0) (pc))
15560 (const_int 128)))
15561 (const_int 2)
15562 (const_int 6)))])
15563
15564 ;; In general it is not safe to assume too much about CCmode registers,
15565 ;; so simplify-rtx stops when it sees a second one. Under certain
15566 ;; conditions this is safe on x86, so help combine not create
15567 ;;
15568 ;; seta %al
15569 ;; testb %al, %al
15570 ;; je Lfoo
15571
;; A jump on (ne (comparison of flags with 0) 0) is replaced by a jump on
;; the inner comparison itself, using a shallow copy whose mode has been
;; changed to VOIDmode.
15572 (define_split
15573 [(set (pc)
15574 (if_then_else (ne (match_operator 0 "ix86_comparison_operator"
15575 [(reg FLAGS_REG) (const_int 0)])
15576 (const_int 0))
15577 (label_ref (match_operand 1))
15578 (pc)))]
15579 ""
15580 [(set (pc)
15581 (if_then_else (match_dup 0)
15582 (label_ref (match_dup 1))
15583 (pc)))]
15584 {
15585 operands[0] = shallow_copy_rtx (operands[0]);
15586 PUT_MODE (operands[0], VOIDmode);
15587 })
15588
;; A jump on (eq (comparison of flags with 0) 0) is replaced by a jump on
;; the reversed inner comparison.  A shallow copy of the comparison is
;; given VOIDmode and its code is replaced by ix86_reverse_condition of
;; the old code and the mode of its first operand; the split FAILs if the
;; result is no longer accepted by ix86_comparison_operator.
15589 (define_split
15590 [(set (pc)
15591 (if_then_else (eq (match_operator 0 "ix86_comparison_operator"
15592 [(reg FLAGS_REG) (const_int 0)])
15593 (const_int 0))
15594 (label_ref (match_operand 1))
15595 (pc)))]
15596 ""
15597 [(set (pc)
15598 (if_then_else (match_dup 0)
15599 (label_ref (match_dup 1))
15600 (pc)))]
15601 {
15602 operands[0] = shallow_copy_rtx (operands[0]);
15603 PUT_MODE (operands[0], VOIDmode);
15604 PUT_CODE (operands[0],
15605 ix86_reverse_condition (GET_CODE (operands[0]),
15606 GET_MODE (XEXP (operands[0], 0))));
15607
15608 /* Make sure that (a) the CCmode we have for the flags is strong
15609 enough for the reversed compare or (b) we have a valid FP compare. */
15610 if (! ix86_comparison_operator (operands[0], VOIDmode))
15611 FAIL;
15612 })
15613 \f
15614 ;; Unconditional and other jump instructions
15615
;; Unconditional direct jump to label operand 0, emitted as "jmp".  The
;; "length" attribute is 2 when (target - pc) lies in [-126, 128) and 5
;; otherwise.
15616 (define_insn "jump"
15617 [(set (pc)
15618 (label_ref (match_operand 0)))]
15619 ""
15620 "%!jmp\t%l0"
15621 [(set_attr "type" "ibr")
15622 (set_attr "modrm" "0")
15623 (set (attr "length")
15624 (if_then_else
15625 (and (ge (minus (match_dup 0) (pc))
15626 (const_int -126))
15627 (lt (minus (match_dup 0) (pc))
15628 (const_int 128)))
15629 (const_int 2)
15630 (const_int 5)))])
15631
;; Expander for an indirect jump through operand 0.  With TARGET_X32 or
;; TARGET_INDIRECT_BRANCH_REGISTER the address is first converted to
;; word_mode.  It always sets cfun->machine->has_local_indirect_jump.
15632 (define_expand "indirect_jump"
15633 [(set (pc) (match_operand 0 "indirect_branch_operand"))]
15634 ""
15635 {
15636 if (TARGET_X32 || TARGET_INDIRECT_BRANCH_REGISTER)
15637 operands[0] = convert_memory_address (word_mode, operands[0]);
15638 cfun->machine->has_local_indirect_jump = true;
15639 })
15640
;; Indirect jump insn; the assembly text comes from
;; ix86_output_indirect_jmp.  The "type" attribute is "multi" when
;; cfun->machine->indirect_branch_type differs from indirect_branch_keep
;; and "ibr" otherwise.
15641 (define_insn "*indirect_jump"
15642 [(set (pc) (match_operand:W 0 "indirect_branch_operand" "rBw"))]
15643 ""
15644 "* return ix86_output_indirect_jmp (operands[0]);"
15645 [(set (attr "type")
15646 (if_then_else (match_test "(cfun->machine->indirect_branch_type
15647 != indirect_branch_keep)")
15648 (const_string "multi")
15649 (const_string "ibr")))
15650 (set_attr "length_immediate" "0")])
15651
;; Expander for a jump through a dispatch-table entry (operand 0), with
;; the table label as operand 1.  Under flag_pic the entry is made
;; absolute by one of three computations in Pmode:
;;   - TARGET_64BIT or TARGET_VXWORKS_RTP: entry + label_ref of the table;
;;   - TARGET_MACHO or HAVE_AS_GOTOFF_IN_DATA: entry + pic_offset_table_rtx;
;;   - otherwise: pic_offset_table_rtx - entry.
;; With TARGET_X32 or TARGET_INDIRECT_BRANCH_REGISTER the address is then
;; converted to word_mode.  It always sets
;; cfun->machine->has_local_indirect_jump.
15652 (define_expand "tablejump"
15653 [(parallel [(set (pc) (match_operand 0 "indirect_branch_operand"))
15654 (use (label_ref (match_operand 1)))])]
15655 ""
15656 {
15657 /* In PIC mode, the table entries are stored GOT (32-bit) or PC (64-bit)
15658 relative. Convert the relative address to an absolute address. */
15659 if (flag_pic)
15660 {
15661 rtx op0, op1;
15662 enum rtx_code code;
15663
15664 /* We can't use @GOTOFF for text labels on VxWorks;
15665 see gotoff_operand. */
15666 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15667 {
15668 code = PLUS;
15669 op0 = operands[0];
15670 op1 = gen_rtx_LABEL_REF (Pmode, operands[1]);
15671 }
15672 else if (TARGET_MACHO || HAVE_AS_GOTOFF_IN_DATA)
15673 {
15674 code = PLUS;
15675 op0 = operands[0];
15676 op1 = pic_offset_table_rtx;
15677 }
15678 else
15679 {
15680 code = MINUS;
15681 op0 = pic_offset_table_rtx;
15682 op1 = operands[0];
15683 }
15684
15685 operands[0] = expand_simple_binop (Pmode, code, op0, op1, NULL_RTX, 0,
15686 OPTAB_DIRECT);
15687 }
15688
15689 if (TARGET_X32 || TARGET_INDIRECT_BRANCH_REGISTER)
15690 operands[0] = convert_memory_address (word_mode, operands[0]);
15691 cfun->machine->has_local_indirect_jump = true;
15692 })
15693
;; Table jump insn: an indirect jump through operand 0 that also carries a
;; (use (label_ref ...)) of the table label in operand 1.  The assembly
;; text comes from ix86_output_indirect_jmp.  The "type" attribute is
;; "multi" when cfun->machine->indirect_branch_type differs from
;; indirect_branch_keep and "ibr" otherwise.
15694 (define_insn "*tablejump_1"
15695 [(set (pc) (match_operand:W 0 "indirect_branch_operand" "rBw"))
15696 (use (label_ref (match_operand 1)))]
15697 ""
15698 "* return ix86_output_indirect_jmp (operands[0]);"
15699 [(set (attr "type")
15700 (if_then_else (match_test "(cfun->machine->indirect_branch_type
15701 != indirect_branch_keep)")
15702 (const_string "multi")
15703 (const_string "ibr")))
15704 (set_attr "length_immediate" "0")])
15705 \f
15706 ;; Convert setcc + movzbl to xor + setcc if operands don't overlap.
15707
;; Matches three insns: a set of the flags register from operand 0, a
;; QImode store-flag into operand 1, and a zero extension of operand 1
;; into operand 3.  Applies when operand 1 is dead after the third insn or
;; is the same as operand 3, operand 3 does not overlap operand 0, and
;; peep2_regno_dead_p (0, FLAGS_REG) holds.  The preparation code calls
;; ix86_expand_clear on operand 3; the replacement keeps the flags set and
;; stores the comparison into the strict_low_part of operand 3's QImode
;; low part (operand 5).
15708 (define_peephole2
15709 [(set (match_operand 4 "flags_reg_operand") (match_operand 0))
15710 (set (match_operand:QI 1 "register_operand")
15711 (match_operator:QI 2 "ix86_comparison_operator"
15712 [(reg FLAGS_REG) (const_int 0)]))
15713 (set (match_operand 3 "any_QIreg_operand")
15714 (zero_extend (match_dup 1)))]
15715 "(peep2_reg_dead_p (3, operands[1])
15716 || operands_match_p (operands[1], operands[3]))
15717 && ! reg_overlap_mentioned_p (operands[3], operands[0])
15718 && peep2_regno_dead_p (0, FLAGS_REG)"
15719 [(set (match_dup 4) (match_dup 0))
15720 (set (strict_low_part (match_dup 5))
15721 (match_dup 2))]
15722 {
15723 operands[5] = gen_lowpart (QImode, operands[3]);
15724 ix86_expand_clear (operands[3]);
15725 })
15726
;; Same setcc + zero_extend rewrite, for a flags-setting insn that is a
;; two-element parallel: the flags set from operand 0 plus a second
;; element matched as operand 4.  On top of the dead/same-register and
;; FLAGS_REG checks, operand 3 must not overlap operand 0 or operand 4 and
;; must not be set by operand 4.  The preparation code calls
;; ix86_expand_clear on operand 3; the replacement keeps the parallel and
;; stores the comparison into the strict_low_part of operand 3's QImode
;; low part (operand 6).
15727 (define_peephole2
15728 [(parallel [(set (match_operand 5 "flags_reg_operand") (match_operand 0))
15729 (match_operand 4)])
15730 (set (match_operand:QI 1 "register_operand")
15731 (match_operator:QI 2 "ix86_comparison_operator"
15732 [(reg FLAGS_REG) (const_int 0)]))
15733 (set (match_operand 3 "any_QIreg_operand")
15734 (zero_extend (match_dup 1)))]
15735 "(peep2_reg_dead_p (3, operands[1])
15736 || operands_match_p (operands[1], operands[3]))
15737 && ! reg_overlap_mentioned_p (operands[3], operands[0])
15738 && ! reg_overlap_mentioned_p (operands[3], operands[4])
15739 && ! reg_set_p (operands[3], operands[4])
15740 && peep2_regno_dead_p (0, FLAGS_REG)"
15741 [(parallel [(set (match_dup 5) (match_dup 0))
15742 (match_dup 4)])
15743 (set (strict_low_part (match_dup 6))
15744 (match_dup 2))]
15745 {
15746 operands[6] = gen_lowpart (QImode, operands[3]);
15747 ix86_expand_clear (operands[3]);
15748 })
15749
;; Four-insn variant: a plain flags SET (operand 6 := operand 0), then a
;; PARALLEL containing a second flags SET (operand 7 := operand 1) plus
;; operand 5, then the setcc into operand 2 and the zero_extend into
;; operand 4.  The second flags source, operand 1, must itself refer to
;; FLAGS_REG (refers_to_regno_p), and operand 4 must not overlap operands 0,
;; 1 or 5 nor be set by operand 5.  The preparation code clears operand 4 via
;; ix86_expand_clear and makes operand 8 its QImode low part for the setcc.
15750 (define_peephole2
15751 [(set (match_operand 6 "flags_reg_operand") (match_operand 0))
15752 (parallel [(set (match_operand 7 "flags_reg_operand") (match_operand 1))
15753 (match_operand 5)])
15754 (set (match_operand:QI 2 "register_operand")
15755 (match_operator:QI 3 "ix86_comparison_operator"
15756 [(reg FLAGS_REG) (const_int 0)]))
15757 (set (match_operand 4 "any_QIreg_operand")
15758 (zero_extend (match_dup 2)))]
15759 "(peep2_reg_dead_p (4, operands[2])
15760 || operands_match_p (operands[2], operands[4]))
15761 && ! reg_overlap_mentioned_p (operands[4], operands[0])
15762 && ! reg_overlap_mentioned_p (operands[4], operands[1])
15763 && ! reg_overlap_mentioned_p (operands[4], operands[5])
15764 && ! reg_set_p (operands[4], operands[5])
15765 && refers_to_regno_p (FLAGS_REG, operands[1], (rtx *)NULL)
15766 && peep2_regno_dead_p (0, FLAGS_REG)"
15767 [(set (match_dup 6) (match_dup 0))
15768 (parallel [(set (match_dup 7) (match_dup 1))
15769 (match_dup 5)])
15770 (set (strict_low_part (match_dup 8))
15771 (match_dup 3))]
15772 {
15773 operands[8] = gen_lowpart (QImode, operands[4]);
15774 ix86_expand_clear (operands[4]);
15775 })
15776
15777 ;; Similar, but match zero extend with andsi3.
15778
;; Here the zero extension is written as an SImode AND of operand 3 with 255
;; (in a PARALLEL with a FLAGS_REG clobber).  The setcc destination (operand 1)
;; and operand 3 must have the same register number, operand 3 must not
;; overlap the flags source operand 0, and peep2_regno_dead_p (0, FLAGS_REG)
;; must hold.  The preparation code clears operand 3 via ix86_expand_clear
;; and makes operand 5 its QImode low part for the setcc.
15779 (define_peephole2
15780 [(set (match_operand 4 "flags_reg_operand") (match_operand 0))
15781 (set (match_operand:QI 1 "register_operand")
15782 (match_operator:QI 2 "ix86_comparison_operator"
15783 [(reg FLAGS_REG) (const_int 0)]))
15784 (parallel [(set (match_operand:SI 3 "any_QIreg_operand")
15785 (and:SI (match_dup 3) (const_int 255)))
15786 (clobber (reg:CC FLAGS_REG))])]
15787 "REGNO (operands[1]) == REGNO (operands[3])
15788 && ! reg_overlap_mentioned_p (operands[3], operands[0])
15789 && peep2_regno_dead_p (0, FLAGS_REG)"
15790 [(set (match_dup 4) (match_dup 0))
15791 (set (strict_low_part (match_dup 5))
15792 (match_dup 2))]
15793 {
15794 operands[5] = gen_lowpart (QImode, operands[3]);
15795 ix86_expand_clear (operands[3]);
15796 })
15797
;; Flags setter in a PARALLEL (flags SET plus operand 4), setcc into
;; operand 1, then a zero_extend into operand 3 that is itself wrapped in a
;; PARALLEL with a FLAGS_REG clobber.  Operand 3 must not overlap operand 0
;; or operand 4 and must not be set by operand 4.  The replacement drops the
;; clobbering extension: operand 3 is cleared by ix86_expand_clear in the
;; preparation code and the setcc writes (strict_low_part operand 6).
15798 (define_peephole2
15799 [(parallel [(set (match_operand 5 "flags_reg_operand") (match_operand 0))
15800 (match_operand 4)])
15801 (set (match_operand:QI 1 "register_operand")
15802 (match_operator:QI 2 "ix86_comparison_operator"
15803 [(reg FLAGS_REG) (const_int 0)]))
15804 (parallel [(set (match_operand 3 "any_QIreg_operand")
15805 (zero_extend (match_dup 1)))
15806 (clobber (reg:CC FLAGS_REG))])]
15807 "(peep2_reg_dead_p (3, operands[1])
15808 || operands_match_p (operands[1], operands[3]))
15809 && ! reg_overlap_mentioned_p (operands[3], operands[0])
15810 && ! reg_overlap_mentioned_p (operands[3], operands[4])
15811 && ! reg_set_p (operands[3], operands[4])
15812 && peep2_regno_dead_p (0, FLAGS_REG)"
15813 [(parallel [(set (match_dup 5) (match_dup 0))
15814 (match_dup 4)])
15815 (set (strict_low_part (match_dup 6))
15816 (match_dup 2))]
15817 {
15818 operands[6] = gen_lowpart (QImode, operands[3]);
15819 ix86_expand_clear (operands[3]);
15820 })
15821
;; Four-insn form with the flags-clobbering zero_extend: a plain flags SET
;; (operand 6 := operand 0), a PARALLEL with a second flags SET
;; (operand 7 := operand 1) plus operand 5, the setcc into operand 2, and a
;; zero_extend into operand 4 paired with a FLAGS_REG clobber.  Operand 1
;; must refer to FLAGS_REG; operand 4 must not overlap operands 0, 1 or 5 nor
;; be set by operand 5.  Operand 4 is cleared by ix86_expand_clear and the
;; setcc writes (strict_low_part operand 8).
15822 (define_peephole2
15823 [(set (match_operand 6 "flags_reg_operand") (match_operand 0))
15824 (parallel [(set (match_operand 7 "flags_reg_operand") (match_operand 1))
15825 (match_operand 5)])
15826 (set (match_operand:QI 2 "register_operand")
15827 (match_operator:QI 3 "ix86_comparison_operator"
15828 [(reg FLAGS_REG) (const_int 0)]))
15829 (parallel [(set (match_operand 4 "any_QIreg_operand")
15830 (zero_extend (match_dup 2)))
15831 (clobber (reg:CC FLAGS_REG))])]
15832 "(peep2_reg_dead_p (4, operands[2])
15833 || operands_match_p (operands[2], operands[4]))
15834 && ! reg_overlap_mentioned_p (operands[4], operands[0])
15835 && ! reg_overlap_mentioned_p (operands[4], operands[1])
15836 && ! reg_overlap_mentioned_p (operands[4], operands[5])
15837 && ! reg_set_p (operands[4], operands[5])
15838 && refers_to_regno_p (FLAGS_REG, operands[1], (rtx *)NULL)
15839 && peep2_regno_dead_p (0, FLAGS_REG)"
15840 [(set (match_dup 6) (match_dup 0))
15841 (parallel [(set (match_dup 7) (match_dup 1))
15842 (match_dup 5)])
15843 (set (strict_low_part (match_dup 8))
15844 (match_dup 3))]
15845 {
15846 operands[8] = gen_lowpart (QImode, operands[4]);
15847 ix86_expand_clear (operands[4]);
15848 })
15849 \f
15850 ;; Call instructions.
15851
15852 ;; The predicates normally associated with named expanders are not properly
15853 ;; checked for calls. This is a bug in the generic code, but it isn't that
15854 ;; easy to fix. Ignore it for now and be prepared to fix things up.
15855
15856 ;; P6 processors will jump to the address after the decrement when %esp
15857 ;; is used as a call operand, so they will execute the return address as code.
15858 ;; See Pentium Pro errata 70, Pentium 2 errata A33 and Pentium 3 errata E17.
15859
15860 ;; Register constraint for call instruction.
;; <c> expands to the constraint letter "l" in SImode and "r" in DImode; it is
;; used in the address-operand constraints of *call and *call_value.
15861 (define_mode_attr c [(SI "l") (DI "r")])
15862
15863 ;; Call subroutine returning no value.
15864
;; Expander for a call without a return value.  All work is delegated to
;; ix86_expand_call: the first argument is NULL (call_value passes its result
;; operand there), the fifth is NULL (call_pop passes its stack adjustment
;; there) and the last flag is false (the sibcall expanders pass true).
15865 (define_expand "call"
15866 [(call (match_operand:QI 0)
15867 (match_operand 1))
15868 (use (match_operand 2))]
15869 ""
15870 {
15871 ix86_expand_call (NULL, operands[0], operands[1],
15872 operands[2], NULL, false);
15873 DONE;
15874 })
15875
;; Sibling-call counterpart of "call": identical operands, but the final
;; argument to ix86_expand_call is true instead of false.
15876 (define_expand "sibcall"
15877 [(call (match_operand:QI 0)
15878 (match_operand 1))
15879 (use (match_operand 2))]
15880 ""
15881 {
15882 ix86_expand_call (NULL, operands[0], operands[1],
15883 operands[2], NULL, true);
15884 DONE;
15885 })
15886
;; Call insn for calls that are not sibling calls (!SIBLING_CALL_P).  The
;; address is operand 0; assembly comes from ix86_output_call_insn.
15887 (define_insn "*call"
15888 [(call (mem:QI (match_operand:W 0 "call_insn_operand" "<c>BwBz"))
15889 (match_operand 1))]
15890 "!SIBLING_CALL_P (insn)"
15891 "* return ix86_output_call_insn (insn, operands[0]);"
15892 [(set_attr "type" "call")])
15893
15894 ;; This covers both call and sibcall since only GOT slot is allowed.
;; TARGET_X32 only: the call address is a zero-extended SImode GOT memory
;; operand.  For output, a DImode constant MEM is rebuilt from the address of
;; operand 0 and handed to ix86_output_call_insn.
15895 (define_insn "*call_got_x32"
15896 [(call (mem:QI (zero_extend:DI
15897 (match_operand:SI 0 "GOT_memory_operand" "Bg")))
15898 (match_operand 1))]
15899 "TARGET_X32"
15900 {
15901 rtx fnaddr = gen_const_mem (DImode, XEXP (operands[0], 0));
15902 return ix86_output_call_insn (insn, fnaddr);
15903 }
15904 [(set_attr "type" "call")])
15905
15906 ;; Since sibcall never returns, we can only use call-clobbered register
15907 ;; as GOT base.
;; Sibling call whose target is loaded from memory at
;; (operand 0 register + operand 1 GOT32 symbol).  Enabled only when none of
;; TARGET_MACHO, TARGET_64BIT and TARGET_INDIRECT_BRANCH_REGISTER is set and
;; the insn is a sibling call.  The output code rebuilds the SImode constant
;; MEM of that sum and passes it to ix86_output_call_insn.
15908 (define_insn "*sibcall_GOT_32"
15909 [(call (mem:QI
15910 (mem:SI (plus:SI
15911 (match_operand:SI 0 "register_no_elim_operand" "U")
15912 (match_operand:SI 1 "GOT32_symbol_operand"))))
15913 (match_operand 2))]
15914 "!TARGET_MACHO
15915 && !TARGET_64BIT
15916 && !TARGET_INDIRECT_BRANCH_REGISTER
15917 && SIBLING_CALL_P (insn)"
15918 {
15919 rtx fnaddr = gen_rtx_PLUS (SImode, operands[0], operands[1]);
15920 fnaddr = gen_const_mem (SImode, fnaddr);
15921 return ix86_output_call_insn (insn, fnaddr);
15922 }
15923 [(set_attr "type" "call")])
15924
;; Sibling-call insn (SIBLING_CALL_P) with address operand 0 checked by
;; sibcall_insn_operand; assembly comes from ix86_output_call_insn.
15925 (define_insn "*sibcall"
15926 [(call (mem:QI (match_operand:W 0 "sibcall_insn_operand" "UBsBz"))
15927 (match_operand 1))]
15928 "SIBLING_CALL_P (insn)"
15929 "* return ix86_output_call_insn (insn, operands[0]);"
15930 [(set_attr "type" "call")])
15931
;; Call through a memory operand, marked with UNSPEC_PEEPSIB.  This is the
;; shape emitted by the two peephole2 patterns that follow it.  Disabled for
;; TARGET_X32 and TARGET_INDIRECT_BRANCH_REGISTER.
15932 (define_insn "*sibcall_memory"
15933 [(call (mem:QI (match_operand:W 0 "memory_operand" "m"))
15934 (match_operand 1))
15935 (unspec [(const_int 0)] UNSPEC_PEEPSIB)]
15936 "!TARGET_X32 && !TARGET_INDIRECT_BRANCH_REGISTER"
15937 "* return ix86_output_call_insn (insn, operands[0]);"
15938 [(set_attr "type" "call")])
15939
;; Folds "load operand 1 from memory into register operand 0; sibling-call
;; through operand 0" into a single call through the memory operand, tagged
;; with UNSPEC_PEEPSIB.  Requires that the call is a sibling call and that
;; operand 0 is not mentioned in its CALL_INSN_FUNCTION_USAGE.
15940 (define_peephole2
15941 [(set (match_operand:W 0 "register_operand")
15942 (match_operand:W 1 "memory_operand"))
15943 (call (mem:QI (match_dup 0))
15944 (match_operand 3))]
15945 "!TARGET_X32
15946 && !TARGET_INDIRECT_BRANCH_REGISTER
15947 && SIBLING_CALL_P (peep2_next_insn (1))
15948 && !reg_mentioned_p (operands[0],
15949 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))"
15950 [(parallel [(call (mem:QI (match_dup 1))
15951 (match_dup 3))
15952 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
15953
;; Same folding as the previous peephole2, but with an UNSPECV_BLOCKAGE insn
;; between the load and the call; the call is therefore insn 2 of the window.
;; The blockage is re-emitted ahead of the combined memory call.
15954 (define_peephole2
15955 [(set (match_operand:W 0 "register_operand")
15956 (match_operand:W 1 "memory_operand"))
15957 (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
15958 (call (mem:QI (match_dup 0))
15959 (match_operand 3))]
15960 "!TARGET_X32
15961 && !TARGET_INDIRECT_BRANCH_REGISTER
15962 && SIBLING_CALL_P (peep2_next_insn (2))
15963 && !reg_mentioned_p (operands[0],
15964 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))"
15965 [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
15966 (parallel [(call (mem:QI (match_dup 1))
15967 (match_dup 3))
15968 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
15969
;; Expander for a call combined with a stack-pointer adjustment by operand 3
;; (32-bit targets only).  Delegates to ix86_expand_call, passing operand 3
;; as the fifth argument.
;; NOTE(review): operands[2] is passed on although the template has no
;; (match_operand 2); presumably it is supplied by the expander's caller --
;; confirm against the standard "call_pop" operand list.
15970 (define_expand "call_pop"
15971 [(parallel [(call (match_operand:QI 0)
15972 (match_operand:SI 1))
15973 (set (reg:SI SP_REG)
15974 (plus:SI (reg:SI SP_REG)
15975 (match_operand:SI 3)))])]
15976 "!TARGET_64BIT"
15977 {
15978 ix86_expand_call (NULL, operands[0], operands[1],
15979 operands[2], operands[3], false);
15980 DONE;
15981 })
15982
;; Non-sibling call (32-bit only) paired with an addition of immediate
;; operand 2 to the stack pointer; output via ix86_output_call_insn.
15983 (define_insn "*call_pop"
15984 [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lBwBz"))
15985 (match_operand 1))
15986 (set (reg:SI SP_REG)
15987 (plus:SI (reg:SI SP_REG)
15988 (match_operand:SI 2 "immediate_operand" "i")))]
15989 "!TARGET_64BIT && !SIBLING_CALL_P (insn)"
15990 "* return ix86_output_call_insn (insn, operands[0]);"
15991 [(set_attr "type" "call")])
15992
;; Sibling-call form of *call_pop: same RTL shape, but the address uses
;; sibcall_insn_operand and the condition requires SIBLING_CALL_P.
15993 (define_insn "*sibcall_pop"
15994 [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "UBsBz"))
15995 (match_operand 1))
15996 (set (reg:SI SP_REG)
15997 (plus:SI (reg:SI SP_REG)
15998 (match_operand:SI 2 "immediate_operand" "i")))]
15999 "!TARGET_64BIT && SIBLING_CALL_P (insn)"
16000 "* return ix86_output_call_insn (insn, operands[0]);"
16001 [(set_attr "type" "call")])
16002
;; Call through a memory operand with a stack-pointer adjustment, marked with
;; UNSPEC_PEEPSIB (32-bit only).  This is the shape emitted by the two
;; peephole2 patterns that follow it.
;; NOTE(review): the constraint here is "Bs" while *sibcall_value_pop_memory
;; uses "m"; confirm whether the difference is intentional.
16003 (define_insn "*sibcall_pop_memory"
16004 [(call (mem:QI (match_operand:SI 0 "memory_operand" "Bs"))
16005 (match_operand 1))
16006 (set (reg:SI SP_REG)
16007 (plus:SI (reg:SI SP_REG)
16008 (match_operand:SI 2 "immediate_operand" "i")))
16009 (unspec [(const_int 0)] UNSPEC_PEEPSIB)]
16010 "!TARGET_64BIT"
16011 "* return ix86_output_call_insn (insn, operands[0]);"
16012 [(set_attr "type" "call")])
16013
;; Folds a register load from memory followed by a sibling call-with-pop
;; through that register into one call through the memory operand, keeping
;; the stack adjustment and adding UNSPEC_PEEPSIB.  Requires !TARGET_64BIT, a
;; sibling call, and that operand 0 is not in CALL_INSN_FUNCTION_USAGE.
16014 (define_peephole2
16015 [(set (match_operand:SI 0 "register_operand")
16016 (match_operand:SI 1 "memory_operand"))
16017 (parallel [(call (mem:QI (match_dup 0))
16018 (match_operand 3))
16019 (set (reg:SI SP_REG)
16020 (plus:SI (reg:SI SP_REG)
16021 (match_operand:SI 4 "immediate_operand")))])]
16022 "!TARGET_64BIT && SIBLING_CALL_P (peep2_next_insn (1))
16023 && !reg_mentioned_p (operands[0],
16024 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))"
16025 [(parallel [(call (mem:QI (match_dup 1))
16026 (match_dup 3))
16027 (set (reg:SI SP_REG)
16028 (plus:SI (reg:SI SP_REG)
16029 (match_dup 4)))
16030 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
16031
;; As the previous peephole2, but with an UNSPECV_BLOCKAGE insn between the
;; load and the call-with-pop (the call is insn 2 of the window).  The
;; blockage is re-emitted before the combined memory call.
16032 (define_peephole2
16033 [(set (match_operand:SI 0 "register_operand")
16034 (match_operand:SI 1 "memory_operand"))
16035 (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
16036 (parallel [(call (mem:QI (match_dup 0))
16037 (match_operand 3))
16038 (set (reg:SI SP_REG)
16039 (plus:SI (reg:SI SP_REG)
16040 (match_operand:SI 4 "immediate_operand")))])]
16041 "!TARGET_64BIT && SIBLING_CALL_P (peep2_next_insn (2))
16042 && !reg_mentioned_p (operands[0],
16043 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))"
16044 [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
16045 (parallel [(call (mem:QI (match_dup 1))
16046 (match_dup 3))
16047 (set (reg:SI SP_REG)
16048 (plus:SI (reg:SI SP_REG)
16049 (match_dup 4)))
16050 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
16051
16052 ;; Combine a memory load with the following indirect jump through the loaded register.
16053
;; Replaces "load operand 1 into register operand 0; jump to operand 0" with
;; a jump directly through the memory operand.  Applies only when operand 0
;; is dead after the jump (peep2_reg_dead_p (2, ...)) and neither TARGET_X32
;; nor TARGET_INDIRECT_BRANCH_REGISTER is set.
16054 (define_peephole2
16055 [(set (match_operand:W 0 "register_operand")
16056 (match_operand:W 1 "memory_operand"))
16057 (set (pc) (match_dup 0))]
16058 "!TARGET_X32
16059 && !TARGET_INDIRECT_BRANCH_REGISTER
16060 && peep2_reg_dead_p (2, operands[0])"
16061 [(set (pc) (match_dup 1))])
16062
16063 ;; Call subroutine, returning value in operand 0
16064
;; Expander for a call whose result is stored in operand 0.  Delegates to
;; ix86_expand_call with operand 0 as the first argument, NULL as the fifth
;; and false as the final flag.
16065 (define_expand "call_value"
16066 [(set (match_operand 0)
16067 (call (match_operand:QI 1)
16068 (match_operand 2)))
16069 (use (match_operand 3))]
16070 ""
16071 {
16072 ix86_expand_call (operands[0], operands[1], operands[2],
16073 operands[3], NULL, false);
16074 DONE;
16075 })
16076
;; Sibling-call counterpart of "call_value": identical operands, but the
;; final argument to ix86_expand_call is true.
16077 (define_expand "sibcall_value"
16078 [(set (match_operand 0)
16079 (call (match_operand:QI 1)
16080 (match_operand 2)))
16081 (use (match_operand 3))]
16082 ""
16083 {
16084 ix86_expand_call (operands[0], operands[1], operands[2],
16085 operands[3], NULL, true);
16086 DONE;
16087 })
16088
;; Value-returning call that is not a sibling call.  The address is
;; operand 1; output via ix86_output_call_insn; insn type "callv".
16089 (define_insn "*call_value"
16090 [(set (match_operand 0)
16091 (call (mem:QI (match_operand:W 1 "call_insn_operand" "<c>BwBz"))
16092 (match_operand 2)))]
16093 "!SIBLING_CALL_P (insn)"
16094 "* return ix86_output_call_insn (insn, operands[1]);"
16095 [(set_attr "type" "callv")])
16096
16097 ;; This covers both call and sibcall since only GOT slot is allowed.
;; Value-returning form of *call_got_x32 (TARGET_X32 only).  The address is a
;; zero-extended SImode GOT memory operand; for output a DImode constant MEM
;; is rebuilt from operand 1's address.
16098 (define_insn "*call_value_got_x32"
16099 [(set (match_operand 0)
16100 (call (mem:QI
16101 (zero_extend:DI
16102 (match_operand:SI 1 "GOT_memory_operand" "Bg")))
16103 (match_operand 2)))]
16104 "TARGET_X32"
16105 {
16106 rtx fnaddr = gen_const_mem (DImode, XEXP (operands[1], 0));
16107 return ix86_output_call_insn (insn, fnaddr);
16108 }
16109 [(set_attr "type" "callv")])
16110
16111 ;; Since sibcall never returns, we can only use call-clobbered register
16112 ;; as GOT base.
;; Value-returning form of *sibcall_GOT_32: the target is loaded from memory
;; at (operand 1 register + operand 2 GOT32 symbol), under the same four
;; conditions.  The output code rebuilds the SImode constant MEM of that sum
;; for ix86_output_call_insn.
16113 (define_insn "*sibcall_value_GOT_32"
16114 [(set (match_operand 0)
16115 (call (mem:QI
16116 (mem:SI (plus:SI
16117 (match_operand:SI 1 "register_no_elim_operand" "U")
16118 (match_operand:SI 2 "GOT32_symbol_operand"))))
16119 (match_operand 3)))]
16120 "!TARGET_MACHO
16121 && !TARGET_64BIT
16122 && !TARGET_INDIRECT_BRANCH_REGISTER
16123 && SIBLING_CALL_P (insn)"
16124 {
16125 rtx fnaddr = gen_rtx_PLUS (SImode, operands[1], operands[2]);
16126 fnaddr = gen_const_mem (SImode, fnaddr);
16127 return ix86_output_call_insn (insn, fnaddr);
16128 }
16129 [(set_attr "type" "callv")])
16130
;; Value-returning sibling call (SIBLING_CALL_P) with address operand 1;
;; output via ix86_output_call_insn; insn type "callv".
16131 (define_insn "*sibcall_value"
16132 [(set (match_operand 0)
16133 (call (mem:QI (match_operand:W 1 "sibcall_insn_operand" "UBsBz"))
16134 (match_operand 2)))]
16135 "SIBLING_CALL_P (insn)"
16136 "* return ix86_output_call_insn (insn, operands[1]);"
16137 [(set_attr "type" "callv")])
16138
;; Value-returning call through a memory operand, marked with
;; UNSPEC_PEEPSIB.  This is the shape emitted by the two peephole2 patterns
;; that follow it.  Disabled for TARGET_X32 and
;; TARGET_INDIRECT_BRANCH_REGISTER.
16139 (define_insn "*sibcall_value_memory"
16140 [(set (match_operand 0)
16141 (call (mem:QI (match_operand:W 1 "memory_operand" "m"))
16142 (match_operand 2)))
16143 (unspec [(const_int 0)] UNSPEC_PEEPSIB)]
16144 "!TARGET_X32 && !TARGET_INDIRECT_BRANCH_REGISTER"
16145 "* return ix86_output_call_insn (insn, operands[1]);"
16146 [(set_attr "type" "callv")])
16147
;; Folds a register load from memory followed by a value-returning sibling
;; call through that register into one call through the memory operand,
;; tagged with UNSPEC_PEEPSIB.  Requires a sibling call and that operand 0 is
;; not mentioned in its CALL_INSN_FUNCTION_USAGE.
16148 (define_peephole2
16149 [(set (match_operand:W 0 "register_operand")
16150 (match_operand:W 1 "memory_operand"))
16151 (set (match_operand 2)
16152 (call (mem:QI (match_dup 0))
16153 (match_operand 3)))]
16154 "!TARGET_X32
16155 && !TARGET_INDIRECT_BRANCH_REGISTER
16156 && SIBLING_CALL_P (peep2_next_insn (1))
16157 && !reg_mentioned_p (operands[0],
16158 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))"
16159 [(parallel [(set (match_dup 2)
16160 (call (mem:QI (match_dup 1))
16161 (match_dup 3)))
16162 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
16163
;; As the previous peephole2, but with an UNSPECV_BLOCKAGE insn between the
;; load and the value-returning call (insn 2 of the window).  The blockage is
;; re-emitted before the combined memory call.
16164 (define_peephole2
16165 [(set (match_operand:W 0 "register_operand")
16166 (match_operand:W 1 "memory_operand"))
16167 (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
16168 (set (match_operand 2)
16169 (call (mem:QI (match_dup 0))
16170 (match_operand 3)))]
16171 "!TARGET_X32
16172 && !TARGET_INDIRECT_BRANCH_REGISTER
16173 && SIBLING_CALL_P (peep2_next_insn (2))
16174 && !reg_mentioned_p (operands[0],
16175 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))"
16176 [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
16177 (parallel [(set (match_dup 2)
16178 (call (mem:QI (match_dup 1))
16179 (match_dup 3)))
16180 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
16181
;; Expander for a value-returning call combined with a stack-pointer
;; adjustment by operand 4 (32-bit targets only).  Delegates to
;; ix86_expand_call, passing operand 4 as the fifth argument.
;; NOTE(review): operands[3] is passed on although the template has no
;; (match_operand 3); presumably it is supplied by the expander's caller --
;; confirm against the standard "call_value_pop" operand list.
16182 (define_expand "call_value_pop"
16183 [(parallel [(set (match_operand 0)
16184 (call (match_operand:QI 1)
16185 (match_operand:SI 2)))
16186 (set (reg:SI SP_REG)
16187 (plus:SI (reg:SI SP_REG)
16188 (match_operand:SI 4)))])]
16189 "!TARGET_64BIT"
16190 {
16191 ix86_expand_call (operands[0], operands[1], operands[2],
16192 operands[3], operands[4], false);
16193 DONE;
16194 })
16195
;; Value-returning non-sibling call (32-bit only) paired with an addition of
;; immediate operand 3 to the stack pointer; output via
;; ix86_output_call_insn.
16196 (define_insn "*call_value_pop"
16197 [(set (match_operand 0)
16198 (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lBwBz"))
16199 (match_operand 2)))
16200 (set (reg:SI SP_REG)
16201 (plus:SI (reg:SI SP_REG)
16202 (match_operand:SI 3 "immediate_operand" "i")))]
16203 "!TARGET_64BIT && !SIBLING_CALL_P (insn)"
16204 "* return ix86_output_call_insn (insn, operands[1]);"
16205 [(set_attr "type" "callv")])
16206
;; Sibling-call form of *call_value_pop: same RTL shape, but the address uses
;; sibcall_insn_operand and the condition requires SIBLING_CALL_P.
16207 (define_insn "*sibcall_value_pop"
16208 [(set (match_operand 0)
16209 (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "UBsBz"))
16210 (match_operand 2)))
16211 (set (reg:SI SP_REG)
16212 (plus:SI (reg:SI SP_REG)
16213 (match_operand:SI 3 "immediate_operand" "i")))]
16214 "!TARGET_64BIT && SIBLING_CALL_P (insn)"
16215 "* return ix86_output_call_insn (insn, operands[1]);"
16216 [(set_attr "type" "callv")])
16217
;; Value-returning call through a memory operand with a stack-pointer
;; adjustment, marked with UNSPEC_PEEPSIB (32-bit only).  This is the shape
;; emitted by the two peephole2 patterns that follow it.
16218 (define_insn "*sibcall_value_pop_memory"
16219 [(set (match_operand 0)
16220 (call (mem:QI (match_operand:SI 1 "memory_operand" "m"))
16221 (match_operand 2)))
16222 (set (reg:SI SP_REG)
16223 (plus:SI (reg:SI SP_REG)
16224 (match_operand:SI 3 "immediate_operand" "i")))
16225 (unspec [(const_int 0)] UNSPEC_PEEPSIB)]
16226 "!TARGET_64BIT"
16227 "* return ix86_output_call_insn (insn, operands[1]);"
16228 [(set_attr "type" "callv")])
16229
;; Folds a register load from memory followed by a value-returning sibling
;; call-with-pop through that register into one call through the memory
;; operand, keeping the stack adjustment and adding UNSPEC_PEEPSIB.  Requires
;; !TARGET_64BIT, a sibling call, and that operand 0 is not in
;; CALL_INSN_FUNCTION_USAGE.
16230 (define_peephole2
16231 [(set (match_operand:SI 0 "register_operand")
16232 (match_operand:SI 1 "memory_operand"))
16233 (parallel [(set (match_operand 2)
16234 (call (mem:QI (match_dup 0))
16235 (match_operand 3)))
16236 (set (reg:SI SP_REG)
16237 (plus:SI (reg:SI SP_REG)
16238 (match_operand:SI 4 "immediate_operand")))])]
16239 "!TARGET_64BIT && SIBLING_CALL_P (peep2_next_insn (1))
16240 && !reg_mentioned_p (operands[0],
16241 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))"
16242 [(parallel [(set (match_dup 2)
16243 (call (mem:QI (match_dup 1))
16244 (match_dup 3)))
16245 (set (reg:SI SP_REG)
16246 (plus:SI (reg:SI SP_REG)
16247 (match_dup 4)))
16248 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
16249
;; As the previous peephole2, but with an UNSPECV_BLOCKAGE insn between the
;; load and the value-returning call-with-pop (insn 2 of the window).  The
;; blockage is re-emitted before the combined memory call.
16250 (define_peephole2
16251 [(set (match_operand:SI 0 "register_operand")
16252 (match_operand:SI 1 "memory_operand"))
16253 (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
16254 (parallel [(set (match_operand 2)
16255 (call (mem:QI (match_dup 0))
16256 (match_operand 3)))
16257 (set (reg:SI SP_REG)
16258 (plus:SI (reg:SI SP_REG)
16259 (match_operand:SI 4 "immediate_operand")))])]
16260 "!TARGET_64BIT && SIBLING_CALL_P (peep2_next_insn (2))
16261 && !reg_mentioned_p (operands[0],
16262 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))"
16263 [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
16264 (parallel [(set (match_dup 2)
16265 (call (mem:QI (match_dup 1))
16266 (match_dup 3)))
16267 (set (reg:SI SP_REG)
16268 (plus:SI (reg:SI SP_REG)
16269 (match_dup 4)))
16270 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
16271
16272 ;; Call subroutine returning any type.
16273
;; Expander for an untyped call.  Operand 0 is the call target and operand 2
;; is a vector of SETs.  The expansion has three steps:
;;   1. ix86_expand_call, with an XCmode FIRST_FLOAT_REG as the return value
;;      when TARGET_FLOAT_RETURNS_IN_80387 (otherwise NULL), and the
;;      ABI-dependent SSE register-parameter maximum minus one as the fourth
;;      argument;
;;   2. one emit_move_insn per SET in operand 2, copying SET_SRC to SET_DEST;
;;   3. a blockage insn.
;; Operand 1 is matched but not referenced by the expansion code.
16274 (define_expand "untyped_call"
16275 [(parallel [(call (match_operand 0)
16276 (const_int 0))
16277 (match_operand 1)
16278 (match_operand 2)])]
16279 ""
16280 {
16281 int i;
16282
16283 /* In order to give reg-stack an easier job in validating two
16284 coprocessor registers as containing a possible return value,
16285 simply pretend the untyped call returns a complex long double
16286 value.
16287
16288 We can't use SSE_REGPARM_MAX here since callee is unprototyped
16289 and should have the default ABI. */
16290
16291 ix86_expand_call ((TARGET_FLOAT_RETURNS_IN_80387
16292 ? gen_rtx_REG (XCmode, FIRST_FLOAT_REG) : NULL),
16293 operands[0], const0_rtx,
16294 GEN_INT ((TARGET_64BIT
16295 ? (ix86_abi == SYSV_ABI
16296 ? X86_64_SSE_REGPARM_MAX
16297 : X86_64_MS_SSE_REGPARM_MAX)
16298 : X86_32_SSE_REGPARM_MAX)
16299 - 1),
16300 NULL, false);
16301
16302 for (i = 0; i < XVECLEN (operands[2], 0); i++)
16303 {
16304 rtx set = XVECEXP (operands[2], 0, i);
16305 emit_move_insn (SET_DEST (set), SET_SRC (set));
16306 }
16307
16308 /* The optimizer does not know that the call sets the function value
16309 registers we stored in the result block. We avoid problems by
16310 claiming that all hard registers are used and clobbered at this
16311 point. */
16312 emit_insn (gen_blockage ());
16313
16314 DONE;
16315 })
16316 \f
16317 ;; Prologue and epilogue instructions
16318
16319 ;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and
16320 ;; all of memory. This blocks insns from being moved across this point.
16321
;; Scheduling barrier: an UNSPECV_BLOCKAGE unspec_volatile with an empty
;; assembly template and a declared length of 0.
16322 (define_insn "blockage"
16323 [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
16324 ""
16325 ""
16326 [(set_attr "length" "0")])
16327
16328 ;; Do not schedule instructions accessing memory across this point.
16329
;; Expander that creates operand 0 itself: a BLKmode MEM whose address is a
;; Pmode SCRATCH, flagged volatile.  The template then sets that MEM to an
;; UNSPEC_MEMORY_BLOCKAGE of itself.
16330 (define_expand "memory_blockage"
16331 [(set (match_dup 0)
16332 (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BLOCKAGE))]
16333 ""
16334 {
16335 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
16336 MEM_VOLATILE_P (operands[0]) = 1;
16337 })
16338
;; Insn matching the RTL emitted by the memory_blockage expander.  It has an
;; empty assembly template and a declared length of 0.
16339 (define_insn "*memory_blockage"
16340 [(set (match_operand:BLK 0)
16341 (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BLOCKAGE))]
16342 ""
16343 ""
16344 [(set_attr "length" "0")])
16345
16346 ;; As USE insns aren't meaningful after reload, this is used instead
16347 ;; to prevent deleting instructions setting registers for PIC code
;; Wraps operand 0 in an UNSPECV_PROLOGUE_USE unspec_volatile.  It has an
;; empty assembly template and a declared length of 0.
16348 (define_insn "prologue_use"
16349 [(unspec_volatile [(match_operand 0)] UNSPECV_PROLOGUE_USE)]
16350 ""
16351 ""
16352 [(set_attr "length" "0")])
16353
16354 ;; Insn emitted into the body of a function to return from a function.
16355 ;; This is only done if the function's epilogue is known to be simple.
16356 ;; See comments for ix86_can_use_return_insn_p in i386.cc.
16357
;; Available only when ix86_can_use_return_insn_p () is true.  If
;; crtl->args.pops_args is nonzero, a simple_return_pop_internal jump with
;; that value is emitted and expansion ends with DONE; otherwise the
;; (simple_return) template is used.
16358 (define_expand "return"
16359 [(simple_return)]
16360 "ix86_can_use_return_insn_p ()"
16361 {
16362 if (crtl->args.pops_args)
16363 {
16364 rtx popc = GEN_INT (crtl->args.pops_args);
16365 emit_jump_insn (gen_simple_return_pop_internal (popc));
16366 DONE;
16367 }
16368 })
16369
16370 ;; We need to disable this for TARGET_SEH, as otherwise
16371 ;; shrink-wrapped prologue gets enabled too. This might exceed
16372 ;; the maximum size of prologue in unwind information.
16373 ;; Also disallow shrink-wrapping if using stack slot to pass the
16374 ;; static chain pointer - the first instruction has to be pushl %esi
16375 ;; and it can't be moved around, as we use alternate entry points
16376 ;; in that case.
16377 ;; Also disallow for ms_hook_prologue functions which have frame
16378 ;; pointer set up in function label which is correctly handled in
16379 ;; ix86_expand_{prologue|epilogue}() only.
16380
;; Disabled for TARGET_SEH, when ix86_static_chain_on_stack is set, and for
;; functions for which ix86_function_ms_hook_prologue is true.  As in
;; "return", a nonzero crtl->args.pops_args emits simple_return_pop_internal
;; instead of the plain (simple_return) template.
16381 (define_expand "simple_return"
16382 [(simple_return)]
16383 "!TARGET_SEH && !ix86_static_chain_on_stack && !ix86_function_ms_hook_prologue (cfun->decl)"
16384 {
16385 if (crtl->args.pops_args)
16386 {
16387 rtx popc = GEN_INT (crtl->args.pops_args);
16388 emit_jump_insn (gen_simple_return_pop_internal (popc));
16389 DONE;
16390 }
16391 })
16392
;; Plain return insn, valid once reload_completed.  Assembly comes from
;; ix86_output_function_return (false); the declared length is 1 byte.
16393 (define_insn "simple_return_internal"
16394 [(simple_return)]
16395 "reload_completed"
16396 "* return ix86_output_function_return (false);"
16397 [(set_attr "length" "1")
16398 (set_attr "atom_unit" "jeu")
16399 (set_attr "length_immediate" "0")
16400 (set_attr "modrm" "0")])
16401
;; Return marked with UNSPEC_INTERRUPT_RETURN.  Emits "uiret" when both
;; TARGET_64BIT and TARGET_UINTR are set, "iretq" for TARGET_64BIT without
;; TARGET_UINTR, and "iret" otherwise.
16402 (define_insn "interrupt_return"
16403 [(simple_return)
16404 (unspec [(const_int 0)] UNSPEC_INTERRUPT_RETURN)]
16405 "reload_completed"
16406 {
16407 return TARGET_64BIT ? (TARGET_UINTR ? "uiret" : "iretq") : "iret";
16408 })
16409
16410 ;; Used by x86_machine_dependent_reorg to avoid the penalty that Athlon and K8
16411 ;; have on a single-byte RET instruction.
16412
;; Return marked with UNSPEC_REP.  Assembly comes from
;; ix86_output_function_return (true); the attributes declare a length of 2
;; and prefix_rep of 1.
16413 (define_insn "simple_return_internal_long"
16414 [(simple_return)
16415 (unspec [(const_int 0)] UNSPEC_REP)]
16416 "reload_completed"
16417 "* return ix86_output_function_return (true);"
16418 [(set_attr "length" "2")
16419 (set_attr "atom_unit" "jeu")
16420 (set_attr "length_immediate" "0")
16421 (set_attr "prefix_rep" "1")
16422 (set_attr "modrm" "0")])
16423
;; Return that uses constant operand 0, emitted as "ret\t%0".  When
;; cfun->machine->function_return_type is not indirect_branch_keep the insn
;; is split, and ix86_split_simple_return_pop_internal generates the
;; replacement sequence.
16424 (define_insn_and_split "simple_return_pop_internal"
16425 [(simple_return)
16426 (use (match_operand:SI 0 "const_int_operand"))]
16427 "reload_completed"
16428 "ret\t%0"
16429 "&& cfun->machine->function_return_type != indirect_branch_keep"
16430 [(const_int 0)]
16431 "ix86_split_simple_return_pop_internal (operands[0]); DONE;"
16432 [(set_attr "length" "3")
16433 (set_attr "atom_unit" "jeu")
16434 (set_attr "length_immediate" "2")
16435 (set_attr "modrm" "0")])
16436
;; Named expander that builds a PARALLEL of (simple_return) and a USE of
;; register operand 0.  It has no condition and no preparation code.
16437 (define_expand "simple_return_indirect_internal"
16438 [(parallel
16439 [(simple_return)
16440 (use (match_operand 0 "register_operand"))])])
16441
;; Insn for a return that uses register operand 0, in each mode of iterator
;; W.  Assembly comes from ix86_output_indirect_function_return.  The insn
;; type is "multi" when cfun->machine->indirect_branch_type differs from
;; indirect_branch_keep, and "ibr" otherwise.
16442 (define_insn "*simple_return_indirect_internal<mode>"
16443 [(simple_return)
16444 (use (match_operand:W 0 "register_operand" "r"))]
16445 "reload_completed"
16446 "* return ix86_output_indirect_function_return (operands[0]);"
16447 [(set (attr "type")
16448 (if_then_else (match_test "(cfun->machine->indirect_branch_type
16449 != indirect_branch_keep)")
16450 (const_string "multi")
16451 (const_string "ibr")))
16452 (set_attr "length_immediate" "0")])
16453
;; Single "nop" instruction with a declared length of 1 byte.
16454 (define_insn "nop"
16455 [(const_int 0)]
16456 ""
16457 "nop"
16458 [(set_attr "length" "1")
16459 (set_attr "length_immediate" "0")
16460 (set_attr "modrm" "0")])
16461
16462 ;; Generate nops. Operand 0 is the number of nops, up to 8.
;; Emits INTVAL (operand 0) "nop" lines directly to asm_out_file and returns
;; an empty template.  The count is asserted to lie in the range 1..8, and
;; the "length" attribute equals the count.
16463 (define_insn "nops"
16464 [(unspec_volatile [(match_operand 0 "const_int_operand")]
16465 UNSPECV_NOPS)]
16466 "reload_completed"
16467 {
16468 int num = INTVAL (operands[0]);
16469
16470 gcc_assert (IN_RANGE (num, 1, 8));
16471
16472 while (num--)
16473 fputs ("\tnop\n", asm_out_file);
16474
16475 return "";
16476 }
16477 [(set (attr "length") (symbol_ref "INTVAL (operands[0])"))
16478 (set_attr "length_immediate" "0")
16479 (set_attr "modrm" "0")])
16480
16481 ;; Pad to 16-byte boundary, max skip in op0. Used to avoid
16482 ;; branch prediction penalty for the third jump in a 16-byte
16483 ;; block on K8.
16484
;; Alignment pseudo-insn.  When ASM_OUTPUT_MAX_SKIP_ALIGN is defined it is
;; invoked with asm_out_file, 4 and INTVAL (operand 0); otherwise nothing is
;; written.  The returned template is empty and the declared length is 16.
16485 (define_insn "pad"
16486 [(unspec_volatile [(match_operand 0)] UNSPECV_ALIGN)]
16487 ""
16488 {
16489 #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
16490 ASM_OUTPUT_MAX_SKIP_ALIGN (asm_out_file, 4, (int)INTVAL (operands[0]));
16491 #else
16492 /* It is tempting to use ASM_OUTPUT_ALIGN here, but we don't want to do that.
16493 The align insn is used to avoid 3 jump instructions in the row to improve
16494 branch prediction and the benefits hardly outweigh the cost of extra 8
16495 nops on the average inserted by full alignment pseudo operation. */
16496 #endif
16497 return "";
16498 }
16499 [(set_attr "length" "16")])
16500
;; Prologue expander: all work is done by ix86_expand_prologue ().
16501 (define_expand "prologue"
16502 [(const_int 0)]
16503 ""
16504 "ix86_expand_prologue (); DONE;")
16505
;; 32-bit only.  Sets SImode register operand 0 to an UNSPEC_SET_GOT value,
;; clobbering FLAGS_REG.  As a side effect the expander sets
;; ix86_pc_thunk_call_expanded when flag_pic is on and TARGET_VXWORKS_RTP is
;; off.
16506 (define_expand "set_got"
16507 [(parallel
16508 [(set (match_operand:SI 0 "register_operand")
16509 (unspec:SI [(const_int 0)] UNSPEC_SET_GOT))
16510 (clobber (reg:CC FLAGS_REG))])]
16511 "!TARGET_64BIT"
16512 {
16513 if (flag_pic && !TARGET_VXWORKS_RTP)
16514 ix86_pc_thunk_call_expanded = true;
16515 })
16516
;; Insn for the label-less set_got form.  Assembly comes from
;; output_set_got (operand 0, NULL_RTX); type "multi", declared length 12.
16517 (define_insn "*set_got"
16518 [(set (match_operand:SI 0 "register_operand" "=r")
16519 (unspec:SI [(const_int 0)] UNSPEC_SET_GOT))
16520 (clobber (reg:CC FLAGS_REG))]
16521 "!TARGET_64BIT"
16522 "* return output_set_got (operands[0], NULL_RTX);"
16523 [(set_attr "type" "multi")
16524 (set_attr "length" "12")])
16525
;; Variant of set_got whose UNSPEC_SET_GOT carries a label_ref to operand 1.
;; It has the same side effect on ix86_pc_thunk_call_expanded when flag_pic
;; is on and TARGET_VXWORKS_RTP is off.
16526 (define_expand "set_got_labelled"
16527 [(parallel
16528 [(set (match_operand:SI 0 "register_operand")
16529 (unspec:SI [(label_ref (match_operand 1))]
16530 UNSPEC_SET_GOT))
16531 (clobber (reg:CC FLAGS_REG))])]
16532 "!TARGET_64BIT"
16533 {
16534 if (flag_pic && !TARGET_VXWORKS_RTP)
16535 ix86_pc_thunk_call_expanded = true;
16536 })
16537
;; Insn for the labelled set_got form.  Assembly comes from
;; output_set_got (operand 0, operand 1); type "multi", declared length 12.
16538 (define_insn "*set_got_labelled"
16539 [(set (match_operand:SI 0 "register_operand" "=r")
16540 (unspec:SI [(label_ref (match_operand 1))]
16541 UNSPEC_SET_GOT))
16542 (clobber (reg:CC FLAGS_REG))]
16543 "!TARGET_64BIT"
16544 "* return output_set_got (operands[0], operands[1]);"
16545 [(set_attr "type" "multi")
16546 (set_attr "length" "12")])
16547
;; 64-bit only.  Loads the RIP-relative address of _GLOBAL_OFFSET_TABLE_ into
;; DImode register operand 0 with a single lea.
16548 (define_insn "set_got_rex64"
16549 [(set (match_operand:DI 0 "register_operand" "=r")
16550 (unspec:DI [(const_int 0)] UNSPEC_SET_GOT))]
16551 "TARGET_64BIT"
16552 "lea{q}\t{_GLOBAL_OFFSET_TABLE_(%%rip), %0|%0, _GLOBAL_OFFSET_TABLE_[rip]}"
16553 [(set_attr "type" "lea")
16554 (set_attr "length_address" "4")
16555 (set_attr "mode" "DI")])
16556
;; 64-bit only.  Loads the RIP-relative address of label operand 1 into
;; DImode register operand 0 with a single lea.
16557 (define_insn "set_rip_rex64"
16558 [(set (match_operand:DI 0 "register_operand" "=r")
16559 (unspec:DI [(label_ref (match_operand 1))] UNSPEC_SET_RIP))]
16560 "TARGET_64BIT"
16561 "lea{q}\t{%l1(%%rip), %0|%0, %l1[rip]}"
16562 [(set_attr "type" "lea")
16563 (set_attr "length_address" "4")
16564 (set_attr "mode" "DI")])
16565
;; TARGET_LP64 only.  Loads the constant _GLOBAL_OFFSET_TABLE_ minus label
;; operand 1 into DImode register operand 0 with movabs.
16566 (define_insn "set_got_offset_rex64"
16567 [(set (match_operand:DI 0 "register_operand" "=r")
16568 (unspec:DI
16569 [(label_ref (match_operand 1))]
16570 UNSPEC_SET_GOT_OFFSET))]
16571 "TARGET_LP64"
16572 "movabs{q}\t{$_GLOBAL_OFFSET_TABLE_-%l1, %0|%0, OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-%l1}"
16573 [(set_attr "type" "imov")
16574 (set_attr "length_immediate" "0")
16575 (set_attr "length_address" "8")
16576 (set_attr "mode" "DI")])
16577
;; Epilogue expander: calls ix86_expand_epilogue with argument 1
;; (sibcall_epilogue passes 0 and eh_return_internal passes 2).
16578 (define_expand "epilogue"
16579 [(const_int 0)]
16580 ""
16581 "ix86_expand_epilogue (1); DONE;")
16582
;; Sibling-call epilogue expander: calls ix86_expand_epilogue with
;; argument 0.
16583 (define_expand "sibcall_epilogue"
16584 [(const_int 0)]
16585 ""
16586 "ix86_expand_epilogue (0); DONE;")
16587
;; Stores handler address operand 0 (converted to word_mode if necessary)
;; into the word at arg_pointer + EH_RETURN_STACKADJ_RTX - UNITS_PER_WORD.
;; It then emits the eh_return_internal jump followed by a barrier.
16588 (define_expand "eh_return"
16589 [(use (match_operand 0 "register_operand"))]
16590 ""
16591 {
16592 rtx tmp, sa = EH_RETURN_STACKADJ_RTX, ra = operands[0];
16593
16594 /* Tricky bit: we write the address of the handler to which we will
16595 be returning into someone else's stack frame, one word below the
16596 stack address we wish to restore. */
16597 tmp = gen_rtx_PLUS (Pmode, arg_pointer_rtx, sa);
16598 tmp = plus_constant (Pmode, tmp, -UNITS_PER_WORD);
16599 /* Return address is always in word_mode. */
16600 tmp = gen_rtx_MEM (word_mode, tmp);
16601 if (GET_MODE (ra) != word_mode)
16602 ra = convert_to_mode (word_mode, ra, 1);
16603 emit_move_insn (tmp, ra);
16604
16605 emit_jump_insn (gen_eh_return_internal ());
16606 emit_barrier ();
16607 DONE;
16608 })
16609
;; Placeholder (eh_return) insn whose output template is "#".  It is split
;; once epilogue_completed is set, by calling ix86_expand_epilogue (2).
16610 (define_insn_and_split "eh_return_internal"
16611 [(eh_return)]
16612 ""
16613 "#"
16614 "epilogue_completed"
16615 [(const_int 0)]
16616 "ix86_expand_epilogue (2); DONE;")
16617
;; Builds the "leave" RTL for each mode of iterator W: SP := BP + operand 0,
;; BP := mem[BP], plus a clobber of all memory.  Operand 0 is set by the
;; preparation code to the constant <MODE_SIZE>.
16618 (define_expand "@leave_<mode>"
16619 [(parallel
16620 [(set (reg:W SP_REG) (plus:W (reg:W BP_REG) (match_dup 0)))
16621 (set (reg:W BP_REG) (mem:W (reg:W BP_REG)))
16622 (clobber (mem:BLK (scratch)))])]
16623 ""
16624 "operands[0] = GEN_INT (<MODE_SIZE>);")
16625
;; 32-bit "leave": SP := BP + 4 and BP := mem[BP], with a clobber of all
;; memory.
16626 (define_insn "*leave"
16627 [(set (reg:SI SP_REG) (plus:SI (reg:SI BP_REG) (const_int 4)))
16628 (set (reg:SI BP_REG) (mem:SI (reg:SI BP_REG)))
16629 (clobber (mem:BLK (scratch)))]
16630 "!TARGET_64BIT"
16631 "leave"
16632 [(set_attr "type" "leave")])
16633
;; 64-bit "leave": SP := BP + 8 and BP := mem[BP], with a clobber of all
;; memory.
16634 (define_insn "*leave_rex64"
16635 [(set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8)))
16636 (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG)))
16637 (clobber (mem:BLK (scratch)))]
16638 "TARGET_64BIT"
16639 "leave"
16640 [(set_attr "type" "leave")])
16641 \f
16642 ;; Handle -fsplit-stack.
16643
;; Split-stack prologue expander: all work is done by
;; ix86_expand_split_stack_prologue ().
16644 (define_expand "split_stack_prologue"
16645 [(const_int 0)]
16646 ""
16647 {
16648 ix86_expand_split_stack_prologue ();
16649 DONE;
16650 })
16651
16652 ;; In order to support the call/return predictor, we use a return
16653 ;; instruction which the middle-end doesn't see.
;; Operand 0 is a constant.  Zero emits a plain "ret"; any other value
;; emits "ret" with that value as its immediate.  The length attributes
;; follow the same distinction: length 1 with no immediate for zero,
;; length 3 with a 2-byte immediate otherwise.
16654 (define_insn "split_stack_return"
16655 [(unspec_volatile [(match_operand:SI 0 "const_int_operand")]
16656 UNSPECV_SPLIT_STACK_RETURN)]
16657 ""
16658 {
16659 if (operands[0] == const0_rtx)
16660 return "ret";
16661 else
16662 return "ret\t%0";
16663 }
16664 [(set_attr "atom_unit" "jeu")
16665 (set_attr "modrm" "0")
16666 (set (attr "length")
16667 (if_then_else (match_operand:SI 0 "const0_operand")
16668 (const_int 1)
16669 (const_int 3)))
16670 (set (attr "length_immediate")
16671 (if_then_else (match_operand:SI 0 "const0_operand")
16672 (const_int 0)
16673 (const_int 2)))])
16674
16675 ;; If there are operand 0 bytes available on the stack, jump to
16676 ;; operand 1.
16677
;; The preparation statement computes SP - operand 0 into a new Pmode
;; pseudo and branches to operand 1 when that value is GEU the guard
;; returned by ix86_split_stack_guard.  It always finishes with DONE, so
;; the LTU form written in the RTL template below is never emitted.
16678 (define_expand "split_stack_space_check"
16679 [(set (pc) (if_then_else
16680 (ltu (minus (reg SP_REG)
16681 (match_operand 0 "register_operand"))
16682 (match_dup 2))
16683 (label_ref (match_operand 1))
16684 (pc)))]
16685 ""
16686 {
16687 rtx reg = gen_reg_rtx (Pmode);
16688
16689 emit_insn (gen_sub3_insn (reg, stack_pointer_rtx, operands[0]));
16690
16691 operands[2] = ix86_split_stack_guard ();
16692 ix86_expand_branch (GEU, reg, operands[2], operands[1]);
16693
16694 DONE;
16695 })
16696 \f
16697 ;; Bit manipulation instructions.
16698
;; ffs expander.  The emitted sequence is: load -1 into a new pseudo
;; (operand 2); do a ctz of operand 1 that also sets the flags from
;; comparing operand 1 with zero; replace the result with the -1 when the
;; flags compared equal to zero; finally add 1.
;; SImode without TARGET_CMOVE is routed to ffssi2_no_cmove instead.
;; The flags register is used in CCCmode with TARGET_BMI and in CCZmode
;; otherwise.
16699 (define_expand "ffs<mode>2"
16700 [(set (match_dup 2) (const_int -1))
16701 (parallel [(set (match_dup 3) (match_dup 4))
16702 (set (match_operand:SWI48 0 "register_operand")
16703 (ctz:SWI48
16704 (match_operand:SWI48 1 "nonimmediate_operand")))])
16705 (set (match_dup 0) (if_then_else:SWI48
16706 (eq (match_dup 3) (const_int 0))
16707 (match_dup 2)
16708 (match_dup 0)))
16709 (parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (const_int 1)))
16710 (clobber (reg:CC FLAGS_REG))])]
16711 ""
16712 {
16713 machine_mode flags_mode;
16714
16715 if (<MODE>mode == SImode && !TARGET_CMOVE)
16716 {
16717 emit_insn (gen_ffssi2_no_cmove (operands[0], operands [1]));
16718 DONE;
16719 }
16720
16721 flags_mode = TARGET_BMI ? CCCmode : CCZmode;
16722
16723 operands[2] = gen_reg_rtx (<MODE>mode);
16724 operands[3] = gen_rtx_REG (flags_mode, FLAGS_REG);
16725 operands[4] = gen_rtx_COMPARE (flags_mode, operands[1], const0_rtx);
16726 })
16727
;; SImode ffs for !TARGET_CMOVE.  Split after reload into: clear of the
;; scratch (ix86_expand_clear, emitted by the preparation statement),
;; flag-setting ctz, store of (flags == 0) into the low byte of the
;; scratch, negate of the scratch, OR of the scratch into the result,
;; and add 1.  The scratch is an early-clobber "q" register so that its
;; QImode low part (operand 3) can be taken.
16728 (define_insn_and_split "ffssi2_no_cmove"
16729 [(set (match_operand:SI 0 "register_operand" "=r")
16730 (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
16731 (clobber (match_scratch:SI 2 "=&q"))
16732 (clobber (reg:CC FLAGS_REG))]
16733 "!TARGET_CMOVE"
16734 "#"
16735 "&& reload_completed"
16736 [(parallel [(set (match_dup 4) (match_dup 5))
16737 (set (match_dup 0) (ctz:SI (match_dup 1)))])
16738 (set (strict_low_part (match_dup 3))
16739 (eq:QI (match_dup 4) (const_int 0)))
16740 (parallel [(set (match_dup 2) (neg:SI (match_dup 2)))
16741 (clobber (reg:CC FLAGS_REG))])
16742 (parallel [(set (match_dup 0) (ior:SI (match_dup 0) (match_dup 2)))
16743 (clobber (reg:CC FLAGS_REG))])
16744 (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
16745 (clobber (reg:CC FLAGS_REG))])]
16746 {
16747 machine_mode flags_mode = TARGET_BMI ? CCCmode : CCZmode;
16748
16749 operands[3] = gen_lowpart (QImode, operands[2]);
16750 operands[4] = gen_rtx_REG (flags_mode, FLAGS_REG);
16751 operands[5] = gen_rtx_COMPARE (flags_mode, operands[1], const0_rtx);
16752
16753 ix86_expand_clear (operands[2]);
16754 })
16755
;; tzcnt that also sets the flags (CCCmode) from comparing the source
;; with zero.  With TARGET_AVOID_FALSE_DEP_FOR_BMI, after
;; epilogue_completed, when optimizing for speed and when the destination
;; is not mentioned in the source, it is split into the same operation
;; with an extra UNSPEC_INSN_FALSE_DEP use of the destination, preceded
;; by a clear of the destination (ix86_expand_clear).
16756 (define_insn_and_split "*tzcnt<mode>_1"
16757 [(set (reg:CCC FLAGS_REG)
16758 (compare:CCC (match_operand:SWI48 1 "nonimmediate_operand" "rm")
16759 (const_int 0)))
16760 (set (match_operand:SWI48 0 "register_operand" "=r")
16761 (ctz:SWI48 (match_dup 1)))]
16762 "TARGET_BMI"
16763 "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
16764 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
16765 && optimize_function_for_speed_p (cfun)
16766 && !reg_mentioned_p (operands[0], operands[1])"
16767 [(parallel
16768 [(set (reg:CCC FLAGS_REG)
16769 (compare:CCC (match_dup 1) (const_int 0)))
16770 (set (match_dup 0)
16771 (ctz:SWI48 (match_dup 1)))
16772 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
16773 "ix86_expand_clear (operands[0]);"
16774 [(set_attr "type" "alu1")
16775 (set_attr "prefix_0f" "1")
16776 (set_attr "prefix_rep" "1")
16777 (set_attr "btver2_decode" "double")
16778 (set_attr "mode" "<MODE>")])
16779
16780 ; False dependency happens when destination is only updated by tzcnt,
16781 ; lzcnt or popcnt. There is no false dependency when destination is
16782 ; also used in source.
;; Flag-setting tzcnt carrying an extra UNSPEC_INSN_FALSE_DEP use.
;; Operand 2 is constrained ("0") to the same register as the
;; destination.
16783 (define_insn "*tzcnt<mode>_1_falsedep"
16784 [(set (reg:CCC FLAGS_REG)
16785 (compare:CCC (match_operand:SWI48 1 "nonimmediate_operand" "rm")
16786 (const_int 0)))
16787 (set (match_operand:SWI48 0 "register_operand" "=r")
16788 (ctz:SWI48 (match_dup 1)))
16789 (unspec [(match_operand:SWI48 2 "register_operand" "0")]
16790 UNSPEC_INSN_FALSE_DEP)]
16791 "TARGET_BMI"
16792 "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
16793 [(set_attr "type" "alu1")
16794 (set_attr "prefix_0f" "1")
16795 (set_attr "prefix_rep" "1")
16796 (set_attr "btver2_decode" "double")
16797 (set_attr "mode" "<MODE>")])
16798
;; bsf that also sets the flags (CCZmode) from comparing the source with
;; zero.  The insn condition is empty, so it is always enabled.
16799 (define_insn "*bsf<mode>_1"
16800 [(set (reg:CCZ FLAGS_REG)
16801 (compare:CCZ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
16802 (const_int 0)))
16803 (set (match_operand:SWI48 0 "register_operand" "=r")
16804 (ctz:SWI48 (match_dup 1)))]
16805 ""
16806 "bsf{<imodesuffix>}\t{%1, %0|%0, %1}"
16807 [(set_attr "type" "alu1")
16808 (set_attr "prefix_0f" "1")
16809 (set_attr "btver2_decode" "double")
16810 (set_attr "znver1_decode" "vector")
16811 (set_attr "mode" "<MODE>")])
16812
;; ctz with the flags clobbered.  Instruction selection:
;;   TARGET_BMI                                   -> tzcnt
;;   otherwise, optimizing for size               -> bsf
;;   otherwise, TARGET_CPU_P (GENERIC)            -> rep bsf
;;   anything else                                -> bsf
;; The "prefix_rep" attribute is computed from the same conditions.
;; For BMI or generic tuning the insn is later split (see the split
;; condition) into the form with an UNSPEC_INSN_FALSE_DEP use, after
;; clearing the destination with ix86_expand_clear.
16813 (define_insn_and_split "ctz<mode>2"
16814 [(set (match_operand:SWI48 0 "register_operand" "=r")
16815 (ctz:SWI48
16816 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
16817 (clobber (reg:CC FLAGS_REG))]
16818 ""
16819 {
16820 if (TARGET_BMI)
16821 return "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
16822 else if (optimize_function_for_size_p (cfun))
16823 ;
16824 else if (TARGET_CPU_P (GENERIC))
16825 /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI. */
16826 return "rep%; bsf{<imodesuffix>}\t{%1, %0|%0, %1}";
16827
16828 return "bsf{<imodesuffix>}\t{%1, %0|%0, %1}";
16829 }
16830 "(TARGET_BMI || TARGET_CPU_P (GENERIC))
16831 && TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
16832 && optimize_function_for_speed_p (cfun)
16833 && !reg_mentioned_p (operands[0], operands[1])"
16834 [(parallel
16835 [(set (match_dup 0)
16836 (ctz:SWI48 (match_dup 1)))
16837 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
16838 (clobber (reg:CC FLAGS_REG))])]
16839 "ix86_expand_clear (operands[0]);"
16840 [(set_attr "type" "alu1")
16841 (set_attr "prefix_0f" "1")
16842 (set (attr "prefix_rep")
16843 (if_then_else
16844 (ior (match_test "TARGET_BMI")
16845 (and (not (match_test "optimize_function_for_size_p (cfun)"))
16846 (match_test "TARGET_CPU_P (GENERIC)")))
16847 (const_string "1")
16848 (const_string "0")))
16849 (set_attr "mode" "<MODE>")])
16850
16851 ; False dependency happens when destination is only updated by tzcnt,
16852 ; lzcnt or popcnt. There is no false dependency when destination is
16853 ; also used in source.
;; ctz with an extra UNSPEC_INSN_FALSE_DEP use; operand 2 is constrained
;; ("0") to the destination register.  Only the tzcnt (TARGET_BMI) and
;; "rep bsf" (TARGET_CPU_P (GENERIC)) encodings are handled; any other
;; configuration reaches gcc_unreachable.
16854 (define_insn "*ctz<mode>2_falsedep"
16855 [(set (match_operand:SWI48 0 "register_operand" "=r")
16856 (ctz:SWI48
16857 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
16858 (unspec [(match_operand:SWI48 2 "register_operand" "0")]
16859 UNSPEC_INSN_FALSE_DEP)
16860 (clobber (reg:CC FLAGS_REG))]
16861 ""
16862 {
16863 if (TARGET_BMI)
16864 return "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
16865 else if (TARGET_CPU_P (GENERIC))
16866 /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI. */
16867 return "rep%; bsf{<imodesuffix>}\t{%1, %0|%0, %1}";
16868 else
16869 gcc_unreachable ();
16870 }
16871 [(set_attr "type" "alu1")
16872 (set_attr "prefix_0f" "1")
16873 (set_attr "prefix_rep" "1")
16874 (set_attr "mode" "<MODE>")])
16875
;; SImode ctz whose result is taken as a DImode subreg and masked with
;; 63.  Emitted as a 32-bit tzcnt writing the SImode part of the
;; destination (%k0).  Requires TARGET_BMI and TARGET_64BIT.  Splits into
;; the UNSPEC_INSN_FALSE_DEP form under the listed split condition, after
;; clearing the destination.
16876 (define_insn_and_split "*ctzsi2_zext"
16877 [(set (match_operand:DI 0 "register_operand" "=r")
16878 (and:DI
16879 (subreg:DI
16880 (ctz:SI
16881 (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
16882 (const_int 63)))
16883 (clobber (reg:CC FLAGS_REG))]
16884 "TARGET_BMI && TARGET_64BIT"
16885 "tzcnt{l}\t{%1, %k0|%k0, %1}"
16886 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
16887 && optimize_function_for_speed_p (cfun)
16888 && !reg_mentioned_p (operands[0], operands[1])"
16889 [(parallel
16890 [(set (match_dup 0)
16891 (and:DI (subreg:DI (ctz:SI (match_dup 1)) 0) (const_int 63)))
16892 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
16893 (clobber (reg:CC FLAGS_REG))])]
16894 "ix86_expand_clear (operands[0]);"
16895 [(set_attr "type" "alu1")
16896 (set_attr "prefix_0f" "1")
16897 (set_attr "prefix_rep" "1")
16898 (set_attr "mode" "SI")])
16899
16900 ; False dependency happens when destination is only updated by tzcnt,
16901 ; lzcnt or popcnt. There is no false dependency when destination is
16902 ; also used in source.
;; Same operation as *ctzsi2_zext with an extra UNSPEC_INSN_FALSE_DEP
;; use; operand 2 is constrained ("0") to the DImode destination.
16903 (define_insn "*ctzsi2_zext_falsedep"
16904 [(set (match_operand:DI 0 "register_operand" "=r")
16905 (and:DI
16906 (subreg:DI
16907 (ctz:SI
16908 (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
16909 (const_int 63)))
16910 (unspec [(match_operand:DI 2 "register_operand" "0")]
16911 UNSPEC_INSN_FALSE_DEP)
16912 (clobber (reg:CC FLAGS_REG))]
16913 "TARGET_BMI && TARGET_64BIT"
16914 "tzcnt{l}\t{%1, %k0|%k0, %1}"
16915 [(set_attr "type" "alu1")
16916 (set_attr "prefix_0f" "1")
16917 (set_attr "prefix_rep" "1")
16918 (set_attr "mode" "SI")])
16919
;; SImode ctz wrapped in a sign or zero extension to DImode (any_extend),
;; 64-bit only.  All variants write the SImode part of the destination
;; (%k0): tzcnt with TARGET_BMI; "rep bsf" for generic tuning when not
;; optimizing for size; plain bsf otherwise.  Splits into the
;; UNSPEC_INSN_FALSE_DEP form under the listed split condition, after
;; clearing the destination.
16920 (define_insn_and_split "*ctzsidi2_<s>ext"
16921 [(set (match_operand:DI 0 "register_operand" "=r")
16922 (any_extend:DI
16923 (ctz:SI
16924 (match_operand:SI 1 "nonimmediate_operand" "rm"))))
16925 (clobber (reg:CC FLAGS_REG))]
16926 "TARGET_64BIT"
16927 {
16928 if (TARGET_BMI)
16929 return "tzcnt{l}\t{%1, %k0|%k0, %1}";
16930 else if (TARGET_CPU_P (GENERIC)
16931 && !optimize_function_for_size_p (cfun))
16932 /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI. */
16933 return "rep%; bsf{l}\t{%1, %k0|%k0, %1}";
16934 return "bsf{l}\t{%1, %k0|%k0, %1}";
16935 }
16936 "(TARGET_BMI || TARGET_CPU_P (GENERIC))
16937 && TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
16938 && optimize_function_for_speed_p (cfun)
16939 && !reg_mentioned_p (operands[0], operands[1])"
16940 [(parallel
16941 [(set (match_dup 0)
16942 (any_extend:DI (ctz:SI (match_dup 1))))
16943 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
16944 (clobber (reg:CC FLAGS_REG))])]
16945 "ix86_expand_clear (operands[0]);"
16946 [(set_attr "type" "alu1")
16947 (set_attr "prefix_0f" "1")
16948 (set (attr "prefix_rep")
16949 (if_then_else
16950 (ior (match_test "TARGET_BMI")
16951 (and (not (match_test "optimize_function_for_size_p (cfun)"))
16952 (match_test "TARGET_CPU_P (GENERIC)")))
16953 (const_string "1")
16954 (const_string "0")))
16955 (set_attr "mode" "SI")])
16956
;; Extended SImode ctz with an extra UNSPEC_INSN_FALSE_DEP use; operand 2
;; is constrained ("0") to the DImode destination.  Only the tzcnt
;; (TARGET_BMI) and "rep bsf" (TARGET_CPU_P (GENERIC)) encodings are
;; handled; any other configuration reaches gcc_unreachable.
16957 (define_insn "*ctzsidi2_<s>ext_falsedep"
16958 [(set (match_operand:DI 0 "register_operand" "=r")
16959 (any_extend:DI
16960 (ctz:SI
16961 (match_operand:SI 1 "nonimmediate_operand" "rm"))))
16962 (unspec [(match_operand:DI 2 "register_operand" "0")]
16963 UNSPEC_INSN_FALSE_DEP)
16964 (clobber (reg:CC FLAGS_REG))]
16965 "TARGET_64BIT"
16966 {
16967 if (TARGET_BMI)
16968 return "tzcnt{l}\t{%1, %k0|%k0, %1}";
16969 else if (TARGET_CPU_P (GENERIC))
16970 /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI. */
16971 return "rep%; bsf{l}\t{%1, %k0|%k0, %1}";
16972 else
16973 gcc_unreachable ();
16974 }
16975 [(set_attr "type" "alu1")
16976 (set_attr "prefix_0f" "1")
16977 (set_attr "prefix_rep" "1")
16978 (set_attr "mode" "SI")])
16979
;; 64-bit bsr, expressed as 63 - clz (operand 1), that also sets the
;; flags (CCZmode) from comparing the source with zero.
16980 (define_insn "bsr_rex64"
16981 [(set (reg:CCZ FLAGS_REG)
16982 (compare:CCZ (match_operand:DI 1 "nonimmediate_operand" "rm")
16983 (const_int 0)))
16984 (set (match_operand:DI 0 "register_operand" "=r")
16985 (minus:DI (const_int 63)
16986 (clz:DI (match_dup 1))))]
16987 "TARGET_64BIT"
16988 "bsr{q}\t{%1, %0|%0, %1}"
16989 [(set_attr "type" "alu1")
16990 (set_attr "prefix_0f" "1")
16991 (set_attr "znver1_decode" "vector")
16992 (set_attr "mode" "DI")])
16993
;; 64-bit bsr (63 - clz) with the flags clobbered instead of set to a
;; known value.  Only enabled for !TARGET_LZCNT.
16994 (define_insn "bsr_rex64_1"
16995 [(set (match_operand:DI 0 "register_operand" "=r")
16996 (minus:DI (const_int 63)
16997 (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))))
16998 (clobber (reg:CC FLAGS_REG))]
16999 "!TARGET_LZCNT && TARGET_64BIT"
17000 "bsr{q}\t{%1, %0|%0, %1}"
17001 [(set_attr "type" "alu1")
17002 (set_attr "prefix_0f" "1")
17003 (set_attr "znver1_decode" "vector")
17004 (set_attr "mode" "DI")])
17005
;; 64-bit bsr where the subtraction 63 - clz is done in SImode on the low
;; part of the DImode clz and the result is zero-extended to DImode.
;; Emits the same bsr{q} as bsr_rex64_1.
17006 (define_insn "bsr_rex64_1_zext"
17007 [(set (match_operand:DI 0 "register_operand" "=r")
17008 (zero_extend:DI
17009 (minus:SI (const_int 63)
17010 (subreg:SI
17011 (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))
17012 0))))
17013 (clobber (reg:CC FLAGS_REG))]
17014 "!TARGET_LZCNT && TARGET_64BIT"
17015 "bsr{q}\t{%1, %0|%0, %1}"
17016 [(set_attr "type" "alu1")
17017 (set_attr "prefix_0f" "1")
17018 (set_attr "znver1_decode" "vector")
17019 (set_attr "mode" "DI")])
17020
;; 32-bit bsr, expressed as 31 - clz (operand 1), that also sets the
;; flags (CCZmode) from comparing the source with zero.
17021 (define_insn "bsr"
17022 [(set (reg:CCZ FLAGS_REG)
17023 (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm")
17024 (const_int 0)))
17025 (set (match_operand:SI 0 "register_operand" "=r")
17026 (minus:SI (const_int 31)
17027 (clz:SI (match_dup 1))))]
17028 ""
17029 "bsr{l}\t{%1, %0|%0, %1}"
17030 [(set_attr "type" "alu1")
17031 (set_attr "prefix_0f" "1")
17032 (set_attr "znver1_decode" "vector")
17033 (set_attr "mode" "SI")])
17034
;; 32-bit bsr (31 - clz) with the flags clobbered.  Only enabled for
;; !TARGET_LZCNT.
17035 (define_insn "bsr_1"
17036 [(set (match_operand:SI 0 "register_operand" "=r")
17037 (minus:SI (const_int 31)
17038 (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
17039 (clobber (reg:CC FLAGS_REG))]
17040 "!TARGET_LZCNT"
17041 "bsr{l}\t{%1, %0|%0, %1}"
17042 [(set_attr "type" "alu1")
17043 (set_attr "prefix_0f" "1")
17044 (set_attr "znver1_decode" "vector")
17045 (set_attr "mode" "SI")])
17046
;; 32-bit bsr (31 - clz) zero-extended to DImode; the instruction writes
;; the SImode part of the destination (%k0).  64-bit, !TARGET_LZCNT only.
17047 (define_insn "bsr_zext_1"
17048 [(set (match_operand:DI 0 "register_operand" "=r")
17049 (zero_extend:DI
17050 (minus:SI
17051 (const_int 31)
17052 (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))))
17053 (clobber (reg:CC FLAGS_REG))]
17054 "!TARGET_LZCNT && TARGET_64BIT"
17055 "bsr{l}\t{%1, %k0|%k0, %1}"
17056 [(set_attr "type" "alu1")
17057 (set_attr "prefix_0f" "1")
17058 (set_attr "znver1_decode" "vector")
17059 (set_attr "mode" "SI")])
17060
17061 ; As bsr is undefined behavior on zero and for other input
17062 ; values it is in range 0 to 63, we can optimize away sign-extends.
;; Pre-reload splitter (ix86_pre_reload_split) for
;; sign_extend (63 - lowpart (clz:DI (x))) ^ 63.  It is rewritten as a
;; flag-setting 64-bit bsr into a new DImode pseudo (operand 2), followed
;; by zero_extend (lowpart (operand 2) ^ 63).
17063 (define_insn_and_split "*bsr_rex64_2"
17064 [(set (match_operand:DI 0 "register_operand")
17065 (xor:DI
17066 (sign_extend:DI
17067 (minus:SI
17068 (const_int 63)
17069 (subreg:SI (clz:DI (match_operand:DI 1 "nonimmediate_operand"))
17070 0)))
17071 (const_int 63)))
17072 (clobber (reg:CC FLAGS_REG))]
17073 "!TARGET_LZCNT && TARGET_64BIT && ix86_pre_reload_split ()"
17074 "#"
17075 "&& 1"
17076 [(parallel [(set (reg:CCZ FLAGS_REG)
17077 (compare:CCZ (match_dup 1) (const_int 0)))
17078 (set (match_dup 2)
17079 (minus:DI (const_int 63) (clz:DI (match_dup 1))))])
17080 (parallel [(set (match_dup 0)
17081 (zero_extend:DI (xor:SI (match_dup 3) (const_int 63))))
17082 (clobber (reg:CC FLAGS_REG))])]
17083 {
17084 operands[2] = gen_reg_rtx (DImode);
17085 operands[3] = lowpart_subreg (SImode, operands[2], DImode);
17086 })
17087
;; Pre-reload splitter (ix86_pre_reload_split) for
;; sign_extend ((31 - clz:SI (x)) ^ 31).  It is rewritten as a
;; flag-setting 32-bit bsr into a new SImode pseudo (operand 2), followed
;; by zero_extend (operand 2 ^ 31).
17088 (define_insn_and_split "*bsr_2"
17089 [(set (match_operand:DI 0 "register_operand")
17090 (sign_extend:DI
17091 (xor:SI
17092 (minus:SI
17093 (const_int 31)
17094 (clz:SI (match_operand:SI 1 "nonimmediate_operand")))
17095 (const_int 31))))
17096 (clobber (reg:CC FLAGS_REG))]
17097 "!TARGET_LZCNT && TARGET_64BIT && ix86_pre_reload_split ()"
17098 "#"
17099 "&& 1"
17100 [(parallel [(set (reg:CCZ FLAGS_REG)
17101 (compare:CCZ (match_dup 1) (const_int 0)))
17102 (set (match_dup 2)
17103 (minus:SI (const_int 31) (clz:SI (match_dup 1))))])
17104 (parallel [(set (match_dup 0)
17105 (zero_extend:DI (xor:SI (match_dup 2) (const_int 31))))
17106 (clobber (reg:CC FLAGS_REG))])]
17107 "operands[2] = gen_reg_rtx (SImode);")
17108
17109 ; Splitters to optimize 64 - __builtin_clzl (x) or 32 - __builtin_clz (x).
17110 ; Again, as for !TARGET_LZCNT CLZ is UB at zero, CLZ is guaranteed to be
17111 ; in [0, 63] or [0, 31] range.
;; SImode result, DImode input: C - ((63 - lowpart (clz:DI (x))) ^ 63).
;; Replaced by a 64-bit bsr into a new DImode pseudo (operand 3) and an
;; SImode add of C - 63 (operand 4) to that pseudo's low part
;; (operand 5).  When C is exactly 63 no add is needed:
;; bsr_rex64_1_zext is emitted followed by a move of the low part.
17112 (define_split
17113 [(set (match_operand:SI 0 "register_operand")
17114 (minus:SI
17115 (match_operand:SI 2 "const_int_operand")
17116 (xor:SI
17117 (minus:SI (const_int 63)
17118 (subreg:SI
17119 (clz:DI (match_operand:DI 1 "nonimmediate_operand"))
17120 0))
17121 (const_int 63))))]
17122 "!TARGET_LZCNT && TARGET_64BIT && ix86_pre_reload_split ()"
17123 [(set (match_dup 3)
17124 (minus:DI (const_int 63) (clz:DI (match_dup 1))))
17125 (set (match_dup 0)
17126 (plus:SI (match_dup 5) (match_dup 4)))]
17127 {
17128 operands[3] = gen_reg_rtx (DImode);
17129 operands[5] = lowpart_subreg (SImode, operands[3], DImode);
17130 if (INTVAL (operands[2]) == 63)
17131 {
17132 emit_insn (gen_bsr_rex64_1_zext (operands[3], operands[1]));
17133 emit_move_insn (operands[0], operands[5]);
17134 DONE;
17135 }
17136 operands[4] = gen_int_mode (UINTVAL (operands[2]) - 63, SImode);
17137 })
17138
;; SImode: C - ((31 - clz:SI (x)) ^ 31).  Replaced by a 32-bit bsr into a
;; new SImode pseudo (operand 3) and an add of C - 31 (operand 4).  When
;; C is exactly 31, bsr_1 directly into operand 0 is emitted instead.
17139 (define_split
17140 [(set (match_operand:SI 0 "register_operand")
17141 (minus:SI
17142 (match_operand:SI 2 "const_int_operand")
17143 (xor:SI
17144 (minus:SI (const_int 31)
17145 (clz:SI (match_operand:SI 1 "nonimmediate_operand")))
17146 (const_int 31))))]
17147 "!TARGET_LZCNT && ix86_pre_reload_split ()"
17148 [(set (match_dup 3)
17149 (minus:SI (const_int 31) (clz:SI (match_dup 1))))
17150 (set (match_dup 0)
17151 (plus:SI (match_dup 3) (match_dup 4)))]
17152 {
17153 if (INTVAL (operands[2]) == 31)
17154 {
17155 emit_insn (gen_bsr_1 (operands[0], operands[1]));
17156 DONE;
17157 }
17158 operands[3] = gen_reg_rtx (SImode);
17159 operands[4] = gen_int_mode (UINTVAL (operands[2]) - 31, SImode);
17160 })
17161
;; DImode: C - (sign_extend (63 - lowpart (clz:DI (x))) ^ 63).  Replaced
;; by a 64-bit bsr into a new DImode pseudo (operand 3) and a DImode add
;; of C - 63 (operand 4).  The split condition additionally requires that
;; C - 63 is unchanged by trunc_int_for_mode (..., SImode).  When C is
;; exactly 63, bsr_rex64_1 directly into operand 0 is emitted instead.
17162 (define_split
17163 [(set (match_operand:DI 0 "register_operand")
17164 (minus:DI
17165 (match_operand:DI 2 "const_int_operand")
17166 (xor:DI
17167 (sign_extend:DI
17168 (minus:SI (const_int 63)
17169 (subreg:SI
17170 (clz:DI (match_operand:DI 1 "nonimmediate_operand"))
17171 0)))
17172 (const_int 63))))]
17173 "!TARGET_LZCNT
17174 && TARGET_64BIT
17175 && ix86_pre_reload_split ()
17176 && ((unsigned HOST_WIDE_INT)
17177 trunc_int_for_mode (UINTVAL (operands[2]) - 63, SImode)
17178 == UINTVAL (operands[2]) - 63)"
17179 [(set (match_dup 3)
17180 (minus:DI (const_int 63) (clz:DI (match_dup 1))))
17181 (set (match_dup 0)
17182 (plus:DI (match_dup 3) (match_dup 4)))]
17183 {
17184 if (INTVAL (operands[2]) == 63)
17185 {
17186 emit_insn (gen_bsr_rex64_1 (operands[0], operands[1]));
17187 DONE;
17188 }
17189 operands[3] = gen_reg_rtx (DImode);
17190 operands[4] = GEN_INT (UINTVAL (operands[2]) - 63);
17191 })
17192
;; DImode result, SImode input: C - sign_extend ((31 - clz:SI (x)) ^ 31).
;; Replaced by a zero-extended 32-bit bsr into a new DImode pseudo
;; (operand 3) and a DImode add of C - 31 (operand 4).  The split
;; condition additionally requires that C - 31 is unchanged by
;; trunc_int_for_mode (..., SImode).  When C is exactly 31, bsr_zext_1
;; directly into operand 0 is emitted instead.
17193 (define_split
17194 [(set (match_operand:DI 0 "register_operand")
17195 (minus:DI
17196 (match_operand:DI 2 "const_int_operand")
17197 (sign_extend:DI
17198 (xor:SI
17199 (minus:SI (const_int 31)
17200 (clz:SI (match_operand:SI 1 "nonimmediate_operand")))
17201 (const_int 31)))))]
17202 "!TARGET_LZCNT
17203 && TARGET_64BIT
17204 && ix86_pre_reload_split ()
17205 && ((unsigned HOST_WIDE_INT)
17206 trunc_int_for_mode (UINTVAL (operands[2]) - 31, SImode)
17207 == UINTVAL (operands[2]) - 31)"
17208 [(set (match_dup 3)
17209 (zero_extend:DI (minus:SI (const_int 31) (clz:SI (match_dup 1)))))
17210 (set (match_dup 0)
17211 (plus:DI (match_dup 3) (match_dup 4)))]
17212 {
17213 if (INTVAL (operands[2]) == 31)
17214 {
17215 emit_insn (gen_bsr_zext_1 (operands[0], operands[1]));
17216 DONE;
17217 }
17218 operands[3] = gen_reg_rtx (DImode);
17219 operands[4] = GEN_INT (UINTVAL (operands[2]) - 31);
17220 })
17221
;; clz expander.  With TARGET_LZCNT it emits clz<mode>2_lzcnt and is
;; done.  Otherwise operand 2 is set to bitsize - 1 and the template
;; emits a flag-setting (bitsize - 1) - clz into a new pseudo
;; (operand 3), followed by an XOR of that pseudo with bitsize - 1 into
;; operand 0.
17222 (define_expand "clz<mode>2"
17223 [(parallel
17224 [(set (reg:CCZ FLAGS_REG)
17225 (compare:CCZ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17226 (const_int 0)))
17227 (set (match_dup 3) (minus:SWI48
17228 (match_dup 2)
17229 (clz:SWI48 (match_dup 1))))])
17230 (parallel
17231 [(set (match_operand:SWI48 0 "register_operand")
17232 (xor:SWI48 (match_dup 3) (match_dup 2)))
17233 (clobber (reg:CC FLAGS_REG))])]
17234 ""
17235 {
17236 if (TARGET_LZCNT)
17237 {
17238 emit_insn (gen_clz<mode>2_lzcnt (operands[0], operands[1]));
17239 DONE;
17240 }
17241 operands[2] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
17242 operands[3] = gen_reg_rtx (<MODE>mode);
17243 })
17244
;; clz implemented with lzcnt (TARGET_LZCNT), flags clobbered.  Splits
;; into the UNSPEC_INSN_FALSE_DEP form under the listed split condition,
;; after clearing the destination with ix86_expand_clear.
17245 (define_insn_and_split "clz<mode>2_lzcnt"
17246 [(set (match_operand:SWI48 0 "register_operand" "=r")
17247 (clz:SWI48
17248 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
17249 (clobber (reg:CC FLAGS_REG))]
17250 "TARGET_LZCNT"
17251 "lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
17252 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
17253 && optimize_function_for_speed_p (cfun)
17254 && !reg_mentioned_p (operands[0], operands[1])"
17255 [(parallel
17256 [(set (match_dup 0)
17257 (clz:SWI48 (match_dup 1)))
17258 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
17259 (clobber (reg:CC FLAGS_REG))])]
17260 "ix86_expand_clear (operands[0]);"
17261 [(set_attr "prefix_rep" "1")
17262 (set_attr "type" "bitmanip")
17263 (set_attr "mode" "<MODE>")])
17264
17265 ; False dependency happens when destination is only updated by tzcnt,
17266 ; lzcnt or popcnt. There is no false dependency when destination is
17267 ; also used in source.
;; lzcnt-based clz with an extra UNSPEC_INSN_FALSE_DEP use; operand 2 is
;; constrained ("0") to the destination register.
17268 (define_insn "*clz<mode>2_lzcnt_falsedep"
17269 [(set (match_operand:SWI48 0 "register_operand" "=r")
17270 (clz:SWI48
17271 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
17272 (unspec [(match_operand:SWI48 2 "register_operand" "0")]
17273 UNSPEC_INSN_FALSE_DEP)
17274 (clobber (reg:CC FLAGS_REG))]
17275 "TARGET_LZCNT"
17276 "lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
17277 [(set_attr "prefix_rep" "1")
17278 (set_attr "type" "bitmanip")
17279 (set_attr "mode" "<MODE>")])
17280
;; SImode clz whose result is taken as a DImode subreg and masked with
;; 63.  Emitted as a 32-bit lzcnt writing the SImode part of the
;; destination (%k0).  Requires TARGET_LZCNT and TARGET_64BIT.  Splits
;; into the UNSPEC_INSN_FALSE_DEP form under the listed split condition,
;; after clearing the destination.
17281 (define_insn_and_split "*clzsi2_lzcnt_zext"
17282 [(set (match_operand:DI 0 "register_operand" "=r")
17283 (and:DI
17284 (subreg:DI
17285 (clz:SI
17286 (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
17287 (const_int 63)))
17288 (clobber (reg:CC FLAGS_REG))]
17289 "TARGET_LZCNT && TARGET_64BIT"
17290 "lzcnt{l}\t{%1, %k0|%k0, %1}"
17291 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
17292 && optimize_function_for_speed_p (cfun)
17293 && !reg_mentioned_p (operands[0], operands[1])"
17294 [(parallel
17295 [(set (match_dup 0)
17296 (and:DI (subreg:DI (clz:SI (match_dup 1)) 0) (const_int 63)))
17297 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
17298 (clobber (reg:CC FLAGS_REG))])]
17299 "ix86_expand_clear (operands[0]);"
17300 [(set_attr "prefix_rep" "1")
17301 (set_attr "type" "bitmanip")
17302 (set_attr "mode" "SI")])
17303
17304 ; False dependency happens when destination is only updated by tzcnt,
17305 ; lzcnt or popcnt. There is no false dependency when destination is
17306 ; also used in source.
;; Same operation as *clzsi2_lzcnt_zext with an extra
;; UNSPEC_INSN_FALSE_DEP use; operand 2 is constrained ("0") to the DImode
;; destination.  Operand 1 is SImode and the insn is 64-bit only, exactly
;; as in *clzsi2_lzcnt_zext, whose split is what produces this form.
17307 (define_insn "*clzsi2_lzcnt_zext_falsedep"
17308 [(set (match_operand:DI 0 "register_operand" "=r")
17309 (and:DI
17310 (subreg:DI
17311 (clz:SI
17312 (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
17313 (const_int 63)))
17314 (unspec [(match_operand:DI 2 "register_operand" "0")]
17315 UNSPEC_INSN_FALSE_DEP)
17316 (clobber (reg:CC FLAGS_REG))]
17317 "TARGET_LZCNT && TARGET_64BIT"
17318 "lzcnt{l}\t{%1, %k0|%k0, %1}"
17319 [(set_attr "prefix_rep" "1")
17320 (set_attr "type" "bitmanip")
17321 (set_attr "mode" "SI")])
17322
;; Int iterator over the two zero-count unspecs.  Each entry carries its
;; own enabling condition: TARGET_BMI for tzcnt, TARGET_LZCNT for lzcnt.
17323 (define_int_iterator LT_ZCNT
17324 [(UNSPEC_TZCNT "TARGET_BMI")
17325 (UNSPEC_LZCNT "TARGET_LZCNT")])
17326
;; Mnemonic for each LT_ZCNT entry; used in pattern names and in the
;; output templates.
17327 (define_int_attr lt_zcnt
17328 [(UNSPEC_TZCNT "tzcnt")
17329 (UNSPEC_LZCNT "lzcnt")])
17330
;; Value of the "type" insn attribute for each LT_ZCNT entry.
17331 (define_int_attr lt_zcnt_type
17332 [(UNSPEC_TZCNT "alu1")
17333 (UNSPEC_LZCNT "bitmanip")])
17334
17335 ;; Version of lzcnt/tzcnt that is expanded from intrinsics. This version
17336 ;; provides operand size as output when source operand is zero.
17337
;; The insn condition string is empty; the per-entry conditions attached
;; to LT_ZCNT (TARGET_BMI / TARGET_LZCNT) are what gate each instance.
;; Splits into the UNSPEC_INSN_FALSE_DEP form under the listed split
;; condition, after clearing the destination with ix86_expand_clear.
17338 (define_insn_and_split "<lt_zcnt>_<mode>"
17339 [(set (match_operand:SWI48 0 "register_operand" "=r")
17340 (unspec:SWI48
17341 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))
17342 (clobber (reg:CC FLAGS_REG))]
17343 ""
17344 "<lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}"
17345 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
17346 && optimize_function_for_speed_p (cfun)
17347 && !reg_mentioned_p (operands[0], operands[1])"
17348 [(parallel
17349 [(set (match_dup 0)
17350 (unspec:SWI48 [(match_dup 1)] LT_ZCNT))
17351 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
17352 (clobber (reg:CC FLAGS_REG))])]
17353 "ix86_expand_clear (operands[0]);"
17354 [(set_attr "type" "<lt_zcnt_type>")
17355 (set_attr "prefix_0f" "1")
17356 (set_attr "prefix_rep" "1")
17357 (set_attr "mode" "<MODE>")])
17358
17359 ; False dependency happens when destination is only updated by tzcnt,
17360 ; lzcnt or popcnt. There is no false dependency when destination is
17361 ; also used in source.
;; Intrinsic lzcnt/tzcnt with an extra UNSPEC_INSN_FALSE_DEP use;
;; operand 2 is constrained ("0") to the destination register.
17362 (define_insn "*<lt_zcnt>_<mode>_falsedep"
17363 [(set (match_operand:SWI48 0 "register_operand" "=r")
17364 (unspec:SWI48
17365 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))
17366 (unspec [(match_operand:SWI48 2 "register_operand" "0")]
17367 UNSPEC_INSN_FALSE_DEP)
17368 (clobber (reg:CC FLAGS_REG))]
17369 ""
17370 "<lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}"
17371 [(set_attr "type" "<lt_zcnt_type>")
17372 (set_attr "prefix_0f" "1")
17373 (set_attr "prefix_rep" "1")
17374 (set_attr "mode" "<MODE>")])
17375
;; HImode (16-bit, "w" suffix) form of the intrinsic lzcnt/tzcnt.  Unlike
;; the SWI48 form it is a plain define_insn with no false-dependency
;; split.
17376 (define_insn "<lt_zcnt>_hi"
17377 [(set (match_operand:HI 0 "register_operand" "=r")
17378 (unspec:HI
17379 [(match_operand:HI 1 "nonimmediate_operand" "rm")] LT_ZCNT))
17380 (clobber (reg:CC FLAGS_REG))]
17381 ""
17382 "<lt_zcnt>{w}\t{%1, %0|%0, %1}"
17383 [(set_attr "type" "<lt_zcnt_type>")
17384 (set_attr "prefix_0f" "1")
17385 (set_attr "prefix_rep" "1")
17386 (set_attr "mode" "HI")])
17387
17388 ;; BMI instructions.
17389
;; bextr as UNSPEC_BEXTR of operand 1 (register or memory, one
;; alternative each) and register operand 2; flags are clobbered.
17390 (define_insn "bmi_bextr_<mode>"
17391 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
17392 (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "r,m")
17393 (match_operand:SWI48 2 "register_operand" "r,r")]
17394 UNSPEC_BEXTR))
17395 (clobber (reg:CC FLAGS_REG))]
17396 "TARGET_BMI"
17397 "bextr\t{%2, %1, %0|%0, %1, %2}"
17398 [(set_attr "type" "bitmanip")
17399 (set_attr "btver2_decode" "direct, double")
17400 (set_attr "mode" "<MODE>")])
17401
;; bextr used only for its flags: CCZmode flags are set from comparing
;; the UNSPEC_BEXTR result with zero, and the value itself goes to a
;; scratch register (operand 0).
17402 (define_insn "*bmi_bextr_<mode>_ccz"
17403 [(set (reg:CCZ FLAGS_REG)
17404 (compare:CCZ
17405 (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "r,m")
17406 (match_operand:SWI48 2 "register_operand" "r,r")]
17407 UNSPEC_BEXTR)
17408 (const_int 0)))
17409 (clobber (match_scratch:SWI48 0 "=r,r"))]
17410 "TARGET_BMI"
17411 "bextr\t{%2, %1, %0|%0, %1, %2}"
17412 [(set_attr "type" "bitmanip")
17413 (set_attr "btver2_decode" "direct, double")
17414 (set_attr "mode" "<MODE>")])
17415
;; blsi: matches (-x) & x, with the flags clobbered.
17416 (define_insn "*bmi_blsi_<mode>"
17417 [(set (match_operand:SWI48 0 "register_operand" "=r")
17418 (and:SWI48
17419 (neg:SWI48
17420 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))
17421 (match_dup 1)))
17422 (clobber (reg:CC FLAGS_REG))]
17423 "TARGET_BMI"
17424 "blsi\t{%1, %0|%0, %1}"
17425 [(set_attr "type" "bitmanip")
17426 (set_attr "btver2_decode" "double")
17427 (set_attr "mode" "<MODE>")])
17428
;; blsi producing both the value (-x) & x and flags from comparing that
;; value with zero.  The flags mode is left open in the pattern and
;; checked with ix86_match_ccmode (insn, CCNOmode).
17429 (define_insn "*bmi_blsi_<mode>_cmp"
17430 [(set (reg FLAGS_REG)
17431 (compare
17432 (and:SWI48
17433 (neg:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))
17434 (match_dup 1))
17435 (const_int 0)))
17436 (set (match_operand:SWI48 0 "register_operand" "=r")
17437 (and:SWI48 (neg:SWI48 (match_dup 1)) (match_dup 1)))]
17438 "TARGET_BMI && ix86_match_ccmode (insn, CCNOmode)"
17439 "blsi\t{%1, %0|%0, %1}"
17440 [(set_attr "type" "bitmanip")
17441 (set_attr "btver2_decode" "double")
17442 (set_attr "mode" "<MODE>")])
17443
;; blsi used only for its flags: the value (-x) & x is compared with zero
;; and written to a scratch register (operand 0).  The flags mode is
;; checked with ix86_match_ccmode (insn, CCNOmode).
17444 (define_insn "*bmi_blsi_<mode>_ccno"
17445 [(set (reg FLAGS_REG)
17446 (compare
17447 (and:SWI48
17448 (neg:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))
17449 (match_dup 1))
17450 (const_int 0)))
17451 (clobber (match_scratch:SWI48 0 "=r"))]
17452 "TARGET_BMI && ix86_match_ccmode (insn, CCNOmode)"
17453 "blsi\t{%1, %0|%0, %1}"
17454 [(set_attr "type" "bitmanip")
17455 (set_attr "btver2_decode" "double")
17456 (set_attr "mode" "<MODE>")])
17457
;; blsmsk: matches (x + -1) ^ x, with the flags clobbered.
17458 (define_insn "*bmi_blsmsk_<mode>"
17459 [(set (match_operand:SWI48 0 "register_operand" "=r")
17460 (xor:SWI48
17461 (plus:SWI48
17462 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17463 (const_int -1))
17464 (match_dup 1)))
17465 (clobber (reg:CC FLAGS_REG))]
17466 "TARGET_BMI"
17467 "blsmsk\t{%1, %0|%0, %1}"
17468 [(set_attr "type" "bitmanip")
17469 (set_attr "btver2_decode" "double")
17470 (set_attr "mode" "<MODE>")])
17471
;; blsr: matches (x + -1) & x, with the flags clobbered.
17472 (define_insn "*bmi_blsr_<mode>"
17473 [(set (match_operand:SWI48 0 "register_operand" "=r")
17474 (and:SWI48
17475 (plus:SWI48
17476 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17477 (const_int -1))
17478 (match_dup 1)))
17479 (clobber (reg:CC FLAGS_REG))]
17480 "TARGET_BMI"
17481 "blsr\t{%1, %0|%0, %1}"
17482 [(set_attr "type" "bitmanip")
17483 (set_attr "btver2_decode" "double")
17484 (set_attr "mode" "<MODE>")])
17485
;; blsr producing both the value (x + -1) & x and CCZmode flags from
;; comparing that value with zero.
17486 (define_insn "*bmi_blsr_<mode>_cmp"
17487 [(set (reg:CCZ FLAGS_REG)
17488 (compare:CCZ
17489 (and:SWI48
17490 (plus:SWI48
17491 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17492 (const_int -1))
17493 (match_dup 1))
17494 (const_int 0)))
17495 (set (match_operand:SWI48 0 "register_operand" "=r")
17496 (and:SWI48
17497 (plus:SWI48
17498 (match_dup 1)
17499 (const_int -1))
17500 (match_dup 1)))]
17501 "TARGET_BMI"
17502 "blsr\t{%1, %0|%0, %1}"
17503 [(set_attr "type" "bitmanip")
17504 (set_attr "btver2_decode" "double")
17505 (set_attr "mode" "<MODE>")])
17506
;; blsr used only for its CCZmode flags: the value (x + -1) & x is
;; compared with zero and written to a scratch register (operand 0).
17507 (define_insn "*bmi_blsr_<mode>_ccz"
17508 [(set (reg:CCZ FLAGS_REG)
17509 (compare:CCZ
17510 (and:SWI48
17511 (plus:SWI48
17512 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17513 (const_int -1))
17514 (match_dup 1))
17515 (const_int 0)))
17516 (clobber (match_scratch:SWI48 0 "=r"))]
17517 "TARGET_BMI"
17518 "blsr\t{%1, %0|%0, %1}"
17519 [(set_attr "type" "bitmanip")
17520 (set_attr "btver2_decode" "double")
17521 (set_attr "mode" "<MODE>")])
17522
17523 ;; BMI2 instructions.
;; bzhi expander.  The generated RTL is: if the low 8 bits of operand 2
;; are nonzero, a zero_extract of operand 1 starting at bit 0 whose width
;; is umin (operand 2 & 255, operand 3); otherwise zero.  Operand 3 is
;; not supplied by the caller; it is set to the mode's width in bits
;; (<MODE_SIZE> * BITS_PER_UNIT).
17524 (define_expand "bmi2_bzhi_<mode>3"
17525 [(parallel
17526 [(set (match_operand:SWI48 0 "register_operand")
17527 (if_then_else:SWI48
17528 (ne:QI (and:SWI48 (match_operand:SWI48 2 "register_operand")
17529 (const_int 255))
17530 (const_int 0))
17531 (zero_extract:SWI48
17532 (match_operand:SWI48 1 "nonimmediate_operand")
17533 (umin:SWI48 (and:SWI48 (match_dup 2) (const_int 255))
17534 (match_dup 3))
17535 (const_int 0))
17536 (const_int 0)))
17537 (clobber (reg:CC FLAGS_REG))])]
17538 "TARGET_BMI2"
17539 "operands[3] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);")
17540
;; Matcher for the RTL built by the bmi2_bzhi_<mode>3 expander.  Operand
;; 3 may be any const_int syntactically, but the insn condition accepts
;; only the mode's width in bits.
17541 (define_insn "*bmi2_bzhi_<mode>3"
17542 [(set (match_operand:SWI48 0 "register_operand" "=r")
17543 (if_then_else:SWI48
17544 (ne:QI (and:SWI48 (match_operand:SWI48 2 "register_operand" "r")
17545 (const_int 255))
17546 (const_int 0))
17547 (zero_extract:SWI48
17548 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17549 (umin:SWI48 (and:SWI48 (match_dup 2) (const_int 255))
17550 (match_operand:SWI48 3 "const_int_operand"))
17551 (const_int 0))
17552 (const_int 0)))
17553 (clobber (reg:CC FLAGS_REG))]
17554 "TARGET_BMI2 && INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
17555 "bzhi\t{%2, %1, %0|%0, %1, %2}"
17556 [(set_attr "type" "bitmanip")
17557 (set_attr "prefix" "vex")
17558 (set_attr "mode" "<MODE>")])
17559
;; bzhi variant where the index (operand 2) is a QImode register: the
;; test is operand 2 != 0 and the width is umin (zero_extend (operand 2),
;; operand 3).  The template prints operand 2 through the %<k> modifier.
17560 (define_insn "*bmi2_bzhi_<mode>3_1"
17561 [(set (match_operand:SWI48 0 "register_operand" "=r")
17562 (if_then_else:SWI48
17563 (ne:QI (match_operand:QI 2 "register_operand" "r") (const_int 0))
17564 (zero_extract:SWI48
17565 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17566 (umin:SWI48 (zero_extend:SWI48 (match_dup 2))
17567 (match_operand:SWI48 3 "const_int_operand"))
17568 (const_int 0))
17569 (const_int 0)))
17570 (clobber (reg:CC FLAGS_REG))]
17571 "TARGET_BMI2 && INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
17572 "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}"
17573 [(set_attr "type" "bitmanip")
17574 (set_attr "prefix" "vex")
17575 (set_attr "mode" "<MODE>")])
17576
;; Flags-only form of *bmi2_bzhi_<mode>3_1: the same if_then_else expression
;; is compared against 0 and the result is set in FLAGS_REG in CCZmode.
;; The data result is not kept; operand 0 is only a clobbered scratch
;; register that receives the bzhi output.
17577 (define_insn "*bmi2_bzhi_<mode>3_1_ccz"
17578 [(set (reg:CCZ FLAGS_REG)
17579 (compare:CCZ
17580 (if_then_else:SWI48
17581 (ne:QI (match_operand:QI 2 "register_operand" "r") (const_int 0))
17582 (zero_extract:SWI48
17583 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17584 (umin:SWI48 (zero_extend:SWI48 (match_dup 2))
17585 (match_operand:SWI48 3 "const_int_operand"))
17586 (const_int 0))
17587 (const_int 0))
17588 (const_int 0)))
17589 (clobber (match_scratch:SWI48 0 "=r"))]
17590 "TARGET_BMI2 && INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
17591 "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}"
17592 [(set_attr "type" "bitmanip")
17593 (set_attr "prefix" "vex")
17594 (set_attr "mode" "<MODE>")])
17595
;; Recognizes the mask idiom  operand1 & ((1 << operand2) - 1)  with a
;; QImode register shift count and emits it as a single bzhi.
17596 (define_insn "*bmi2_bzhi_<mode>3_2"
17597 [(set (match_operand:SWI48 0 "register_operand" "=r")
17598 (and:SWI48
17599 (plus:SWI48
17600 (ashift:SWI48 (const_int 1)
17601 (match_operand:QI 2 "register_operand" "r"))
17602 (const_int -1))
17603 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
17604 (clobber (reg:CC FLAGS_REG))]
17605 "TARGET_BMI2"
17606 "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}"
17607 [(set_attr "type" "bitmanip")
17608 (set_attr "prefix" "vex")
17609 (set_attr "mode" "<MODE>")])
17610
;; Recognizes the equivalent mask idiom  operand1 & ~(-1 << operand2)
;; with a QImode register shift count and emits it as a single bzhi.
17611 (define_insn "*bmi2_bzhi_<mode>3_3"
17612 [(set (match_operand:SWI48 0 "register_operand" "=r")
17613 (and:SWI48
17614 (not:SWI48
17615 (ashift:SWI48 (const_int -1)
17616 (match_operand:QI 2 "register_operand" "r")))
17617 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
17618 (clobber (reg:CC FLAGS_REG))]
17619 "TARGET_BMI2"
17620 "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}"
17621 [(set_attr "type" "bitmanip")
17622 (set_attr "prefix" "vex")
17623 (set_attr "mode" "<MODE>")])
17624
;; 64-bit only: the SImode idiom  operand1 & ((1 << operand2) - 1)
;; zero-extended to DImode.  All three operands are printed with the %q
;; modifier and the insn is marked as mode DI.
;; NOTE(review): operand 1 is an SImode register-or-memory operand but is
;; printed with %q -- confirm the intended access width for the memory
;; alternative.
17625 (define_insn "*bmi2_bzhi_zero_extendsidi_4"
17626 [(set (match_operand:DI 0 "register_operand" "=r")
17627 (zero_extend:DI
17628 (and:SI
17629 (plus:SI
17630 (ashift:SI (const_int 1)
17631 (match_operand:QI 2 "register_operand" "r"))
17632 (const_int -1))
17633 (match_operand:SI 1 "nonimmediate_operand" "rm"))))
17634 (clobber (reg:CC FLAGS_REG))]
17635 "TARGET_64BIT && TARGET_BMI2"
17636 "bzhi\t{%q2, %q1, %q0|%q0, %q1, %q2}"
17637 [(set_attr "type" "bitmanip")
17638 (set_attr "prefix" "vex")
17639 (set_attr "mode" "DI")])
17640
;; Named insn for pdep (TARGET_BMI2).  The operation is opaque to the RTL
;; optimizers: it is represented as UNSPEC_PDEP of operand 1 (register) and
;; operand 2 (register or memory).  No flags clobber is declared.
17641 (define_insn "bmi2_pdep_<mode>3"
17642 [(set (match_operand:SWI48 0 "register_operand" "=r")
17643 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")
17644 (match_operand:SWI48 2 "nonimmediate_operand" "rm")]
17645 UNSPEC_PDEP))]
17646 "TARGET_BMI2"
17647 "pdep\t{%2, %1, %0|%0, %1, %2}"
17648 [(set_attr "type" "bitmanip")
17649 (set_attr "prefix" "vex")
17650 (set_attr "mode" "<MODE>")])
17651
;; Named insn for pext (TARGET_BMI2).  Same operand layout as
;; bmi2_pdep_<mode>3, represented as UNSPEC_PEXT of operand 1 (register)
;; and operand 2 (register or memory).  No flags clobber is declared.
17652 (define_insn "bmi2_pext_<mode>3"
17653 [(set (match_operand:SWI48 0 "register_operand" "=r")
17654 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")
17655 (match_operand:SWI48 2 "nonimmediate_operand" "rm")]
17656 UNSPEC_PEXT))]
17657 "TARGET_BMI2"
17658 "pext\t{%2, %1, %0|%0, %1, %2}"
17659 [(set_attr "type" "bitmanip")
17660 (set_attr "prefix" "vex")
17661 (set_attr "mode" "<MODE>")])
17662
17663 ;; TBM instructions.
;; TBM bextr with immediate control: a zero_extract of operand 1 whose
;; width (operand 2) and start bit (operand 3) are both constants accepted
;; by const_0_to_255_operand.  The output code packs the two constants into
;; one immediate, (operand 2 << 8) | operand 3, and stores it back into
;; operands[2] before printing, so %2 in the template is the packed value.
17664 (define_insn "@tbm_bextri_<mode>"
17665 [(set (match_operand:SWI48 0 "register_operand" "=r")
17666 (zero_extract:SWI48
17667 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17668 (match_operand 2 "const_0_to_255_operand")
17669 (match_operand 3 "const_0_to_255_operand")))
17670 (clobber (reg:CC FLAGS_REG))]
17671 "TARGET_TBM"
17672 {
17673 operands[2] = GEN_INT (INTVAL (operands[2]) << 8 | INTVAL (operands[3]));
17674 return "bextr\t{%2, %1, %0|%0, %1, %2}";
17675 }
17676 [(set_attr "type" "bitmanip")
17677 (set_attr "mode" "<MODE>")])
17678
;; TBM blcfill: matches  (x + 1) & x  where x is operand 1.
17679 (define_insn "*tbm_blcfill_<mode>"
17680 [(set (match_operand:SWI48 0 "register_operand" "=r")
17681 (and:SWI48
17682 (plus:SWI48
17683 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17684 (const_int 1))
17685 (match_dup 1)))
17686 (clobber (reg:CC FLAGS_REG))]
17687 "TARGET_TBM"
17688 "blcfill\t{%1, %0|%0, %1}"
17689 [(set_attr "type" "bitmanip")
17690 (set_attr "mode" "<MODE>")])
17691
;; TBM blci: matches  ~(x + 1) | x  where x is operand 1.
17692 (define_insn "*tbm_blci_<mode>"
17693 [(set (match_operand:SWI48 0 "register_operand" "=r")
17694 (ior:SWI48
17695 (not:SWI48
17696 (plus:SWI48
17697 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17698 (const_int 1)))
17699 (match_dup 1)))
17700 (clobber (reg:CC FLAGS_REG))]
17701 "TARGET_TBM"
17702 "blci\t{%1, %0|%0, %1}"
17703 [(set_attr "type" "bitmanip")
17704 (set_attr "mode" "<MODE>")])
17705
;; TBM blcic: matches  (x + 1) & ~x  where x is operand 1.
17706 (define_insn "*tbm_blcic_<mode>"
17707 [(set (match_operand:SWI48 0 "register_operand" "=r")
17708 (and:SWI48
17709 (plus:SWI48
17710 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17711 (const_int 1))
17712 (not:SWI48
17713 (match_dup 1))))
17714 (clobber (reg:CC FLAGS_REG))]
17715 "TARGET_TBM"
17716 "blcic\t{%1, %0|%0, %1}"
17717 [(set_attr "type" "bitmanip")
17718 (set_attr "mode" "<MODE>")])
17719
;; TBM blcmsk: matches  (x + 1) ^ x  where x is operand 1.
17720 (define_insn "*tbm_blcmsk_<mode>"
17721 [(set (match_operand:SWI48 0 "register_operand" "=r")
17722 (xor:SWI48
17723 (plus:SWI48
17724 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17725 (const_int 1))
17726 (match_dup 1)))
17727 (clobber (reg:CC FLAGS_REG))]
17728 "TARGET_TBM"
17729 "blcmsk\t{%1, %0|%0, %1}"
17730 [(set_attr "type" "bitmanip")
17731 (set_attr "mode" "<MODE>")])
17732
;; TBM blcs: matches  (x + 1) | x  where x is operand 1.
17733 (define_insn "*tbm_blcs_<mode>"
17734 [(set (match_operand:SWI48 0 "register_operand" "=r")
17735 (ior:SWI48
17736 (plus:SWI48
17737 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17738 (const_int 1))
17739 (match_dup 1)))
17740 (clobber (reg:CC FLAGS_REG))]
17741 "TARGET_TBM"
17742 "blcs\t{%1, %0|%0, %1}"
17743 [(set_attr "type" "bitmanip")
17744 (set_attr "mode" "<MODE>")])
17745
;; TBM blsfill: matches  (x - 1) | x  where x is operand 1.
17746 (define_insn "*tbm_blsfill_<mode>"
17747 [(set (match_operand:SWI48 0 "register_operand" "=r")
17748 (ior:SWI48
17749 (plus:SWI48
17750 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17751 (const_int -1))
17752 (match_dup 1)))
17753 (clobber (reg:CC FLAGS_REG))]
17754 "TARGET_TBM"
17755 "blsfill\t{%1, %0|%0, %1}"
17756 [(set_attr "type" "bitmanip")
17757 (set_attr "mode" "<MODE>")])
17758
;; TBM blsic: matches  (x - 1) | ~x  where x is operand 1.
17759 (define_insn "*tbm_blsic_<mode>"
17760 [(set (match_operand:SWI48 0 "register_operand" "=r")
17761 (ior:SWI48
17762 (plus:SWI48
17763 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17764 (const_int -1))
17765 (not:SWI48
17766 (match_dup 1))))
17767 (clobber (reg:CC FLAGS_REG))]
17768 "TARGET_TBM"
17769 "blsic\t{%1, %0|%0, %1}"
17770 [(set_attr "type" "bitmanip")
17771 (set_attr "mode" "<MODE>")])
17772
;; TBM t1mskc: matches  (x + 1) | ~x  where x is operand 1.
17773 (define_insn "*tbm_t1mskc_<mode>"
17774 [(set (match_operand:SWI48 0 "register_operand" "=r")
17775 (ior:SWI48
17776 (plus:SWI48
17777 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17778 (const_int 1))
17779 (not:SWI48
17780 (match_dup 1))))
17781 (clobber (reg:CC FLAGS_REG))]
17782 "TARGET_TBM"
17783 "t1mskc\t{%1, %0|%0, %1}"
17784 [(set_attr "type" "bitmanip")
17785 (set_attr "mode" "<MODE>")])
17786
;; TBM tzmsk: matches  (x - 1) & ~x  where x is operand 1.
17787 (define_insn "*tbm_tzmsk_<mode>"
17788 [(set (match_operand:SWI48 0 "register_operand" "=r")
17789 (and:SWI48
17790 (plus:SWI48
17791 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17792 (const_int -1))
17793 (not:SWI48
17794 (match_dup 1))))
17795 (clobber (reg:CC FLAGS_REG))]
17796 "TARGET_TBM"
17797 "tzmsk\t{%1, %0|%0, %1}"
17798 [(set_attr "type" "bitmanip")
17799 (set_attr "mode" "<MODE>")])
17800
;; Named popcount pattern for SWI48 modes (TARGET_POPCNT).
;; Output: popcnt, printed without the size suffix when TARGET_MACHO.
;; Split: when TARGET_AVOID_FALSE_DEP_FOR_BMI is set, the epilogue has been
;; completed, the function is optimized for speed, and the destination is
;; not mentioned in the source, the preparation statement calls
;; ix86_expand_clear on the destination and the insn is rewritten to the
;; same popcount plus an UNSPEC_INSN_FALSE_DEP reference to the destination.
17801 (define_insn_and_split "popcount<mode>2"
17802 [(set (match_operand:SWI48 0 "register_operand" "=r")
17803 (popcount:SWI48
17804 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
17805 (clobber (reg:CC FLAGS_REG))]
17806 "TARGET_POPCNT"
17807 {
17808 #if TARGET_MACHO
17809 return "popcnt\t{%1, %0|%0, %1}";
17810 #else
17811 return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
17812 #endif
17813 }
17814 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
17815 && optimize_function_for_speed_p (cfun)
17816 && !reg_mentioned_p (operands[0], operands[1])"
17817 [(parallel
17818 [(set (match_dup 0)
17819 (popcount:SWI48 (match_dup 1)))
17820 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
17821 (clobber (reg:CC FLAGS_REG))])]
17822 "ix86_expand_clear (operands[0]);"
17823 [(set_attr "prefix_rep" "1")
17824 (set_attr "type" "bitmanip")
17825 (set_attr "mode" "<MODE>")])
17826
17827 ; False dependency happens when destination is only updated by tzcnt,
17828 ; lzcnt or popcnt. There is no false dependency when destination is
17829 ; also used in source.
;; Popcount form carrying an extra UNSPEC_INSN_FALSE_DEP element whose
;; operand 2 is tied to the destination register by the "0" constraint.
;; The assembler output is the same as for popcount<mode>2.
17830 (define_insn "*popcount<mode>2_falsedep"
17831 [(set (match_operand:SWI48 0 "register_operand" "=r")
17832 (popcount:SWI48
17833 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
17834 (unspec [(match_operand:SWI48 2 "register_operand" "0")]
17835 UNSPEC_INSN_FALSE_DEP)
17836 (clobber (reg:CC FLAGS_REG))]
17837 "TARGET_POPCNT"
17838 {
17839 #if TARGET_MACHO
17840 return "popcnt\t{%1, %0|%0, %1}";
17841 #else
17842 return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
17843 #endif
17844 }
17845 [(set_attr "prefix_rep" "1")
17846 (set_attr "type" "bitmanip")
17847 (set_attr "mode" "<MODE>")])
17848
;; 64-bit only: SImode popcount used as a DImode value, written in RTL as
;; (and:DI (subreg:DI (popcount:SI ...) 0) 63).  Emitted as a 32-bit popcnt
;; on the %k0 form of the destination.  The split condition and the split
;; result mirror popcount<mode>2: clear the destination via
;; ix86_expand_clear and add an UNSPEC_INSN_FALSE_DEP reference to it.
17849 (define_insn_and_split "*popcountsi2_zext"
17850 [(set (match_operand:DI 0 "register_operand" "=r")
17851 (and:DI
17852 (subreg:DI
17853 (popcount:SI
17854 (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
17855 (const_int 63)))
17856 (clobber (reg:CC FLAGS_REG))]
17857 "TARGET_POPCNT && TARGET_64BIT"
17858 {
17859 #if TARGET_MACHO
17860 return "popcnt\t{%1, %k0|%k0, %1}";
17861 #else
17862 return "popcnt{l}\t{%1, %k0|%k0, %1}";
17863 #endif
17864 }
17865 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
17866 && optimize_function_for_speed_p (cfun)
17867 && !reg_mentioned_p (operands[0], operands[1])"
17868 [(parallel
17869 [(set (match_dup 0)
17870 (and:DI (subreg:DI (popcount:SI (match_dup 1)) 0) (const_int 63)))
17871 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
17872 (clobber (reg:CC FLAGS_REG))])]
17873 "ix86_expand_clear (operands[0]);"
17874 [(set_attr "prefix_rep" "1")
17875 (set_attr "type" "bitmanip")
17876 (set_attr "mode" "SI")])
17877
17878 ; False dependency happens when destination is only updated by tzcnt,
17879 ; lzcnt or popcnt. There is no false dependency when destination is
17880 ; also used in source.
;; Counterpart of *popcountsi2_zext carrying the UNSPEC_INSN_FALSE_DEP
;; element; operand 2 is tied to the DImode destination by the "0"
;; constraint.  The assembler output is the same as for *popcountsi2_zext.
17881 (define_insn "*popcountsi2_zext_falsedep"
17882 [(set (match_operand:DI 0 "register_operand" "=r")
17883 (and:DI
17884 (subreg:DI
17885 (popcount:SI
17886 (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
17887 (const_int 63)))
17888 (unspec [(match_operand:DI 2 "register_operand" "0")]
17889 UNSPEC_INSN_FALSE_DEP)
17890 (clobber (reg:CC FLAGS_REG))]
17891 "TARGET_POPCNT && TARGET_64BIT"
17892 {
17893 #if TARGET_MACHO
17894 return "popcnt\t{%1, %k0|%k0, %1}";
17895 #else
17896 return "popcnt{l}\t{%1, %k0|%k0, %1}";
17897 #endif
17898 }
17899 [(set_attr "prefix_rep" "1")
17900 (set_attr "type" "bitmanip")
17901 (set_attr "mode" "SI")])
17902
;; SImode popcount of a zero-extended HImode operand.  Valid only while
;; ix86_pre_reload_split () holds, and always split ("&& 1"; the output
;; template is "#").  The split emits popcounthi2 into a fresh HImode
;; pseudo and then zero-extends that pseudo into the SImode destination.
17903 (define_insn_and_split "*popcounthi2_1"
17904 [(set (match_operand:SI 0 "register_operand")
17905 (popcount:SI
17906 (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand"))))
17907 (clobber (reg:CC FLAGS_REG))]
17908 "TARGET_POPCNT
17909 && ix86_pre_reload_split ()"
17910 "#"
17911 "&& 1"
17912 [(const_int 0)]
17913 {
17914 rtx tmp = gen_reg_rtx (HImode);
17915
17916 emit_insn (gen_popcounthi2 (tmp, operands[1]));
17917 emit_insn (gen_zero_extendhisi2 (operands[0], tmp));
17918 DONE;
17919 })
17920
;; Named HImode popcount (TARGET_POPCNT).  Emits popcnt with the {w}
;; suffix, or with no suffix when TARGET_MACHO.  Unlike the SWI48 pattern
;; above, this one has no false-dependency split.
17921 (define_insn "popcounthi2"
17922 [(set (match_operand:HI 0 "register_operand" "=r")
17923 (popcount:HI
17924 (match_operand:HI 1 "nonimmediate_operand" "rm")))
17925 (clobber (reg:CC FLAGS_REG))]
17926 "TARGET_POPCNT"
17927 {
17928 #if TARGET_MACHO
17929 return "popcnt\t{%1, %0|%0, %1}";
17930 #else
17931 return "popcnt{w}\t{%1, %0|%0, %1}";
17932 #endif
17933 }
17934 [(set_attr "prefix_rep" "1")
17935 (set_attr "type" "bitmanip")
17936 (set_attr "mode" "HI")])
17937
;; DImode byte swap expander, available only with TARGET_64BIT.
;; Without TARGET_MOVBE the source is forced into a register first; with
;; MOVBE a memory source is left in place.
17938 (define_expand "bswapdi2"
17939 [(set (match_operand:DI 0 "register_operand")
17940 (bswap:DI (match_operand:DI 1 "nonimmediate_operand")))]
17941 "TARGET_64BIT"
17942 {
17943 if (!TARGET_MOVBE)
17944 operands[1] = force_reg (DImode, operands[1]);
17945 })
17946
;; SImode byte swap expander (unconditional).  Three cases:
;;  - TARGET_MOVBE: operands are used as given;
;;  - TARGET_BSWAP: the source is forced into a register;
;;  - otherwise: the source is copied to the destination and swapped in
;;    place with bswaphi_lowpart, a rotate left by 16, and bswaphi_lowpart
;;    again, after which the expander is DONE.
17947 (define_expand "bswapsi2"
17948 [(set (match_operand:SI 0 "register_operand")
17949 (bswap:SI (match_operand:SI 1 "nonimmediate_operand")))]
17950 ""
17951 {
17952 if (TARGET_MOVBE)
17953 ;
17954 else if (TARGET_BSWAP)
17955 operands[1] = force_reg (SImode, operands[1]);
17956 else
17957 {
17958 rtx x = operands[0];
17959
17960 emit_move_insn (x, operands[1]);
17961 emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x)));
17962 emit_insn (gen_rotlsi3 (x, x, GEN_INT (16)));
17963 emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x)));
17964 DONE;
17965 }
17966 })
17967
;; Byte swap when MOVBE is available; memory-to-memory is rejected by the
;; insn condition.  Alternatives:
;;   0: register in place          -> bswap
;;   1: memory source, register    -> movbe (load)
;;   2: register source, memory    -> movbe (store)
17968 (define_insn "*bswap<mode>2_movbe"
17969 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,r,m")
17970 (bswap:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,m,r")))]
17971 "TARGET_MOVBE
17972 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
17973 "@
17974 bswap\t%0
17975 movbe{<imodesuffix>}\t{%1, %0|%0, %1}
17976 movbe{<imodesuffix>}\t{%1, %0|%0, %1}"
17977 [(set_attr "type" "bitmanip,imov,imov")
17978 (set_attr "modrm" "0,1,1")
17979 (set_attr "prefix_0f" "*,1,1")
17980 (set_attr "prefix_extra" "*,1,1")
17981 (set_attr "mode" "<MODE>")])
17982
;; Plain in-place register byte swap (TARGET_BSWAP): the source is tied to
;; the destination by the "0" constraint and a single bswap is emitted.
17983 (define_insn "*bswap<mode>2"
17984 [(set (match_operand:SWI48 0 "register_operand" "=r")
17985 (bswap:SWI48 (match_operand:SWI48 1 "register_operand" "0")))]
17986 "TARGET_BSWAP"
17987 "bswap\t%0"
17988 [(set_attr "type" "bitmanip")
17989 (set_attr "modrm" "0")
17990 (set_attr "mode" "<MODE>")])
17991
;; HImode byte swap expander; provided only when TARGET_MOVBE is set.
;; It has no preparation code, so the pattern is emitted as written.
17992 (define_expand "bswaphi2"
17993 [(set (match_operand:HI 0 "register_operand")
17994 (bswap:HI (match_operand:HI 1 "nonimmediate_operand")))]
17995 "TARGET_MOVBE")
17996
;; HImode byte swap with MOVBE; memory-to-memory is rejected by the insn
;; condition.  Alternatives:
;;   0: in place in a "Q" register -> xchg{b} of the %h0 and %b0 parts
;;      (presumably the high and low byte of the register -- confirm
;;      against the operand-modifier documentation)
;;   1: memory source, register    -> movbe{w} (load)
;;   2: register source, memory    -> movbe{w} (store)
17997 (define_insn "*bswaphi2_movbe"
17998 [(set (match_operand:HI 0 "nonimmediate_operand" "=Q,r,m")
17999 (bswap:HI (match_operand:HI 1 "nonimmediate_operand" "0,m,r")))]
18000 "TARGET_MOVBE
18001 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
18002 "@
18003 xchg{b}\t{%h0, %b0|%b0, %h0}
18004 movbe{w}\t{%1, %0|%0, %1}
18005 movbe{w}\t{%1, %0|%0, %1}"
18006 [(set_attr "type" "imov")
18007 (set_attr "modrm" "*,1,1")
18008 (set_attr "prefix_0f" "*,1,1")
18009 (set_attr "prefix_extra" "*,1,1")
18010 (set_attr "pent_pair" "np,*,*")
18011 (set_attr "athlon_decode" "vector,*,*")
18012 (set_attr "amdfam10_decode" "double,*,*")
18013 (set_attr "bdver1_decode" "double,*,*")
18014 (set_attr "mode" "QI,HI,HI")])
18015
;; Peephole: rewrite an in-place HImode bswap of a general register as a
;; rotate by 8 with a flags clobber.  Applies only when TARGET_MOVBE is set,
;; neither TARGET_USE_XCHGB nor optimizing for size holds, and FLAGS_REG is
;; dead at that point (the replacement clobbers it).
18016 (define_peephole2
18017 [(set (match_operand:HI 0 "general_reg_operand")
18018 (bswap:HI (match_dup 0)))]
18019 "TARGET_MOVBE
18020 && !(TARGET_USE_XCHGB || optimize_function_for_size_p (cfun))
18021 && peep2_regno_dead_p (0, FLAGS_REG)"
18022 [(parallel [(set (match_dup 0) (rotate:HI (match_dup 0) (const_int 8)))
18023 (clobber (reg:CC FLAGS_REG))])])
18024
;; Byte swap of an HImode register written as a strict_low_part, so only
;; that part of the containing register is set.  Unconditional.
;; Alternatives:
;;   0 ("Q" register): xchg{b} of the %h0 / %b0 parts, length 2
;;   1 (any "r")     : rol{w} by 8, length 4
;; Alternative 0 is preferred for size; for speed it is preferred only
;; when TARGET_USE_XCHGB is set, otherwise alternative 1 is.
18025 (define_insn "bswaphi_lowpart"
18026 [(set (strict_low_part (match_operand:HI 0 "register_operand" "+Q,r"))
18027 (bswap:HI (match_dup 0)))
18028 (clobber (reg:CC FLAGS_REG))]
18029 ""
18030 "@
18031 xchg{b}\t{%h0, %b0|%b0, %h0}
18032 rol{w}\t{$8, %0|%0, 8}"
18033 [(set (attr "preferred_for_size")
18034 (cond [(eq_attr "alternative" "0")
18035 (symbol_ref "true")]
18036 (symbol_ref "false")))
18037 (set (attr "preferred_for_speed")
18038 (cond [(eq_attr "alternative" "0")
18039 (symbol_ref "TARGET_USE_XCHGB")]
18040 (symbol_ref "!TARGET_USE_XCHGB")))
18041 (set_attr "length" "2,4")
18042 (set_attr "mode" "QI,HI")])
18043
;; DImode parity expander, used only when POPCNT is not available.
;; The 64-bit input is folded down by XOR: high 32 bits ^ low 32 bits, then
;; high 16 bits ^ low 16 bits of that result.  The 16-bit value is passed to
;; parityhi2_cmp, which sets the flags; ix86_expand_setcc with code ORDERED
;; then materializes the result in a QImode scratch, which is zero-extended
;; to DImode (directly on 64-bit targets, via SImode otherwise).
;; The high half is obtained with a logical right shift by 32 on 64-bit
;; targets and with gen_highpart otherwise.
18044 (define_expand "paritydi2"
18045 [(set (match_operand:DI 0 "register_operand")
18046 (parity:DI (match_operand:DI 1 "register_operand")))]
18047 "! TARGET_POPCNT"
18048 {
18049 rtx scratch = gen_reg_rtx (QImode);
18050 rtx hipart1 = gen_reg_rtx (SImode);
18051 rtx lopart1 = gen_reg_rtx (SImode);
18052 rtx xor1 = gen_reg_rtx (SImode);
18053 rtx shift2 = gen_reg_rtx (SImode);
18054 rtx hipart2 = gen_reg_rtx (HImode);
18055 rtx lopart2 = gen_reg_rtx (HImode);
18056 rtx xor2 = gen_reg_rtx (HImode);
18057
18058 if (TARGET_64BIT)
18059 {
18060 rtx shift1 = gen_reg_rtx (DImode);
18061 emit_insn (gen_lshrdi3 (shift1, operands[1], GEN_INT (32)));
18062 emit_move_insn (hipart1, gen_lowpart (SImode, shift1));
18063 }
18064 else
18065 emit_move_insn (hipart1, gen_highpart (SImode, operands[1]));
18066
18067 emit_move_insn (lopart1, gen_lowpart (SImode, operands[1]));
18068 emit_insn (gen_xorsi3 (xor1, hipart1, lopart1));
18069
18070 emit_insn (gen_lshrsi3 (shift2, xor1, GEN_INT (16)));
18071 emit_move_insn (hipart2, gen_lowpart (HImode, shift2));
18072 emit_move_insn (lopart2, gen_lowpart (HImode, xor1));
18073 emit_insn (gen_xorhi3 (xor2, hipart2, lopart2));
18074
18075 emit_insn (gen_parityhi2_cmp (xor2));
18076
18077 ix86_expand_setcc (scratch, ORDERED,
18078 gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
18079
18080 if (TARGET_64BIT)
18081 emit_insn (gen_zero_extendqidi2 (operands[0], scratch));
18082 else
18083 {
18084 rtx tmp = gen_reg_rtx (SImode);
18085
18086 emit_insn (gen_zero_extendqisi2 (tmp, scratch));
18087 emit_insn (gen_zero_extendsidi2 (operands[0], tmp));
18088 }
18089 DONE;
18090 })
18091
;; SImode parity expander, used only when POPCNT is not available.
;; XORs the high and low 16-bit halves of the input, passes the HImode
;; result to parityhi2_cmp to set the flags, materializes the ORDERED
;; condition in a QImode scratch via ix86_expand_setcc, and zero-extends
;; that scratch into the SImode destination.
18092 (define_expand "paritysi2"
18093 [(set (match_operand:SI 0 "register_operand")
18094 (parity:SI (match_operand:SI 1 "register_operand")))]
18095 "! TARGET_POPCNT"
18096 {
18097 rtx scratch = gen_reg_rtx (QImode);
18098 rtx shift = gen_reg_rtx (SImode);
18099 rtx hipart = gen_reg_rtx (HImode);
18100 rtx lopart = gen_reg_rtx (HImode);
18101 rtx tmp = gen_reg_rtx (HImode);
18102
18103 emit_insn (gen_lshrsi3 (shift, operands[1], GEN_INT (16)));
18104 emit_move_insn (hipart, gen_lowpart (HImode, shift));
18105 emit_move_insn (lopart, gen_lowpart (HImode, operands[1]));
18106 emit_insn (gen_xorhi3 (tmp, hipart, lopart));
18107
18108 emit_insn (gen_parityhi2_cmp (tmp));
18109
18110 ix86_expand_setcc (scratch, ORDERED,
18111 gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
18112
18113 emit_insn (gen_zero_extendqisi2 (operands[0], scratch));
18114 DONE;
18115 })
18116
;; HImode parity expander, used only when POPCNT is not available.
;; Passes the input straight to parityhi2_cmp, materializes the ORDERED
;; condition in a QImode scratch, and zero-extends it to HImode.
;; Note that parityhi2_cmp declares a clobber of its operand, which here is
;; the expander's input operand 1.
18117 (define_expand "parityhi2"
18118 [(set (match_operand:HI 0 "register_operand")
18119 (parity:HI (match_operand:HI 1 "register_operand")))]
18120 "! TARGET_POPCNT"
18121 {
18122 rtx scratch = gen_reg_rtx (QImode);
18123
18124 emit_insn (gen_parityhi2_cmp (operands[1]));
18125
18126 ix86_expand_setcc (scratch, ORDERED,
18127 gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
18128
18129 emit_insn (gen_zero_extendqihi2 (operands[0], scratch));
18130 DONE;
18131 })
18132
;; QImode parity expander, used only when POPCNT is not available.
;; Sets the flags with parityqi2_cmp on the input and stores the ORDERED
;; condition directly into the QImode destination; no extension is needed.
18133 (define_expand "parityqi2"
18134 [(set (match_operand:QI 0 "register_operand")
18135 (parity:QI (match_operand:QI 1 "register_operand")))]
18136 "! TARGET_POPCNT"
18137 {
18138 emit_insn (gen_parityqi2_cmp (operands[1]));
18139
18140 ix86_expand_setcc (operands[0], ORDERED,
18141 gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
18142 DONE;
18143 })
18144
;; Sets FLAGS_REG to the UNSPEC_PARITY of an HImode register by XORing its
;; %h0 part into its %b0 part (presumably high byte into low byte --
;; confirm against the operand-modifier documentation).  The operand must
;; be in a "Q" register and is modified, hence the "+" constraint and the
;; explicit clobber.
18145 (define_insn "parityhi2_cmp"
18146 [(set (reg:CC FLAGS_REG)
18147 (unspec:CC [(match_operand:HI 0 "register_operand" "+Q")]
18148 UNSPEC_PARITY))
18149 (clobber (match_dup 0))]
18150 ""
18151 "xor{b}\t{%h0, %b0|%b0, %h0}"
18152 [(set_attr "length" "2")
18153 (set_attr "mode" "QI")])
18154
;; Sets FLAGS_REG to the UNSPEC_PARITY of a QImode register by testing the
;; register against itself.  The operand is only read ("q" constraint, no
;; clobber).
18155 (define_insn "parityqi2_cmp"
18156 [(set (reg:CC FLAGS_REG)
18157 (unspec:CC [(match_operand:QI 0 "register_operand" "q")]
18158 UNSPEC_PARITY))]
18159 ""
18160 "test{b}\t%0, %0"
18161 [(set_attr "mode" "QI")])
18162
18163 ;; Replace zero_extend:HI followed by parityhi2_cmp with parityqi2_cmp
;; Matches a QI->HI zero extension into operand 0 immediately followed by
;; the parityhi2_cmp pattern on operand 0 (which also clobbers it), and
;; replaces both with the parityqi2_cmp pattern on the original QImode
;; register (operand 1).
18164 (define_peephole2
18165 [(set (match_operand:HI 0 "register_operand")
18166 (zero_extend:HI (match_operand:QI 1 "general_reg_operand")))
18167 (parallel [(set (reg:CC FLAGS_REG)
18168 (unspec:CC [(match_dup 0)] UNSPEC_PARITY))
18169 (clobber (match_dup 0))])]
18170 ""
18171 [(set (reg:CC FLAGS_REG)
18172 (unspec:CC [(match_dup 1)] UNSPEC_PARITY))])
18173
18174 ;; Eliminate QImode popcount&1 using parity flag
;; Matched sequence (four insns):
;;   operand 0 = zero_extend:SI (QImode register operand 1)
;;   operand 2 = popcount:SI (operand 0)
;;   flags     = compare ((operand 3 & 1), 0) in CCZmode
;;   conditional jump on operator 4 to label operand 5
;; Conditions: operands 2 and 3 are the same hard register, operands 0 and
;; 2 are dead at position 3, and FLAGS_REG is dead at position 4.
;; Replacement: set the flags with UNSPEC_PARITY of operand 1 and jump on a
;; copy of operator 4 whose code is rewritten: EQ becomes UNORDERED, any
;; other code becomes ORDERED.
18175 (define_peephole2
18176 [(set (match_operand:SI 0 "register_operand")
18177 (zero_extend:SI (match_operand:QI 1 "general_reg_operand")))
18178 (parallel [(set (match_operand:SI 2 "register_operand")
18179 (popcount:SI (match_dup 0)))
18180 (clobber (reg:CC FLAGS_REG))])
18181 (set (reg:CCZ FLAGS_REG)
18182 (compare:CCZ (and:QI (match_operand:QI 3 "register_operand")
18183 (const_int 1))
18184 (const_int 0)))
18185 (set (pc) (if_then_else (match_operator 4 "bt_comparison_operator"
18186 [(reg:CCZ FLAGS_REG)
18187 (const_int 0)])
18188 (label_ref (match_operand 5))
18189 (pc)))]
18190 "REGNO (operands[2]) == REGNO (operands[3])
18191 && peep2_reg_dead_p (3, operands[0])
18192 && peep2_reg_dead_p (3, operands[2])
18193 && peep2_regno_dead_p (4, FLAGS_REG)"
18194 [(set (reg:CC FLAGS_REG)
18195 (unspec:CC [(match_dup 1)] UNSPEC_PARITY))
18196 (set (pc) (if_then_else (match_op_dup 4 [(reg:CC FLAGS_REG)
18197 (const_int 0)])
18198 (label_ref (match_dup 5))
18199 (pc)))]
18200 {
18201 operands[4] = shallow_copy_rtx (operands[4]);
18202 PUT_CODE (operands[4], GET_CODE (operands[4]) == EQ ? UNORDERED : ORDERED);
18203 })
18204
18205 ;; Eliminate HImode popcount&1 using parity flag
;; Matched sequence, with an HImode "Q" scratch register as operand 0:
;;   operand 1 = popcount:HI (operand 2, register or memory)
;;   operand 3 = zero_extend (operand 1)
;;   flags     = compare ((operand 4 & 1), 0) in CCZmode
;;   conditional jump on operator 5 to label operand 6
;; Conditions: operands 3 and 4 are the same hard register, operands 1 and
;; 3 are dead at position 3, and FLAGS_REG is dead at position 4.
;; Replacement: copy operand 2 into the scratch, apply the parityhi2_cmp
;; pattern to the scratch (which clobbers it), and jump on a copy of
;; operator 5 with EQ rewritten to UNORDERED and any other code to ORDERED.
18206 (define_peephole2
18207 [(match_scratch:HI 0 "Q")
18208 (parallel [(set (match_operand:HI 1 "register_operand")
18209 (popcount:HI
18210 (match_operand:HI 2 "nonimmediate_operand")))
18211 (clobber (reg:CC FLAGS_REG))])
18212 (set (match_operand 3 "register_operand")
18213 (zero_extend (match_dup 1)))
18214 (set (reg:CCZ FLAGS_REG)
18215 (compare:CCZ (and:QI (match_operand:QI 4 "register_operand")
18216 (const_int 1))
18217 (const_int 0)))
18218 (set (pc) (if_then_else (match_operator 5 "bt_comparison_operator"
18219 [(reg:CCZ FLAGS_REG)
18220 (const_int 0)])
18221 (label_ref (match_operand 6))
18222 (pc)))]
18223 "REGNO (operands[3]) == REGNO (operands[4])
18224 && peep2_reg_dead_p (3, operands[1])
18225 && peep2_reg_dead_p (3, operands[3])
18226 && peep2_regno_dead_p (4, FLAGS_REG)"
18227 [(set (match_dup 0) (match_dup 2))
18228 (parallel [(set (reg:CC FLAGS_REG)
18229 (unspec:CC [(match_dup 0)] UNSPEC_PARITY))
18230 (clobber (match_dup 0))])
18231 (set (pc) (if_then_else (match_op_dup 5 [(reg:CC FLAGS_REG)
18232 (const_int 0)])
18233 (label_ref (match_dup 6))
18234 (pc)))]
18235 {
18236 operands[5] = shallow_copy_rtx (operands[5]);
18237 PUT_CODE (operands[5], GET_CODE (operands[5]) == EQ ? UNORDERED : ORDERED);
18238 })
18239
18240 \f
18241 ;; Thread-local storage patterns for ELF.
18242 ;;
18243 ;; Note that these code sequences must appear exactly as shown
18244 ;; in order to allow linker relaxation.
18245
;; 32-bit GNU-TLS global-dynamic sequence, emitted as one multi-instruction
;; insn of length 12.  Operand 1 is a register in class "Yb", operand 2 the
;; TLS symbol, operand 3 the address of the function to call.  The result
;; is in "a"; "d", "c" and the flags are clobbered.
;; Output:
;;  - lea of %E2@tlsgd.  The (,%1,1) addressing form is used when
;;    TARGET_SUN_TLS, flag_plt, or no assembler support for the GOT call
;;    form; otherwise the plain (%1) form is used.
;;  - call: on Sun TLS either %a2@tlsgdplt (if HAVE_AS_IX86_TLSGDPLT is
;;    defined) or %p3@plt; otherwise a direct call %P3 when flag_plt or no
;;    GOT call support, else an indirect call through %p3@GOT(%1).
18246 (define_insn "*tls_global_dynamic_32_gnu"
18247 [(set (match_operand:SI 0 "register_operand" "=a")
18248 (unspec:SI
18249 [(match_operand:SI 1 "register_operand" "Yb")
18250 (match_operand 2 "tls_symbolic_operand")
18251 (match_operand 3 "constant_call_address_operand" "Bz")
18252 (reg:SI SP_REG)]
18253 UNSPEC_TLS_GD))
18254 (clobber (match_scratch:SI 4 "=d"))
18255 (clobber (match_scratch:SI 5 "=c"))
18256 (clobber (reg:CC FLAGS_REG))]
18257 "!TARGET_64BIT && TARGET_GNU_TLS"
18258 {
18259 if (TARGET_SUN_TLS || flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
18260 output_asm_insn
18261 ("lea{l}\t{%E2@tlsgd(,%1,1), %0|%0, %E2@tlsgd[%1*1]}", operands);
18262 else
18263 output_asm_insn
18264 ("lea{l}\t{%E2@tlsgd(%1), %0|%0, %E2@tlsgd[%1]}", operands);
18265 if (TARGET_SUN_TLS)
18266 #ifdef HAVE_AS_IX86_TLSGDPLT
18267 return "call\t%a2@tlsgdplt";
18268 #else
18269 return "call\t%p3@plt";
18270 #endif
18271 if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
18272 return "call\t%P3";
18273 return "call\t{*%p3@GOT(%1)|[DWORD PTR %p3@GOT[%1]]}";
18274 }
18275 [(set_attr "type" "multi")
18276 (set_attr "length" "12")])
18277
;; Expander producing the 32-bit global-dynamic UNSPEC_TLS_GD parallel with
;; two SImode scratch clobbers and a flags clobber.  Note the operand
;; numbering: operand 1 is the TLS symbol and operand 2 the register, the
;; reverse of their order inside the unspec vector.  The preparation
;; statement records in ix86_tls_descriptor_calls_expanded_in_cfun that
;; such a sequence was expanded in the current function.
18278 (define_expand "tls_global_dynamic_32"
18279 [(parallel
18280 [(set (match_operand:SI 0 "register_operand")
18281 (unspec:SI [(match_operand:SI 2 "register_operand")
18282 (match_operand 1 "tls_symbolic_operand")
18283 (match_operand 3 "constant_call_address_operand")
18284 (reg:SI SP_REG)]
18285 UNSPEC_TLS_GD))
18286 (clobber (scratch:SI))
18287 (clobber (scratch:SI))
18288 (clobber (reg:CC FLAGS_REG))])]
18289 ""
18290 "ix86_tls_descriptor_calls_expanded_in_cfun = true;")
18291
;; 64-bit global-dynamic TLS sequence, modelled as a call whose value lands
;; in "a", paired with an UNSPEC_TLS_GD naming the TLS symbol (operand 1).
;; Output, in order:
;;  - a data16 prefix, except on x32 (see the in-body comment for why
;;    data16 is used rather than .byte);
;;  - lea of %E1@tlsgd(%rip) into %rdi;
;;  - padding: 0x6666 emitted via ASM_SHORT when TARGET_SUN_TLS, flag_plt
;;    or no GOT call support, otherwise a single 0x66 via ASM_BYTE;
;;  - a rex64 prefix line;
;;  - the call: %p2@plt on Sun TLS, direct %P2 when flag_plt or no GOT call
;;    support, else indirect through %p2@GOTPCREL(%rip).
;; The length attribute is 15 on x32 and 16 otherwise.
18292 (define_insn "*tls_global_dynamic_64_<mode>"
18293 [(set (match_operand:P 0 "register_operand" "=a")
18294 (call:P
18295 (mem:QI (match_operand 2 "constant_call_address_operand" "Bz"))
18296 (match_operand 3)))
18297 (unspec:P [(match_operand 1 "tls_symbolic_operand")
18298 (reg:P SP_REG)]
18299 UNSPEC_TLS_GD)]
18300 "TARGET_64BIT"
18301 {
18302 if (!TARGET_X32)
18303 /* The .loc directive has effect for 'the immediately following assembly
18304 instruction'. So for a sequence:
18305 .loc f l
18306 .byte x
18307 insn1
18308 the 'immediately following assembly instruction' is insn1.
18309 We want to emit an insn prefix here, but if we use .byte (as shown in
18310 'ELF Handling For Thread-Local Storage'), a preceding .loc will point
18311 inside the insn sequence, rather than to the start. After relaxation
18312 of the sequence by the linker, the .loc might point inside an insn.
18313 Use data16 prefix instead, which doesn't have this problem. */
18314 fputs ("\tdata16", asm_out_file);
18315 output_asm_insn
18316 ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands);
18317 if (TARGET_SUN_TLS || flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
18318 fputs (ASM_SHORT "0x6666\n", asm_out_file);
18319 else
18320 fputs (ASM_BYTE "0x66\n", asm_out_file);
18321 fputs ("\trex64\n", asm_out_file);
18322 if (TARGET_SUN_TLS)
18323 return "call\t%p2@plt";
18324 if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
18325 return "call\t%P2";
18326 return "call\t{*%p2@GOTPCREL(%%rip)|[QWORD PTR %p2@GOTPCREL[rip]]}";
18327 }
18328 [(set_attr "type" "multi")
18329 (set (attr "length")
18330 (symbol_ref "TARGET_X32 ? 15 : 16"))])
18331
;; Large-PIC form of the 64-bit global-dynamic sequence (CM_LARGE_PIC,
;; not PE-COFF).  The call address is register operand 2 (class "b") plus
;; immediate operand 3, which the insn condition requires to be a CONST
;; wrapping an UNSPEC_PLTOFF.
;; Output: lea of %E1@tlsgd(%rip) into %rdi, movabs of operand 3 into %rax,
;; add of operand 2 to %rax, then an indirect call through %rax.
18332 (define_insn "*tls_global_dynamic_64_largepic"
18333 [(set (match_operand:DI 0 "register_operand" "=a")
18334 (call:DI
18335 (mem:QI (plus:DI (match_operand:DI 2 "register_operand" "b")
18336 (match_operand:DI 3 "immediate_operand" "i")))
18337 (match_operand 4)))
18338 (unspec:DI [(match_operand 1 "tls_symbolic_operand")
18339 (reg:DI SP_REG)]
18340 UNSPEC_TLS_GD)]
18341 "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF
18342 && GET_CODE (operands[3]) == CONST
18343 && GET_CODE (XEXP (operands[3], 0)) == UNSPEC
18344 && XINT (XEXP (operands[3], 0), 1) == UNSPEC_PLTOFF"
18345 {
18346 output_asm_insn
18347 ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands);
18348 output_asm_insn ("movabs{q}\t{%3, %%rax|rax, %3}", operands);
18349 output_asm_insn ("add{q}\t{%2, %%rax|rax, %2}", operands);
18350 return "call\t{*%%rax|rax}";
18351 }
18352 [(set_attr "type" "multi")
18353 (set_attr "length" "22")])
18354
;; Expander producing the 64-bit global-dynamic parallel: a call through
;; operand 2 (no predicate, so any address form is accepted here) with a
;; constant 0 as the second call operand, paired with UNSPEC_TLS_GD on the
;; TLS symbol (operand 1).  The preparation statement sets
;; ix86_tls_descriptor_calls_expanded_in_cfun.
18355 (define_expand "@tls_global_dynamic_64_<mode>"
18356 [(parallel
18357 [(set (match_operand:P 0 "register_operand")
18358 (call:P
18359 (mem:QI (match_operand 2))
18360 (const_int 0)))
18361 (unspec:P [(match_operand 1 "tls_symbolic_operand")
18362 (reg:P SP_REG)]
18363 UNSPEC_TLS_GD)])]
18364 "TARGET_64BIT"
18365 "ix86_tls_descriptor_calls_expanded_in_cfun = true;")
18366
;; 32-bit GNU-TLS local-dynamic base sequence (UNSPEC_TLS_LD_BASE), emitted
;; as one multi-instruction insn of length 11.  Operand 1 is a register in
;; class "Yb", operand 2 the address of the function to call.  The result
;; is in "a"; "d", "c" and the flags are clobbered.
;; Output: lea of %&@tlsldm(%1), then the call.  On Sun TLS the call is
;; %&@tlsldmplt when HAVE_AS_IX86_TLSLDMPLT is nonzero and %p2@plt
;; otherwise.  Off Sun TLS it is a direct %P2 when flag_plt or no GOT call
;; support, else an indirect call through %p2@GOT(%1).
18367 (define_insn "*tls_local_dynamic_base_32_gnu"
18368 [(set (match_operand:SI 0 "register_operand" "=a")
18369 (unspec:SI
18370 [(match_operand:SI 1 "register_operand" "Yb")
18371 (match_operand 2 "constant_call_address_operand" "Bz")
18372 (reg:SI SP_REG)]
18373 UNSPEC_TLS_LD_BASE))
18374 (clobber (match_scratch:SI 3 "=d"))
18375 (clobber (match_scratch:SI 4 "=c"))
18376 (clobber (reg:CC FLAGS_REG))]
18377 "!TARGET_64BIT && TARGET_GNU_TLS"
18378 {
18379 output_asm_insn
18380 ("lea{l}\t{%&@tlsldm(%1), %0|%0, %&@tlsldm[%1]}", operands);
18381 if (TARGET_SUN_TLS)
18382 {
18383 if (HAVE_AS_IX86_TLSLDMPLT)
18384 return "call\t%&@tlsldmplt";
18385 else
18386 return "call\t%p2@plt";
18387 }
18388 if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
18389 return "call\t%P2";
18390 return "call\t{*%p2@GOT(%1)|[DWORD PTR %p2@GOT[%1]]}";
18391 }
18392 [(set_attr "type" "multi")
18393 (set_attr "length" "11")])
18394
;; Expander producing the 32-bit local-dynamic base parallel
;; (UNSPEC_TLS_LD_BASE) with two SImode scratch clobbers and a flags
;; clobber.  The preparation statement sets
;; ix86_tls_descriptor_calls_expanded_in_cfun.
18395 (define_expand "tls_local_dynamic_base_32"
18396 [(parallel
18397 [(set (match_operand:SI 0 "register_operand")
18398 (unspec:SI
18399 [(match_operand:SI 1 "register_operand")
18400 (match_operand 2 "constant_call_address_operand")
18401 (reg:SI SP_REG)]
18402 UNSPEC_TLS_LD_BASE))
18403 (clobber (scratch:SI))
18404 (clobber (scratch:SI))
18405 (clobber (reg:CC FLAGS_REG))])]
18406 ""
18407 "ix86_tls_descriptor_calls_expanded_in_cfun = true;")
18408
;; 64-bit local-dynamic base sequence, modelled as a call whose value lands
;; in "a", paired with UNSPEC_TLS_LD_BASE.  Length 12.
;; Output: lea of %&@tlsld(%rip) into %rdi, then the call: %p1@plt on Sun
;; TLS, direct %P1 when flag_plt or no GOT call support, else indirect
;; through %p1@GOTPCREL(%rip).  No prefix padding is emitted here, unlike
;; the global-dynamic 64-bit sequence.
18409 (define_insn "*tls_local_dynamic_base_64_<mode>"
18410 [(set (match_operand:P 0 "register_operand" "=a")
18411 (call:P
18412 (mem:QI (match_operand 1 "constant_call_address_operand" "Bz"))
18413 (match_operand 2)))
18414 (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)]
18415 "TARGET_64BIT"
18416 {
18417 output_asm_insn
18418 ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands);
18419 if (TARGET_SUN_TLS)
18420 return "call\t%p1@plt";
18421 if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
18422 return "call\t%P1";
18423 return "call\t{*%p1@GOTPCREL(%%rip)|[QWORD PTR %p1@GOTPCREL[rip]]}";
18424 }
18425 [(set_attr "type" "multi")
18426 (set_attr "length" "12")])
18427
;; Large-PIC form of the 64-bit local-dynamic base sequence (CM_LARGE_PIC,
;; not PE-COFF).  The call address is register operand 1 (class "b") plus
;; immediate operand 2, which must be a CONST wrapping an UNSPEC_PLTOFF.
;; Output: lea of %&@tlsld(%rip) into %rdi, movabs of operand 2 into %rax,
;; add of operand 1 to %rax, then an indirect call through %rax.
18428 (define_insn "*tls_local_dynamic_base_64_largepic"
18429 [(set (match_operand:DI 0 "register_operand" "=a")
18430 (call:DI
18431 (mem:QI (plus:DI (match_operand:DI 1 "register_operand" "b")
18432 (match_operand:DI 2 "immediate_operand" "i")))
18433 (match_operand 3)))
18434 (unspec:DI [(reg:DI SP_REG)] UNSPEC_TLS_LD_BASE)]
18435 "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF
18436 && GET_CODE (operands[2]) == CONST
18437 && GET_CODE (XEXP (operands[2], 0)) == UNSPEC
18438 && XINT (XEXP (operands[2], 0), 1) == UNSPEC_PLTOFF"
18439 {
18440 output_asm_insn
18441 ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands);
18442 output_asm_insn ("movabs{q}\t{%2, %%rax|rax, %2}", operands);
18443 output_asm_insn ("add{q}\t{%1, %%rax|rax, %1}", operands);
18444 return "call\t{*%%rax|rax}";
18445 }
18446 [(set_attr "type" "multi")
18447 (set_attr "length" "22")])
18448
;; Expander producing the 64-bit local-dynamic base parallel: a call
;; through operand 1 (no predicate) with a constant 0 as the second call
;; operand, paired with UNSPEC_TLS_LD_BASE.  The preparation statement sets
;; ix86_tls_descriptor_calls_expanded_in_cfun.
18449 (define_expand "@tls_local_dynamic_base_64_<mode>"
18450 [(parallel
18451 [(set (match_operand:P 0 "register_operand")
18452 (call:P
18453 (mem:QI (match_operand 1))
18454 (const_int 0)))
18455 (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)])]
18456 "TARGET_64BIT"
18457 "ix86_tls_descriptor_calls_expanded_in_cfun = true;")
18458
18459 ;; Local dynamic of a single variable is a loss. Show combine how
18460 ;; to convert that back to global dynamic.
18461
;; Matches a 32-bit local-dynamic base (UNSPEC_TLS_LD_BASE) added to the
;; UNSPEC_DTPOFF offset of a single TLS symbol (operand 3).  It is never
;; output directly ("#") and is split unconditionally into the
;; global-dynamic form: UNSPEC_TLS_GD on operand 1, the symbol, and the
;; call address, keeping the same two scratch clobbers and flags clobber.
18462 (define_insn_and_split "*tls_local_dynamic_32_once"
18463 [(set (match_operand:SI 0 "register_operand" "=a")
18464 (plus:SI
18465 (unspec:SI [(match_operand:SI 1 "register_operand" "b")
18466 (match_operand 2 "constant_call_address_operand" "Bz")
18467 (reg:SI SP_REG)]
18468 UNSPEC_TLS_LD_BASE)
18469 (const:SI (unspec:SI
18470 [(match_operand 3 "tls_symbolic_operand")]
18471 UNSPEC_DTPOFF))))
18472 (clobber (match_scratch:SI 4 "=d"))
18473 (clobber (match_scratch:SI 5 "=c"))
18474 (clobber (reg:CC FLAGS_REG))]
18475 ""
18476 "#"
18477 ""
18478 [(parallel
18479 [(set (match_dup 0)
18480 (unspec:SI [(match_dup 1) (match_dup 3) (match_dup 2)
18481 (reg:SI SP_REG)]
18482 UNSPEC_TLS_GD))
18483 (clobber (match_dup 4))
18484 (clobber (match_dup 5))
18485 (clobber (reg:CC FLAGS_REG))])])
18486
18487 ;; Load and add the thread base pointer from %<tp_seg>:0.
;; Expander that sets operand 0 to UNSPEC_TP of constant 0.  The expander
;; condition is empty; TLS support is checked in the preparation code
;; instead, which reports an error naming __builtin_thread_pointer when
;; targetm.have_tls is false.
18488 (define_expand "get_thread_pointer<mode>"
18489 [(set (match_operand:PTR 0 "register_operand")
18490 (unspec:PTR [(const_int 0)] UNSPEC_TP))]
18491 ""
18492 {
18493 /* targetm is not visible in the scope of the condition. */
18494 if (!targetm.have_tls)
18495 error ("%<__builtin_thread_pointer%> is not supported on this target");
18496 })
18497
;; Loads the UNSPEC_TP value into a general register.  The insn is split
;; into a plain move from operand 1, which the preparation code builds as
;; a constant MEM at address 0 in the DEFAULT_TLS_SEG_REG address space.
18498 (define_insn_and_split "*load_tp_<mode>"
18499 [(set (match_operand:PTR 0 "register_operand" "=r")
18500 (unspec:PTR [(const_int 0)] UNSPEC_TP))]
18501 ""
18502 "#"
18503 ""
18504 [(set (match_dup 0)
18505 (match_dup 1))]
18506 {
18507 addr_space_t as = DEFAULT_TLS_SEG_REG;
18508
18509 operands[1] = gen_const_mem (<MODE>mode, const0_rtx);
18510 set_mem_addr_space (operands[1], as);
18511 })
18512
;; TARGET_X32 variant of the thread-pointer load: the SImode UNSPEC_TP
;; value is zero-extended into a DImode register.  Split into a
;; zero_extend of an SImode constant MEM at address 0 in the
;; DEFAULT_TLS_SEG_REG address space.
18513 (define_insn_and_split "*load_tp_x32_zext"
18514 [(set (match_operand:DI 0 "register_operand" "=r")
18515 (zero_extend:DI
18516 (unspec:SI [(const_int 0)] UNSPEC_TP)))]
18517 "TARGET_X32"
18518 "#"
18519 "&& 1"
18520 [(set (match_dup 0)
18521 (zero_extend:DI (match_dup 1)))]
18522 {
18523 addr_space_t as = DEFAULT_TLS_SEG_REG;
18524
18525 operands[1] = gen_const_mem (SImode, const0_rtx);
18526 set_mem_addr_space (operands[1], as);
18527 })
18528
;; Adds the UNSPEC_TP value to register operand 1, which is tied to the
;; destination (constraint "0"); the flags register is clobbered.  Split
;; into a plus of operand 1 and operand 2, where operand 2 is built as a
;; constant MEM at address 0 in the DEFAULT_TLS_SEG_REG address space.
18529 (define_insn_and_split "*add_tp_<mode>"
18530 [(set (match_operand:PTR 0 "register_operand" "=r")
18531 (plus:PTR
18532 (unspec:PTR [(const_int 0)] UNSPEC_TP)
18533 (match_operand:PTR 1 "register_operand" "0")))
18534 (clobber (reg:CC FLAGS_REG))]
18535 ""
18536 "#"
18537 ""
18538 [(parallel
18539 [(set (match_dup 0)
18540 (plus:PTR (match_dup 1) (match_dup 2)))
18541 (clobber (reg:CC FLAGS_REG))])]
18542 {
18543 addr_space_t as = DEFAULT_TLS_SEG_REG;
18544
18545 operands[2] = gen_const_mem (<MODE>mode, const0_rtx);
18546 set_mem_addr_space (operands[2], as);
18547 })
18548
;; TARGET_X32 variant of the thread-pointer add: the SImode sum of
;; UNSPEC_TP and operand 1 (tied to the destination) is zero-extended to
;; DImode; flags are clobbered.  Split into the same zero-extended plus
;; with UNSPEC_TP replaced by an SImode constant MEM at address 0 in the
;; DEFAULT_TLS_SEG_REG address space.
18549 (define_insn_and_split "*add_tp_x32_zext"
18550 [(set (match_operand:DI 0 "register_operand" "=r")
18551 (zero_extend:DI
18552 (plus:SI (unspec:SI [(const_int 0)] UNSPEC_TP)
18553 (match_operand:SI 1 "register_operand" "0"))))
18554 (clobber (reg:CC FLAGS_REG))]
18555 "TARGET_X32"
18556 "#"
18557 "&& 1"
18558 [(parallel
18559 [(set (match_dup 0)
18560 (zero_extend:DI
18561 (plus:SI (match_dup 1) (match_dup 2))))
18562 (clobber (reg:CC FLAGS_REG))])]
18563 {
18564 addr_space_t as = DEFAULT_TLS_SEG_REG;
18565
18566 operands[2] = gen_const_mem (SImode, const0_rtx);
18567 set_mem_addr_space (operands[2], as);
18568 })
18569
18570 ;; The Sun linker took the AMD64 TLS spec literally and can only handle
18571 ;; %rax as destination of the initial executable code sequence.
;; Initial-exec TLS access for TARGET_64BIT && TARGET_SUN_TLS.  Operand 0
;; is restricted to constraint "a".  Two instructions are output: a mov of
;; %fs:0 into operand 0, then an add of operand 1's @gottpoff(%rip) entry.
;; The flags register is clobbered.
18572 (define_insn "tls_initial_exec_64_sun"
18573 [(set (match_operand:DI 0 "register_operand" "=a")
18574 (unspec:DI
18575 [(match_operand 1 "tls_symbolic_operand")]
18576 UNSPEC_TLS_IE_SUN))
18577 (clobber (reg:CC FLAGS_REG))]
18578 "TARGET_64BIT && TARGET_SUN_TLS"
18579 {
18580 output_asm_insn
18581 ("mov{q}\t{%%fs:0, %0|%0, QWORD PTR fs:0}", operands);
18582 return "add{q}\t{%a1@gottpoff(%%rip), %0|%0, %a1@gottpoff[rip]}";
18583 }
18584 [(set_attr "type" "multi")])
18585
18586 ;; GNU2 TLS patterns can be split.
18587
;; Expander for the 32-bit GNU2 TLS sequence (!TARGET_64BIT &&
;; TARGET_GNU2_TLS).  It emits two insns: the first sets operand 3 to
;; register operand 2 plus the UNSPEC_TLSDESC constant of TLS symbol
;; operand 1; the second sets operand 0 from an UNSPEC_TLSDESC over
;; operands 1, 3, 2 and the stack pointer, clobbering flags.  Operand 3 is
;; a fresh Pmode pseudo when one can be created, otherwise operand 0.
;; The preparation code also sets
;; ix86_tls_descriptor_calls_expanded_in_cfun to true.
18588 (define_expand "tls_dynamic_gnu2_32"
18589 [(set (match_dup 3)
18590 (plus:SI (match_operand:SI 2 "register_operand")
18591 (const:SI
18592 (unspec:SI [(match_operand 1 "tls_symbolic_operand")]
18593 UNSPEC_TLSDESC))))
18594 (parallel
18595 [(set (match_operand:SI 0 "register_operand")
18596 (unspec:SI [(match_dup 1) (match_dup 3)
18597 (match_dup 2) (reg:SI SP_REG)]
18598 UNSPEC_TLSDESC))
18599 (clobber (reg:CC FLAGS_REG))])]
18600 "!TARGET_64BIT && TARGET_GNU2_TLS"
18601 {
18602 operands[3] = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : operands[0];
18603 ix86_tls_descriptor_calls_expanded_in_cfun = true;
18604 })
18605
;; First insn of the 32-bit GNU2 TLS sequence: an lea that loads the
;; address symbol@TLSDESC(operand 1) into operand 0.  Operand 1 is
;; restricted to constraint "b".  The length attribute is fixed at 6.
18606 (define_insn "*tls_dynamic_gnu2_lea_32"
18607 [(set (match_operand:SI 0 "register_operand" "=r")
18608 (plus:SI (match_operand:SI 1 "register_operand" "b")
18609 (const:SI
18610 (unspec:SI [(match_operand 2 "tls_symbolic_operand")]
18611 UNSPEC_TLSDESC))))]
18612 "!TARGET_64BIT && TARGET_GNU2_TLS"
18613 "lea{l}\t{%E2@TLSDESC(%1), %0|%0, %E2@TLSDESC[%1]}"
18614 [(set_attr "type" "lea")
18615 (set_attr "mode" "SI")
18616 (set_attr "length" "6")
18617 (set_attr "length_address" "4")])
18618
;; Second insn of the 32-bit GNU2 TLS sequence: an indirect call through
;; symbol@TLSCALL(operand 2).  The result is in constraint "a", operand 2
;; is tied to it (constraint "0"), and operand 3 is held in constraint
;; "b".  Flags are clobbered; the length attribute is fixed at 2.
18619 (define_insn "*tls_dynamic_gnu2_call_32"
18620 [(set (match_operand:SI 0 "register_operand" "=a")
18621 (unspec:SI [(match_operand 1 "tls_symbolic_operand")
18622 (match_operand:SI 2 "register_operand" "0")
18623 ;; we have to make sure %ebx still points to the GOT
18624 (match_operand:SI 3 "register_operand" "b")
18625 (reg:SI SP_REG)]
18626 UNSPEC_TLSDESC))
18627 (clobber (reg:CC FLAGS_REG))]
18628 "!TARGET_64BIT && TARGET_GNU2_TLS"
18629 "call\t{*%a1@TLSCALL(%2)|[DWORD PTR [%2+%a1@TLSCALL]]}"
18630 [(set_attr "type" "call")
18631 (set_attr "length" "2")
18632 (set_attr "length_address" "0")])
18633
;; Matches an UNSPEC_TLSDESC value computed for a tls_modbase_operand
;; (operand 3) plus the UNSPEC_DTPOFF constant of TLS symbol operand 1.
;; The split emits the tls_dynamic_gnu2_32 sequence for operand 1 with
;; operand 5 as its destination, then copies operand 5 into operand 0.
;; Operand 5 is a fresh Pmode pseudo when one can be created, otherwise
;; operand 0 itself.
18634 (define_insn_and_split "*tls_dynamic_gnu2_combine_32"
18635 [(set (match_operand:SI 0 "register_operand" "=&a")
18636 (plus:SI
18637 (unspec:SI [(match_operand 3 "tls_modbase_operand")
18638 (match_operand:SI 4)
18639 (match_operand:SI 2 "register_operand" "b")
18640 (reg:SI SP_REG)]
18641 UNSPEC_TLSDESC)
18642 (const:SI (unspec:SI
18643 [(match_operand 1 "tls_symbolic_operand")]
18644 UNSPEC_DTPOFF))))
18645 (clobber (reg:CC FLAGS_REG))]
18646 "!TARGET_64BIT && TARGET_GNU2_TLS"
18647 "#"
18648 "&& 1"
18649 [(set (match_dup 0) (match_dup 5))]
18650 {
18651 operands[5] = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : operands[0];
18652 emit_insn (gen_tls_dynamic_gnu2_32 (operands[5], operands[1], operands[2]));
18653 })
18654
;; Expander for the 64-bit GNU2 TLS sequence (TARGET_64BIT &&
;; TARGET_GNU2_TLS), one instance per PTR mode.  It emits two insns: the
;; first sets operand 2 to the UNSPEC_TLSDESC of TLS symbol operand 1; the
;; second sets operand 0 from an UNSPEC_TLSDESC over operand 1, operand 2
;; and the stack pointer, clobbering flags.  Operand 2 is a fresh ptr_mode
;; pseudo when one can be created, otherwise operand 0.  The preparation
;; code also sets ix86_tls_descriptor_calls_expanded_in_cfun to true.
18655 (define_expand "@tls_dynamic_gnu2_64_<mode>"
18656 [(set (match_dup 2)
18657 (unspec:PTR [(match_operand 1 "tls_symbolic_operand")]
18658 UNSPEC_TLSDESC))
18659 (parallel
18660 [(set (match_operand:PTR 0 "register_operand")
18661 (unspec:PTR [(match_dup 1) (match_dup 2) (reg:PTR SP_REG)]
18662 UNSPEC_TLSDESC))
18663 (clobber (reg:CC FLAGS_REG))])]
18664 "TARGET_64BIT && TARGET_GNU2_TLS"
18665 {
18666 operands[2] = can_create_pseudo_p () ? gen_reg_rtx (ptr_mode) : operands[0];
18667 ix86_tls_descriptor_calls_expanded_in_cfun = true;
18668 })
18669
;; First insn of the 64-bit GNU2 TLS sequence: an lea that loads the
;; RIP-relative address symbol@TLSDESC(%rip) into operand 0.  The opcode
;; suffix comes from the size of operand 0 (%z0).  The length attribute is
;; fixed at 7.
18670 (define_insn "*tls_dynamic_gnu2_lea_64_<mode>"
18671 [(set (match_operand:PTR 0 "register_operand" "=r")
18672 (unspec:PTR [(match_operand 1 "tls_symbolic_operand")]
18673 UNSPEC_TLSDESC))]
18674 "TARGET_64BIT && TARGET_GNU2_TLS"
18675 "lea%z0\t{%E1@TLSDESC(%%rip), %0|%0, %E1@TLSDESC[rip]}"
18676 [(set_attr "type" "lea")
18677 (set_attr "mode" "<MODE>")
18678 (set_attr "length" "7")
18679 (set_attr "length_address" "4")])
18680
;; Second insn of the 64-bit GNU2 TLS sequence: an indirect call through
;; symbol@TLSCALL(operand 2).  The result is in constraint "a" and operand
;; 2 is tied to it (constraint "0").  Flags are clobbered; the length
;; attribute is fixed at 2.
18681 (define_insn "*tls_dynamic_gnu2_call_64_<mode>"
18682 [(set (match_operand:PTR 0 "register_operand" "=a")
18683 (unspec:PTR [(match_operand 1 "tls_symbolic_operand")
18684 (match_operand:PTR 2 "register_operand" "0")
18685 (reg:PTR SP_REG)]
18686 UNSPEC_TLSDESC))
18687 (clobber (reg:CC FLAGS_REG))]
18688 "TARGET_64BIT && TARGET_GNU2_TLS"
18689 "call\t{*%a1@TLSCALL(%2)|[QWORD PTR [%2+%a1@TLSCALL]]}"
18690 [(set_attr "type" "call")
18691 (set_attr "length" "2")
18692 (set_attr "length_address" "0")])
18693
;; Matches an UNSPEC_TLSDESC value computed for a tls_modbase_operand
;; (operand 2) plus the UNSPEC_DTPOFF constant of TLS symbol operand 1.
;; The split emits the tls_dynamic_gnu2_64 sequence (selected by ptr_mode)
;; for operand 1 with operand 4 as its destination, then copies operand 4
;; into operand 0.  Operand 4 is a fresh ptr_mode pseudo when one can be
;; created, otherwise operand 0 itself.
18694 (define_insn_and_split "*tls_dynamic_gnu2_combine_64_<mode>"
18695 [(set (match_operand:PTR 0 "register_operand" "=&a")
18696 (plus:PTR
18697 (unspec:PTR [(match_operand 2 "tls_modbase_operand")
18698 (match_operand:PTR 3)
18699 (reg:PTR SP_REG)]
18700 UNSPEC_TLSDESC)
18701 (const:PTR (unspec:PTR
18702 [(match_operand 1 "tls_symbolic_operand")]
18703 UNSPEC_DTPOFF))))
18704 (clobber (reg:CC FLAGS_REG))]
18705 "TARGET_64BIT && TARGET_GNU2_TLS"
18706 "#"
18707 "&& 1"
18708 [(set (match_dup 0) (match_dup 4))]
18709 {
18710 operands[4] = can_create_pseudo_p () ? gen_reg_rtx (ptr_mode) : operands[0];
18711 emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, operands[4], operands[1]));
18712 })
18713
;; When TARGET_TLS_DIRECT_SEG_REFS holds, any insn pattern accepted by the
;; tls_address_pattern predicate is replaced by the pattern that
;; ix86_rewrite_tls_address returns for it.
18714 (define_split
18715 [(match_operand 0 "tls_address_pattern")]
18716 "TARGET_TLS_DIRECT_SEG_REFS"
18717 [(match_dup 0)]
18718 "operands[0] = ix86_rewrite_tls_address (operands[0]);")
18719
18720 \f
18721 ;; These patterns match the binary 387 instructions for addM3, subM3,
18722 ;; mulM3 and divM3. There are three patterns for each of DFmode and
18723 ;; SFmode. The first is the normal insn, the second the same insn but
18724 ;; with one operand a conversion, and the third the same insn but with
18725 ;; the other operand a conversion. The conversion may be SFmode or
18726 ;; SImode if the target mode is DFmode, but only SImode if the target mode
18727 ;; is SFmode.
18728
18729 ;; GCC is somewhat smarter at handling normal two-address instructions,
18730 ;; so use special patterns for add and mul.
18731
;; XFmode binary FP operation on x87 registers, limited to commutative
;; operators (COMMUTATIVE_ARITH_P).  Operand 1 is marked commutative ("%")
;; and tied to the destination.  Assembly comes from
;; output_387_binary_op.  The "type" attribute is "fmul" for a
;; mult_operator and "fop" otherwise.
18732 (define_insn "*fop_xf_comm_i387"
18733 [(set (match_operand:XF 0 "register_operand" "=f")
18734 (match_operator:XF 3 "binary_fp_operator"
18735 [(match_operand:XF 1 "register_operand" "%0")
18736 (match_operand:XF 2 "register_operand" "f")]))]
18737 "TARGET_80387
18738 && COMMUTATIVE_ARITH_P (operands[3])"
18739 "* return output_387_binary_op (insn, operands);"
18740 [(set (attr "type")
18741 (if_then_else (match_operand:XF 3 "mult_operator")
18742 (const_string "fmul")
18743 (const_string "fop")))
18744 (set_attr "mode" "XF")])
18745
;; Commutative binary FP operation in the MODEF modes, with three
;; alternatives: 0 uses x87 registers ("f"), 1 uses "x" registers with
;; isa "noavx", 2 uses "v" registers with isa "avx".  The condition
;; rejects the case where both inputs are memory.
;; The "enabled" attribute works as follows.  When SSE math is in use for
;; the mode, alternative 0 is enabled only under TARGET_MIX_SSE_I387 &&
;; X87_ENABLE_ARITH, and the other alternatives keep their default ("*").
;; Otherwise only alternative 0 is enabled.
18746 (define_insn "*fop_<mode>_comm"
18747 [(set (match_operand:MODEF 0 "register_operand" "=f,x,v")
18748 (match_operator:MODEF 3 "binary_fp_operator"
18749 [(match_operand:MODEF 1 "nonimmediate_operand" "%0,0,v")
18750 (match_operand:MODEF 2 "nonimmediate_operand" "fm,xm,vm")]))]
18751 "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
18752 || (TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode)))
18753 && COMMUTATIVE_ARITH_P (operands[3])
18754 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
18755 "* return output_387_binary_op (insn, operands);"
18756 [(set (attr "type")
18757 (if_then_else (eq_attr "alternative" "1,2")
18758 (if_then_else (match_operand:MODEF 3 "mult_operator")
18759 (const_string "ssemul")
18760 (const_string "sseadd"))
18761 (if_then_else (match_operand:MODEF 3 "mult_operator")
18762 (const_string "fmul")
18763 (const_string "fop"))))
18764 (set_attr "isa" "*,noavx,avx")
18765 (set_attr "prefix" "orig,orig,vex")
18766 (set_attr "mode" "<MODE>")
18767 (set (attr "enabled")
18768 (if_then_else
18769 (match_test ("SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"))
18770 (if_then_else
18771 (eq_attr "alternative" "0")
18772 (symbol_ref "TARGET_MIX_SSE_I387
18773 && X87_ENABLE_ARITH (<MODE>mode)")
18774 (const_string "*"))
18775 (if_then_else
18776 (eq_attr "alternative" "0")
18777 (symbol_ref "true")
18778 (symbol_ref "false"))))])
18779
;; Scalar HFmode arithmetic for TARGET_AVX512FP16, one insn per code of
;; the plusminusmultdiv iterator, output as v<insn>sh.  The condition
;; rejects the case where both inputs are memory.
18780 (define_insn "*<insn>hf"
18781 [(set (match_operand:HF 0 "register_operand" "=v")
18782 (plusminusmultdiv:HF
18783 (match_operand:HF 1 "nonimmediate_operand" "<comm>v")
18784 (match_operand:HF 2 "nonimmediate_operand" "vm")))]
18785 "TARGET_AVX512FP16
18786 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
18787 "v<insn>sh\t{%2, %1, %0|%0, %1, %2}"
18788 [(set_attr "prefix" "evex")
18789 (set_attr "mode" "HF")])
18790
;; Scalar SFmode UNSPEC_RCP via rcpss (TARGET_SSE && TARGET_SSE_MATH).
;; Three alternatives: source tied to the destination, source in another
;; "x" register, source in memory.  Only the memory alternative sets
;; avx_partial_xmm_update.  For "preferred_for_speed", every alternative
;; is preferred under TARGET_AVX; otherwise alternatives 1 and 2 are
;; preferred only when !TARGET_SSE_PARTIAL_REG_DEPENDENCY.
18791 (define_insn "*rcpsf2_sse"
18792 [(set (match_operand:SF 0 "register_operand" "=x,x,x")
18793 (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "0,x,m")]
18794 UNSPEC_RCP))]
18795 "TARGET_SSE && TARGET_SSE_MATH"
18796 "@
18797 %vrcpss\t{%d1, %0|%0, %d1}
18798 %vrcpss\t{%d1, %0|%0, %d1}
18799 %vrcpss\t{%1, %d0|%d0, %1}"
18800 [(set_attr "type" "sse")
18801 (set_attr "atom_sse_attr" "rcp")
18802 (set_attr "btver2_sse_attr" "rcp")
18803 (set_attr "prefix" "maybe_vex")
18804 (set_attr "mode" "SF")
18805 (set_attr "avx_partial_xmm_update" "false,false,true")
18806 (set (attr "preferred_for_speed")
18807 (cond [(match_test "TARGET_AVX")
18808 (symbol_ref "true")
18809 (eq_attr "alternative" "1,2")
18810 (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
18811 ]
18812 (symbol_ref "true")))])
18813
;; Scalar HFmode UNSPEC_RCP via vrcpsh (TARGET_AVX512FP16).  Alternative 0
;; takes the source from a "v" register, alternative 1 from memory.  Only
;; the memory alternative sets avx_partial_xmm_update.
18814 (define_insn "rcphf2"
18815 [(set (match_operand:HF 0 "register_operand" "=v,v")
18816 (unspec:HF [(match_operand:HF 1 "nonimmediate_operand" "v,m")]
18817 UNSPEC_RCP))]
18818 "TARGET_AVX512FP16"
18819 "@
18820 vrcpsh\t{%d1, %0|%0, %d1}
18821 vrcpsh\t{%1, %d0|%d0, %1}"
18822 [(set_attr "type" "sse")
18823 (set_attr "prefix" "evex")
18824 (set_attr "mode" "HF")
18825 (set_attr "avx_partial_xmm_update" "false,true")])
18826
;; XFmode binary FP operation on x87 registers for non-commutative
;; operators (!COMMUTATIVE_ARITH_P).  There are two alternatives, because
;; either input may be the one tied to the destination.  The "type"
;; attribute is "fdiv" for a div_operator and "fop" otherwise.
18827 (define_insn "*fop_xf_1_i387"
18828 [(set (match_operand:XF 0 "register_operand" "=f,f")
18829 (match_operator:XF 3 "binary_fp_operator"
18830 [(match_operand:XF 1 "register_operand" "0,f")
18831 (match_operand:XF 2 "register_operand" "f,0")]))]
18832 "TARGET_80387
18833 && !COMMUTATIVE_ARITH_P (operands[3])"
18834 "* return output_387_binary_op (insn, operands);"
18835 [(set (attr "type")
18836 (if_then_else (match_operand:XF 3 "div_operator")
18837 (const_string "fdiv")
18838 (const_string "fop")))
18839 (set_attr "mode" "XF")])
18840
;; Non-commutative binary FP operation in the MODEF modes, with four
;; alternatives: 0 and 1 use x87 registers (either input tied to the
;; destination), 2 uses "x" registers with isa "noavx", 3 uses "v"
;; registers with isa "avx".  The condition rejects the case where both
;; inputs are memory.
;; The "enabled" attribute works as follows.  When SSE math is in use for
;; the mode, alternatives 0 and 1 are enabled only under
;; TARGET_MIX_SSE_I387 && X87_ENABLE_ARITH, and the others keep their
;; default ("*").  Otherwise only alternatives 0 and 1 are enabled.
18841 (define_insn "*fop_<mode>_1"
18842 [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,v")
18843 (match_operator:MODEF 3 "binary_fp_operator"
18844 [(match_operand:MODEF 1
18845 "x87nonimm_ssenomem_operand" "0,fm,0,v")
18846 (match_operand:MODEF 2
18847 "nonimmediate_operand" "fm,0,xm,vm")]))]
18848 "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
18849 || (TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode)))
18850 && !COMMUTATIVE_ARITH_P (operands[3])
18851 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
18852 "* return output_387_binary_op (insn, operands);"
18853 [(set (attr "type")
18854 (if_then_else (eq_attr "alternative" "2,3")
18855 (if_then_else (match_operand:MODEF 3 "div_operator")
18856 (const_string "ssediv")
18857 (const_string "sseadd"))
18858 (if_then_else (match_operand:MODEF 3 "div_operator")
18859 (const_string "fdiv")
18860 (const_string "fop"))))
18861 (set_attr "isa" "*,*,noavx,avx")
18862 (set_attr "prefix" "orig,orig,orig,vex")
18863 (set_attr "mode" "<MODE>")
18864 (set (attr "enabled")
18865 (if_then_else
18866 (match_test ("SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"))
18867 (if_then_else
18868 (eq_attr "alternative" "0,1")
18869 (symbol_ref "TARGET_MIX_SSE_I387
18870 && X87_ENABLE_ARITH (<MODE>mode)")
18871 (const_string "*"))
18872 (if_then_else
18873 (eq_attr "alternative" "0,1")
18874 (symbol_ref "true")
18875 (symbol_ref "false"))))])
18876
;; x87 binary FP operation whose first input is an integer in memory
;; (SWI24 modes) converted with "float".  The second input is an x87
;; register tied to the destination.  It is enabled only when SSE math is
;; not used for the FP mode, and either TARGET_USE_<mode>MODE_FIOP holds
;; or the function is optimized for size.
18877 (define_insn "*fop_<X87MODEF:mode>_2_i387"
18878 [(set (match_operand:X87MODEF 0 "register_operand" "=f")
18879 (match_operator:X87MODEF 3 "binary_fp_operator"
18880 [(float:X87MODEF
18881 (match_operand:SWI24 1 "nonimmediate_operand" "m"))
18882 (match_operand:X87MODEF 2 "register_operand" "0")]))]
18883 "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, <SWI24:MODE>mode)
18884 && !(SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)
18885 && (TARGET_USE_<SWI24:MODE>MODE_FIOP
18886 || optimize_function_for_size_p (cfun))"
18887 "* return output_387_binary_op (insn, operands);"
18888 [(set (attr "type")
18889 (cond [(match_operand:X87MODEF 3 "mult_operator")
18890 (const_string "fmul")
18891 (match_operand:X87MODEF 3 "div_operator")
18892 (const_string "fdiv")
18893 ]
18894 (const_string "fop")))
18895 (set_attr "fp_int_src" "true")
18896 (set_attr "mode" "<SWI24:MODE>")])
18897
;; x87 binary FP operation whose second input is an integer in memory
;; (SWI24 modes) converted with "float".  The first input is an x87
;; register tied to the destination.  The enabling condition is the same
;; as for the pattern with the integer as first input.
18898 (define_insn "*fop_<X87MODEF:mode>_3_i387"
18899 [(set (match_operand:X87MODEF 0 "register_operand" "=f")
18900 (match_operator:X87MODEF 3 "binary_fp_operator"
18901 [(match_operand:X87MODEF 1 "register_operand" "0")
18902 (float:X87MODEF
18903 (match_operand:SWI24 2 "nonimmediate_operand" "m"))]))]
18904 "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, <SWI24:MODE>mode)
18905 && !(SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)
18906 && (TARGET_USE_<SWI24:MODE>MODE_FIOP
18907 || optimize_function_for_size_p (cfun))"
18908 "* return output_387_binary_op (insn, operands);"
18909 [(set (attr "type")
18910 (cond [(match_operand:X87MODEF 3 "mult_operator")
18911 (const_string "fmul")
18912 (match_operand:X87MODEF 3 "div_operator")
18913 (const_string "fdiv")
18914 ]
18915 (const_string "fop")))
18916 (set_attr "fp_int_src" "true")
18917 (set_attr "mode" "<SWI24:MODE>")])
18918
;; XFmode x87 binary FP operation whose first input is a MODEF value
;; widened with float_extend.  The second input is an XFmode register.
;; There are two alternatives, because either input may be tied to the
;; destination.
18919 (define_insn "*fop_xf_4_i387"
18920 [(set (match_operand:XF 0 "register_operand" "=f,f")
18921 (match_operator:XF 3 "binary_fp_operator"
18922 [(float_extend:XF
18923 (match_operand:MODEF 1 "nonimmediate_operand" "fm,0"))
18924 (match_operand:XF 2 "register_operand" "0,f")]))]
18925 "TARGET_80387"
18926 "* return output_387_binary_op (insn, operands);"
18927 [(set (attr "type")
18928 (cond [(match_operand:XF 3 "mult_operator")
18929 (const_string "fmul")
18930 (match_operand:XF 3 "div_operator")
18931 (const_string "fdiv")
18932 ]
18933 (const_string "fop")))
18934 (set_attr "mode" "<MODE>")])
18935
;; DFmode x87 binary FP operation whose first input is an SFmode value
;; widened with float_extend.  The second input is a DFmode register.  It
;; is enabled only when x87 DFmode arithmetic is allowed and SSE math is
;; not used for DFmode.
18936 (define_insn "*fop_df_4_i387"
18937 [(set (match_operand:DF 0 "register_operand" "=f,f")
18938 (match_operator:DF 3 "binary_fp_operator"
18939 [(float_extend:DF
18940 (match_operand:SF 1 "nonimmediate_operand" "fm,0"))
18941 (match_operand:DF 2 "register_operand" "0,f")]))]
18942 "TARGET_80387 && X87_ENABLE_ARITH (DFmode)
18943 && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
18944 "* return output_387_binary_op (insn, operands);"
18945 [(set (attr "type")
18946 (cond [(match_operand:DF 3 "mult_operator")
18947 (const_string "fmul")
18948 (match_operand:DF 3 "div_operator")
18949 (const_string "fdiv")
18950 ]
18951 (const_string "fop")))
18952 (set_attr "mode" "SF")])
18953
;; XFmode x87 binary FP operation whose second input is a MODEF value
;; widened with float_extend.  The first input is an XFmode register.
;; There are two alternatives, because either input may be tied to the
;; destination.
18954 (define_insn "*fop_xf_5_i387"
18955 [(set (match_operand:XF 0 "register_operand" "=f,f")
18956 (match_operator:XF 3 "binary_fp_operator"
18957 [(match_operand:XF 1 "register_operand" "0,f")
18958 (float_extend:XF
18959 (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))]
18960 "TARGET_80387"
18961 "* return output_387_binary_op (insn, operands);"
18962 [(set (attr "type")
18963 (cond [(match_operand:XF 3 "mult_operator")
18964 (const_string "fmul")
18965 (match_operand:XF 3 "div_operator")
18966 (const_string "fdiv")
18967 ]
18968 (const_string "fop")))
18969 (set_attr "mode" "<MODE>")])
18970
;; DFmode x87 binary FP operation whose second input is an SFmode value
;; widened with float_extend.  The first input is a DFmode register.  It
;; is enabled only when x87 DFmode arithmetic is allowed and SSE math is
;; not used for DFmode.
18971 (define_insn "*fop_df_5_i387"
18972 [(set (match_operand:DF 0 "register_operand" "=f,f")
18973 (match_operator:DF 3 "binary_fp_operator"
18974 [(match_operand:DF 1 "register_operand" "0,f")
18975 (float_extend:DF
18976 (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))]
18977 "TARGET_80387 && X87_ENABLE_ARITH (DFmode)
18978 && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
18979 "* return output_387_binary_op (insn, operands);"
18980 [(set (attr "type")
18981 (cond [(match_operand:DF 3 "mult_operator")
18982 (const_string "fmul")
18983 (match_operand:DF 3 "div_operator")
18984 (const_string "fdiv")
18985 ]
18986 (const_string "fop")))
18987 (set_attr "mode" "SF")])
18988
;; XFmode x87 binary FP operation in which both inputs are MODEF values
;; widened with float_extend.  Operand 1 must be a register; operand 2 may
;; be a register or memory.
18989 (define_insn "*fop_xf_6_i387"
18990 [(set (match_operand:XF 0 "register_operand" "=f,f")
18991 (match_operator:XF 3 "binary_fp_operator"
18992 [(float_extend:XF
18993 (match_operand:MODEF 1 "register_operand" "0,f"))
18994 (float_extend:XF
18995 (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))]
18996 "TARGET_80387"
18997 "* return output_387_binary_op (insn, operands);"
18998 [(set (attr "type")
18999 (cond [(match_operand:XF 3 "mult_operator")
19000 (const_string "fmul")
19001 (match_operand:XF 3 "div_operator")
19002 (const_string "fdiv")
19003 ]
19004 (const_string "fop")))
19005 (set_attr "mode" "<MODE>")])
19006
;; DFmode x87 binary FP operation in which both inputs are SFmode values
;; widened with float_extend.  It is enabled only when x87 DFmode
;; arithmetic is allowed and SSE math is not used for DFmode.
19007 (define_insn "*fop_df_6_i387"
19008 [(set (match_operand:DF 0 "register_operand" "=f,f")
19009 (match_operator:DF 3 "binary_fp_operator"
19010 [(float_extend:DF
19011 (match_operand:SF 1 "register_operand" "0,f"))
19012 (float_extend:DF
19013 (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))]
19014 "TARGET_80387 && X87_ENABLE_ARITH (DFmode)
19015 && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
19016 "* return output_387_binary_op (insn, operands);"
19017 [(set (attr "type")
19018 (cond [(match_operand:DF 3 "mult_operator")
19019 (const_string "fmul")
19020 (match_operand:DF 3 "div_operator")
19021 (const_string "fdiv")
19022 ]
19023 (const_string "fop")))
19024 (set_attr "mode" "SF")])
19025 \f
19026 ;; FPU special functions.
19027
19028 ;; This pattern implements a no-op XFmode truncation for
19029 ;; all fancy i386 XFmode math functions.
19030
;; Sets a MODEF register or memory destination from an XFmode x87
;; register through UNSPEC_TRUNC_NOOP.  Assembly comes from
;; output_387_reg_move.  Available under TARGET_USE_FANCY_MATH_387.
19031 (define_insn "truncxf<mode>2_i387_noop_unspec"
19032 [(set (match_operand:MODEF 0 "nonimmediate_operand" "=mf")
19033 (unspec:MODEF [(match_operand:XF 1 "register_operand" "f")]
19034 UNSPEC_TRUNC_NOOP))]
19035 "TARGET_USE_FANCY_MATH_387"
19036 "* return output_387_reg_move (insn, operands);"
19037 [(set_attr "type" "fmov")
19038 (set_attr "mode" "<MODE>")])
19039
;; XFmode square root on the x87 via fsqrt.  The input is tied to the
;; destination register.  Available under TARGET_USE_FANCY_MATH_387.
19040 (define_insn "sqrtxf2"
19041 [(set (match_operand:XF 0 "register_operand" "=f")
19042 (sqrt:XF (match_operand:XF 1 "register_operand" "0")))]
19043 "TARGET_USE_FANCY_MATH_387"
19044 "fsqrt"
19045 [(set_attr "type" "fpspc")
19046 (set_attr "mode" "XF")
19047 (set_attr "athlon_decode" "direct")
19048 (set_attr "amdfam10_decode" "direct")
19049 (set_attr "bdver1_decode" "direct")])
19050
;; Scalar SFmode UNSPEC_RSQRT via rsqrtss (TARGET_SSE &&
;; TARGET_SSE_MATH).  Three alternatives: source tied to the destination,
;; source in another "x" register, source in memory.  For
;; "preferred_for_speed", every alternative is preferred under
;; TARGET_AVX; otherwise alternatives 1 and 2 are preferred only when
;; !TARGET_SSE_PARTIAL_REG_DEPENDENCY.
19051 (define_insn "*rsqrtsf2_sse"
19052 [(set (match_operand:SF 0 "register_operand" "=x,x,x")
19053 (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "0,x,m")]
19054 UNSPEC_RSQRT))]
19055 "TARGET_SSE && TARGET_SSE_MATH"
19056 "@
19057 %vrsqrtss\t{%d1, %0|%0, %d1}
19058 %vrsqrtss\t{%d1, %0|%0, %d1}
19059 %vrsqrtss\t{%1, %d0|%d0, %1}"
19060 [(set_attr "type" "sse")
19061 (set_attr "atom_sse_attr" "rcp")
19062 (set_attr "btver2_sse_attr" "rcp")
19063 (set_attr "prefix" "maybe_vex")
19064 (set_attr "mode" "SF")
19065 (set_attr "avx_partial_xmm_update" "false,false,true")
19066 (set (attr "preferred_for_speed")
19067 (cond [(match_test "TARGET_AVX")
19068 (symbol_ref "true")
19069 (eq_attr "alternative" "1,2")
19070 (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
19071 ]
19072 (symbol_ref "true")))])
19073
;; Expander for SFmode UNSPEC_RSQRT (TARGET_SSE && TARGET_SSE_MATH).  The
;; RTL template is never emitted: the preparation code calls
;; ix86_emit_swsqrtsf with a last argument of 1 and finishes with DONE.
19074 (define_expand "rsqrtsf2"
19075 [(set (match_operand:SF 0 "register_operand")
19076 (unspec:SF [(match_operand:SF 1 "nonimmediate_operand")]
19077 UNSPEC_RSQRT))]
19078 "TARGET_SSE && TARGET_SSE_MATH"
19079 {
19080 ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 1);
19081 DONE;
19082 })
19083
;; Scalar HFmode UNSPEC_RSQRT via vrsqrtsh (TARGET_AVX512FP16).
;; Alternative 0 takes the source from a "v" register, alternative 1 from
;; memory.  Only the memory alternative sets avx_partial_xmm_update.
19084 (define_insn "rsqrthf2"
19085 [(set (match_operand:HF 0 "register_operand" "=v,v")
19086 (unspec:HF [(match_operand:HF 1 "nonimmediate_operand" "v,m")]
19087 UNSPEC_RSQRT))]
19088 "TARGET_AVX512FP16"
19089 "@
19090 vrsqrtsh\t{%d1, %0|%0, %d1}
19091 vrsqrtsh\t{%1, %d0|%d0, %1}"
19092 [(set_attr "type" "sse")
19093 (set_attr "prefix" "evex")
19094 (set_attr "avx_partial_xmm_update" "false,true")
19095 (set_attr "mode" "HF")])
19096
;; Scalar HFmode square root via vsqrtsh (TARGET_AVX512FP16).
;; Alternative 0 takes the source from a "v" register, alternative 1 from
;; memory.  Only the memory alternative sets avx_partial_xmm_update.
19097 (define_insn "sqrthf2"
19098 [(set (match_operand:HF 0 "register_operand" "=v,v")
19099 (sqrt:HF
19100 (match_operand:HF 1 "nonimmediate_operand" "v,m")))]
19101 "TARGET_AVX512FP16"
19102 "@
19103 vsqrtsh\t{%d1, %0|%0, %d1}
19104 vsqrtsh\t{%1, %d0|%d0, %1}"
19105 [(set_attr "type" "sse")
19106 (set_attr "prefix" "evex")
19107 (set_attr "avx_partial_xmm_update" "false,true")
19108 (set_attr "mode" "HF")])
19109
;; Scalar MODEF square root via (v)sqrt<ssemodesuffix>, used when SSE
;; math is in effect for the mode.  Three alternatives: source tied to the
;; destination, source in another "v" register, source in memory.  For
;; "preferred_for_speed", every alternative is preferred under
;; TARGET_AVX; otherwise alternatives 1 and 2 are preferred only when
;; !TARGET_SSE_PARTIAL_REG_DEPENDENCY.
19110 (define_insn "*sqrt<mode>2_sse"
19111 [(set (match_operand:MODEF 0 "register_operand" "=v,v,v")
19112 (sqrt:MODEF
19113 (match_operand:MODEF 1 "nonimmediate_operand" "0,v,m")))]
19114 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
19115 "@
19116 %vsqrt<ssemodesuffix>\t{%d1, %0|%0, %d1}
19117 %vsqrt<ssemodesuffix>\t{%d1, %0|%0, %d1}
19118 %vsqrt<ssemodesuffix>\t{%1, %d0|%d0, %1}"
19119 [(set_attr "type" "sse")
19120 (set_attr "atom_sse_attr" "sqrt")
19121 (set_attr "btver2_sse_attr" "sqrt")
19122 (set_attr "prefix" "maybe_vex")
19123 (set_attr "avx_partial_xmm_update" "false,false,true")
19124 (set_attr "mode" "<MODE>")
19125 (set (attr "preferred_for_speed")
19126 (cond [(match_test "TARGET_AVX")
19127 (symbol_ref "true")
19128 (eq_attr "alternative" "1,2")
19129 (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
19130 ]
19131 (symbol_ref "true")))])
19132
;; Expander for MODEF square root.  The preparation code has three paths:
;;  - SFmode with SSE math, TARGET_RECIP_SQRT, not optimizing for size,
;;    finite-math-only, no trapping math and unsafe math optimizations:
;;    call ix86_emit_swsqrtsf with a last argument of 0 and finish.
;;  - SSE math not in use for the mode: extend the input to XFmode, take
;;    sqrtxf2, and store the result with the no-op truncation pattern.
;;  - Otherwise fall through, so the sqrt RTL template itself is emitted.
19133 (define_expand "sqrt<mode>2"
19134 [(set (match_operand:MODEF 0 "register_operand")
19135 (sqrt:MODEF
19136 (match_operand:MODEF 1 "nonimmediate_operand")))]
19137 "(TARGET_USE_FANCY_MATH_387 && X87_ENABLE_ARITH (<MODE>mode))
19138 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
19139 {
19140 if (<MODE>mode == SFmode
19141 && TARGET_SSE && TARGET_SSE_MATH
19142 && TARGET_RECIP_SQRT
19143 && !optimize_function_for_size_p (cfun)
19144 && flag_finite_math_only && !flag_trapping_math
19145 && flag_unsafe_math_optimizations)
19146 {
19147 ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 0);
19148 DONE;
19149 }
19150
19151 if (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
19152 {
19153 rtx op0 = gen_reg_rtx (XFmode);
19154 rtx op1 = gen_reg_rtx (XFmode);
19155
19156 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
19157 emit_insn (gen_sqrtxf2 (op0, op1));
19158 emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
19159 DONE;
19160 }
19161 })
19162
;; Expander for MODEF hypot on the x87.  Both inputs are extended to
;; XFmode, each is squared with mulxf3, the squares are added with addxf3,
;; sqrtxf2 is applied, and the result is truncated to <mode> into operand
;; 0.  It requires finite-math-only and unsafe math optimizations, and is
;; used only when SSE math is off for the mode or TARGET_MIX_SSE_I387 is
;; set.
19163 (define_expand "hypot<mode>3"
19164 [(use (match_operand:MODEF 0 "register_operand"))
19165 (use (match_operand:MODEF 1 "general_operand"))
19166 (use (match_operand:MODEF 2 "general_operand"))]
19167 "TARGET_USE_FANCY_MATH_387
19168 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
19169 || TARGET_MIX_SSE_I387)
19170 && flag_finite_math_only
19171 && flag_unsafe_math_optimizations"
19172 {
19173 rtx op0 = gen_reg_rtx (XFmode);
19174 rtx op1 = gen_reg_rtx (XFmode);
19175 rtx op2 = gen_reg_rtx (XFmode);
19176
19177 emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
19178 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
19179
19180 emit_insn (gen_mulxf3 (op1, op1, op1));
19181 emit_insn (gen_mulxf3 (op2, op2, op2));
19182 emit_insn (gen_addxf3 (op0, op2, op1));
19183 emit_insn (gen_sqrtxf2 (op0, op0));
19184
19185 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
19186 DONE;
19187 })
19188
;; Stores the x87 status word into HImode operand 0 (constraint "a") with
;; fnstsw.  It is modelled as an UNSPEC_FNSTSW of FPSR_REG.
19189 (define_insn "x86_fnstsw_1"
19190 [(set (match_operand:HI 0 "register_operand" "=a")
19191 (unspec:HI [(reg:CCFP FPSR_REG)] UNSPEC_FNSTSW))]
19192 "TARGET_80387"
19193 "fnstsw\t%0"
19194 [(set_attr "length" "2")
19195 (set_attr "mode" "SI")
19196 (set_attr "unit" "i387")])
19197
;; One fprem instruction.  Inputs are operands 2 and 3, tied to outputs 0
;; and 1 respectively.  Output 0 receives UNSPEC_FPREM_F, output 1
;; receives UNSPEC_FPREM_U, and FPSR_REG is set from UNSPEC_C2_FLAG of the
;; inputs.  It requires TARGET_USE_FANCY_MATH_387 and finite-math-only.
19198 (define_insn "fpremxf4_i387"
19199 [(set (match_operand:XF 0 "register_operand" "=f")
19200 (unspec:XF [(match_operand:XF 2 "register_operand" "0")
19201 (match_operand:XF 3 "register_operand" "1")]
19202 UNSPEC_FPREM_F))
19203 (set (match_operand:XF 1 "register_operand" "=f")
19204 (unspec:XF [(match_dup 2) (match_dup 3)]
19205 UNSPEC_FPREM_U))
19206 (set (reg:CCFP FPSR_REG)
19207 (unspec:CCFP [(match_dup 2) (match_dup 3)]
19208 UNSPEC_C2_FLAG))]
19209 "TARGET_USE_FANCY_MATH_387
19210 && flag_finite_math_only"
19211 "fprem"
19212 [(set_attr "type" "fpspc")
19213 (set_attr "znver1_decode" "vector")
19214 (set_attr "mode" "XF")])
19215
;; Expander for XFmode fmod.  Operands 1 and 2 are copied into fresh
;; XFmode pseudos, then a loop is emitted: a label, one fpremxf4_i387
;; updating both pseudos in place, and the jump produced by
;; ix86_emit_fp_unordered_jump back to the label.  The jump is presumably
;; taken while the partial remainder is incomplete (C2 set); confirm in
;; the i386 backend.  The final value of op1 is moved to operand 0.
19216 (define_expand "fmodxf3"
19217 [(use (match_operand:XF 0 "register_operand"))
19218 (use (match_operand:XF 1 "general_operand"))
19219 (use (match_operand:XF 2 "general_operand"))]
19220 "TARGET_USE_FANCY_MATH_387
19221 && flag_finite_math_only"
19222 {
19223 rtx_code_label *label = gen_label_rtx ();
19224
19225 rtx op1 = gen_reg_rtx (XFmode);
19226 rtx op2 = gen_reg_rtx (XFmode);
19227
19228 emit_move_insn (op2, operands[2]);
19229 emit_move_insn (op1, operands[1]);
19230
19231 emit_label (label);
19232 emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2));
19233 ix86_emit_fp_unordered_jump (label);
19234 LABEL_NUSES (label) = 1;
19235
19236 emit_move_insn (operands[0], op1);
19237 DONE;
19238 })
19239
;; Expander for MODEF fmod on the x87.  Both inputs are extended to
;; XFmode, then a loop is emitted: a label, one fpremxf4_i387, and the
;; jump produced by ix86_emit_fp_unordered_jump back to the label.  The
;; result is narrowed to <mode> with truncxf<mode>2 under strict SSE math
;; (SSE math on for the mode and !TARGET_MIX_SSE_I387), and with the
;; no-op truncation pattern otherwise.
19240 (define_expand "fmod<mode>3"
19241 [(use (match_operand:MODEF 0 "register_operand"))
19242 (use (match_operand:MODEF 1 "general_operand"))
19243 (use (match_operand:MODEF 2 "general_operand"))]
19244 "TARGET_USE_FANCY_MATH_387
19245 && flag_finite_math_only"
19246 {
19247 rtx (*gen_truncxf) (rtx, rtx);
19248
19249 rtx_code_label *label = gen_label_rtx ();
19250
19251 rtx op1 = gen_reg_rtx (XFmode);
19252 rtx op2 = gen_reg_rtx (XFmode);
19253
19254 emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
19255 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
19256
19257 emit_label (label);
19258 emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2));
19259 ix86_emit_fp_unordered_jump (label);
19260 LABEL_NUSES (label) = 1;
19261
19262 /* Truncate the result properly for strict SSE math. */
19263 if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
19264 && !TARGET_MIX_SSE_I387)
19265 gen_truncxf = gen_truncxf<mode>2;
19266 else
19267 gen_truncxf = gen_truncxf<mode>2_i387_noop_unspec;
19268
19269 emit_insn (gen_truncxf (operands[0], op1));
19270 DONE;
19271 })
19272
;; One fprem1 instruction.  Inputs are operands 2 and 3, tied to outputs 0
;; and 1 respectively.  Output 0 receives UNSPEC_FPREM1_F, output 1
;; receives UNSPEC_FPREM1_U, and FPSR_REG is set from UNSPEC_C2_FLAG of
;; the inputs.  It requires TARGET_USE_FANCY_MATH_387 and
;; finite-math-only.
19273 (define_insn "fprem1xf4_i387"
19274 [(set (match_operand:XF 0 "register_operand" "=f")
19275 (unspec:XF [(match_operand:XF 2 "register_operand" "0")
19276 (match_operand:XF 3 "register_operand" "1")]
19277 UNSPEC_FPREM1_F))
19278 (set (match_operand:XF 1 "register_operand" "=f")
19279 (unspec:XF [(match_dup 2) (match_dup 3)]
19280 UNSPEC_FPREM1_U))
19281 (set (reg:CCFP FPSR_REG)
19282 (unspec:CCFP [(match_dup 2) (match_dup 3)]
19283 UNSPEC_C2_FLAG))]
19284 "TARGET_USE_FANCY_MATH_387
19285 && flag_finite_math_only"
19286 "fprem1"
19287 [(set_attr "type" "fpspc")
19288 (set_attr "znver1_decode" "vector")
19289 (set_attr "mode" "XF")])
19290
;; Expander for XFmode remainder.  Operands 1 and 2 are copied into fresh
;; XFmode pseudos, then a loop is emitted: a label, one fprem1xf4_i387
;; updating both pseudos in place, and the jump produced by
;; ix86_emit_fp_unordered_jump back to the label.  The jump is presumably
;; taken while the partial remainder is incomplete (C2 set); confirm in
;; the i386 backend.  The final value of op1 is moved to operand 0.
19291 (define_expand "remainderxf3"
19292 [(use (match_operand:XF 0 "register_operand"))
19293 (use (match_operand:XF 1 "general_operand"))
19294 (use (match_operand:XF 2 "general_operand"))]
19295 "TARGET_USE_FANCY_MATH_387
19296 && flag_finite_math_only"
19297 {
19298 rtx_code_label *label = gen_label_rtx ();
19299
19300 rtx op1 = gen_reg_rtx (XFmode);
19301 rtx op2 = gen_reg_rtx (XFmode);
19302
19303 emit_move_insn (op2, operands[2]);
19304 emit_move_insn (op1, operands[1]);
19305
19306 emit_label (label);
19307 emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2));
19308 ix86_emit_fp_unordered_jump (label);
19309 LABEL_NUSES (label) = 1;
19310
19311 emit_move_insn (operands[0], op1);
19312 DONE;
19313 })
19314
;; Expander for MODEF remainder on the x87.  Both inputs are extended to
;; XFmode, then a loop is emitted: a label, one fprem1xf4_i387, and the
;; jump produced by ix86_emit_fp_unordered_jump back to the label.  The
;; result is narrowed to <mode> with truncxf<mode>2 under strict SSE math
;; (SSE math on for the mode and !TARGET_MIX_SSE_I387), and with the
;; no-op truncation pattern otherwise.
19315 (define_expand "remainder<mode>3"
19316 [(use (match_operand:MODEF 0 "register_operand"))
19317 (use (match_operand:MODEF 1 "general_operand"))
19318 (use (match_operand:MODEF 2 "general_operand"))]
19319 "TARGET_USE_FANCY_MATH_387
19320 && flag_finite_math_only"
19321 {
19322 rtx (*gen_truncxf) (rtx, rtx);
19323
19324 rtx_code_label *label = gen_label_rtx ();
19325
19326 rtx op1 = gen_reg_rtx (XFmode);
19327 rtx op2 = gen_reg_rtx (XFmode);
19328
19329 emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
19330 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
19331
19332 emit_label (label);
19333
19334 emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2));
19335 ix86_emit_fp_unordered_jump (label);
19336 LABEL_NUSES (label) = 1;
19337
19338 /* Truncate the result properly for strict SSE math. */
19339 if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
19340 && !TARGET_MIX_SSE_I387)
19341 gen_truncxf = gen_truncxf<mode>2;
19342 else
19343 gen_truncxf = gen_truncxf<mode>2_i387_noop_unspec;
19344
19345 emit_insn (gen_truncxf (operands[0], op1));
19346 DONE;
19347 })
19348
;; Int iterator over the two unspec codes UNSPEC_SIN and UNSPEC_COS.
19349 (define_int_iterator SINCOS
19350 [UNSPEC_SIN
19351 UNSPEC_COS])
19352
;; Maps UNSPEC_SIN to "sin" and UNSPEC_COS to "cos", for use as <sincos>
;; in pattern names and mnemonics.
19353 (define_int_attr sincos
19354 [(UNSPEC_SIN "sin")
19355 (UNSPEC_COS "cos")])
19356
;; XFmode sine or cosine on the x87, one insn per SINCOS unspec, output as
;; f<sincos>.  The input is tied to the destination register.  It requires
;; TARGET_USE_FANCY_MATH_387 and unsafe math optimizations.
19357 (define_insn "<sincos>xf2"
19358 [(set (match_operand:XF 0 "register_operand" "=f")
19359 (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
19360 SINCOS))]
19361 "TARGET_USE_FANCY_MATH_387
19362 && flag_unsafe_math_optimizations"
19363 "f<sincos>"
19364 [(set_attr "type" "fpspc")
19365 (set_attr "znver1_decode" "vector")
19366 (set_attr "mode" "XF")])
19367
;; Expander for MODEF sine or cosine via the x87.  The input is extended
;; to XFmode, <sincos>xf2 is applied, and the result is truncated to
;; <mode> into operand 0.  It is used only when SSE math is off for the
;; mode or TARGET_MIX_SSE_I387 is set, and it requires unsafe math
;; optimizations.
19368 (define_expand "<sincos><mode>2"
19369 [(set (match_operand:MODEF 0 "register_operand")
19370 (unspec:MODEF [(match_operand:MODEF 1 "general_operand")]
19371 SINCOS))]
19372 "TARGET_USE_FANCY_MATH_387
19373 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
19374 || TARGET_MIX_SSE_I387)
19375 && flag_unsafe_math_optimizations"
19376 {
19377 rtx op0 = gen_reg_rtx (XFmode);
19378 rtx op1 = gen_reg_rtx (XFmode);
19379
19380 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
19381 emit_insn (gen_<sincos>xf2 (op0, op1));
19382 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
19383 DONE;
19384 })
19385
;; One fsincos instruction.  Operand 0 receives UNSPEC_SINCOS_COS of input
;; operand 2, which is tied to operand 0.  Operand 1 receives
;; UNSPEC_SINCOS_SIN of the same input.  It requires
;; TARGET_USE_FANCY_MATH_387 and unsafe math optimizations.
19386 (define_insn "sincosxf3"
19387 [(set (match_operand:XF 0 "register_operand" "=f")
19388 (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
19389 UNSPEC_SINCOS_COS))
19390 (set (match_operand:XF 1 "register_operand" "=f")
19391 (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
19392 "TARGET_USE_FANCY_MATH_387
19393 && flag_unsafe_math_optimizations"
19394 "fsincos"
19395 [(set_attr "type" "fpspc")
19396 (set_attr "znver1_decode" "vector")
19397 (set_attr "mode" "XF")])
19398
;; sincos<mode>3 for MODEF modes: extend operand 2 to XFmode, run sincosxf3,
;; and truncate both XFmode results.  Operand 0 gets sincosxf3's first
;; output (UNSPEC_SINCOS_COS) and operand 1 its second (UNSPEC_SINCOS_SIN).
;; Only enabled when the mode is not computed purely with SSE math, or when
;; TARGET_MIX_SSE_I387 holds.
19399 (define_expand "sincos<mode>3"
19400 [(use (match_operand:MODEF 0 "register_operand"))
19401 (use (match_operand:MODEF 1 "register_operand"))
19402 (use (match_operand:MODEF 2 "general_operand"))]
19403 "TARGET_USE_FANCY_MATH_387
19404 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
19405 || TARGET_MIX_SSE_I387)
19406 && flag_unsafe_math_optimizations"
19407 {
/* op0/op1: the two XFmode results, op2: XFmode copy of the input.  */
19408 rtx op0 = gen_reg_rtx (XFmode);
19409 rtx op1 = gen_reg_rtx (XFmode);
19410 rtx op2 = gen_reg_rtx (XFmode);
19411
19412 emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
19413 emit_insn (gen_sincosxf3 (op0, op1, op2));
19414 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
19415 emit_insn (gen_truncxf<mode>2 (operands[1], op1));
19416 DONE;
19417 })
19418
;; "fptan" instruction pattern with two outputs: SFmode operand 0 is set to
;; operand 3 (which must satisfy const1_operand), and XFmode operand 1 is
;; set to UNSPEC_TAN of operand 2.  Operand 2 is tied to operand 0 by
;; constraint "0".
19419 (define_insn "fptanxf4_i387"
19420 [(set (match_operand:SF 0 "register_operand" "=f")
19421 (match_operand:SF 3 "const1_operand"))
19422 (set (match_operand:XF 1 "register_operand" "=f")
19423 (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
19424 UNSPEC_TAN))]
19425 "TARGET_USE_FANCY_MATH_387
19426 && flag_unsafe_math_optimizations"
19427 "fptan"
19428 [(set_attr "type" "fpspc")
19429 (set_attr "znver1_decode" "vector")
19430 (set_attr "mode" "XF")])
19431
;; XFmode tangent: emits fptanxf4_i387 with operand 0 as the tangent result
;; and operand 1 as the input.  The instruction's extra SFmode output
;; (the constant 1) goes into a fresh pseudo that is not used afterwards.
19432 (define_expand "tanxf2"
19433 [(use (match_operand:XF 0 "register_operand"))
19434 (use (match_operand:XF 1 "register_operand"))]
19435 "TARGET_USE_FANCY_MATH_387
19436 && flag_unsafe_math_optimizations"
19437 {
/* Destination for the constant-1 output of fptanxf4_i387.  */
19438 rtx one = gen_reg_rtx (SFmode);
19439 emit_insn (gen_fptanxf4_i387 (one, operands[0], operands[1],
19440 CONST1_RTX (SFmode)));
19441 DONE;
19442 })
19443
;; tan<mode>2 for MODEF modes: extend operand 1 to XFmode, expand tanxf2,
;; and truncate the XFmode result into operand 0.  Only enabled when the
;; mode is not computed purely with SSE math, or when TARGET_MIX_SSE_I387
;; holds.
19444 (define_expand "tan<mode>2"
19445 [(use (match_operand:MODEF 0 "register_operand"))
19446 (use (match_operand:MODEF 1 "general_operand"))]
19447 "TARGET_USE_FANCY_MATH_387
19448 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
19449 || TARGET_MIX_SSE_I387)
19450 && flag_unsafe_math_optimizations"
19451 {
/* op0: XFmode result, op1: XFmode copy of the input.  */
19452 rtx op0 = gen_reg_rtx (XFmode);
19453 rtx op1 = gen_reg_rtx (XFmode);
19454
19455 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
19456 emit_insn (gen_tanxf2 (op0, op1));
19457 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
19458 DONE;
19459 })
19460
;; XFmode atan2 emitted as "fpatan".  Operand 0 is set to UNSPEC_FPATAN of
;; [operand 2, operand 1].  Operand 2 is tied to the output (constraint
;; "0"); the register holding operand 1 is clobbered through scratch
;; operand 3 (constraint "=1").
19461 (define_insn "atan2xf3"
19462 [(set (match_operand:XF 0 "register_operand" "=f")
19463 (unspec:XF [(match_operand:XF 2 "register_operand" "0")
19464 (match_operand:XF 1 "register_operand" "f")]
19465 UNSPEC_FPATAN))
19466 (clobber (match_scratch:XF 3 "=1"))]
19467 "TARGET_USE_FANCY_MATH_387
19468 && flag_unsafe_math_optimizations"
19469 "fpatan"
19470 [(set_attr "type" "fpspc")
19471 (set_attr "znver1_decode" "vector")
19472 (set_attr "mode" "XF")])
19473
;; atan2<mode>3 for MODEF modes: extend operands 2 and 1 to XFmode, run
;; atan2xf3 on the widened values, and truncate the result into operand 0.
;; Only enabled when the mode is not computed purely with SSE math, or when
;; TARGET_MIX_SSE_I387 holds.
19474 (define_expand "atan2<mode>3"
19475 [(use (match_operand:MODEF 0 "register_operand"))
19476 (use (match_operand:MODEF 1 "general_operand"))
19477 (use (match_operand:MODEF 2 "general_operand"))]
19478 "TARGET_USE_FANCY_MATH_387
19479 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
19480 || TARGET_MIX_SSE_I387)
19481 && flag_unsafe_math_optimizations"
19482 {
/* op0: XFmode result; op1/op2: XFmode copies of operands 1 and 2.  */
19483 rtx op0 = gen_reg_rtx (XFmode);
19484 rtx op1 = gen_reg_rtx (XFmode);
19485 rtx op2 = gen_reg_rtx (XFmode);
19486
/* Operand 2 is extended before operand 1.  */
19487 emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
19488 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
19489
19490 emit_insn (gen_atan2xf3 (op0, op1, op2));
19491 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
19492 DONE;
19493 })
19494
;; XFmode arctangent: generates the UNSPEC_FPATAN form with operand 1 as
;; one input and the constant 1.0 (forced into a register as operand 2) as
;; the other.  The preparation statement only supplies operand 2; the RTL
;; template above is what gets emitted.
19495 (define_expand "atanxf2"
19496 [(parallel [(set (match_operand:XF 0 "register_operand")
19497 (unspec:XF [(match_dup 2)
19498 (match_operand:XF 1 "register_operand")]
19499 UNSPEC_FPATAN))
19500 (clobber (scratch:XF))])]
19501 "TARGET_USE_FANCY_MATH_387
19502 && flag_unsafe_math_optimizations"
19503 "operands[2] = force_reg (XFmode, CONST1_RTX (XFmode));")
19504
;; atan<mode>2 for MODEF modes: extend operand 1 to XFmode, expand atanxf2,
;; and truncate the XFmode result into operand 0.  Only enabled when the
;; mode is not computed purely with SSE math, or when TARGET_MIX_SSE_I387
;; holds.
19505 (define_expand "atan<mode>2"
19506 [(use (match_operand:MODEF 0 "register_operand"))
19507 (use (match_operand:MODEF 1 "general_operand"))]
19508 "TARGET_USE_FANCY_MATH_387
19509 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
19510 || TARGET_MIX_SSE_I387)
19511 && flag_unsafe_math_optimizations"
19512 {
/* op0: XFmode result, op1: XFmode copy of the input.  */
19513 rtx op0 = gen_reg_rtx (XFmode);
19514 rtx op1 = gen_reg_rtx (XFmode);
19515
19516 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
19517 emit_insn (gen_atanxf2 (op0, op1));
19518 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
19519 DONE;
19520 })
19521
;; XFmode arcsine built from simpler operations (x = operand 1):
;;   op2 = x * x
;;   op4 = op3 - op2          (op3 is loaded with 1.0 below)
;;   op5 = sqrt (op4)
;;   op0 = UNSPEC_FPATAN [op5, x]
;; i.e. the fpatan form applied to x and sqrt (1 - x*x).
19522 (define_expand "asinxf2"
19523 [(set (match_dup 2)
19524 (mult:XF (match_operand:XF 1 "register_operand")
19525 (match_dup 1)))
19526 (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2)))
19527 (set (match_dup 5) (sqrt:XF (match_dup 4)))
19528 (parallel [(set (match_operand:XF 0 "register_operand")
19529 (unspec:XF [(match_dup 5) (match_dup 1)]
19530 UNSPEC_FPATAN))
19531 (clobber (scratch:XF))])]
19532 "TARGET_USE_FANCY_MATH_387
19533 && flag_unsafe_math_optimizations"
19534 {
19535 int i;
19536
/* Allocate XFmode pseudos for the internal operands 2..5.  */
19537 for (i = 2; i < 6; i++)
19538 operands[i] = gen_reg_rtx (XFmode);
19539
/* Operand 3 is the constant 1.0 used in the subtraction.  */
19540 emit_move_insn (operands[3], CONST1_RTX (XFmode));
19541 })
19542
;; asin<mode>2 for MODEF modes: extend operand 1 to XFmode, expand asinxf2,
;; and truncate the XFmode result into operand 0.  Only enabled when the
;; mode is not computed purely with SSE math, or when TARGET_MIX_SSE_I387
;; holds.
19543 (define_expand "asin<mode>2"
19544 [(use (match_operand:MODEF 0 "register_operand"))
19545 (use (match_operand:MODEF 1 "general_operand"))]
19546 "TARGET_USE_FANCY_MATH_387
19547 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
19548 || TARGET_MIX_SSE_I387)
19549 && flag_unsafe_math_optimizations"
19550 {
/* op0: XFmode result, op1: XFmode copy of the input.  */
19551 rtx op0 = gen_reg_rtx (XFmode);
19552 rtx op1 = gen_reg_rtx (XFmode);
19553
19554 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
19555 emit_insn (gen_asinxf2 (op0, op1));
19556 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
19557 DONE;
19558 })
19559
;; XFmode arccosine built from simpler operations (x = operand 1):
;;   op2 = x * x
;;   op4 = op3 - op2          (op3 is loaded with 1.0 below)
;;   op5 = sqrt (op4)
;;   op0 = UNSPEC_FPATAN [x, op5]
;; This is the same sequence as asinxf2 with the two UNSPEC_FPATAN inputs
;; swapped.
19560 (define_expand "acosxf2"
19561 [(set (match_dup 2)
19562 (mult:XF (match_operand:XF 1 "register_operand")
19563 (match_dup 1)))
19564 (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2)))
19565 (set (match_dup 5) (sqrt:XF (match_dup 4)))
19566 (parallel [(set (match_operand:XF 0 "register_operand")
19567 (unspec:XF [(match_dup 1) (match_dup 5)]
19568 UNSPEC_FPATAN))
19569 (clobber (scratch:XF))])]
19570 "TARGET_USE_FANCY_MATH_387
19571 && flag_unsafe_math_optimizations"
19572 {
19573 int i;
19574
/* Allocate XFmode pseudos for the internal operands 2..5.  */
19575 for (i = 2; i < 6; i++)
19576 operands[i] = gen_reg_rtx (XFmode);
19577
/* Operand 3 is the constant 1.0 used in the subtraction.  */
19578 emit_move_insn (operands[3], CONST1_RTX (XFmode));
19579 })
19580
;; acos<mode>2 for MODEF modes: extend operand 1 to XFmode, expand acosxf2,
;; and truncate the XFmode result into operand 0.  Only enabled when the
;; mode is not computed purely with SSE math, or when TARGET_MIX_SSE_I387
;; holds.
19581 (define_expand "acos<mode>2"
19582 [(use (match_operand:MODEF 0 "register_operand"))
19583 (use (match_operand:MODEF 1 "general_operand"))]
19584 "TARGET_USE_FANCY_MATH_387
19585 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
19586 || TARGET_MIX_SSE_I387)
19587 && flag_unsafe_math_optimizations"
19588 {
/* op0: XFmode result, op1: XFmode copy of the input.  */
19589 rtx op0 = gen_reg_rtx (XFmode);
19590 rtx op1 = gen_reg_rtx (XFmode);
19591
19592 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
19593 emit_insn (gen_acosxf2 (op0, op1));
19594 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
19595 DONE;
19596 })
19597
;; XFmode hyperbolic sine.  The whole expansion is delegated to
;; ix86_emit_i387_sinh (defined outside this file section).  Unlike coshxf2
;; and tanhxf2, this pattern additionally requires flag_finite_math_only.
19598 (define_expand "sinhxf2"
19599 [(use (match_operand:XF 0 "register_operand"))
19600 (use (match_operand:XF 1 "register_operand"))]
19601 "TARGET_USE_FANCY_MATH_387
19602 && flag_finite_math_only
19603 && flag_unsafe_math_optimizations"
19604 {
19605 ix86_emit_i387_sinh (operands[0], operands[1]);
19606 DONE;
19607 })
19608
;; sinh<mode>2 for MODEF modes: extend operand 1 to XFmode, expand sinhxf2,
;; and truncate the XFmode result into operand 0.  Requires
;; flag_finite_math_only like sinhxf2, and is only enabled when the mode is
;; not computed purely with SSE math, or when TARGET_MIX_SSE_I387 holds.
19609 (define_expand "sinh<mode>2"
19610 [(use (match_operand:MODEF 0 "register_operand"))
19611 (use (match_operand:MODEF 1 "general_operand"))]
19612 "TARGET_USE_FANCY_MATH_387
19613 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
19614 || TARGET_MIX_SSE_I387)
19615 && flag_finite_math_only
19616 && flag_unsafe_math_optimizations"
19617 {
/* op0: XFmode result, op1: XFmode copy of the input.  */
19618 rtx op0 = gen_reg_rtx (XFmode);
19619 rtx op1 = gen_reg_rtx (XFmode);
19620
19621 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
19622 emit_insn (gen_sinhxf2 (op0, op1));
19623 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
19624 DONE;
19625 })
19626
;; XFmode hyperbolic cosine.  The whole expansion is delegated to
;; ix86_emit_i387_cosh (defined outside this file section).
19627 (define_expand "coshxf2"
19628 [(use (match_operand:XF 0 "register_operand"))
19629 (use (match_operand:XF 1 "register_operand"))]
19630 "TARGET_USE_FANCY_MATH_387
19631 && flag_unsafe_math_optimizations"
19632 {
19633 ix86_emit_i387_cosh (operands[0], operands[1]);
19634 DONE;
19635 })
19636
;; cosh<mode>2 for MODEF modes: extend operand 1 to XFmode, expand coshxf2,
;; and truncate the XFmode result into operand 0.  Only enabled when the
;; mode is not computed purely with SSE math, or when TARGET_MIX_SSE_I387
;; holds.
19637 (define_expand "cosh<mode>2"
19638 [(use (match_operand:MODEF 0 "register_operand"))
19639 (use (match_operand:MODEF 1 "general_operand"))]
19640 "TARGET_USE_FANCY_MATH_387
19641 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
19642 || TARGET_MIX_SSE_I387)
19643 && flag_unsafe_math_optimizations"
19644 {
/* op0: XFmode result, op1: XFmode copy of the input.  */
19645 rtx op0 = gen_reg_rtx (XFmode);
19646 rtx op1 = gen_reg_rtx (XFmode);
19647
19648 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
19649 emit_insn (gen_coshxf2 (op0, op1));
19650 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
19651 DONE;
19652 })
19653
;; XFmode hyperbolic tangent.  The whole expansion is delegated to
;; ix86_emit_i387_tanh (defined outside this file section).
19654 (define_expand "tanhxf2"
19655 [(use (match_operand:XF 0 "register_operand"))
19656 (use (match_operand:XF 1 "register_operand"))]
19657 "TARGET_USE_FANCY_MATH_387
19658 && flag_unsafe_math_optimizations"
19659 {
19660 ix86_emit_i387_tanh (operands[0], operands[1]);
19661 DONE;
19662 })
19663
;; tanh<mode>2 for MODEF modes: extend operand 1 to XFmode, expand tanhxf2,
;; and truncate the XFmode result into operand 0.  Only enabled when the
;; mode is not computed purely with SSE math, or when TARGET_MIX_SSE_I387
;; holds.
19664 (define_expand "tanh<mode>2"
19665 [(use (match_operand:MODEF 0 "register_operand"))
19666 (use (match_operand:MODEF 1 "general_operand"))]
19667 "TARGET_USE_FANCY_MATH_387
19668 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
19669 || TARGET_MIX_SSE_I387)
19670 && flag_unsafe_math_optimizations"
19671 {
/* op0: XFmode result, op1: XFmode copy of the input.  */
19672 rtx op0 = gen_reg_rtx (XFmode);
19673 rtx op1 = gen_reg_rtx (XFmode);
19674
19675 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
19676 emit_insn (gen_tanhxf2 (op0, op1));
19677 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
19678 DONE;
19679 })
19680
;; XFmode inverse hyperbolic sine.  The whole expansion is delegated to
;; ix86_emit_i387_asinh (defined outside this file section).  Like sinhxf2,
;; this pattern additionally requires flag_finite_math_only.
19681 (define_expand "asinhxf2"
19682 [(use (match_operand:XF 0 "register_operand"))
19683 (use (match_operand:XF 1 "register_operand"))]
19684 "TARGET_USE_FANCY_MATH_387
19685 && flag_finite_math_only
19686 && flag_unsafe_math_optimizations"
19687 {
19688 ix86_emit_i387_asinh (operands[0], operands[1]);
19689 DONE;
19690 })
19691
;; asinh<mode>2 for MODEF modes: extend operand 1 to XFmode, expand
;; asinhxf2, and truncate the XFmode result into operand 0.  Requires
;; flag_finite_math_only like asinhxf2, and is only enabled when the mode is
;; not computed purely with SSE math, or when TARGET_MIX_SSE_I387 holds.
19692 (define_expand "asinh<mode>2"
19693 [(use (match_operand:MODEF 0 "register_operand"))
19694 (use (match_operand:MODEF 1 "general_operand"))]
19695 "TARGET_USE_FANCY_MATH_387
19696 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
19697 || TARGET_MIX_SSE_I387)
19698 && flag_finite_math_only
19699 && flag_unsafe_math_optimizations"
19700 {
/* op0: XFmode result, op1: XFmode copy of the input.  */
19701 rtx op0 = gen_reg_rtx (XFmode);
19702 rtx op1 = gen_reg_rtx (XFmode);
19703
19704 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
19705 emit_insn (gen_asinhxf2 (op0, op1));
19706 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
19707 DONE;
19708 })
19709
;; XFmode inverse hyperbolic cosine.  The whole expansion is delegated to
;; ix86_emit_i387_acosh (defined outside this file section).
19710 (define_expand "acoshxf2"
19711 [(use (match_operand:XF 0 "register_operand"))
19712 (use (match_operand:XF 1 "register_operand"))]
19713 "TARGET_USE_FANCY_MATH_387
19714 && flag_unsafe_math_optimizations"
19715 {
19716 ix86_emit_i387_acosh (operands[0], operands[1]);
19717 DONE;
19718 })
19719
;; acosh<mode>2 for MODEF modes: extend operand 1 to XFmode, expand
;; acoshxf2, and truncate the XFmode result into operand 0.  Only enabled
;; when the mode is not computed purely with SSE math, or when
;; TARGET_MIX_SSE_I387 holds.
19720 (define_expand "acosh<mode>2"
19721 [(use (match_operand:MODEF 0 "register_operand"))
19722 (use (match_operand:MODEF 1 "general_operand"))]
19723 "TARGET_USE_FANCY_MATH_387
19724 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
19725 || TARGET_MIX_SSE_I387)
19726 && flag_unsafe_math_optimizations"
19727 {
/* op0: XFmode result, op1: XFmode copy of the input.  */
19728 rtx op0 = gen_reg_rtx (XFmode);
19729 rtx op1 = gen_reg_rtx (XFmode);
19730
19731 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
19732 emit_insn (gen_acoshxf2 (op0, op1));
19733 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
19734 DONE;
19735 })
19736
;; XFmode inverse hyperbolic tangent.  The whole expansion is delegated to
;; ix86_emit_i387_atanh (defined outside this file section).
19737 (define_expand "atanhxf2"
19738 [(use (match_operand:XF 0 "register_operand"))
19739 (use (match_operand:XF 1 "register_operand"))]
19740 "TARGET_USE_FANCY_MATH_387
19741 && flag_unsafe_math_optimizations"
19742 {
19743 ix86_emit_i387_atanh (operands[0], operands[1]);
19744 DONE;
19745 })
19746
;; atanh<mode>2 for MODEF modes: extend operand 1 to XFmode, expand
;; atanhxf2, and truncate the XFmode result into operand 0.  Only enabled
;; when the mode is not computed purely with SSE math, or when
;; TARGET_MIX_SSE_I387 holds.
19747 (define_expand "atanh<mode>2"
19748 [(use (match_operand:MODEF 0 "register_operand"))
19749 (use (match_operand:MODEF 1 "general_operand"))]
19750 "TARGET_USE_FANCY_MATH_387
19751 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
19752 || TARGET_MIX_SSE_I387)
19753 && flag_unsafe_math_optimizations"
19754 {
/* op0: XFmode result, op1: XFmode copy of the input.  */
19755 rtx op0 = gen_reg_rtx (XFmode);
19756 rtx op1 = gen_reg_rtx (XFmode);
19757
19758 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
19759 emit_insn (gen_atanhxf2 (op0, op1));
19760 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
19761 DONE;
19762 })
19763
;; "fyl2x" instruction pattern.  Operand 0 is set to UNSPEC_FYL2X of
;; [operand 1, operand 2].  Operand 1 is tied to the output (constraint
;; "0"); the register holding operand 2 is clobbered through scratch
;; operand 3 (constraint "=2").  Used by the log, log10 and log2 expanders.
19764 (define_insn "fyl2xxf3_i387"
19765 [(set (match_operand:XF 0 "register_operand" "=f")
19766 (unspec:XF [(match_operand:XF 1 "register_operand" "0")
19767 (match_operand:XF 2 "register_operand" "f")]
19768 UNSPEC_FYL2X))
19769 (clobber (match_scratch:XF 3 "=2"))]
19770 "TARGET_USE_FANCY_MATH_387
19771 && flag_unsafe_math_optimizations"
19772 "fyl2x"
19773 [(set_attr "type" "fpspc")
19774 (set_attr "znver1_decode" "vector")
19775 (set_attr "mode" "XF")])
19776
;; XFmode natural logarithm: generates the UNSPEC_FYL2X form with operand 1
;; as the first input and, as the second, the x87 constant selected by
;; standard_80387_constant_rtx (4) (fldln2) forced into a register.
19777 (define_expand "logxf2"
19778 [(parallel [(set (match_operand:XF 0 "register_operand")
19779 (unspec:XF [(match_operand:XF 1 "register_operand")
19780 (match_dup 2)] UNSPEC_FYL2X))
19781 (clobber (scratch:XF))])]
19782 "TARGET_USE_FANCY_MATH_387
19783 && flag_unsafe_math_optimizations"
19784 {
19785 operands[2]
19786 = force_reg (XFmode, standard_80387_constant_rtx (4)); /* fldln2 */
19787 })
19788
;; log<mode>2 for MODEF modes: extend operand 1 to XFmode, expand logxf2,
;; and truncate the XFmode result into operand 0.  Only enabled when the
;; mode is not computed purely with SSE math, or when TARGET_MIX_SSE_I387
;; holds.
19789 (define_expand "log<mode>2"
19790 [(use (match_operand:MODEF 0 "register_operand"))
19791 (use (match_operand:MODEF 1 "general_operand"))]
19792 "TARGET_USE_FANCY_MATH_387
19793 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
19794 || TARGET_MIX_SSE_I387)
19795 && flag_unsafe_math_optimizations"
19796 {
/* op0: XFmode result, op1: XFmode copy of the input.  */
19797 rtx op0 = gen_reg_rtx (XFmode);
19798 rtx op1 = gen_reg_rtx (XFmode);
19799
19800 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
19801 emit_insn (gen_logxf2 (op0, op1));
19802 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
19803 DONE;
19804 })
19805
;; XFmode base-10 logarithm: generates the UNSPEC_FYL2X form with operand 1
;; as the first input and, as the second, the x87 constant selected by
;; standard_80387_constant_rtx (3) (fldlg2) forced into a register.
19806 (define_expand "log10xf2"
19807 [(parallel [(set (match_operand:XF 0 "register_operand")
19808 (unspec:XF [(match_operand:XF 1 "register_operand")
19809 (match_dup 2)] UNSPEC_FYL2X))
19810 (clobber (scratch:XF))])]
19811 "TARGET_USE_FANCY_MATH_387
19812 && flag_unsafe_math_optimizations"
19813 {
19814 operands[2]
19815 = force_reg (XFmode, standard_80387_constant_rtx (3)); /* fldlg2 */
19816 })
19817
;; log10<mode>2 for MODEF modes: extend operand 1 to XFmode, expand
;; log10xf2, and truncate the XFmode result into operand 0.  Only enabled
;; when the mode is not computed purely with SSE math, or when
;; TARGET_MIX_SSE_I387 holds.
19818 (define_expand "log10<mode>2"
19819 [(use (match_operand:MODEF 0 "register_operand"))
19820 (use (match_operand:MODEF 1 "general_operand"))]
19821 "TARGET_USE_FANCY_MATH_387
19822 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
19823 || TARGET_MIX_SSE_I387)
19824 && flag_unsafe_math_optimizations"
19825 {
/* op0: XFmode result, op1: XFmode copy of the input.  */
19826 rtx op0 = gen_reg_rtx (XFmode);
19827 rtx op1 = gen_reg_rtx (XFmode);
19828
19829 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
19830 emit_insn (gen_log10xf2 (op0, op1));
19831 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
19832 DONE;
19833 })
19834
;; XFmode base-2 logarithm: generates the UNSPEC_FYL2X form with operand 1
;; as the first input and the constant 1.0 (forced into a register as
;; operand 2) as the second.
19835 (define_expand "log2xf2"
19836 [(parallel [(set (match_operand:XF 0 "register_operand")
19837 (unspec:XF [(match_operand:XF 1 "register_operand")
19838 (match_dup 2)] UNSPEC_FYL2X))
19839 (clobber (scratch:XF))])]
19840 "TARGET_USE_FANCY_MATH_387
19841 && flag_unsafe_math_optimizations"
19842 "operands[2] = force_reg (XFmode, CONST1_RTX (XFmode));")
19843
;; log2<mode>2 for MODEF modes: extend operand 1 to XFmode, expand log2xf2,
;; and truncate the XFmode result into operand 0.  Only enabled when the
;; mode is not computed purely with SSE math, or when TARGET_MIX_SSE_I387
;; holds.
19844 (define_expand "log2<mode>2"
19845 [(use (match_operand:MODEF 0 "register_operand"))
19846 (use (match_operand:MODEF 1 "general_operand"))]
19847 "TARGET_USE_FANCY_MATH_387
19848 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
19849 || TARGET_MIX_SSE_I387)
19850 && flag_unsafe_math_optimizations"
19851 {
/* op0: XFmode result, op1: XFmode copy of the input.  */
19852 rtx op0 = gen_reg_rtx (XFmode);
19853 rtx op1 = gen_reg_rtx (XFmode);
19854
19855 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
19856 emit_insn (gen_log2xf2 (op0, op1));
19857 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
19858 DONE;
19859 })
19860
;; "fyl2xp1" instruction pattern.  Operand 0 is set to UNSPEC_FYL2XP1 of
;; [operand 1, operand 2].  Operand 1 is tied to the output (constraint
;; "0"); the register holding operand 2 is clobbered through scratch
;; operand 3 (constraint "=2").
19861 (define_insn "fyl2xp1xf3_i387"
19862 [(set (match_operand:XF 0 "register_operand" "=f")
19863 (unspec:XF [(match_operand:XF 1 "register_operand" "0")
19864 (match_operand:XF 2 "register_operand" "f")]
19865 UNSPEC_FYL2XP1))
19866 (clobber (match_scratch:XF 3 "=2"))]
19867 "TARGET_USE_FANCY_MATH_387
19868 && flag_unsafe_math_optimizations"
19869 "fyl2xp1"
19870 [(set_attr "type" "fpspc")
19871 (set_attr "znver1_decode" "vector")
19872 (set_attr "mode" "XF")])
19873
;; XFmode log1p.  The whole expansion is delegated to ix86_emit_i387_log1p
;; (defined outside this file section).
19874 (define_expand "log1pxf2"
19875 [(use (match_operand:XF 0 "register_operand"))
19876 (use (match_operand:XF 1 "register_operand"))]
19877 "TARGET_USE_FANCY_MATH_387
19878 && flag_unsafe_math_optimizations"
19879 {
19880 ix86_emit_i387_log1p (operands[0], operands[1]);
19881 DONE;
19882 })
19883
;; log1p<mode>2 for MODEF modes: extend operand 1 to XFmode, expand
;; log1pxf2, and truncate the XFmode result into operand 0.  Only enabled
;; when the mode is not computed purely with SSE math, or when
;; TARGET_MIX_SSE_I387 holds.
19884 (define_expand "log1p<mode>2"
19885 [(use (match_operand:MODEF 0 "register_operand"))
19886 (use (match_operand:MODEF 1 "general_operand"))]
19887 "TARGET_USE_FANCY_MATH_387
19888 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
19889 || TARGET_MIX_SSE_I387)
19890 && flag_unsafe_math_optimizations"
19891 {
/* op0: XFmode result, op1: XFmode copy of the input.  */
19892 rtx op0 = gen_reg_rtx (XFmode);
19893 rtx op1 = gen_reg_rtx (XFmode);
19894
19895 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
19896 emit_insn (gen_log1pxf2 (op0, op1));
19897 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
19898 DONE;
19899 })
19900
;; "fxtract" instruction pattern with two outputs computed from operand 2:
;; operand 0 receives UNSPEC_XTRACT_FRACT and operand 1 receives
;; UNSPEC_XTRACT_EXP.  Operand 2 is tied to operand 0 (constraint "0").
19901 (define_insn "fxtractxf3_i387"
19902 [(set (match_operand:XF 0 "register_operand" "=f")
19903 (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
19904 UNSPEC_XTRACT_FRACT))
19905 (set (match_operand:XF 1 "register_operand" "=f")
19906 (unspec:XF [(match_dup 2)] UNSPEC_XTRACT_EXP))]
19907 "TARGET_USE_FANCY_MATH_387
19908 && flag_unsafe_math_optimizations"
19909 "fxtract"
19910 [(set_attr "type" "fpspc")
19911 (set_attr "znver1_decode" "vector")
19912 (set_attr "mode" "XF")])
19913
;; XFmode logb: generates the two-output fxtract form for operand 1.
;; Operand 0 receives the UNSPEC_XTRACT_EXP result; the
;; UNSPEC_XTRACT_FRACT result goes into a fresh pseudo (operand 2) that
;; this expander does not use further.
19914 (define_expand "logbxf2"
19915 [(parallel [(set (match_dup 2)
19916 (unspec:XF [(match_operand:XF 1 "register_operand")]
19917 UNSPEC_XTRACT_FRACT))
19918 (set (match_operand:XF 0 "register_operand")
19919 (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])]
19920 "TARGET_USE_FANCY_MATH_387
19921 && flag_unsafe_math_optimizations"
19922 "operands[2] = gen_reg_rtx (XFmode);")
19923
;; logb<mode>2 for MODEF modes: extend operand 1 to XFmode, expand logbxf2,
;; and truncate the XFmode result into operand 0.  Only enabled when the
;; mode is not computed purely with SSE math, or when TARGET_MIX_SSE_I387
;; holds.
19924 (define_expand "logb<mode>2"
19925 [(use (match_operand:MODEF 0 "register_operand"))
19926 (use (match_operand:MODEF 1 "general_operand"))]
19927 "TARGET_USE_FANCY_MATH_387
19928 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
19929 || TARGET_MIX_SSE_I387)
19930 && flag_unsafe_math_optimizations"
19931 {
/* op0: XFmode result, op1: XFmode copy of the input.  */
19932 rtx op0 = gen_reg_rtx (XFmode);
19933 rtx op1 = gen_reg_rtx (XFmode);
19934
19935 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
19936 emit_insn (gen_logbxf2 (op0, op1));
/* logbxf2 writes its result (UNSPEC_XTRACT_EXP) to its operand 0, i.e. op0
   here.  op1 only holds the widened input, so truncating op1 would hand
   the input value back unchanged.  */
19937 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
19938 DONE;
19939 })
19940
;; ilogb of an XFmode value into an SImode register: run fxtractxf3_i387 on
;; operand 1 and convert its second output (the UNSPEC_XTRACT_EXP result)
;; to SImode with fix_truncxfsi2.  The expander FAILs when the insn is
;; being optimized for size.
19941 (define_expand "ilogbxf2"
19942 [(use (match_operand:SI 0 "register_operand"))
19943 (use (match_operand:XF 1 "register_operand"))]
19944 "TARGET_USE_FANCY_MATH_387
19945 && flag_unsafe_math_optimizations"
19946 {
19947 rtx op0, op1;
19948
19949 if (optimize_insn_for_size_p ())
19950 FAIL;
19951
/* op0: XTRACT_FRACT output (unused afterwards), op1: XTRACT_EXP output.  */
19952 op0 = gen_reg_rtx (XFmode);
19953 op1 = gen_reg_rtx (XFmode);
19954
19955 emit_insn (gen_fxtractxf3_i387 (op0, op1, operands[1]));
19956 emit_insn (gen_fix_truncxfsi2 (operands[0], op1));
19957 DONE;
19958 })
19959
;; ilogb of a MODEF value into an SImode register: extend operand 1 to
;; XFmode, run fxtractxf3_i387, and convert its second output (the
;; UNSPEC_XTRACT_EXP result) to SImode with fix_truncxfsi2.  FAILs when the
;; insn is being optimized for size.  Only enabled when the mode is not
;; computed purely with SSE math, or when TARGET_MIX_SSE_I387 holds.
19960 (define_expand "ilogb<mode>2"
19961 [(use (match_operand:SI 0 "register_operand"))
19962 (use (match_operand:MODEF 1 "general_operand"))]
19963 "TARGET_USE_FANCY_MATH_387
19964 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
19965 || TARGET_MIX_SSE_I387)
19966 && flag_unsafe_math_optimizations"
19967 {
19968 rtx op0, op1, op2;
19969
19970 if (optimize_insn_for_size_p ())
19971 FAIL;
19972
/* op0: XTRACT_FRACT output (unused afterwards), op1: XTRACT_EXP output,
   op2: XFmode copy of the input.  */
19973 op0 = gen_reg_rtx (XFmode);
19974 op1 = gen_reg_rtx (XFmode);
19975 op2 = gen_reg_rtx (XFmode);
19976
19977 emit_insn (gen_extend<mode>xf2 (op2, operands[1]));
19978 emit_insn (gen_fxtractxf3_i387 (op0, op1, op2));
19979 emit_insn (gen_fix_truncxfsi2 (operands[0], op1));
19980 DONE;
19981 })
19982
;; "f2xm1" instruction pattern: operand 0 is set to UNSPEC_F2XM1 of
;; operand 1, which is tied to the output register (constraint "0").  The
;; leading "*" in the name means no gen_ function is produced; the pattern
;; is only matched against RTL such as that generated by expNcorexf3 and
;; expm1xf2.
19983 (define_insn "*f2xm1xf2_i387"
19984 [(set (match_operand:XF 0 "register_operand" "=f")
19985 (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
19986 UNSPEC_F2XM1))]
19987 "TARGET_USE_FANCY_MATH_387
19988 && flag_unsafe_math_optimizations"
19989 "f2xm1"
19990 [(set_attr "type" "fpspc")
19991 (set_attr "znver1_decode" "vector")
19992 (set_attr "mode" "XF")])
19993
;; "fscale" instruction pattern with two outputs computed from operands 2
;; and 3: operand 0 receives UNSPEC_FSCALE_FRACT and operand 1 receives
;; UNSPEC_FSCALE_EXP.  Operand 2 is tied to operand 0 and operand 3 to
;; operand 1 (constraints "0" and "1").
19994 (define_insn "fscalexf4_i387"
19995 [(set (match_operand:XF 0 "register_operand" "=f")
19996 (unspec:XF [(match_operand:XF 2 "register_operand" "0")
19997 (match_operand:XF 3 "register_operand" "1")]
19998 UNSPEC_FSCALE_FRACT))
19999 (set (match_operand:XF 1 "register_operand" "=f")
20000 (unspec:XF [(match_dup 2) (match_dup 3)]
20001 UNSPEC_FSCALE_EXP))]
20002 "TARGET_USE_FANCY_MATH_387
20003 && flag_unsafe_math_optimizations"
20004 "fscale"
20005 [(set_attr "type" "fpspc")
20006 (set_attr "znver1_decode" "vector")
20007 (set_attr "mode" "XF")])
20008
;; Shared core of the expxf2 / exp10xf2 / exp2xf2 expanders.  Operand 1 is
;; the argument and operand 2 a multiplier supplied by the caller.  The
;; generated sequence is:
;;   op3 = op1 * op2
;;   op4 = UNSPEC_FRNDINT (op3)
;;   op5 = op3 - op4
;;   op6 = UNSPEC_F2XM1 (op5)
;;   op8 = op6 + op7           (op7 is loaded with 1.0 below)
;;   op0 = UNSPEC_FSCALE_FRACT [op8, op4]
;;   op9 = UNSPEC_FSCALE_EXP   [op8, op4]   (second fscale output, unused)
20009 (define_expand "expNcorexf3"
20010 [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand")
20011 (match_operand:XF 2 "register_operand")))
20012 (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT))
20013 (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4)))
20014 (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1))
20015 (set (match_dup 8) (plus:XF (match_dup 6) (match_dup 7)))
20016 (parallel [(set (match_operand:XF 0 "register_operand")
20017 (unspec:XF [(match_dup 8) (match_dup 4)]
20018 UNSPEC_FSCALE_FRACT))
20019 (set (match_dup 9)
20020 (unspec:XF [(match_dup 8) (match_dup 4)]
20021 UNSPEC_FSCALE_EXP))])]
20022 "TARGET_USE_FANCY_MATH_387
20023 && flag_unsafe_math_optimizations"
20024 {
20025 int i;
20026
/* Allocate XFmode pseudos for the internal operands 3..9.  */
20027 for (i = 3; i < 10; i++)
20028 operands[i] = gen_reg_rtx (XFmode);
20029
/* Operand 7 is the constant 1.0 added to the f2xm1 result.  */
20030 emit_move_insn (operands[7], CONST1_RTX (XFmode));
20031 })
20032
;; XFmode exp: expands expNcorexf3 with the x87 constant selected by
;; standard_80387_constant_rtx (5) (fldl2e) as the multiplier.
20033 (define_expand "expxf2"
20034 [(use (match_operand:XF 0 "register_operand"))
20035 (use (match_operand:XF 1 "register_operand"))]
20036 "TARGET_USE_FANCY_MATH_387
20037 && flag_unsafe_math_optimizations"
20038 {
20039 rtx op2 = force_reg (XFmode, standard_80387_constant_rtx (5)); /* fldl2e */
20040
20041 emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2));
20042 DONE;
20043 })
20044
;; exp<mode>2 for MODEF modes: extend operand 1 to XFmode, expand expxf2,
;; and truncate the XFmode result into operand 0.  Only enabled when the
;; mode is not computed purely with SSE math, or when TARGET_MIX_SSE_I387
;; holds.
20045 (define_expand "exp<mode>2"
20046 [(use (match_operand:MODEF 0 "register_operand"))
20047 (use (match_operand:MODEF 1 "general_operand"))]
20048 "TARGET_USE_FANCY_MATH_387
20049 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
20050 || TARGET_MIX_SSE_I387)
20051 && flag_unsafe_math_optimizations"
20052 {
/* op0: XFmode result, op1: XFmode copy of the input.  */
20053 rtx op0 = gen_reg_rtx (XFmode);
20054 rtx op1 = gen_reg_rtx (XFmode);
20055
20056 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
20057 emit_insn (gen_expxf2 (op0, op1));
20058 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
20059 DONE;
20060 })
20061
;; XFmode exp10: expands expNcorexf3 with the x87 constant selected by
;; standard_80387_constant_rtx (6) (fldl2t) as the multiplier.
20062 (define_expand "exp10xf2"
20063 [(use (match_operand:XF 0 "register_operand"))
20064 (use (match_operand:XF 1 "register_operand"))]
20065 "TARGET_USE_FANCY_MATH_387
20066 && flag_unsafe_math_optimizations"
20067 {
20068 rtx op2 = force_reg (XFmode, standard_80387_constant_rtx (6)); /* fldl2t */
20069
20070 emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2));
20071 DONE;
20072 })
20073
;; exp10<mode>2 for MODEF modes: extend operand 1 to XFmode, expand
;; exp10xf2, and truncate the XFmode result into operand 0.  Only enabled
;; when the mode is not computed purely with SSE math, or when
;; TARGET_MIX_SSE_I387 holds.
20074 (define_expand "exp10<mode>2"
20075 [(use (match_operand:MODEF 0 "register_operand"))
20076 (use (match_operand:MODEF 1 "general_operand"))]
20077 "TARGET_USE_FANCY_MATH_387
20078 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
20079 || TARGET_MIX_SSE_I387)
20080 && flag_unsafe_math_optimizations"
20081 {
/* op0: XFmode result, op1: XFmode copy of the input.  */
20082 rtx op0 = gen_reg_rtx (XFmode);
20083 rtx op1 = gen_reg_rtx (XFmode);
20084
20085 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
20086 emit_insn (gen_exp10xf2 (op0, op1));
20087 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
20088 DONE;
20089 })
20090
;; XFmode exp2: expands expNcorexf3 with the constant 1.0 as the
;; multiplier, so the argument is passed through the core unscaled.
20091 (define_expand "exp2xf2"
20092 [(use (match_operand:XF 0 "register_operand"))
20093 (use (match_operand:XF 1 "register_operand"))]
20094 "TARGET_USE_FANCY_MATH_387
20095 && flag_unsafe_math_optimizations"
20096 {
20097 rtx op2 = force_reg (XFmode, CONST1_RTX (XFmode));
20098
20099 emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2));
20100 DONE;
20101 })
20102
;; exp2<mode>2 for MODEF modes: extend operand 1 to XFmode, expand exp2xf2,
;; and truncate the XFmode result into operand 0.  Only enabled when the
;; mode is not computed purely with SSE math, or when TARGET_MIX_SSE_I387
;; holds.
20103 (define_expand "exp2<mode>2"
20104 [(use (match_operand:MODEF 0 "register_operand"))
20105 (use (match_operand:MODEF 1 "general_operand"))]
20106 "TARGET_USE_FANCY_MATH_387
20107 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
20108 || TARGET_MIX_SSE_I387)
20109 && flag_unsafe_math_optimizations"
20110 {
/* op0: XFmode result, op1: XFmode copy of the input.  */
20111 rtx op0 = gen_reg_rtx (XFmode);
20112 rtx op1 = gen_reg_rtx (XFmode);
20113
20114 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
20115 emit_insn (gen_exp2xf2 (op0, op1));
20116 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
20117 DONE;
20118 })
20119
;; XFmode expm1.  With x = operand 1, the generated sequence is:
;;   op3  = x * op2             (op2 is loaded with the fldl2e constant)
;;   op4  = UNSPEC_FRNDINT (op3)
;;   op5  = op3 - op4
;;   op6  = UNSPEC_F2XM1 (op5)
;;   op7, op8   = fscale outputs (FRACT, EXP) of [op6, op4]
;;   op10, op11 = fscale outputs (FRACT, EXP) of [op9, op8]  (op9 is 1.0)
;;   op12 = op10 - op9
;;   op0  = op12 + op7
;; The second fscale takes op8, the EXP output of the first, as its scale
;; input.
20120 (define_expand "expm1xf2"
20121 [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand")
20122 (match_dup 2)))
20123 (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT))
20124 (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4)))
20125 (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1))
20126 (parallel [(set (match_dup 7)
20127 (unspec:XF [(match_dup 6) (match_dup 4)]
20128 UNSPEC_FSCALE_FRACT))
20129 (set (match_dup 8)
20130 (unspec:XF [(match_dup 6) (match_dup 4)]
20131 UNSPEC_FSCALE_EXP))])
20132 (parallel [(set (match_dup 10)
20133 (unspec:XF [(match_dup 9) (match_dup 8)]
20134 UNSPEC_FSCALE_FRACT))
20135 (set (match_dup 11)
20136 (unspec:XF [(match_dup 9) (match_dup 8)]
20137 UNSPEC_FSCALE_EXP))])
20138 (set (match_dup 12) (minus:XF (match_dup 10) (match_dup 9)))
20139 (set (match_operand:XF 0 "register_operand")
20140 (plus:XF (match_dup 12) (match_dup 7)))]
20141 "TARGET_USE_FANCY_MATH_387
20142 && flag_unsafe_math_optimizations"
20143 {
20144 int i;
20145
/* Allocate XFmode pseudos for the internal operands 2..12.  */
20146 for (i = 2; i < 13; i++)
20147 operands[i] = gen_reg_rtx (XFmode);
20148
/* Load the two constants used by the template: the multiplier (operand 2)
   and 1.0 (operand 9).  */
20149 emit_move_insn (operands[2], standard_80387_constant_rtx (5)); /* fldl2e */
20150 emit_move_insn (operands[9], CONST1_RTX (XFmode));
20151 })
20152
;; expm1<mode>2 for MODEF modes: extend operand 1 to XFmode, expand
;; expm1xf2, and truncate the XFmode result into operand 0.  Only enabled
;; when the mode is not computed purely with SSE math, or when
;; TARGET_MIX_SSE_I387 holds.
20153 (define_expand "expm1<mode>2"
20154 [(use (match_operand:MODEF 0 "register_operand"))
20155 (use (match_operand:MODEF 1 "general_operand"))]
20156 "TARGET_USE_FANCY_MATH_387
20157 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
20158 || TARGET_MIX_SSE_I387)
20159 && flag_unsafe_math_optimizations"
20160 {
/* op0: XFmode result, op1: XFmode copy of the input.  */
20161 rtx op0 = gen_reg_rtx (XFmode);
20162 rtx op1 = gen_reg_rtx (XFmode);
20163
20164 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
20165 emit_insn (gen_expm1xf2 (op0, op1));
20166 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
20167 DONE;
20168 })
20169
;; Scalar "vscalef" for MODEF modes under TARGET_AVX512F: operand 0 is set
;; to UNSPEC_SCALEF of [operand 1, operand 2].  Operand 1 must be a
;; register; operand 2 may be a register or memory.  Used by ldexp<mode>3.
20170 (define_insn "avx512f_scalef<mode>2"
20171 [(set (match_operand:MODEF 0 "register_operand" "=v")
20172 (unspec:MODEF
20173 [(match_operand:MODEF 1 "register_operand" "v")
20174 (match_operand:MODEF 2 "nonimmediate_operand" "vm")]
20175 UNSPEC_SCALEF))]
20176 "TARGET_AVX512F"
20177 "vscalef<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
20178 [(set_attr "prefix" "evex")
20179 (set_attr "mode" "<MODE>")])
20180
;; XFmode ldexp with an SImode exponent: convert operand 2 to XFmode with
;; floatsixf2, then emit fscalexf4_i387 with operand 1 and the converted
;; exponent as inputs.  Operand 0 receives the first fscale output; the
;; second output goes into a temporary that is not used afterwards.
20181 (define_expand "ldexpxf3"
20182 [(match_operand:XF 0 "register_operand")
20183 (match_operand:XF 1 "register_operand")
20184 (match_operand:SI 2 "register_operand")]
20185 "TARGET_USE_FANCY_MATH_387
20186 && flag_unsafe_math_optimizations"
20187 {
/* tmp1: exponent converted to XFmode, tmp2: unused second fscale output.  */
20188 rtx tmp1 = gen_reg_rtx (XFmode);
20189 rtx tmp2 = gen_reg_rtx (XFmode);
20190
20191 emit_insn (gen_floatsixf2 (tmp1, operands[2]));
20192 emit_insn (gen_fscalexf4_i387 (operands[0], tmp2,
20193 operands[1], tmp1));
20194 DONE;
20195 })
20196
;; ldexp<mode>3 for MODEF modes with an SImode exponent.  Two expansions:
;;  - when TARGET_AVX512F && TARGET_SSE_MATH: convert the exponent to
;;    <MODE>mode with floatsi<mode>2 and emit avx512f_scalef<mode>2;
;;  - otherwise: extend operand 1 to XFmode, expand ldexpxf3, and truncate
;;    the XFmode result into operand 0.
;; The enable condition accepts either path's prerequisites, and always
;; requires flag_unsafe_math_optimizations.
20197 (define_expand "ldexp<mode>3"
20198 [(use (match_operand:MODEF 0 "register_operand"))
20199 (use (match_operand:MODEF 1 "general_operand"))
20200 (use (match_operand:SI 2 "register_operand"))]
20201 "((TARGET_USE_FANCY_MATH_387
20202 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
20203 || TARGET_MIX_SSE_I387))
20204 || (TARGET_AVX512F && TARGET_SSE_MATH))
20205 && flag_unsafe_math_optimizations"
20206 {
20207 /* Prefer avx512f version. */
20208 if (TARGET_AVX512F && TARGET_SSE_MATH)
20209 {
/* op2: exponent converted to <MODE>mode.  Operand 1 is forced into a
   register because avx512f_scalef<mode>2 requires register_operand.  */
20210 rtx op2 = gen_reg_rtx (<MODE>mode);
20211 operands[1] = force_reg (<MODE>mode, operands[1]);
20212
20213 emit_insn (gen_floatsi<mode>2 (op2, operands[2]));
20214 emit_insn (gen_avx512f_scalef<mode>2 (operands[0], operands[1], op2));
20215 }
20216 else
20217 {
/* op0: XFmode result, op1: XFmode copy of the input.  */
20218 rtx op0 = gen_reg_rtx (XFmode);
20219 rtx op1 = gen_reg_rtx (XFmode);
20220
20221 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
20222 emit_insn (gen_ldexpxf3 (op0, op1, operands[2]));
20223 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
20224 }
20225 DONE;
20226 })
20227
;; XFmode scalb: generates the two-output fscale form for operands 1 and 2.
;; Operand 0 receives the UNSPEC_FSCALE_FRACT result; the UNSPEC_FSCALE_EXP
;; result goes into a fresh pseudo (operand 3) that this expander does not
;; use further.
20228 (define_expand "scalbxf3"
20229 [(parallel [(set (match_operand:XF 0 "register_operand")
20230 (unspec:XF [(match_operand:XF 1 "register_operand")
20231 (match_operand:XF 2 "register_operand")]
20232 UNSPEC_FSCALE_FRACT))
20233 (set (match_dup 3)
20234 (unspec:XF [(match_dup 1) (match_dup 2)]
20235 UNSPEC_FSCALE_EXP))])]
20236 "TARGET_USE_FANCY_MATH_387
20237 && flag_unsafe_math_optimizations"
20238 "operands[3] = gen_reg_rtx (XFmode);")
20239
;; scalb<mode>3 for MODEF modes: extend operands 1 and 2 to XFmode, expand
;; scalbxf3, and truncate the XFmode result into operand 0.  Only enabled
;; when the mode is not computed purely with SSE math, or when
;; TARGET_MIX_SSE_I387 holds.
20240 (define_expand "scalb<mode>3"
20241 [(use (match_operand:MODEF 0 "register_operand"))
20242 (use (match_operand:MODEF 1 "general_operand"))
20243 (use (match_operand:MODEF 2 "general_operand"))]
20244 "TARGET_USE_FANCY_MATH_387
20245 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
20246 || TARGET_MIX_SSE_I387)
20247 && flag_unsafe_math_optimizations"
20248 {
/* op0: XFmode result; op1/op2: XFmode copies of operands 1 and 2.  */
20249 rtx op0 = gen_reg_rtx (XFmode);
20250 rtx op1 = gen_reg_rtx (XFmode);
20251 rtx op2 = gen_reg_rtx (XFmode);
20252
20253 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
20254 emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
20255 emit_insn (gen_scalbxf3 (op0, op1, op2));
20256 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
20257 DONE;
20258 })
20259
;; XFmode significand: generates the two-output fxtract form for operand 1.
;; Operand 0 receives the UNSPEC_XTRACT_FRACT result; the
;; UNSPEC_XTRACT_EXP result goes into a fresh pseudo (operand 2) that this
;; expander does not use further.  This mirrors logbxf2, which keeps the
;; other output.
20260 (define_expand "significandxf2"
20261 [(parallel [(set (match_operand:XF 0 "register_operand")
20262 (unspec:XF [(match_operand:XF 1 "register_operand")]
20263 UNSPEC_XTRACT_FRACT))
20264 (set (match_dup 2)
20265 (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])]
20266 "TARGET_USE_FANCY_MATH_387
20267 && flag_unsafe_math_optimizations"
20268 "operands[2] = gen_reg_rtx (XFmode);")
20269
;; significand<mode>2 for MODEF modes: extend operand 1 to XFmode, expand
;; significandxf2, and truncate the XFmode result into operand 0.  Only
;; enabled when the mode is not computed purely with SSE math, or when
;; TARGET_MIX_SSE_I387 holds.
20270 (define_expand "significand<mode>2"
20271 [(use (match_operand:MODEF 0 "register_operand"))
20272 (use (match_operand:MODEF 1 "general_operand"))]
20273 "TARGET_USE_FANCY_MATH_387
20274 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
20275 || TARGET_MIX_SSE_I387)
20276 && flag_unsafe_math_optimizations"
20277 {
/* op0: XFmode result, op1: XFmode copy of the input.  */
20278 rtx op0 = gen_reg_rtx (XFmode);
20279 rtx op1 = gen_reg_rtx (XFmode);
20280
20281 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
20282 emit_insn (gen_significandxf2 (op0, op1));
20283 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
20284 DONE;
20285 })
20286 \f
20287
;; Scalar rounding for MODEFH modes under TARGET_SSE4_1.  Operand 2 is an
;; immediate accepted by const_0_to_15_operand.  There are five
;; alternatives:
;;   0-2: "%vround", restricted to "noavx512f" by the "isa" attribute;
;;   3-4: "vrndscale", restricted to "avx512f", EVEX-prefixed.
;; Alternatives 2 and 4 take the source from memory and are the ones marked
;; avx_partial_xmm_update.  preferred_for_speed is true when TARGET_AVX;
;; otherwise alternatives 1 and 2 are preferred only when
;; !TARGET_SSE_PARTIAL_REG_DEPENDENCY, and all others are always preferred.
20288 (define_insn "sse4_1_round<mode>2"
20289 [(set (match_operand:MODEFH 0 "register_operand" "=x,x,x,v,v")
20290 (unspec:MODEFH
20291 [(match_operand:MODEFH 1 "nonimmediate_operand" "0,x,m,v,m")
20292 (match_operand:SI 2 "const_0_to_15_operand")]
20293 UNSPEC_ROUND))]
20294 "TARGET_SSE4_1"
20295 "@
20296 %vround<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2}
20297 %vround<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2}
20298 %vround<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}
20299 vrndscale<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2}
20300 vrndscale<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}"
20301 [(set_attr "type" "ssecvt")
20302 (set_attr "prefix_extra" "1,1,1,*,*")
20303 (set_attr "length_immediate" "*,*,*,1,1")
20304 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,evex,evex")
20305 (set_attr "isa" "noavx512f,noavx512f,noavx512f,avx512f,avx512f")
20306 (set_attr "avx_partial_xmm_update" "false,false,true,false,true")
20307 (set_attr "mode" "<MODE>")
20308 (set (attr "preferred_for_speed")
20309 (cond [(match_test "TARGET_AVX")
20310 (symbol_ref "true")
20311 (eq_attr "alternative" "1,2")
20312 (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
20313 ]
20314 (symbol_ref "true")))])
20315
;; XFmode rint: emits a single frndint.  Operand 1 is tied to operand 0
;; (constraint "0"), so the value is rounded in place in an "f" register.
;; The only enabling condition is TARGET_USE_FANCY_MATH_387.
20316 (define_insn "rintxf2"
20317 [(set (match_operand:XF 0 "register_operand" "=f")
20318 (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
20319 UNSPEC_FRNDINT))]
20320 "TARGET_USE_FANCY_MATH_387"
20321 "frndint"
20322 [(set_attr "type" "fpspc")
20323 (set_attr "znver1_decode" "vector")
20324 (set_attr "mode" "XF")])
20325
;; HFmode rint for TARGET_AVX512FP16: delegates to sse4_1_roundhf2 with
;; ROUND_MXCSR as the rounding immediate.
20326 (define_expand "rinthf2"
20327 [(match_operand:HF 0 "register_operand")
20328 (match_operand:HF 1 "nonimmediate_operand")]
20329 "TARGET_AVX512FP16"
20330 {
20331 emit_insn (gen_sse4_1_roundhf2 (operands[0],
20332 operands[1],
20333 GEN_INT (ROUND_MXCSR)));
20334 DONE;
20335 })
20336
;; rint for MODEF modes.
;; If the mode is handled by SSE math: use sse4_1_round<mode>2 with
;; ROUND_MXCSR when TARGET_SSE4_1, otherwise call ix86_expand_rint.
;; Otherwise go through XFmode: extend operand 1, apply rintxf2, and
;; truncate back with truncxf<mode>2_i387_noop_unspec.
20337 (define_expand "rint<mode>2"
20338 [(use (match_operand:MODEF 0 "register_operand"))
20339 (use (match_operand:MODEF 1 "nonimmediate_operand"))]
20340 "TARGET_USE_FANCY_MATH_387
20341 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
20342 {
20343 if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
20344 {
20345 if (TARGET_SSE4_1)
20346 emit_insn (gen_sse4_1_round<mode>2
20347 (operands[0], operands[1], GEN_INT (ROUND_MXCSR)));
20348 else
20349 ix86_expand_rint (operands[0], operands[1]);
20350 }
20351 else
20352 {
20353 rtx op0 = gen_reg_rtx (XFmode);
20354 rtx op1 = gen_reg_rtx (XFmode);
20355
20356 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
20357 emit_insn (gen_rintxf2 (op0, op1));
20358 emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
20359 }
20360 DONE;
20361 })
20362
;; XFmode nearbyint: generates the same UNSPEC_FRNDINT set that rintxf2
;; matches, but the expander is only available when !flag_trapping_math.
;; There are no preparation statements.
20363 (define_expand "nearbyintxf2"
20364 [(set (match_operand:XF 0 "register_operand")
20365 (unspec:XF [(match_operand:XF 1 "register_operand")]
20366 UNSPEC_FRNDINT))]
20367 "TARGET_USE_FANCY_MATH_387
20368 && !flag_trapping_math")
20369
;; HFmode nearbyint for TARGET_AVX512FP16: delegates to sse4_1_roundhf2
;; with ROUND_MXCSR | ROUND_NO_EXC as the rounding immediate.
20370 (define_expand "nearbyinthf2"
20371 [(match_operand:HF 0 "register_operand")
20372 (match_operand:HF 1 "nonimmediate_operand")]
20373 "TARGET_AVX512FP16"
20374 {
20375 emit_insn (gen_sse4_1_roundhf2 (operands[0],
20376 operands[1],
20377 GEN_INT (ROUND_MXCSR | ROUND_NO_EXC)));
20378 DONE;
20379 })
20380
;; nearbyint for MODEF modes.
;; With TARGET_SSE4_1 && TARGET_SSE_MATH: sse4_1_round<mode>2 with
;; ROUND_MXCSR | ROUND_NO_EXC.
;; Otherwise (x87 path, which the condition only allows when
;; !flag_trapping_math): extend to XFmode, apply nearbyintxf2, and truncate
;; back with truncxf<mode>2_i387_noop_unspec.
20381 (define_expand "nearbyint<mode>2"
20382 [(use (match_operand:MODEF 0 "register_operand"))
20383 (use (match_operand:MODEF 1 "nonimmediate_operand"))]
20384 "(TARGET_USE_FANCY_MATH_387
20385 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
20386 || TARGET_MIX_SSE_I387)
20387 && !flag_trapping_math)
20388 || (TARGET_SSE4_1 && TARGET_SSE_MATH)"
20389 {
20390 if (TARGET_SSE4_1 && TARGET_SSE_MATH)
20391 emit_insn (gen_sse4_1_round<mode>2
20392 (operands[0], operands[1], GEN_INT (ROUND_MXCSR
20393 | ROUND_NO_EXC)));
20394 else
20395 {
20396 rtx op0 = gen_reg_rtx (XFmode);
20397 rtx op1 = gen_reg_rtx (XFmode);
20398
20399 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
20400 emit_insn (gen_nearbyintxf2 (op0, op1));
20401 emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
20402 }
20403 DONE;
20404 })
20405
;; round for X87MODEF modes.
;; SSE path (mode handled by SSE math, !flag_trapping_math and
;; !flag_rounding_math):
;;   - TARGET_SSE4_1: force operand 1 into a register and call
;;     ix86_expand_round_sse4;
;;   - 64-bit target or any mode other than DFmode: ix86_expand_round;
;;   - otherwise: ix86_expand_rounddf_32.
;; x87 path (everything else the condition admits, which requires
;; flag_unsafe_math_optimizations): force operand 1 into a register and
;; call ix86_emit_i387_round.
20406 (define_expand "round<mode>2"
20407 [(match_operand:X87MODEF 0 "register_operand")
20408 (match_operand:X87MODEF 1 "nonimmediate_operand")]
20409 "(TARGET_USE_FANCY_MATH_387
20410 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
20411 || TARGET_MIX_SSE_I387)
20412 && flag_unsafe_math_optimizations
20413 && (flag_fp_int_builtin_inexact || !flag_trapping_math))
20414 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
20415 && !flag_trapping_math && !flag_rounding_math)"
20416 {
20417 if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
20418 && !flag_trapping_math && !flag_rounding_math)
20419 {
20420 if (TARGET_SSE4_1)
20421 {
20422 operands[1] = force_reg (<MODE>mode, operands[1]);
20423 ix86_expand_round_sse4 (operands[0], operands[1]);
20424 }
20425 else if (TARGET_64BIT || (<MODE>mode != DFmode))
20426 ix86_expand_round (operands[0], operands[1]);
20427 else
20428 ix86_expand_rounddf_32 (operands[0], operands[1]);
20429 }
20430 else
20431 {
20432 operands[1] = force_reg (<MODE>mode, operands[1]);
20433 ix86_emit_i387_round (operands[0], operands[1]);
20434 }
20435 DONE;
20436 })
20437
;; XFmode -> DImode lrint (UNSPEC_FIST).  The destination must be memory
;; ("=m") and the pattern needs an early-clobber XFmode scratch register.
;; The assembly text comes from output_fix_trunc, called with false as its
;; last argument.
20438 (define_insn "lrintxfdi2"
20439 [(set (match_operand:DI 0 "nonimmediate_operand" "=m")
20440 (unspec:DI [(match_operand:XF 1 "register_operand" "f")]
20441 UNSPEC_FIST))
20442 (clobber (match_scratch:XF 2 "=&f"))]
20443 "TARGET_USE_FANCY_MATH_387"
20444 "* return output_fix_trunc (insn, operands, false);"
20445 [(set_attr "type" "fpspc")
20446 (set_attr "mode" "DI")])
20447
;; XFmode -> SWI24-mode lrint (UNSPEC_FIST) into a memory destination.
;; Unlike lrintxfdi2 this variant has no scratch clobber.  The assembly text
;; comes from output_fix_trunc, called with false as its last argument.
20448 (define_insn "lrintxf<mode>2"
20449 [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m")
20450 (unspec:SWI24 [(match_operand:XF 1 "register_operand" "f")]
20451 UNSPEC_FIST))]
20452 "TARGET_USE_FANCY_MATH_387"
20453 "* return output_fix_trunc (insn, operands, false);"
20454 [(set_attr "type" "fpspc")
20455 (set_attr "mode" "<MODE>")])
20456
;; lrint from a MODEF mode to a SWI48 mode when the source mode is handled
;; by SSE math.  The expander has no preparation statements; it just
;; generates the UNSPEC_FIX_NOTRUNC set below.
;; NOTE(review): the insn that matches this rtx is not in this part of the
;; file -- presumably defined with the SSE conversion patterns; confirm.
20457 (define_expand "lrint<MODEF:mode><SWI48:mode>2"
20458 [(set (match_operand:SWI48 0 "register_operand")
20459 (unspec:SWI48 [(match_operand:MODEF 1 "nonimmediate_operand")]
20460 UNSPEC_FIX_NOTRUNC))]
20461 "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH")
20462
;; lround from an X87MODEF mode to a SWI248x mode.
;; SSE path: taken when the source mode is handled by SSE math, the
;; destination is not HImode, a DImode destination is only used on 64-bit
;; targets, and neither flag_trapping_math nor flag_rounding_math is set;
;; it calls ix86_expand_lround.
;; Otherwise the x87 path calls ix86_emit_i387_round; the condition only
;; admits that path with flag_unsafe_math_optimizations.
;; The test in the preparation statements repeats the SSE half of the
;; condition string verbatim; keep the two in sync.
20463 (define_expand "lround<X87MODEF:mode><SWI248x:mode>2"
20464 [(match_operand:SWI248x 0 "nonimmediate_operand")
20465 (match_operand:X87MODEF 1 "register_operand")]
20466 "(TARGET_USE_FANCY_MATH_387
20467 && (!(SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)
20468 || TARGET_MIX_SSE_I387)
20469 && flag_unsafe_math_optimizations)
20470 || (SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH
20471 && <SWI248x:MODE>mode != HImode
20472 && ((<SWI248x:MODE>mode != DImode) || TARGET_64BIT)
20473 && !flag_trapping_math && !flag_rounding_math)"
20474 {
20475 if (SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH
20476 && <SWI248x:MODE>mode != HImode
20477 && ((<SWI248x:MODE>mode != DImode) || TARGET_64BIT)
20478 && !flag_trapping_math && !flag_rounding_math)
20479 ix86_expand_lround (operands[0], operands[1]);
20480 else
20481 ix86_emit_i387_round (operands[0], operands[1]);
20482 DONE;
20483 })
20484
;; Unspec codes iterated over by the frndint-based rounding patterns:
;; roundeven, floor, ceil and trunc.
20485 (define_int_iterator FRNDINT_ROUNDING
20486 [UNSPEC_FRNDINT_ROUNDEVEN
20487 UNSPEC_FRNDINT_FLOOR
20488 UNSPEC_FRNDINT_CEIL
20489 UNSPEC_FRNDINT_TRUNC])
20490
;; Unspec codes iterated over by the fist-based conversion patterns:
;; floor and ceil only.
20491 (define_int_iterator FIST_ROUNDING
20492 [UNSPEC_FIST_FLOOR
20493 UNSPEC_FIST_CEIL])
20494
20495 ;; Base name for define_insn
;; Maps each rounding unspec to the name stem used by the
;; "<rounding_insn>..." and "l<rounding_insn>..." expanders.  Note that
;; UNSPEC_FRNDINT_TRUNC maps to "btrunc" here, whereas the "rounding"
;; attribute maps it to "trunc".
20496 (define_int_attr rounding_insn
20497 [(UNSPEC_FRNDINT_ROUNDEVEN "roundeven")
20498 (UNSPEC_FRNDINT_FLOOR "floor")
20499 (UNSPEC_FRNDINT_CEIL "ceil")
20500 (UNSPEC_FRNDINT_TRUNC "btrunc")
20501 (UNSPEC_FIST_FLOOR "floor")
20502 (UNSPEC_FIST_CEIL "ceil")])
20503
;; Lower-case rounding name.  In this file it supplies the "i387_cw"
;; attribute value and the suffix of the frndintxf2_<rounding> and
;; fist<mode>2_<rounding> pattern names.
20504 (define_int_attr rounding
20505 [(UNSPEC_FRNDINT_ROUNDEVEN "roundeven")
20506 (UNSPEC_FRNDINT_FLOOR "floor")
20507 (UNSPEC_FRNDINT_CEIL "ceil")
20508 (UNSPEC_FRNDINT_TRUNC "trunc")
20509 (UNSPEC_FIST_FLOOR "floor")
20510 (UNSPEC_FIST_CEIL "ceil")])
20511
;; Upper-case rounding name, spliced into C identifiers in the patterns
;; below: I387_<ROUNDING>, SLOT_CW_<ROUNDING> and ROUND_<ROUNDING>.
20512 (define_int_attr ROUNDING
20513 [(UNSPEC_FRNDINT_ROUNDEVEN "ROUNDEVEN")
20514 (UNSPEC_FRNDINT_FLOOR "FLOOR")
20515 (UNSPEC_FRNDINT_CEIL "CEIL")
20516 (UNSPEC_FRNDINT_TRUNC "TRUNC")
20517 (UNSPEC_FIST_FLOOR "FLOOR")
20518 (UNSPEC_FIST_CEIL "CEIL")])
20519
20520 ;; Rounding mode control word calculation could clobber FLAGS_REG.
;; Placeholder insn for an XFmode frndint with an explicit rounding mode.
;; It only matches while ix86_pre_reload_split () holds and is always split
;; ("&& 1").  The split:
;;   - sets ix86_optimize_mode_switching[I387_<ROUNDING>];
;;   - assigns HImode stack slots SLOT_CW_STORED (operand 2) and
;;     SLOT_CW_<ROUNDING> (operand 3);
;;   - emits frndintxf2_<rounding>_i387 with those slots.
20521 (define_insn_and_split "frndintxf2_<rounding>"
20522 [(set (match_operand:XF 0 "register_operand")
20523 (unspec:XF [(match_operand:XF 1 "register_operand")]
20524 FRNDINT_ROUNDING))
20525 (clobber (reg:CC FLAGS_REG))]
20526 "TARGET_USE_FANCY_MATH_387
20527 && (flag_fp_int_builtin_inexact || !flag_trapping_math)
20528 && ix86_pre_reload_split ()"
20529 "#"
20530 "&& 1"
20531 [(const_int 0)]
20532 {
20533 ix86_optimize_mode_switching[I387_<ROUNDING>] = 1;
20534
20535 operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
20536 operands[3] = assign_386_stack_local (HImode, SLOT_CW_<ROUNDING>);
20537
20538 emit_insn (gen_frndintxf2_<rounding>_i387 (operands[0], operands[1],
20539 operands[2], operands[3]));
20540 DONE;
20541 }
20542 [(set_attr "type" "frndint")
20543 (set_attr "i387_cw" "<rounding>")
20544 (set_attr "mode" "XF")])
20545
;; The real frndint-with-rounding-mode insn.  The template loads the x87
;; control word from operand 3, executes frndint in place (operand 1 is
;; tied to operand 0), then loads the control word from operand 2.  Both
;; control-word operands are HImode memory and appear only as (use ...).
20546 (define_insn "frndintxf2_<rounding>_i387"
20547 [(set (match_operand:XF 0 "register_operand" "=f")
20548 (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
20549 FRNDINT_ROUNDING))
20550 (use (match_operand:HI 2 "memory_operand" "m"))
20551 (use (match_operand:HI 3 "memory_operand" "m"))]
20552 "TARGET_USE_FANCY_MATH_387
20553 && (flag_fp_int_builtin_inexact || !flag_trapping_math)"
20554 "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
20555 [(set_attr "type" "frndint")
20556 (set_attr "i387_cw" "<rounding>")
20557 (set_attr "mode" "XF")])
20558
;; XFmode roundeven/floor/ceil/btrunc expanders.  No preparation
;; statements: the generated parallel (unspec set plus FLAGS_REG clobber)
;; has the same shape as the frndintxf2_<rounding> pattern.
20559 (define_expand "<rounding_insn>xf2"
20560 [(parallel [(set (match_operand:XF 0 "register_operand")
20561 (unspec:XF [(match_operand:XF 1 "register_operand")]
20562 FRNDINT_ROUNDING))
20563 (clobber (reg:CC FLAGS_REG))])]
20564 "TARGET_USE_FANCY_MATH_387
20565 && (flag_fp_int_builtin_inexact || !flag_trapping_math)")
20566
;; HFmode roundeven/floor/ceil/btrunc for TARGET_AVX512FP16.  The RTL
;; template is never emitted: the preparation statements emit
;; sse4_1_roundhf2 with ROUND_<ROUNDING> | ROUND_NO_EXC and finish with DONE.
20567 (define_expand "<rounding_insn>hf2"
20568 [(parallel [(set (match_operand:HF 0 "register_operand")
20569 (unspec:HF [(match_operand:HF 1 "register_operand")]
20570 FRNDINT_ROUNDING))
20571 (clobber (reg:CC FLAGS_REG))])]
20572 "TARGET_AVX512FP16"
20573 {
20574 emit_insn (gen_sse4_1_roundhf2 (operands[0], operands[1],
20575 GEN_INT (ROUND_<ROUNDING> | ROUND_NO_EXC)));
20576 DONE;
20577 })
20578
;; roundeven/floor/ceil/btrunc for MODEF modes.  The RTL template is never
;; emitted; every path ends in DONE.
;; SSE path (mode handled by SSE math):
;;   - TARGET_SSE4_1: sse4_1_round<mode>2 with
;;     ROUND_<ROUNDING> | ROUND_NO_EXC;
;;   - otherwise (roundeven is excluded by the condition, hence the
;;     gcc_unreachable () arms): on 64-bit targets or for modes other than
;;     DFmode use ix86_expand_floorceil (last argument true for floor,
;;     false for ceil) or ix86_expand_trunc; for DFmode on 32-bit targets
;;     use the ..._32 variants of the same helpers.
;; x87 path: extend to XFmode, apply frndintxf2_<rounding>, and truncate
;; back with truncxf<mode>2_i387_noop_unspec.
;; The test in the preparation statements repeats the SSE half of the
;; condition string verbatim; keep the two in sync.
20579 (define_expand "<rounding_insn><mode>2"
20580 [(parallel [(set (match_operand:MODEF 0 "register_operand")
20581 (unspec:MODEF [(match_operand:MODEF 1 "register_operand")]
20582 FRNDINT_ROUNDING))
20583 (clobber (reg:CC FLAGS_REG))])]
20584 "(TARGET_USE_FANCY_MATH_387
20585 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
20586 || TARGET_MIX_SSE_I387)
20587 && (flag_fp_int_builtin_inexact || !flag_trapping_math))
20588 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
20589 && (TARGET_SSE4_1
20590 || (ROUND_<ROUNDING> != ROUND_ROUNDEVEN
20591 && (flag_fp_int_builtin_inexact || !flag_trapping_math))))"
20592 {
20593 if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
20594 && (TARGET_SSE4_1
20595 || (ROUND_<ROUNDING> != ROUND_ROUNDEVEN
20596 && (flag_fp_int_builtin_inexact || !flag_trapping_math))))
20597 {
20598 if (TARGET_SSE4_1)
20599 emit_insn (gen_sse4_1_round<mode>2
20600 (operands[0], operands[1],
20601 GEN_INT (ROUND_<ROUNDING> | ROUND_NO_EXC)));
20602 else if (TARGET_64BIT || (<MODE>mode != DFmode))
20603 {
20604 if (ROUND_<ROUNDING> == ROUND_FLOOR)
20605 ix86_expand_floorceil (operands[0], operands[1], true);
20606 else if (ROUND_<ROUNDING> == ROUND_CEIL)
20607 ix86_expand_floorceil (operands[0], operands[1], false);
20608 else if (ROUND_<ROUNDING> == ROUND_TRUNC)
20609 ix86_expand_trunc (operands[0], operands[1]);
20610 else
20611 gcc_unreachable ();
20612 }
20613 else
20614 {
20615 if (ROUND_<ROUNDING> == ROUND_FLOOR)
20616 ix86_expand_floorceildf_32 (operands[0], operands[1], true);
20617 else if (ROUND_<ROUNDING> == ROUND_CEIL)
20618 ix86_expand_floorceildf_32 (operands[0], operands[1], false);
20619 else if (ROUND_<ROUNDING> == ROUND_TRUNC)
20620 ix86_expand_truncdf_32 (operands[0], operands[1]);
20621 else
20622 gcc_unreachable ();
20623 }
20624 }
20625 else
20626 {
20627 rtx op0 = gen_reg_rtx (XFmode);
20628 rtx op1 = gen_reg_rtx (XFmode);
20629
20630 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
20631 emit_insn (gen_frndintxf2_<rounding> (op0, op1));
20632 emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
20633 }
20634 DONE;
20635 })
20636
20637 ;; Rounding mode control word calculation could clobber FLAGS_REG.
;; Placeholder insn for an XFmode -> SWI248x conversion with floor or ceil
;; rounding.  It only matches while ix86_pre_reload_split () holds and is
;; always split ("&& 1").  The split:
;;   - sets ix86_optimize_mode_switching[I387_<ROUNDING>];
;;   - assigns HImode stack slots SLOT_CW_STORED (operand 2) and
;;     SLOT_CW_<ROUNDING> (operand 3);
;;   - emits fist<mode>2_<rounding> with those slots.
20638 (define_insn_and_split "*fist<mode>2_<rounding>_1"
20639 [(set (match_operand:SWI248x 0 "nonimmediate_operand")
20640 (unspec:SWI248x [(match_operand:XF 1 "register_operand")]
20641 FIST_ROUNDING))
20642 (clobber (reg:CC FLAGS_REG))]
20643 "TARGET_USE_FANCY_MATH_387
20644 && flag_unsafe_math_optimizations
20645 && ix86_pre_reload_split ()"
20646 "#"
20647 "&& 1"
20648 [(const_int 0)]
20649 {
20650 ix86_optimize_mode_switching[I387_<ROUNDING>] = 1;
20651
20652 operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
20653 operands[3] = assign_386_stack_local (HImode, SLOT_CW_<ROUNDING>);
20654
20655 emit_insn (gen_fist<mode>2_<rounding> (operands[0], operands[1],
20656 operands[2], operands[3]));
20657 DONE;
20658 }
20659 [(set_attr "type" "fistp")
20660 (set_attr "i387_cw" "<rounding>")
20661 (set_attr "mode" "<MODE>")])
20662
;; XFmode -> DImode conversion with floor or ceil rounding into a memory
;; destination.  Operands 2 and 3 are HImode memory operands that appear
;; only as (use ...); operand 4 is an early-clobber XFmode scratch.  The
;; assembly text comes from output_fix_trunc, called with false as its last
;; argument.
20663 (define_insn "fistdi2_<rounding>"
20664 [(set (match_operand:DI 0 "nonimmediate_operand" "=m")
20665 (unspec:DI [(match_operand:XF 1 "register_operand" "f")]
20666 FIST_ROUNDING))
20667 (use (match_operand:HI 2 "memory_operand" "m"))
20668 (use (match_operand:HI 3 "memory_operand" "m"))
20669 (clobber (match_scratch:XF 4 "=&f"))]
20670 "TARGET_USE_FANCY_MATH_387
20671 && flag_unsafe_math_optimizations"
20672 "* return output_fix_trunc (insn, operands, false);"
20673 [(set_attr "type" "fistp")
20674 (set_attr "i387_cw" "<rounding>")
20675 (set_attr "mode" "DI")])
20676
;; XFmode -> SWI24-mode conversion with floor or ceil rounding into a memory
;; destination.  Same shape as fistdi2_<rounding> but without the XFmode
;; scratch clobber.  The assembly text comes from output_fix_trunc, called
;; with false as its last argument.
20677 (define_insn "fist<mode>2_<rounding>"
20678 [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m")
20679 (unspec:SWI24 [(match_operand:XF 1 "register_operand" "f")]
20680 FIST_ROUNDING))
20681 (use (match_operand:HI 2 "memory_operand" "m"))
20682 (use (match_operand:HI 3 "memory_operand" "m"))]
20683 "TARGET_USE_FANCY_MATH_387
20684 && flag_unsafe_math_optimizations"
20685 "* return output_fix_trunc (insn, operands, false);"
20686 [(set_attr "type" "fistp")
20687 (set_attr "i387_cw" "<rounding>")
20688 (set_attr "mode" "<MODE>")])
20689
;; lfloor/lceil from XFmode to a SWI248x mode.  No preparation statements:
;; the generated parallel (unspec set plus FLAGS_REG clobber) has the same
;; shape as the *fist<mode>2_<rounding>_1 pattern.  Requires x87 fancy math,
;; flag_unsafe_math_optimizations, and either no SSE math or
;; TARGET_MIX_SSE_I387.
20690 (define_expand "l<rounding_insn>xf<mode>2"
20691 [(parallel [(set (match_operand:SWI248x 0 "nonimmediate_operand")
20692 (unspec:SWI248x [(match_operand:XF 1 "register_operand")]
20693 FIST_ROUNDING))
20694 (clobber (reg:CC FLAGS_REG))])]
20695 "TARGET_USE_FANCY_MATH_387
20696 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
20697 && flag_unsafe_math_optimizations")
20698
;; lfloor/lceil from a MODEF mode to a SWI48 mode under SSE math.  The RTL
;; template is never emitted; every path ends in DONE.
;;   - TARGET_SSE4_1: round into a fresh pseudo with sse4_1_round<mode>2
;;     (ROUND_<ROUNDING> | ROUND_NO_EXC), then convert with
;;     fix_trunc<MODEF:mode><SWI48:mode>2.
;;   - otherwise (the condition requires !flag_trapping_math here): call
;;     ix86_expand_lfloorceil with true for floor and false for ceil.
;; FIST_ROUNDING only contains floor and ceil, hence the gcc_unreachable ().
20699 (define_expand "l<rounding_insn><MODEF:mode><SWI48:mode>2"
20700 [(parallel [(set (match_operand:SWI48 0 "nonimmediate_operand")
20701 (unspec:SWI48 [(match_operand:MODEF 1 "register_operand")]
20702 FIST_ROUNDING))
20703 (clobber (reg:CC FLAGS_REG))])]
20704 "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
20705 && (TARGET_SSE4_1 || !flag_trapping_math)"
20706 {
20707 if (TARGET_SSE4_1)
20708 {
20709 rtx tmp = gen_reg_rtx (<MODEF:MODE>mode);
20710
20711 emit_insn (gen_sse4_1_round<MODEF:mode>2
20712 (tmp, operands[1], GEN_INT (ROUND_<ROUNDING>
20713 | ROUND_NO_EXC)));
20714 emit_insn (gen_fix_trunc<MODEF:mode><SWI48:mode>2
20715 (operands[0], tmp));
20716 }
20717 else if (ROUND_<ROUNDING> == ROUND_FLOOR)
20718 ix86_expand_lfloorceil (operands[0], operands[1], true);
20719 else if (ROUND_<ROUNDING> == ROUND_CEIL)
20720 ix86_expand_lfloorceil (operands[0], operands[1], false);
20721 else
20722 gcc_unreachable ();
20723
20724 DONE;
20725 })
20726
;; Emits fxam followed by fnstsw into operand 0.  Operand 0 is an HImode
;; register with constraint "a"; operand 1 is an X87MODEF value in an "f"
;; register.  Modelled as UNSPEC_FXAM with a fixed length of 4.
20727 (define_insn "fxam<mode>2_i387"
20728 [(set (match_operand:HI 0 "register_operand" "=a")
20729 (unspec:HI
20730 [(match_operand:X87MODEF 1 "register_operand" "f")]
20731 UNSPEC_FXAM))]
20732 "TARGET_USE_FANCY_MATH_387"
20733 "fxam\n\tfnstsw\t%0"
20734 [(set_attr "type" "multi")
20735 (set_attr "length" "4")
20736 (set_attr "unit" "i387")
20737 (set_attr "mode" "<MODE>")])
20738
;; signbit for TFmode; operand 0 is the SImode result.
;;   - TARGET_SSE4_1: build a TFmode sign-bit mask with
;;     ix86_build_signbit_mask, emit ptesttf2 of operand 1 against it, set
;;     a QImode scratch from the NE test of the CCZmode flags, and
;;     zero-extend the scratch into operand 0.
;;   - otherwise: sse_movmskps on the V4SFmode lowpart of operand 1, then
;;     AND the result with 0x8.
20739 (define_expand "signbittf2"
20740 [(use (match_operand:SI 0 "register_operand"))
20741 (use (match_operand:TF 1 "register_operand"))]
20742 "TARGET_SSE"
20743 {
20744 if (TARGET_SSE4_1)
20745 {
20746 rtx mask = ix86_build_signbit_mask (TFmode, 0, 0);
20747 rtx scratch = gen_reg_rtx (QImode);
20748
20749 emit_insn (gen_ptesttf2 (operands[1], mask));
20750 ix86_expand_setcc (scratch, NE,
20751 gen_rtx_REG (CCZmode, FLAGS_REG), const0_rtx);
20752
20753 emit_insn (gen_zero_extendqisi2 (operands[0], scratch));
20754 }
20755 else
20756 {
20757 emit_insn (gen_sse_movmskps (operands[0],
20758 gen_lowpart (V4SFmode, operands[1])));
20759 emit_insn (gen_andsi3 (operands[0], operands[0], GEN_INT (0x8)));
20760 }
20761 DONE;
20762 })
20763
;; signbit for XFmode: run fxamxf2_i387 into an HImode scratch, then AND
;; the SImode lowpart of the scratch with 0x200 into operand 0.
;; NOTE(review): 0x200 is presumably the status-word bit that fxam sets
;; from the operand's sign -- confirm against the x87 documentation.
20764 (define_expand "signbitxf2"
20765 [(use (match_operand:SI 0 "register_operand"))
20766 (use (match_operand:XF 1 "register_operand"))]
20767 "TARGET_USE_FANCY_MATH_387"
20768 {
20769 rtx scratch = gen_reg_rtx (HImode);
20770
20771 emit_insn (gen_fxamxf2_i387 (scratch, operands[1]));
20772 emit_insn (gen_andsi3 (operands[0],
20773 gen_lowpart (SImode, scratch), GEN_INT (0x200)));
20774 DONE;
20775 })
20776
;; %vmovmskpd from a DFmode value in an "x" register into an SImode general
;; register (UNSPEC_MOVMSK).  Used by signbitdf2.
20777 (define_insn "movmsk_df"
20778 [(set (match_operand:SI 0 "register_operand" "=r")
20779 (unspec:SI
20780 [(match_operand:DF 1 "register_operand" "x")]
20781 UNSPEC_MOVMSK))]
20782 "SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH"
20783 "%vmovmskpd\t{%1, %0|%0, %1}"
20784 [(set_attr "type" "ssemov")
20785 (set_attr "prefix" "maybe_vex")
20786 (set_attr "mode" "DF")])
20787
20788 ;; Use movmskpd in SSE mode to avoid store forwarding stall
20789 ;; for 32bit targets and movq+shrq sequence for 64bit targets.
;; signbit for DFmode; operand 0 is the SImode result.
;;   - SSE math: movmsk_df followed by an AND with const1_rtx.
;;   - otherwise: fxamdf2_i387 into an HImode scratch, then AND its SImode
;;     lowpart with 0x200 (NOTE(review): presumably the status-word bit
;;     fxam sets from the sign -- confirm).
20790 (define_expand "signbitdf2"
20791 [(use (match_operand:SI 0 "register_operand"))
20792 (use (match_operand:DF 1 "register_operand"))]
20793 "TARGET_USE_FANCY_MATH_387
20794 || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
20795 {
20796 if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)
20797 {
20798 emit_insn (gen_movmsk_df (operands[0], operands[1]));
20799 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
20800 }
20801 else
20802 {
20803 rtx scratch = gen_reg_rtx (HImode);
20804
20805 emit_insn (gen_fxamdf2_i387 (scratch, operands[1]));
20806 emit_insn (gen_andsi3 (operands[0],
20807 gen_lowpart (SImode, scratch), GEN_INT (0x200)));
20808 }
20809 DONE;
20810 })
20811
;; signbit for SFmode, x87 only: the condition excludes the case where
;; SFmode is handled by SSE math.  Runs fxamsf2_i387 into an HImode scratch
;; and ANDs its SImode lowpart with 0x200 into operand 0.
;; NOTE(review): 0x200 is presumably the status-word bit fxam sets from the
;; sign -- confirm.
20812 (define_expand "signbitsf2"
20813 [(use (match_operand:SI 0 "register_operand"))
20814 (use (match_operand:SF 1 "register_operand"))]
20815 "TARGET_USE_FANCY_MATH_387
20816 && !(SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH)"
20817 {
20818 rtx scratch = gen_reg_rtx (HImode);
20819
20820 emit_insn (gen_fxamsf2_i387 (scratch, operands[1]));
20821 emit_insn (gen_andsi3 (operands[0],
20822 gen_lowpart (SImode, scratch), GEN_INT (0x200)));
20823 DONE;
20824 })
20825 \f
20826 ;; Block operation instructions
20827
;; Emits a bare "cld".  Modelled as an unspec_volatile (UNSPECV_CLD) with
;; an empty condition; length 1, no immediate, no modrm byte.
20828 (define_insn "cld"
20829 [(unspec_volatile [(const_int 0)] UNSPECV_CLD)]
20830 ""
20831 "cld"
20832 [(set_attr "length" "1")
20833 (set_attr "length_immediate" "0")
20834 (set_attr "modrm" "0")])
20835
;; Block copy expander.  Forwards operands 0-8 to ix86_expand_set_or_cpymem,
;; passing NULL as its fourth argument and false as its last.  Expands to
;; DONE when the helper returns nonzero and FAILs otherwise.
20836 (define_expand "cpymem<mode>"
20837 [(use (match_operand:BLK 0 "memory_operand"))
20838 (use (match_operand:BLK 1 "memory_operand"))
20839 (use (match_operand:SWI48 2 "nonmemory_operand"))
20840 (use (match_operand:SWI48 3 "const_int_operand"))
20841 (use (match_operand:SI 4 "const_int_operand"))
20842 (use (match_operand:SI 5 "const_int_operand"))
20843 (use (match_operand:SI 6 ""))
20844 (use (match_operand:SI 7 ""))
20845 (use (match_operand:SI 8 ""))]
20846 ""
20847 {
20848 if (ix86_expand_set_or_cpymem (operands[0], operands[1],
20849 operands[2], NULL, operands[3],
20850 operands[4], operands[5],
20851 operands[6], operands[7],
20852 operands[8], false))
20853 DONE;
20854 else
20855 FAIL;
20856 })
20857
20858 ;; Most CPUs don't like single string operations.
20859 ;; Handle this case here to simplify the previous expander.
20860
;; Copy one piece from memory operand 3 to memory operand 1 and advance
;; both pointers.  Operands 0 and 2 receive the advanced destination and
;; source pointers; operands 5 and 6 are computed here as operand 0 and
;; operand 2 plus the size of operand 1's mode.
;; FAILs for a source in a non-generic address space.  When
;; TARGET_SINGLE_STRINGOP or optimizing for size, and neither SI_REG nor
;; DI_REG is fixed, emits strmov_singleop instead.  Otherwise falls through
;; to the RTL template (load into temporary operand 4, store, two pointer
;; additions that each clobber FLAGS_REG).
20861 (define_expand "strmov"
20862 [(set (match_dup 4) (match_operand 3 "memory_operand"))
20863 (set (match_operand 1 "memory_operand") (match_dup 4))
20864 (parallel [(set (match_operand 0 "register_operand") (match_dup 5))
20865 (clobber (reg:CC FLAGS_REG))])
20866 (parallel [(set (match_operand 2 "register_operand") (match_dup 6))
20867 (clobber (reg:CC FLAGS_REG))])]
20868 ""
20869 {
20870 /* Can't use this for non-default address spaces. */
20871 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[3])))
20872 FAIL;
20873
20874 int piece_size = GET_MODE_SIZE (GET_MODE (operands[1]));
20875
20876 /* If .md ever supports :P for Pmode, these can be directly
20877 in the pattern above. */
20878 operands[5] = plus_constant (Pmode, operands[0], piece_size);
20879 operands[6] = plus_constant (Pmode, operands[2], piece_size);
20880
20881 /* Can't use this if the user has appropriated esi or edi. */
20882 if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ())
20883 && !(fixed_regs[SI_REG] || fixed_regs[DI_REG]))
20884 {
20885 emit_insn (gen_strmov_singleop (operands[0], operands[1],
20886 operands[2], operands[3],
20887 operands[5], operands[6]));
20888 DONE;
20889 }
20890
20891 operands[4] = gen_reg_rtx (GET_MODE (operands[1]));
20892 })
20893
;; Builds the three-set parallel (memory copy plus both pointer updates)
;; that the *strmov*_1 insns have.  The preparation statements only set
;; ix86_optimize_mode_switching[X86_DIRFLAG] when TARGET_CLD; the template
;; itself is then emitted.
20894 (define_expand "strmov_singleop"
20895 [(parallel [(set (match_operand 1 "memory_operand")
20896 (match_operand 3 "memory_operand"))
20897 (set (match_operand 0 "register_operand")
20898 (match_operand 4))
20899 (set (match_operand 2 "register_operand")
20900 (match_operand 5))])]
20901 ""
20902 {
20903 if (TARGET_CLD)
20904 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
20905 })
20906
;; Single movsq: copies DImode from the address in operand 3 to the address
;; in operand 2, and sets operands 0 ("=D") and 1 ("=S") to those addresses
;; plus 8.  Operand 2 is tied to operand 0 and operand 3 to operand 1.
;; Requires TARGET_64BIT, SI_REG and DI_REG not fixed, and
;; ix86_check_no_addr_space (insn).
20907 (define_insn "*strmovdi_rex_1"
20908 [(set (mem:DI (match_operand:P 2 "register_operand" "0"))
20909 (mem:DI (match_operand:P 3 "register_operand" "1")))
20910 (set (match_operand:P 0 "register_operand" "=D")
20911 (plus:P (match_dup 2)
20912 (const_int 8)))
20913 (set (match_operand:P 1 "register_operand" "=S")
20914 (plus:P (match_dup 3)
20915 (const_int 8)))]
20916 "TARGET_64BIT
20917 && !(fixed_regs[SI_REG] || fixed_regs[DI_REG])
20918 && ix86_check_no_addr_space (insn)"
20919 "%^movsq"
20920 [(set_attr "type" "str")
20921 (set_attr "memory" "both")
20922 (set_attr "mode" "DI")])
20923
;; Single movs{l|d}: copies SImode from the address in operand 3 to the
;; address in operand 2, and sets operands 0 ("=D") and 1 ("=S") to those
;; addresses plus 4.  Requires SI_REG and DI_REG not fixed and
;; ix86_check_no_addr_space (insn).
20924 (define_insn "*strmovsi_1"
20925 [(set (mem:SI (match_operand:P 2 "register_operand" "0"))
20926 (mem:SI (match_operand:P 3 "register_operand" "1")))
20927 (set (match_operand:P 0 "register_operand" "=D")
20928 (plus:P (match_dup 2)
20929 (const_int 4)))
20930 (set (match_operand:P 1 "register_operand" "=S")
20931 (plus:P (match_dup 3)
20932 (const_int 4)))]
20933 "!(fixed_regs[SI_REG] || fixed_regs[DI_REG])
20934 && ix86_check_no_addr_space (insn)"
20935 "%^movs{l|d}"
20936 [(set_attr "type" "str")
20937 (set_attr "memory" "both")
20938 (set_attr "mode" "SI")])
20939
;; Single movsw: copies HImode from the address in operand 3 to the address
;; in operand 2, and sets operands 0 ("=D") and 1 ("=S") to those addresses
;; plus 2.  Requires SI_REG and DI_REG not fixed and
;; ix86_check_no_addr_space (insn).
20940 (define_insn "*strmovhi_1"
20941 [(set (mem:HI (match_operand:P 2 "register_operand" "0"))
20942 (mem:HI (match_operand:P 3 "register_operand" "1")))
20943 (set (match_operand:P 0 "register_operand" "=D")
20944 (plus:P (match_dup 2)
20945 (const_int 2)))
20946 (set (match_operand:P 1 "register_operand" "=S")
20947 (plus:P (match_dup 3)
20948 (const_int 2)))]
20949 "!(fixed_regs[SI_REG] || fixed_regs[DI_REG])
20950 && ix86_check_no_addr_space (insn)"
20951 "%^movsw"
20952 [(set_attr "type" "str")
20953 (set_attr "memory" "both")
20954 (set_attr "mode" "HI")])
20955
;; Single movsb: copies QImode from the address in operand 3 to the address
;; in operand 2, and sets operands 0 ("=D") and 1 ("=S") to those addresses
;; plus 1.  Requires SI_REG and DI_REG not fixed and
;; ix86_check_no_addr_space (insn).  The prefix_rex attribute is forced to
;; "0" when the pointer mode P is DImode and left at "*" otherwise.
20956 (define_insn "*strmovqi_1"
20957 [(set (mem:QI (match_operand:P 2 "register_operand" "0"))
20958 (mem:QI (match_operand:P 3 "register_operand" "1")))
20959 (set (match_operand:P 0 "register_operand" "=D")
20960 (plus:P (match_dup 2)
20961 (const_int 1)))
20962 (set (match_operand:P 1 "register_operand" "=S")
20963 (plus:P (match_dup 3)
20964 (const_int 1)))]
20965 "!(fixed_regs[SI_REG] || fixed_regs[DI_REG])
20966 && ix86_check_no_addr_space (insn)"
20967 "%^movsb"
20968 [(set_attr "type" "str")
20969 (set_attr "memory" "both")
20970 (set (attr "prefix_rex")
20971 (if_then_else
20972 (match_test "<P:MODE>mode == DImode")
20973 (const_string "0")
20974 (const_string "*")))
20975 (set_attr "mode" "QI")])
20976
;; Builds the five-element parallel that the *rep_mov* insns have: count
;; register (operand 4) set to zero, destination and source pointers
;; (operands 0 and 2) set to operands 5 and 6, memory operand 1 set from
;; memory operand 3, and a use of the count.  The preparation statements
;; only set ix86_optimize_mode_switching[X86_DIRFLAG] when TARGET_CLD.
20977 (define_expand "rep_mov"
20978 [(parallel [(set (match_operand 4 "register_operand") (const_int 0))
20979 (set (match_operand 0 "register_operand")
20980 (match_operand 5))
20981 (set (match_operand 2 "register_operand")
20982 (match_operand 6))
20983 (set (match_operand 1 "memory_operand")
20984 (match_operand 3 "memory_operand"))
20985 (use (match_dup 4))])]
20986 ""
20987 {
20988 if (TARGET_CLD)
20989 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
20990 })
20991
;; rep movsq.  Operand 5 is the count, tied to operand 2 ("=c"), which the
;; pattern sets to zero.  Operands 0 ("=D") and 1 ("=S") become
;; (operand 5 << 3) plus the incoming pointers (operands 3 and 4, tied to
;; operands 0 and 1).  The copy itself is a BLKmode set from the memory at
;; operand 4 to the memory at operand 3.
;; Requires TARGET_64BIT, CX_REG/SI_REG/DI_REG not fixed, and
;; ix86_check_no_addr_space (insn).
20992 (define_insn "*rep_movdi_rex64"
20993 [(set (match_operand:P 2 "register_operand" "=c") (const_int 0))
20994 (set (match_operand:P 0 "register_operand" "=D")
20995 (plus:P (ashift:P (match_operand:P 5 "register_operand" "2")
20996 (const_int 3))
20997 (match_operand:P 3 "register_operand" "0")))
20998 (set (match_operand:P 1 "register_operand" "=S")
20999 (plus:P (ashift:P (match_dup 5) (const_int 3))
21000 (match_operand:P 4 "register_operand" "1")))
21001 (set (mem:BLK (match_dup 3))
21002 (mem:BLK (match_dup 4)))
21003 (use (match_dup 5))]
21004 "TARGET_64BIT
21005 && !(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
21006 && ix86_check_no_addr_space (insn)"
21007 "%^rep{%;} movsq"
21008 [(set_attr "type" "str")
21009 (set_attr "prefix_rep" "1")
21010 (set_attr "memory" "both")
21011 (set_attr "mode" "DI")])
21012
;; rep movs{l|d}.  Same shape as *rep_movdi_rex64 but the count (operand 5)
;; is shifted left by 2 when computing the final pointers, and there is no
;; TARGET_64BIT requirement.  Requires CX_REG/SI_REG/DI_REG not fixed and
;; ix86_check_no_addr_space (insn).
21013 (define_insn "*rep_movsi"
21014 [(set (match_operand:P 2 "register_operand" "=c") (const_int 0))
21015 (set (match_operand:P 0 "register_operand" "=D")
21016 (plus:P (ashift:P (match_operand:P 5 "register_operand" "2")
21017 (const_int 2))
21018 (match_operand:P 3 "register_operand" "0")))
21019 (set (match_operand:P 1 "register_operand" "=S")
21020 (plus:P (ashift:P (match_dup 5) (const_int 2))
21021 (match_operand:P 4 "register_operand" "1")))
21022 (set (mem:BLK (match_dup 3))
21023 (mem:BLK (match_dup 4)))
21024 (use (match_dup 5))]
21025 "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
21026 && ix86_check_no_addr_space (insn)"
21027 "%^rep{%;} movs{l|d}"
21028 [(set_attr "type" "str")
21029 (set_attr "prefix_rep" "1")
21030 (set_attr "memory" "both")
21031 (set_attr "mode" "SI")])
21032
;; rep movsb.  The count (operand 5, tied to operand 2 "=c") is added
;; unshifted to the incoming pointers (operands 3 and 4) to form the final
;; values of operands 0 ("=D") and 1 ("=S"); operand 2 is set to zero.
;; Requires CX_REG/SI_REG/DI_REG not fixed and
;; ix86_check_no_addr_space (insn).
21033 (define_insn "*rep_movqi"
21034 [(set (match_operand:P 2 "register_operand" "=c") (const_int 0))
21035 (set (match_operand:P 0 "register_operand" "=D")
21036 (plus:P (match_operand:P 3 "register_operand" "0")
21037 (match_operand:P 5 "register_operand" "2")))
21038 (set (match_operand:P 1 "register_operand" "=S")
21039 (plus:P (match_operand:P 4 "register_operand" "1") (match_dup 5)))
21040 (set (mem:BLK (match_dup 3))
21041 (mem:BLK (match_dup 4)))
21042 (use (match_dup 5))]
21043 "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
21044 && ix86_check_no_addr_space (insn)"
21045 "%^rep{%;} movsb"
21046 [(set_attr "type" "str")
21047 (set_attr "prefix_rep" "1")
21048 (set_attr "memory" "both")
21049 (set_attr "mode" "QI")])
21050
;; Block set expander.  Forwards operands 0-8 to ix86_expand_set_or_cpymem,
;; passing NULL as its second argument and true as its last.  Expands to
;; DONE when the helper returns nonzero and FAILs otherwise.
21051 (define_expand "setmem<mode>"
21052 [(use (match_operand:BLK 0 "memory_operand"))
21053 (use (match_operand:SWI48 1 "nonmemory_operand"))
21054 (use (match_operand:QI 2 "nonmemory_operand"))
21055 (use (match_operand 3 "const_int_operand"))
21056 (use (match_operand:SI 4 "const_int_operand"))
21057 (use (match_operand:SI 5 "const_int_operand"))
21058 (use (match_operand:SI 6 ""))
21059 (use (match_operand:SI 7 ""))
21060 (use (match_operand:SI 8 ""))]
21061 ""
21062 {
21063 if (ix86_expand_set_or_cpymem (operands[0], NULL,
21064 operands[1], operands[2],
21065 operands[3], operands[4],
21066 operands[5], operands[6],
21067 operands[7], operands[8], true))
21068 DONE;
21069 else
21070 FAIL;
21071 })
21072
21073 ;; Most CPUs don't like single string operations.
21074 ;; Handle this case here to simplify the previous expander.
21075
;; Store register operand 2 to memory operand 1 and advance the pointer in
;; operand 0.  Operand 3 is computed here as operand 0 plus the size of
;; operand 2's mode.
;; FAILs for a destination in a non-generic address space.  If the memory
;; operand's mode differs from operand 2's, it is re-typed with
;; adjust_address_nv.  When TARGET_SINGLE_STRINGOP or optimizing for size,
;; and neither AX_REG nor DI_REG is fixed, emits strset_singleop instead;
;; otherwise the RTL template (store plus a pointer addition that clobbers
;; FLAGS_REG) is emitted.
21076 (define_expand "strset"
21077 [(set (match_operand 1 "memory_operand")
21078 (match_operand 2 "register_operand"))
21079 (parallel [(set (match_operand 0 "register_operand")
21080 (match_dup 3))
21081 (clobber (reg:CC FLAGS_REG))])]
21082 ""
21083 {
21084 /* Can't use this for non-default address spaces. */
21085 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[1])))
21086 FAIL;
21087
21088 if (GET_MODE (operands[1]) != GET_MODE (operands[2]))
21089 operands[1] = adjust_address_nv (operands[1], GET_MODE (operands[2]), 0);
21090
21091 /* If .md ever supports :P for Pmode, this can be directly
21092 in the pattern above. */
21093 operands[3] = plus_constant (Pmode, operands[0],
21094 GET_MODE_SIZE (GET_MODE (operands[2])));
21095
21096 /* Can't use this if the user has appropriated eax or edi. */
21097 if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ())
21098 && !(fixed_regs[AX_REG] || fixed_regs[DI_REG]))
21099 {
21100 emit_insn (gen_strset_singleop (operands[0], operands[1], operands[2],
21101 operands[3]));
21102 DONE;
21103 }
21104 })
21105
;; Builds the parallel (store, pointer update, UNSPEC_STOS marker) that the
;; *strset*_1 insns have.  The preparation statements only set
;; ix86_optimize_mode_switching[X86_DIRFLAG] when TARGET_CLD; the template
;; itself is then emitted.
21106 (define_expand "strset_singleop"
21107 [(parallel [(set (match_operand 1 "memory_operand")
21108 (match_operand 2 "register_operand"))
21109 (set (match_operand 0 "register_operand")
21110 (match_operand 3))
21111 (unspec [(const_int 0)] UNSPEC_STOS)])]
21112 ""
21113 {
21114 if (TARGET_CLD)
21115 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
21116 })
21117
;; Single stosq: stores DImode operand 2 ("a") at the address in operand 1
;; and sets operand 0 ("=D") to that address plus 8.  Operand 1 is tied to
;; operand 0.  Requires TARGET_64BIT, AX_REG and DI_REG not fixed, and
;; ix86_check_no_addr_space (insn).
21118 (define_insn "*strsetdi_rex_1"
21119 [(set (mem:DI (match_operand:P 1 "register_operand" "0"))
21120 (match_operand:DI 2 "register_operand" "a"))
21121 (set (match_operand:P 0 "register_operand" "=D")
21122 (plus:P (match_dup 1)
21123 (const_int 8)))
21124 (unspec [(const_int 0)] UNSPEC_STOS)]
21125 "TARGET_64BIT
21126 && !(fixed_regs[AX_REG] || fixed_regs[DI_REG])
21127 && ix86_check_no_addr_space (insn)"
21128 "%^stosq"
21129 [(set_attr "type" "str")
21130 (set_attr "memory" "store")
21131 (set_attr "mode" "DI")])
21132
;; Single stos{l|d}: stores SImode operand 2 ("a") at the address in
;; operand 1 and sets operand 0 ("=D") to that address plus 4.  Requires
;; AX_REG and DI_REG not fixed and ix86_check_no_addr_space (insn).
21133 (define_insn "*strsetsi_1"
21134 [(set (mem:SI (match_operand:P 1 "register_operand" "0"))
21135 (match_operand:SI 2 "register_operand" "a"))
21136 (set (match_operand:P 0 "register_operand" "=D")
21137 (plus:P (match_dup 1)
21138 (const_int 4)))
21139 (unspec [(const_int 0)] UNSPEC_STOS)]
21140 "!(fixed_regs[AX_REG] || fixed_regs[DI_REG])
21141 && ix86_check_no_addr_space (insn)"
21142 "%^stos{l|d}"
21143 [(set_attr "type" "str")
21144 (set_attr "memory" "store")
21145 (set_attr "mode" "SI")])
21146
;; Single stosw: stores HImode operand 2 ("a") at the address in operand 1
;; and sets operand 0 ("=D") to that address plus 2.  Requires AX_REG and
;; DI_REG not fixed and ix86_check_no_addr_space (insn).
21147 (define_insn "*strsethi_1"
21148 [(set (mem:HI (match_operand:P 1 "register_operand" "0"))
21149 (match_operand:HI 2 "register_operand" "a"))
21150 (set (match_operand:P 0 "register_operand" "=D")
21151 (plus:P (match_dup 1)
21152 (const_int 2)))
21153 (unspec [(const_int 0)] UNSPEC_STOS)]
21154 "!(fixed_regs[AX_REG] || fixed_regs[DI_REG])
21155 && ix86_check_no_addr_space (insn)"
21156 "%^stosw"
21157 [(set_attr "type" "str")
21158 (set_attr "memory" "store")
21159 (set_attr "mode" "HI")])
21160
;; Single stosb: stores QImode operand 2 ("a") at the address in operand 1
;; and sets operand 0 ("=D") to that address plus 1.  Requires AX_REG and
;; DI_REG not fixed and ix86_check_no_addr_space (insn).  The prefix_rex
;; attribute is forced to "0" when the pointer mode P is DImode and left at
;; "*" otherwise.
21161 (define_insn "*strsetqi_1"
21162 [(set (mem:QI (match_operand:P 1 "register_operand" "0"))
21163 (match_operand:QI 2 "register_operand" "a"))
21164 (set (match_operand:P 0 "register_operand" "=D")
21165 (plus:P (match_dup 1)
21166 (const_int 1)))
21167 (unspec [(const_int 0)] UNSPEC_STOS)]
21168 "!(fixed_regs[AX_REG] || fixed_regs[DI_REG])
21169 && ix86_check_no_addr_space (insn)"
21170 "%^stosb"
21171 [(set_attr "type" "str")
21172 (set_attr "memory" "store")
21173 (set (attr "prefix_rex")
21174 (if_then_else
21175 (match_test "<P:MODE>mode == DImode")
21176 (const_string "0")
21177 (const_string "*")))
21178 (set_attr "mode" "QI")])
21179
;; Builds the five-element parallel that the *rep_stos* insns have: count
;; register (operand 1) set to zero, pointer (operand 0) set to operand 4,
;; memory operand 2 set to (const_int 0), a use of the value register
;; (operand 3) and a use of the count.  The preparation statements only set
;; ix86_optimize_mode_switching[X86_DIRFLAG] when TARGET_CLD.
21180 (define_expand "rep_stos"
21181 [(parallel [(set (match_operand 1 "register_operand") (const_int 0))
21182 (set (match_operand 0 "register_operand")
21183 (match_operand 4))
21184 (set (match_operand 2 "memory_operand") (const_int 0))
21185 (use (match_operand 3 "register_operand"))
21186 (use (match_dup 1))])]
21187 ""
21188 {
21189 if (TARGET_CLD)
21190 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
21191 })
21192
;; rep stosq.  Operand 4 is the count, tied to operand 1 ("=c"), which the
;; pattern sets to zero.  Operand 0 ("=D") becomes (operand 4 << 3) plus
;; the incoming pointer (operand 3, tied to operand 0).  The store is
;; written in the RTL as a BLKmode set to (const_int 0); the DImode value
;; register (operand 2, "a") appears only as a (use ...).
;; Requires TARGET_64BIT, AX_REG/CX_REG/DI_REG not fixed, and
;; ix86_check_no_addr_space (insn).
21193 (define_insn "*rep_stosdi_rex64"
21194 [(set (match_operand:P 1 "register_operand" "=c") (const_int 0))
21195 (set (match_operand:P 0 "register_operand" "=D")
21196 (plus:P (ashift:P (match_operand:P 4 "register_operand" "1")
21197 (const_int 3))
21198 (match_operand:P 3 "register_operand" "0")))
21199 (set (mem:BLK (match_dup 3))
21200 (const_int 0))
21201 (use (match_operand:DI 2 "register_operand" "a"))
21202 (use (match_dup 4))]
21203 "TARGET_64BIT
21204 && !(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
21205 && ix86_check_no_addr_space (insn)"
21206 "%^rep{%;} stosq"
21207 [(set_attr "type" "str")
21208 (set_attr "prefix_rep" "1")
21209 (set_attr "memory" "store")
21210 (set_attr "mode" "DI")])
21211
;; rep stos{l|d}.  Same shape as *rep_stosdi_rex64 but the count
;; (operand 4) is shifted left by 2, the value register (operand 2) is
;; SImode, and there is no TARGET_64BIT requirement.  Requires
;; AX_REG/CX_REG/DI_REG not fixed and ix86_check_no_addr_space (insn).
21212 (define_insn "*rep_stossi"
21213 [(set (match_operand:P 1 "register_operand" "=c") (const_int 0))
21214 (set (match_operand:P 0 "register_operand" "=D")
21215 (plus:P (ashift:P (match_operand:P 4 "register_operand" "1")
21216 (const_int 2))
21217 (match_operand:P 3 "register_operand" "0")))
21218 (set (mem:BLK (match_dup 3))
21219 (const_int 0))
21220 (use (match_operand:SI 2 "register_operand" "a"))
21221 (use (match_dup 4))]
21222 "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
21223 && ix86_check_no_addr_space (insn)"
21224 "%^rep{%;} stos{l|d}"
21225 [(set_attr "type" "str")
21226 (set_attr "prefix_rep" "1")
21227 (set_attr "memory" "store")
21228 (set_attr "mode" "SI")])
21229
;; "rep stosb".  Byte variant: the count is not scaled, so operand 0
;; (constraint "D") receives operand 3 + operand 4 and operand 1
;; (constraint "c") ends up zero.  Operand 2 is the QImode value in the "a"
;; register.  Disabled when AX, CX or DI is fixed or when
;; ix86_check_no_addr_space (insn) returns false.  The prefix_rex attribute
;; is "0" when the pointer mode P is DImode and "*" otherwise.
21230 (define_insn "*rep_stosqi"
21231 [(set (match_operand:P 1 "register_operand" "=c") (const_int 0))
21232 (set (match_operand:P 0 "register_operand" "=D")
21233 (plus:P (match_operand:P 3 "register_operand" "0")
21234 (match_operand:P 4 "register_operand" "1")))
21235 (set (mem:BLK (match_dup 3))
21236 (const_int 0))
21237 (use (match_operand:QI 2 "register_operand" "a"))
21238 (use (match_dup 4))]
21239 "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
21240 && ix86_check_no_addr_space (insn)"
21241 "%^rep{%;} stosb"
21242 [(set_attr "type" "str")
21243 (set_attr "prefix_rep" "1")
21244 (set_attr "memory" "store")
21245 (set (attr "prefix_rex")
21246 (if_then_else
21247 (match_test "<P:MODE>mode == DImode")
21248 (const_string "0")
21249 (const_string "*")))
21250 (set_attr "mode" "QI")])
21251
;; Block-compare expander.  Operand 0 is the SImode result register,
;; operands 1 and 2 are the BLK memory blocks being compared, operands 3
;; and 4 are passed through unchanged.  All the work is delegated to
;; ix86_expand_cmpstrn_or_cmpmem, called with `false' as its last argument
;; (the cmpstrnsi expander passes `true'); the expansion succeeds (DONE)
;; when that helper returns nonzero and is abandoned (FAIL) otherwise.
;; Constraint strings are omitted, as in cmpstrnsi: they have no effect in
;; a define_expand.
21252 (define_expand "cmpmemsi"
21253 [(set (match_operand:SI 0 "register_operand")
21254 (compare:SI (match_operand:BLK 1 "memory_operand")
21255 (match_operand:BLK 2 "memory_operand")))
21256 (use (match_operand 3 "general_operand"))
21257 (use (match_operand 4 "immediate_operand"))]
21258 ""
21259 {
21260 if (ix86_expand_cmpstrn_or_cmpmem (operands[0], operands[1],
21261 operands[2], operands[3],
21262 operands[4], false))
21263 DONE;
21264 else
21265 FAIL;
21266 })
21267
;; Bounded string-compare expander.  Operand 0 is the SImode result
;; register, operands 1 and 2 are the BLK operands being compared,
;; operands 3 and 4 are passed through unchanged.  All the work is
;; delegated to ix86_expand_cmpstrn_or_cmpmem, called with `true' as its
;; last argument; the expansion succeeds (DONE) when that helper returns
;; nonzero and is abandoned (FAIL) otherwise.
21268 (define_expand "cmpstrnsi"
21269 [(set (match_operand:SI 0 "register_operand")
21270 (compare:SI (match_operand:BLK 1 "general_operand")
21271 (match_operand:BLK 2 "general_operand")))
21272 (use (match_operand 3 "general_operand"))
21273 (use (match_operand 4 "immediate_operand"))]
21274 ""
21275 {
21276 if (ix86_expand_cmpstrn_or_cmpmem (operands[0], operands[1],
21277 operands[2], operands[3],
21278 operands[4], true))
21279 DONE;
21280 else
21281 FAIL;
21282 })
21283
21284 ;; Produce a tri-state integer (-1, 0, 1) from condition codes.
21285
;; Emits three insns: operand 1 = (flags gtu 0), operand 2 = (flags ltu 0),
;; then operand 0 = operand 1 - operand 2 (with a FLAGS_REG clobber).
;; Operands 1 and 2 are not supplied by the caller; the preparation code
;; allocates a fresh QImode pseudo register for each of them.
21286 (define_expand "cmpintqi"
21287 [(set (match_dup 1)
21288 (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
21289 (set (match_dup 2)
21290 (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
21291 (parallel [(set (match_operand:QI 0 "register_operand")
21292 (minus:QI (match_dup 1)
21293 (match_dup 2)))
21294 (clobber (reg:CC FLAGS_REG))])]
21295 ""
21296 {
21297 operands[1] = gen_reg_rtx (QImode);
21298 operands[2] = gen_reg_rtx (QImode);
21299 })
21300
21301 ;; memcmp recognizers. The `cmpsb' opcode does nothing if the count is
21302 ;; zero. Emit extra code to make sure that a zero-length compare is EQ.
21303
;; Expander that emits one parallel: FLAGS_REG is set to the comparison of
;; memory operands 4 and 5, operands 2 and 3 are used, and operands 0, 1
;; and 2 are clobbered.  When TARGET_CLD is set, the preparation code
;; records a direction-flag requirement by setting
;; ix86_optimize_mode_switching[X86_DIRFLAG] to 1.
21304 (define_expand "cmpstrnqi_nz_1"
21305 [(parallel [(set (reg:CC FLAGS_REG)
21306 (compare:CC (match_operand 4 "memory_operand")
21307 (match_operand 5 "memory_operand")))
21308 (use (match_operand 2 "register_operand"))
21309 (use (match_operand:SI 3 "immediate_operand"))
21310 (clobber (match_operand 0 "register_operand"))
21311 (clobber (match_operand 1 "register_operand"))
21312 (clobber (match_dup 2))])]
21313 ""
21314 {
21315 if (TARGET_CLD)
21316 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
21317 })
21318
;; "repz cmpsb" for the unconditional compare form.  The two block
;; addresses (operands 4 and 5) and the count (operand 6) are tied by the
;; "0", "1" and "2" constraints to the clobbered registers: operand 0
;; ("S"), operand 1 ("D") and operand 2 ("c").  Operand 3 is an SImode
;; immediate that is only used.  Disabled when CX, SI or DI is fixed or
;; when ix86_check_no_addr_space (insn) returns false.
21319 (define_insn "*cmpstrnqi_nz_1"
21320 [(set (reg:CC FLAGS_REG)
21321 (compare:CC (mem:BLK (match_operand:P 4 "register_operand" "0"))
21322 (mem:BLK (match_operand:P 5 "register_operand" "1"))))
21323 (use (match_operand:P 6 "register_operand" "2"))
21324 (use (match_operand:SI 3 "immediate_operand" "i"))
21325 (clobber (match_operand:P 0 "register_operand" "=S"))
21326 (clobber (match_operand:P 1 "register_operand" "=D"))
21327 (clobber (match_operand:P 2 "register_operand" "=c"))]
21328 "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
21329 && ix86_check_no_addr_space (insn)"
21330 "%^repz{%;} cmpsb"
21331 [(set_attr "type" "str")
21332 (set_attr "mode" "QI")
21333 (set (attr "prefix_rex")
21334 (if_then_else
21335 (match_test "<P:MODE>mode == DImode")
21336 (const_string "0")
21337 (const_string "*")))
21338 (set_attr "prefix_rep" "1")])
21339
21340 ;; The same, but for a count that is not known to be nonzero.
21341
;; Expander that emits one parallel: FLAGS_REG is set to the comparison of
;; memory operands 4 and 5 when operand 2 is nonzero and to (const_int 0)
;; otherwise.  Operand 3 and the incoming FLAGS_REG value are used;
;; operands 0, 1 and 2 are clobbered.  When TARGET_CLD is set, the
;; preparation code records a direction-flag requirement by setting
;; ix86_optimize_mode_switching[X86_DIRFLAG] to 1.
21342 (define_expand "cmpstrnqi_1"
21343 [(parallel [(set (reg:CC FLAGS_REG)
21344 (if_then_else:CC (ne (match_operand 2 "register_operand")
21345 (const_int 0))
21346 (compare:CC (match_operand 4 "memory_operand")
21347 (match_operand 5 "memory_operand"))
21348 (const_int 0)))
21349 (use (match_operand:SI 3 "immediate_operand"))
21350 (use (reg:CC FLAGS_REG))
21351 (clobber (match_operand 0 "register_operand"))
21352 (clobber (match_operand 1 "register_operand"))
21353 (clobber (match_dup 2))])]
21354 ""
21355 {
21356 if (TARGET_CLD)
21357 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
21358 })
21359
;; "repz cmpsb" for the conditional compare form: FLAGS_REG is the block
;; comparison when the count (operand 6) is nonzero and (const_int 0)
;; otherwise, and the incoming FLAGS_REG value is used.  Operands 4, 5 and
;; 6 are tied by the "0", "1" and "2" constraints to the clobbered
;; registers: operand 0 ("S"), operand 1 ("D") and operand 2 ("c").
;; Disabled when CX, SI or DI is fixed or when
;; ix86_check_no_addr_space (insn) returns false.
21360 (define_insn "*cmpstrnqi_1"
21361 [(set (reg:CC FLAGS_REG)
21362 (if_then_else:CC (ne (match_operand:P 6 "register_operand" "2")
21363 (const_int 0))
21364 (compare:CC (mem:BLK (match_operand:P 4 "register_operand" "0"))
21365 (mem:BLK (match_operand:P 5 "register_operand" "1")))
21366 (const_int 0)))
21367 (use (match_operand:SI 3 "immediate_operand" "i"))
21368 (use (reg:CC FLAGS_REG))
21369 (clobber (match_operand:P 0 "register_operand" "=S"))
21370 (clobber (match_operand:P 1 "register_operand" "=D"))
21371 (clobber (match_operand:P 2 "register_operand" "=c"))]
21372 "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
21373 && ix86_check_no_addr_space (insn)"
21374 "%^repz{%;} cmpsb"
21375 [(set_attr "type" "str")
21376 (set_attr "mode" "QI")
21377 (set (attr "prefix_rex")
21378 (if_then_else
21379 (match_test "<P:MODE>mode == DImode")
21380 (const_string "0")
21381 (const_string "*")))
21382 (set_attr "prefix_rep" "1")])
21383
;; strlen expander for each pointer mode P.  Operand 0 is the result
;; register, operand 1 the BLK operand, operand 2 a QImode immediate and
;; operand 3 another immediate.  The four operands are handed to
;; ix86_expand_strlen; the expansion succeeds (DONE) when that helper
;; returns nonzero and is abandoned (FAIL) otherwise.
21384 (define_expand "strlen<mode>"
21385 [(set (match_operand:P 0 "register_operand")
21386 (unspec:P [(match_operand:BLK 1 "general_operand")
21387 (match_operand:QI 2 "immediate_operand")
21388 (match_operand 3 "immediate_operand")]
21389 UNSPEC_SCAS))]
21390 ""
21391 {
21392 if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
21393 DONE;
21394 else
21395 FAIL;
21396 })
21397
;; Expander that emits one parallel: operand 0 is set to operand 2 (an
;; arbitrary expression supplied by the caller), while operand 1 and
;; FLAGS_REG are clobbered.  When TARGET_CLD is set, the preparation code
;; records a direction-flag requirement by setting
;; ix86_optimize_mode_switching[X86_DIRFLAG] to 1.
21398 (define_expand "strlenqi_1"
21399 [(parallel [(set (match_operand 0 "register_operand")
21400 (match_operand 2))
21401 (clobber (match_operand 1 "register_operand"))
21402 (clobber (reg:CC FLAGS_REG))])]
21403 ""
21404 {
21405 if (TARGET_CLD)
21406 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
21407 })
21408
;; "repnz scasb".  Operand 0 (earlyclobber "c" register) is set to an
;; UNSPEC_SCAS of: the BLK memory addressed by operand 5 (tied to the
;; clobbered "D" register, operand 1), the QImode value in the "a" register
;; (operand 2), an immediate (operand 3), and operand 4 (tied to operand
;; 0).  FLAGS_REG is clobbered.  Disabled when AX, CX or DI is fixed or
;; when ix86_check_no_addr_space (insn) returns false.
21409 (define_insn "*strlenqi_1"
21410 [(set (match_operand:P 0 "register_operand" "=&c")
21411 (unspec:P [(mem:BLK (match_operand:P 5 "register_operand" "1"))
21412 (match_operand:QI 2 "register_operand" "a")
21413 (match_operand:P 3 "immediate_operand" "i")
21414 (match_operand:P 4 "register_operand" "0")] UNSPEC_SCAS))
21415 (clobber (match_operand:P 1 "register_operand" "=D"))
21416 (clobber (reg:CC FLAGS_REG))]
21417 "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
21418 && ix86_check_no_addr_space (insn)"
21419 "%^repnz{%;} scasb"
21420 [(set_attr "type" "str")
21421 (set_attr "mode" "QI")
21422 (set (attr "prefix_rex")
21423 (if_then_else
21424 (match_test "<P:MODE>mode == DImode")
21425 (const_string "0")
21426 (const_string "*")))
21427 (set_attr "prefix_rep" "1")])
21428
21429 ;; Peephole optimizations to clean up after cmpstrn*. This should be
21430 ;; handled in combine, but it is not currently up to the task.
21431 ;; When used for their truth value, the cmpstrn* expanders generate
21432 ;; code like this:
21433 ;;
21434 ;; repz cmpsb
21435 ;; seta %al
21436 ;; setb %dl
21437 ;; cmpb %al, %dl
21438 ;; jcc label
21439 ;;
21440 ;; The intermediate three instructions are unnecessary.
21441
21442 ;; This one handles cmpstrn*_nz_1...
;; Matches a four-insn window: the unconditional block-compare parallel, a
;; gtu set of QImode operand 7, an ltu set of QImode operand 8, and a
;; FLAGS_REG compare of operands 7 and 8.  When peep2_reg_dead_p (4, ...)
;; reports both operand 7 and operand 8 dead, the window is replaced by
;; the block-compare parallel alone, rebuilt from the same operands.
21443 (define_peephole2
21444 [(parallel[
21445 (set (reg:CC FLAGS_REG)
21446 (compare:CC (mem:BLK (match_operand 4 "register_operand"))
21447 (mem:BLK (match_operand 5 "register_operand"))))
21448 (use (match_operand 6 "register_operand"))
21449 (use (match_operand:SI 3 "immediate_operand"))
21450 (clobber (match_operand 0 "register_operand"))
21451 (clobber (match_operand 1 "register_operand"))
21452 (clobber (match_operand 2 "register_operand"))])
21453 (set (match_operand:QI 7 "register_operand")
21454 (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
21455 (set (match_operand:QI 8 "register_operand")
21456 (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
21457 (set (reg FLAGS_REG)
21458 (compare (match_dup 7) (match_dup 8)))
21459 ]
21460 "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])"
21461 [(parallel[
21462 (set (reg:CC FLAGS_REG)
21463 (compare:CC (mem:BLK (match_dup 4))
21464 (mem:BLK (match_dup 5))))
21465 (use (match_dup 6))
21466 (use (match_dup 3))
21467 (clobber (match_dup 0))
21468 (clobber (match_dup 1))
21469 (clobber (match_dup 2))])])
21470
21471 ;; ...and this one handles cmpstrn*_1.
;; Matches a four-insn window: the conditional block-compare parallel
;; (compare only when operand 6 is nonzero, otherwise (const_int 0)), a gtu
;; set of QImode operand 7, an ltu set of QImode operand 8, and a
;; FLAGS_REG compare of operands 7 and 8.  When peep2_reg_dead_p (4, ...)
;; reports both operand 7 and operand 8 dead, the window is replaced by
;; the block-compare parallel alone, rebuilt from the same operands.
21472 (define_peephole2
21473 [(parallel[
21474 (set (reg:CC FLAGS_REG)
21475 (if_then_else:CC (ne (match_operand 6 "register_operand")
21476 (const_int 0))
21477 (compare:CC (mem:BLK (match_operand 4 "register_operand"))
21478 (mem:BLK (match_operand 5 "register_operand")))
21479 (const_int 0)))
21480 (use (match_operand:SI 3 "immediate_operand"))
21481 (use (reg:CC FLAGS_REG))
21482 (clobber (match_operand 0 "register_operand"))
21483 (clobber (match_operand 1 "register_operand"))
21484 (clobber (match_operand 2 "register_operand"))])
21485 (set (match_operand:QI 7 "register_operand")
21486 (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
21487 (set (match_operand:QI 8 "register_operand")
21488 (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
21489 (set (reg FLAGS_REG)
21490 (compare (match_dup 7) (match_dup 8)))
21491 ]
21492 "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])"
21493 [(parallel[
21494 (set (reg:CC FLAGS_REG)
21495 (if_then_else:CC (ne (match_dup 6)
21496 (const_int 0))
21497 (compare:CC (mem:BLK (match_dup 4))
21498 (mem:BLK (match_dup 5)))
21499 (const_int 0)))
21500 (use (match_dup 3))
21501 (use (reg:CC FLAGS_REG))
21502 (clobber (match_dup 0))
21503 (clobber (match_dup 1))
21504 (clobber (match_dup 2))])])
21505 \f
21506 ;; Conditional move instructions.
21507
;; Integer conditional-move expander for the SWIM modes.  Operand 0 is the
;; destination register, operand 1 the comparison, operands 2 and 3 the
;; two candidate values.  Expansion is delegated to ix86_expand_int_movcc
;; (operands): DONE when it returns nonzero, FAIL otherwise.
21508 (define_expand "mov<mode>cc"
21509 [(set (match_operand:SWIM 0 "register_operand")
21510 (if_then_else:SWIM (match_operand 1 "comparison_operator")
21511 (match_operand:SWIM 2 "<general_operand>")
21512 (match_operand:SWIM 3 "<general_operand>")))]
21513 ""
21514 "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
21515
21516 ;; Data flow gets confused by our desire for `sbbl reg,reg', and clearing
21517 ;; the register first winds up with `sbbl $0,reg', which is also weird.
21518 ;; So just document what we're doing explicitly.
21519
;; Expander (SWI48 modes) that emits one parallel: operand 0 is set to -1
;; when the carry-flag comparison (operator 2, applied to flags operand 1
;; and zero) holds and to 0 otherwise; FLAGS_REG is clobbered.  There is no
;; condition and no preparation code.
21520 (define_expand "x86_mov<mode>cc_0_m1"
21521 [(parallel
21522 [(set (match_operand:SWI48 0 "register_operand")
21523 (if_then_else:SWI48
21524 (match_operator:SWI48 2 "ix86_carry_flag_operator"
21525 [(match_operand 1 "flags_reg_operand")
21526 (const_int 0)])
21527 (const_int -1)
21528 (const_int 0)))
21529 (clobber (reg:CC FLAGS_REG))])])
21530
;; "sbb %0, %0" for the if_then_else form (SWI48 modes): operand 0 becomes
;; -1 when the carry-flag comparison (operator 1) holds and 0 otherwise.
;; FLAGS_REG is clobbered.  The length_immediate attribute is "0" since the
;; instruction carries no immediate.
21531 (define_insn "*x86_mov<mode>cc_0_m1"
21532 [(set (match_operand:SWI48 0 "register_operand" "=r")
21533 (if_then_else:SWI48 (match_operator 1 "ix86_carry_flag_operator"
21534 [(reg FLAGS_REG) (const_int 0)])
21535 (const_int -1)
21536 (const_int 0)))
21537 (clobber (reg:CC FLAGS_REG))]
21538 ""
21539 "sbb{<imodesuffix>}\t%0, %0"
21540 [(set_attr "type" "alu1")
21541 (set_attr "use_carry" "1")
21542 (set_attr "pent_pair" "pu")
21543 (set_attr "mode" "<MODE>")
21544 (set_attr "length_immediate" "0")])
21545
;; Same "sbb %0, %0" output, recognized when the value is written as a
;; sign_extract of width 1 at bit position 0 taken from the carry-flag
;; comparison (operator 1).  SWI48 modes; FLAGS_REG is clobbered.
21546 (define_insn "*x86_mov<mode>cc_0_m1_se"
21547 [(set (match_operand:SWI48 0 "register_operand" "=r")
21548 (sign_extract:SWI48 (match_operator 1 "ix86_carry_flag_operator"
21549 [(reg FLAGS_REG) (const_int 0)])
21550 (const_int 1)
21551 (const_int 0)))
21552 (clobber (reg:CC FLAGS_REG))]
21553 ""
21554 "sbb{<imodesuffix>}\t%0, %0"
21555 [(set_attr "type" "alu1")
21556 (set_attr "use_carry" "1")
21557 (set_attr "pent_pair" "pu")
21558 (set_attr "mode" "<MODE>")
21559 (set_attr "length_immediate" "0")])
21560
;; Same "sbb %0, %0" output, recognized when the value is written as the
;; negation of the carry-flag comparison (operator 1).  Unlike the two
;; sibling insns this one iterates over all SWI modes and uses the
;; mode-dependent "<r>" register constraint.  FLAGS_REG is clobbered.
21561 (define_insn "*x86_mov<mode>cc_0_m1_neg"
21562 [(set (match_operand:SWI 0 "register_operand" "=<r>")
21563 (neg:SWI (match_operator 1 "ix86_carry_flag_operator"
21564 [(reg FLAGS_REG) (const_int 0)])))
21565 (clobber (reg:CC FLAGS_REG))]
21566 ""
21567 "sbb{<imodesuffix>}\t%0, %0"
21568 [(set_attr "type" "alu1")
21569 (set_attr "use_carry" "1")
21570 (set_attr "pent_pair" "pu")
21571 (set_attr "mode" "<MODE>")
21572 (set_attr "length_immediate" "0")])
21573
;; Expander (SWI48 modes) that emits one parallel: operand 0 is set to the
;; negation of (ltu (reg:CCC FLAGS_REG) 0), and FLAGS_REG is clobbered.
;; The only operand the caller supplies is the destination register.
21574 (define_expand "x86_mov<mode>cc_0_m1_neg"
21575 [(parallel
21576 [(set (match_operand:SWI48 0 "register_operand")
21577 (neg:SWI48 (ltu:SWI48 (reg:CCC FLAGS_REG) (const_int 0))))
21578 (clobber (reg:CC FLAGS_REG))])])
21579
;; Splits -(operand 1 <=u C), for a constant C in operand 2, into a
;; FLAGS_REG compare followed by -(ltu flags).  The preparation statement
;; replaces operand 2 with C + 1 before the new insns are generated, so the
;; emitted compare is against C + 1.  The split applies only when C
;; satisfies x86_64_immediate_operand and is neither -1 nor 2147483647.
;; NOTE(review): the two excluded values are presumably those for which
;; C + 1 wraps to zero or stops being a valid sign-extended 32-bit
;; immediate -- confirm.
21580 (define_split
21581 [(set (match_operand:SWI48 0 "register_operand")
21582 (neg:SWI48
21583 (leu:SWI48
21584 (match_operand 1 "int_nonimmediate_operand")
21585 (match_operand 2 "const_int_operand"))))]
21586 "x86_64_immediate_operand (operands[2], VOIDmode)
21587 && INTVAL (operands[2]) != -1
21588 && INTVAL (operands[2]) != 2147483647"
21589 [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2)))
21590 (set (match_dup 0)
21591 (neg:SWI48 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0))))]
21592 "operands[2] = GEN_INT (INTVAL (operands[2]) + 1);")
21593
;; Splits -(operand 1 == 0) into a FLAGS_REG compare of operand 1 against
;; the constant 1 followed by -(ltu flags).  Applies to all SWI modes with
;; no extra condition.
21594 (define_split
21595 [(set (match_operand:SWI 0 "register_operand")
21596 (neg:SWI
21597 (eq:SWI
21598 (match_operand 1 "int_nonimmediate_operand")
21599 (const_int 0))))]
21600 ""
21601 [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (const_int 1)))
21602 (set (match_dup 0)
21603 (neg:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))))])
21604
;; Splits -(operand 1 != 0) into two insns: FLAGS_REG (in CCC mode) is set
;; from an UNSPEC_CC_NE of operand 1 and zero, then operand 0 is set to
;; -(ltu flags).  Applies to all SWI modes with no extra condition.
21605 (define_split
21606 [(set (match_operand:SWI 0 "register_operand")
21607 (neg:SWI
21608 (ne:SWI
21609 (match_operand 1 "int_nonimmediate_operand")
21610 (const_int 0))))]
21611 ""
21612 [(set (reg:CCC FLAGS_REG)
21613 (unspec:CCC [(match_dup 1) (const_int 0)] UNSPEC_CC_NE))
21614 (set (match_dup 0)
21615 (neg:SWI (ltu:SWI (reg:CCC FLAGS_REG) (const_int 0))))])
21616
;; Integer cmov for the SWI248 modes.  Two alternatives: in the first the
;; destination is tied to operand 3 and operand 2 (register or memory) is
;; moved in under condition %C1; in the second the destination is tied to
;; operand 2 and operand 3 is moved in under the reversed condition %c1.
;; Requires TARGET_CMOVE and rejects the case where operands 2 and 3 are
;; both memory.
21617 (define_insn "*mov<mode>cc_noc"
21618 [(set (match_operand:SWI248 0 "register_operand" "=r,r")
21619 (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
21620 [(reg FLAGS_REG) (const_int 0)])
21621 (match_operand:SWI248 2 "nonimmediate_operand" "rm,0")
21622 (match_operand:SWI248 3 "nonimmediate_operand" "0,rm")))]
21623 "TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
21624 "@
21625 cmov%O2%C1\t{%2, %0|%0, %2}
21626 cmov%O2%c1\t{%3, %0|%0, %3}"
21627 [(set_attr "type" "icmov")
21628 (set_attr "mode" "<MODE>")])
21629
;; cmov whose DImode result is the zero extension of the selected SImode
;; operand.  The two alternatives mirror the plain cmov insn (second one
;; uses the reversed condition %c1); the destination is printed with the
;; %k modifier.  Requires TARGET_64BIT and TARGET_CMOVE and rejects the
;; case where operands 2 and 3 are both memory.  The mode attribute is SI.
21630 (define_insn "*movsicc_noc_zext"
21631 [(set (match_operand:DI 0 "register_operand" "=r,r")
21632 (if_then_else:DI (match_operator 1 "ix86_comparison_operator"
21633 [(reg FLAGS_REG) (const_int 0)])
21634 (zero_extend:DI
21635 (match_operand:SI 2 "nonimmediate_operand" "rm,0"))
21636 (zero_extend:DI
21637 (match_operand:SI 3 "nonimmediate_operand" "0,rm"))))]
21638 "TARGET_64BIT
21639 && TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
21640 "@
21641 cmov%O2%C1\t{%2, %k0|%k0, %2}
21642 cmov%O2%c1\t{%3, %k0|%k0, %3}"
21643 [(set_attr "type" "icmov")
21644 (set_attr "mode" "SI")])
21645
21646 ;; Don't do conditional moves with memory inputs. This splitter helps
21647 ;; register starved x86_32 by forcing inputs into registers before reload.
;; Rewrites an SWI248 conditional move so that both value operands are
;; registers: the preparation code passes operands 2 and 3 through
;; force_reg and the same if_then_else is regenerated.  Applies only on
;; 32-bit targets (!TARGET_64BIT) with TARGET_CMOVE and
;; TARGET_AVOID_MEM_OPND_FOR_CMOVE, when at least one of operands 2 and 3
;; is memory, new pseudos can still be created, and the insn is being
;; optimized for speed.
21648 (define_split
21649 [(set (match_operand:SWI248 0 "register_operand")
21650 (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
21651 [(reg FLAGS_REG) (const_int 0)])
21652 (match_operand:SWI248 2 "nonimmediate_operand")
21653 (match_operand:SWI248 3 "nonimmediate_operand")))]
21654 "!TARGET_64BIT && TARGET_CMOVE
21655 && TARGET_AVOID_MEM_OPND_FOR_CMOVE
21656 && (MEM_P (operands[2]) || MEM_P (operands[3]))
21657 && can_create_pseudo_p ()
21658 && optimize_insn_for_speed_p ()"
21659 [(set (match_dup 0)
21660 (if_then_else:SWI248 (match_dup 1) (match_dup 2) (match_dup 3)))]
21661 {
21662 operands[2] = force_reg (<MODE>mode, operands[2]);
21663 operands[3] = force_reg (<MODE>mode, operands[3]);
21664 })
21665
;; QImode conditional move between registers.  The output template is "#"
;; (GCC's marker for an insn that has to be split rather than printed), so
;; no assembly is produced from this pattern directly.  Enabled with
;; TARGET_CMOVE when TARGET_PARTIAL_REG_STALL is not set.
21666 (define_insn "*movqicc_noc"
21667 [(set (match_operand:QI 0 "register_operand" "=r,r")
21668 (if_then_else:QI (match_operator 1 "ix86_comparison_operator"
21669 [(reg FLAGS_REG) (const_int 0)])
21670 (match_operand:QI 2 "register_operand" "r,0")
21671 (match_operand:QI 3 "register_operand" "0,r")))]
21672 "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL"
21673 "#"
21674 [(set_attr "type" "icmov")
21675 (set_attr "mode" "QI")])
21676
;; After reload, turns an SWI12 (QImode/HImode) register conditional move
;; into an SImode one: the preparation code replaces operands 0, 2 and 3
;; with their SImode low parts via gen_lowpart and the if_then_else is
;; regenerated in SImode.  Requires TARGET_CMOVE and
;; !TARGET_PARTIAL_REG_STALL.
21677 (define_split
21678 [(set (match_operand:SWI12 0 "register_operand")
21679 (if_then_else:SWI12 (match_operator 1 "ix86_comparison_operator"
21680 [(reg FLAGS_REG) (const_int 0)])
21681 (match_operand:SWI12 2 "register_operand")
21682 (match_operand:SWI12 3 "register_operand")))]
21683 "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL
21684 && reload_completed"
21685 [(set (match_dup 0)
21686 (if_then_else:SI (match_dup 1) (match_dup 2) (match_dup 3)))]
21687 {
21688 operands[0] = gen_lowpart (SImode, operands[0]);
21689 operands[2] = gen_lowpart (SImode, operands[2]);
21690 operands[3] = gen_lowpart (SImode, operands[3]);
21691 })
21692
21693 ;; Don't do conditional moves with memory inputs
;; Needs a free "r" scratch register (operand 4).  Replaces an SWI248
;; conditional move that has a memory operand by a load of that memory
;; into the scratch followed by the conditional move using the scratch.
;; The preparation code picks operand 2 when it is memory, otherwise
;; operand 3; operand 5 becomes the memory to load.  The final
;; gcc_unreachable () is guarded by the insn condition, which requires at
;; least one of operands 2 and 3 to be memory.  Also requires TARGET_CMOVE,
;; TARGET_AVOID_MEM_OPND_FOR_CMOVE and optimizing the insn for speed.
21694 (define_peephole2
21695 [(match_scratch:SWI248 4 "r")
21696 (set (match_operand:SWI248 0 "register_operand")
21697 (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
21698 [(reg FLAGS_REG) (const_int 0)])
21699 (match_operand:SWI248 2 "nonimmediate_operand")
21700 (match_operand:SWI248 3 "nonimmediate_operand")))]
21701 "TARGET_CMOVE && TARGET_AVOID_MEM_OPND_FOR_CMOVE
21702 && (MEM_P (operands[2]) || MEM_P (operands[3]))
21703 && optimize_insn_for_speed_p ()"
21704 [(set (match_dup 4) (match_dup 5))
21705 (set (match_dup 0)
21706 (if_then_else:SWI248 (match_dup 1) (match_dup 2) (match_dup 3)))]
21707 {
21708 if (MEM_P (operands[2]))
21709 {
21710 operands[5] = operands[2];
21711 operands[2] = operands[4];
21712 }
21713 else if (MEM_P (operands[3]))
21714 {
21715 operands[5] = operands[3];
21716 operands[3] = operands[4];
21717 }
21718 else
21719 gcc_unreachable ();
21720 })
21721
;; Zero-extending counterpart of the scratch-register cmov peephole: the
;; DImode result selects between zero extensions of SImode operands 2 and
;; 3.  With a free SImode "r" scratch (operand 4), the memory operand
;; (operand 2 if it is memory, otherwise operand 3) is first loaded into
;; the scratch as operand 5, and the conditional move then uses the
;; scratch.  Requires TARGET_64BIT, TARGET_CMOVE,
;; TARGET_AVOID_MEM_OPND_FOR_CMOVE, at least one memory operand, and
;; optimizing the insn for speed.
21722 (define_peephole2
21723 [(match_scratch:SI 4 "r")
21724 (set (match_operand:DI 0 "register_operand")
21725 (if_then_else:DI (match_operator 1 "ix86_comparison_operator"
21726 [(reg FLAGS_REG) (const_int 0)])
21727 (zero_extend:DI
21728 (match_operand:SI 2 "nonimmediate_operand"))
21729 (zero_extend:DI
21730 (match_operand:SI 3 "nonimmediate_operand"))))]
21731 "TARGET_64BIT
21732 && TARGET_CMOVE && TARGET_AVOID_MEM_OPND_FOR_CMOVE
21733 && (MEM_P (operands[2]) || MEM_P (operands[3]))
21734 && optimize_insn_for_speed_p ()"
21735 [(set (match_dup 4) (match_dup 5))
21736 (set (match_dup 0)
21737 (if_then_else:DI (match_dup 1)
21738 (zero_extend:DI (match_dup 2))
21739 (zero_extend:DI (match_dup 3))))]
21740 {
21741 if (MEM_P (operands[2]))
21742 {
21743 operands[5] = operands[2];
21744 operands[2] = operands[4];
21745 }
21746 else if (MEM_P (operands[3]))
21747 {
21748 operands[5] = operands[3];
21749 operands[3] = operands[4];
21750 }
21751 else
21752 gcc_unreachable ();
21753 })
21754
21755 ;; Eliminate a reg-reg mov by inverting the condition of a cmov (#1).
21756 ;; mov r0,r1; dec r0; mov r2,r3; cmov r0,r2 -> dec r1; mov r0,r3; cmov r0, r1
;; Window of four insns: copy operand 1 into operand 0; a parallel that
;; sets FLAGS_REG (from expression 5) and operand 0 (from expression 6);
;; copy operand 3 into operand 2; conditional move keeping operand 0 or
;; taking operand 2.  The replacement performs the flag-setting arithmetic
;; directly on operand 1, loads operand 3 into operand 0, and then
;; conditionally moves operand 1 into operand 0 (arms swapped relative to
;; the original if_then_else).
;; Preparation: operand 7 is the destination of the first set in the
;; parallel of the second matched insn (peep2_next_insn (1)); operands 8
;; and 9 are expressions 5 and 6 with operand 0 replaced by operand 1.
;; Conditions: operand 2 differs from operands 0 and 1, operand 1 is dead
;; per peep2_reg_dead_p (1, ...), operand 2 is dead per
;; peep2_reg_dead_p (4, ...), and operand 0 does not overlap operand 3.
21757 (define_peephole2
21758 [(set (match_operand:SWI248 0 "general_reg_operand")
21759 (match_operand:SWI248 1 "general_reg_operand"))
21760 (parallel [(set (reg FLAGS_REG) (match_operand 5))
21761 (set (match_dup 0) (match_operand:SWI248 6))])
21762 (set (match_operand:SWI248 2 "general_reg_operand")
21763 (match_operand:SWI248 3 "general_gr_operand"))
21764 (set (match_dup 0)
21765 (if_then_else:SWI248 (match_operator 4 "ix86_comparison_operator"
21766 [(reg FLAGS_REG) (const_int 0)])
21767 (match_dup 0)
21768 (match_dup 2)))]
21769 "TARGET_CMOVE
21770 && REGNO (operands[2]) != REGNO (operands[0])
21771 && REGNO (operands[2]) != REGNO (operands[1])
21772 && peep2_reg_dead_p (1, operands[1])
21773 && peep2_reg_dead_p (4, operands[2])
21774 && !reg_overlap_mentioned_p (operands[0], operands[3])"
21775 [(parallel [(set (match_dup 7) (match_dup 8))
21776 (set (match_dup 1) (match_dup 9))])
21777 (set (match_dup 0) (match_dup 3))
21778 (set (match_dup 0) (if_then_else:SWI248 (match_dup 4)
21779 (match_dup 1)
21780 (match_dup 0)))]
21781 {
21782 operands[7] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (1)), 0, 0));
21783 operands[8] = replace_rtx (operands[5], operands[0], operands[1], true);
21784 operands[9] = replace_rtx (operands[6], operands[0], operands[1], true);
21785 })
21786
21787 ;; Eliminate a reg-reg mov by inverting the condition of a cmov (#2).
21788 ;; mov r2,r3; mov r0,r1; dec r0; cmov r0,r2 -> dec r1; mov r0,r3; cmov r0, r1
;; Same transformation as the first cmov-inversion peephole, but for the
;; insn order in which the copy of operand 3 into operand 2 comes first,
;; followed by the copy of operand 1 into operand 0, the flag-setting
;; parallel, and the conditional move.  The flag-setting parallel is
;; therefore the third matched insn and is fetched with
;; peep2_next_insn (2); operand 1 must be dead per
;; peep2_reg_dead_p (2, ...).  One additional condition applies here:
;; operand 2 must not be mentioned in expression 6.
21789 (define_peephole2
21790 [(set (match_operand:SWI248 2 "general_reg_operand")
21791 (match_operand:SWI248 3 "general_gr_operand"))
21792 (set (match_operand:SWI248 0 "general_reg_operand")
21793 (match_operand:SWI248 1 "general_reg_operand"))
21794 (parallel [(set (reg FLAGS_REG) (match_operand 5))
21795 (set (match_dup 0) (match_operand:SWI248 6))])
21796 (set (match_dup 0)
21797 (if_then_else:SWI248 (match_operator 4 "ix86_comparison_operator"
21798 [(reg FLAGS_REG) (const_int 0)])
21799 (match_dup 0)
21800 (match_dup 2)))]
21801 "TARGET_CMOVE
21802 && REGNO (operands[2]) != REGNO (operands[0])
21803 && REGNO (operands[2]) != REGNO (operands[1])
21804 && peep2_reg_dead_p (2, operands[1])
21805 && peep2_reg_dead_p (4, operands[2])
21806 && !reg_overlap_mentioned_p (operands[0], operands[3])
21807 && !reg_mentioned_p (operands[2], operands[6])"
21808 [(parallel [(set (match_dup 7) (match_dup 8))
21809 (set (match_dup 1) (match_dup 9))])
21810 (set (match_dup 0) (match_dup 3))
21811 (set (match_dup 0) (if_then_else:SWI248 (match_dup 4)
21812 (match_dup 1)
21813 (match_dup 0)))]
21814 {
21815 operands[7] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (2)), 0, 0));
21816 operands[8] = replace_rtx (operands[5], operands[0], operands[1], true);
21817 operands[9] = replace_rtx (operands[6], operands[0], operands[1], true);
21818 })
21819
;; Masked HFmode move, output as "vmovsh" with mask operand 3 (a "Yk"
;; register) and the %N2 modifier applied to operand 2.  Modelled as an
;; UNSPEC_MOVCC_MASK of source operand 1, operand 2 (tied to the
;; destination or matching "C") and the mask.  Three alternatives: memory
;; to register, register to memory, and register to register (the last
;; prints the source with the %d modifier).  Requires TARGET_AVX512FP16.
21820 (define_insn "movhf_mask"
21821 [(set (match_operand:HF 0 "nonimmediate_operand" "=v,m,v")
21822 (unspec:HF
21823 [(match_operand:HF 1 "nonimmediate_operand" "m,v,v")
21824 (match_operand:HF 2 "nonimm_or_0_operand" "0C,0C,0C")
21825 (match_operand:QI 3 "register_operand" "Yk,Yk,Yk")]
21826 UNSPEC_MOVCC_MASK))]
21827 "TARGET_AVX512FP16"
21828 "@
21829 vmovsh\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}
21830 vmovsh\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}
21831 vmovsh\t{%d1, %0%{%3%}%N2|%0%{%3%}%N2, %d1}"
21832 [(set_attr "type" "ssemov")
21833 (set_attr "prefix" "evex")
21834 (set_attr "mode" "HF")])
21835
;; HFmode conditional-move expander, available with TARGET_AVX512FP16.
;; Operand 1 is the comparison; operands 2 and 3 are the register values
;; to choose between.  Expansion is delegated to
;; ix86_expand_fp_movcc (operands): DONE when it returns nonzero, FAIL
;; otherwise.
21836 (define_expand "movhfcc"
21837 [(set (match_operand:HF 0 "register_operand")
21838 (if_then_else:HF
21839 (match_operand 1 "comparison_operator")
21840 (match_operand:HF 2 "register_operand")
21841 (match_operand:HF 3 "register_operand")))]
21842 "TARGET_AVX512FP16"
21843 "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;")
21844
;; Floating-point conditional-move expander for the X87MODEF modes.
;; Available when either TARGET_80387 and TARGET_CMOVE are both set, or the
;; mode is an SSE float mode and TARGET_SSE_MATH is set.  Expansion is
;; delegated to ix86_expand_fp_movcc (operands): DONE when it returns
;; nonzero, FAIL otherwise.
21845 (define_expand "mov<mode>cc"
21846 [(set (match_operand:X87MODEF 0 "register_operand")
21847 (if_then_else:X87MODEF
21848 (match_operand 1 "comparison_operator")
21849 (match_operand:X87MODEF 2 "register_operand")
21850 (match_operand:X87MODEF 3 "register_operand")))]
21851 "(TARGET_80387 && TARGET_CMOVE)
21852 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
21853 "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;")
21854
;; XFmode conditional move on "f" registers, output as fcmov.  In the
;; first alternative the destination is tied to operand 3 and operand 2 is
;; moved in (%F1); in the second the destination is tied to operand 2 and
;; operand 3 is moved in (%f1).  Requires TARGET_80387 and TARGET_CMOVE.
21855 (define_insn "*movxfcc_1"
21856 [(set (match_operand:XF 0 "register_operand" "=f,f")
21857 (if_then_else:XF (match_operator 1 "fcmov_comparison_operator"
21858 [(reg FLAGS_REG) (const_int 0)])
21859 (match_operand:XF 2 "register_operand" "f,0")
21860 (match_operand:XF 3 "register_operand" "0,f")))]
21861 "TARGET_80387 && TARGET_CMOVE"
21862 "@
21863 fcmov%F1\t{%2, %0|%0, %2}
21864 fcmov%f1\t{%3, %0|%0, %3}"
21865 [(set_attr "type" "fcmov")
21866 (set_attr "mode" "XF")])
21867
;; DFmode conditional move with six alternatives, described by the "isa"
;; and "type" attributes:
;;   0-1: "f" registers, output as fcmov;
;;   2-3: earlyclobber general registers, isa nox64, template "#" (type
;;        multi), i.e. not printed directly;
;;   4-5: general registers, isa x64, output as a single cmov.
;; In each pair the second alternative ties the destination to operand 2
;; and uses the reversed-condition modifier.  Requires TARGET_80387 and
;; TARGET_CMOVE and rejects the case where operands 2 and 3 are both
;; memory.
21868 (define_insn "*movdfcc_1"
21869 [(set (match_operand:DF 0 "register_operand" "=f,f,&r,&r,r ,r")
21870 (if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
21871 [(reg FLAGS_REG) (const_int 0)])
21872 (match_operand:DF 2 "nonimmediate_operand"
21873 "f ,0,rm,0 ,rm,0")
21874 (match_operand:DF 3 "nonimmediate_operand"
21875 "0 ,f,0 ,rm,0, rm")))]
21876 "TARGET_80387 && TARGET_CMOVE
21877 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
21878 "@
21879 fcmov%F1\t{%2, %0|%0, %2}
21880 fcmov%f1\t{%3, %0|%0, %3}
21881 #
21882 #
21883 cmov%O2%C1\t{%2, %0|%0, %2}
21884 cmov%O2%c1\t{%3, %0|%0, %3}"
21885 [(set_attr "isa" "*,*,nox64,nox64,x64,x64")
21886 (set_attr "type" "fcmov,fcmov,multi,multi,icmov,icmov")
21887 (set_attr "mode" "DF,DF,DI,DI,DI,DI")])
21888
;; On 32-bit targets, after reload, splits a DFmode conditional move whose
;; destination is a general register into two SImode conditional moves
;; under the same condition (operator 1).
;; The order of the two split_double_mode calls matters: the first call
;; reads operands 2 and 3 and writes their halves to operands 4/5 and 6/7;
;; only then does the second call overwrite operands 2 and 3 with the two
;; halves of destination operand 0.
;; NOTE(review): presumably operands 4/5 and 2 are the low halves and
;; operands 6/7 and 3 the high halves -- confirm against split_double_mode.
21889 (define_split
21890 [(set (match_operand:DF 0 "general_reg_operand")
21891 (if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
21892 [(reg FLAGS_REG) (const_int 0)])
21893 (match_operand:DF 2 "nonimmediate_operand")
21894 (match_operand:DF 3 "nonimmediate_operand")))]
21895 "!TARGET_64BIT && reload_completed"
21896 [(set (match_dup 2)
21897 (if_then_else:SI (match_dup 1) (match_dup 4) (match_dup 5)))
21898 (set (match_dup 3)
21899 (if_then_else:SI (match_dup 1) (match_dup 6) (match_dup 7)))]
21900 {
21901 split_double_mode (DImode, &operands[2], 2, &operands[4], &operands[6]);
21902 split_double_mode (DImode, &operands[0], 1, &operands[2], &operands[3]);
21903 })
21904
;; SFmode conditional move with four alternatives: the first two use "f"
;; registers and are output as fcmov; the last two use general registers
;; (source may be memory) and are output as cmov with mode attribute SI.
;; In each pair the second alternative ties the destination to operand 2
;; and uses the reversed-condition modifier.  Requires TARGET_80387 and
;; TARGET_CMOVE and rejects the case where operands 2 and 3 are both
;; memory.
21905 (define_insn "*movsfcc_1_387"
21906 [(set (match_operand:SF 0 "register_operand" "=f,f,r,r")
21907 (if_then_else:SF (match_operator 1 "fcmov_comparison_operator"
21908 [(reg FLAGS_REG) (const_int 0)])
21909 (match_operand:SF 2 "nonimmediate_operand" "f,0,rm,0")
21910 (match_operand:SF 3 "nonimmediate_operand" "0,f,0,rm")))]
21911 "TARGET_80387 && TARGET_CMOVE
21912 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
21913 "@
21914 fcmov%F1\t{%2, %0|%0, %2}
21915 fcmov%f1\t{%3, %0|%0, %3}
21916 cmov%O2%C1\t{%2, %0|%0, %2}
21917 cmov%O2%c1\t{%3, %0|%0, %3}"
21918 [(set_attr "type" "fcmov,fcmov,icmov,icmov")
21919 (set_attr "mode" "SF,SF,SI,SI")])
21920
21921 ;; Don't do conditional moves with memory inputs. This splitter helps
21922 ;; register starved x86_32 by forcing inputs into registers before reload.
;; Rewrites a MODEF conditional move so that both value operands are
;; registers: the preparation code passes operands 2 and 3 through
;; force_reg and the same if_then_else is regenerated.  Applies only on
;; 32-bit targets (!TARGET_64BIT) with TARGET_80387, TARGET_CMOVE and
;; TARGET_AVOID_MEM_OPND_FOR_CMOVE, when at least one of operands 2 and 3
;; is memory, new pseudos can still be created, and the insn is being
;; optimized for speed.
21923 (define_split
21924 [(set (match_operand:MODEF 0 "register_operand")
21925 (if_then_else:MODEF (match_operator 1 "ix86_comparison_operator"
21926 [(reg FLAGS_REG) (const_int 0)])
21927 (match_operand:MODEF 2 "nonimmediate_operand")
21928 (match_operand:MODEF 3 "nonimmediate_operand")))]
21929 "!TARGET_64BIT && TARGET_80387 && TARGET_CMOVE
21930 && TARGET_AVOID_MEM_OPND_FOR_CMOVE
21931 && (MEM_P (operands[2]) || MEM_P (operands[3]))
21932 && can_create_pseudo_p ()
21933 && optimize_insn_for_speed_p ()"
21934 [(set (match_dup 0)
21935 (if_then_else:MODEF (match_dup 1) (match_dup 2) (match_dup 3)))]
21936 {
21937 operands[2] = force_reg (<MODE>mode, operands[2]);
21938 operands[3] = force_reg (<MODE>mode, operands[3]);
21939 })
21940
21941 ;; Don't do conditional moves with memory inputs
;; MODEF counterpart of the scratch-register cmov peephole, for a
;; destination in a general register.  With a free "r" scratch (operand
;; 4), the memory operand (operand 2 if it is memory, otherwise operand 3)
;; is first loaded into the scratch as operand 5, and the conditional move
;; then uses the scratch.  The condition excludes DFmode unless
;; TARGET_64BIT, and requires TARGET_80387, TARGET_CMOVE,
;; TARGET_AVOID_MEM_OPND_FOR_CMOVE, at least one memory operand, and
;; optimizing the insn for speed.
21942 (define_peephole2
21943 [(match_scratch:MODEF 4 "r")
21944 (set (match_operand:MODEF 0 "general_reg_operand")
21945 (if_then_else:MODEF (match_operator 1 "fcmov_comparison_operator"
21946 [(reg FLAGS_REG) (const_int 0)])
21947 (match_operand:MODEF 2 "nonimmediate_operand")
21948 (match_operand:MODEF 3 "nonimmediate_operand")))]
21949 "(<MODE>mode != DFmode || TARGET_64BIT)
21950 && TARGET_80387 && TARGET_CMOVE
21951 && TARGET_AVOID_MEM_OPND_FOR_CMOVE
21952 && (MEM_P (operands[2]) || MEM_P (operands[3]))
21953 && optimize_insn_for_speed_p ()"
21954 [(set (match_dup 4) (match_dup 5))
21955 (set (match_dup 0)
21956 (if_then_else:MODEF (match_dup 1) (match_dup 2) (match_dup 3)))]
21957 {
21958 if (MEM_P (operands[2]))
21959 {
21960 operands[5] = operands[2];
21961 operands[2] = operands[4];
21962 }
21963 else if (MEM_P (operands[3]))
21964 {
21965 operands[5] = operands[3];
21966 operands[3] = operands[4];
21967 }
21968 else
21969 gcc_unreachable ();
21970 })
21971
21972 ;; All moves in XOP pcmov instructions are 128 bits and hence we restrict
21973 ;; the scalar versions to have only XMM registers as operands.
21974
21975 ;; XOP conditional move
;; Scalar MODEF select output as "vpcmov".  All four operands, including
;; selector operand 1, are "x" registers of the same MODEF mode.  In the
;; AT&T-order template the selector is printed first and the destination
;; last.  Requires TARGET_XOP.
21976 (define_insn "*xop_pcmov_<mode>"
21977 [(set (match_operand:MODEF 0 "register_operand" "=x")
21978 (if_then_else:MODEF
21979 (match_operand:MODEF 1 "register_operand" "x")
21980 (match_operand:MODEF 2 "register_operand" "x")
21981 (match_operand:MODEF 3 "register_operand" "x")))]
21982 "TARGET_XOP"
21983 "vpcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}"
21984 [(set_attr "type" "sse4arg")])
21985
21986 ;; These versions of the min/max patterns are intentionally ignorant of
21987 ;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
21988 ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
21989 ;; are undefined in this condition, we're certain this is correct.
21990
;; Scalar SSE min/max for the MODEF modes, one insn per smaxmin code.
;; Operand 1 carries the "%" (commutative) marker.  Two alternatives: the
;; noavx form is two-operand with the destination tied to operand 1; the
;; avx form is the three-operand "v"-prefixed instruction.  Operand 2 may
;; be memory in both.  Enabled when the mode is an SSE float mode and
;; TARGET_SSE_MATH is set.
21991 (define_insn "<code><mode>3"
21992 [(set (match_operand:MODEF 0 "register_operand" "=x,v")
21993 (smaxmin:MODEF
21994 (match_operand:MODEF 1 "nonimmediate_operand" "%0,v")
21995 (match_operand:MODEF 2 "nonimmediate_operand" "xm,vm")))]
21996 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
21997 "@
21998 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
21999 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
22000 [(set_attr "isa" "noavx,avx")
22001 (set_attr "prefix" "orig,vex")
22002 (set_attr "type" "sseadd")
22003 (set_attr "mode" "<MODE>")])
22004
;; HFmode scalar min/max, one insn per smaxmin code, output as the
;; three-operand "v<maxmin_float>sh" instruction.  Operand 1 carries the
;; "%" (commutative) marker; operand 2 may be memory.  Requires
;; TARGET_AVX512FP16.
22005 (define_insn "<code>hf3"
22006 [(set (match_operand:HF 0 "register_operand" "=v")
22007 (smaxmin:HF
22008 (match_operand:HF 1 "nonimmediate_operand" "%v")
22009 (match_operand:HF 2 "nonimmediate_operand" "vm")))]
22010 "TARGET_AVX512FP16"
22011 "v<maxmin_float>sh\t{%2, %1, %0|%0, %1, %2}"
22012 [(set_attr "prefix" "evex")
22013 (set_attr "type" "sseadd")
22014 (set_attr "mode" "HF")])
22015
22016 ;; These versions of the min/max patterns implement exactly the operations
22017 ;; min = (op1 < op2 ? op1 : op2)
22018 ;; max = (!(op1 < op2) ? op1 : op2)
22019 ;; Their operands are not commutative, and thus they may be used in the
22020 ;; presence of -0.0 and NaN.
22021
;; HFmode min/max expressed as an unspec from the IEEE_MAXMIN iterator,
;; so the operand order is fixed: operand 1 must be a register and has no
;; "%" commutative marker; operand 2 may be memory.  Output as the
;; three-operand "v<ieee_maxmin>sh" instruction.  Requires
;; TARGET_AVX512FP16.
22022 (define_insn "*ieee_s<ieee_maxmin>hf3"
22023 [(set (match_operand:HF 0 "register_operand" "=v")
22024 (unspec:HF
22025 [(match_operand:HF 1 "register_operand" "v")
22026 (match_operand:HF 2 "nonimmediate_operand" "vm")]
22027 IEEE_MAXMIN))]
22028 "TARGET_AVX512FP16"
22029 "v<ieee_maxmin>sh\t{%2, %1, %0|%0, %1, %2}"
22030 [(set_attr "prefix" "evex")
22031 (set_attr "type" "sseadd")
22032 (set_attr "mode" "HF")])
22033
;; MODEF min/max expressed as an unspec from the IEEE_MAXMIN iterator, so
;; the operand order is fixed: operand 1 must be a register and has no "%"
;; commutative marker; operand 2 may be memory.  Two alternatives: the
;; noavx two-operand form with the destination tied to operand 1, and the
;; avx three-operand "v"-prefixed form.  Enabled when the mode is an SSE
;; float mode and TARGET_SSE_MATH is set.
22034 (define_insn "*ieee_s<ieee_maxmin><mode>3"
22035 [(set (match_operand:MODEF 0 "register_operand" "=x,v")
22036 (unspec:MODEF
22037 [(match_operand:MODEF 1 "register_operand" "0,v")
22038 (match_operand:MODEF 2 "nonimmediate_operand" "xm,vm")]
22039 IEEE_MAXMIN))]
22040 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
22041 "@
22042 <ieee_maxmin><ssemodesuffix>\t{%2, %0|%0, %2}
22043 v<ieee_maxmin><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
22044 [(set_attr "isa" "noavx,avx")
22045 (set_attr "prefix" "orig,maybe_evex")
22046 (set_attr "type" "sseadd")
22047 (set_attr "mode" "<MODE>")])
22048
22049 ;; Make two stack loads independent:
22050 ;; fld aa fld aa
22051 ;; fld %st(0) -> fld bb
22052 ;; fmul bb fmul %st(1), %st
22053 ;;
22054 ;; Actually we only match the last two instructions for simplicity.
22055
;; Matches a copy of fp register operand 1 into fp register operand 0
;; followed by operand 0 = OP (operand 0, memory operand 3), where the two
;; registers differ.  The replacement loads memory operand 3 into operand
;; 0 and then computes operand 0 = OP (operand 5, operand 4).
;; Preparation: operand 4 starts as operand 0 (now holding the loaded
;; memory value) and operand 5 as operand 1, giving OP (operand 1,
;; operand 0); for a commutative OP the two are swapped, giving
;; OP (operand 0, operand 1).
22056 (define_peephole2
22057 [(set (match_operand 0 "fp_register_operand")
22058 (match_operand 1 "fp_register_operand"))
22059 (set (match_dup 0)
22060 (match_operator 2 "binary_fp_operator"
22061 [(match_dup 0)
22062 (match_operand 3 "memory_operand")]))]
22063 "REGNO (operands[0]) != REGNO (operands[1])"
22064 [(set (match_dup 0) (match_dup 3))
22065 (set (match_dup 0)
22066 (match_op_dup 2
22067 [(match_dup 5) (match_dup 4)]))]
22068 {
22069 operands[4] = operands[0];
22070 operands[5] = operands[1];
22071
22072 /* The % modifier is not operational anymore in peephole2's, so we have to
22073 swap the operands manually in the case of addition and multiplication. */
22074 if (COMMUTATIVE_ARITH_P (operands[2]))
22075 std::swap (operands[4], operands[5]);
22076 })
22077
;; Variant with the memory operand as the first input of the operator and
;; the copied register as the second.
(define_peephole2
  [(set (match_operand 0 "fp_register_operand")
        (match_operand 1 "fp_register_operand"))
   (set (match_dup 0)
        (match_operator 2 "binary_fp_operator"
          [(match_operand 3 "memory_operand")
           (match_dup 0)]))]
  "REGNO (operands[0]) != REGNO (operands[1])"
  [(set (match_dup 0) (match_dup 3))
   (set (match_dup 0)
        (match_op_dup 2
          [(match_dup 4) (match_dup 5)]))]
{
  /* peephole2 no longer honours the % modifier, so for addition and
     multiplication the two register inputs are exchanged by hand.  */
  const bool commutative_p = COMMUTATIVE_ARITH_P (operands[2]);

  operands[4] = commutative_p ? operands[1] : operands[0];
  operands[5] = commutative_p ? operands[0] : operands[1];
})
22099
22100 ;; Conditional addition patterns
;; Expander only: all work is delegated to ix86_expand_int_addcc, and the
;; expansion FAILs whenever that helper returns false.  Operand 1 is the
;; comparison, operand 2 a register and operand 3 a constant integer.
22101 (define_expand "add<mode>cc"
22102 [(match_operand:SWI 0 "register_operand")
22103 (match_operand 1 "ordered_comparison_operator")
22104 (match_operand:SWI 2 "register_operand")
22105 (match_operand:SWI 3 "const_int_operand")]
22106 ""
22107 "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;")
22108
22109 ;; min/max patterns
22110
;; For each max/min code, the comparison code under which operand 1 is the
;; result.  The splitters below read it as <maxmin_rel> and use it as the
;; condition of an if_then_else that picks operand 1 over operand 2.
22111 (define_code_attr maxmin_rel
22112 [(smax "GE") (smin "LE") (umax "GEU") (umin "LEU")])
22113
;; Integer max/min expander for the maxmin codes over the SDWIM modes.  It
;; has no preparation code: the parallel, including the FLAGS_REG clobber,
;; is emitted as written.  It is enabled with TARGET_CMOVE, and for QImode
;; only when TARGET_PARTIAL_REG_STALL is off.
22114 (define_expand "<code><mode>3"
22115 [(parallel
22116 [(set (match_operand:SDWIM 0 "register_operand")
22117 (maxmin:SDWIM
22118 (match_operand:SDWIM 1 "register_operand")
22119 (match_operand:SDWIM 2 "general_operand")))
22120 (clobber (reg:CC FLAGS_REG))])]
22121 "TARGET_CMOVE
22122 && (<MODE>mode != QImode || !TARGET_PARTIAL_REG_STALL)")
22123
;; Double-word integer max/min.  The insn exists only before reload
;; (ix86_pre_reload_split) and is always split ("&& 1") into a flags-setting
;; compare sequence emitted by the preparation code, followed by two
;; word-sized conditional moves that share the condition in operand 6.
22124 (define_insn_and_split "*<code><dwi>3_doubleword"
22125 [(set (match_operand:<DWI> 0 "register_operand")
22126 (maxmin:<DWI>
22127 (match_operand:<DWI> 1 "register_operand")
22128 (match_operand:<DWI> 2 "general_operand")))
22129 (clobber (reg:CC FLAGS_REG))]
22130 "TARGET_CMOVE
22131 && ix86_pre_reload_split ()"
22132 "#"
22133 "&& 1"
22134 [(set (match_dup 0)
22135 (if_then_else:DWIH (match_dup 6)
22136 (match_dup 1)
22137 (match_dup 2)))
22138 (set (match_dup 3)
22139 (if_then_else:DWIH (match_dup 6)
22140 (match_dup 4)
22141 (match_dup 5)))]
22142 {
22143 operands[2] = force_reg (<DWI>mode, operands[2]);
22144
/* From here on operands[0..2] are used as the low halves and
   operands[3..5] as the high halves (see cmplo/cmphi below).  */
22145 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
22146
22147 rtx cmplo[2] = { operands[1], operands[2] };
22148 rtx cmphi[2] = { operands[4], operands[5] };
22149
22150 enum rtx_code code = <maxmin_rel>;
22151
22152 switch (code)
22153 {
/* min: exchange the compared operands and swap the condition so that
   only the GE/GEU sequence below is needed.  */
22154 case LE: case LEU:
22155 std::swap (cmplo[0], cmplo[1]);
22156 std::swap (cmphi[0], cmphi[1]);
22157 code = swap_condition (code);
22158 /* FALLTHRU */
22159
22160 case GE: case GEU:
22161 {
22162 bool uns = (code == GEU);
22163 rtx (*sbb_insn) (machine_mode, rtx, rtx, rtx)
22164 = uns ? gen_sub3_carry_ccc : gen_sub3_carry_ccgz;
22165
/* Compare the low halves, then run the carry-consuming subtract on the
   high halves into a scratch: only the resulting flags are used.  */
22166 emit_insn (gen_cmp_1 (<MODE>mode, cmplo[0], cmplo[1]));
22167
22168 rtx tmp = gen_rtx_SCRATCH (<MODE>mode);
22169 emit_insn (sbb_insn (<MODE>mode, tmp, cmphi[0], cmphi[1]));
22170
22171 rtx flags = gen_rtx_REG (uns ? CCCmode : CCGZmode, FLAGS_REG);
22172 operands[6] = gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
22173
22174 break;
22175 }
22176
22177 default:
22178 gcc_unreachable ();
22179 }
22180 })
22181
;; Single-word integer max/min.  Exists only before reload and is always
;; split into a compare emitted by the preparation code plus one conditional
;; move whose condition is operand 3.  Comparisons against 0, 1 and -1 are
;; rewritten as comparisons against zero where an equivalent condition exists.
(define_insn_and_split "*<code><mode>3_1"
  [(set (match_operand:SWI 0 "register_operand")
        (maxmin:SWI
          (match_operand:SWI 1 "register_operand")
          (match_operand:SWI 2 "general_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_CMOVE
   && (<MODE>mode != QImode || !TARGET_PARTIAL_REG_STALL)
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (match_dup 0)
        (if_then_else:SWI (match_dup 3)
          (match_dup 1)
          (match_dup 2)))]
{
  rtx orig_op2 = operands[2];

  /* The conditional move always takes operand 2 from a register.  */
  operands[2] = force_reg (<MODE>mode, orig_op2);

  enum rtx_code code = <maxmin_rel>;

  /* Unless one of the special constants below applies, the comparison
     uses the register copy of operand 2.  */
  rtx cmp_op = operands[2];

  if (orig_op2 == const0_rtx)
    cmp_op = const0_rtx;
  else if (orig_op2 == const1_rtx)
    {
      /* smax (x, 1) becomes (x > 0 ? x : 1).
         umax (x, 1) becomes (x != 0 ? x : 1).
         ?min (x, 1) becomes (x <= 0 ? x : 1).  */
      cmp_op = const0_rtx;
      switch (code)
        {
        case GE:
          code = GT;
          break;
        case GEU:
          code = NE;
          break;
        default:
          break;
        }
    }
  else if (orig_op2 == constm1_rtx && (code == LE || code == GE))
    {
      /* smin (x, -1) becomes (x < 0 ? x : -1).
         smax (x, -1) becomes (x >= 0 ? x : -1).  */
      cmp_op = const0_rtx;
      if (code == LE)
        code = LT;
    }

  machine_mode cmpmode = SELECT_CC_MODE (code, operands[1], cmp_op);
  rtx flags = gen_rtx_REG (cmpmode, FLAGS_REG);
  rtx compare = gen_rtx_COMPARE (cmpmode, operands[1], cmp_op);

  emit_insn (gen_rtx_SET (flags, compare));

  operands[3] = gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
})
22236
22237 ;; Avoid clearing a register between a flags setting comparison and its use,
22238 ;; i.e. prefer "xorl %eax,%eax; test/cmp" over "test/cmp; movl $0, %eax".
;; Matches a flags-setting insn followed by a move of zero into a general
;; register that the flags source does not mention.  Requires FLAGS_REG to
;; be dead before the first insn, because the clear generated by
;; ix86_expand_clear is placed ahead of the re-emitted flags set.
22239 (define_peephole2
22240 [(set (reg FLAGS_REG) (match_operand 0))
22241 (set (match_operand:SWI 1 "general_reg_operand") (const_int 0))]
22242 "peep2_regno_dead_p (0, FLAGS_REG)
22243 && !reg_overlap_mentioned_p (operands[1], operands[0])"
22244 [(set (match_dup 2) (match_dup 0))]
22245 {
/* Rebuild the flags register in the mode of the original flags source.  */
22246 operands[2] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG);
22247 ix86_expand_clear (operands[1]);
22248 })
22249
22250 ;; When optimizing for size, zeroing memory should use a register.
;; Four consecutive zero stores: clear one scratch register and store it
;; four times.  The UID of the last store is saved in
;; ix86_last_zero_store_uid so that the continuation peepholes further down
;; can recognise a store produced here.
22251 (define_peephole2
22252 [(match_scratch:SWI48 0 "r")
22253 (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
22254 (set (match_operand:SWI48 2 "memory_operand") (const_int 0))
22255 (set (match_operand:SWI48 3 "memory_operand") (const_int 0))
22256 (set (match_operand:SWI48 4 "memory_operand") (const_int 0))]
22257 "optimize_insn_for_size_p () && peep2_regno_dead_p (0, FLAGS_REG)"
22258 [(const_int 0)]
22259 {
22260 ix86_expand_clear (operands[0]);
22261 emit_move_insn (operands[1], operands[0]);
22262 emit_move_insn (operands[2], operands[0]);
22263 emit_move_insn (operands[3], operands[0]);
22264 ix86_last_zero_store_uid
22265 = INSN_UID (emit_move_insn (operands[4], operands[0]));
22266 DONE;
22267 })
22268
;; Two consecutive zero stores when optimizing for size: clear one scratch
;; register, store it twice, and save the UID of the last store in
;; ix86_last_zero_store_uid.
22269 (define_peephole2
22270 [(match_scratch:SWI48 0 "r")
22271 (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
22272 (set (match_operand:SWI48 2 "memory_operand") (const_int 0))]
22273 "optimize_insn_for_size_p () && peep2_regno_dead_p (0, FLAGS_REG)"
22274 [(const_int 0)]
22275 {
22276 ix86_expand_clear (operands[0]);
22277 emit_move_insn (operands[1], operands[0]);
22278 ix86_last_zero_store_uid
22279 = INSN_UID (emit_move_insn (operands[2], operands[0]));
22280 DONE;
22281 })
22282
;; A single zero store when optimizing for size: clear a scratch register,
;; store it, and save the store's UID in ix86_last_zero_store_uid.
22283 (define_peephole2
22284 [(match_scratch:SWI48 0 "r")
22285 (set (match_operand:SWI48 1 "memory_operand") (const_int 0))]
22286 "optimize_insn_for_size_p () && peep2_regno_dead_p (0, FLAGS_REG)"
22287 [(const_int 0)]
22288 {
22289 ix86_expand_clear (operands[0]);
22290 ix86_last_zero_store_uid
22291 = INSN_UID (emit_move_insn (operands[1], operands[0]));
22292 DONE;
22293 })
22294
;; Continuation of the zero-store peepholes.  When the first matched insn
;; is the store whose UID was saved in ix86_last_zero_store_uid, the four
;; zero stores after it are re-emitted as stores of that insn's source
;; register (operand 0).  The UID of the new last store is saved in turn.
22295 (define_peephole2
22296 [(set (match_operand:SWI48 5 "memory_operand")
22297 (match_operand:SWI48 0 "general_reg_operand"))
22298 (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
22299 (set (match_operand:SWI48 2 "memory_operand") (const_int 0))
22300 (set (match_operand:SWI48 3 "memory_operand") (const_int 0))
22301 (set (match_operand:SWI48 4 "memory_operand") (const_int 0))]
22302 "optimize_insn_for_size_p ()
22303 && INSN_UID (peep2_next_insn (0)) == ix86_last_zero_store_uid"
22304 [(const_int 0)]
22305 {
/* Re-emit the leading register store unchanged, then the zero stores.  */
22306 emit_move_insn (operands[5], operands[0]);
22307 emit_move_insn (operands[1], operands[0]);
22308 emit_move_insn (operands[2], operands[0]);
22309 emit_move_insn (operands[3], operands[0]);
22310 ix86_last_zero_store_uid
22311 = INSN_UID (emit_move_insn (operands[4], operands[0]));
22312 DONE;
22313 })
22314
;; Continuation for two trailing zero stores: the first matched insn must be
;; the store saved in ix86_last_zero_store_uid.  Its source register
;; (operand 0) is reused for both stores and the new last UID is saved.
22315 (define_peephole2
22316 [(set (match_operand:SWI48 3 "memory_operand")
22317 (match_operand:SWI48 0 "general_reg_operand"))
22318 (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
22319 (set (match_operand:SWI48 2 "memory_operand") (const_int 0))]
22320 "optimize_insn_for_size_p ()
22321 && INSN_UID (peep2_next_insn (0)) == ix86_last_zero_store_uid"
22322 [(const_int 0)]
22323 {
22324 emit_move_insn (operands[3], operands[0]);
22325 emit_move_insn (operands[1], operands[0]);
22326 ix86_last_zero_store_uid
22327 = INSN_UID (emit_move_insn (operands[2], operands[0]));
22328 DONE;
22329 })
22330
;; Continuation for one trailing zero store: the first matched insn must be
;; the store saved in ix86_last_zero_store_uid.  Its source register
;; (operand 0) is reused for the store and the new UID is saved.
22331 (define_peephole2
22332 [(set (match_operand:SWI48 2 "memory_operand")
22333 (match_operand:SWI48 0 "general_reg_operand"))
22334 (set (match_operand:SWI48 1 "memory_operand") (const_int 0))]
22335 "optimize_insn_for_size_p ()
22336 && INSN_UID (peep2_next_insn (0)) == ix86_last_zero_store_uid"
22337 [(const_int 0)]
22338 {
22339 emit_move_insn (operands[2], operands[0]);
22340 ix86_last_zero_store_uid
22341 = INSN_UID (emit_move_insn (operands[1], operands[0]));
22342 DONE;
22343 })
22344
22345 ;; Reload dislikes loading constants directly into class_likely_spilled
22346 ;; hard registers. Try to tidy things up here.
;; Collapses "reg0 = src; reg2 = reg0" into "reg2 = src" when reg0 is dead
;; after the second insn.
22347 (define_peephole2
22348 [(set (match_operand:SWI 0 "general_reg_operand")
22349 (match_operand:SWI 1 "x86_64_general_operand"))
22350 (set (match_operand:SWI 2 "general_reg_operand")
22351 (match_dup 0))]
22352 "peep2_reg_dead_p (2, operands[0])"
22353 [(set (match_dup 2) (match_dup 1))])
22354 \f
22355 ;; Misc patterns (?)
22356
22357 ;; This pattern exists to put a dependency on all ebp-based memory accesses.
22358 ;; Otherwise there will be nothing to keep
22359 ;;
22360 ;; [(set (reg ebp) (reg esp))]
22361 ;; [(set (reg esp) (plus (reg esp) (const_int -160000)))
22362 ;; (clobber (eflags)]
22363 ;; [(set (mem (plus (reg ebp) (const_int -160000))) (const_int 0))]
22364 ;;
22365 ;; in proper program order.
22366
;; Pointer-mode add that also clobbers the flags and a BLK scratch memory.
;; The "type" attribute computed below selects the emitted instruction:
;; "alu" for alternative 0 when TARGET_OPT_AGU is off, "imov" when operand 2
;; is zero, and "lea" otherwise.
22367 (define_insn "@pro_epilogue_adjust_stack_add_<mode>"
22368 [(set (match_operand:P 0 "register_operand" "=r,r")
22369 (plus:P (match_operand:P 1 "register_operand" "0,r")
22370 (match_operand:P 2 "<nonmemory_operand>" "r<i>,l<i>")))
22371 (clobber (reg:CC FLAGS_REG))
22372 (clobber (mem:BLK (scratch)))]
22373 ""
22374 {
22375 switch (get_attr_type (insn))
22376 {
22377 case TYPE_IMOV:
22378 return "mov{<imodesuffix>}\t{%1, %0|%0, %1}";
22379
22380 case TYPE_ALU:
22381 gcc_assert (rtx_equal_p (operands[0], operands[1]));
/* When the helper reports that it negated the constant in operands[2],
   the subtract form is printed instead of the add.  */
22382 if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
22383 return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
22384
22385 return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
22386
22387 default:
/* Replace operand 2 with the whole source of the first set in the
   parallel so that %E2 prints it as the lea address.  */
22388 operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
22389 return "lea{<imodesuffix>}\t{%E2, %0|%0, %E2}";
22390 }
22391 }
22392 [(set (attr "type")
22393 (cond [(and (eq_attr "alternative" "0")
22394 (not (match_test "TARGET_OPT_AGU")))
22395 (const_string "alu")
22396 (match_operand:<MODE> 2 "const0_operand")
22397 (const_string "imov")
22398 ]
22399 (const_string "lea")))
22400 (set (attr "length_immediate")
22401 (cond [(eq_attr "type" "imov")
22402 (const_string "0")
22403 (and (eq_attr "type" "alu")
22404 (match_operand 2 "const128_operand"))
22405 (const_string "1")
22406 ]
22407 (const_string "*")))
22408 (set_attr "mode" "<MODE>")])
22409
;; Register-register subtract counterpart of the add pattern: operand 1 is
;; tied to the destination, and the insn clobbers the flags and a BLK
;; scratch memory.
22410 (define_insn "@pro_epilogue_adjust_stack_sub_<mode>"
22411 [(set (match_operand:P 0 "register_operand" "=r")
22412 (minus:P (match_operand:P 1 "register_operand" "0")
22413 (match_operand:P 2 "register_operand" "r")))
22414 (clobber (reg:CC FLAGS_REG))
22415 (clobber (mem:BLK (scratch)))]
22416 ""
22417 "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
22418 [(set_attr "type" "alu")
22419 (set_attr "mode" "<MODE>")])
22420
;; Volatile stack-probe call, available only when ix86_target_stack_probe ()
;; holds.  Input and output are tied together in the register selected by
;; the "a" constraint; the insn is emitted as a call to ___chkstk_ms with a
;; fixed length of 5 bytes and clobbers the flags.
22421 (define_insn "@allocate_stack_worker_probe_<mode>"
22422 [(set (match_operand:P 0 "register_operand" "=a")
22423 (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")]
22424 UNSPECV_STACK_PROBE))
22425 (clobber (reg:CC FLAGS_REG))]
22426 "ix86_target_stack_probe ()"
22427 "call\t___chkstk_ms"
22428 [(set_attr "type" "multi")
22429 (set_attr "length" "5")])
22430
;; Dynamic stack allocation for targets that probe the stack.  Operand 1 is
;; the requested size and operand 0 receives virtual_stack_dynamic_rtx after
;; the stack pointer has been lowered.
(define_expand "allocate_stack"
  [(match_operand 0 "register_operand")
   (match_operand 1 "general_operand")]
  "ix86_target_stack_probe ()"
{
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT 0
#endif

  rtx size = operands[1];

  /* Only a constant size below a non-zero CHECK_STACK_LIMIT is used
     directly; every other size goes through the probe worker.  */
  const bool small_constant_p = (CHECK_STACK_LIMIT
                                 && CONST_INT_P (size)
                                 && INTVAL (size) < CHECK_STACK_LIMIT);

  if (!small_constant_p)
    {
      size = copy_to_mode_reg (Pmode, size);

      emit_insn (gen_allocate_stack_worker_probe (Pmode, size, size));
    }

  /* Lower the stack pointer by the size, preferably in place.  */
  rtx new_sp = expand_simple_binop (Pmode, MINUS, stack_pointer_rtx, size,
                                    stack_pointer_rtx, 0, OPTAB_DIRECT);

  if (new_sp != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, new_sp);

  emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
  DONE;
})
22461
;; Stack probe of a memory location: always expands to the word_mode
;; probe_stack_1 insn with a zero second operand.
22462 (define_expand "probe_stack"
22463 [(match_operand 0 "memory_operand")]
22464 ""
22465 {
22466 emit_insn (gen_probe_stack_1
22467 (word_mode, operands[0], const0_rtx));
22468 DONE;
22469 })
22470
22471 ;; Use OR for stack probes, this is shorter.
;; The probe is modelled as an UNSPEC_PROBE_STACK store to the memory
;; operand and printed as an "or" of the zero operand into it.  The insn
;; clobbers the flags; its immediate length is fixed at one byte.
22472 (define_insn "@probe_stack_1_<mode>"
22473 [(set (match_operand:W 0 "memory_operand" "=m")
22474 (unspec:W [(match_operand:W 1 "const0_operand")]
22475 UNSPEC_PROBE_STACK))
22476 (clobber (reg:CC FLAGS_REG))]
22477 ""
22478 "or{<imodesuffix>}\t{%1, %0|%0, %1}"
22479 [(set_attr "type" "alu1")
22480 (set_attr "mode" "<MODE>")
22481 (set_attr "length_immediate" "1")])
22482
;; Multi-instruction pattern whose assembly comes from
;; output_adjust_stack_and_probe (operands[0]).  The RTL records that
;; operand 0 (tied to operand 1) is rewritten, that SP_REG decreases by the
;; constant operand 2, and that the flags and a BLK scratch memory are
;; clobbered.
22483 (define_insn "@adjust_stack_and_probe_<mode>"
22484 [(set (match_operand:P 0 "register_operand" "=r")
22485 (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")]
22486 UNSPECV_PROBE_STACK_RANGE))
22487 (set (reg:P SP_REG)
22488 (minus:P (reg:P SP_REG) (match_operand:P 2 "const_int_operand")))
22489 (clobber (reg:CC FLAGS_REG))
22490 (clobber (mem:BLK (scratch)))]
22491 ""
22492 "* return output_adjust_stack_and_probe (operands[0]);"
22493 [(set_attr "type" "multi")])
22494
;; Multi-instruction pattern whose assembly comes from
;; output_probe_stack_range (operands[0], operands[2]).  Operand 0 is tied
;; to operand 1, operand 2 is a constant integer, and the flags are
;; clobbered.
22495 (define_insn "@probe_stack_range_<mode>"
22496 [(set (match_operand:P 0 "register_operand" "=r")
22497 (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")
22498 (match_operand:P 2 "const_int_operand")]
22499 UNSPECV_PROBE_STACK_RANGE))
22500 (clobber (reg:CC FLAGS_REG))]
22501 ""
22502 "* return output_probe_stack_range (operands[0], operands[2]);"
22503 [(set_attr "type" "multi")])
22504
;; Enabled only for 32-bit PIC code.  Emits a set_got of
;; pic_offset_table_rtx.  When built with TARGET_MACHO and it is in effect,
;; the labelled set_got variant is used instead and the Mach-O offset of
;; that label is then subtracted from the register.
22505 (define_expand "builtin_setjmp_receiver"
22506 [(label_ref (match_operand 0))]
22507 "!TARGET_64BIT && flag_pic"
22508 {
22509 #if TARGET_MACHO
22510 if (TARGET_MACHO)
22511 {
22512 rtx xops[3];
22513 rtx_code_label *label_rtx = gen_label_rtx ();
22514 emit_insn (gen_set_got_labelled (pic_offset_table_rtx, label_rtx));
/* xops describes pic_offset_table_rtx = pic_offset_table_rtx - offset.  */
22515 xops[0] = xops[1] = pic_offset_table_rtx;
22516 xops[2] = machopic_gen_offset (gen_rtx_LABEL_REF (SImode, label_rtx));
22517 ix86_expand_binary_operator (MINUS, SImode, xops);
22518 }
22519 else
22520 #endif
22521 emit_insn (gen_set_got (pic_offset_table_rtx));
22522 DONE;
22523 })
22524
;; Saves the stack pointer (operand 1) into the save area (operand 0).  With
;; CF_RETURN protection the area holds two slots: the shadow stack pointer
;; at offset 0 and the stack pointer at offset UNITS_PER_WORD.  Otherwise
;; the stack pointer is stored at offset 0.
(define_expand "save_stack_nonlocal"
  [(set (match_operand 0 "memory_operand")
        (match_operand 1 "register_operand"))]
  ""
{
  const bool save_ssp_p = (flag_cf_protection & CF_RETURN) != 0;
  rtx sp_slot;

  if (!save_ssp_p)
    sp_slot = adjust_address (operands[0], Pmode, 0);
  else
    {
      /* First slot: shadow stack pointer; second slot: stack pointer.  */
      rtx ssp_slot = adjust_address (operands[0], word_mode, 0);
      sp_slot = adjust_address (operands[0], Pmode, UNITS_PER_WORD);

      /* The register starts out as zero before rdssp is emitted on it.  */
      rtx ssp = force_reg (word_mode, const0_rtx);
      emit_insn (gen_rdssp (word_mode, ssp, ssp));
      emit_move_insn (ssp_slot, ssp);
    }

  emit_move_insn (sp_slot, operands[1]);
  DONE;
})
22548
;; Restores the stack pointer (operand 0) from the save area (operand 1).
;; Without CF_RETURN protection this is a plain load from offset 0.  With
;; it, the stack pointer is loaded from offset UNITS_PER_WORD, and the
;; shadow stack is first advanced with incssp by the distance between the
;; current and the saved shadow stack pointer.  That distance is consumed
;; in steps of 255 while it exceeds 255, and the remainder in one final
;; incssp.
22549 (define_expand "restore_stack_nonlocal"
22550 [(set (match_operand 0 "register_operand" "")
22551 (match_operand 1 "memory_operand" ""))]
22552 ""
22553 {
22554 rtx stack_slot;
22555
22556 if (flag_cf_protection & CF_RETURN)
22557 {
22558 /* Restore shadow stack pointer from the first slot
22559 and stack pointer from the second slot. */
22560 rtx ssp_slot = adjust_address (operands[1], word_mode, 0);
22561 stack_slot = adjust_address (operands[1], Pmode, UNITS_PER_WORD);
22562
22563 /* Get the current shadow stack pointer. The code below will check if
22564 SHSTK feature is enabled. If it is not enabled the RDSSP instruction
22565 is a NOP. */
22566 rtx reg_ssp = force_reg (word_mode, const0_rtx);
22567 emit_insn (gen_rdssp (word_mode, reg_ssp, reg_ssp));
22568
22569 /* Compare through subtraction the saved and the current ssp
22570 to decide if ssp has to be adjusted. */
22571 reg_ssp = expand_simple_binop (word_mode, MINUS,
22572 reg_ssp, ssp_slot,
22573 reg_ssp, 1, OPTAB_DIRECT);
22574
22575 /* Compare and jump over adjustment code. */
22576 rtx noadj_label = gen_label_rtx ();
22577 emit_cmp_and_jump_insns (reg_ssp, const0_rtx, EQ, NULL_RTX,
22578 word_mode, 1, noadj_label);
22579
22580 /* Compute the number of frames to adjust. */
/* The count is the negated difference shifted right by
   log2 (UNITS_PER_WORD), computed in ptr_mode on the low part of
   reg_ssp.  */
22581 rtx reg_adj = gen_lowpart (ptr_mode, reg_ssp);
22582 rtx reg_adj_neg = expand_simple_unop (ptr_mode, NEG, reg_adj,
22583 NULL_RTX, 1);
22584
22585 reg_adj = expand_simple_binop (ptr_mode, LSHIFTRT, reg_adj_neg,
22586 GEN_INT (exact_log2 (UNITS_PER_WORD)),
22587 reg_adj, 1, OPTAB_DIRECT);
22588
22589 /* Check if number of frames <= 255 so no loop is needed. */
22590 rtx inc_label = gen_label_rtx ();
22591 emit_cmp_and_jump_insns (reg_adj, GEN_INT (255), LEU, NULL_RTX,
22592 ptr_mode, 1, inc_label);
22593
22594 /* Adjust the ssp in a loop. */
22595 rtx loop_label = gen_label_rtx ();
22596 emit_label (loop_label);
22597 LABEL_NUSES (loop_label) = 1;
22598
22599 rtx reg_255 = force_reg (word_mode, GEN_INT (255));
22600 emit_insn (gen_incssp (word_mode, reg_255));
22601
22602 reg_adj = expand_simple_binop (ptr_mode, MINUS,
22603 reg_adj, GEN_INT (255),
22604 reg_adj, 1, OPTAB_DIRECT);
22605
22606 /* Compare and jump to the loop label. */
22607 emit_cmp_and_jump_insns (reg_adj, GEN_INT (255), GTU, NULL_RTX,
22608 ptr_mode, 1, loop_label);
22609
22610 emit_label (inc_label);
22611 LABEL_NUSES (inc_label) = 1;
22612
/* Final incssp for whatever count is left (at most 255 on this path).  */
22613 emit_insn (gen_incssp (word_mode, reg_ssp));
22614
22615 emit_label (noadj_label);
22616 LABEL_NUSES (noadj_label) = 1;
22617 }
22618 else
22619 stack_slot = adjust_address (operands[1], Pmode, 0);
22620 emit_move_insn (operands[0], stack_slot);
22621 DONE;
22622 })
22623
22624
22625 ;; Avoid redundant prefixes by splitting HImode arithmetic to SImode.
22626 ;; Do not split instructions with mask registers.
;; After reload, rewrites a HImode or QImode promotable binary operation on
;; general registers as the same operation in SImode.  HImode is promoted
;; when optimizing the function for speed without TARGET_FAST_PREFIX, or
;; when operand 2 is not a CONST_INT or satisfies constraint K.  QImode is
;; promoted with TARGET_PROMOTE_QImode or when optimizing the function for
;; size.
22627 (define_split
22628 [(set (match_operand 0 "general_reg_operand")
22629 (match_operator 3 "promotable_binary_operator"
22630 [(match_operand 1 "general_reg_operand")
22631 (match_operand 2 "aligned_operand")]))
22632 (clobber (reg:CC FLAGS_REG))]
22633 "! TARGET_PARTIAL_REG_STALL && reload_completed
22634 && ((GET_MODE (operands[0]) == HImode
22635 && ((optimize_function_for_speed_p (cfun) && !TARGET_FAST_PREFIX)
22636 /* ??? next two lines just !satisfies_constraint_K (...) */
22637 || !CONST_INT_P (operands[2])
22638 || satisfies_constraint_K (operands[2])))
22639 || (GET_MODE (operands[0]) == QImode
22640 && (TARGET_PROMOTE_QImode || optimize_function_for_size_p (cfun))))"
22641 [(parallel [(set (match_dup 0)
22642 (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
22643 (clobber (reg:CC FLAGS_REG))])]
22644 {
22645 operands[0] = gen_lowpart (SImode, operands[0]);
22646 operands[1] = gen_lowpart (SImode, operands[1]);
/* For ASHIFT operand 2 is left in its original mode.  */
22647 if (GET_CODE (operands[3]) != ASHIFT)
22648 operands[2] = gen_lowpart (SImode, operands[2]);
/* The mode is changed on a shallow copy, presumably so that the matched
   operator rtx itself is left untouched.  */
22649 operands[3] = shallow_copy_rtx (operands[3]);
22650 PUT_MODE (operands[3], SImode);
22651 })
22652
22653 ; Promote the QImode tests, as i386 has encoding of the AND
22654 ; instruction with 32-bit sign-extended immediate and thus the
22655 ; instruction size is unchanged, except in the %eax case for
22656 ; which it is increased by one byte, hence the ! optimize_size.
;; After reload, widens a HImode/QImode AND that both sets the flags and
;; produces a register result to SImode.  The immediate is first truncated
;; to the width of the narrow mode.
(define_split
  [(set (match_operand 0 "flags_reg_operand")
        (match_operator 2 "compare_operator"
          [(and (match_operand 3 "aligned_operand")
                (match_operand 4 "const_int_operand"))
           (const_int 0)]))
   (set (match_operand 1 "register_operand")
        (and (match_dup 3) (match_dup 4)))]
  "! TARGET_PARTIAL_REG_STALL && reload_completed
   && optimize_insn_for_speed_p ()
   && ((GET_MODE (operands[1]) == HImode && ! TARGET_FAST_PREFIX)
       || (GET_MODE (operands[1]) == QImode && TARGET_PROMOTE_QImode))
   /* Ensure that the operand will remain sign-extended immediate.  */
   && ix86_match_ccmode (insn, INTVAL (operands[4]) >= 0 ? CCNOmode : CCZmode)"
  [(parallel [(set (match_dup 0)
                   (match_op_dup 2 [(and:SI (match_dup 3) (match_dup 4))
                                    (const_int 0)]))
              (set (match_dup 1)
                   (and:SI (match_dup 3) (match_dup 4)))])]
{
  /* Read the narrow mode before operand 1 is replaced by its SImode
     low part.  */
  const machine_mode narrow_mode = GET_MODE (operands[1]);

  operands[4] = gen_int_mode (INTVAL (operands[4])
                              & GET_MODE_MASK (narrow_mode), SImode);
  operands[1] = gen_lowpart (SImode, operands[1]);
  operands[3] = gen_lowpart (SImode, operands[3]);
})
22683
22684 ; Don't promote the QImode tests, as i386 doesn't have encoding of
22685 ; the TEST instruction with 32-bit sign-extended immediate and thus
22686 ; the instruction size would at least double, which is not what we
22687 ; want even with ! optimize_size.
;; After reload, widens a flags-only HImode AND-with-immediate compare to
;; SImode.  The immediate is first truncated to the width of the HImode
;; operand.
(define_split
  [(set (match_operand 0 "flags_reg_operand")
        (match_operator 1 "compare_operator"
          [(and (match_operand:HI 2 "aligned_operand")
                (match_operand:HI 3 "const_int_operand"))
           (const_int 0)]))]
  "! TARGET_PARTIAL_REG_STALL && reload_completed
   && ! TARGET_FAST_PREFIX
   && optimize_insn_for_speed_p ()
   /* Ensure that the operand will remain sign-extended immediate.  */
   && ix86_match_ccmode (insn, INTVAL (operands[3]) >= 0 ? CCNOmode : CCZmode)"
  [(set (match_dup 0)
        (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3))
                         (const_int 0)]))]
{
  /* Read the narrow mode before operand 2 is replaced by its SImode
     low part.  */
  const machine_mode narrow_mode = GET_MODE (operands[2]);

  operands[3] = gen_int_mode (INTVAL (operands[3])
                              & GET_MODE_MASK (narrow_mode), SImode);
  operands[2] = gen_lowpart (SImode, operands[2]);
})
22708
;; After reload, rewrites a register NEG as an SImode NEG on the low parts.
;; HImode is always promoted; QImode only with TARGET_PROMOTE_QImode or when
;; the insn is optimized for size.
22709 (define_split
22710 [(set (match_operand 0 "register_operand")
22711 (neg (match_operand 1 "register_operand")))
22712 (clobber (reg:CC FLAGS_REG))]
22713 "! TARGET_PARTIAL_REG_STALL && reload_completed
22714 && (GET_MODE (operands[0]) == HImode
22715 || (GET_MODE (operands[0]) == QImode
22716 && (TARGET_PROMOTE_QImode
22717 || optimize_insn_for_size_p ())))"
22718 [(parallel [(set (match_dup 0)
22719 (neg:SI (match_dup 1)))
22720 (clobber (reg:CC FLAGS_REG))])]
22721 {
22722 operands[0] = gen_lowpart (SImode, operands[0]);
22723 operands[1] = gen_lowpart (SImode, operands[1]);
22724 })
22725
22726 ;; Do not split instructions with mask regs.
;; After reload, rewrites a general-register NOT as an SImode NOT on the low
;; parts.  HImode is always promoted; QImode only with TARGET_PROMOTE_QImode
;; or when the insn is optimized for size.
22727 (define_split
22728 [(set (match_operand 0 "general_reg_operand")
22729 (not (match_operand 1 "general_reg_operand")))]
22730 "! TARGET_PARTIAL_REG_STALL && reload_completed
22731 && (GET_MODE (operands[0]) == HImode
22732 || (GET_MODE (operands[0]) == QImode
22733 && (TARGET_PROMOTE_QImode
22734 || optimize_insn_for_size_p ())))"
22735 [(set (match_dup 0)
22736 (not:SI (match_dup 1)))]
22737 {
22738 operands[0] = gen_lowpart (SImode, operands[0]);
22739 operands[1] = gen_lowpart (SImode, operands[1]);
22740 })
22741 \f
22742 ;; RTL Peephole optimizations, run before sched2. These primarily look to
22743 ;; transform a complex memory operation into two memory to register operations.
22744
22745 ;; Don't push memory operands
;; Replaces a push of a memory operand with a load into a scratch register
;; followed by a push of that register.  Skipped with TARGET_PUSH_MEMORY,
;; when optimizing for size, or when the push is frame related.
22746 (define_peephole2
22747 [(set (match_operand:SWI 0 "push_operand")
22748 (match_operand:SWI 1 "memory_operand"))
22749 (match_scratch:SWI 2 "<r>")]
22750 "!(TARGET_PUSH_MEMORY || optimize_insn_for_size_p ())
22751 && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
22752 [(set (match_dup 2) (match_dup 1))
22753 (set (match_dup 0) (match_dup 2))])
22754
22755 ;; We need to handle SFmode only, because DFmode and XFmode are split to
22756 ;; SImode pushes.
;; SFmode version of the memory-push peephole: the value is loaded into a
;; scratch with constraint "r" and pushed from there, under the same
;; conditions as the integer version.
22757 (define_peephole2
22758 [(set (match_operand:SF 0 "push_operand")
22759 (match_operand:SF 1 "memory_operand"))
22760 (match_scratch:SF 2 "r")]
22761 "!(TARGET_PUSH_MEMORY || optimize_insn_for_size_p ())
22762 && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
22763 [(set (match_dup 2) (match_dup 1))
22764 (set (match_dup 0) (match_dup 2))])
22765
22766 ;; Don't move an immediate directly to memory when the instruction
22767 ;; gets too big, or if LCP stalls are a problem for 16-bit moves.
;; Store of zero to memory.  The scratch register is cleared through its
;; SImode low part, in a parallel that clobbers the flags (hence the
;; FLAGS_REG-dead requirement), and then stored.  Applied when optimizing
;; for speed and either the store is HImode with TARGET_LCP_STALL, or
;; TARGET_USE_MOV0 is off, TARGET_SPLIT_LONG_MOVES is on and the insn is at
;; least large_insn bytes long.
22768 (define_peephole2
22769 [(match_scratch:SWI124 1 "<r>")
22770 (set (match_operand:SWI124 0 "memory_operand")
22771 (const_int 0))]
22772 "optimize_insn_for_speed_p ()
22773 && ((<MODE>mode == HImode
22774 && TARGET_LCP_STALL)
22775 || (!TARGET_USE_MOV0
22776 && TARGET_SPLIT_LONG_MOVES
22777 && get_attr_length (insn) >= ix86_cur_cost ()->large_insn))
22778 && peep2_regno_dead_p (0, FLAGS_REG)"
22779 [(parallel [(set (match_dup 2) (const_int 0))
22780 (clobber (reg:CC FLAGS_REG))])
22781 (set (match_dup 0) (match_dup 1))]
22782 "operands[2] = gen_lowpart (SImode, operands[1]);")
22783
;; Store of a general immediate to memory: load the immediate into a scratch
;; register and store the register.  Applied when optimizing for speed and
;; either the store is HImode with TARGET_LCP_STALL, or
;; TARGET_SPLIT_LONG_MOVES is on and the insn is at least large_insn bytes
;; long.
22784 (define_peephole2
22785 [(match_scratch:SWI124 2 "<r>")
22786 (set (match_operand:SWI124 0 "memory_operand")
22787 (match_operand:SWI124 1 "immediate_operand"))]
22788 "optimize_insn_for_speed_p ()
22789 && ((<MODE>mode == HImode
22790 && TARGET_LCP_STALL)
22791 || (TARGET_SPLIT_LONG_MOVES
22792 && get_attr_length (insn) >= ix86_cur_cost ()->large_insn))"
22793 [(set (match_dup 2) (match_dup 1))
22794 (set (match_dup 0) (match_dup 2))])
22795
22796 ;; Don't compare memory with zero, load and use a test instead.
;; SImode compare of a memory operand with zero: the value is loaded into a
;; scratch register and the compare is redone on that register.  Applied
;; only when optimizing for speed and the insn matches CCNOmode.
22797 (define_peephole2
22798 [(set (match_operand 0 "flags_reg_operand")
22799 (match_operator 1 "compare_operator"
22800 [(match_operand:SI 2 "memory_operand")
22801 (const_int 0)]))
22802 (match_scratch:SI 3 "r")]
22803 "optimize_insn_for_speed_p () && ix86_match_ccmode (insn, CCNOmode)"
22804 [(set (match_dup 3) (match_dup 2))
22805 (set (match_dup 0) (match_op_dup 1 [(match_dup 3) (const_int 0)]))])
22806
22807 ;; NOT is not pairable on Pentium, while XOR is, but one byte longer.
22808 ;; Don't split NOTs with a displacement operand, because resulting XOR
22809 ;; will not be pairable anyway.
22810 ;;
22811 ;; On AMD K6, NOT is vector decoded with memory operand that cannot be
22812 ;; represented using a modRM byte. The XOR replacement is long decoded,
22813 ;; so this split helps here as well.
22814 ;;
22815 ;; Note: Can't do this as a regular split because we can't get proper
22816 ;; lifetime information then.
22817
;; Rewrites NOT as XOR with -1.  The replacement clobbers the flags, so
;; FLAGS_REG must be dead before the insn.  Applied when optimizing for
;; speed and either TARGET_NOT_UNPAIRABLE holds with a destination that is
;; not a memory operand with a displacement, or TARGET_NOT_VECTORMODE holds
;; with a destination accepted by long_memory_operand.
22818 (define_peephole2
22819 [(set (match_operand:SWI124 0 "nonimmediate_gr_operand")
22820 (not:SWI124 (match_operand:SWI124 1 "nonimmediate_gr_operand")))]
22821 "optimize_insn_for_speed_p ()
22822 && ((TARGET_NOT_UNPAIRABLE
22823 && (!MEM_P (operands[0])
22824 || !memory_displacement_operand (operands[0], <MODE>mode)))
22825 || (TARGET_NOT_VECTORMODE
22826 && long_memory_operand (operands[0], <MODE>mode)))
22827 && peep2_regno_dead_p (0, FLAGS_REG)"
22828 [(parallel [(set (match_dup 0)
22829 (xor:SWI124 (match_dup 1) (const_int -1)))
22830 (clobber (reg:CC FLAGS_REG))])])
22831
22832 ;; Non pairable "test imm, reg" instructions can be translated to
22833 ;; "and imm, reg" if reg dies. The "and" form is also shorter (one
22834 ;; byte opcode instead of two, have a short form for byte operands),
22835 ;; so do it for other CPUs as well. Given that the value was dead,
22836 ;; this should not create any new dependencies. Pass on the sub-word
22837 ;; versions if we're concerned about partial register stalls.
22838
;; SImode "test imm, reg" becomes "and imm, reg" when the register is dead
;; after the insn.  For AX_REG the rewrite is done only if the immediate
;; satisfies constraint K.
22839 (define_peephole2
22840 [(set (match_operand 0 "flags_reg_operand")
22841 (match_operator 1 "compare_operator"
22842 [(and:SI (match_operand:SI 2 "register_operand")
22843 (match_operand:SI 3 "immediate_operand"))
22844 (const_int 0)]))]
22845 "ix86_match_ccmode (insn, CCNOmode)
22846 && (REGNO (operands[2]) != AX_REG
22847 || satisfies_constraint_K (operands[3]))
22848 && peep2_reg_dead_p (1, operands[2])"
22849 [(parallel
22850 [(set (match_dup 0)
22851 (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3))
22852 (const_int 0)]))
22853 (set (match_dup 2)
22854 (and:SI (match_dup 2) (match_dup 3)))])])
22855
22856 ;; We don't need to handle HImode case, because it will be promoted to SImode
22857 ;; on ! TARGET_PARTIAL_REG_STALL
22858
;; QImode version of the test-to-and rewrite.  It is skipped with
;; TARGET_PARTIAL_REG_STALL and never applied to AX_REG; the register must
;; be dead after the insn.
22859 (define_peephole2
22860 [(set (match_operand 0 "flags_reg_operand")
22861 (match_operator 1 "compare_operator"
22862 [(and:QI (match_operand:QI 2 "register_operand")
22863 (match_operand:QI 3 "immediate_operand"))
22864 (const_int 0)]))]
22865 "! TARGET_PARTIAL_REG_STALL
22866 && ix86_match_ccmode (insn, CCNOmode)
22867 && REGNO (operands[2]) != AX_REG
22868 && peep2_reg_dead_p (1, operands[2])"
22869 [(parallel
22870 [(set (match_dup 0)
22871 (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3))
22872 (const_int 0)]))
22873 (set (match_dup 2)
22874 (and:QI (match_dup 2) (match_dup 3)))])])
22875
;; Test-to-and rewrite for the 8-bit field at bit offset 8 of a QIreg
;; operand, i.e. the zero_extract with size 8 and position 8.  The
;; replacement also writes the AND result back into that field.  It is
;; skipped with TARGET_PARTIAL_REG_STALL and never applied to AX_REG; the
;; register must be dead after the insn.
22876 (define_peephole2
22877 [(set (match_operand 0 "flags_reg_operand")
22878 (match_operator 1 "compare_operator"
22879 [(and:QI
22880 (subreg:QI
22881 (zero_extract:SWI248 (match_operand:SWI248 2 "QIreg_operand")
22882 (const_int 8)
22883 (const_int 8)) 0)
22884 (match_operand 3 "const_int_operand"))
22885 (const_int 0)]))]
22886 "! TARGET_PARTIAL_REG_STALL
22887 && ix86_match_ccmode (insn, CCNOmode)
22888 && REGNO (operands[2]) != AX_REG
22889 && peep2_reg_dead_p (1, operands[2])"
22890 [(parallel
22891 [(set (match_dup 0)
22892 (match_op_dup 1
22893 [(and:QI
22894 (subreg:QI
22895 (zero_extract:SWI248 (match_dup 2)
22896 (const_int 8)
22897 (const_int 8)) 0)
22898 (match_dup 3))
22899 (const_int 0)]))
22900 (set (zero_extract:SWI248 (match_dup 2)
22901 (const_int 8)
22902 (const_int 8))
22903 (subreg:SWI248
22904 (and:QI
22905 (subreg:QI
22906 (zero_extract:SWI248 (match_dup 2)
22907 (const_int 8)
22908 (const_int 8)) 0)
22909 (match_dup 3)) 0))])])
22910
22911 ;; Don't do logical operations with memory inputs.
;; "reg = reg OP mem" becomes a load of mem into a scratch register followed
;; by "reg = reg OP scratch".  Skipped with TARGET_READ_MODIFY or when
;; optimizing for size.
22912 (define_peephole2
22913 [(match_scratch:SWI 2 "<r>")
22914 (parallel [(set (match_operand:SWI 0 "register_operand")
22915 (match_operator:SWI 3 "arith_or_logical_operator"
22916 [(match_dup 0)
22917 (match_operand:SWI 1 "memory_operand")]))
22918 (clobber (reg:CC FLAGS_REG))])]
22919 "!(TARGET_READ_MODIFY || optimize_insn_for_size_p ())"
22920 [(set (match_dup 2) (match_dup 1))
22921 (parallel [(set (match_dup 0)
22922 (match_op_dup 3 [(match_dup 0) (match_dup 2)]))
22923 (clobber (reg:CC FLAGS_REG))])])
22924
;; Mirror of the previous peephole for "reg = mem OP reg": the memory
;; operand is loaded into a scratch register, which then takes its place as
;; the first input.  The operand order is preserved.
22925 (define_peephole2
22926 [(match_scratch:SWI 2 "<r>")
22927 (parallel [(set (match_operand:SWI 0 "register_operand")
22928 (match_operator:SWI 3 "arith_or_logical_operator"
22929 [(match_operand:SWI 1 "memory_operand")
22930 (match_dup 0)]))
22931 (clobber (reg:CC FLAGS_REG))])]
22932 "!(TARGET_READ_MODIFY || optimize_insn_for_size_p ())"
22933 [(set (match_dup 2) (match_dup 1))
22934 (parallel [(set (match_dup 0)
22935 (match_op_dup 3 [(match_dup 2) (match_dup 0)]))
22936 (clobber (reg:CC FLAGS_REG))])])
22937
22938 ;; Prefer Load+RegOp to Mov+MemOp. Watch out for cases when
22939 ;; the memory address refers to the destination of the load!
22940
;; Matches "reg0 = reg1; reg0 = reg0 OP mem2" for a commutative OP on
;; general registers (flags clobbered), with reg0 and reg1 being different
;; registers.  In QImode operand 1 must also satisfy any_QIreg_operand.
;; The result is "reg0 = operand 4; reg0 = reg0 OP reg1".
22941 (define_peephole2
22942 [(set (match_operand:SWI 0 "general_reg_operand")
22943 (match_operand:SWI 1 "general_reg_operand"))
22944 (parallel [(set (match_dup 0)
22945 (match_operator:SWI 3 "commutative_operator"
22946 [(match_dup 0)
22947 (match_operand:SWI 2 "memory_operand")]))
22948 (clobber (reg:CC FLAGS_REG))])]
22949 "REGNO (operands[0]) != REGNO (operands[1])
22950 && (<MODE>mode != QImode
22951 || any_QIreg_operand (operands[1], QImode))"
22952 [(set (match_dup 0) (match_dup 4))
22953 (parallel [(set (match_dup 0)
22954 (match_op_dup 3 [(match_dup 0) (match_dup 1)]))
22955 (clobber (reg:CC FLAGS_REG))])]
;; operands[4] is derived from the memory operand via replace_rtx with
;; operand 0 and operand 1 -- presumably substituting operand 1 for any use
;; of operand 0 inside the address; confirm against replace_rtx.
22956 "operands[4] = replace_rtx (operands[2], operands[0], operands[1], true);")
22957
;; MMX-register counterpart: "reg0 = reg1; reg0 = reg0 OP mem2" with a
;; commutative OP and distinct registers becomes
;; "reg0 = mem2; reg0 = reg0 OP reg1".  The memory operand is reused as is.
22958 (define_peephole2
22959 [(set (match_operand 0 "mmx_reg_operand")
22960 (match_operand 1 "mmx_reg_operand"))
22961 (set (match_dup 0)
22962 (match_operator 3 "commutative_operator"
22963 [(match_dup 0)
22964 (match_operand 2 "memory_operand")]))]
22965 "REGNO (operands[0]) != REGNO (operands[1])"
22966 [(set (match_dup 0) (match_dup 2))
22967 (set (match_dup 0)
22968 (match_op_dup 3 [(match_dup 0) (match_dup 1)]))])
22969
;; SSE-register counterpart: "reg0 = reg1; reg0 = reg0 OP mem2" with a
;; commutative OP and distinct registers becomes
;; "reg0 = mem2; reg0 = reg0 OP reg1".  The condition additionally rejects
;; the case where operand 1 is an EXT_REX_SSE register, AVX512BW is off,
;; the element size is at most 2 bytes and OP is not a logic operator
;; (rationale in the embedded C comment).
22970 (define_peephole2
22971 [(set (match_operand 0 "sse_reg_operand")
22972 (match_operand 1 "sse_reg_operand"))
22973 (set (match_dup 0)
22974 (match_operator 3 "commutative_operator"
22975 [(match_dup 0)
22976 (match_operand 2 "memory_operand")]))]
22977 "REGNO (operands[0]) != REGNO (operands[1])
22978 /* Punt if operands[1] is %[xy]mm16+ and AVX512BW is not enabled,
22979 as EVEX encoded vpadd[bw], vpmullw, vpmin[su][bw] and vpmax[su][bw]
22980 instructions require AVX512BW and AVX512VL, but with the original
22981 instructions it might require just AVX512VL.
22982 AVX512VL is implied from TARGET_HARD_REGNO_MODE_OK. */
22983 && (!EXT_REX_SSE_REGNO_P (REGNO (operands[1]))
22984 || TARGET_AVX512BW
22985 || GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (operands[0]))) > 2
22986 || logic_operator (operands[3], VOIDmode))"
22987 [(set (match_dup 0) (match_dup 2))
22988 (set (match_dup 0)
22989 (match_op_dup 3 [(match_dup 0) (match_dup 1)]))])
22990
22991 ; Don't do logical operations with memory outputs
22992 ;
22993 ; These two don't make sense for PPro/PII -- we're expanding a 4-uop
22994 ; instruction into two 1-uop insns plus a 2-uop insn. That last has
22995 ; the same decoder scheduling characteristics as the original.
22996
;; Matches "mem0 = mem0 OP op1" (flags clobbered) when a scratch register
;; (operand 2) can be allocated and neither TARGET_READ_MODIFY_WRITE nor
;; optimize_insn_for_size_p () holds.  It is split into three insns: load
;; mem0 into the scratch, operate on the scratch, store the scratch back.
22997 (define_peephole2
22998 [(match_scratch:SWI 2 "<r>")
22999 (parallel [(set (match_operand:SWI 0 "memory_operand")
23000 (match_operator:SWI 3 "arith_or_logical_operator"
23001 [(match_dup 0)
23002 (match_operand:SWI 1 "<nonmemory_operand>")]))
23003 (clobber (reg:CC FLAGS_REG))])]
23004 "!(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())"
23005 [(set (match_dup 2) (match_dup 0))
23006 (parallel [(set (match_dup 2)
23007 (match_op_dup 3 [(match_dup 2) (match_dup 1)]))
23008 (clobber (reg:CC FLAGS_REG))])
23009 (set (match_dup 0) (match_dup 2))])
23010
;; Same load / operate / store split as the preceding peephole, for the
;; operand order "mem0 = op1 OP mem0".  The scratch (operand 2) stays the
;; second source, so operand order is preserved.
23011 (define_peephole2
23012 [(match_scratch:SWI 2 "<r>")
23013 (parallel [(set (match_operand:SWI 0 "memory_operand")
23014 (match_operator:SWI 3 "arith_or_logical_operator"
23015 [(match_operand:SWI 1 "<nonmemory_operand>")
23016 (match_dup 0)]))
23017 (clobber (reg:CC FLAGS_REG))])]
23018 "!(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())"
23019 [(set (match_dup 2) (match_dup 0))
23020 (parallel [(set (match_dup 2)
23021 (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
23022 (clobber (reg:CC FLAGS_REG))])
23023 (set (match_dup 0) (match_dup 2))])
23024
23025 ;; Attempt to use arith or logical operations with memory outputs with
23026 ;; setting of flags.
;; Four-insn sequence:
;;   insn 0: reg0 = mem1
;;   insn 1: reg0 = reg0 OP op2		(flags clobbered)
;;   insn 2: mem1 = reg0
;;   insn 3: flags = compare (reg0, 0)
;; It is accepted when TARGET_READ_MODIFY_WRITE or optimizing for size,
;; reg0 is reported dead by peep2_reg_dead_p (4, ...), reg0 is mentioned in
;; neither mem1 nor op2, op2 is an immediate or any_QIreg_operand in QImode,
;; and insn 3 matches CCGOCmode (PLUS/MINUS) or CCNOmode (other operators).
23027 (define_peephole2
23028 [(set (match_operand:SWI 0 "register_operand")
23029 (match_operand:SWI 1 "memory_operand"))
23030 (parallel [(set (match_dup 0)
23031 (match_operator:SWI 3 "plusminuslogic_operator"
23032 [(match_dup 0)
23033 (match_operand:SWI 2 "<nonmemory_operand>")]))
23034 (clobber (reg:CC FLAGS_REG))])
23035 (set (match_dup 1) (match_dup 0))
23036 (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))]
23037 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
23038 && peep2_reg_dead_p (4, operands[0])
23039 && !reg_overlap_mentioned_p (operands[0], operands[1])
23040 && !reg_overlap_mentioned_p (operands[0], operands[2])
23041 && (<MODE>mode != QImode
23042 || immediate_operand (operands[2], QImode)
23043 || any_QIreg_operand (operands[2], QImode))
23044 && ix86_match_ccmode (peep2_next_insn (3),
23045 (GET_CODE (operands[3]) == PLUS
23046 || GET_CODE (operands[3]) == MINUS)
23047 ? CCGOCmode : CCNOmode)"
;; Replacement: a single parallel that sets the flags destination
;; (operand 4, taken from insn 3) to compare (mem1 OP op2, 0) and stores
;; mem1 OP op2 (operand 5) into mem1.
23048 [(parallel [(set (match_dup 4) (match_dup 6))
23049 (set (match_dup 1) (match_dup 5))])]
23050 {
23051 operands[4] = SET_DEST (PATTERN (peep2_next_insn (3)));
23052 operands[5]
23053 = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
23054 copy_rtx (operands[1]),
23055 operands[2]);
23056 operands[6]
23057 = gen_rtx_COMPARE (GET_MODE (operands[4]),
23058 copy_rtx (operands[5]),
23059 const0_rtx);
23060 })
23061
23062 ;; Likewise for cmpelim optimized pattern.
;; Three-insn form in which the operation and the compare are already one
;; parallel:
;;   insn 0: reg0 = mem1
;;   insn 1: flags = compare (reg0 OP op2, 0); reg0 = reg0 OP op2
;;   insn 2: mem1 = reg0
;; The flags destination (operand 4) is taken from the first element of
;; insn 1's parallel.  The replacement is one parallel that computes
;; mem1 OP op2 into mem1 and sets the flags from it.
23063 (define_peephole2
23064 [(set (match_operand:SWI 0 "register_operand")
23065 (match_operand:SWI 1 "memory_operand"))
23066 (parallel [(set (reg FLAGS_REG)
23067 (compare (match_operator:SWI 3 "plusminuslogic_operator"
23068 [(match_dup 0)
23069 (match_operand:SWI 2 "<nonmemory_operand>")])
23070 (const_int 0)))
23071 (set (match_dup 0) (match_dup 3))])
23072 (set (match_dup 1) (match_dup 0))]
23073 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
23074 && peep2_reg_dead_p (3, operands[0])
23075 && !reg_overlap_mentioned_p (operands[0], operands[1])
23076 && !reg_overlap_mentioned_p (operands[0], operands[2])
23077 && ix86_match_ccmode (peep2_next_insn (1),
23078 (GET_CODE (operands[3]) == PLUS
23079 || GET_CODE (operands[3]) == MINUS)
23080 ? CCGOCmode : CCNOmode)"
23081 [(parallel [(set (match_dup 4) (match_dup 6))
23082 (set (match_dup 1) (match_dup 5))])]
23083 {
23084 operands[4] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (1)), 0, 0));
23085 operands[5]
23086 = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
23087 copy_rtx (operands[1]), operands[2]);
23088 operands[6]
23089 = gen_rtx_COMPARE (GET_MODE (operands[4]), copy_rtx (operands[5]),
23090 const0_rtx);
23091 })
23092
23093 ;; Likewise for instances where we have a lea pattern.
;; Variant in which the addition is a plain <LEAMODE> plus without a flags
;; clobber:
;;   insn 0: reg0 = mem1
;;   insn 1: reg3 = reg4 + op2		(<LEAMODE>)
;;   insn 2: mem1 = reg5
;;   insn 3: flags = compare (reg5, 0)
;; The condition requires reg4 to be reg0 and reg5 to be reg3 (by REGNO),
;; reg3 to be reported dead at offset 4, reg0 to be either the same register
;; as reg3 or reported dead at offset 2, and no overlap between those
;; registers and mem1 / op2.  Insn 3 must match CCGOCmode.
23094 (define_peephole2
23095 [(set (match_operand:SWI 0 "register_operand")
23096 (match_operand:SWI 1 "memory_operand"))
23097 (set (match_operand:<LEAMODE> 3 "register_operand")
23098 (plus:<LEAMODE> (match_operand:<LEAMODE> 4 "register_operand")
23099 (match_operand:<LEAMODE> 2 "<nonmemory_operand>")))
23100 (set (match_dup 1) (match_operand:SWI 5 "register_operand"))
23101 (set (reg FLAGS_REG) (compare (match_dup 5) (const_int 0)))]
23102 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
23103 && REGNO (operands[4]) == REGNO (operands[0])
23104 && REGNO (operands[5]) == REGNO (operands[3])
23105 && peep2_reg_dead_p (4, operands[3])
23106 && ((REGNO (operands[0]) == REGNO (operands[3]))
23107 || peep2_reg_dead_p (2, operands[0]))
23108 && !reg_overlap_mentioned_p (operands[0], operands[1])
23109 && !reg_overlap_mentioned_p (operands[3], operands[1])
23110 && !reg_overlap_mentioned_p (operands[0], operands[2])
23111 && (<MODE>mode != QImode
23112 || immediate_operand (operands[2], QImode)
23113 || any_QIreg_operand (operands[2], QImode))
23114 && ix86_match_ccmode (peep2_next_insn (3), CCGOCmode)"
;; Replacement: one parallel that sets the flags destination (operand 6)
;; from compare (mem1 + lowpart (op2), 0) and stores that sum, built in
;; <MODE>mode, into mem1.
23115 [(parallel [(set (match_dup 6) (match_dup 8))
23116 (set (match_dup 1) (match_dup 7))])]
23117 {
23118 operands[6] = SET_DEST (PATTERN (peep2_next_insn (3)));
23119 operands[7]
23120 = gen_rtx_PLUS (<MODE>mode,
23121 copy_rtx (operands[1]),
23122 gen_lowpart (<MODE>mode, operands[2]));
23123 operands[8]
23124 = gen_rtx_COMPARE (GET_MODE (operands[6]),
23125 copy_rtx (operands[7]),
23126 const0_rtx);
23127 })
23128
;; Sequence where the memory location is the second source of the operation:
;;   insn 0: reg0 = reg0 OP mem1		(flags clobbered)
;;   insn 1: mem1 = reg0
;;   insn 2: flags = compare (reg0, 0)
;; Only operators satisfying COMMUTATIVE_ARITH_P are accepted, since the
;; replacement builds "mem1 OP reg0" with the operands swapped.  Insn 2 must
;; match CCGOCmode for PLUS and CCNOmode otherwise.
23129 (define_peephole2
23130 [(parallel [(set (match_operand:SWI 0 "register_operand")
23131 (match_operator:SWI 2 "plusminuslogic_operator"
23132 [(match_dup 0)
23133 (match_operand:SWI 1 "memory_operand")]))
23134 (clobber (reg:CC FLAGS_REG))])
23135 (set (match_dup 1) (match_dup 0))
23136 (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))]
23137 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
23138 && COMMUTATIVE_ARITH_P (operands[2])
23139 && peep2_reg_dead_p (3, operands[0])
23140 && !reg_overlap_mentioned_p (operands[0], operands[1])
23141 && ix86_match_ccmode (peep2_next_insn (2),
23142 GET_CODE (operands[2]) == PLUS
23143 ? CCGOCmode : CCNOmode)"
23144 [(parallel [(set (match_dup 3) (match_dup 5))
23145 (set (match_dup 1) (match_dup 4))])]
23146 {
23147 operands[3] = SET_DEST (PATTERN (peep2_next_insn (2)));
23148 operands[4]
23149 = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
23150 copy_rtx (operands[1]),
23151 operands[0]);
23152 operands[5]
23153 = gen_rtx_COMPARE (GET_MODE (operands[3]),
23154 copy_rtx (operands[4]),
23155 const0_rtx);
23156 })
23157
23158 ;; Likewise for cmpelim optimized pattern.
;; Two-insn form in which the operation and the compare are already one
;; parallel:
;;   insn 0: flags = compare (reg0 OP mem1, 0); reg0 = reg0 OP mem1
;;   insn 1: mem1 = reg0
;; It requires a COMMUTATIVE_ARITH_P operator, reg0 reported dead at
;; offset 2 and not mentioned in mem1.  The flags destination (operand 3)
;; is read from the first element of insn 0's parallel.
23159 (define_peephole2
23160 [(parallel [(set (reg FLAGS_REG)
23161 (compare (match_operator:SWI 2 "plusminuslogic_operator"
23162 [(match_operand:SWI 0 "register_operand")
23163 (match_operand:SWI 1 "memory_operand")])
23164 (const_int 0)))
23165 (set (match_dup 0) (match_dup 2))])
23166 (set (match_dup 1) (match_dup 0))]
23167 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
23168 && COMMUTATIVE_ARITH_P (operands[2])
23169 && peep2_reg_dead_p (2, operands[0])
23170 && !reg_overlap_mentioned_p (operands[0], operands[1])
23171 && ix86_match_ccmode (peep2_next_insn (0),
23172 GET_CODE (operands[2]) == PLUS
23173 ? CCGOCmode : CCNOmode)"
23174 [(parallel [(set (match_dup 3) (match_dup 5))
23175 (set (match_dup 1) (match_dup 4))])]
23176 {
23177 operands[3] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (0)), 0, 0));
23178 operands[4]
23179 = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
23180 copy_rtx (operands[1]), operands[0]);
23181 operands[5]
23182 = gen_rtx_COMPARE (GET_MODE (operands[3]), copy_rtx (operands[4]),
23183 const0_rtx);
23184 })
23185
;; Variant for SWI12 memory where the operation itself is done in SImode:
;;   insn 0: reg0 = mem1			(SWI12 mode)
;;   insn 1: reg4 = reg4 OP op2		(SImode, flags clobbered)
;;   insn 2: mem1 = reg0
;;   insn 3: flags = compare (reg0, 0)
;; reg4 must be the same hard register as reg0 (by REGNO).  The replacement
;; rebuilds the operation in <MODE>mode on mem1, using gen_lowpart of op2.
23186 (define_peephole2
23187 [(set (match_operand:SWI12 0 "register_operand")
23188 (match_operand:SWI12 1 "memory_operand"))
23189 (parallel [(set (match_operand:SI 4 "register_operand")
23190 (match_operator:SI 3 "plusminuslogic_operator"
23191 [(match_dup 4)
23192 (match_operand:SI 2 "nonmemory_operand")]))
23193 (clobber (reg:CC FLAGS_REG))])
23194 (set (match_dup 1) (match_dup 0))
23195 (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))]
23196 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
23197 && REGNO (operands[0]) == REGNO (operands[4])
23198 && peep2_reg_dead_p (4, operands[0])
23199 && (<MODE>mode != QImode
23200 || immediate_operand (operands[2], SImode)
23201 || any_QIreg_operand (operands[2], SImode))
23202 && !reg_overlap_mentioned_p (operands[0], operands[1])
23203 && !reg_overlap_mentioned_p (operands[0], operands[2])
23204 && ix86_match_ccmode (peep2_next_insn (3),
23205 (GET_CODE (operands[3]) == PLUS
23206 || GET_CODE (operands[3]) == MINUS)
23207 ? CCGOCmode : CCNOmode)"
23208 [(parallel [(set (match_dup 5) (match_dup 7))
23209 (set (match_dup 1) (match_dup 6))])]
23210 {
23211 operands[5] = SET_DEST (PATTERN (peep2_next_insn (3)));
23212 operands[6]
23213 = gen_rtx_fmt_ee (GET_CODE (operands[3]), <MODE>mode,
23214 copy_rtx (operands[1]),
23215 gen_lowpart (<MODE>mode, operands[2]));
23216 operands[7]
23217 = gen_rtx_COMPARE (GET_MODE (operands[5]),
23218 copy_rtx (operands[6]),
23219 const0_rtx);
23220 })
23221
23222 ;; peephole2 comes before regcprop, so deal also with a case that
23223 ;; would be cleaned up by regcprop.
;; Five-insn form with an extra register copy before the store:
;;   insn 0: reg0 = mem1
;;   insn 1: reg0 = reg0 OP op2		(flags clobbered)
;;   insn 2: reg4 = reg0
;;   insn 3: mem1 = reg4
;;   insn 4: flags = compare (reg4, 0)
;; reg0 must be reported dead at offset 3 and reg4 at offset 5, and neither
;; may be mentioned in mem1 (reg0 also not in op2).  The replacement is one
;; parallel operating directly on mem1 and setting the flags.
23224 (define_peephole2
23225 [(set (match_operand:SWI 0 "register_operand")
23226 (match_operand:SWI 1 "memory_operand"))
23227 (parallel [(set (match_dup 0)
23228 (match_operator:SWI 3 "plusminuslogic_operator"
23229 [(match_dup 0)
23230 (match_operand:SWI 2 "<nonmemory_operand>")]))
23231 (clobber (reg:CC FLAGS_REG))])
23232 (set (match_operand:SWI 4 "register_operand") (match_dup 0))
23233 (set (match_dup 1) (match_dup 4))
23234 (set (reg FLAGS_REG) (compare (match_dup 4) (const_int 0)))]
23235 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
23236 && peep2_reg_dead_p (3, operands[0])
23237 && peep2_reg_dead_p (5, operands[4])
23238 && !reg_overlap_mentioned_p (operands[0], operands[1])
23239 && !reg_overlap_mentioned_p (operands[0], operands[2])
23240 && !reg_overlap_mentioned_p (operands[4], operands[1])
23241 && (<MODE>mode != QImode
23242 || immediate_operand (operands[2], QImode)
23243 || any_QIreg_operand (operands[2], QImode))
23244 && ix86_match_ccmode (peep2_next_insn (4),
23245 (GET_CODE (operands[3]) == PLUS
23246 || GET_CODE (operands[3]) == MINUS)
23247 ? CCGOCmode : CCNOmode)"
23248 [(parallel [(set (match_dup 5) (match_dup 7))
23249 (set (match_dup 1) (match_dup 6))])]
23250 {
23251 operands[5] = SET_DEST (PATTERN (peep2_next_insn (4)));
23252 operands[6]
23253 = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
23254 copy_rtx (operands[1]),
23255 operands[2]);
23256 operands[7]
23257 = gen_rtx_COMPARE (GET_MODE (operands[5]),
23258 copy_rtx (operands[6]),
23259 const0_rtx);
23260 })
23261
;; Five-insn form combining an SImode operation on SWI12 data with an
;; extra register copy:
;;   insn 0: reg0 = mem1			(SWI12 mode)
;;   insn 1: reg4 = reg4 OP op2		(SImode, flags clobbered)
;;   insn 2: reg5 = reg0
;;   insn 3: mem1 = reg5
;;   insn 4: flags = compare (reg5, 0)
;; reg4 must be the same hard register as reg0 (by REGNO).  The replacement
;; rebuilds the operation in <MODE>mode on mem1, using gen_lowpart of op2.
23262 (define_peephole2
23263 [(set (match_operand:SWI12 0 "register_operand")
23264 (match_operand:SWI12 1 "memory_operand"))
23265 (parallel [(set (match_operand:SI 4 "register_operand")
23266 (match_operator:SI 3 "plusminuslogic_operator"
23267 [(match_dup 4)
23268 (match_operand:SI 2 "nonmemory_operand")]))
23269 (clobber (reg:CC FLAGS_REG))])
23270 (set (match_operand:SWI12 5 "register_operand") (match_dup 0))
23271 (set (match_dup 1) (match_dup 5))
23272 (set (reg FLAGS_REG) (compare (match_dup 5) (const_int 0)))]
23273 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
23274 && REGNO (operands[0]) == REGNO (operands[4])
23275 && peep2_reg_dead_p (3, operands[0])
23276 && peep2_reg_dead_p (5, operands[5])
23277 && (<MODE>mode != QImode
23278 || immediate_operand (operands[2], SImode)
23279 || any_QIreg_operand (operands[2], SImode))
23280 && !reg_overlap_mentioned_p (operands[0], operands[1])
23281 && !reg_overlap_mentioned_p (operands[0], operands[2])
23282 && !reg_overlap_mentioned_p (operands[5], operands[1])
23283 && ix86_match_ccmode (peep2_next_insn (4),
23284 (GET_CODE (operands[3]) == PLUS
23285 || GET_CODE (operands[3]) == MINUS)
23286 ? CCGOCmode : CCNOmode)"
23287 [(parallel [(set (match_dup 6) (match_dup 8))
23288 (set (match_dup 1) (match_dup 7))])]
23289 {
23290 operands[6] = SET_DEST (PATTERN (peep2_next_insn (4)));
23291 operands[7]
23292 = gen_rtx_fmt_ee (GET_CODE (operands[3]), <MODE>mode,
23293 copy_rtx (operands[1]),
23294 gen_lowpart (<MODE>mode, operands[2]));
23295 operands[8]
23296 = gen_rtx_COMPARE (GET_MODE (operands[6]),
23297 copy_rtx (operands[7]),
23298 const0_rtx);
23299 })
23300
23301 ;; Likewise for cmpelim optimized pattern.
;; Four-insn form where the operation and compare are already one parallel
;; and the result is copied before the store:
;;   insn 0: reg0 = mem1
;;   insn 1: flags = compare (reg0 OP op2, 0); reg0 = reg0 OP op2
;;   insn 2: reg4 = reg0
;;   insn 3: mem1 = reg4
;; reg0 must be reported dead at offset 3 and reg4 at offset 4.  The flags
;; destination (operand 5) comes from the first element of insn 1's
;; parallel.
23302 (define_peephole2
23303 [(set (match_operand:SWI 0 "register_operand")
23304 (match_operand:SWI 1 "memory_operand"))
23305 (parallel [(set (reg FLAGS_REG)
23306 (compare (match_operator:SWI 3 "plusminuslogic_operator"
23307 [(match_dup 0)
23308 (match_operand:SWI 2 "<nonmemory_operand>")])
23309 (const_int 0)))
23310 (set (match_dup 0) (match_dup 3))])
23311 (set (match_operand:SWI 4 "register_operand") (match_dup 0))
23312 (set (match_dup 1) (match_dup 4))]
23313 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
23314 && peep2_reg_dead_p (3, operands[0])
23315 && peep2_reg_dead_p (4, operands[4])
23316 && !reg_overlap_mentioned_p (operands[0], operands[1])
23317 && !reg_overlap_mentioned_p (operands[0], operands[2])
23318 && !reg_overlap_mentioned_p (operands[4], operands[1])
23319 && ix86_match_ccmode (peep2_next_insn (1),
23320 (GET_CODE (operands[3]) == PLUS
23321 || GET_CODE (operands[3]) == MINUS)
23322 ? CCGOCmode : CCNOmode)"
23323 [(parallel [(set (match_dup 5) (match_dup 7))
23324 (set (match_dup 1) (match_dup 6))])]
23325 {
23326 operands[5] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (1)), 0, 0));
23327 operands[6]
23328 = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
23329 copy_rtx (operands[1]), operands[2]);
23330 operands[7]
23331 = gen_rtx_COMPARE (GET_MODE (operands[5]), copy_rtx (operands[6]),
23332 const0_rtx);
23333 })
23334
23335 ;; Special cases for xor, where (x ^= y) != 0 is (misoptimized)
23336 ;; into x = z; x ^= y; x != z
;; Five-insn xor sequence:
;;   insn 0: reg0 = mem1
;;   insn 1: reg3 = reg0
;;   insn 2: reg4 = reg4 ^ op2		(flags clobbered)
;;   insn 3: mem1 = reg4
;;   insn 4: flags:CCZ = compare (op5, op6)
;; reg4 must be reg0 or reg3 (by REGNO).  The compare must be between the
;; other of those two registers and op2, in either operand order.  reg4 must
;; be reported dead at offset 4 and the other copy at offset 5.  reg0 must
;; not be mentioned in mem1 or op2, and reg3 must not be mentioned in reg0,
;; mem1 or op2.
23337 (define_peephole2
23338 [(set (match_operand:SWI 0 "register_operand")
23339 (match_operand:SWI 1 "memory_operand"))
23340 (set (match_operand:SWI 3 "register_operand") (match_dup 0))
23341 (parallel [(set (match_operand:SWI 4 "register_operand")
23342 (xor:SWI (match_dup 4)
23343 (match_operand:SWI 2 "<nonmemory_operand>")))
23344 (clobber (reg:CC FLAGS_REG))])
23345 (set (match_dup 1) (match_dup 4))
23346 (set (reg:CCZ FLAGS_REG)
23347 (compare:CCZ (match_operand:SWI 5 "register_operand")
23348 (match_operand:SWI 6 "<nonmemory_operand>")))]
23349 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
23350 && (REGNO (operands[4]) == REGNO (operands[0])
23351 || REGNO (operands[4]) == REGNO (operands[3]))
23352 && (rtx_equal_p (operands[REGNO (operands[4]) == REGNO (operands[0])
23353 ? 3 : 0], operands[5])
23354 ? rtx_equal_p (operands[2], operands[6])
23355 : rtx_equal_p (operands[2], operands[5])
23356 && rtx_equal_p (operands[REGNO (operands[4]) == REGNO (operands[0])
23357 ? 3 : 0], operands[6]))
23358 && peep2_reg_dead_p (4, operands[4])
23359 && peep2_reg_dead_p (5, operands[REGNO (operands[4]) == REGNO (operands[0])
23360 ? 3 : 0])
23361 && !reg_overlap_mentioned_p (operands[0], operands[1])
23362 && !reg_overlap_mentioned_p (operands[0], operands[2])
23363 && !reg_overlap_mentioned_p (operands[3], operands[0])
23364 && !reg_overlap_mentioned_p (operands[3], operands[1])
23365 && !reg_overlap_mentioned_p (operands[3], operands[2])
23366 && (<MODE>mode != QImode
23367 || immediate_operand (operands[2], QImode)
23368 || any_QIreg_operand (operands[2], QImode))"
;; Replacement: one parallel that stores mem1 ^ op2 into mem1 and sets the
;; flags destination of insn 4 (operand 7) from compare (mem1 ^ op2, 0).
23369 [(parallel [(set (match_dup 7) (match_dup 9))
23370 (set (match_dup 1) (match_dup 8))])]
23371 {
23372 operands[7] = SET_DEST (PATTERN (peep2_next_insn (4)));
23373 operands[8] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]),
23374 operands[2]);
23375 operands[9]
23376 = gen_rtx_COMPARE (GET_MODE (operands[7]),
23377 copy_rtx (operands[8]),
23378 const0_rtx);
23379 })
23380
;; SWI12 counterpart of the xor special case, with the xor done in SImode:
;;   insn 0: reg0 = mem1			(SWI12 mode)
;;   insn 1: reg3 = reg0
;;   insn 2: reg4 = reg4 ^ op2		(SImode, flags clobbered)
;;   insn 3: mem1 = reg5
;;   insn 4: flags:CCZ = compare (op6, op7)
;; reg5 must be reg0 or reg3 and also the same hard register as reg4.
;; Because op2 is SImode while op6/op7 are SWI12, a register op2 is matched
;; against the compare operand by REGNO rather than by rtx_equal_p.
;; The replacement rebuilds the xor in <MODE>mode on mem1 using gen_lowpart
;; of op2.
23381 (define_peephole2
23382 [(set (match_operand:SWI12 0 "register_operand")
23383 (match_operand:SWI12 1 "memory_operand"))
23384 (set (match_operand:SWI12 3 "register_operand") (match_dup 0))
23385 (parallel [(set (match_operand:SI 4 "register_operand")
23386 (xor:SI (match_dup 4)
23387 (match_operand:SI 2 "<nonmemory_operand>")))
23388 (clobber (reg:CC FLAGS_REG))])
23389 (set (match_dup 1) (match_operand:SWI12 5 "register_operand"))
23390 (set (reg:CCZ FLAGS_REG)
23391 (compare:CCZ (match_operand:SWI12 6 "register_operand")
23392 (match_operand:SWI12 7 "<nonmemory_operand>")))]
23393 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
23394 && (REGNO (operands[5]) == REGNO (operands[0])
23395 || REGNO (operands[5]) == REGNO (operands[3]))
23396 && REGNO (operands[5]) == REGNO (operands[4])
23397 && (rtx_equal_p (operands[REGNO (operands[5]) == REGNO (operands[0])
23398 ? 3 : 0], operands[6])
23399 ? (REG_P (operands[2])
23400 ? REG_P (operands[7]) && REGNO (operands[2]) == REGNO (operands[7])
23401 : rtx_equal_p (operands[2], operands[7]))
23402 : (rtx_equal_p (operands[REGNO (operands[5]) == REGNO (operands[0])
23403 ? 3 : 0], operands[7])
23404 && REG_P (operands[2])
23405 && REGNO (operands[2]) == REGNO (operands[6])))
23406 && peep2_reg_dead_p (4, operands[5])
23407 && peep2_reg_dead_p (5, operands[REGNO (operands[5]) == REGNO (operands[0])
23408 ? 3 : 0])
23409 && !reg_overlap_mentioned_p (operands[0], operands[1])
23410 && !reg_overlap_mentioned_p (operands[0], operands[2])
23411 && !reg_overlap_mentioned_p (operands[3], operands[0])
23412 && !reg_overlap_mentioned_p (operands[3], operands[1])
23413 && !reg_overlap_mentioned_p (operands[3], operands[2])
23414 && (<MODE>mode != QImode
23415 || immediate_operand (operands[2], SImode)
23416 || any_QIreg_operand (operands[2], SImode))"
23417 [(parallel [(set (match_dup 8) (match_dup 10))
23418 (set (match_dup 1) (match_dup 9))])]
23419 {
23420 operands[8] = SET_DEST (PATTERN (peep2_next_insn (4)));
23421 operands[9] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]),
23422 gen_lowpart (<MODE>mode, operands[2]));
23423 operands[10]
23424 = gen_rtx_COMPARE (GET_MODE (operands[8]),
23425 copy_rtx (operands[9]),
23426 const0_rtx);
23427 })
23428
23429 ;; Attempt to optimize away memory stores of values the memory already
23430 ;; has. See PR79593.
;; Matches "reg0 = mem1; mem2 = reg0" where mem1 and mem2 are rtx_equal_p,
;; neither is volatile, and reg0 is not mentioned in mem2.  The store is
;; dropped and only the load is kept.
23431 (define_peephole2
23432 [(set (match_operand 0 "register_operand")
23433 (match_operand 1 "memory_operand"))
23434 (set (match_operand 2 "memory_operand") (match_dup 0))]
23435 "!MEM_VOLATILE_P (operands[1])
23436 && !MEM_VOLATILE_P (operands[2])
23437 && rtx_equal_p (operands[1], operands[2])
23438 && !reg_overlap_mentioned_p (operands[0], operands[2])"
23439 [(set (match_dup 0) (match_dup 1))])
23440
23441 ;; Attempt to always use XOR for zeroing registers (including FP modes).
;; Matches a plain move of a const0_operand into a general register whose
;; mode is no wider than UNITS_PER_WORD, when !TARGET_USE_MOV0 or optimizing
;; for size, and FLAGS_REG is dead.  It is rewritten as a set to zero with a
;; flags clobber, with operand 0 converted to its word_mode lowpart.
23442 (define_peephole2
23443 [(set (match_operand 0 "general_reg_operand")
23444 (match_operand 1 "const0_operand"))]
23445 "GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD
23446 && (! TARGET_USE_MOV0 || optimize_insn_for_size_p ())
23447 && peep2_regno_dead_p (0, FLAGS_REG)"
23448 [(parallel [(set (match_dup 0) (const_int 0))
23449 (clobber (reg:CC FLAGS_REG))])]
23450 "operands[0] = gen_lowpart (word_mode, operands[0]);")
23451
;; strict_low_part counterpart for SWI12 registers: zeroing only the low
;; part gets a flags clobber added under the same target/size and
;; dead-flags conditions.  The operand is left in its original mode.
23452 (define_peephole2
23453 [(set (strict_low_part (match_operand:SWI12 0 "general_reg_operand"))
23454 (const_int 0))]
23455 "(! TARGET_USE_MOV0 || optimize_insn_for_size_p ())
23456 && peep2_regno_dead_p (0, FLAGS_REG)"
23457 [(parallel [(set (strict_low_part (match_dup 0)) (const_int 0))
23458 (clobber (reg:CC FLAGS_REG))])])
23459
23460 ;; For HI, SI and DI modes, or $-1,reg is smaller than mov $-1,reg.
;; Matches "reg0 = -1" for SWI248 general registers when
;; TARGET_MOVE_M1_VIA_OR or optimizing for size, and FLAGS_REG is dead.
;; It is rewritten as the same set with a flags clobber.  For modes narrower
;; than SImode the destination is first widened to its SImode lowpart.
23461 (define_peephole2
23462 [(set (match_operand:SWI248 0 "general_reg_operand")
23463 (const_int -1))]
23464 "(TARGET_MOVE_M1_VIA_OR || optimize_insn_for_size_p ())
23465 && peep2_regno_dead_p (0, FLAGS_REG)"
23466 [(parallel [(set (match_dup 0) (const_int -1))
23467 (clobber (reg:CC FLAGS_REG))])]
23468 {
23469 if (<MODE_SIZE> < GET_MODE_SIZE (SImode))
23470 operands[0] = gen_lowpart (SImode, operands[0]);
23471 })
23472
23473 ;; Attempt to convert simple lea to add/shift.
23474 ;; These can be created by move expanders.
23475 ;; Disable PLUS peepholes on TARGET_OPT_AGU, since all
23476 ;; relevant lea instructions were already split.
23477
;; Matches a flag-preserving "reg0 = reg0 + op1" (no clobber in the
;; pattern) when !TARGET_OPT_AGU and FLAGS_REG is dead.  It is rewritten as
;; the same addition with an explicit flags clobber.
23478 (define_peephole2
23479 [(set (match_operand:SWI48 0 "register_operand")
23480 (plus:SWI48 (match_dup 0)
23481 (match_operand:SWI48 1 "<nonmemory_operand>")))]
23482 "!TARGET_OPT_AGU
23483 && peep2_regno_dead_p (0, FLAGS_REG)"
23484 [(parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 1)))
23485 (clobber (reg:CC FLAGS_REG))])])
23486
;; Same as the preceding peephole for the operand order
;; "reg0 = op1 + reg0".  The replacement puts reg0 first.
23487 (define_peephole2
23488 [(set (match_operand:SWI48 0 "register_operand")
23489 (plus:SWI48 (match_operand:SWI48 1 "<nonmemory_operand>")
23490 (match_dup 0)))]
23491 "!TARGET_OPT_AGU
23492 && peep2_regno_dead_p (0, FLAGS_REG)"
23493 [(parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 1)))
23494 (clobber (reg:CC FLAGS_REG))])])
23495
;; 64-bit only: matches "reg0:DI = zero_extend (reg1:SI + op2:SI)" where
;; reg0 and reg1 are the same hard register, !TARGET_OPT_AGU, and FLAGS_REG
;; is dead.  It is rewritten as the same expression with a flags clobber.
23496 (define_peephole2
23497 [(set (match_operand:DI 0 "register_operand")
23498 (zero_extend:DI
23499 (plus:SI (match_operand:SI 1 "register_operand")
23500 (match_operand:SI 2 "nonmemory_operand"))))]
23501 "TARGET_64BIT && !TARGET_OPT_AGU
23502 && REGNO (operands[0]) == REGNO (operands[1])
23503 && peep2_regno_dead_p (0, FLAGS_REG)"
23504 [(parallel [(set (match_dup 0)
23505 (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))
23506 (clobber (reg:CC FLAGS_REG))])])
23507
;; Same as the preceding peephole for the operand order
;; "reg0:DI = zero_extend (op1:SI + reg2:SI)", where reg0 and reg2 are the
;; same hard register.  The replacement swaps the sources so the register
;; comes first.
23508 (define_peephole2
23509 [(set (match_operand:DI 0 "register_operand")
23510 (zero_extend:DI
23511 (plus:SI (match_operand:SI 1 "nonmemory_operand")
23512 (match_operand:SI 2 "register_operand"))))]
23513 "TARGET_64BIT && !TARGET_OPT_AGU
23514 && REGNO (operands[0]) == REGNO (operands[2])
23515 && peep2_regno_dead_p (0, FLAGS_REG)"
23516 [(parallel [(set (match_dup 0)
23517 (zero_extend:DI (plus:SI (match_dup 2) (match_dup 1))))
23518 (clobber (reg:CC FLAGS_REG))])])
23519
;; Matches "reg0 = reg0 * C" (no clobber in the pattern) where the constant
;; satisfies pow2p_hwi and FLAGS_REG is dead.  It is rewritten as a left
;; shift with a flags clobber; operand 1 becomes exact_log2 of the constant.
23520 (define_peephole2
23521 [(set (match_operand:SWI48 0 "register_operand")
23522 (mult:SWI48 (match_dup 0)
23523 (match_operand:SWI48 1 "const_int_operand")))]
23524 "pow2p_hwi (INTVAL (operands[1]))
23525 && peep2_regno_dead_p (0, FLAGS_REG)"
23526 [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))
23527 (clobber (reg:CC FLAGS_REG))])]
23528 "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
23529
;; 64-bit only: matches "reg0:DI = zero_extend (reg1:SI * C)" where the
;; constant satisfies pow2p_hwi, reg0 and reg1 are the same hard register,
;; and FLAGS_REG is dead.  It is rewritten as a zero-extended SImode left
;; shift with a flags clobber; operand 2 becomes exact_log2 of the constant.
23530 (define_peephole2
23531 [(set (match_operand:DI 0 "register_operand")
23532 (zero_extend:DI
23533 (mult:SI (match_operand:SI 1 "register_operand")
23534 (match_operand:SI 2 "const_int_operand"))))]
23535 "TARGET_64BIT
23536 && pow2p_hwi (INTVAL (operands[2]))
23537 && REGNO (operands[0]) == REGNO (operands[1])
23538 && peep2_regno_dead_p (0, FLAGS_REG)"
23539 [(parallel [(set (match_dup 0)
23540 (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))
23541 (clobber (reg:CC FLAGS_REG))])]
23542 "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));")
23543
23544 ;; The ESP adjustments can be done by the push and pop instructions. Resulting
23545 ;; code is shorter, since push is only 1 byte, while add imm, %esp is 3 bytes.
23546 ;; On many CPUs it is also faster, since special hardware to avoid esp
23547 ;; dependencies is present.
23548
23549 ;; While some of these conversions may be done using splitters, we use
23550 ;; peepholes in order to allow combine_stack_adjustments pass to see
23551 ;; nonobfuscated RTL.
23552
23553 ;; Convert prologue esp subtractions to push.
23554 ;; We need a register to push. In order to keep verify_flow_info happy we have
23555 ;; two choices:
23556 ;; - use a scratch register and clobber it in order to avoid dependencies
23557 ;; - use an already live register
23558 ;; We can't use the second way right now, since there is no reliable way to
23559 ;; verify that a given register is live. The first choice will also most likely
23560 ;; result in fewer dependencies. At the point of esp adjustments it is very likely
23561 ;; that call-clobbered registers are dead. We may want to use the base pointer as
23562 ;; an alternative when no register is available later.
23563
;; Stack-pointer adjustment by minus one word, in the form that also
;; clobbers flags and (mem:BLK (scratch)).  When TARGET_SINGLE_PUSH or
;; optimizing for size, the red zone is not in use, and a scratch register
;; (operand 1) is available, it is replaced by a clobber of the scratch
;; followed by one push of it.  The memory clobber is kept on the push.
23564 (define_peephole2
23565 [(match_scratch:W 1 "r")
23566 (parallel [(set (reg:P SP_REG)
23567 (plus:P (reg:P SP_REG)
23568 (match_operand:P 0 "const_int_operand")))
23569 (clobber (reg:CC FLAGS_REG))
23570 (clobber (mem:BLK (scratch)))])]
23571 "(TARGET_SINGLE_PUSH || optimize_insn_for_size_p ())
23572 && INTVAL (operands[0]) == -GET_MODE_SIZE (word_mode)
23573 && !ix86_red_zone_used"
23574 [(clobber (match_dup 1))
23575 (parallel [(set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
23576 (clobber (mem:BLK (scratch)))])])
23577
;; Stack-pointer adjustment by minus two words, in the form that also
;; clobbers flags and (mem:BLK (scratch)).  When TARGET_DOUBLE_PUSH or
;; optimizing for size and the red zone is not in use, it is replaced by a
;; clobber of the scratch and two pushes of it.  Only the second push
;; carries the memory clobber.
23578 (define_peephole2
23579 [(match_scratch:W 1 "r")
23580 (parallel [(set (reg:P SP_REG)
23581 (plus:P (reg:P SP_REG)
23582 (match_operand:P 0 "const_int_operand")))
23583 (clobber (reg:CC FLAGS_REG))
23584 (clobber (mem:BLK (scratch)))])]
23585 "(TARGET_DOUBLE_PUSH || optimize_insn_for_size_p ())
23586 && INTVAL (operands[0]) == -2*GET_MODE_SIZE (word_mode)
23587 && !ix86_red_zone_used"
23588 [(clobber (match_dup 1))
23589 (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
23590 (parallel [(set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
23591 (clobber (mem:BLK (scratch)))])])
23592
23593 ;; Convert esp subtractions to push.
;; Stack-pointer adjustment by minus one word, in the form that clobbers
;; only flags (no memory clobber).  Under the same conditions as the
;; single-push case with a memory clobber, it is replaced by a clobber of
;; the scratch register followed by one plain push of it.
23594 (define_peephole2
23595 [(match_scratch:W 1 "r")
23596 (parallel [(set (reg:P SP_REG)
23597 (plus:P (reg:P SP_REG)
23598 (match_operand:P 0 "const_int_operand")))
23599 (clobber (reg:CC FLAGS_REG))])]
23600 "(TARGET_SINGLE_PUSH || optimize_insn_for_size_p ())
23601 && INTVAL (operands[0]) == -GET_MODE_SIZE (word_mode)
23602 && !ix86_red_zone_used"
23603 [(clobber (match_dup 1))
23604 (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))])
23605
;; Stack-pointer adjustment by minus two words, in the form that clobbers
;; only flags.  When TARGET_DOUBLE_PUSH or optimizing for size and the red
;; zone is not in use, it is replaced by a clobber of the scratch register
;; followed by two plain pushes of it.
23606 (define_peephole2
23607 [(match_scratch:W 1 "r")
23608 (parallel [(set (reg:P SP_REG)
23609 (plus:P (reg:P SP_REG)
23610 (match_operand:P 0 "const_int_operand")))
23611 (clobber (reg:CC FLAGS_REG))])]
23612 "(TARGET_DOUBLE_PUSH || optimize_insn_for_size_p ())
23613 && INTVAL (operands[0]) == -2*GET_MODE_SIZE (word_mode)
23614 && !ix86_red_zone_used"
23615 [(clobber (match_dup 1))
23616 (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
23617 (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))])
23618
23619 ;; Convert epilogue deallocator to pop.
;; Stack-pointer adjustment by plus one word, in the form that also
;; clobbers flags and (mem:BLK (scratch)).  When TARGET_SINGLE_POP or
;; optimizing for size and a scratch register is available, it is replaced
;; by one pop into the scratch, keeping the memory clobber.
23620 (define_peephole2
23621 [(match_scratch:W 1 "r")
23622 (parallel [(set (reg:P SP_REG)
23623 (plus:P (reg:P SP_REG)
23624 (match_operand:P 0 "const_int_operand")))
23625 (clobber (reg:CC FLAGS_REG))
23626 (clobber (mem:BLK (scratch)))])]
23627 "(TARGET_SINGLE_POP || optimize_insn_for_size_p ())
23628 && INTVAL (operands[0]) == GET_MODE_SIZE (word_mode)"
23629 [(parallel [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
23630 (clobber (mem:BLK (scratch)))])])
23631
23632 ;; Two pops case is tricky, since pop causes dependency
23633 ;; on destination register. We use two registers if available.
;; Stack-pointer adjustment by plus two words, in the form with a memory
;; clobber, when two scratch registers (operands 1 and 2) are available and
;; TARGET_DOUBLE_POP or optimizing for size.  It is replaced by two pops,
;; one into each scratch.  Only the first pop carries the memory clobber.
23634 (define_peephole2
23635 [(match_scratch:W 1 "r")
23636 (match_scratch:W 2 "r")
23637 (parallel [(set (reg:P SP_REG)
23638 (plus:P (reg:P SP_REG)
23639 (match_operand:P 0 "const_int_operand")))
23640 (clobber (reg:CC FLAGS_REG))
23641 (clobber (mem:BLK (scratch)))])]
23642 "(TARGET_DOUBLE_POP || optimize_insn_for_size_p ())
23643 && INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
23644 [(parallel [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
23645 (clobber (mem:BLK (scratch)))])
23646 (set (match_dup 2) (mem:W (post_inc:P (reg:P SP_REG))))])
23647
;; Single-scratch version of the two-word case with a memory clobber, used
;; only when optimizing for size.  Both pops target the same scratch
;; register (operand 1).
23648 (define_peephole2
23649 [(match_scratch:W 1 "r")
23650 (parallel [(set (reg:P SP_REG)
23651 (plus:P (reg:P SP_REG)
23652 (match_operand:P 0 "const_int_operand")))
23653 (clobber (reg:CC FLAGS_REG))
23654 (clobber (mem:BLK (scratch)))])]
23655 "optimize_insn_for_size_p ()
23656 && INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
23657 [(parallel [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
23658 (clobber (mem:BLK (scratch)))])
23659 (set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))])
23660
23661 ;; Convert esp additions to pop.
;; Stack-pointer adjustment by plus one word, in the form that clobbers
;; only flags.  Whenever a scratch register is available it is replaced by
;; one pop into the scratch; the condition has no target or size test.
23662 (define_peephole2
23663 [(match_scratch:W 1 "r")
23664 (parallel [(set (reg:P SP_REG)
23665 (plus:P (reg:P SP_REG)
23666 (match_operand:P 0 "const_int_operand")))
23667 (clobber (reg:CC FLAGS_REG))])]
23668 "INTVAL (operands[0]) == GET_MODE_SIZE (word_mode)"
23669 [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))])
23670
23671 ;; Two pops case is tricky, since pop causes dependency
23672 ;; on destination register. We use two registers if available.
;; Stack-pointer adjustment by plus two words, in the form that clobbers
;; only flags, when two scratch registers are available.  It is replaced by
;; two pops, one into each scratch; the condition has no target or size
;; test.
23673 (define_peephole2
23674 [(match_scratch:W 1 "r")
23675 (match_scratch:W 2 "r")
23676 (parallel [(set (reg:P SP_REG)
23677 (plus:P (reg:P SP_REG)
23678 (match_operand:P 0 "const_int_operand")))
23679 (clobber (reg:CC FLAGS_REG))])]
23680 "INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
23681 [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
23682 (set (match_dup 2) (mem:W (post_inc:P (reg:P SP_REG))))])
23683
;; Single-scratch version of the two-word case without a memory clobber,
;; used only when optimizing for size.  Both pops target the same scratch
;; register (operand 1).
23684 (define_peephole2
23685 [(match_scratch:W 1 "r")
23686 (parallel [(set (reg:P SP_REG)
23687 (plus:P (reg:P SP_REG)
23688 (match_operand:P 0 "const_int_operand")))
23689 (clobber (reg:CC FLAGS_REG))])]
23690 "optimize_insn_for_size_p ()
23691 && INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
23692 [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
23693 (set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))])
23694 \f
23695 ;; Convert compares with 1 to shorter inc/dec operations when CF is not
23696 ;; required and register dies. Similarly for 128 to -128.
;; The inc/dec case is allowed when compare-and-branch fusion is off or the
;; insn is optimized for size; the 128 case only when fusion is off.  The
;; flags user must be satisfied by CCGCmode, and operand 2 must be dead after
;; the insn because the replacement adds a clobber of it.
23697 (define_peephole2
23698 [(set (match_operand 0 "flags_reg_operand")
23699 (match_operator 1 "compare_operator"
23700 [(match_operand 2 "register_operand")
23701 (match_operand 3 "const_int_operand")]))]
23702 "(((!TARGET_FUSE_CMP_AND_BRANCH || optimize_insn_for_size_p ())
23703 && incdec_operand (operands[3], GET_MODE (operands[3])))
23704 || (!TARGET_FUSE_CMP_AND_BRANCH
23705 && INTVAL (operands[3]) == 128))
23706 && ix86_match_ccmode (insn, CCGCmode)
23707 && peep2_reg_dead_p (1, operands[2])"
23708 [(parallel [(set (match_dup 0)
23709 (match_op_dup 1 [(match_dup 2) (match_dup 3)]))
23710 (clobber (match_dup 2))])])
23711 \f
23712 ;; Convert imul by three, five and nine into lea
;; Register-source form: reg * N is rewritten as reg * (N - 1) + reg.  The
;; preparation statement replaces operand 2 with N - 1.  The replacement has
;; no flags clobber, unlike the matched multiply.
23713 (define_peephole2
23714 [(parallel
23715 [(set (match_operand:SWI48 0 "register_operand")
23716 (mult:SWI48 (match_operand:SWI48 1 "register_operand")
23717 (match_operand:SWI48 2 "const359_operand")))
23718 (clobber (reg:CC FLAGS_REG))])]
23719 "!TARGET_PARTIAL_REG_STALL
23720 || <MODE>mode == SImode
23721 || optimize_function_for_size_p (cfun)"
23722 [(set (match_dup 0)
23723 (plus:SWI48 (mult:SWI48 (match_dup 1) (match_dup 2))
23724 (match_dup 1)))]
23725 "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);")
23726
;; Same rewrite for a source that may be in memory, used only when optimizing
;; for speed: operand 1 is first copied into the destination, then the
;; destination is rewritten as dest * (N - 1) + dest.
23727 (define_peephole2
23728 [(parallel
23729 [(set (match_operand:SWI48 0 "register_operand")
23730 (mult:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
23731 (match_operand:SWI48 2 "const359_operand")))
23732 (clobber (reg:CC FLAGS_REG))])]
23733 "optimize_insn_for_speed_p ()
23734 && (!TARGET_PARTIAL_REG_STALL || <MODE>mode == SImode)"
23735 [(set (match_dup 0) (match_dup 1))
23736 (set (match_dup 0)
23737 (plus:SWI48 (mult:SWI48 (match_dup 0) (match_dup 2))
23738 (match_dup 0)))]
23739 "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);")
23740
23741 ;; imul $32bit_imm, mem, reg is vector decoded, while
23742 ;; imul $32bit_imm, reg, reg is direct decoded.
;; On targets with TARGET_SLOW_IMUL_IMM32_MEM, when optimizing for speed,
;; load the memory operand into a scratch register and multiply from there.
;; Immediates that satisfy constraint K are left alone.
23743 (define_peephole2
23744 [(match_scratch:SWI48 3 "r")
23745 (parallel [(set (match_operand:SWI48 0 "register_operand")
23746 (mult:SWI48 (match_operand:SWI48 1 "memory_operand")
23747 (match_operand:SWI48 2 "immediate_operand")))
23748 (clobber (reg:CC FLAGS_REG))])]
23749 "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
23750 && !satisfies_constraint_K (operands[2])"
23751 [(set (match_dup 3) (match_dup 1))
23752 (parallel [(set (match_dup 0) (mult:SWI48 (match_dup 3) (match_dup 2)))
23753 (clobber (reg:CC FLAGS_REG))])])
23754
;; 64-bit-only variant of the memory/immediate imul split in which the SImode
;; product is zero-extended to DImode; the scratch register is SImode.
23755 (define_peephole2
23756 [(match_scratch:SI 3 "r")
23757 (parallel [(set (match_operand:DI 0 "register_operand")
23758 (zero_extend:DI
23759 (mult:SI (match_operand:SI 1 "memory_operand")
23760 (match_operand:SI 2 "immediate_operand"))))
23761 (clobber (reg:CC FLAGS_REG))])]
23762 "TARGET_64BIT
23763 && TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
23764 && !satisfies_constraint_K (operands[2])"
23765 [(set (match_dup 3) (match_dup 1))
23766 (parallel [(set (match_dup 0)
23767 (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2))))
23768 (clobber (reg:CC FLAGS_REG))])])
23769
23770 ;; imul $8/16bit_imm, regmem, reg is vector decoded.
23771 ;; Convert it into imul reg, reg
23772 ;; It would be better to force assembler to encode instruction using long
23773 ;; immediate, but there is apparently no way to do so.
;; Applies on TARGET_SLOW_IMUL_IMM8 when optimizing for speed and the
;; constant satisfies constraint K.  The constant is loaded into a scratch
;; register (requested after the multiply in the template) and the multiply
;; becomes dest * scratch.
23774 (define_peephole2
23775 [(parallel [(set (match_operand:SWI248 0 "register_operand")
23776 (mult:SWI248
23777 (match_operand:SWI248 1 "nonimmediate_operand")
23778 (match_operand:SWI248 2 "const_int_operand")))
23779 (clobber (reg:CC FLAGS_REG))])
23780 (match_scratch:SWI248 3 "r")]
23781 "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p ()
23782 && satisfies_constraint_K (operands[2])"
23783 [(set (match_dup 3) (match_dup 2))
23784 (parallel [(set (match_dup 0) (mult:SWI248 (match_dup 0) (match_dup 3)))
23785 (clobber (reg:CC FLAGS_REG))])]
23786 {
/* The new multiply reads operand 0, so copy operand 1 there first unless
   it already is operand 0.  */
23787 if (!rtx_equal_p (operands[0], operands[1]))
23788 emit_move_insn (operands[0], operands[1]);
23789 })
23790
23791 ;; After splitting up read-modify operations, array accesses with memory
23792 ;; operands might end up in form:
23793 ;; sall $2, %eax
23794 ;; movl 4(%esp), %edx
23795 ;; addl %edx, %eax
23796 ;; instead of pre-splitting:
23797 ;; sall $2, %eax
23798 ;; addl 4(%esp), %eax
23799 ;; Turn it into:
23800 ;; movl 4(%esp), %edx
23801 ;; leal (%edx,%eax,4), %eax
23802
;; Matches a shift by 1..3 followed by an add of the shifted register with a
;; general operand.  The shifted register must either be the add destination
;; or be dead after the add, and must not appear in the added operand because
;; the load of that operand is moved ahead of the shift.
23803 (define_peephole2
23804 [(match_scratch:W 5 "r")
23805 (parallel [(set (match_operand 0 "register_operand")
23806 (ashift (match_operand 1 "register_operand")
23807 (match_operand 2 "const_int_operand")))
23808 (clobber (reg:CC FLAGS_REG))])
23809 (parallel [(set (match_operand 3 "register_operand")
23810 (plus (match_dup 0)
23811 (match_operand 4 "x86_64_general_operand")))
23812 (clobber (reg:CC FLAGS_REG))])]
23813 "IN_RANGE (INTVAL (operands[2]), 1, 3)
23814 /* Validate MODE for lea. */
23815 && ((!TARGET_PARTIAL_REG_STALL
23816 && (GET_MODE (operands[0]) == QImode
23817 || GET_MODE (operands[0]) == HImode))
23818 || GET_MODE (operands[0]) == SImode
23819 || (TARGET_64BIT && GET_MODE (operands[0]) == DImode))
23820 && (rtx_equal_p (operands[0], operands[3])
23821 || peep2_reg_dead_p (2, operands[0]))
23822 /* We reorder load and the shift. */
23823 && !reg_overlap_mentioned_p (operands[0], operands[4])"
23824 [(set (match_dup 5) (match_dup 4))
23825 (set (match_dup 0) (match_dup 1))]
23826 {
/* The address arithmetic is built in word_mode; the result is written in
   DImode when the shifted value is DImode and in SImode otherwise.  */
23827 machine_mode op1mode = GET_MODE (operands[1]);
23828 machine_mode mode = op1mode == DImode ? DImode : SImode;
23829 int scale = 1 << INTVAL (operands[2]);
23830 rtx index = gen_lowpart (word_mode, operands[1]);
23831 rtx base = gen_lowpart (word_mode, operands[5]);
23832 rtx dest = gen_lowpart (mode, operands[3]);
23833
/* Operand 1 becomes the source of the second emitted set:
   base + index * scale, taken as a SUBREG when MODE is narrower than
   word_mode.  */
23834 operands[1] = gen_rtx_PLUS (word_mode, base,
23835 gen_rtx_MULT (word_mode, index, GEN_INT (scale)));
23836 if (mode != word_mode)
23837 operands[1] = gen_rtx_SUBREG (mode, operands[1], 0);
23838
/* Operand 5 is the destination of the first emitted set (the load of
   operand 4); view the scratch in the mode of the original shift input.  */
23839 operands[5] = base;
23840 if (op1mode != word_mode)
23841 operands[5] = gen_lowpart (op1mode, operands[5]);
23842
/* The second emitted set writes the add destination, not the shift
   destination.  */
23843 operands[0] = dest;
23844 })
23845 \f
23846 ;; We used to use "int $5", in honor of #BR which maps to interrupt vector 5.
23847 ;; That, however, is usually mapped by the OS to SIGSEGV, which is often
23848 ;; caught for use by garbage collectors and the like. Using an insn that
23849 ;; maps to SIGILL makes it more likely the program will rightfully die.
23850 ;; Keeping with tradition, "6" is in honor of #UD.
;; The instruction is emitted by mnemonic when HAVE_AS_IX86_UD2 is defined,
;; and otherwise as a 16-bit data word via ASM_SHORT.
23851 (define_insn "trap"
23852 [(trap_if (const_int 1) (const_int 6))]
23853 ""
23854 {
23855 #ifdef HAVE_AS_IX86_UD2
23856 return "ud2";
23857 #else
23858 return ASM_SHORT "0x0b0f";
23859 #endif
23860 }
23861 [(set_attr "length" "2")])
23862
;; Produces the same assembly as "trap", but is represented as an
;; unspec_volatile (UNSPECV_UD2) instead of an unconditional trap_if.
23863 (define_insn "ud2"
23864 [(unspec_volatile [(const_int 0)] UNSPECV_UD2)]
23865 ""
23866 {
23867 #ifdef HAVE_AS_IX86_UD2
23868 return "ud2";
23869 #else
23870 return ASM_SHORT "0x0b0f";
23871 #endif
23872 }
23873 [(set_attr "length" "2")])
23874
;; Expander for the standard "prefetch" pattern.
;;   operand 0: address to prefetch
;;   operand 1: nonzero for a write prefetch, zero for a read prefetch
;;   operand 2: locality hint, asserted to be in the range 0..3
;; The preparation code rewrites operands 1 and 2 so that the resulting RTL
;; matches one of the prefetch insns the enabled ISA extensions provide.
(define_expand "prefetch"
  [(prefetch (match_operand 0 "address_operand")
             (match_operand:SI 1 "const_int_operand")
             (match_operand:SI 2 "const_int_operand"))]
  "TARGET_3DNOW || TARGET_PREFETCH_SSE || TARGET_PRFCHW || TARGET_PREFETCHWT1"
{
  const bool is_write = operands[1] != const0_rtx;
  const int hint = INTVAL (operands[2]);

  gcc_assert (IN_RANGE (hint, 0, 3));

  /* The 3dNOW-style prefetch is chosen when a write prefetch is requested
     that the SSE form cannot express (non-SSE2 athlon machines), or when
     the SSE prefetch is missing altogether (K6 machines).  In every other
     case the SSE prefetch is preferred because it keeps the locality.  */

  if (is_write && TARGET_PREFETCHWT1)
    /* Raise the locality to at least 2.  */
    operands[2] = GEN_INT (MAX (hint, 2));
  else if (is_write && (TARGET_PRFCHW || (TARGET_3DNOW && !TARGET_SSE2)))
    /* Write prefetch outside the SSE form: locality is forced to 3.  */
    operands[2] = GEN_INT (3);
  else if (TARGET_PREFETCH_SSE)
    {
      /* SSE form: a write request is turned into a read prefetch; a read
         request is left untouched.  */
      if (is_write)
        operands[1] = const0_rtx;
    }
  else
    {
      /* Only the 3dNOW form remains; locality is forced to 3.  */
      gcc_assert (TARGET_3DNOW);
      operands[2] = GEN_INT (3);
    }
})
23918
;; SSE read prefetch (operand 1 of the prefetch rtx is fixed to 0).  The
;; locality hint in insn operand 1 selects the mnemonic:
;;   0 -> prefetchnta, 1 -> prefetcht2, 2 -> prefetcht1, 3 -> prefetcht0.
(define_insn "*prefetch_sse"
  [(prefetch (match_operand 0 "address_operand" "p")
             (const_int 0)
             (match_operand:SI 1 "const_int_operand"))]
  "TARGET_PREFETCH_SSE"
{
  const int hint = INTVAL (operands[1]);

  gcc_assert (IN_RANGE (hint, 0, 3));

  switch (hint)
    {
    case 3:
      return "prefetcht0\t%a0";
    case 2:
      return "prefetcht1\t%a0";
    case 1:
      return "prefetcht2\t%a0";
    default:
      /* Only 0 can reach here because of the range assertion above.  */
      return "prefetchnta\t%a0";
    }
}
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "prefetch")
   (set (attr "length_address")
        (symbol_ref "memory_address_length (operands[0], false)"))
   (set_attr "memory" "none")])
23939
;; Prefetch with locality fixed to 3.  Operand 1 is the read/write flag:
;; zero emits "prefetch", anything else emits "prefetchw".
(define_insn "*prefetch_3dnow"
  [(prefetch (match_operand 0 "address_operand" "p")
             (match_operand:SI 1 "const_int_operand")
             (const_int 3))]
  "TARGET_3DNOW || TARGET_PRFCHW || TARGET_PREFETCHWT1"
{
  return operands[1] == const0_rtx ? "prefetch\t%a0" : "prefetchw\t%a0";
}
  [(set_attr "type" "mmx")
   (set (attr "length_address")
        (symbol_ref "memory_address_length (operands[0], false)"))
   (set_attr "memory" "none")])
23955
;; Write prefetch with locality 2.  Matches only the exact
;; (write = 1, locality = 2) form, which is what the "prefetch" expander
;; produces for write requests with locality <= 2 under TARGET_PREFETCHWT1.
;; The output template is a plain string; it is not followed by a ';',
;; which the md reader would treat as the start of a comment.
(define_insn "*prefetch_prefetchwt1"
  [(prefetch (match_operand 0 "address_operand" "p")
             (const_int 1)
             (const_int 2))]
  "TARGET_PREFETCHWT1"
  "prefetchwt1\t%a0"
  [(set_attr "type" "sse")
   (set (attr "length_address")
        (symbol_ref "memory_address_length (operands[0], false)"))
   (set_attr "memory" "none")])
23966
;; Instruction prefetch (64-bit only, TARGET_PREFETCHI).  Operand 0 must
;; satisfy local_func_symbolic_operand; operand 1 is the locality hint,
;; asserted to be 2 or 3:  2 -> prefetchit1, 3 -> prefetchit0.
;; NOTE(review): the other prefetch patterns in this file print the address
;; with %a0 while this one prints %0 -- confirm that %0 produces the intended
;; memory-operand syntax for a symbolic operand.
(define_insn "prefetchi"
  [(unspec_volatile [(match_operand 0 "local_func_symbolic_operand" "p")
                     (match_operand:SI 1 "const_int_operand")]
                    UNSPECV_PREFETCHI)]
  "TARGET_PREFETCHI && TARGET_64BIT"
{
  const int hint = INTVAL (operands[1]);

  gcc_assert (IN_RANGE (hint, 2, 3));

  return hint == 3 ? "prefetchit0\t%0" : "prefetchit1\t%0";
}
  [(set_attr "type" "sse")
   (set (attr "length_address")
        (symbol_ref "memory_address_length (operands[0], false)"))
   (set_attr "memory" "none")])
23986
;; Expander for "stack_protect_set": both operands are memory.  It emits the
;; ptr_mode instance of stack_protect_set_1 with the same operands and
;; generates nothing else (DONE).
23987 (define_expand "stack_protect_set"
23988 [(match_operand 0 "memory_operand")
23989 (match_operand 1 "memory_operand")]
23990 ""
23991 {
23992 emit_insn (gen_stack_protect_set_1
23993 (ptr_mode, operands[0], operands[1]));
23994 DONE;
23995 })
23996
;; Copy memory operand 1 to memory operand 0 through the early-clobbered
;; scratch register (operand 2), then zero the scratch.  The zeroing is part
;; of the pattern (set ... (const_int 0)) and is done with xor, which is why
;; the flags register is clobbered.  All three instructions are emitted as
;; one insn of type "multi".
23997 (define_insn "@stack_protect_set_1_<mode>"
23998 [(set (match_operand:PTR 0 "memory_operand" "=m")
23999 (unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")]
24000 UNSPEC_SP_SET))
24001 (set (match_scratch:PTR 2 "=&r") (const_int 0))
24002 (clobber (reg:CC FLAGS_REG))]
24003 ""
24004 {
24005 output_asm_insn ("mov{<imodesuffix>}\t{%1, %2|%2, %1}", operands);
24006 output_asm_insn ("mov{<imodesuffix>}\t{%2, %0|%0, %2}", operands);
24007 return "xor{l}\t%k2, %k2";
24008 }
24009 [(set_attr "type" "multi")])
24010
24011 ;; Patterns and peephole2s to optimize stack_protect_set_1_<mode>
24012 ;; immediately followed by *mov{s,d}i_internal to the same register,
24013 ;; where we can avoid the xor{l} above. We don't split this, so that
24014 ;; scheduling or anything else doesn't separate the *stack_protect_set*
24015 ;; pattern from the set of the register that overwrites the register
24016 ;; with a new value.
;; Operand 1 serves first as the temporary for the memory-to-memory copy
;; (operand 3 to operand 0) and is then loaded with the SImode operand 2.
;; Operand 2 must not mention operand 1.  The final load is printed as lea
;; when operand 2 is a 32-bit PIC operand or ix86_use_lea_for_mov says so,
;; and as mov otherwise.
24017 (define_insn "*stack_protect_set_2_<mode>"
24018 [(set (match_operand:PTR 0 "memory_operand" "=m")
24019 (unspec:PTR [(match_operand:PTR 3 "memory_operand" "m")]
24020 UNSPEC_SP_SET))
24021 (set (match_operand:SI 1 "register_operand" "=&r")
24022 (match_operand:SI 2 "general_operand" "g"))
24023 (clobber (reg:CC FLAGS_REG))]
24024 "reload_completed
24025 && !reg_overlap_mentioned_p (operands[1], operands[2])"
24026 {
24027 output_asm_insn ("mov{<imodesuffix>}\t{%3, %<k>1|%<k>1, %3}", operands);
24028 output_asm_insn ("mov{<imodesuffix>}\t{%<k>1, %0|%0, %<k>1}", operands);
24029 if (pic_32bit_operand (operands[2], SImode)
24030 || ix86_use_lea_for_mov (insn, operands + 1))
24031 return "lea{l}\t{%E2, %1|%1, %E2}";
24032 else
24033 return "mov{l}\t{%2, %1|%1, %2}";
24034 }
24035 [(set_attr "type" "multi")
24036 (set_attr "length" "24")])
24037
;; Merge a stack_protect_set_1 insn with an immediately following SImode set
;; of the same hard register (checked via REGNO) into the shape of
;; *stack_protect_set_2_<mode>.  The new source must be a general register,
;; memory or immediate operand and must not mention the register being set.
24038 (define_peephole2
24039 [(parallel [(set (match_operand:PTR 0 "memory_operand")
24040 (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
24041 UNSPEC_SP_SET))
24042 (set (match_operand:PTR 2 "general_reg_operand") (const_int 0))
24043 (clobber (reg:CC FLAGS_REG))])
24044 (set (match_operand:SI 3 "general_reg_operand")
24045 (match_operand:SI 4))]
24046 "REGNO (operands[2]) == REGNO (operands[3])
24047 && general_operand (operands[4], SImode)
24048 && (general_reg_operand (operands[4], SImode)
24049 || memory_operand (operands[4], SImode)
24050 || immediate_operand (operands[4], SImode))
24051 && !reg_overlap_mentioned_p (operands[3], operands[4])"
24052 [(parallel [(set (match_dup 0)
24053 (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
24054 (set (match_dup 3) (match_dup 4))
24055 (clobber (reg:CC FLAGS_REG))])])
24056
;; 64-bit counterpart of the combined set-and-reload insn: copy memory
;; operand 3 to memory operand 0 through register operand 1, then load
;; operand 1 with the DImode operand 2.  The last instruction depends on the
;; operand and the matched alternative:
;;   PIC operand                -> lea{q}
;;   alternative 0 ("Z")        -> mov{l} on the 32-bit register names
;;   alternative 2 ("i")        -> movabs{q}
;;   alternative 1 ("rem")      -> lea{q} if ix86_use_lea_for_mov, else mov{q}
24057 (define_insn "*stack_protect_set_3"
24058 [(set (match_operand:DI 0 "memory_operand" "=m,m,m")
24059 (unspec:DI [(match_operand:DI 3 "memory_operand" "m,m,m")]
24060 UNSPEC_SP_SET))
24061 (set (match_operand:DI 1 "register_operand" "=&r,r,r")
24062 (match_operand:DI 2 "general_operand" "Z,rem,i"))
24063 (clobber (reg:CC FLAGS_REG))]
24064 "TARGET_64BIT
24065 && reload_completed
24066 && !reg_overlap_mentioned_p (operands[1], operands[2])"
24067 {
24068 output_asm_insn ("mov{q}\t{%3, %1|%1, %3}", operands);
24069 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", operands);
24070 if (pic_32bit_operand (operands[2], DImode))
24071 return "lea{q}\t{%E2, %1|%1, %E2}";
24072 else if (which_alternative == 0)
24073 return "mov{l}\t{%k2, %k1|%k1, %k2}";
24074 else if (which_alternative == 2)
24075 return "movabs{q}\t{%2, %1|%1, %2}";
24076 else if (ix86_use_lea_for_mov (insn, operands + 1))
24077 return "lea{q}\t{%E2, %1|%1, %E2}";
24078 else
24079 return "mov{q}\t{%2, %1|%1, %2}";
24080 }
24081 [(set_attr "type" "multi")
24082 (set_attr "length" "24")])
24083
;; Merge a DImode stack_protect_set_1 insn with an immediately following
;; DImode set of the same register into the shape of *stack_protect_set_3.
;; The new source must be a general register, memory, or a constant that is
;; acceptable as an immediate (and legitimate under PIC), and must not
;; mention the register being set.
;; The replacement uses unspec:DI, matching both the input template and
;; *stack_protect_set_3.  Writing it with the PTR iterator would duplicate
;; this peephole per pointer mode and give the SImode copy an SImode unspec
;; around DImode operands.
(define_peephole2
  [(parallel [(set (match_operand:DI 0 "memory_operand")
                   (unspec:DI [(match_operand:DI 1 "memory_operand")]
                              UNSPEC_SP_SET))
              (set (match_operand:DI 2 "general_reg_operand") (const_int 0))
              (clobber (reg:CC FLAGS_REG))])
   (set (match_dup 2) (match_operand:DI 3))]
  "TARGET_64BIT
   && general_operand (operands[3], DImode)
   && (general_reg_operand (operands[3], DImode)
       || memory_operand (operands[3], DImode)
       || x86_64_zext_immediate_operand (operands[3], DImode)
       || x86_64_immediate_operand (operands[3], DImode)
       || (CONSTANT_P (operands[3])
           && (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[3]))))
   && !reg_overlap_mentioned_p (operands[2], operands[3])"
  [(parallel [(set (match_dup 0)
                   (unspec:DI [(match_dup 1)] UNSPEC_SP_SET))
              (set (match_dup 2) (match_dup 3))
              (clobber (reg:CC FLAGS_REG))])])
24104
;; Expander for "stack_protect_test": operands 0 and 1 are the two memory
;; values to compare, operand 2 is the branch target.  It emits the ptr_mode
;; instance of stack_protect_test_1, which sets the flags register in
;; CCZmode, followed by a conditional jump to operand 2 taken on the EQ
;; condition of those flags.
24105 (define_expand "stack_protect_test"
24106 [(match_operand 0 "memory_operand")
24107 (match_operand 1 "memory_operand")
24108 (match_operand 2)]
24109 ""
24110 {
24111 rtx flags = gen_rtx_REG (CCZmode, FLAGS_REG);
24112
24113 emit_insn (gen_stack_protect_test_1
24114 (ptr_mode, flags, operands[0], operands[1]));
24115
24116 emit_jump_insn (gen_cbranchcc4 (gen_rtx_EQ (VOIDmode, flags, const0_rtx),
24117 flags, const0_rtx, operands[2]));
24118 DONE;
24119 })
24120
;; Load memory operand 1 into an early-clobbered scratch register and
;; subtract memory operand 2 from it.  The only modelled result is the
;; CCZmode flags value (operand 0); the scratch is merely clobbered.
24121 (define_insn "@stack_protect_test_1_<mode>"
24122 [(set (match_operand:CCZ 0 "flags_reg_operand")
24123 (unspec:CCZ [(match_operand:PTR 1 "memory_operand" "m")
24124 (match_operand:PTR 2 "memory_operand" "m")]
24125 UNSPEC_SP_TEST))
24126 (clobber (match_scratch:PTR 3 "=&r"))]
24127 ""
24128 {
24129 output_asm_insn ("mov{<imodesuffix>}\t{%1, %3|%3, %1}", operands);
24130 return "sub{<imodesuffix>}\t{%2, %3|%3, %2}";
24131 }
24132 [(set_attr "type" "multi")])
24133
;; crc32 with an 8-, 16- or 32-bit source (SWI124).  The SImode accumulator
;; is both input (operand 1, tied to operand 0) and output.  A data16 prefix
;; is counted for an HImode source and a REX prefix for a QImode source that
;; matches ext_QIreg_operand.
24134 (define_insn "sse4_2_crc32<mode>"
24135 [(set (match_operand:SI 0 "register_operand" "=r")
24136 (unspec:SI
24137 [(match_operand:SI 1 "register_operand" "0")
24138 (match_operand:SWI124 2 "nonimmediate_operand" "<r>m")]
24139 UNSPEC_CRC32))]
24140 "TARGET_CRC32"
24141 "crc32{<imodesuffix>}\t{%2, %0|%0, %2}"
24142 [(set_attr "type" "sselog1")
24143 (set_attr "prefix_rep" "1")
24144 (set_attr "prefix_extra" "1")
24145 (set (attr "prefix_data16")
24146 (if_then_else (match_operand:HI 2)
24147 (const_string "1")
24148 (const_string "*")))
24149 (set (attr "prefix_rex")
24150 (if_then_else (match_operand:QI 2 "ext_QIreg_operand")
24151 (const_string "1")
24152 (const_string "*")))
24153 (set_attr "mode" "SI")])
24154
;; 64-bit crc32 with a DImode source.  The SImode unspec result is modelled
;; as zero-extended into the DImode destination; the accumulator input
;; (operand 1) is tied to the destination register.
24155 (define_insn "sse4_2_crc32di"
24156 [(set (match_operand:DI 0 "register_operand" "=r")
24157 (zero_extend:DI
24158 (unspec:SI
24159 [(match_operand:SI 1 "register_operand" "0")
24160 (match_operand:DI 2 "nonimmediate_operand" "rm")]
24161 UNSPEC_CRC32)))]
24162 "TARGET_64BIT && TARGET_CRC32"
24163 "crc32{q}\t{%2, %0|%0, %2}"
24164 [(set_attr "type" "sselog1")
24165 (set_attr "prefix_rep" "1")
24166 (set_attr "prefix_extra" "1")
24167 (set_attr "mode" "DI")])
24168
;; 32-bit rdpmc.  The input is constrained to register class "c" and the
;; DImode result to class "A"; neither appears in the assembly template, so
;; both are implicit operands of the instruction.
24169 (define_insn "rdpmc"
24170 [(set (match_operand:DI 0 "register_operand" "=A")
24171 (unspec_volatile:DI [(match_operand:SI 1 "register_operand" "c")]
24172 UNSPECV_RDPMC))]
24173 "!TARGET_64BIT"
24174 "rdpmc"
24175 [(set_attr "type" "other")
24176 (set_attr "length" "2")])
24177
;; 64-bit rdpmc.  The result is modelled as two separate DImode outputs, one
;; in class "a" and one in class "d", both derived from the same "c" input.
24178 (define_insn "rdpmc_rex64"
24179 [(set (match_operand:DI 0 "register_operand" "=a")
24180 (unspec_volatile:DI [(match_operand:SI 2 "register_operand" "c")]
24181 UNSPECV_RDPMC))
24182 (set (match_operand:DI 1 "register_operand" "=d")
24183 (unspec_volatile:DI [(match_dup 2)] UNSPECV_RDPMC))]
24184 "TARGET_64BIT"
24185 "rdpmc"
24186 [(set_attr "type" "other")
24187 (set_attr "length" "2")])
24188
;; 32-bit rdtsc: no inputs; the DImode result is an implicit operand in
;; register class "A".
24189 (define_insn "rdtsc"
24190 [(set (match_operand:DI 0 "register_operand" "=A")
24191 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC))]
24192 "!TARGET_64BIT"
24193 "rdtsc"
24194 [(set_attr "type" "other")
24195 (set_attr "length" "2")])
24196
;; 64-bit rdtsc: the result is modelled as two DImode outputs, one in class
;; "a" and one in class "d".
24197 (define_insn "rdtsc_rex64"
24198 [(set (match_operand:DI 0 "register_operand" "=a")
24199 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC))
24200 (set (match_operand:DI 1 "register_operand" "=d")
24201 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC))]
24202 "TARGET_64BIT"
24203 "rdtsc"
24204 [(set_attr "type" "other")
24205 (set_attr "length" "2")])
24206
;; 32-bit rdtscp: a DImode output in class "A" plus an additional SImode
;; output in class "c".
24207 (define_insn "rdtscp"
24208 [(set (match_operand:DI 0 "register_operand" "=A")
24209 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSCP))
24210 (set (match_operand:SI 1 "register_operand" "=c")
24211 (unspec_volatile:SI [(const_int 0)] UNSPECV_RDTSCP))]
24212 "!TARGET_64BIT"
24213 "rdtscp"
24214 [(set_attr "type" "other")
24215 (set_attr "length" "3")])
24216
;; 64-bit rdtscp: DImode outputs in classes "a" and "d" plus an SImode
;; output in class "c".
24217 (define_insn "rdtscp_rex64"
24218 [(set (match_operand:DI 0 "register_operand" "=a")
24219 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSCP))
24220 (set (match_operand:DI 1 "register_operand" "=d")
24221 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSCP))
24222 (set (match_operand:SI 2 "register_operand" "=c")
24223 (unspec_volatile:SI [(const_int 0)] UNSPECV_RDTSCP))]
24224 "TARGET_64BIT"
24225 "rdtscp"
24226 [(set_attr "type" "other")
24227 (set_attr "length" "3")])
24228
24229 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
24230 ;;
24231 ;; FXSR, XSAVE and XSAVEOPT instructions
24232 ;;
24233 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
24234
;; fxsave: volatile store into a BLKmode memory operand.  The length is the
;; default address length plus 3 bytes.
24235 (define_insn "fxsave"
24236 [(set (match_operand:BLK 0 "memory_operand" "=m")
24237 (unspec_volatile:BLK [(const_int 0)] UNSPECV_FXSAVE))]
24238 "TARGET_FXSR"
24239 "fxsave\t%0"
24240 [(set_attr "type" "other")
24241 (set_attr "memory" "store")
24242 (set (attr "length")
24243 (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
24244
;; fxsave64: 64-bit-only form of fxsave; one byte longer (address length
;; plus 4).
24245 (define_insn "fxsave64"
24246 [(set (match_operand:BLK 0 "memory_operand" "=m")
24247 (unspec_volatile:BLK [(const_int 0)] UNSPECV_FXSAVE64))]
24248 "TARGET_64BIT && TARGET_FXSR"
24249 "fxsave64\t%0"
24250 [(set_attr "type" "other")
24251 (set_attr "memory" "store")
24252 (set (attr "length")
24253 (symbol_ref "ix86_attr_length_address_default (insn) + 4"))])
24254
;; fxrstor: volatile operation reading a BLKmode memory operand; no RTL
;; destination is modelled.  Length is the address length plus 3.
24255 (define_insn "fxrstor"
24256 [(unspec_volatile [(match_operand:BLK 0 "memory_operand" "m")]
24257 UNSPECV_FXRSTOR)]
24258 "TARGET_FXSR"
24259 "fxrstor\t%0"
24260 [(set_attr "type" "other")
24261 (set_attr "memory" "load")
24262 (set (attr "length")
24263 (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
24264
;; fxrstor64: 64-bit-only form of fxrstor; one byte longer (address length
;; plus 4).
24265 (define_insn "fxrstor64"
24266 [(unspec_volatile [(match_operand:BLK 0 "memory_operand" "m")]
24267 UNSPECV_FXRSTOR64)]
24268 "TARGET_64BIT && TARGET_FXSR"
24269 "fxrstor64\t%0"
24270 [(set_attr "type" "other")
24271 (set_attr "memory" "load")
24272 (set (attr "length")
24273 (symbol_ref "ix86_attr_length_address_default (insn) + 4"))])
24274
;; Save-type unspecs.  The plain xsave entry carries no extra condition; the
;; xsaveopt/xsavec/xsaves entries are each enabled by their own target flag.
24275 (define_int_iterator ANY_XSAVE
24276 [UNSPECV_XSAVE
24277 (UNSPECV_XSAVEOPT "TARGET_XSAVEOPT")
24278 (UNSPECV_XSAVEC "TARGET_XSAVEC")
24279 (UNSPECV_XSAVES "TARGET_XSAVES")])
24280
;; The same set for the "64" forms of the instructions.
24281 (define_int_iterator ANY_XSAVE64
24282 [UNSPECV_XSAVE64
24283 (UNSPECV_XSAVEOPT64 "TARGET_XSAVEOPT")
24284 (UNSPECV_XSAVEC64 "TARGET_XSAVEC")
24285 (UNSPECV_XSAVES64 "TARGET_XSAVES")])
24286
;; Maps each save unspec to its mnemonic.  For the 64 variants the "64"
;; suffix is already part of the string, so it is used unchanged both as
;; the insn name and in the assembly template.
24287 (define_int_attr xsave
24288 [(UNSPECV_XSAVE "xsave")
24289 (UNSPECV_XSAVE64 "xsave64")
24290 (UNSPECV_XSAVEOPT "xsaveopt")
24291 (UNSPECV_XSAVEOPT64 "xsaveopt64")
24292 (UNSPECV_XSAVEC "xsavec")
24293 (UNSPECV_XSAVEC64 "xsavec64")
24294 (UNSPECV_XSAVES "xsaves")
24295 (UNSPECV_XSAVES64 "xsaves64")])
24296
;; Restore-type unspecs; the xrstors entries require TARGET_XSAVES.
24297 (define_int_iterator ANY_XRSTOR
24298 [UNSPECV_XRSTOR
24299 (UNSPECV_XRSTORS "TARGET_XSAVES")])
24300
24301 (define_int_iterator ANY_XRSTOR64
24302 [UNSPECV_XRSTOR64
24303 (UNSPECV_XRSTORS64 "TARGET_XSAVES")])
24304
;; Maps each restore unspec to its base mnemonic.  Unlike the xsave
;; attribute, the 64 variants map to the unsuffixed name; the pattern that
;; iterates over ANY_XRSTOR64 appends "64" itself.
24305 (define_int_attr xrstor
24306 [(UNSPECV_XRSTOR "xrstor")
24307 (UNSPECV_XRSTOR64 "xrstor")
24308 (UNSPECV_XRSTORS "xrstors")
24309 (UNSPECV_XRSTORS64 "xrstors")])
24310
;; 32-bit save forms (one insn per ANY_XSAVE entry).  The DImode input in
;; class "A" does not appear in the template and is therefore an implicit
;; operand; only the BLKmode destination is printed.
24311 (define_insn "<xsave>"
24312 [(set (match_operand:BLK 0 "memory_operand" "=m")
24313 (unspec_volatile:BLK
24314 [(match_operand:DI 1 "register_operand" "A")]
24315 ANY_XSAVE))]
24316 "!TARGET_64BIT && TARGET_XSAVE"
24317 "<xsave>\t%0"
24318 [(set_attr "type" "other")
24319 (set_attr "memory" "store")
24320 (set (attr "length")
24321 (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
24322
;; Save forms without the 64 suffix on 64-bit targets: the implicit input is
;; split into two SImode registers, one in class "a" and one in class "d".
24323 (define_insn "<xsave>_rex64"
24324 [(set (match_operand:BLK 0 "memory_operand" "=m")
24325 (unspec_volatile:BLK
24326 [(match_operand:SI 1 "register_operand" "a")
24327 (match_operand:SI 2 "register_operand" "d")]
24328 ANY_XSAVE))]
24329 "TARGET_64BIT && TARGET_XSAVE"
24330 "<xsave>\t%0"
24331 [(set_attr "type" "other")
24332 (set_attr "memory" "store")
24333 (set (attr "length")
24334 (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
24335
;; The "64" save forms: this pattern iterates over ANY_XSAVE64, so <xsave>
;; expands to the 64-suffixed mnemonics.  Same operands as the _rex64
;; pattern; one byte longer (address length plus 4).
24336 (define_insn "<xsave>"
24337 [(set (match_operand:BLK 0 "memory_operand" "=m")
24338 (unspec_volatile:BLK
24339 [(match_operand:SI 1 "register_operand" "a")
24340 (match_operand:SI 2 "register_operand" "d")]
24341 ANY_XSAVE64))]
24342 "TARGET_64BIT && TARGET_XSAVE"
24343 "<xsave>\t%0"
24344 [(set_attr "type" "other")
24345 (set_attr "memory" "store")
24346 (set (attr "length")
24347 (symbol_ref "ix86_attr_length_address_default (insn) + 4"))])
24348
;; 32-bit restore forms (one insn per ANY_XRSTOR entry).  Reads the BLKmode
;; memory operand; the DImode input in class "A" is implicit (not printed).
24349 (define_insn "<xrstor>"
24350 [(unspec_volatile:BLK
24351 [(match_operand:BLK 0 "memory_operand" "m")
24352 (match_operand:DI 1 "register_operand" "A")]
24353 ANY_XRSTOR)]
24354 "!TARGET_64BIT && TARGET_XSAVE"
24355 "<xrstor>\t%0"
24356 [(set_attr "type" "other")
24357 (set_attr "memory" "load")
24358 (set (attr "length")
24359 (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
24360
;; Restore forms without the 64 suffix on 64-bit targets: the implicit input
;; is split into SImode registers in classes "a" and "d".
24361 (define_insn "<xrstor>_rex64"
24362 [(unspec_volatile:BLK
24363 [(match_operand:BLK 0 "memory_operand" "m")
24364 (match_operand:SI 1 "register_operand" "a")
24365 (match_operand:SI 2 "register_operand" "d")]
24366 ANY_XRSTOR)]
24367 "TARGET_64BIT && TARGET_XSAVE"
24368 "<xrstor>\t%0"
24369 [(set_attr "type" "other")
24370 (set_attr "memory" "load")
24371 (set (attr "length")
24372 (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
24373
;; The "64" restore forms.  The xrstor attribute yields the unsuffixed
;; mnemonic, so "64" is appended explicitly in both the insn name and the
;; template.  One byte longer than the unsuffixed forms.
24374 (define_insn "<xrstor>64"
24375 [(unspec_volatile:BLK
24376 [(match_operand:BLK 0 "memory_operand" "m")
24377 (match_operand:SI 1 "register_operand" "a")
24378 (match_operand:SI 2 "register_operand" "d")]
24379 ANY_XRSTOR64)]
24380 "TARGET_64BIT && TARGET_XSAVE"
24381 "<xrstor>64\t%0"
24382 [(set_attr "type" "other")
24383 (set_attr "memory" "load")
24384 (set (attr "length")
24385 (symbol_ref "ix86_attr_length_address_default (insn) + 4"))])
24386
;; 32-bit xsetbv: all operands are implicit -- an SImode input in class "c"
;; and a DImode input in class "A".  No output is modelled.
24387 (define_insn "xsetbv"
24388 [(unspec_volatile:SI
24389 [(match_operand:SI 0 "register_operand" "c")
24390 (match_operand:DI 1 "register_operand" "A")]
24391 UNSPECV_XSETBV)]
24392 "!TARGET_64BIT && TARGET_XSAVE"
24393 "xsetbv"
24394 [(set_attr "type" "other")])
24395
;; 64-bit xsetbv: the DImode input of the 32-bit pattern is replaced by two
;; SImode inputs in classes "a" and "d".
24396 (define_insn "xsetbv_rex64"
24397 [(unspec_volatile:SI
24398 [(match_operand:SI 0 "register_operand" "c")
24399 (match_operand:SI 1 "register_operand" "a")
24400 (match_operand:SI 2 "register_operand" "d")]
24401 UNSPECV_XSETBV)]
24402 "TARGET_64BIT && TARGET_XSAVE"
24403 "xsetbv"
24404 [(set_attr "type" "other")])
24405
;; 32-bit xgetbv: SImode input in class "c", DImode result in class "A";
;; both implicit.
24406 (define_insn "xgetbv"
24407 [(set (match_operand:DI 0 "register_operand" "=A")
24408 (unspec_volatile:DI [(match_operand:SI 1 "register_operand" "c")]
24409 UNSPECV_XGETBV))]
24410 "!TARGET_64BIT && TARGET_XSAVE"
24411 "xgetbv"
24412 [(set_attr "type" "other")])
24413
;; 64-bit xgetbv: the result is modelled as two DImode outputs in classes
;; "a" and "d", both derived from the same "c" input.
24414 (define_insn "xgetbv_rex64"
24415 [(set (match_operand:DI 0 "register_operand" "=a")
24416 (unspec_volatile:DI [(match_operand:SI 2 "register_operand" "c")]
24417 UNSPECV_XGETBV))
24418 (set (match_operand:DI 1 "register_operand" "=d")
24419 (unspec_volatile:DI [(match_dup 2)] UNSPECV_XGETBV))]
24420 "TARGET_64BIT && TARGET_XSAVE"
24421 "xgetbv"
24422 [(set_attr "type" "other")])
24423
24424 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
24425 ;;
24426 ;; Floating-point instructions for atomic compound assignments
24427 ;;
24428 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
24429
24430 ; Clobber all floating-point registers on environment save and restore
24431 ; to ensure that the TOS value saved at fnstenv is valid after fldenv.
; fnstenv stores into a BLKmode memory operand and lists ST0..ST7 as
; clobbered.  Length is the default address length plus 2.
24432 (define_insn "fnstenv"
24433 [(set (match_operand:BLK 0 "memory_operand" "=m")
24434 (unspec_volatile:BLK [(const_int 0)] UNSPECV_FNSTENV))
24435 (clobber (reg:XF ST0_REG))
24436 (clobber (reg:XF ST1_REG))
24437 (clobber (reg:XF ST2_REG))
24438 (clobber (reg:XF ST3_REG))
24439 (clobber (reg:XF ST4_REG))
24440 (clobber (reg:XF ST5_REG))
24441 (clobber (reg:XF ST6_REG))
24442 (clobber (reg:XF ST7_REG))]
24443 "TARGET_80387"
24444 "fnstenv\t%0"
24445 [(set_attr "type" "other")
24446 (set_attr "memory" "store")
24447 (set (attr "length")
24448 (symbol_ref "ix86_attr_length_address_default (insn) + 2"))])
24449
; fldenv reads a BLKmode memory operand and, like fnstenv, lists ST0..ST7
; as clobbered.  Length is the default address length plus 2.
24450 (define_insn "fldenv"
24451 [(unspec_volatile [(match_operand:BLK 0 "memory_operand" "m")]
24452 UNSPECV_FLDENV)
24453 (clobber (reg:XF ST0_REG))
24454 (clobber (reg:XF ST1_REG))
24455 (clobber (reg:XF ST2_REG))
24456 (clobber (reg:XF ST3_REG))
24457 (clobber (reg:XF ST4_REG))
24458 (clobber (reg:XF ST5_REG))
24459 (clobber (reg:XF ST6_REG))
24460 (clobber (reg:XF ST7_REG))]
24461 "TARGET_80387"
24462 "fldenv\t%0"
24463 [(set_attr "type" "other")
24464 (set_attr "memory" "load")
24465 (set (attr "length")
24466 (symbol_ref "ix86_attr_length_address_default (insn) + 2"))])
24467
; fnstsw writes an HImode value to either a register of class "a"
; (alternative 0, no memory access) or to memory (alternative 1, a store).
24468 (define_insn "fnstsw"
24469 [(set (match_operand:HI 0 "nonimmediate_operand" "=a,m")
24470 (unspec_volatile:HI [(const_int 0)] UNSPECV_FNSTSW))]
24471 "TARGET_80387"
24472 "fnstsw\t%0"
24473 [(set_attr "type" "other,other")
24474 (set_attr "memory" "none,store")
24475 (set (attr "length")
24476 (symbol_ref "ix86_attr_length_address_default (insn) + 2"))])
24477
; fnclex: operand-less volatile instruction, two bytes, no memory access.
24478 (define_insn "fnclex"
24479 [(unspec_volatile [(const_int 0)] UNSPECV_FNCLEX)]
24480 "TARGET_80387"
24481 "fnclex"
24482 [(set_attr "type" "other")
24483 (set_attr "memory" "none")
24484 (set_attr "length" "2")])
24485
24486 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
24487 ;;
24488 ;; LWP instructions
24489 ;;
24490 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
24491
;; llwpcb: consumes one pointer-mode (P) register operand; no output is
;; modelled.
24492 (define_insn "@lwp_llwpcb<mode>"
24493 [(unspec_volatile [(match_operand:P 0 "register_operand" "r")]
24494 UNSPECV_LLWP_INTRINSIC)]
24495 "TARGET_LWP"
24496 "llwpcb\t%0"
24497 [(set_attr "type" "lwp")
24498 (set_attr "mode" "<MODE>")
24499 (set_attr "length" "5")])
24500
;; slwpcb: produces a pointer-mode (P) value in a general register; takes no
;; inputs.
24501 (define_insn "@lwp_slwpcb<mode>"
24502 [(set (match_operand:P 0 "register_operand" "=r")
24503 (unspec_volatile:P [(const_int 0)] UNSPECV_SLWP_INTRINSIC))]
24504 "TARGET_LWP"
24505 "slwpcb\t%0"
24506 [(set_attr "type" "lwp")
24507 (set_attr "mode" "<MODE>")
24508 (set_attr "length" "5")])
24509
;; lwpval: three inputs -- a 32- or 64-bit register, an SImode register or
;; memory operand, and an SImode integer constant.  No output is modelled.
;; The template prints the operands in AT&T order first, Intel order second.
24510 (define_insn "@lwp_lwpval<mode>"
24511 [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")
24512 (match_operand:SI 1 "nonimmediate_operand" "rm")
24513 (match_operand:SI 2 "const_int_operand")]
24514 UNSPECV_LWPVAL_INTRINSIC)]
24515 "TARGET_LWP"
24516 "lwpval\t{%2, %1, %0|%0, %1, %2}"
24517 [(set_attr "type" "lwp")
24518 (set_attr "mode" "<MODE>")
24519 (set (attr "length")
24520 (symbol_ref "ix86_attr_length_address_default (insn) + 9"))])
24521
;; lwpins: same three inputs as lwpval, but the result is modelled as a
;; CCCmode value written to the flags register.
24522 (define_insn "@lwp_lwpins<mode>"
24523 [(set (reg:CCC FLAGS_REG)
24524 (unspec_volatile:CCC [(match_operand:SWI48 0 "register_operand" "r")
24525 (match_operand:SI 1 "nonimmediate_operand" "rm")
24526 (match_operand:SI 2 "const_int_operand")]
24527 UNSPECV_LWPINS_INTRINSIC))]
24528 "TARGET_LWP"
24529 "lwpins\t{%2, %1, %0|%0, %1, %2}"
24530 [(set_attr "type" "lwp")
24531 (set_attr "mode" "<MODE>")
24532 (set (attr "length")
24533 (symbol_ref "ix86_attr_length_address_default (insn) + 9"))])
24534
;; Unspecs for reading the FS and GS base.
24535 (define_int_iterator RDFSGSBASE
24536 [UNSPECV_RDFSBASE
24537 UNSPECV_RDGSBASE])
24538
;; Unspecs for writing the FS and GS base.
24539 (define_int_iterator WRFSGSBASE
24540 [UNSPECV_WRFSBASE
24541 UNSPECV_WRGSBASE])
24542
;; Segment name substituted into the insn names and mnemonics of the
;; rd<fsgs>base / wr<fsgs>base patterns.
24543 (define_int_attr fsgs
24544 [(UNSPECV_RDFSBASE "fs")
24545 (UNSPECV_RDGSBASE "gs")
24546 (UNSPECV_WRFSBASE "fs")
24547 (UNSPECV_WRGSBASE "gs")])
24548
;; rdfsbase / rdgsbase: 64-bit only; writes a 32- or 64-bit general
;; register (SWI48).
24549 (define_insn "rd<fsgs>base<mode>"
24550 [(set (match_operand:SWI48 0 "register_operand" "=r")
24551 (unspec_volatile:SWI48 [(const_int 0)] RDFSGSBASE))]
24552 "TARGET_64BIT && TARGET_FSGSBASE"
24553 "rd<fsgs>base\t%0"
24554 [(set_attr "type" "other")
24555 (set_attr "prefix_extra" "2")])
24556
;; wrfsbase / wrgsbase: 64-bit only; consumes a 32- or 64-bit general
;; register (SWI48).  No output is modelled.
24557 (define_insn "wr<fsgs>base<mode>"
24558 [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")]
24559 WRFSGSBASE)]
24560 "TARGET_64BIT && TARGET_FSGSBASE"
24561 "wr<fsgs>base\t%0"
24562 [(set_attr "type" "other")
24563 (set_attr "prefix_extra" "2")])
24564
;; ptwrite: consumes a 32- or 64-bit register or memory operand.  No output
;; is modelled.
24565 (define_insn "ptwrite<mode>"
24566 [(unspec_volatile [(match_operand:SWI48 0 "nonimmediate_operand" "rm")]
24567 UNSPECV_PTWRITE)]
24568 "TARGET_PTWRITE"
24569 "ptwrite\t%0"
24570 [(set_attr "type" "other")
24571 (set_attr "prefix_extra" "2")])
24572
;; rdrand: writes a 16-, 32- or 64-bit general register (SWI248) and also
;; sets the flags register in CCCmode.
24573 (define_insn "@rdrand<mode>"
24574 [(set (match_operand:SWI248 0 "register_operand" "=r")
24575 (unspec_volatile:SWI248 [(const_int 0)] UNSPECV_RDRAND))
24576 (set (reg:CCC FLAGS_REG)
24577 (unspec_volatile:CCC [(const_int 0)] UNSPECV_RDRAND))]
24578 "TARGET_RDRND"
24579 "rdrand\t%0"
24580 [(set_attr "type" "other")
24581 (set_attr "prefix_extra" "1")])
24582
;; rdseed, instantiated for each SWI248 mode.  Same shape as the rdrand
;; pattern: register operand 0 and FLAGS_REG (CCC mode) are both set
;; from the UNSPECV_RDSEED volatile unspec.  Enabled by TARGET_RDSEED.
24583 (define_insn "@rdseed<mode>"
24584 [(set (match_operand:SWI248 0 "register_operand" "=r")
24585 (unspec_volatile:SWI248 [(const_int 0)] UNSPECV_RDSEED))
24586 (set (reg:CCC FLAGS_REG)
24587 (unspec_volatile:CCC [(const_int 0)] UNSPECV_RDSEED))]
24588 "TARGET_RDSEED"
24589 "rdseed\t%0"
24590 [(set_attr "type" "other")
24591 (set_attr "prefix_extra" "1")])
24592
;; Expander for "pause".  It takes no operands from the caller: the
;; preparation code builds a volatile BLKmode MEM whose address is a
;; Pmode SCRATCH and uses it as both destination and source of the
;; UNSPEC_PAUSE set, which is then matched by the "*pause" insn.
;; NOTE(review): presumably the BLKmode volatile MEM makes the insn look
;; like it may touch arbitrary memory -- confirm.
24593 (define_expand "pause"
24594 [(set (match_dup 0)
24595 (unspec:BLK [(match_dup 0)] UNSPEC_PAUSE))]
24596 ""
24597 {
24598 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
24599 MEM_VOLATILE_P (operands[0]) = 1;
24600 })
24601
24602 ;; Use "rep; nop", instead of "pause", to support older assemblers.
24603 ;; They have the same encoding.
;; Matches the BLKmode set produced by the "pause" expander (operand 0
;; appears as both destination and unspec input).  The condition is
;; empty, so the pattern is always available.  Length is fixed at 2 and
;; the memory attribute is "unknown".
24604 (define_insn "*pause"
24605 [(set (match_operand:BLK 0)
24606 (unspec:BLK [(match_dup 0)] UNSPEC_PAUSE))]
24607 ""
24608 "rep%; nop"
24609 [(set_attr "length" "2")
24610 (set_attr "memory" "unknown")])
24611
24612 ;; CET instructions
;; rdssp<suffix>, instantiated for each SWI48 mode.  Operand 1 is tied
;; to operand 0 by the "0" constraint, so the incoming value lives in
;; the destination register; only %0 is printed.
;; Enabled by TARGET_SHSTK or when flag_cf_protection has CF_RETURN set.
;; NOTE(review): the tie presumably preserves the register's prior value
;; when the instruction acts as a NOP (cf. UNSPECV_NOP_RDSSP) -- confirm.
24613 (define_insn "@rdssp<mode>"
24614 [(set (match_operand:SWI48 0 "register_operand" "=r")
24615 (unspec_volatile:SWI48 [(match_operand:SWI48 1 "register_operand" "0")]
24616 UNSPECV_NOP_RDSSP))]
24617 "TARGET_SHSTK || (flag_cf_protection & CF_RETURN)"
24618 "rdssp<mskmodesuffix>\t%0"
24619 [(set_attr "length" "6")
24620 (set_attr "type" "other")])
24621
;; incssp<suffix>, instantiated for each SWI48 mode.  A bare volatile
;; unspec with register operand 0 as its only input.  Same enabling
;; condition as rdssp: TARGET_SHSTK or CF_RETURN protection.
24622 (define_insn "@incssp<mode>"
24623 [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")]
24624 UNSPECV_INCSSP)]
24625 "TARGET_SHSTK || (flag_cf_protection & CF_RETURN)"
24626 "incssp<mskmodesuffix>\t%0"
24627 [(set_attr "length" "4")
24628 (set_attr "type" "other")])
24629
;; saveprevssp: operand-less volatile unspec.  Enabled by TARGET_SHSTK.
;; Length is fixed at 5.
24630 (define_insn "saveprevssp"
24631 [(unspec_volatile [(const_int 0)] UNSPECV_SAVEPREVSSP)]
24632 "TARGET_SHSTK"
24633 "saveprevssp"
24634 [(set_attr "length" "5")
24635 (set_attr "type" "other")])
24636
;; rstorssp: volatile unspec whose single input is a DImode memory
;; operand.  Enabled by TARGET_SHSTK.  Length is fixed at 5.
24637 (define_insn "rstorssp"
24638 [(unspec_volatile [(match_operand:DI 0 "memory_operand" "m")]
24639 UNSPECV_RSTORSSP)]
24640 "TARGET_SHSTK"
24641 "rstorssp\t%0"
24642 [(set_attr "length" "5")
24643 (set_attr "type" "other")])
24644
;; wrss<suffix>, instantiated for each SWI48 mode.  Volatile unspec over
;; a register (operand 0) and a memory operand (operand 1) of the same
;; mode; both are printed, register first.  Enabled by TARGET_SHSTK.
24645 (define_insn "@wrss<mode>"
24646 [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")
24647 (match_operand:SWI48 1 "memory_operand" "m")]
24648 UNSPECV_WRSS)]
24649 "TARGET_SHSTK"
24650 "wrss<mskmodesuffix>\t%0, %1"
24651 [(set_attr "length" "3")
24652 (set_attr "type" "other")])
24653
;; wruss<suffix>, instantiated for each SWI48 mode.  Same operand shape
;; as wrss (register operand 0, memory operand 1) but with
;; UNSPECV_WRUSS and a length of 4.  Enabled by TARGET_SHSTK.
24654 (define_insn "@wruss<mode>"
24655 [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")
24656 (match_operand:SWI48 1 "memory_operand" "m")]
24657 UNSPECV_WRUSS)]
24658 "TARGET_SHSTK"
24659 "wruss<mskmodesuffix>\t%0, %1"
24660 [(set_attr "length" "4")
24661 (set_attr "type" "other")])
24662
;; setssbsy: operand-less volatile unspec.  Enabled by TARGET_SHSTK.
;; Length is fixed at 4.
24663 (define_insn "setssbsy"
24664 [(unspec_volatile [(const_int 0)] UNSPECV_SETSSBSY)]
24665 "TARGET_SHSTK"
24666 "setssbsy"
24667 [(set_attr "length" "4")
24668 (set_attr "type" "other")])
24669
;; clrssbsy: volatile unspec whose single input is a DImode memory
;; operand.  Enabled by TARGET_SHSTK.  Length is fixed at 4.
24670 (define_insn "clrssbsy"
24671 [(unspec_volatile [(match_operand:DI 0 "memory_operand" "m")]
24672 UNSPECV_CLRSSBSY)]
24673 "TARGET_SHSTK"
24674 "clrssbsy\t%0"
24675 [(set_attr "length" "4")
24676 (set_attr "type" "other")])
24677
;; ENDBR marker.  Operand-less volatile unspec, available whenever
;; flag_cf_protection has CF_BRANCH set.  The output code picks the
;; mnemonic at output time: "endbr64" for TARGET_64BIT, otherwise
;; "endbr32".  Length is fixed at 4 with no immediate and no modrm byte.
24678 (define_insn "nop_endbr"
24679 [(unspec_volatile [(const_int 0)] UNSPECV_NOP_ENDBR)]
24680 "(flag_cf_protection & CF_BRANCH)"
24681 {
24682 return TARGET_64BIT ? "endbr64" : "endbr32";
24683 }
24684 [(set_attr "length" "4")
24685 (set_attr "length_immediate" "0")
24686 (set_attr "modrm" "0")])
24687
24688 ;; For RTM support
;; Expander for xbegin.  Operand 0 is the SImode register that receives
;; the result.  The RTL template is not used as such: the preparation
;; code emits the whole sequence itself and finishes with DONE.
;; Sequence: load -1 into AX, emit the xbegin_1 jump insn targeting a
;; fresh label, place that label immediately after it, then copy AX
;; into operand 0.
24689 (define_expand "xbegin"
24690 [(set (match_operand:SI 0 "register_operand")
24691 (unspec_volatile:SI [(const_int 0)] UNSPECV_XBEGIN))]
24692 "TARGET_RTM"
24693 {
24694 rtx_code_label *label = gen_label_rtx ();
24695
24696 /* xbegin is emitted as jump_insn, so reload won't be able
24697 to reload its operand. Force the value into AX hard register. */
24698 rtx ax_reg = gen_rtx_REG (SImode, AX_REG);
24699 emit_move_insn (ax_reg, constm1_rtx);
24700
24701 emit_jump_insn (gen_xbegin_1 (ax_reg, label));
24702
/* The label sits directly after the jump; record its single use by
   the xbegin_1 insn emitted above.  */
24703 emit_label (label);
24704 LABEL_NUSES (label) = 1;
24705
24706 emit_move_insn (operands[0], ax_reg);
24707
24708 DONE;
24709 })
24710
;; The xbegin instruction itself, modeled as a conditional jump: pc is
;; set to label operand 1 when UNSPEC_XBEGIN_ABORT is nonzero, otherwise
;; it falls through.  In parallel, operand 0 (read-write, constraint
;; "+a") is updated from a volatile unspec of its own previous value.
;; Emitted by the "xbegin" expander.  Length is fixed at 6.
24711 (define_insn "xbegin_1"
24712 [(set (pc)
24713 (if_then_else (ne (unspec [(const_int 0)] UNSPEC_XBEGIN_ABORT)
24714 (const_int 0))
24715 (label_ref (match_operand 1))
24716 (pc)))
24717 (set (match_operand:SI 0 "register_operand" "+a")
24718 (unspec_volatile:SI [(match_dup 0)] UNSPECV_XBEGIN))]
24719 "TARGET_RTM"
24720 "xbegin\t%l1"
24721 [(set_attr "type" "other")
24722 (set_attr "length" "6")])
24723
;; xend: operand-less volatile unspec.  Enabled by TARGET_RTM.
;; Length is fixed at 3.
24724 (define_insn "xend"
24725 [(unspec_volatile [(const_int 0)] UNSPECV_XEND)]
24726 "TARGET_RTM"
24727 "xend"
24728 [(set_attr "type" "other")
24729 (set_attr "length" "3")])
24730
;; xabort: volatile unspec whose single operand must satisfy
;; const_0_to_255_operand and is printed as the instruction's argument.
;; Enabled by TARGET_RTM.  Length is fixed at 3.
24731 (define_insn "xabort"
24732 [(unspec_volatile [(match_operand:SI 0 "const_0_to_255_operand")]
24733 UNSPECV_XABORT)]
24734 "TARGET_RTM"
24735 "xabort\t%0"
24736 [(set_attr "type" "other")
24737 (set_attr "length" "3")])
24738
;; Expander for xtest.  Operand 0 is the QImode register that receives
;; the result.  The preparation code emits the flag-setting xtest_1
;; insn, then calls ix86_expand_setcc to store an NE test of the
;; CCZ-mode FLAGS_REG against zero into operand 0, and finishes with
;; DONE (the RTL template is not emitted).
24739 (define_expand "xtest"
24740 [(set (match_operand:QI 0 "register_operand")
24741 (unspec_volatile:QI [(const_int 0)] UNSPECV_XTEST))]
24742 "TARGET_RTM"
24743 {
24744 emit_insn (gen_xtest_1 ());
24745
24746 ix86_expand_setcc (operands[0], NE,
24747 gen_rtx_REG (CCZmode, FLAGS_REG), const0_rtx);
24748 DONE;
24749 })
24750
;; The xtest instruction itself: sets FLAGS_REG in CCZ mode from an
;; operand-less volatile unspec.  Emitted by the "xtest" expander.
;; Enabled by TARGET_RTM.  Length is fixed at 3.
24751 (define_insn "xtest_1"
24752 [(set (reg:CCZ FLAGS_REG)
24753 (unspec_volatile:CCZ [(const_int 0)] UNSPECV_XTEST))]
24754 "TARGET_RTM"
24755 "xtest"
24756 [(set_attr "type" "other")
24757 (set_attr "length" "3")])
24758
;; clwb: volatile unspec over a single address operand (predicate
;; address_operand, constraint "p"), printed with the %a modifier.
;; Enabled by TARGET_CLWB.  The memory attribute is "unknown".
24759 (define_insn "clwb"
24760 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
24761 UNSPECV_CLWB)]
24762 "TARGET_CLWB"
24763 "clwb\t%a0"
24764 [(set_attr "type" "sse")
24765 (set_attr "atom_sse_attr" "fence")
24766 (set_attr "memory" "unknown")])
24767
;; clflushopt: same shape as clwb -- a volatile unspec over a single
;; address operand printed with %a.  Enabled by TARGET_CLFLUSHOPT.
;; The memory attribute is "unknown".
24768 (define_insn "clflushopt"
24769 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
24770 UNSPECV_CLFLUSHOPT)]
24771 "TARGET_CLFLUSHOPT"
24772 "clflushopt\t%a0"
24773 [(set_attr "type" "sse")
24774 (set_attr "atom_sse_attr" "fence")
24775 (set_attr "memory" "unknown")])
24776
24777 ;; MONITORX and MWAITX
;; mwaitx: volatile unspec over three SImode register operands with the
;; single-register constraints "c", "a" and "b".  None of them is
;; printed; the template is just the mnemonic, so the operands exist
;; only to force the values into those registers.
;; Enabled by TARGET_MWAITX.  Length is fixed at 3.
24778 (define_insn "mwaitx"
24779 [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
24780 (match_operand:SI 1 "register_operand" "a")
24781 (match_operand:SI 2 "register_operand" "b")]
24782 UNSPECV_MWAITX)]
24783 "TARGET_MWAITX"
24784 ;; 64bit version is "mwaitx %rax,%rcx,%rbx". But only lower 32bits are used.
24785 ;; Since 32bit register operands are implicitly zero extended to 64bit,
24786 ;; we only need to set up 32bit registers.
24787 "mwaitx"
24788 [(set_attr "length" "3")])
24789
;; monitorx, instantiated for each mode of the P iterator.  Operand 0 is
;; a P-mode register with constraint "a"; operands 1 and 2 are SImode
;; registers with constraints "c" and "d".  No operand is printed.
;; Enabled by TARGET_MWAITX.  The length is 3, plus 1 when
;; Pmode != word_mode.
;; NOTE(review): the extra byte presumably accounts for a prefix emitted
;; by the "%^" directive in the template -- confirm.
24790 (define_insn "@monitorx_<mode>"
24791 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
24792 (match_operand:SI 1 "register_operand" "c")
24793 (match_operand:SI 2 "register_operand" "d")]
24794 UNSPECV_MONITORX)]
24795 "TARGET_MWAITX"
24796 ;; 64bit version is "monitorx %rax,%rcx,%rdx". But only lower 32bits in
24797 ;; RCX and RDX are used. Since 32bit register operands are implicitly
24798 ;; zero extended to 64bit, we only need to set up 32bit registers.
24799 "%^monitorx"
24800 [(set (attr "length")
24801 (symbol_ref ("(Pmode != word_mode) + 3")))])
24802
24803 ;; CLZERO
;; clzero, instantiated for each mode of the P iterator.  Volatile
;; unspec over a single P-mode register operand with constraint "a".
;; The operand is not printed; the template is just the mnemonic.
;; Enabled by TARGET_CLZERO.  Length is fixed at 3 and the memory
;; attribute is "unknown".
24804 (define_insn "@clzero_<mode>"
24805 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")]
24806 UNSPECV_CLZERO)]
24807 "TARGET_CLZERO"
24808 "clzero"
24809 [(set_attr "length" "3")
24810 (set_attr "memory" "unknown")])
24811
24812 ;; RDPKRU and WRPKRU
24813
;; Expander for rdpkru.  The caller supplies only operand 0, the SImode
;; register that receives the result.  The preparation code creates the
;; other two: operand 1 is a register forced to hold zero and used as
;; the unspec input, and operand 2 is a fresh SImode pseudo that the
;; parallel sets to zero.  The resulting parallel is matched by the
;; "*rdpkru" insn.  Enabled by TARGET_PKU.
24814 (define_expand "rdpkru"
24815 [(parallel
24816 [(set (match_operand:SI 0 "register_operand")
24817 (unspec_volatile:SI [(match_dup 1)] UNSPECV_PKU))
24818 (set (match_dup 2) (const_int 0))])]
24819 "TARGET_PKU"
24820 {
24821 operands[1] = force_reg (SImode, const0_rtx);
24822 operands[2] = gen_reg_rtx (SImode);
24823 })
24824
;; The rdpkru instruction.  Operand 0 (constraint "=a") receives the
;; unspec result, operand 2 (constraint "c") is the unspec input, and
;; operand 1 (constraint "=d") is a second output that the pattern sets
;; to zero.  Note that operand numbers 1 and 2 are swapped relative to
;; the "rdpkru" expander; matching is by RTL shape, not by number.
;; No operand is printed.  Enabled by TARGET_PKU.
24825 (define_insn "*rdpkru"
24826 [(set (match_operand:SI 0 "register_operand" "=a")
24827 (unspec_volatile:SI [(match_operand:SI 2 "register_operand" "c")]
24828 UNSPECV_PKU))
24829 (set (match_operand:SI 1 "register_operand" "=d")
24830 (const_int 0))]
24831 "TARGET_PKU"
24832 "rdpkru"
24833 [(set_attr "type" "other")])
24834
;; Expander for wrpkru.  The caller supplies only operand 0, an SImode
;; register.  The preparation code creates operands 1 and 2 as
;; registers forced to hold zero.  The resulting three-input volatile
;; unspec is matched by the "*wrpkru" insn.  Enabled by TARGET_PKU.
24835 (define_expand "wrpkru"
24836 [(unspec_volatile:SI
24837 [(match_operand:SI 0 "register_operand")
24838 (match_dup 1) (match_dup 2)] UNSPECV_PKU)]
24839 "TARGET_PKU"
24840 {
24841 operands[1] = force_reg (SImode, const0_rtx);
24842 operands[2] = force_reg (SImode, const0_rtx);
24843 })
24844
;; The wrpkru instruction.  A volatile unspec over three SImode register
;; inputs with constraints "a", "d" and "c" respectively.  No operand is
;; printed.  Enabled by TARGET_PKU.
24845 (define_insn "*wrpkru"
24846 [(unspec_volatile:SI
24847 [(match_operand:SI 0 "register_operand" "a")
24848 (match_operand:SI 1 "register_operand" "d")
24849 (match_operand:SI 2 "register_operand" "c")] UNSPECV_PKU)]
24850 "TARGET_PKU"
24851 "wrpkru"
24852 [(set_attr "type" "other")])
24853
;; rdpid with an SImode destination register.  This variant is only
;; enabled when !TARGET_64BIT (and TARGET_RDPID); the 64-bit case is
;; handled by "rdpid_rex64".
24854 (define_insn "rdpid"
24855 [(set (match_operand:SI 0 "register_operand" "=r")
24856 (unspec_volatile:SI [(const_int 0)] UNSPECV_RDPID))]
24857 "!TARGET_64BIT && TARGET_RDPID"
24858 "rdpid\t%0"
24859 [(set_attr "type" "other")])
24860
;; rdpid with a DImode destination register.  This variant is only
;; enabled when TARGET_64BIT (and TARGET_RDPID); the 32-bit case is
;; handled by "rdpid".
24861 (define_insn "rdpid_rex64"
24862 [(set (match_operand:DI 0 "register_operand" "=r")
24863 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDPID))]
24864 "TARGET_64BIT && TARGET_RDPID"
24865 "rdpid\t%0"
24866 [(set_attr "type" "other")])
24867
24868 ;; Intrinsics for > i486
24869
;; wbinvd: operand-less volatile unspec.  The condition string is empty,
;; so the pattern is not gated on any target flag.
24870 (define_insn "wbinvd"
24871 [(unspec_volatile [(const_int 0)] UNSPECV_WBINVD)]
24872 ""
24873 "wbinvd"
24874 [(set_attr "type" "other")])
24875
;; wbnoinvd: operand-less volatile unspec.  Enabled by TARGET_WBNOINVD.
24876 (define_insn "wbnoinvd"
24877 [(unspec_volatile [(const_int 0)] UNSPECV_WBNOINVD)]
24878 "TARGET_WBNOINVD"
24879 "wbnoinvd"
24880 [(set_attr "type" "other")])
24881
24882 ;; MOVDIRI and MOVDIR64B
24883
;; movdiri, instantiated for each SWI48 mode.  Memory operand 0 is set
;; from a (non-volatile) UNSPEC_MOVDIRI of register operand 1.
;; Enabled by TARGET_MOVDIRI.
24884 (define_insn "movdiri<mode>"
24885 [(set (match_operand:SWI48 0 "memory_operand" "=m")
24886 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
24887 UNSPEC_MOVDIRI))]
24888 "TARGET_MOVDIRI"
24889 "movdiri\t{%1, %0|%0, %1}"
24890 [(set_attr "type" "other")])
24891
;; movdir64b, instantiated for each mode of the P iterator.  The
;; destination is an XImode MEM whose address is register operand 0;
;; the source is XImode memory operand 1 wrapped in UNSPEC_MOVDIR64B.
;; Note that operand 0 is the address register itself, not the MEM.
;; Enabled by TARGET_MOVDIR64B.
24892 (define_insn "@movdir64b_<mode>"
24893 [(set (mem:XI (match_operand:P 0 "register_operand" "r"))
24894 (unspec:XI [(match_operand:XI 1 "memory_operand" "m")]
24895 UNSPEC_MOVDIR64B))]
24896 "TARGET_MOVDIR64B"
24897 "movdir64b\t{%1, %0|%0, %1}"
24898 [(set_attr "type" "other")])
24899
24900 ;; TSXLDTRK
;; Int iterator over the two TSXLDTRK unspecs, and the attribute mapping
;; each one to its mnemonic.
24901 (define_int_iterator TSXLDTRK [UNSPECV_XSUSLDTRK UNSPECV_XRESLDTRK])
24902 (define_int_attr tsxldtrk [(UNSPECV_XSUSLDTRK "xsusldtrk")
24903 (UNSPECV_XRESLDTRK "xresldtrk")])
;; One operand-less insn per TSXLDTRK unspec; the <tsxldtrk> attribute
;; supplies both the pattern name and the emitted mnemonic.
;; Enabled by TARGET_TSXLDTRK.  Length is fixed at 4.
24904 (define_insn "<tsxldtrk>"
24905 [(unspec_volatile [(const_int 0)] TSXLDTRK)]
24906 "TARGET_TSXLDTRK"
24907 "<tsxldtrk>"
24908 [(set_attr "type" "other")
24909 (set_attr "length" "4")])
24910
24911 ;; ENQCMD and ENQCMDS
24912
;; Int iterator over the two enqueue-command unspecs, and the attribute
;; giving the mnemonic/pattern-name suffix for each: empty for
;; UNSPECV_ENQCMD, "s" for UNSPECV_ENQCMDS.
24913 (define_int_iterator ENQCMD [UNSPECV_ENQCMD UNSPECV_ENQCMDS])
24914 (define_int_attr enqcmd_sfx [(UNSPECV_ENQCMD "") (UNSPECV_ENQCMDS "s")])
24915
;; enqcmd / enqcmds, instantiated for each ENQCMD unspec and each mode
;; of the P iterator.  Operand 0 is a P-mode register and operand 1 an
;; XImode memory operand.  The insn is modeled as a CCZ-mode set of
;; FLAGS_REG from the volatile unspec.  Enabled by TARGET_ENQCMD.
24916 (define_insn "@enqcmd<enqcmd_sfx>_<mode>"
24917 [(set (reg:CCZ FLAGS_REG)
24918 (unspec_volatile:CCZ [(match_operand:P 0 "register_operand" "r")
24919 (match_operand:XI 1 "memory_operand" "m")]
24920 ENQCMD))]
24921 "TARGET_ENQCMD"
24922 "enqcmd<enqcmd_sfx>\t{%1, %0|%0, %1}"
24923 [(set_attr "type" "other")])
24924
24925 ;; UINTR
;; Int iterator over the clui/stui unspecs, and the attribute mapping
;; each one to its mnemonic.
24926 (define_int_iterator UINTR [UNSPECV_CLUI UNSPECV_STUI])
24927 (define_int_attr uintr [(UNSPECV_CLUI "clui") (UNSPECV_STUI "stui")])
24928
;; clui / stui: one operand-less insn per UINTR unspec; the <uintr>
;; attribute supplies both the pattern name and the mnemonic.
;; Enabled only for TARGET_UINTR && TARGET_64BIT.  Length is fixed at 4.
24929 (define_insn "<uintr>"
24930 [(unspec_volatile [(const_int 0)] UINTR)]
24931 "TARGET_UINTR && TARGET_64BIT"
24932 "<uintr>"
24933 [(set_attr "type" "other")
24934 (set_attr "length" "4")])
24935
;; testui: sets FLAGS_REG in CCC mode from an operand-less volatile
;; unspec.  Enabled only for TARGET_UINTR && TARGET_64BIT.
;; Length is fixed at 4.
24936 (define_insn "testui"
24937 [(set (reg:CCC FLAGS_REG)
24938 (unspec_volatile:CCC [(const_int 0)] UNSPECV_TESTUI))]
24939 "TARGET_UINTR && TARGET_64BIT"
24940 "testui"
24941 [(set_attr "type" "other")
24942 (set_attr "length" "4")])
24943
;; senduipi: volatile unspec whose single input is a DImode register.
;; Enabled only for TARGET_UINTR && TARGET_64BIT.  Length is fixed at 4.
24944 (define_insn "senduipi"
24945 [(unspec_volatile
24946 [(match_operand:DI 0 "register_operand" "r")]
24947 UNSPECV_SENDUIPI)]
24948 "TARGET_UINTR && TARGET_64BIT"
24949 "senduipi\t%0"
24950 [(set_attr "type" "other")
24951 (set_attr "length" "4")])
24952
24953 ;; WAITPKG
24954
;; umwait for !TARGET_64BIT.  Sets FLAGS_REG in CCC mode from a volatile
;; unspec over an SImode register (operand 0, the only one printed) and
;; a DImode register with constraint "A" (operand 1).
;; NOTE(review): "A" presumably places the 64-bit value in the EDX:EAX
;; pair -- confirm against the i386 constraint documentation.
;; Enabled by TARGET_WAITPKG.  Length is fixed at 3.
24955 (define_insn "umwait"
24956 [(set (reg:CCC FLAGS_REG)
24957 (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
24958 (match_operand:DI 1 "register_operand" "A")]
24959 UNSPECV_UMWAIT))]
24960 "!TARGET_64BIT && TARGET_WAITPKG"
24961 "umwait\t%0"
24962 [(set_attr "length" "3")])
24963
;; umwait for TARGET_64BIT.  Same as "umwait" except that the second
;; input is split into two SImode registers with constraints "a" and
;; "d" (operands 1 and 2) instead of one DImode operand.  Only operand 0
;; is printed.  Enabled by TARGET_WAITPKG.  Length is fixed at 3.
24964 (define_insn "umwait_rex64"
24965 [(set (reg:CCC FLAGS_REG)
24966 (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
24967 (match_operand:SI 1 "register_operand" "a")
24968 (match_operand:SI 2 "register_operand" "d")]
24969 UNSPECV_UMWAIT))]
24970 "TARGET_64BIT && TARGET_WAITPKG"
24971 "umwait\t%0"
24972 [(set_attr "length" "3")])
24973
;; umonitor, instantiated for each mode of the P iterator.  Volatile
;; unspec over a single P-mode register operand, which is printed.
;; Enabled by TARGET_WAITPKG.  The length is 3, plus 1 when
;; Pmode != word_mode.
24974 (define_insn "@umonitor_<mode>"
24975 [(unspec_volatile [(match_operand:P 0 "register_operand" "r")]
24976 UNSPECV_UMONITOR)]
24977 "TARGET_WAITPKG"
24978 "umonitor\t%0"
24979 [(set (attr "length")
24980 (symbol_ref ("(Pmode != word_mode) + 3")))])
24981
;; tpause for !TARGET_64BIT.  Sets FLAGS_REG in CCC mode from a volatile
;; unspec over an SImode register (operand 0, the only one printed) and
;; a DImode register with constraint "A" (operand 1).
;; NOTE(review): "A" presumably places the 64-bit value in the EDX:EAX
;; pair -- confirm against the i386 constraint documentation.
;; Enabled by TARGET_WAITPKG.  Length is fixed at 3.
24982 (define_insn "tpause"
24983 [(set (reg:CCC FLAGS_REG)
24984 (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
24985 (match_operand:DI 1 "register_operand" "A")]
24986 UNSPECV_TPAUSE))]
24987 "!TARGET_64BIT && TARGET_WAITPKG"
24988 "tpause\t%0"
24989 [(set_attr "length" "3")])
24990
;; tpause for TARGET_64BIT.  Same as "tpause" except that the second
;; input is split into two SImode registers with constraints "a" and
;; "d" (operands 1 and 2) instead of one DImode operand.  Only operand 0
;; is printed.  Enabled by TARGET_WAITPKG.  Length is fixed at 3.
24991 (define_insn "tpause_rex64"
24992 [(set (reg:CCC FLAGS_REG)
24993 (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
24994 (match_operand:SI 1 "register_operand" "a")
24995 (match_operand:SI 2 "register_operand" "d")]
24996 UNSPECV_TPAUSE))]
24997 "TARGET_64BIT && TARGET_WAITPKG"
24998 "tpause\t%0"
24999 [(set_attr "length" "3")])
25000
;; cldemote: volatile unspec over a single address operand (predicate
;; address_operand, constraint "p"), printed with the %a modifier.
;; Enabled by TARGET_CLDEMOTE.  The memory attribute is "unknown".
25001 (define_insn "cldemote"
25002 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
25003 UNSPECV_CLDEMOTE)]
25004 "TARGET_CLDEMOTE"
25005 "cldemote\t%a0"
25006 [(set_attr "type" "other")
25007 (set_attr "memory" "unknown")])
25008
;; Speculation barrier: an operand-less volatile unspec that is emitted
;; as "lfence".  The condition string is empty, so the pattern is not
;; gated on any target flag.  Length is fixed at 3.
25009 (define_insn "speculation_barrier"
25010 [(unspec_volatile [(const_int 0)] UNSPECV_SPECULATION_BARRIER)]
25011 ""
25012 "lfence"
25013 [(set_attr "type" "other")
25014 (set_attr "length" "3")])
25015
;; serialize: operand-less volatile unspec.  Enabled by
;; TARGET_SERIALIZE.  Length is fixed at 3.
25016 (define_insn "serialize"
25017 [(unspec_volatile [(const_int 0)] UNSPECV_SERIALIZE)]
25018 "TARGET_SERIALIZE"
25019 "serialize"
25020 [(set_attr "type" "other")
25021 (set_attr "length" "3")])
25022
;; Patchable area placeholder.  Both operands are constant integers.
;; The output code does all the emission by calling
;; ix86_output_patchable_area with the value of operand 0 and a boolean
;; that is true when operand 1 is nonzero, then returns an empty
;; template.  The insn's length attribute is the value of operand 0.
;; The condition string is empty, so the pattern is always available.
25023 (define_insn "patchable_area"
25024 [(unspec_volatile [(match_operand 0 "const_int_operand")
25025 (match_operand 1 "const_int_operand")]
25026 UNSPECV_PATCHABLE_AREA)]
25027 ""
25028 {
25029 ix86_output_patchable_area (INTVAL (operands[0]),
25030 INTVAL (operands[1]) != 0);
25031 return "";
25032 }
25033 [(set (attr "length") (symbol_ref "INTVAL (operands[0])"))
25034 (set_attr "length_immediate" "0")
25035 (set_attr "modrm" "0")])
25036
;; hreset: volatile unspec over one SImode register operand with
;; constraint "a".  The operand is not printed; the template's two
;; dialect alternatives print a literal immediate zero ("$0" or "0").
;; Enabled by TARGET_HRESET.  Length is fixed at 4.
25037 (define_insn "hreset"
25038 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")]
25039 UNSPECV_HRESET)]
25040 "TARGET_HRESET"
25041 "hreset\t{$0|0}"
25042 [(set_attr "type" "other")
25043 (set_attr "length" "4")])
25044
25045 ;; Spaceship optimization
;; Spaceship expander for each MODEF mode.  Operand 0 is the SImode
;; result register; operands 1 and 2 are the two values to compare.
;; All work is delegated to ix86_expand_fp_spaceship.
;; Available when either x87 is enabled or the mode is an SSE float
;; mode with SSE math, and additionally either TARGET_CMOVE or
;; (TARGET_SAHF && TARGET_USE_SAHF) holds.
25046 (define_expand "spaceship<mode>3"
25047 [(match_operand:SI 0 "register_operand")
25048 (match_operand:MODEF 1 "cmp_fp_expander_operand")
25049 (match_operand:MODEF 2 "cmp_fp_expander_operand")]
25050 "(TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
25051 && (TARGET_CMOVE || (TARGET_SAHF && TARGET_USE_SAHF))"
25052 {
25053 ix86_expand_fp_spaceship (operands[0], operands[1], operands[2]);
25054 DONE;
25055 })
25056
;; Spaceship expander for XFmode.  Operand 0 is the SImode result
;; register; operands 1 and 2 are XFmode non-memory operands.
;; All work is delegated to ix86_expand_fp_spaceship.
;; Requires TARGET_80387 and either TARGET_CMOVE or
;; (TARGET_SAHF && TARGET_USE_SAHF).
25057 (define_expand "spaceshipxf3"
25058 [(match_operand:SI 0 "register_operand")
25059 (match_operand:XF 1 "nonmemory_operand")
25060 (match_operand:XF 2 "nonmemory_operand")]
25061 "TARGET_80387 && (TARGET_CMOVE || (TARGET_SAHF && TARGET_USE_SAHF))"
25062 {
25063 ix86_expand_fp_spaceship (operands[0], operands[1], operands[2]);
25064 DONE;
25065 })
25066
25067 ;; Defined because the generic expand_builtin_issignaling for XFmode
25068 ;; only tests for sNaNs, but the i387 also treats pseudo numbers as
25069 ;; always signaling.
;; Operand 0 is the SImode result register and operand 1 the XFmode
;; value to test.  The value is examined through memory: if operand 1
;; is not already a MEM it is first copied to a stack temporary, and
;; the three pieces are then read at byte offsets 8 (HImode), 4
;; (SImode) and 0 (SImode).  The condition string is empty, so the
;; expander is always available.
25070 (define_expand "issignalingxf2"
25071 [(match_operand:SI 0 "register_operand")
25072 (match_operand:XF 1 "general_operand")]
25073 ""
25074 {
25075 rtx temp = operands[1];
25076 if (!MEM_P (temp))
25077 {
25078 rtx mem = assign_stack_temp (XFmode, GET_MODE_SIZE (XFmode));
25079 emit_move_insn (mem, temp);
25080 temp = mem;
25081 }
25082 rtx ex = adjust_address (temp, HImode, 8);
25083 rtx hi = adjust_address (temp, SImode, 4);
25084 rtx lo = adjust_address (temp, SImode, 0);
25085 rtx val = GEN_INT (HOST_WIDE_INT_M1U << 30);
25086 rtx mask = GEN_INT (0x7fff);
25087 rtx bit = GEN_INT (HOST_WIDE_INT_1U << 30);
25088 /* Expand to:
25089 ((ex & mask) && (int) hi >= 0)
25090 || ((ex & mask) == mask && ((hi ^ bit) | ((lo | -lo) >> 31)) > val). */
/* lo = (lo | -lo) >> 31, using a logical shift.  */
25091 rtx nlo = expand_unop (SImode, neg_optab, lo, NULL_RTX, 0);
25092 lo = expand_binop (SImode, ior_optab, lo, nlo,
25093 NULL_RTX, 1, OPTAB_LIB_WIDEN);
25094 lo = expand_shift (RSHIFT_EXPR, SImode, lo, 31, NULL_RTX, 1);
/* temp = ((hi ^ bit) | lo) > val, as an unsigned comparison.  */
25095 temp = expand_binop (SImode, xor_optab, hi, bit,
25096 NULL_RTX, 1, OPTAB_LIB_WIDEN);
25097 temp = expand_binop (SImode, ior_optab, temp, lo,
25098 NULL_RTX, 1, OPTAB_LIB_WIDEN);
25099 temp = emit_store_flag_force (gen_reg_rtx (SImode), GTU, temp, val,
25100 SImode, 1, 1);
25101 ex = expand_binop (HImode, and_optab, ex, mask,
25102 NULL_RTX, 1, OPTAB_LIB_WIDEN);
/* EX is an HImode value, so both tests on it are done in HImode;
   only the store-flag result register is SImode.  */
25103 rtx temp2 = emit_store_flag_force (gen_reg_rtx (SImode), NE,
25104 ex, const0_rtx, HImode, 1, 1);
25105 ex = emit_store_flag_force (gen_reg_rtx (SImode), EQ,
25106 ex, mask, HImode, 1, 1);
25107 temp = expand_binop (SImode, and_optab, temp, ex,
25108 NULL_RTX, 1, OPTAB_LIB_WIDEN);
/* temp3 = (int) hi >= 0, as a signed comparison.  */
25109 rtx temp3 = emit_store_flag_force (gen_reg_rtx (SImode), GE,
25110 hi, const0_rtx, SImode, 0, 1);
25111 temp2 = expand_binop (SImode, and_optab, temp2, temp3,
25112 NULL_RTX, 1, OPTAB_LIB_WIDEN);
/* Combine the two alternatives of the expression above.  */
25113 temp = expand_binop (SImode, ior_optab, temp, temp2,
25114 NULL_RTX, 1, OPTAB_LIB_WIDEN);
25115 emit_move_insn (operands[0], temp);
25116 DONE;
25117 })
25118
25119 (include "mmx.md")
25120 (include "sse.md")
25121 (include "sync.md")