]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/i386/i386.md
710068e9093359584a3acbd87026630007681309
[thirdparty/gcc.git] / gcc / config / i386 / i386.md
1 ;; GCC machine description for IA-32 and x86-64.
2 ;; Copyright (C) 1988-2023 Free Software Foundation, Inc.
3 ;; Mostly by William Schelter.
4 ;; x86_64 support added by Jan Hubicka
5 ;;
6 ;; This file is part of GCC.
7 ;;
8 ;; GCC is free software; you can redistribute it and/or modify
9 ;; it under the terms of the GNU General Public License as published by
10 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; any later version.
12 ;;
13 ;; GCC is distributed in the hope that it will be useful,
14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ;; GNU General Public License for more details.
17 ;;
18 ;; You should have received a copy of the GNU General Public License
19 ;; along with GCC; see the file COPYING3. If not see
20 ;; <http://www.gnu.org/licenses/>. */
21 ;;
22 ;; The original PO technology requires these to be ordered by speed,
23 ;; so that assigner will pick the fastest.
24 ;;
25 ;; See file "rtl.def" for documentation on define_insn, match_*, et al.
26 ;;
27 ;; The special asm out single letter directives following a '%' are:
28 ;; L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
29 ;; C -- print opcode suffix for set/cmov insn.
30 ;; c -- like C, but print reversed condition
31 ;; F,f -- likewise, but for floating-point.
32 ;; O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
33 ;; otherwise nothing
34 ;; R -- print the prefix for register names.
35 ;; z -- print the opcode suffix for the size of the current operand.
36 ;; Z -- likewise, with special suffixes for x87 instructions.
37 ;; * -- print a star (in certain assembler syntax)
38 ;; A -- print an absolute memory reference.
39 ;; E -- print address with DImode register names if TARGET_64BIT.
40 ;; w -- print the operand as if it's a "word" (HImode) even if it isn't.
41 ;; s -- print a shift double count, followed by the assemblers argument
42 ;; delimiter.
43 ;; b -- print the QImode name of the register for the indicated operand.
44 ;; %b0 would print %al if operands[0] is reg 0.
45 ;; w -- likewise, print the HImode name of the register.
46 ;; k -- likewise, print the SImode name of the register.
47 ;; q -- likewise, print the DImode name of the register.
48 ;; x -- likewise, print the V4SFmode name of the register.
49 ;; t -- likewise, print the V8SFmode name of the register.
50 ;; h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
51 ;; y -- print "st(0)" instead of "st" as a register.
52 ;; d -- print duplicated register operand for AVX instruction.
53 ;; D -- print condition for SSE cmp instruction.
54 ;; P -- if PIC, print an @PLT suffix.
55 ;; p -- print raw symbol name.
56 ;; X -- don't print any sort of PIC '@' suffix for a symbol.
57 ;; & -- print some in-use local-dynamic symbol name.
58 ;; H -- print a memory address offset by 8; used for sse high-parts
59 ;; K -- print HLE lock prefix
60 ;; Y -- print condition for XOP pcom* instruction.
61 ;; + -- print a branch hint as 'cs' or 'ds' prefix
62 ;; ; -- print a semicolon (after prefixes due to bug in older gas).
63 ;; ~ -- print "i" if TARGET_AVX2, "f" otherwise.
64 ;; ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
65 ;; ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
66
;; Enumerators of the C enum "unspec".  The group comments inside the list
;; name the feature each run of entries belongs to.
67 (define_c_enum "unspec" [
68 ;; Relocation specifiers
69 UNSPEC_GOT
70 UNSPEC_GOTOFF
71 UNSPEC_GOTPCREL
72 UNSPEC_GOTTPOFF
73 UNSPEC_TPOFF
74 UNSPEC_NTPOFF
75 UNSPEC_DTPOFF
76 UNSPEC_GOTNTPOFF
77 UNSPEC_INDNTPOFF
78 UNSPEC_PLTOFF
79 UNSPEC_MACHOPIC_OFFSET
80 UNSPEC_PCREL
81 UNSPEC_SIZEOF
82
83 ;; Prologue support
84 UNSPEC_STACK_ALLOC
85 UNSPEC_SET_GOT
86 UNSPEC_SET_RIP
87 UNSPEC_SET_GOT_OFFSET
88 UNSPEC_MEMORY_BLOCKAGE
89 UNSPEC_PROBE_STACK
90
91 ;; TLS support
92 UNSPEC_TP
93 UNSPEC_TLS_GD
94 UNSPEC_TLS_LD_BASE
95 UNSPEC_TLSDESC
96 UNSPEC_TLS_IE_SUN
97
98 ;; Other random patterns
99 UNSPEC_SCAS
100 UNSPEC_FNSTSW
101 UNSPEC_SAHF
102 UNSPEC_NOTRAP
103 UNSPEC_PARITY
104 UNSPEC_FSTCW
105 UNSPEC_REP
106 UNSPEC_LD_MPIC ; load_macho_picbase
107 UNSPEC_TRUNC_NOOP
108 UNSPEC_DIV_ALREADY_SPLIT
109 UNSPEC_PAUSE
110 UNSPEC_LEA_ADDR
111 UNSPEC_XBEGIN_ABORT
112 UNSPEC_STOS
113 UNSPEC_PEEPSIB
114 UNSPEC_INSN_FALSE_DEP
115 UNSPEC_SBB
116 UNSPEC_CC_NE
117 UNSPEC_STC
118 UNSPEC_PUSHFL
119 UNSPEC_POPFL
120
121 ;; For SSE/MMX support:
122 UNSPEC_FIX_NOTRUNC
123 UNSPEC_MASKMOV
124 UNSPEC_MOVCC_MASK
125 UNSPEC_MOVMSK
126 UNSPEC_INSERTPS
127 UNSPEC_BLENDV
128 UNSPEC_PSHUFB
129 UNSPEC_XOP_PERMUTE
130 UNSPEC_RCP
131 UNSPEC_RSQRT
132 UNSPEC_PSADBW
133
134 ;; Different from generic us_truncate RTX
135 ;; as it does unsigned saturation of signed source.
136 UNSPEC_US_TRUNCATE
137
138 ;; For AVX/AVX512F support
139 UNSPEC_SCALEF
140 UNSPEC_PCMP
141 UNSPEC_CVTBFSF
142
143 ;; Generic math support
144 UNSPEC_IEEE_MIN ; not commutative
145 UNSPEC_IEEE_MAX ; not commutative
146
147 ;; x87 Floating point
148 UNSPEC_SIN
149 UNSPEC_COS
150 UNSPEC_FPATAN
151 UNSPEC_FYL2X
152 UNSPEC_FYL2XP1
153 UNSPEC_FRNDINT
154 UNSPEC_FIST
155 UNSPEC_F2XM1
156 UNSPEC_TAN
157 UNSPEC_FXAM
158
159 ;; x87 Rounding
160 UNSPEC_FRNDINT_ROUNDEVEN
161 UNSPEC_FRNDINT_FLOOR
162 UNSPEC_FRNDINT_CEIL
163 UNSPEC_FRNDINT_TRUNC
164 UNSPEC_FIST_FLOOR
165 UNSPEC_FIST_CEIL
166
167 ;; x87 Double output FP
168 UNSPEC_SINCOS_COS
169 UNSPEC_SINCOS_SIN
170 UNSPEC_XTRACT_FRACT
171 UNSPEC_XTRACT_EXP
172 UNSPEC_FSCALE_FRACT
173 UNSPEC_FSCALE_EXP
174 UNSPEC_FPREM_F
175 UNSPEC_FPREM_U
176 UNSPEC_FPREM1_F
177 UNSPEC_FPREM1_U
178
179 UNSPEC_C2_FLAG
180 UNSPEC_FXAM_MEM
181
182 ;; SSP patterns
183 UNSPEC_SP_SET
184 UNSPEC_SP_TEST
185
186 ;; For ROUND support
187 UNSPEC_ROUND
188
189 ;; For CRC32 support
190 UNSPEC_CRC32
191
192 ;; For LZCNT support
193 UNSPEC_LZCNT
194
195 ;; For BMI support
196 UNSPEC_TZCNT
197 UNSPEC_BEXTR
198
199 ;; For BMI2 support
200 UNSPEC_PDEP
201 UNSPEC_PEXT
202
203 ;; IRET support
204 UNSPEC_INTERRUPT_RETURN
205
206 ;; For MOVDIRI and MOVDIR64B support
207 UNSPEC_MOVDIRI
208 UNSPEC_MOVDIR64B
209
210 ;; For insn_callee_abi:
211 UNSPEC_CALLEE_ABI
212
213 ;; For APX PUSH2/POP2 support
214 UNSPEC_APXPUSH2
215 UNSPEC_APXPOP2_LOW
216 UNSPEC_APXPOP2_HIGH
217
218 ;; For APX PPX support
219 UNSPEC_APX_PPX
220 ])
221
;; Enumerators of the C enum "unspecv".  The first run of entries has no
;; group heading; the later runs are headed by the feature they support.
222 (define_c_enum "unspecv" [
223 UNSPECV_UD2
224 UNSPECV_BLOCKAGE
225 UNSPECV_STACK_PROBE
226 UNSPECV_PROBE_STACK_RANGE
227 UNSPECV_ALIGN
228 UNSPECV_PROLOGUE_USE
229 UNSPECV_SPLIT_STACK_RETURN
230 UNSPECV_CLD
231 UNSPECV_NOPS
232 UNSPECV_RDTSC
233 UNSPECV_RDTSCP
234 UNSPECV_RDPMC
235 UNSPECV_LLWP_INTRINSIC
236 UNSPECV_SLWP_INTRINSIC
237 UNSPECV_LWPVAL_INTRINSIC
238 UNSPECV_LWPINS_INTRINSIC
239 UNSPECV_RDFSBASE
240 UNSPECV_RDGSBASE
241 UNSPECV_WRFSBASE
242 UNSPECV_WRGSBASE
243 UNSPECV_FXSAVE
244 UNSPECV_FXRSTOR
245 UNSPECV_FXSAVE64
246 UNSPECV_FXRSTOR64
247 UNSPECV_XSAVE
248 UNSPECV_XRSTOR
249 UNSPECV_XSAVE64
250 UNSPECV_XRSTOR64
251 UNSPECV_XSAVEOPT
252 UNSPECV_XSAVEOPT64
253 UNSPECV_XSAVES
254 UNSPECV_XRSTORS
255 UNSPECV_XSAVES64
256 UNSPECV_XRSTORS64
257 UNSPECV_XSAVEC
258 UNSPECV_XSAVEC64
259 UNSPECV_XGETBV
260 UNSPECV_XSETBV
261 UNSPECV_WBINVD
262 UNSPECV_WBNOINVD
263
264 ;; For atomic compound assignments.
265 UNSPECV_FNSTENV
266 UNSPECV_FLDENV
267 UNSPECV_FNSTSW
268 UNSPECV_FNCLEX
269
270 ;; For RDRAND support
271 UNSPECV_RDRAND
272
273 ;; For RDSEED support
274 UNSPECV_RDSEED
275
276 ;; For RTM support
277 UNSPECV_XBEGIN
278 UNSPECV_XEND
279 UNSPECV_XABORT
280 UNSPECV_XTEST
281
282 UNSPECV_NLGR
283
284 ;; For CLWB support
285 UNSPECV_CLWB
286
287 ;; For CLFLUSHOPT support
288 UNSPECV_CLFLUSHOPT
289
290 ;; For MONITORX and MWAITX support
291 UNSPECV_MONITORX
292 UNSPECV_MWAITX
293
294 ;; For CLZERO support
295 UNSPECV_CLZERO
296
297 ;; For RDPKRU and WRPKRU support
298 UNSPECV_PKU
299
300 ;; For RDPID support
301 UNSPECV_RDPID
302
303 ;; For CET support
304 UNSPECV_NOP_ENDBR
305 UNSPECV_NOP_RDSSP
306 UNSPECV_INCSSP
307 UNSPECV_SAVEPREVSSP
308 UNSPECV_RSTORSSP
309 UNSPECV_WRSS
310 UNSPECV_WRUSS
311 UNSPECV_SETSSBSY
312 UNSPECV_CLRSSBSY
313
314 ;; For TSXLDTRK support
315 UNSPECV_XSUSLDTRK
316 UNSPECV_XRESLDTRK
317
318 ;; For WAITPKG support
319 UNSPECV_UMWAIT
320 UNSPECV_UMONITOR
321 UNSPECV_TPAUSE
322
323 ;; For UINTR support
324 UNSPECV_CLUI
325 UNSPECV_STUI
326 UNSPECV_TESTUI
327 UNSPECV_SENDUIPI
328
329 ;; For CLDEMOTE support
330 UNSPECV_CLDEMOTE
331
332 ;; For Speculation Barrier support
333 UNSPECV_SPECULATION_BARRIER
334
335 UNSPECV_PTWRITE
336
337 ;; For ENQCMD and ENQCMDS support
338 UNSPECV_ENQCMD
339 UNSPECV_ENQCMDS
340
341 ;; For SERIALIZE support
342 UNSPECV_SERIALIZE
343
344 ;; For patchable area support
345 UNSPECV_PATCHABLE_AREA
346
347 ;; For HRESET support
348 UNSPECV_HRESET
349
350 ;; For PREFETCHI support
351 UNSPECV_PREFETCHI
352
353 ;; For USER_MSR support
354 UNSPECV_URDMSR
355 UNSPECV_UWRMSR
356 ])
357
358 ;; Constants to represent rounding modes in the ROUND instruction
;; ROUND_ROUNDEVEN .. ROUND_TRUNC take the values 0-3.  ROUND_MXCSR (0x4)
;; and ROUND_NO_EXC (0x8) are separate single-bit values; presumably they
;; are flags combined with a mode -- confirm against the users.
359 (define_constants
360 [(ROUND_ROUNDEVEN 0x0)
361 (ROUND_FLOOR 0x1)
362 (ROUND_CEIL 0x2)
363 (ROUND_TRUNC 0x3)
364 (ROUND_MXCSR 0x4)
365 (ROUND_NO_EXC 0x8)
366 ])
367
368 ;; Constants to represent AVX512F embedded rounding
;; Four rounding modes numbered 0-3, followed by NO_ROUND (4) and
;; ROUND_SAE (8).
369 (define_constants
370 [(ROUND_NEAREST_INT 0)
371 (ROUND_NEG_INF 1)
372 (ROUND_POS_INF 2)
373 (ROUND_ZERO 3)
374 (NO_ROUND 4)
375 (ROUND_SAE 8)
376 ])
377
378 ;; Constants to represent pcomtrue/pcomfalse variants
;; The six variants are numbered consecutively from 0.
379 (define_constants
380 [(PCOM_FALSE 0)
381 (PCOM_TRUE 1)
382 (COM_FALSE_S 2)
383 (COM_FALSE_P 3)
384 (COM_TRUE_S 4)
385 (COM_TRUE_P 5)
386 ])
387
388 ;; Constants used in the XOP pperm instruction
;; The eight operation values (PPERM_SRC .. PPERM_INV_SIGN) are multiples
;; of 0x20; the two source-select values (PPERM_SRC1, PPERM_SRC2) differ
;; only in 0x10.  Note that PPERM_SRC and PPERM_SRC1 share the value 0x00.
389 (define_constants
390 [(PPERM_SRC 0x00) /* copy source */
391 (PPERM_INVERT 0x20) /* invert source */
392 (PPERM_REVERSE 0x40) /* bit reverse source */
393 (PPERM_REV_INV 0x60) /* bit reverse & invert src */
394 (PPERM_ZERO 0x80) /* all 0's */
395 (PPERM_ONES 0xa0) /* all 1's */
396 (PPERM_SIGN 0xc0) /* propagate sign bit */
397 (PPERM_INV_SIGN 0xe0) /* invert & propagate sign */
398 (PPERM_SRC1 0x00) /* use first source byte */
399 (PPERM_SRC2 0x10) /* use second source byte */
400 ])
401
402 ;; Registers by name.
;; The numbers are consecutive from AX_REG (0) to R31_REG (91), and
;; FIRST_PSEUDO_REG (92) is one past the last of them.  Note that the
;; first four entries are ordered AX, DX, CX, BX.
403 (define_constants
404 [(AX_REG 0)
405 (DX_REG 1)
406 (CX_REG 2)
407 (BX_REG 3)
408 (SI_REG 4)
409 (DI_REG 5)
410 (BP_REG 6)
411 (SP_REG 7)
412 (ST0_REG 8)
413 (ST1_REG 9)
414 (ST2_REG 10)
415 (ST3_REG 11)
416 (ST4_REG 12)
417 (ST5_REG 13)
418 (ST6_REG 14)
419 (ST7_REG 15)
420 (ARGP_REG 16)
421 (FLAGS_REG 17)
422 (FPSR_REG 18)
423 (FRAME_REG 19)
424 (XMM0_REG 20)
425 (XMM1_REG 21)
426 (XMM2_REG 22)
427 (XMM3_REG 23)
428 (XMM4_REG 24)
429 (XMM5_REG 25)
430 (XMM6_REG 26)
431 (XMM7_REG 27)
432 (MM0_REG 28)
433 (MM1_REG 29)
434 (MM2_REG 30)
435 (MM3_REG 31)
436 (MM4_REG 32)
437 (MM5_REG 33)
438 (MM6_REG 34)
439 (MM7_REG 35)
440 (R8_REG 36)
441 (R9_REG 37)
442 (R10_REG 38)
443 (R11_REG 39)
444 (R12_REG 40)
445 (R13_REG 41)
446 (R14_REG 42)
447 (R15_REG 43)
448 (XMM8_REG 44)
449 (XMM9_REG 45)
450 (XMM10_REG 46)
451 (XMM11_REG 47)
452 (XMM12_REG 48)
453 (XMM13_REG 49)
454 (XMM14_REG 50)
455 (XMM15_REG 51)
456 (XMM16_REG 52)
457 (XMM17_REG 53)
458 (XMM18_REG 54)
459 (XMM19_REG 55)
460 (XMM20_REG 56)
461 (XMM21_REG 57)
462 (XMM22_REG 58)
463 (XMM23_REG 59)
464 (XMM24_REG 60)
465 (XMM25_REG 61)
466 (XMM26_REG 62)
467 (XMM27_REG 63)
468 (XMM28_REG 64)
469 (XMM29_REG 65)
470 (XMM30_REG 66)
471 (XMM31_REG 67)
472 (MASK0_REG 68)
473 (MASK1_REG 69)
474 (MASK2_REG 70)
475 (MASK3_REG 71)
476 (MASK4_REG 72)
477 (MASK5_REG 73)
478 (MASK6_REG 74)
479 (MASK7_REG 75)
480 (R16_REG 76)
481 (R17_REG 77)
482 (R18_REG 78)
483 (R19_REG 79)
484 (R20_REG 80)
485 (R21_REG 81)
486 (R22_REG 82)
487 (R23_REG 83)
488 (R24_REG 84)
489 (R25_REG 85)
490 (R26_REG 86)
491 (R27_REG 87)
492 (R28_REG 88)
493 (R29_REG 89)
494 (R30_REG 90)
495 (R31_REG 91)
496 (FIRST_PSEUDO_REG 92)
497 ])
498
499 ;; Insn callee abi index.
;; NOTE(review): presumably the value carried with UNSPEC_CALLEE_ABI
;; (listed "For insn_callee_abi" in the unspec enum) -- confirm.
500 (define_constants
501 [(ABI_DEFAULT 0)
502 (ABI_VZEROUPPER 1)
503 (ABI_UNKNOWN 2)])
504
505 ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
506 ;; from i386.cc.
507
508 ;; In C guard expressions, put expressions which may be compile-time
509 ;; constants first. This allows for better optimization. For
510 ;; example, write "TARGET_64BIT && reload_completed", not
511 ;; "reload_completed && TARGET_64BIT".
512
513 \f
514 ;; Processor type.
;; A constant attribute: its value is taken from the C expression
;; ix86_schedule rather than from the individual insn.
515 (define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,nehalem,
516 atom,slm,glm,haswell,generic,lujiazui,yongfeng,amdfam10,bdver1,
517 bdver2,bdver3,bdver4,btver2,znver1,znver2,znver3,znver4"
518 (const (symbol_ref "ix86_schedule")))
519
520 ;; A basic instruction type. Refinements due to arguments to be
521 ;; provided in other attributes.
;; Defaults to "other".  Many attributes below ("unit", "length_immediate",
;; "length_address", "modrm", "length", "memory", "imm_disp", ...) are
;; computed from this value.
522 (define_attr "type"
523 "other,multi,
524 alu,alu1,negnot,imov,imovx,lea,
525 incdec,ishift,ishiftx,ishift1,rotate,rotatex,rotate1,
526 imul,imulx,idiv,icmp,test,ibr,setcc,icmov,
527 push,pop,call,callv,leave,
528 str,bitmanip,
529 fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,
530 fxch,fistp,fisttp,frndint,
531 sse,ssemov,sseadd,sseadd1,sseiadd,sseiadd1,
532 ssemul,sseimul,ssediv,sselog,sselog1,
533 sseishft,sseishft1,ssecmp,ssecomi,
534 ssecvt,ssecvt1,sseicvt,sseins,
535 sseshuf,sseshuf1,ssemuladd,sse4arg,
536 lwp,mskmov,msklog,
537 mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
538 (const_string "other"))
539
540 ;; Main data type used by the insn.  Defaults to "unknown".
541 (define_attr "mode"
542 "unknown,none,QI,HI,SI,DI,TI,OI,XI,HF,BF,SF,DF,XF,TF,V32HF,V16HF,V8HF,
543 V16SF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,V8DF,V4HF,V4BF,V2HF,V2BF"
544 (const_string "unknown"))
545
546 ;; The CPU unit the operation uses.
;; Derived from "type": the f* types map to "i387", the sse* types and
;; mskmov to "sse", the mmx* types to "mmx", "other" to "unknown", and
;; every remaining type to "integer".
547 (define_attr "unit" "integer,i387,sse,mmx,unknown"
548 (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,
549 fxch,fistp,fisttp,frndint")
550 (const_string "i387")
551 (eq_attr "type" "sse,ssemov,sseadd,sseadd1,sseiadd,sseiadd1,
552 ssemul,sseimul,ssediv,sselog,sselog1,
553 sseishft,sseishft1,ssecmp,ssecomi,
554 ssecvt,ssecvt1,sseicvt,sseins,
555 sseshuf,sseshuf1,ssemuladd,sse4arg,mskmov")
556 (const_string "sse")
557 (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
558 (const_string "mmx")
559 (eq_attr "type" "other")
560 (const_string "unknown")]
561 (const_string "integer")))
562
563 ;; Used to control the "enabled" attribute on a per-instruction basis.
;; Defaults to "base".  Every other value has a clause in the "enabled"
;; attribute below that maps it to a target condition.
564 (define_attr "isa" "base,x64,nox64,x64_sse2,x64_sse4,x64_sse4_noavx,
565 x64_avx,x64_avx512bw,x64_avx512dq,aes,apx_ndd,
566 sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx,
567 avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,avx512f_512,
568 noavx512f,avx512bw,avx512bw_512,noavx512bw,avx512dq,
569 noavx512dq,fma_or_avx512vl,avx512vl,noavx512vl,avxvnni,
570 avx512vnnivl,avx512fp16,avxifma,avx512ifmavl,avxneconvert,
571 avx512bf16vl,vpclmulqdqvl,avx_noavx512f,avx_noavx512vl"
572 (const_string "base"))
573
574 ;; The (bounding maximum) length of an instruction immediate.
;; Clauses are tried in order, so the sse4arg/fma4 clause (1) takes
;; precedence over the later clause giving 0 for the i387, sse and mmx
;; units.  A "type" that no clause handles falls through to the
;; gcc_unreachable () default at the end.
575 (define_attr "length_immediate" ""
576 (cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave,
577 bitmanip,imulx,msklog,mskmov")
578 (const_int 0)
579 (ior (eq_attr "type" "sse4arg")
580 (eq_attr "isa" "fma4"))
581 (const_int 1)
582 (eq_attr "unit" "i387,sse,mmx")
583 (const_int 0)
584 (eq_attr "type" "alu,alu1,negnot,imovx,ishift,ishiftx,ishift1,
585 rotate,rotatex,rotate1,imul,icmp,push,pop")
586 (symbol_ref "ix86_attr_length_immediate_default (insn, true)")
587 (eq_attr "type" "imov,test")
588 (symbol_ref "ix86_attr_length_immediate_default (insn, false)")
589 (eq_attr "type" "call")
590 (if_then_else (match_operand 0 "constant_call_address_operand")
591 (const_int 4)
592 (const_int 0))
593 (eq_attr "type" "callv")
594 (if_then_else (match_operand 1 "constant_call_address_operand")
595 (const_int 4)
596 (const_int 0))
597 ;; We don't know the size before shorten_branches. Expect
598 ;; the instruction to fit for better scheduling.
599 (eq_attr "type" "ibr")
600 (const_int 1)
601 ]
602 (symbol_ref "/* Update immediate_length and other attributes! */
603 gcc_unreachable (),1")))
604
605 ;; The (bounding maximum) length of an instruction address.
;; Zero for the str, other, multi and fxch types and for call/callv insns
;; whose target matches constant_call_address_operand; everything else is
;; computed by ix86_attr_length_address_default.
606 (define_attr "length_address" ""
607 (cond [(eq_attr "type" "str,other,multi,fxch")
608 (const_int 0)
609 (and (eq_attr "type" "call")
610 (match_operand 0 "constant_call_address_operand"))
611 (const_int 0)
612 (and (eq_attr "type" "callv")
613 (match_operand 1 "constant_call_address_operand"))
614 (const_int 0)
615 ]
616 (symbol_ref "ix86_attr_length_address_default (insn)")))
617
618 ;; Set when length prefix is used.
;; 1 for HImode insns and for sse-unit insns in V2DF or TI mode.  The
;; ssemuladd, sse4arg, sseiadd1 and ssecvt1 types are forced to 0 by the
;; first clause; everything else defaults to 0.
619 (define_attr "prefix_data16" ""
620 (cond [(eq_attr "type" "ssemuladd,sse4arg,sseiadd1,ssecvt1")
621 (const_int 0)
622 (eq_attr "mode" "HI")
623 (const_int 1)
624 (and (eq_attr "unit" "sse") (eq_attr "mode" "V2DF,TI"))
625 (const_int 1)
626 ]
627 (const_int 0)))
628
629 ;; Set when string REP prefix is used.
;; 1 for sse-unit insns in SF or DF mode, except the ssemuladd, sse4arg,
;; sseiadd1 and ssecvt1 types, which the first clause forces to 0.
630 (define_attr "prefix_rep" ""
631 (cond [(eq_attr "type" "ssemuladd,sse4arg,sseiadd1,ssecvt1")
632 (const_int 0)
633 (and (eq_attr "unit" "sse") (eq_attr "mode" "SF,DF"))
634 (const_int 1)
635 ]
636 (const_int 0)))
637
638 ;; Set when 0f opcode prefix is used.
;; 1 for the imovx, setcc, icmov, bitmanip, msklog and mskmov types and
;; for every insn of the sse or mmx unit; 0 otherwise.
639 (define_attr "prefix_0f" ""
640 (if_then_else
641 (ior (eq_attr "type" "imovx,setcc,icmov,bitmanip,msklog,mskmov")
642 (eq_attr "unit" "sse,mmx"))
643 (const_int 1)
644 (const_int 0)))
645
646 ;; Set when REX opcode prefix is used.
;; Always 0 unless TARGET_64BIT.  Otherwise 1 when any of these holds:
;;  - the mode is DI, the type is none of push, pop, call, callv, leave,
;;    ibr, and the unit is not mmx;
;;  - the mode is QI and x86_extended_QIreg_mentioned_p (insn) is true;
;;  - x86_extended_reg_mentioned_p (insn) is true;
;;  - the type is imovx and operand 1 is a QImode ext_QIreg_operand.
647 (define_attr "prefix_rex" ""
648 (cond [(not (match_test "TARGET_64BIT"))
649 (const_int 0)
650 (and (eq_attr "mode" "DI")
651 (and (eq_attr "type" "!push,pop,call,callv,leave,ibr")
652 (eq_attr "unit" "!mmx")))
653 (const_int 1)
654 (and (eq_attr "mode" "QI")
655 (match_test "x86_extended_QIreg_mentioned_p (insn)"))
656 (const_int 1)
657 (match_test "x86_extended_reg_mentioned_p (insn)")
658 (const_int 1)
659 (and (eq_attr "type" "imovx")
660 (match_operand:QI 1 "ext_QIreg_operand"))
661 (const_int 1)
662 ]
663 (const_int 0)))
664
665 ;; There are also additional prefixes in 3DNOW, SSSE3.
666 ;; 3DNOW has 0f0f prefix, SSSE3 and SSE4_{1,2} 0f38/0f3a.
667 ;; While generally inapplicable to VEX/XOP/EVEX encodings, "length_vex" uses
668 ;; the attribute evaluating to zero to know that VEX2 encoding may be usable.
;; Here only the ssemuladd, sse4arg, sseiadd1 and ssecvt1 types get 1;
;; the default is 0.
669 (define_attr "prefix_extra" ""
670 (cond [(eq_attr "type" "ssemuladd,sse4arg,sseiadd1,ssecvt1")
671 (const_int 1)
672 ]
673 (const_int 0)))
674
675 ;; Prefix used: original, VEX, maybe VEX, EVEX or maybe EVEX.
;; The default is chosen from the mode first (OI/V8SF/V4DF -> vex,
;; XI/V16SF/V8DF -> evex) and then from the type (ssemuladd -> vex for
;; isa fma4, else maybe_evex; sse4arg -> vex); otherwise it is "orig".
676 (define_attr "prefix" "orig,vex,maybe_vex,evex,maybe_evex"
677 (cond [(eq_attr "mode" "OI,V8SF,V4DF")
678 (const_string "vex")
679 (eq_attr "mode" "XI,V16SF,V8DF")
680 (const_string "evex")
681 (eq_attr "type" "ssemuladd")
682 (if_then_else (eq_attr "isa" "fma4")
683 (const_string "vex")
684 (const_string "maybe_evex"))
685 (eq_attr "type" "sse4arg")
686 (const_string "vex")
687 ]
688 (const_string "orig")))
689
690 ;; VEX W bit is used.  Defaults to 0; "length_vex" below tests it.
691 (define_attr "prefix_vex_w" "" (const_int 0))
692
693 ;; The length of VEX prefix
694 ;; Only instructions with 0f prefix can have 2 byte VEX prefix,
695 ;; 0f38/0f3a prefixes can't. In i386.md 0f3[8a] is
696 ;; still prefix_0f 1, with prefix_extra 1.
;; The second argument passed to ix86_attr_length_vex_default is true
;; exactly when prefix_0f is 1 and prefix_extra is 0; the third is true
;; exactly when prefix_vex_w is 1.
697 (define_attr "length_vex" ""
698 (if_then_else (and (eq_attr "prefix_0f" "1")
699 (eq_attr "prefix_extra" "0"))
700 (if_then_else (eq_attr "prefix_vex_w" "1")
701 (symbol_ref "ix86_attr_length_vex_default (insn, true, true)")
702 (symbol_ref "ix86_attr_length_vex_default (insn, true, false)"))
703 (if_then_else (eq_attr "prefix_vex_w" "1")
704 (symbol_ref "ix86_attr_length_vex_default (insn, false, true)")
705 (symbol_ref "ix86_attr_length_vex_default (insn, false, false)"))))
706
707 ;; 4-byte EVEX prefix and 1 byte opcode, hence the constant 5.
708 (define_attr "length_evex" "" (const_int 5))
709
710 ;; Set when modrm byte is used.
;; Defaults to 1; the clauses list the cases that yield 0.  The last
;; clause (alu, alu1, icmp, test with operand 0 matching ax_reg_operand)
;; instead yields the result of a C comparison: 1 when length_immediate is
;; 0, or is 1 and the mode is not QI; otherwise 0.
711 (define_attr "modrm" ""
712 (cond [(eq_attr "type" "str,leave")
713 (const_int 0)
714 (eq_attr "unit" "i387")
715 (const_int 0)
716 (and (eq_attr "type" "incdec")
717 (and (not (match_test "TARGET_64BIT"))
718 (ior (match_operand:SI 1 "register_operand")
719 (match_operand:HI 1 "register_operand"))))
720 (const_int 0)
721 (and (eq_attr "type" "push")
722 (not (match_operand 1 "memory_operand")))
723 (const_int 0)
724 (and (eq_attr "type" "pop")
725 (not (match_operand 0 "memory_operand")))
726 (const_int 0)
727 (and (eq_attr "type" "imov")
728 (and (not (eq_attr "mode" "DI"))
729 (ior (and (match_operand 0 "register_operand")
730 (match_operand 1 "immediate_operand"))
731 (ior (and (match_operand 0 "ax_reg_operand")
732 (match_operand 1 "memory_displacement_only_operand"))
733 (and (match_operand 0 "memory_displacement_only_operand")
734 (match_operand 1 "ax_reg_operand"))))))
735 (const_int 0)
736 (and (eq_attr "type" "call")
737 (match_operand 0 "constant_call_address_operand"))
738 (const_int 0)
739 (and (eq_attr "type" "callv")
740 (match_operand 1 "constant_call_address_operand"))
741 (const_int 0)
742 (and (eq_attr "type" "alu,alu1,icmp,test")
743 (match_operand 0 "ax_reg_operand"))
744 (symbol_ref "(get_attr_length_immediate (insn) <= (get_attr_mode (insn) != MODE_QI))")
745 ]
746 (const_int 1)))
747
748 ;; The (bounding maximum) length of an instruction in bytes.
749 ;; ??? fistp and frndint are in fact fldcw/{fistp,frndint}/fldcw sequences.
750 ;; Later we may want to split them and compute proper length as for
751 ;; other insns.
;; Fixed values come first (16 for other/multi/fistp/frndint, 4 for fcmp),
;; then the i387 formula.  The EVEX clause is tested before the VEX clause,
;; so a maybe_vex/maybe_evex insn uses length_evex when TARGET_AVX512F and
;; length_vex when only TARGET_AVX holds.  The default sums the individual
;; prefix, modrm, immediate and address attributes plus 1 (presumably the
;; opcode byte -- confirm).
752 (define_attr "length" ""
753 (cond [(eq_attr "type" "other,multi,fistp,frndint")
754 (const_int 16)
755 (eq_attr "type" "fcmp")
756 (const_int 4)
757 (eq_attr "unit" "i387")
758 (plus (const_int 2)
759 (plus (attr "prefix_data16")
760 (attr "length_address")))
761 (ior (eq_attr "prefix" "evex")
762 (and (ior (eq_attr "prefix" "maybe_evex")
763 (eq_attr "prefix" "maybe_vex"))
764 (match_test "TARGET_AVX512F")))
765 (plus (attr "length_evex")
766 (plus (attr "length_immediate")
767 (plus (attr "modrm")
768 (attr "length_address"))))
769 (ior (eq_attr "prefix" "vex")
770 (and (ior (eq_attr "prefix" "maybe_vex")
771 (eq_attr "prefix" "maybe_evex"))
772 (match_test "TARGET_AVX")))
773 (plus (attr "length_vex")
774 (plus (attr "length_immediate")
775 (plus (attr "modrm")
776 (attr "length_address"))))]
777 (plus (plus (attr "modrm")
778 (plus (attr "prefix_0f")
779 (plus (attr "prefix_rex")
780 (plus (attr "prefix_extra")
781 (const_int 1)))))
782 (plus (attr "prefix_rep")
783 (plus (attr "prefix_data16")
784 (plus (attr "length_immediate")
785 (attr "length_address")))))))
786
787 ;; The `memory' attribute is `none' if no memory is referenced, `load' or
788 ;; `store' if there is a simple memory reference therein, or `unknown'
789 ;; if the instruction is complex.
;; `both' is produced, for example, when operands 0 and 1 are both memory
;; operands.  The clauses are tried in order; the type-specific ones come
;; before the generic operand 0 / operand 1 / operand 2 / operand 3 tests.
790
791 (define_attr "memory" "none,load,store,both,unknown"
792 (cond [(eq_attr "type" "other,multi,str,lwp")
793 (const_string "unknown")
794 (eq_attr "type" "lea,fcmov,fpspc")
795 (const_string "none")
796 (eq_attr "type" "fistp,leave")
797 (const_string "both")
798 (eq_attr "type" "frndint")
799 (const_string "load")
800 (eq_attr "type" "push")
801 (if_then_else (match_operand 1 "memory_operand")
802 (const_string "both")
803 (const_string "store"))
804 (eq_attr "type" "pop")
805 (if_then_else (match_operand 0 "memory_operand")
806 (const_string "both")
807 (const_string "load"))
808 (eq_attr "type" "setcc")
809 (if_then_else (match_operand 0 "memory_operand")
810 (const_string "store")
811 (const_string "none"))
812 (eq_attr "type" "icmp,test,ssecmp,ssecomi,mmxcmp,fcmp")
813 (if_then_else (ior (match_operand 0 "memory_operand")
814 (match_operand 1 "memory_operand"))
815 (const_string "load")
816 (const_string "none"))
817 (eq_attr "type" "ibr")
818 (if_then_else (match_operand 0 "memory_operand")
819 (const_string "load")
820 (const_string "none"))
821 (eq_attr "type" "call")
822 (if_then_else (match_operand 0 "constant_call_address_operand")
823 (const_string "none")
824 (const_string "load"))
825 (eq_attr "type" "callv")
826 (if_then_else (match_operand 1 "constant_call_address_operand")
827 (const_string "none")
828 (const_string "load"))
829 (and (eq_attr "type" "alu1,negnot,ishift1,rotate1,sselog1,sseshuf1")
830 (match_operand 1 "memory_operand"))
831 (const_string "both")
832 (and (match_operand 0 "memory_operand")
833 (match_operand 1 "memory_operand"))
834 (const_string "both")
835 (match_operand 0 "memory_operand")
836 (const_string "store")
837 (match_operand 1 "memory_operand")
838 (const_string "load")
839 (and (eq_attr "type"
840 "!alu1,negnot,ishift1,rotate1,
841 imov,imovx,icmp,test,bitmanip,
842 fmov,fcmp,fsgn,
843 sse,ssemov,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,
844 sselog1,sseshuf1,sseadd1,sseiadd1,sseishft1,
845 mmx,mmxmov,mmxcmp,mmxcvt,mskmov,msklog")
846 (match_operand 2 "memory_operand"))
847 (const_string "load")
848 (and (eq_attr "type" "icmov,ssemuladd,sse4arg")
849 (match_operand 3 "memory_operand"))
850 (const_string "load")
851 ]
852 (const_string "none")))
853
854 ;; Indicates if an instruction has both an immediate and a displacement.
;; "unknown" for the other and multi types.  "true" when operand 0 matches
;; memory_displacement_operand and the immediate is operand 1 (icmp, test,
;; imov, alu1, ishift1, rotate1) or operand 2 (alu, ishift, ishiftx,
;; rotate, rotatex, imul, idiv).  "false" otherwise.
855
856 (define_attr "imm_disp" "false,true,unknown"
857 (cond [(eq_attr "type" "other,multi")
858 (const_string "unknown")
859 (and (eq_attr "type" "icmp,test,imov,alu1,ishift1,rotate1")
860 (and (match_operand 0 "memory_displacement_operand")
861 (match_operand 1 "immediate_operand")))
862 (const_string "true")
863 (and (eq_attr "type" "alu,ishift,ishiftx,rotate,rotatex,imul,idiv")
864 (and (match_operand 0 "memory_displacement_operand")
865 (match_operand 2 "immediate_operand")))
866 (const_string "true")
867 ]
868 (const_string "false")))
869
870 ;; Indicates if an FP operation has an integer source.  Defaults to "false".
871
872 (define_attr "fp_int_src" "false,true"
873 (const_string "false"))
874
875 ;; Defines rounding mode of an FP operation.  Defaults to "any".
876
877 (define_attr "i387_cw" "roundeven,floor,ceil,trunc,uninitialized,any"
878 (const_string "any"))
879
880 ;; Define attribute to indicate AVX insns with partial XMM register update.
;; Defaults to "false".
881 (define_attr "avx_partial_xmm_update" "false,true"
882 (const_string "false"))
883
884 ;; Define attribute to classify add/sub insns that consumes carry flag (CF)
;; Defaults to "0".
885 (define_attr "use_carry" "0,1" (const_string "0"))
886
887 ;; Define attribute to indicate unaligned ssemov insns.  Defaults to "0".
888 (define_attr "movu" "0,1" (const_string "0"))
889
890 ;; Define attribute to limit memory address register set.
;; Defaults to "gpr32".
891 (define_attr "addr" "gpr8,gpr16,gpr32" (const_string "gpr32"))
892
893 ;; Define instruction set of MMX instructions
;; Defaults to "base"; the non-base values are tested by the "enabled"
;; attribute below.
894 (define_attr "mmx_isa" "base,native,sse,sse_noavx,avx"
895 (const_string "base"))
896
;; The "enabled" attribute.  The first clause whose "isa" or "mmx_isa"
;; test matches yields its target condition; if none matches the value
;; is 1.  All "isa" clauses precede the "mmx_isa" ones, so "mmx_isa" is
;; only consulted when "isa" matches no clause (i.e. is "base").
897 (define_attr "enabled" ""
898 (cond [(eq_attr "isa" "x64") (symbol_ref "TARGET_64BIT")
899 (eq_attr "isa" "nox64") (symbol_ref "!TARGET_64BIT")
900 (eq_attr "isa" "x64_sse2")
901 (symbol_ref "TARGET_64BIT && TARGET_SSE2")
902 (eq_attr "isa" "x64_sse4")
903 (symbol_ref "TARGET_64BIT && TARGET_SSE4_1")
904 (eq_attr "isa" "x64_sse4_noavx")
905 (symbol_ref "TARGET_64BIT && TARGET_SSE4_1 && !TARGET_AVX")
906 (eq_attr "isa" "x64_avx")
907 (symbol_ref "TARGET_64BIT && TARGET_AVX")
908 (eq_attr "isa" "x64_avx512bw")
909 (symbol_ref "TARGET_64BIT && TARGET_AVX512BW")
910 (eq_attr "isa" "x64_avx512dq")
911 (symbol_ref "TARGET_64BIT && TARGET_AVX512DQ")
912 (eq_attr "isa" "aes") (symbol_ref "TARGET_AES")
913 (eq_attr "isa" "sse_noavx")
914 (symbol_ref "TARGET_SSE && !TARGET_AVX")
915 (eq_attr "isa" "sse2") (symbol_ref "TARGET_SSE2")
916 (eq_attr "isa" "sse2_noavx")
917 (symbol_ref "TARGET_SSE2 && !TARGET_AVX")
918 (eq_attr "isa" "sse3") (symbol_ref "TARGET_SSE3")
919 (eq_attr "isa" "sse3_noavx")
920 (symbol_ref "TARGET_SSE3 && !TARGET_AVX")
921 (eq_attr "isa" "sse4") (symbol_ref "TARGET_SSE4_1")
922 (eq_attr "isa" "sse4_noavx")
923 (symbol_ref "TARGET_SSE4_1 && !TARGET_AVX")
924 (eq_attr "isa" "avx") (symbol_ref "TARGET_AVX")
925 (eq_attr "isa" "avx_noavx512f")
926 (symbol_ref "TARGET_AVX && !TARGET_AVX512F")
927 (eq_attr "isa" "avx_noavx512vl")
928 (symbol_ref "TARGET_AVX && !TARGET_AVX512VL")
929 (eq_attr "isa" "noavx") (symbol_ref "!TARGET_AVX")
930 (eq_attr "isa" "avx2") (symbol_ref "TARGET_AVX2")
931 (eq_attr "isa" "noavx2") (symbol_ref "!TARGET_AVX2")
932 (eq_attr "isa" "bmi") (symbol_ref "TARGET_BMI")
933 (eq_attr "isa" "bmi2") (symbol_ref "TARGET_BMI2")
934 (eq_attr "isa" "fma4") (symbol_ref "TARGET_FMA4")
935 (eq_attr "isa" "fma") (symbol_ref "TARGET_FMA")
936 (eq_attr "isa" "fma_or_avx512vl")
937 (symbol_ref "TARGET_FMA || TARGET_AVX512VL")
938 (eq_attr "isa" "avx512f") (symbol_ref "TARGET_AVX512F")
939 (eq_attr "isa" "avx512f_512")
940 (symbol_ref "TARGET_AVX512F && TARGET_EVEX512")
941 (eq_attr "isa" "noavx512f") (symbol_ref "!TARGET_AVX512F")
942 (eq_attr "isa" "avx512bw") (symbol_ref "TARGET_AVX512BW")
943 (eq_attr "isa" "avx512bw_512")
944 (symbol_ref "TARGET_AVX512BW && TARGET_EVEX512")
945 (eq_attr "isa" "noavx512bw") (symbol_ref "!TARGET_AVX512BW")
946 (eq_attr "isa" "avx512dq") (symbol_ref "TARGET_AVX512DQ")
947 (eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ")
948 (eq_attr "isa" "avx512vl") (symbol_ref "TARGET_AVX512VL")
949 (eq_attr "isa" "noavx512vl") (symbol_ref "!TARGET_AVX512VL")
950 (eq_attr "isa" "avxvnni") (symbol_ref "TARGET_AVXVNNI")
951 (eq_attr "isa" "avx512vnnivl")
952 (symbol_ref "TARGET_AVX512VNNI && TARGET_AVX512VL")
953 (eq_attr "isa" "avx512fp16")
954 (symbol_ref "TARGET_AVX512FP16")
955 (eq_attr "isa" "avxifma") (symbol_ref "TARGET_AVXIFMA")
956 (eq_attr "isa" "avx512ifmavl")
957 (symbol_ref "TARGET_AVX512IFMA && TARGET_AVX512VL")
958 (eq_attr "isa" "avxneconvert") (symbol_ref "TARGET_AVXNECONVERT")
959 (eq_attr "isa" "avx512bf16vl")
960 (symbol_ref "TARGET_AVX512BF16 && TARGET_AVX512VL")
961 (eq_attr "isa" "vpclmulqdqvl")
962 (symbol_ref "TARGET_VPCLMULQDQ && TARGET_AVX512VL")
963 (eq_attr "isa" "apx_ndd")
964 (symbol_ref "TARGET_APX_NDD")
965
966 (eq_attr "mmx_isa" "native")
967 (symbol_ref "!TARGET_MMX_WITH_SSE")
968 (eq_attr "mmx_isa" "sse")
969 (symbol_ref "TARGET_MMX_WITH_SSE")
970 (eq_attr "mmx_isa" "sse_noavx")
971 (symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")
972 (eq_attr "mmx_isa" "avx")
973 (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
974 ]
975 (const_int 1)))
976
;; Numeric attributes "preferred_for_size" and "preferred_for_speed";
;; both default to 1.
977 (define_attr "preferred_for_size" "" (const_int 1))
978 (define_attr "preferred_for_speed" "" (const_int 1))
979
980 ;; Describe a user's asm statement: length 128, type "multi".
981 (define_asm_attributes
982 [(set_attr "length" "128")
983 (set_attr "type" "multi")])
984
;; Code iterators over the basic arithmetic operators.
985 (define_code_iterator plusminus [plus minus])
986 (define_code_iterator plusminusmult [plus minus mult])
987 (define_code_iterator plusminusmultdiv [plus minus mult div])
988
;; Iterator over the ss_/us_ variants of plus and minus.
989 (define_code_iterator sat_plusminus [ss_plus us_plus ss_minus us_minus])
990
991 ;; Base name for insn mnemonic.
992 (define_code_attr plusminus_mnemonic
993 [(plus "add") (ss_plus "adds") (us_plus "addus")
994 (minus "sub") (ss_minus "subs") (us_minus "subus")])
995
;; Iterator over mult and div, and the matching mnemonic base name.
996 (define_code_iterator multdiv [mult div])
997
998 (define_code_attr multdiv_mnemonic
999 [(mult "mul") (div "div")])
1000
1001 ;; Mark commutative operators as such in constraints.
;; Expands to "%" for plus, ss_plus, us_plus and mult, and to the empty
;; string for minus, ss_minus, us_minus and div.
1002 (define_code_attr comm [(plus "%") (ss_plus "%") (us_plus "%")
1003 (minus "") (ss_minus "") (us_minus "")
1004 (mult "%") (div "")])
1005
1006 ;; Mapping of max and min
1007 (define_code_iterator maxmin [smax smin umax umin])
1008
1009 ;; Mapping of signed max and min
1010 (define_code_iterator smaxmin [smax smin])
1011
1012 ;; Mapping of unsigned max and min
1013 (define_code_iterator umaxmin [umax umin])
1014
1015 ;; Base name for integer and FP insn mnemonic
;; maxmin_float is defined only for the signed codes smax and smin.
1016 (define_code_attr maxmin_int [(smax "maxs") (smin "mins")
1017 (umax "maxu") (umin "minu")])
1018 (define_code_attr maxmin_float [(smax "max") (smin "min")])
1019
;; Int iterator over the two IEEE max/min unspecs, with the matching
;; "max"/"min" name attribute.
1020 (define_int_iterator IEEE_MAXMIN
1021 [UNSPEC_IEEE_MAX
1022 UNSPEC_IEEE_MIN])
1023
1024 (define_int_attr ieee_maxmin
1025 [(UNSPEC_IEEE_MAX "max")
1026 (UNSPEC_IEEE_MIN "min")])
1027
1028 ;; Mapping of logic operators
;; any_logic covers and/ior/xor, any_or only ior/xor, fpint_logic only
;; and/xor.
1029 (define_code_iterator any_logic [and ior xor])
1030 (define_code_iterator any_or [ior xor])
1031 (define_code_iterator fpint_logic [and xor])
1032
1033 ;; Base name for insn mnemonic.
1034 (define_code_attr logic [(and "and") (ior "or") (xor "xor")])
1035
1036 ;; Mapping of logic-shift operators
1037 (define_code_iterator any_lshift [ashift lshiftrt])
1038
1039 ;; Mapping of shift-right operators
1040 (define_code_iterator any_shiftrt [lshiftrt ashiftrt])
1041
1042 ;; Mapping of all shift operators
1043 (define_code_iterator any_shift [ashift lshiftrt ashiftrt])
1044
1045 ;; Base name for insn mnemonic.
;; "shift" gives sal/shr/sar and "vshift" gives sll/srl/sra for the same
;; three codes.
1046 (define_code_attr shift [(ashift "sal") (lshiftrt "shr") (ashiftrt "sar")])
1047 (define_code_attr vshift [(ashift "sll") (lshiftrt "srl") (ashiftrt "sra")])
1048
1049 ;; Mapping of rotate operators
1050 (define_code_iterator any_rotate [rotate rotatert])
1051
1052 ;; Base name for insn mnemonic.
1053 (define_code_attr rotate [(rotate "rol") (rotatert "ror")])
1054
1055 ;; Mapping of abs neg operators
1056 (define_code_iterator absneg [abs neg])
1057
1058 ;; Mapping of abs neg operators to logic operation
1059 (define_code_attr absneg_op [(abs "and") (neg "xor")])
1060
1061 ;; Base name for x87 insn mnemonic.
1062 (define_code_attr absneg_mnemonic [(abs "fabs") (neg "fchs")])
1063
1064 ;; Mapping of extend operators
1065 (define_code_iterator any_extend [sign_extend zero_extend])
1066
1067 ;; Mapping of highpart multiply operators
1068 (define_code_iterator any_mul_highpart [smul_highpart umul_highpart])
1069
1070 ;; Prefix for insn mnemonic.
;; Signed codes map to "i", unsigned codes to the empty string.
1071 (define_code_attr sgnprefix [(sign_extend "i") (zero_extend "")
1072 (smul_highpart "i") (umul_highpart "")
1073 (div "i") (udiv "")])
1074 ;; Prefix for define_insn
;; "s" yields "s"/"u"; "u" yields ""/"u"; "u_bool" yields the strings
;; "false"/"true" for the signed/unsigned code respectively.
1075 (define_code_attr s [(sign_extend "s") (zero_extend "u")
1076 (smul_highpart "s") (umul_highpart "u")])
1077 (define_code_attr u [(sign_extend "") (zero_extend "u")
1078 (div "") (udiv "u")])
1079 (define_code_attr u_bool [(sign_extend "false") (zero_extend "true")
1080 (div "false") (udiv "true")])
1081
1082 ;; Used in signed and unsigned truncations.
1083 (define_code_iterator any_truncate [ss_truncate truncate us_truncate])
1084 ;; Instruction suffix for truncations.
1085 (define_code_attr trunsuffix
1086 [(ss_truncate "s") (truncate "") (us_truncate "us")])
1087
1088 ;; Instruction suffix for SSE sign and zero extensions.
1089 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
1090
1091 ;; Used in signed and unsigned fix.
;; The three attributes give, for fix / unsigned_fix respectively, the
;; strings "" / "u", "" / "uns" and "s" / "u".
1092 (define_code_iterator any_fix [fix unsigned_fix])
1093 (define_code_attr fixsuffix [(fix "") (unsigned_fix "u")])
1094 (define_code_attr fixunssuffix [(fix "") (unsigned_fix "uns")])
1095 (define_code_attr fixprefix [(fix "s") (unsigned_fix "u")])
1096
1097 ;; Used in signed and unsigned float.
;; The attributes mirror the fix ones above: "" / "u", "" / "uns" and
;; "s" / "u" for float / unsigned_float.
1098 (define_code_iterator any_float [float unsigned_float])
1099 (define_code_attr floatsuffix [(float "") (unsigned_float "u")])
1100 (define_code_attr floatunssuffix [(float "") (unsigned_float "uns")])
1101 (define_code_attr floatprefix [(float "s") (unsigned_float "u")])
1102
1103 ;; Base name for expression
;; Maps an RTL code to a name fragment, e.g. plus -> "add",
;; ashift -> "ashl", sign_extend -> "extend".
1104 (define_code_attr insn
1105 [(plus "add") (ss_plus "ssadd") (us_plus "usadd")
1106 (minus "sub") (ss_minus "sssub") (us_minus "ussub")
1107 (sign_extend "extend") (zero_extend "zero_extend")
1108 (ashift "ashl") (lshiftrt "lshr") (ashiftrt "ashr")
1109 (rotate "rotl") (rotatert "rotr")
1110 (mult "mul") (div "div")])
1111
;; The "x"-suffixed iterators below list DImode unconditionally, whereas
;; SWI, SWI248 and SWI48 only include DImode when TARGET_64BIT.
1112 ;; All integer modes.
1113 (define_mode_iterator SWI1248x [QI HI SI DI])
1114
1115 ;; All integer modes without QImode.
1116 (define_mode_iterator SWI248x [HI SI DI])
1117
1118 ;; All integer modes without QImode and HImode.
1119 (define_mode_iterator SWI48x [SI DI])
1120
1121 ;; All integer modes without SImode and DImode.
1122 (define_mode_iterator SWI12 [QI HI])
1123
1124 ;; All integer modes without DImode.
1125 (define_mode_iterator SWI124 [QI HI SI])
1126
1127 ;; All integer modes without QImode and DImode.
1128 (define_mode_iterator SWI24 [HI SI])
1129
1130 ;; Single word integer modes.
1131 (define_mode_iterator SWI [QI HI SI (DI "TARGET_64BIT")])
1132
1133 ;; Single word integer modes without QImode.
1134 (define_mode_iterator SWI248 [HI SI (DI "TARGET_64BIT")])
1135
1136 ;; Single word integer modes without QImode and HImode.
1137 (define_mode_iterator SWI48 [SI (DI "TARGET_64BIT")])
1138
;; In the "math-dependent" iterators QImode and HImode are only included
;; when TARGET_QIMODE_MATH and TARGET_HIMODE_MATH hold, respectively.
1139 ;; All math-dependent single and double word integer modes.
1140 (define_mode_iterator SDWIM [(QI "TARGET_QIMODE_MATH")
1141 (HI "TARGET_HIMODE_MATH")
1142 SI DI (TI "TARGET_64BIT")])
1143
1144 ;; Math-dependent single word integer modes.
1145 (define_mode_iterator SWIM [(QI "TARGET_QIMODE_MATH")
1146 (HI "TARGET_HIMODE_MATH")
1147 SI (DI "TARGET_64BIT")])
1148
1149 ;; Math-dependent integer modes without DImode.
1150 (define_mode_iterator SWIM124 [(QI "TARGET_QIMODE_MATH")
1151 (HI "TARGET_HIMODE_MATH")
1152 SI])
1153
1154 ;; Math-dependent integer modes with DImode.
1155 (define_mode_iterator SWIM1248x
1156 [(QI "TARGET_QIMODE_MATH")
1157 (HI "TARGET_HIMODE_MATH")
1158 SI DI])
1159
1160 ;; Math-dependent single word integer modes without QImode.
1161 (define_mode_iterator SWIM248 [(HI "TARGET_HIMODE_MATH")
1162 SI (DI "TARGET_64BIT")])
1163
1164 ;; Double word integer modes.
;; Exactly one entry is enabled: DImode when !TARGET_64BIT, TImode otherwise.
1165 (define_mode_iterator DWI [(DI "!TARGET_64BIT")
1166 (TI "TARGET_64BIT")])
1167
1168 ;; SWI and DWI together.
1169 (define_mode_iterator SWIDWI [QI HI SI DI (TI "TARGET_64BIT")])
1170
1171 ;; SWI48 and DWI together.
1172 (define_mode_iterator SWI48DWI [SI DI (TI "TARGET_64BIT")])
1173
1174 ;; GET_MODE_SIZE for selected modes. As GET_MODE_SIZE is not a
1175 ;; compile time constant, it is faster to use <MODE_SIZE> than
1176 ;; GET_MODE_SIZE (<MODE>mode). For XFmode, whose size depends on
1177 ;; command line options, just use the GET_MODE_SIZE macro.
;; Every other entry is the mode size in bytes, written as a string.
1178 (define_mode_attr MODE_SIZE [(QI "1") (HI "2") (SI "4") (DI "8")
1179 (TI "16") (HF "2") (BF "2") (SF "4") (DF "8")
1180 (XF "GET_MODE_SIZE (XFmode)")
1181 (V16QI "16") (V32QI "32") (V64QI "64")
1182 (V8HI "16") (V16HI "32") (V32HI "64")
1183 (V4SI "16") (V8SI "32") (V16SI "64")
1184 (V2DI "16") (V4DI "32") (V8DI "64")
1185 (V1TI "16") (V2TI "32") (V4TI "64")
1186 (V2DF "16") (V4DF "32") (V8DF "64")
1187 (V4SF "16") (V8SF "32") (V16SF "64")
1188 (V8HF "16") (V16HF "32") (V32HF "64")
1189 (V4HF "8") (V2HF "4")
1190 (V8BF "16") (V16BF "32") (V32BF "64")
1191 (V4BF "8") (V2BF "4")])
1192
1193 ;; Double word integer modes as mode attribute.
;; Maps a mode to the integer mode of twice its size, in upper case
;; (DWI) and lower case (dwi).
1194 (define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI") (TI "OI")])
1195 (define_mode_attr dwi [(QI "hi") (HI "si") (SI "di") (DI "ti") (TI "oi")])
1196
1197 ;; Half sized integer modes.
;; Only defined for TImode and DImode.
1198 (define_mode_attr HALF [(TI "DI") (DI "SI")])
1199 (define_mode_attr half [(TI "di") (DI "si")])
1200
1201 ;; LEA mode corresponding to an integer mode
;; QImode and HImode both map to SImode.
1202 (define_mode_attr LEAMODE [(QI "SI") (HI "SI") (SI "SI") (DI "DI")])
1203
1204 ;; Half mode for double word integer modes.
;; SImode when !TARGET_64BIT, DImode when TARGET_64BIT.
1205 (define_mode_iterator DWIH [(SI "!TARGET_64BIT")
1206 (DI "TARGET_64BIT")])
1207
1208 ;; Instruction suffix for integer modes.
1209 (define_mode_attr imodesuffix [(QI "b") (HI "w") (SI "l") (DI "q")])
1210
1211 ;; Instruction suffix for masks.
;; Differs from imodesuffix only for SImode ("d" instead of "l").
1212 (define_mode_attr mskmodesuffix [(QI "b") (HI "w") (SI "d") (DI "q")])
1213
1214 ;; Pointer size prefix for integer modes (Intel asm dialect)
1215 (define_mode_attr iptrsize [(QI "BYTE")
1216 (HI "WORD")
1217 (SI "DWORD")
1218 (DI "QWORD")])
1219
1220 ;; Register class for integer modes.
;; Constraint letter: "q" for QImode, "r" for the wider modes.
1221 (define_mode_attr r [(QI "q") (HI "r") (SI "r") (DI "r")])
1222
1223 ;; Immediate operand constraint for integer modes.
1224 (define_mode_attr i [(QI "n") (HI "n") (SI "e") (DI "e")])
1225
1226 ;; General operand constraint for word modes.
1227 (define_mode_attr g [(QI "qmn") (HI "rmn") (SI "rme") (DI "rme")])
1228
1229 ;; Memory operand constraint for word modes.
1230 (define_mode_attr m [(QI "m") (HI "m") (SI "BM") (DI "BM")])
1231
1232 ;; Immediate operand constraint for double integer modes.
1233 (define_mode_attr di [(SI "nF") (DI "Wd")])
1234
1235 ;; Immediate operand constraint for shifts.
1236 (define_mode_attr S [(QI "I") (HI "I") (SI "I") (DI "J") (TI "O")])
1237 (define_mode_attr KS [(QI "Wb") (HI "Ww") (SI "I") (DI "J")])
1238
1239 ;; Print register name in the specified mode.
;; Operand-modifier letter to use for each mode.
1240 (define_mode_attr k [(QI "b") (HI "w") (SI "k") (DI "q")])
1241
;; The attributes below select an operand predicate by mode.  In each of
;; them QImode and HImode use the generic predicate while the wider modes
;; use an x86_64_* variant.
1242 ;; General operand predicate for integer modes.
1243 (define_mode_attr general_operand
1244 [(QI "general_operand")
1245 (HI "general_operand")
1246 (SI "x86_64_general_operand")
1247 (DI "x86_64_general_operand")
1248 (TI "x86_64_general_operand")])
1249
1250 ;; General operand predicate for integer modes, where for TImode
1251 ;; we need both words of the operand to be general operands.
1252 (define_mode_attr general_hilo_operand
1253 [(QI "general_operand")
1254 (HI "general_operand")
1255 (SI "x86_64_general_operand")
1256 (DI "x86_64_general_operand")
1257 (TI "x86_64_hilo_general_operand")])
1258
1259 ;; General sign extend operand predicate for integer modes,
1260 ;; which disallows VOIDmode operands and thus it is suitable
1261 ;; for use inside sign_extend.
1262 (define_mode_attr general_sext_operand
1263 [(QI "sext_operand")
1264 (HI "sext_operand")
1265 (SI "x86_64_sext_operand")
1266 (DI "x86_64_sext_operand")])
1267
1268 ;; General sign/zero extend operand predicate for integer modes.
1269 (define_mode_attr general_szext_operand
1270 [(QI "general_operand")
1271 (HI "general_operand")
1272 (SI "x86_64_szext_general_operand")
1273 (DI "x86_64_szext_general_operand")
1274 (TI "x86_64_hilo_general_operand")])
1275
;; Nonmemory counterpart of general_szext_operand (no TImode entry).
1276 (define_mode_attr nonmemory_szext_operand
1277 [(QI "nonmemory_operand")
1278 (HI "nonmemory_operand")
1279 (SI "x86_64_szext_nonmemory_operand")
1280 (DI "x86_64_szext_nonmemory_operand")])
1281
1282 ;; Immediate operand predicate for integer modes.
1283 (define_mode_attr immediate_operand
1284 [(QI "immediate_operand")
1285 (HI "immediate_operand")
1286 (SI "x86_64_immediate_operand")
1287 (DI "x86_64_immediate_operand")])
1288
1289 ;; Nonmemory operand predicate for integer modes.
1290 (define_mode_attr nonmemory_operand
1291 [(QI "nonmemory_operand")
1292 (HI "nonmemory_operand")
1293 (SI "x86_64_nonmemory_operand")
1294 (DI "x86_64_nonmemory_operand")])
1295
1296 ;; Operand predicate for shifts.
;; Only DImode and TImode use a predicate other than nonimmediate_operand.
1297 (define_mode_attr shift_operand
1298 [(QI "nonimmediate_operand")
1299 (HI "nonimmediate_operand")
1300 (SI "nonimmediate_operand")
1301 (DI "shiftdi_operand")
1302 (TI "register_operand")])
1303
1304 ;; Operand predicate for shift argument.
;; QImode, HImode and SImode all use the 1..31 predicate; DImode uses 1..63.
1305 (define_mode_attr shift_immediate_operand
1306 [(QI "const_1_to_31_operand")
1307 (HI "const_1_to_31_operand")
1308 (SI "const_1_to_31_operand")
1309 (DI "const_1_to_63_operand")])
1310
1311 ;; Input operand predicate for arithmetic left shifts.
1312 (define_mode_attr ashl_input_operand
1313 [(QI "nonimmediate_operand")
1314 (HI "nonimmediate_operand")
1315 (SI "nonimmediate_operand")
1316 (DI "ashldi_input_operand")
1317 (TI "reg_or_pm1_operand")])
1318
1319 ;; SSE and x87 SFmode and DFmode floating point modes
1320 (define_mode_iterator MODEF [SF DF])
1321
1322 ;; SSE floating point modes
;; HFmode is only included when TARGET_AVX512FP16.
1323 (define_mode_iterator MODEFH [(HF "TARGET_AVX512FP16") SF DF])
1324
1325 ;; All x87 floating point modes
1326 (define_mode_iterator X87MODEF [SF DF XF])
1327
1328 ;; All x87 floating point modes plus HFmode and BFmode
1329 (define_mode_iterator X87MODEFH [HF SF DF XF BF])
1330
1331 ;; All SSE floating point modes
;; ssevecmodef maps each of them to a vector mode name; TFmode maps to
;; itself.
1332 (define_mode_iterator SSEMODEF [HF SF DF TF])
1333 (define_mode_attr ssevecmodef [(HF "V8HF") (SF "V4SF") (DF "V2DF") (TF "TF")])
1334
1335 ;; SSE instruction suffix for various modes
1336 (define_mode_attr ssemodesuffix
1337 [(HF "sh") (SF "ss") (DF "sd")
1338 (V32HF "ph") (V16SF "ps") (V8DF "pd")
1339 (V16HF "ph") (V16BF "bf") (V8SF "ps") (V4DF "pd")
1340 (V8HF "ph") (V8BF "bf") (V4SF "ps") (V2DF "pd")
1341 (V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")
1342 (V32QI "b") (V16HI "w") (V8SI "d") (V4DI "q")
1343 (V64QI "b") (V32HI "w") (V16SI "d") (V8DI "q")])
1344
1345 ;; SSE vector suffix for floating point modes
1346 (define_mode_attr ssevecmodesuffix [(SF "ps") (DF "pd")])
1347
1348 ;; SSE vector mode corresponding to a scalar mode
;; ssevecmodelower is the lower-case form; it has no HF or BF entries.
1349 (define_mode_attr ssevecmode
1350 [(QI "V16QI") (HI "V8HI") (SI "V4SI") (DI "V2DI") (HF "V8HF") (BF "V8BF") (SF "V4SF") (DF "V2DF")])
1351 (define_mode_attr ssevecmodelower
1352 [(QI "v16qi") (HI "v8hi") (SI "v4si") (DI "v2di") (SF "v4sf") (DF "v2df")])
1353
1354 ;; AVX512F vector mode corresponding to a scalar mode
1355 (define_mode_attr avx512fvecmode
1356 [(QI "V64QI") (HI "V32HI") (SI "V16SI") (DI "V8DI") (SF "V16SF") (DF "V8DF")])
1357
1358 ;; Instruction suffix for REX 64bit operators.
;; rex64namesuffix is the matching fragment for names: empty for SImode,
;; "q" for DImode.
1359 (define_mode_attr rex64suffix [(SI "{l}") (DI "{q}")])
1360 (define_mode_attr rex64namesuffix [(SI "") (DI "q")])
1361
1362 ;; This mode iterator allows :P to be used for patterns that operate on
1363 ;; pointer-sized quantities. Exactly one of the two alternatives will match.
1364 (define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
1365
1366 ;; This mode iterator allows :W to be used for patterns that operate on
1367 ;; word_mode sized quantities.
1368 (define_mode_iterator W
1369 [(SI "word_mode == SImode") (DI "word_mode == DImode")])
1370
1371 ;; This mode iterator allows :PTR to be used for patterns that operate on
1372 ;; ptr_mode sized quantities.
1373 (define_mode_iterator PTR
1374 [(SI "ptr_mode == SImode") (DI "ptr_mode == DImode")])
1375 \f
1376 ;; Scheduling descriptions
1377
1378 (include "pentium.md")
1379 (include "ppro.md")
1380 (include "k6.md")
1381 (include "athlon.md")
1382 (include "bdver1.md")
1383 (include "bdver3.md")
1384 (include "btver2.md")
1385 (include "znver.md")
1386 (include "znver4.md")
1387 (include "geode.md")
1388 (include "atom.md")
1389 (include "slm.md")
1390 (include "glm.md")
1391 (include "core2.md")
1392 (include "haswell.md")
1393 (include "lujiazui.md")
1394 (include "yongfeng.md")
1395
1396 \f
1397 ;; Operand and operator predicates and constraints
1398
1399 (include "predicates.md")
1400 (include "constraints.md")
1401
1402 \f
1403 ;; Compare and branch/compare and store instructions.
1404
;; Integer compare-and-branch expander for the SWIM1248x modes.
;; Operand 0 is the comparison operator, operands 1 and 2 are the values
;; being compared and operand 3 is the target label.  When both compared
;; operands are MEMs, operand 1 is first forced into a register.  Code
;; generation is delegated to ix86_expand_branch, so the RTL template
;; above is never emitted as such (the preparation statements end in DONE).
1405 (define_expand "cbranch<mode>4"
1406 [(set (reg:CC FLAGS_REG)
1407 (compare:CC (match_operand:SWIM1248x 1 "nonimmediate_operand")
1408 (match_operand:SWIM1248x 2 "<general_operand>")))
1409 (set (pc) (if_then_else
1410 (match_operator 0 "ordered_comparison_operator"
1411 [(reg:CC FLAGS_REG) (const_int 0)])
1412 (label_ref (match_operand 3))
1413 (pc)))]
1414 ""
1415 {
1416 if (MEM_P (operands[1]) && MEM_P (operands[2]))
1417 operands[1] = force_reg (<MODE>mode, operands[1]);
1418 ix86_expand_branch (GET_CODE (operands[0]),
1419 operands[1], operands[2], operands[3]);
1420 DONE;
1421 })
1422
;; TImode compare-and-branch expander, available when
;; TARGET_64BIT || TARGET_SSE4_1.  Operand 2 must satisfy
;; ix86_timode_comparison_operand and the comparison code must satisfy
;; ix86_timode_comparison_operator.  Expansion is delegated to
;; ix86_expand_branch.
1423 (define_expand "cbranchti4"
1424 [(set (reg:CC FLAGS_REG)
1425 (compare:CC (match_operand:TI 1 "nonimmediate_operand")
1426 (match_operand:TI 2 "ix86_timode_comparison_operand")))
1427 (set (pc) (if_then_else
1428 (match_operator 0 "ix86_timode_comparison_operator"
1429 [(reg:CC FLAGS_REG) (const_int 0)])
1430 (label_ref (match_operand 3))
1431 (pc)))]
1432 "TARGET_64BIT || TARGET_SSE4_1"
1433 {
1434 ix86_expand_branch (GET_CODE (operands[0]),
1435 operands[1], operands[2], operands[3]);
1436 DONE;
1437 })
1438
;; OImode compare-and-branch expander, available when TARGET_AVX.
;; Both compared operands are nonimmediate_operand and only comparison
;; codes accepted by bt_comparison_operator are handled.  Expansion is
;; delegated to ix86_expand_branch.
1439 (define_expand "cbranchoi4"
1440 [(set (reg:CC FLAGS_REG)
1441 (compare:CC (match_operand:OI 1 "nonimmediate_operand")
1442 (match_operand:OI 2 "nonimmediate_operand")))
1443 (set (pc) (if_then_else
1444 (match_operator 0 "bt_comparison_operator"
1445 [(reg:CC FLAGS_REG) (const_int 0)])
1446 (label_ref (match_operand 3))
1447 (pc)))]
1448 "TARGET_AVX"
1449 {
1450 ix86_expand_branch (GET_CODE (operands[0]),
1451 operands[1], operands[2], operands[3]);
1452 DONE;
1453 })
1454
;; XImode compare-and-branch expander, available when
;; TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256.
;; Same shape as cbranchoi4: nonimmediate operands, comparison codes
;; limited to bt_comparison_operator, expansion via ix86_expand_branch.
1455 (define_expand "cbranchxi4"
1456 [(set (reg:CC FLAGS_REG)
1457 (compare:CC (match_operand:XI 1 "nonimmediate_operand")
1458 (match_operand:XI 2 "nonimmediate_operand")))
1459 (set (pc) (if_then_else
1460 (match_operator 0 "bt_comparison_operator"
1461 [(reg:CC FLAGS_REG) (const_int 0)])
1462 (label_ref (match_operand 3))
1463 (pc)))]
1464 "TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256"
1465 {
1466 ix86_expand_branch (GET_CODE (operands[0]),
1467 operands[1], operands[2], operands[3]);
1468 DONE;
1469 })
1470
;; Integer compare-and-store-flag expander for the SDWIM modes.
;; Operand 0 is the QImode result register, operand 1 the comparison
;; operator and operands 2 and 3 the values being compared.
;; For the double-word mode (TImode when TARGET_64BIT, DImode otherwise)
;; only EQ and NE are supported; any other code makes the expander FAIL.
;; For the remaining modes, if both compared operands are MEMs, operand 2
;; is forced into a register.  Expansion is delegated to ix86_expand_setcc.
1471 (define_expand "cstore<mode>4"
1472 [(set (reg:CC FLAGS_REG)
1473 (compare:CC (match_operand:SDWIM 2 "nonimmediate_operand")
1474 (match_operand:SDWIM 3 "<general_operand>")))
1475 (set (match_operand:QI 0 "register_operand")
1476 (match_operator 1 "ordered_comparison_operator"
1477 [(reg:CC FLAGS_REG) (const_int 0)]))]
1478 ""
1479 {
1480 if (<MODE>mode == (TARGET_64BIT ? TImode : DImode))
1481 {
1482 if (GET_CODE (operands[1]) != EQ
1483 && GET_CODE (operands[1]) != NE)
1484 FAIL;
1485 }
1486 else if (MEM_P (operands[2]) && MEM_P (operands[3]))
1487 operands[2] = force_reg (<MODE>mode, operands[2]);
1488 ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
1489 operands[2], operands[3]);
1490 DONE;
1491 })
1492
;; Named expander that builds a CCmode compare of two SWI48 operands.
;; It has no condition and no preparation statements.  It is invoked
;; elsewhere in this file as gen_cmp_1 (<MODE>mode, op0, op1), i.e. with
;; the mode passed as the first argument.
1493 (define_expand "@cmp<mode>_1"
1494 [(set (reg:CC FLAGS_REG)
1495 (compare:CC (match_operand:SWI48 0 "nonimmediate_operand")
1496 (match_operand:SWI48 1 "<general_operand>")))])
1497
;; Integer modes for *cmp<mode>_ccz_1.  HImode is unconditional; QImode
;; requires TARGET_AVX512DQ, SImode requires TARGET_AVX512BW and DImode
;; requires TARGET_AVX512BW && TARGET_64BIT.
1498 (define_mode_iterator SWI1248_AVX512BWDQ_64
1499 [(QI "TARGET_AVX512DQ") HI
1500 (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW && TARGET_64BIT")])
1501
;; Compare against zero, valid when TARGET_AVX512F and the flags are
;; used in CCZmode.  Three alternatives, in order: "test" of the operand
;; with itself, "cmp" with the zero operand, and "kortest" of the operand
;; with itself for the "k"-constrained alternative (type msklog, vex
;; prefix).
1502 (define_insn "*cmp<mode>_ccz_1"
1503 [(set (reg FLAGS_REG)
1504 (compare (match_operand:SWI1248_AVX512BWDQ_64 0
1505 "nonimmediate_operand" "<r>,?m<r>,$k")
1506 (match_operand:SWI1248_AVX512BWDQ_64 1 "const0_operand")))]
1507 "TARGET_AVX512F && ix86_match_ccmode (insn, CCZmode)"
1508 "@
1509 test{<imodesuffix>}\t%0, %0
1510 cmp{<imodesuffix>}\t{%1, %0|%0, %1}
1511 kortest<mskmodesuffix>\t%0, %0"
1512 [(set_attr "type" "test,icmp,msklog")
1513 (set_attr "length_immediate" "0,1,*")
1514 (set_attr "prefix" "*,*,vex")
1515 (set_attr "mode" "<MODE>")])
1516
;; Compare of a single-word integer operand against zero, valid when
;; ix86_match_ccmode accepts CCNOmode.  The first alternative emits
;; "test" of the operand with itself, the second emits "cmp" with the
;; zero operand.
1517 (define_insn "*cmp<mode>_ccno_1"
1518 [(set (reg FLAGS_REG)
1519 (compare (match_operand:SWI 0 "nonimmediate_operand" "<r>,?m<r>")
1520 (match_operand:SWI 1 "const0_operand")))]
1521 "ix86_match_ccmode (insn, CCNOmode)"
1522 "@
1523 test{<imodesuffix>}\t%0, %0
1524 cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
1525 [(set_attr "type" "test,icmp")
1526 (set_attr "length_immediate" "0,1")
1527 (set_attr "mode" "<MODE>")])
1528
;; General single-word integer compare, valid when ix86_match_ccmode
;; accepts CCmode.  Always emits "cmp"; the two alternatives only differ
;; in which operand may be a memory reference.
1529 (define_insn "*cmp<mode>_1"
1530 [(set (reg FLAGS_REG)
1531 (compare (match_operand:SWI 0 "nonimmediate_operand" "<r>m,<r>")
1532 (match_operand:SWI 1 "<general_operand>" "<r><i>,<r><m>")))]
1533 "ix86_match_ccmode (insn, CCmode)"
1534 "cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
1535 [(set_attr "type" "icmp")
1536 (set_attr "mode" "<MODE>")])
1537
;; Matches the comparison of (minus op0 op1) against zero when
;; ix86_match_ccmode accepts CCGOCmode, and emits it as a plain "cmp" of
;; the two operands (same template and constraints as *cmp<mode>_1).
1538 (define_insn "*cmp<mode>_minus_1"
1539 [(set (reg FLAGS_REG)
1540 (compare
1541 (minus:SWI (match_operand:SWI 0 "nonimmediate_operand" "<r>m,<r>")
1542 (match_operand:SWI 1 "<general_operand>" "<r><i>,<r><m>"))
1543 (const_int 0)))]
1544 "ix86_match_ccmode (insn, CCGOCmode)"
1545 "cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
1546 [(set_attr "type" "icmp")
1547 (set_attr "mode" "<MODE>")])
1548
;; Compare QImode operand 0 with the QImode subreg of an 8-bit extract at
;; bit position 8 of operand 1 (operator 2 must satisfy extract_operator).
;; The extracted operand is printed with the %h modifier (presumably the
;; high-byte register name -- confirm in the operand printing code).
1549 (define_insn "*cmpqi_ext<mode>_1"
1550 [(set (reg FLAGS_REG)
1551 (compare
1552 (match_operand:QI 0 "nonimmediate_operand" "QBn")
1553 (subreg:QI
1554 (match_operator:SWI248 2 "extract_operator"
1555 [(match_operand 1 "int248_register_operand" "Q")
1556 (const_int 8)
1557 (const_int 8)]) 0)))]
1558 "ix86_match_ccmode (insn, CCmode)"
1559 "cmp{b}\t{%h1, %0|%0, %h1}"
1560 [(set_attr "addr" "gpr8")
1561 (set_attr "type" "icmp")
1562 (set_attr "mode" "QI")])
1563
;; Compare the 8-bit extract at bit position 8 of operand 0 against zero,
;; valid when ix86_match_ccmode accepts CCNOmode.  Emitted as "test" of
;; the %h-printed operand with itself.
1564 (define_insn "*cmpqi_ext<mode>_2"
1565 [(set (reg FLAGS_REG)
1566 (compare
1567 (subreg:QI
1568 (match_operator:SWI248 2 "extract_operator"
1569 [(match_operand 0 "int248_register_operand" "Q")
1570 (const_int 8)
1571 (const_int 8)]) 0)
1572 (match_operand:QI 1 "const0_operand")))]
1573 "ix86_match_ccmode (insn, CCNOmode)"
1574 "test{b}\t%h0, %h0"
1575 [(set_attr "type" "test")
1576 (set_attr "length_immediate" "0")
1577 (set_attr "mode" "QI")])
1578
;; Named expander that builds a CCmode compare between the QImode subreg
;; of (zero_extract:HI op0 8 8) and a const_int operand.  It has no
;; condition and no preparation statements.
1579 (define_expand "cmpqi_ext_3"
1580 [(set (reg:CC FLAGS_REG)
1581 (compare:CC
1582 (subreg:QI
1583 (zero_extract:HI
1584 (match_operand:HI 0 "register_operand")
1585 (const_int 8)
1586 (const_int 8)) 0)
1587 (match_operand:QI 1 "const_int_operand")))])
1588
;; Compare the 8-bit extract at bit position 8 of operand 0 with a QImode
;; general operand, valid when ix86_match_ccmode accepts CCmode.  This is
;; the operand-swapped counterpart of *cmpqi_ext<mode>_1.
1589 (define_insn "*cmpqi_ext<mode>_3"
1590 [(set (reg FLAGS_REG)
1591 (compare
1592 (subreg:QI
1593 (match_operator:SWI248 2 "extract_operator"
1594 [(match_operand 0 "int248_register_operand" "Q")
1595 (const_int 8)
1596 (const_int 8)]) 0)
1597 (match_operand:QI 1 "general_operand" "QnBn")))]
1598 "ix86_match_ccmode (insn, CCmode)"
1599 "cmp{b}\t{%1, %h0|%h0, %1}"
1600 [(set_attr "addr" "gpr8")
1601 (set_attr "type" "icmp")
1602 (set_attr "mode" "QI")])
1603
;; Compare two 8-bit extracts at bit position 8, one taken from operand 0
;; and one from operand 1; both registers use the "Q" constraint and both
;; are printed with the %h modifier.
1604 (define_insn "*cmpqi_ext<mode>_4"
1605 [(set (reg FLAGS_REG)
1606 (compare
1607 (subreg:QI
1608 (match_operator:SWI248 2 "extract_operator"
1609 [(match_operand 0 "int248_register_operand" "Q")
1610 (const_int 8)
1611 (const_int 8)]) 0)
1612 (subreg:QI
1613 (match_operator:SWI248 3 "extract_operator"
1614 [(match_operand 1 "int248_register_operand" "Q")
1615 (const_int 8)
1616 (const_int 8)]) 0)))]
1617 "ix86_match_ccmode (insn, CCmode)"
1618 "cmp{b}\t{%h1, %h0|%h0, %h1}"
1619 [(set_attr "type" "icmp")
1620 (set_attr "mode" "QI")])
1621
;; Double-word comparison in CCZmode, matched only while
;; ix86_pre_reload_split () holds and always split ("&& 1").
;;
;; split_double_mode breaks operands 0 and 1 into halves: afterwards
;; operands[0]/operands[1] hold one half of each input and
;; operands[2]/operands[3] the other half (presumably the low and high
;; parts respectively -- confirm against split_double_mode).  Halves that
;; are SUBREGs are copied into fresh pseudos.
;;
;; For each pair of halves a value is computed that is zero exactly when
;; the two halves are equal:
;;   - if one side is const0_rtx, the other side itself;
;;   - if one side is constm1_rtx, the one's complement of the other side;
;;   - otherwise the XOR of both sides, after forcing a constant that is
;;     not an x86_64_immediate_operand into a register.
;; The first pair's value is placed in new pseudo operands[4]; the second
;; pair's value is operands[5], which is either one of the existing halves
;; or a new pseudo.  The replacement pattern then IORs operands 4 and 5
;; into operand 4 while setting FLAGS_REG from the result.
;;
;; When both halves of operand 1 are constm1_rtx the sequence is instead
;; an AND of the two halves of operand 0 followed by a compare of the
;; result against -1, and the split is DONE without using the pattern.
1622 (define_insn_and_split "*cmp<dwi>_doubleword"
1623 [(set (reg:CCZ FLAGS_REG)
1624 (compare:CCZ (match_operand:<DWI> 0 "nonimmediate_operand")
1625 (match_operand:<DWI> 1 "general_operand")))]
1626 "ix86_pre_reload_split ()"
1627 "#"
1628 "&& 1"
1629 [(parallel [(set (reg:CCZ FLAGS_REG)
1630 (compare:CCZ (ior:DWIH (match_dup 4) (match_dup 5))
1631 (const_int 0)))
1632 (set (match_dup 4) (ior:DWIH (match_dup 4) (match_dup 5)))])]
1633 {
1634 split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);
1635 /* Placing the SUBREG pieces in pseudos helps reload. */
1636 for (int i = 0; i < 4; i++)
1637 if (SUBREG_P (operands[i]))
1638 operands[i] = force_reg (<MODE>mode, operands[i]);
1639
1640 operands[4] = gen_reg_rtx (<MODE>mode);
1641
1642 /* Special case comparisons against -1. */
1643 if (operands[1] == constm1_rtx && operands[3] == constm1_rtx)
1644 {
1645 emit_insn (gen_and<mode>3 (operands[4], operands[0], operands[2]));
1646 emit_insn (gen_cmp_1 (<MODE>mode, operands[4], constm1_rtx));
1647 DONE;
1648 }
1649
1650 if (operands[1] == const0_rtx)
1651 emit_move_insn (operands[4], operands[0]);
1652 else if (operands[0] == const0_rtx)
1653 emit_move_insn (operands[4], operands[1]);
1654 else if (operands[1] == constm1_rtx)
1655 emit_insn (gen_one_cmpl<mode>2 (operands[4], operands[0]));
1656 else if (operands[0] == constm1_rtx)
1657 emit_insn (gen_one_cmpl<mode>2 (operands[4], operands[1]));
1658 else
1659 {
1660 if (CONST_SCALAR_INT_P (operands[1])
1661 && !x86_64_immediate_operand (operands[1], <MODE>mode))
1662 operands[1] = force_reg (<MODE>mode, operands[1]);
1663 emit_insn (gen_xor<mode>3 (operands[4], operands[0], operands[1]));
1664 }
1665
1666 if (operands[3] == const0_rtx)
1667 operands[5] = operands[2];
1668 else if (operands[2] == const0_rtx)
1669 operands[5] = operands[3];
1670 else
1671 {
1672 operands[5] = gen_reg_rtx (<MODE>mode);
1673 if (operands[3] == constm1_rtx)
1674 emit_insn (gen_one_cmpl<mode>2 (operands[5], operands[2]));
1675 else if (operands[2] == constm1_rtx)
1676 emit_insn (gen_one_cmpl<mode>2 (operands[5], operands[3]));
1677 else
1678 {
1679 if (CONST_SCALAR_INT_P (operands[3])
1680 && !x86_64_immediate_operand (operands[3], <MODE>mode))
1681 operands[3] = force_reg (<MODE>mode, operands[3]);
1682 emit_insn (gen_xor<mode>3 (operands[5], operands[2], operands[3]));
1683 }
1684 }
1685 })
1686
1687 ;; These implement floating point compares.
1688 ;; %%% See if we can get away with VOIDmode operands on the actual insns,
1689 ;; which would allow mix and match FP modes on the compares. Which is what
1690 ;; the old patterns did, but with many more of them.
1691
;; XFmode compare-and-branch expander, available when TARGET_80387.
;; Both compared operands must be nonmemory_operand and the comparison
;; code must satisfy ix86_fp_comparison_operator.  Expansion is delegated
;; to ix86_expand_branch.
1692 (define_expand "cbranchxf4"
1693 [(set (reg:CC FLAGS_REG)
1694 (compare:CC (match_operand:XF 1 "nonmemory_operand")
1695 (match_operand:XF 2 "nonmemory_operand")))
1696 (set (pc) (if_then_else
1697 (match_operator 0 "ix86_fp_comparison_operator"
1698 [(reg:CC FLAGS_REG)
1699 (const_int 0)])
1700 (label_ref (match_operand 3))
1701 (pc)))]
1702 "TARGET_80387"
1703 {
1704 ix86_expand_branch (GET_CODE (operands[0]),
1705 operands[1], operands[2], operands[3]);
1706 DONE;
1707 })
1708
;; XFmode compare-and-store-flag expander, available when TARGET_80387.
;; Operand 0 is the QImode result, operand 1 the comparison operator and
;; operands 2 and 3 the nonmemory values compared.  Expansion is
;; delegated to ix86_expand_setcc.
1709 (define_expand "cstorexf4"
1710 [(set (reg:CC FLAGS_REG)
1711 (compare:CC (match_operand:XF 2 "nonmemory_operand")
1712 (match_operand:XF 3 "nonmemory_operand")))
1713 (set (match_operand:QI 0 "register_operand")
1714 (match_operator 1 "ix86_fp_comparison_operator"
1715 [(reg:CC FLAGS_REG)
1716 (const_int 0)]))]
1717 "TARGET_80387"
1718 {
1719 ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
1720 operands[2], operands[3]);
1721 DONE;
1722 })
1723
;; HFmode compare-and-branch expander, available when TARGET_AVX512FP16.
;; Operands use cmp_fp_expander_operand and the comparison code must
;; satisfy ix86_fp_comparison_operator.  Expansion is delegated to
;; ix86_expand_branch.
1724 (define_expand "cbranchhf4"
1725 [(set (reg:CC FLAGS_REG)
1726 (compare:CC (match_operand:HF 1 "cmp_fp_expander_operand")
1727 (match_operand:HF 2 "cmp_fp_expander_operand")))
1728 (set (pc) (if_then_else
1729 (match_operator 0 "ix86_fp_comparison_operator"
1730 [(reg:CC FLAGS_REG)
1731 (const_int 0)])
1732 (label_ref (match_operand 3))
1733 (pc)))]
1734 "TARGET_AVX512FP16"
1735 {
1736 ix86_expand_branch (GET_CODE (operands[0]),
1737 operands[1], operands[2], operands[3]);
1738 DONE;
1739 })
1740
;; SFmode/DFmode compare-and-branch expander.  Available when
;; TARGET_80387, or when the mode is an SSE float mode and
;; TARGET_SSE_MATH is set.  Expansion is delegated to ix86_expand_branch.
1741 (define_expand "cbranch<mode>4"
1742 [(set (reg:CC FLAGS_REG)
1743 (compare:CC (match_operand:MODEF 1 "cmp_fp_expander_operand")
1744 (match_operand:MODEF 2 "cmp_fp_expander_operand")))
1745 (set (pc) (if_then_else
1746 (match_operator 0 "ix86_fp_comparison_operator"
1747 [(reg:CC FLAGS_REG)
1748 (const_int 0)])
1749 (label_ref (match_operand 3))
1750 (pc)))]
1751 "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
1752 {
1753 ix86_expand_branch (GET_CODE (operands[0]),
1754 operands[1], operands[2], operands[3]);
1755 DONE;
1756 })
1757
;; BFmode compare-and-branch expander.  The enabling condition is the
;; SFmode one, because the comparison itself is carried out in SFmode:
;; both operands are converted with ix86_expand_fast_convert_bf_to_sf and
;; the branch to operand 3 is emitted through do_compare_rtx_and_jump
;; with profile_probability::even ().  Unlike the other FP cbranch
;; expanders, any comparison_operator code is accepted.
1758 (define_expand "cbranchbf4"
1759 [(set (reg:CC FLAGS_REG)
1760 (compare:CC (match_operand:BF 1 "cmp_fp_expander_operand")
1761 (match_operand:BF 2 "cmp_fp_expander_operand")))
1762 (set (pc) (if_then_else
1763 (match_operator 0 "comparison_operator"
1764 [(reg:CC FLAGS_REG)
1765 (const_int 0)])
1766 (label_ref (match_operand 3))
1767 (pc)))]
1768 "TARGET_80387 || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH)"
1769 {
1770 rtx op1 = ix86_expand_fast_convert_bf_to_sf (operands[1]);
1771 rtx op2 = ix86_expand_fast_convert_bf_to_sf (operands[2]);
1772 do_compare_rtx_and_jump (op1, op2, GET_CODE (operands[0]), 0,
1773 SFmode, NULL_RTX, NULL,
1774 as_a <rtx_code_label *> (operands[3]),
1775 /* Unfortunately this isn't propagated. */
1776 profile_probability::even ());
1777 DONE;
1778 })
1779
;; HFmode compare-and-store-flag expander, available when
;; TARGET_AVX512FP16.  Operand 0 is the QImode result; expansion is
;; delegated to ix86_expand_setcc.
1780 (define_expand "cstorehf4"
1781 [(set (reg:CC FLAGS_REG)
1782 (compare:CC (match_operand:HF 2 "cmp_fp_expander_operand")
1783 (match_operand:HF 3 "cmp_fp_expander_operand")))
1784 (set (match_operand:QI 0 "register_operand")
1785 (match_operator 1 "ix86_fp_comparison_operator"
1786 [(reg:CC FLAGS_REG)
1787 (const_int 0)]))]
1788 "TARGET_AVX512FP16"
1789 {
1790 ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
1791 operands[2], operands[3]);
1792 DONE;
1793 })
1794
;; BFmode compare-and-store-flag expander.  As in cbranchbf4, both
;; compared operands are converted with ix86_expand_fast_convert_bf_to_sf
;; and the comparison is done in SFmode, here via emit_store_flag_force.
;; If the rtx returned by emit_store_flag_force is not operand 0, it is
;; copied into operand 0.
1795 (define_expand "cstorebf4"
1796 [(set (reg:CC FLAGS_REG)
1797 (compare:CC (match_operand:BF 2 "cmp_fp_expander_operand")
1798 (match_operand:BF 3 "cmp_fp_expander_operand")))
1799 (set (match_operand:QI 0 "register_operand")
1800 (match_operator 1 "comparison_operator"
1801 [(reg:CC FLAGS_REG)
1802 (const_int 0)]))]
1803 "TARGET_80387 || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH)"
1804 {
1805 rtx op1 = ix86_expand_fast_convert_bf_to_sf (operands[2]);
1806 rtx op2 = ix86_expand_fast_convert_bf_to_sf (operands[3]);
1807 rtx res = emit_store_flag_force (operands[0], GET_CODE (operands[1]),
1808 op1, op2, SFmode, 0, 1);
1809 if (!rtx_equal_p (res, operands[0]))
1810 emit_move_insn (operands[0], res);
1811 DONE;
1812 })
1813
;; SFmode/DFmode compare-and-store-flag expander.  Available when
;; TARGET_80387, or when the mode is an SSE float mode and
;; TARGET_SSE_MATH is set.  Expansion is delegated to ix86_expand_setcc.
1814 (define_expand "cstore<mode>4"
1815 [(set (reg:CC FLAGS_REG)
1816 (compare:CC (match_operand:MODEF 2 "cmp_fp_expander_operand")
1817 (match_operand:MODEF 3 "cmp_fp_expander_operand")))
1818 (set (match_operand:QI 0 "register_operand")
1819 (match_operator 1 "ix86_fp_comparison_operator"
1820 [(reg:CC FLAGS_REG)
1821 (const_int 0)]))]
1822 "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
1823 {
1824 ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
1825 operands[2], operands[3]);
1826 DONE;
1827 })
1828
;; Branch on an already computed flags value: operand 1 must satisfy
;; flags_reg_operand and operand 2 must satisfy const0_operand.  Always
;; enabled; expansion is delegated to ix86_expand_branch.
1829 (define_expand "cbranchcc4"
1830 [(set (pc) (if_then_else
1831 (match_operator 0 "comparison_operator"
1832 [(match_operand 1 "flags_reg_operand")
1833 (match_operand 2 "const0_operand")])
1834 (label_ref (match_operand 3))
1835 (pc)))]
1836 ""
1837 {
1838 ix86_expand_branch (GET_CODE (operands[0]),
1839 operands[1], operands[2], operands[3]);
1840 DONE;
1841 })
1842
;; Store-flag counterpart of cbranchcc4: sets QImode operand 0 from a
;; comparison of the flags register (operand 2) against zero (operand 3).
;; Always enabled; expansion is delegated to ix86_expand_setcc.
1843 (define_expand "cstorecc4"
1844 [(set (match_operand:QI 0 "register_operand")
1845 (match_operator 1 "comparison_operator"
1846 [(match_operand 2 "flags_reg_operand")
1847 (match_operand 3 "const0_operand")]))]
1848 ""
1849 {
1850 ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
1851 operands[2], operands[3]);
1852 DONE;
1853 })
1854
1855 ;; FP compares, step 1:
1856 ;; Set the FP condition codes and move fpsr to ax.
1857
1858 ;; We may not use "#" to split and emit these
1859 ;; due to reg-stack pops killing fpsr.
1860
;; XFmode x87 compare whose result is an HImode value in the register
;; selected by the "a" constraint, represented as UNSPEC_FNSTSW of a CCFP
;; compare.  Operand 2 may be a register or the constant accepted by
;; reg_or_0_operand.  Assembly comes from
;; output_fp_compare (insn, operands, false, false).
1861 (define_insn "*cmpxf_i387"
1862 [(set (match_operand:HI 0 "register_operand" "=a")
1863 (unspec:HI
1864 [(compare:CCFP
1865 (match_operand:XF 1 "register_operand" "f")
1866 (match_operand:XF 2 "reg_or_0_operand" "fC"))]
1867 UNSPEC_FNSTSW))]
1868 "TARGET_80387"
1869 "* return output_fp_compare (insn, operands, false, false);"
1870 [(set_attr "type" "multi")
1871 (set_attr "unit" "i387")
1872 (set_attr "mode" "XF")])
1873
;; SFmode/DFmode variant of *cmpxf_i387.  Operand 2 uses
;; nonimm_or_0_operand with constraint "fmC", so unlike the XFmode
;; pattern it may also be a memory operand.
1874 (define_insn "*cmp<mode>_i387"
1875 [(set (match_operand:HI 0 "register_operand" "=a")
1876 (unspec:HI
1877 [(compare:CCFP
1878 (match_operand:MODEF 1 "register_operand" "f")
1879 (match_operand:MODEF 2 "nonimm_or_0_operand" "fmC"))]
1880 UNSPEC_FNSTSW))]
1881 "TARGET_80387"
1882 "* return output_fp_compare (insn, operands, false, false);"
1883 [(set_attr "type" "multi")
1884 (set_attr "unit" "i387")
1885 (set_attr "mode" "<MODE>")])
1886
;; x87 compare of an FP register with an HImode/SImode memory operand
;; converted by (float ...).  Enabled when TARGET_80387 and either the
;; matching TARGET_USE_<mode>MODE_FIOP flag is set or the function is
;; optimized for size.  The "mode" attribute is the integer mode and
;; "fp_int_src" is set to true.
1887 (define_insn "*cmp<X87MODEF:mode>_<SWI24:mode>_i387"
1888 [(set (match_operand:HI 0 "register_operand" "=a")
1889 (unspec:HI
1890 [(compare:CCFP
1891 (match_operand:X87MODEF 1 "register_operand" "f")
1892 (float:X87MODEF
1893 (match_operand:SWI24 2 "nonimmediate_operand" "m")))]
1894 UNSPEC_FNSTSW))]
1895 "TARGET_80387
1896 && (TARGET_USE_<SWI24:MODE>MODE_FIOP
1897 || optimize_function_for_size_p (cfun))"
1898 "* return output_fp_compare (insn, operands, false, false);"
1899 [(set_attr "type" "multi")
1900 (set_attr "unit" "i387")
1901 (set_attr "fp_int_src" "true")
1902 (set_attr "mode" "<SWI24:MODE>")])
1903
;; Variant of the x87 compare in which the CCFP compare is additionally
;; wrapped in UNSPEC_NOTRAP.  Both operands must be registers, and
;; output_fp_compare is called with its last argument set to true
;; (the other i387 compare patterns above pass false).
1904 (define_insn "*cmpu<mode>_i387"
1905 [(set (match_operand:HI 0 "register_operand" "=a")
1906 (unspec:HI
1907 [(unspec:CCFP
1908 [(compare:CCFP
1909 (match_operand:X87MODEF 1 "register_operand" "f")
1910 (match_operand:X87MODEF 2 "register_operand" "f"))]
1911 UNSPEC_NOTRAP)]
1912 UNSPEC_FNSTSW))]
1913 "TARGET_80387"
1914 "* return output_fp_compare (insn, operands, false, true);"
1915 [(set_attr "type" "multi")
1916 (set_attr "unit" "i387")
1917 (set_attr "mode" "<MODE>")])
1918
1919 ;; FP compares, step 2:
1920 ;; Get ax into flags, general case.
1921
;; Set FLAGS_REG from the HImode operand in the "a" register
;; (UNSPEC_SAHF); available when TARGET_SAHF.  Normally emits "sahf".
;; When HAVE_AS_IX86_SAHF is not defined and TARGET_64BIT is set, the
;; instruction is emitted as the raw byte 0x9e through ASM_BYTE instead
;; of by mnemonic.
1922 (define_insn "x86_sahf_1"
1923 [(set (reg:CC FLAGS_REG)
1924 (unspec:CC [(match_operand:HI 0 "register_operand" "a")]
1925 UNSPEC_SAHF))]
1926 "TARGET_SAHF"
1927 {
1928 #ifndef HAVE_AS_IX86_SAHF
1929 if (TARGET_64BIT)
1930 return ASM_BYTE "0x9e";
1931 else
1932 #endif
1933 return "sahf";
1934 }
1935 [(set_attr "length" "1")
1936 (set_attr "athlon_decode" "vector")
1937 (set_attr "amdfam10_decode" "direct")
1938 (set_attr "bdver1_decode" "direct")
1939 (set_attr "mode" "SI")])
1940
1941 ;; Pentium Pro can do both steps in one go.
1942 ;; (these instructions set flags directly)
1943
;; Substitution used to derive a second variant of a CCFP compare pattern.
;; unord_subst rewrites (set op0 op1), with both operands in CCFPmode, so
;; that the source is wrapped in (unspec:CCFP [...] UNSPEC_NOTRAP).
;; The two subst attributes give the strings for the original and the
;; substituted pattern: <unord> is "" / "u" and <unordered> is
;; "false" / "true".
1944 (define_subst_attr "unord" "unord_subst" "" "u")
1945 (define_subst_attr "unordered" "unord_subst" "false" "true")
1946
1947 (define_subst "unord_subst"
1948 [(set (match_operand:CCFP 0)
1949 (match_operand:CCFP 1))]
1950 ""
1951 [(set (match_dup 0)
1952 (unspec:CCFP
1953 [(match_dup 1)]
1954 UNSPEC_NOTRAP))])
1955
;; XFmode x87 register-register compare that sets FLAGS_REG (CCFP) directly.
;; Requires TARGET_80387 && TARGET_CMOVE.  output_fp_compare is called with
;; `true' as its third argument and <unordered> ("false"/"true", depending
;; on whether the <unord> substitution was applied) as its fourth.
1956 (define_insn "*cmpi<unord>xf_i387"
1957 [(set (reg:CCFP FLAGS_REG)
1958 (compare:CCFP
1959 (match_operand:XF 0 "register_operand" "f")
1960 (match_operand:XF 1 "register_operand" "f")))]
1961 "TARGET_80387 && TARGET_CMOVE"
1962 "* return output_fp_compare (insn, operands, true, <unordered>);"
1963 [(set_attr "type" "fcmp")
1964 (set_attr "mode" "XF")
1965 (set_attr "athlon_decode" "vector")
1966 (set_attr "amdfam10_decode" "direct")
1967 (set_attr "bdver1_decode" "double")
1968 (set_attr "znver1_decode" "double")])
1969
;; MODEF compare that sets FLAGS_REG (CCFP) directly.  Two alternatives:
;;   0: x87 registers ("f","f"), output through output_fp_compare;
;;   1: SSE register against SSE register or memory ("v","vm"), output as
;;      [v][u]comi<suffix>.
;; The "enabled" attribute selects between them: when this mode uses SSE
;; math, alternative 1 is always enabled and alternative 0 only with
;; TARGET_MIX_SSE_I387; otherwise only alternative 0 is enabled.
1970 (define_insn "*cmpi<unord><MODEF:mode>"
1971 [(set (reg:CCFP FLAGS_REG)
1972 (compare:CCFP
1973 (match_operand:MODEF 0 "register_operand" "f,v")
1974 (match_operand:MODEF 1 "register_ssemem_operand" "f,vm")))]
1975 "(SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)
1976 || (TARGET_80387 && TARGET_CMOVE)"
1977 "@
1978 * return output_fp_compare (insn, operands, true, <unordered>);
1979 %v<unord>comi<MODEF:ssemodesuffix>\t{%1, %0|%0, %1}"
1980 [(set_attr "type" "fcmp,ssecomi")
1981 (set_attr "prefix" "orig,maybe_vex")
1982 (set_attr "mode" "<MODEF:MODE>")
1983 (set_attr "prefix_rep" "*,0")
1984 (set (attr "prefix_data16")
1985 (cond [(eq_attr "alternative" "0")
1986 (const_string "*")
1987 (eq_attr "mode" "DF")
1988 (const_string "1")
1989 ]
1990 (const_string "0")))
1991 (set_attr "athlon_decode" "vector")
1992 (set_attr "amdfam10_decode" "direct")
1993 (set_attr "bdver1_decode" "double")
1994 (set_attr "znver1_decode" "double")
1995 (set (attr "enabled")
1996 (if_then_else
1997 (match_test ("SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH"))
1998 (if_then_else
1999 (eq_attr "alternative" "0")
2000 (symbol_ref "TARGET_MIX_SSE_I387")
2001 (symbol_ref "true"))
2002 (if_then_else
2003 (eq_attr "alternative" "0")
2004 (symbol_ref "true")
2005 (symbol_ref "false"))))])
2006
;; HFmode compare that sets FLAGS_REG (CCFP) directly, emitted as
;; v[u]comish.  Requires TARGET_AVX512FP16; operand 1 may be a vector
;; register or memory ("vm").
2007 (define_insn "*cmpi<unord>hf"
2008 [(set (reg:CCFP FLAGS_REG)
2009 (compare:CCFP
2010 (match_operand:HF 0 "register_operand" "v")
2011 (match_operand:HF 1 "nonimmediate_operand" "vm")))]
2012 "TARGET_AVX512FP16"
2013 "v<unord>comish\t{%1, %0|%0, %1}"
2014 [(set_attr "type" "ssecomi")
2015 (set_attr "prefix" "evex")
2016 (set_attr "mode" "HF")])
2017
2018 ;; Set carry flag.
;; Modelled as FLAGS_REG (CCC mode) being set from UNSPEC_STC; emitted as
;; "stc".  The attributes describe a 1-byte instruction with no immediate
;; and no modrm byte.
2019 (define_insn "x86_stc"
2020 [(set (reg:CCC FLAGS_REG) (unspec:CCC [(const_int 0)] UNSPEC_STC))]
2021 ""
2022 "stc"
2023 [(set_attr "length" "1")
2024 (set_attr "length_immediate" "0")
2025 (set_attr "modrm" "0")])
2026
2027 ;; On Pentium 4, set the carry flag using mov $1,%al;addb $-1,%al.
;; Applies only when a QImode scratch register is available, TARGET_SLOW_STC
;; is set and the insn is not optimized for size.  The UNSPEC_STC set is
;; replaced by: scratch = 1, followed by a parallel that adds -1 to the
;; scratch and sets FLAGS_REG (CCC) from comparing that sum with the scratch.
2028 (define_peephole2
2029 [(match_scratch:QI 0 "r")
2030 (set (reg:CCC FLAGS_REG) (unspec:CCC [(const_int 0)] UNSPEC_STC))]
2031 "TARGET_SLOW_STC && !optimize_insn_for_size_p ()"
2032 [(set (match_dup 0) (const_int 1))
2033 (parallel
2034 [(set (reg:CCC FLAGS_REG)
2035 (compare:CCC (plus:QI (match_dup 0) (const_int -1))
2036 (match_dup 0)))
2037 (set (match_dup 0) (plus:QI (match_dup 0) (const_int -1)))])])
2038
2039 ;; Complement carry flag.
;; Modelled as FLAGS_REG (CCC) being set from comparing the negated LTU
;; result of the current flags with their GEU result; emitted as "cmc".
;; The "use_carry" attribute is set because the insn reads the flags.
2040 (define_insn "*x86_cmc"
2041 [(set (reg:CCC FLAGS_REG)
2042 (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
2043 (geu:QI (reg:CCC FLAGS_REG) (const_int 0))))]
2044 ""
2045 "cmc"
2046 [(set_attr "length" "1")
2047 (set_attr "length_immediate" "0")
2048 (set_attr "use_carry" "1")
2049 (set_attr "modrm" "0")])
2050
2051 ;; On Pentium 4, cmc is replaced with setnc %al;addb $-1,%al.
;; Applies only when a QImode scratch register is available, TARGET_SLOW_STC
;; is set and the insn is not optimized for size.  The scratch is first
;; loaded with (ne flags 0); the same add -1 / CCC compare parallel used by
;; the stc peephole above then produces the new carry.
2052 (define_peephole2
2053 [(match_scratch:QI 0 "r")
2054 (set (reg:CCC FLAGS_REG)
2055 (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
2056 (geu:QI (reg:CCC FLAGS_REG) (const_int 0))))]
2057 "TARGET_SLOW_STC && !optimize_insn_for_size_p ()"
2058 [(set (match_dup 0) (ne:QI (reg:CCC FLAGS_REG) (const_int 0)))
2059 (parallel
2060 [(set (reg:CCC FLAGS_REG)
2061 (compare:CCC (plus:QI (match_dup 0) (const_int -1))
2062 (match_dup 0)))
2063 (set (match_dup 0) (plus:QI (match_dup 0) (const_int -1)))])])
2064 \f
2065 ;; Push/pop instructions.
2066
;; Push of a V1TImode vector register (64-bit with TARGET_STV only).
;; There is no direct instruction, so the insn outputs "#" and is split
;; after reload into an explicit stack-pointer adjustment by
;; -PUSH_ROUNDING (size of V1TImode) followed by a store to the memory at
;; the stack pointer.
2067 (define_insn_and_split "*pushv1ti2"
2068 [(set (match_operand:V1TI 0 "push_operand" "=<")
2069 (match_operand:V1TI 1 "register_operand" "v"))]
2070 "TARGET_64BIT && TARGET_STV"
2071 "#"
2072 "&& reload_completed"
2073 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
2074 (set (match_dup 0) (match_dup 1))]
2075 {
2076 operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (V1TImode)));
2077 /* Preserve memory attributes. */
2078 operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
2079 }
2080 [(set_attr "type" "multi")
2081 (set_attr "mode" "TI")])
2082
;; Double-word (DWI) push.  Both alternatives output "#", so the insn must
;; be split before final output.  Alternative 1 takes the source from a
;; vector register ("*v").
2083 (define_insn "*push<mode>2"
2084 [(set (match_operand:DWI 0 "push_operand" "=<,<")
2085 (match_operand:DWI 1 "general_no_elim_operand" "riF*o,*v"))]
2086 ""
2087 "#"
2088 [(set_attr "type" "multi")
2089 (set_attr "mode" "<MODE>")])
2090
;; After reload, a double-word push from a general_gr_operand is handed to
;; ix86_split_long_move.  The (const_int 0) replacement template is only a
;; placeholder: the preparation code ends with DONE.
2091 (define_split
2092 [(set (match_operand:DWI 0 "push_operand")
2093 (match_operand:DWI 1 "general_gr_operand"))]
2094 "reload_completed"
2095 [(const_int 0)]
2096 "ix86_split_long_move (operands); DONE;")
2097
;; DImode push on 64-bit targets.  Alternative 0 is emitted directly as
;; push{q}.  Alternative 1 (vector register source) and alternative 2
;; (constant "n", discouraged with "!") output "#" and rely on later
;; splitting.
2098 (define_insn "*pushdi2_rex64"
2099 [(set (match_operand:DI 0 "push_operand" "=<,<,!<")
2100 (match_operand:DI 1 "general_no_elim_operand" "re*m,*v,n"))]
2101 "TARGET_64BIT"
2102 "@
2103 push{q}\t%1
2104 #
2105 #"
2106 [(set_attr "type" "push,multi,multi")
2107 (set_attr "mode" "DI")])
2108
2109 ;; Convert impossible pushes of immediates to existing instructions.
2110 ;; First try to get a scratch register and go through it. In case this
2111 ;; fails, push the sign-extended lower part first and then overwrite
2112 ;; the upper part with a 32-bit move.
2113
;; Scratch-register variant: on 64-bit targets, for a pushed immediate that
;; is neither symbolic nor an x86_64_immediate_operand, load the immediate
;; into a DImode scratch register and push that register instead.
2114 (define_peephole2
2115 [(match_scratch:DI 2 "r")
2116 (set (match_operand:DI 0 "push_operand")
2117 (match_operand:DI 1 "immediate_operand"))]
2118 "TARGET_64BIT
2119 && !symbolic_operand (operands[1], DImode)
2120 && !x86_64_immediate_operand (operands[1], DImode)"
2121 [(set (match_dup 2) (match_dup 1))
2122 (set (match_dup 0) (match_dup 2))])
2123
;; Fallback for the same kind of immediate once epilogue_completed is set.
;; The immediate is split with split_double_mode into operands[2] and
;; operands[3].  The push then uses the DImode lowpart of operands[2] as
;; its source, and a second insn stores operands[3] into the SImode memory
;; at stack pointer + 4.
;; NOTE(review): operands[2]/operands[3] are presumably the low and high
;; SImode halves -- confirm against split_double_mode.
2124 (define_split
2125 [(set (match_operand:DI 0 "push_operand")
2126 (match_operand:DI 1 "immediate_operand"))]
2127 "TARGET_64BIT && epilogue_completed
2128 && !symbolic_operand (operands[1], DImode)
2129 && !x86_64_immediate_operand (operands[1], DImode)"
2130 [(set (match_dup 0) (match_dup 1))
2131 (set (match_dup 2) (match_dup 3))]
2132 {
2133 split_double_mode (DImode, &operands[1], 1, &operands[2], &operands[3]);
2134
2135 operands[1] = gen_lowpart (DImode, operands[2]);
2136 operands[2] = gen_rtx_MEM (SImode,
2137 plus_constant (Pmode, stack_pointer_rtx, 4));
2138 })
2139
2140 ;; For TARGET_64BIT we always round up to 8 bytes.
;; SImode push on 64-bit targets.  Alternative 0 is emitted as push{q} of
;; the operand printed with %q; alternative 1 (vector register source)
;; outputs "#".  The "mode" attribute is DI.
2141 (define_insn "*pushsi2_rex64"
2142 [(set (match_operand:SI 0 "push_operand" "=X,X")
2143 (match_operand:SI 1 "nonmemory_no_elim_operand" "re,*v"))]
2144 "TARGET_64BIT"
2145 "@
2146 push{q}\t%q1
2147 #"
2148 [(set_attr "type" "push,multi")
2149 (set_attr "mode" "DI")])
2150
;; SImode push on non-64-bit targets.  Alternative 0 is emitted as push{l};
;; alternative 1 (vector register source) outputs "#".
2151 (define_insn "*pushsi2"
2152 [(set (match_operand:SI 0 "push_operand" "=<,<")
2153 (match_operand:SI 1 "general_no_elim_operand" "ri*m,*v"))]
2154 "!TARGET_64BIT"
2155 "@
2156 push{l}\t%1
2157 #"
2158 [(set_attr "type" "push,multi")
2159 (set_attr "mode" "SI")])
2160
;; After reload (with TARGET_SSE), a push whose source is an SSE register
;; is split into an explicit stack-pointer adjustment by
;; -PUSH_ROUNDING (mode size) followed by a store to the memory at the
;; stack pointer.
2161 (define_split
2162 [(set (match_operand:SWI48DWI 0 "push_operand")
2163 (match_operand:SWI48DWI 1 "sse_reg_operand"))]
2164 "TARGET_SSE && reload_completed"
2165 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
2166 (set (match_dup 0) (match_dup 1))]
2167 {
2168 operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (<SWI48DWI:MODE>mode)));
2169 /* Preserve memory attributes. */
2170 operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
2171 })
2172
2173 ;; emit_push_insn when it calls move_by_pieces requires an insn to
2174 ;; "push a byte/word". But actually we use push{l,q}, which has
2175 ;; the effect of rounding the amount pushed up to a word.
2176
;; SWI12-mode push.  Emitted as push{q} of the %q-printed operand on 64-bit
;; targets and push{l} of the %k-printed operand otherwise; the "mode"
;; attribute is DI or SI to match.
2177 (define_insn "*push<mode>2"
2178 [(set (match_operand:SWI12 0 "push_operand" "=X")
2179 (match_operand:SWI12 1 "nonmemory_no_elim_operand" "rn"))]
2180 ""
2181 "* return TARGET_64BIT ? \"push{q}\t%q1\" : \"push{l}\t%k1\";"
2182 [(set_attr "type" "push")
2183 (set (attr "mode")
2184 (if_then_else (match_test "TARGET_64BIT")
2185 (const_string "DI")
2186 (const_string "SI")))])
2187
;; Word-mode (W) push that additionally clobbers (mem:BLK (scratch)).
;; NOTE(review): the BLK scratch clobber presumably makes the push act as a
;; memory barrier for prologue code -- confirm against the callers.
2188 (define_insn "*push<mode>2_prologue"
2189 [(set (match_operand:W 0 "push_operand" "=<")
2190 (match_operand:W 1 "general_no_elim_operand" "r<i>*m"))
2191 (clobber (mem:BLK (scratch)))]
2192 ""
2193 "push{<imodesuffix>}\t%1"
2194 [(set_attr "type" "push")
2195 (set_attr "mode" "<MODE>")])
2196
;; Word-mode (W) pop into a register or memory destination ("=r*m").
2197 (define_insn "*pop<mode>1"
2198 [(set (match_operand:W 0 "nonimmediate_operand" "=r*m")
2199 (match_operand:W 1 "pop_operand" ">"))]
2200 ""
2201 "pop{<imodesuffix>}\t%0"
2202 [(set_attr "type" "pop")
2203 (set_attr "mode" "<MODE>")])
2204
;; Same as *pop<mode>1 but with an additional (clobber (mem:BLK (scratch))).
;; NOTE(review): the BLK scratch clobber presumably makes the pop act as a
;; memory barrier for epilogue code -- confirm against the callers.
2205 (define_insn "*pop<mode>1_epilogue"
2206 [(set (match_operand:W 0 "nonimmediate_operand" "=r*m")
2207 (match_operand:W 1 "pop_operand" ">"))
2208 (clobber (mem:BLK (scratch)))]
2209 ""
2210 "pop{<imodesuffix>}\t%0"
2211 [(set_attr "type" "pop")
2212 (set_attr "mode" "<MODE>")])
2213
;; Push the flags register (operand 1, CC mode) onto the stack in word
;; mode; modelled with UNSPEC_PUSHFL and emitted as pushf{<imodesuffix>}.
2214 (define_insn "@pushfl<mode>2"
2215 [(set (match_operand:W 0 "push_operand" "=<")
2216 (unspec:W [(match_operand:CC 1 "flags_reg_operand")]
2217 UNSPEC_PUSHFL))]
2218 ""
2219 "pushf{<imodesuffix>}"
2220 [(set_attr "type" "push")
2221 (set_attr "mode" "<MODE>")])
2222
;; Pop a word from the stack into the flags register (operand 0, CC mode);
;; modelled with UNSPEC_POPFL and emitted as popf{<imodesuffix>}.
2223 (define_insn "@popfl<mode>1"
2224 [(set (match_operand:CC 0 "flags_reg_operand")
2225 (unspec:CC [(match_operand:W 1 "pop_operand" ">")]
2226 UNSPEC_POPFL))]
2227 ""
2228 "popf{<imodesuffix>}"
2229 [(set_attr "type" "pop")
2230 (set_attr "mode" "<MODE>")])
2231
2232 \f
2233 ;; Reload patterns to support multi-word load/store
2234 ;; with non-offsettable address.
;; Store helper (64-bit only).  The address of the memory operand 0 is
;; copied into the DImode scratch register operand 2 (earlyclobber, "=&r"),
;; the MEM is rewritten to use that register as its address, and operand 1
;; is then stored to it.
2235 (define_expand "reload_noff_store"
2236 [(parallel [(match_operand 0 "memory_operand" "=m")
2237 (match_operand 1 "register_operand" "r")
2238 (match_operand:DI 2 "register_operand" "=&r")])]
2239 "TARGET_64BIT"
2240 {
2241 rtx mem = operands[0];
2242 rtx addr = XEXP (mem, 0);
2243
2244 emit_move_insn (operands[2], addr);
2245 mem = replace_equiv_address_nv (mem, operands[2]);
2246
2247 emit_insn (gen_rtx_SET (mem, operands[1]));
2248 DONE;
2249 })
2250
;; Load helper (64-bit only).  The address of the memory operand 1 is
;; copied into the DImode scratch register operand 2, the MEM is rewritten
;; to use that register as its address, and the result is loaded into
;; operand 0.  Unlike the store helper, the scratch is not earlyclobber.
2251 (define_expand "reload_noff_load"
2252 [(parallel [(match_operand 0 "register_operand" "=r")
2253 (match_operand 1 "memory_operand" "m")
2254 (match_operand:DI 2 "register_operand" "=r")])]
2255 "TARGET_64BIT"
2256 {
2257 rtx mem = operands[1];
2258 rtx addr = XEXP (mem, 0);
2259
2260 emit_move_insn (operands[2], addr);
2261 mem = replace_equiv_address_nv (mem, operands[2]);
2262
2263 emit_insn (gen_rtx_SET (operands[0], mem));
2264 DONE;
2265 })
2266
2267 ;; Move instructions.
2268
;; XImode move expander; available with TARGET_AVX512F && TARGET_EVEX512
;; and delegated entirely to ix86_expand_vector_move.
2269 (define_expand "movxi"
2270 [(set (match_operand:XI 0 "nonimmediate_operand")
2271 (match_operand:XI 1 "general_operand"))]
2272 "TARGET_AVX512F && TARGET_EVEX512"
2273 "ix86_expand_vector_move (XImode, operands); DONE;")
2274
;; OImode move expander; available with TARGET_AVX and delegated entirely
;; to ix86_expand_vector_move.
2275 (define_expand "movoi"
2276 [(set (match_operand:OI 0 "nonimmediate_operand")
2277 (match_operand:OI 1 "general_operand"))]
2278 "TARGET_AVX"
2279 "ix86_expand_vector_move (OImode, operands); DONE;")
2280
;; TImode move expander; available on 64-bit targets or with TARGET_SSE.
;; 64-bit targets go through ix86_expand_move, all others through
;; ix86_expand_vector_move.
2281 (define_expand "movti"
2282 [(set (match_operand:TI 0 "nonimmediate_operand")
2283 (match_operand:TI 1 "general_operand"))]
2284 "TARGET_64BIT || TARGET_SSE"
2285 {
2286 if (TARGET_64BIT)
2287 ix86_expand_move (TImode, operands);
2288 else
2289 ix86_expand_vector_move (TImode, operands);
2290 DONE;
2291 })
2292
2293 ;; This expands to what emit_move_complex would generate if we didn't
2294 ;; have a movti pattern. Having this avoids problems with reload on
2295 ;; 32-bit targets when SSE is present, but doesn't seem to be harmful
2296 ;; to have around all the time.
;; CDImode move expander.  A push destination is handled by
;; emit_move_complex_push; every other move by emit_move_complex_parts.
2297 (define_expand "movcdi"
2298 [(set (match_operand:CDI 0 "nonimmediate_operand")
2299 (match_operand:CDI 1 "general_operand"))]
2300 ""
2301 {
2302 if (push_operand (operands[0], CDImode))
2303 emit_move_complex_push (CDImode, operands[0], operands[1]);
2304 else
2305 emit_move_complex_parts (operands[0], operands[1]);
2306 DONE;
2307 })
2308
;; Move expander for every mode in the SWI1248x iterator; always available
;; and delegated entirely to ix86_expand_move.
2309 (define_expand "mov<mode>"
2310 [(set (match_operand:SWI1248x 0 "nonimmediate_operand")
2311 (match_operand:SWI1248x 1 "general_operand"))]
2312 ""
2313 "ix86_expand_move (<MODE>mode, operands); DONE;")
2314
;; Load zero into a general register after reload, clobbering the flags.
;; The instruction is always the 32-bit xor{l} on the %k (SImode) name of
;; the register, whichever SWI48 mode is matched; the "mode" attribute is
;; SI accordingly.
2315 (define_insn "*mov<mode>_xor"
2316 [(set (match_operand:SWI48 0 "register_operand" "=r")
2317 (match_operand:SWI48 1 "const0_operand"))
2318 (clobber (reg:CC FLAGS_REG))]
2319 "reload_completed"
2320 "xor{l}\t%k0, %k0"
2321 [(set_attr "type" "alu1")
2322 (set_attr "mode" "SI")
2323 (set_attr "length_immediate" "0")])
2324
;; Store zero into a memory operand after reload, emitted as an "and" with
;; the zero constant and clobbering the flags.  The immediate is accounted
;; as 1 byte ("length_immediate").
2325 (define_insn "*mov<mode>_and"
2326 [(set (match_operand:SWI248 0 "memory_operand" "=m")
2327 (match_operand:SWI248 1 "const0_operand"))
2328 (clobber (reg:CC FLAGS_REG))]
2329 "reload_completed"
2330 "and{<imodesuffix>}\t{%1, %0|%0, %1}"
2331 [(set_attr "type" "alu1")
2332 (set_attr "mode" "<MODE>")
2333 (set_attr "length_immediate" "1")])
2334
;; Store -1 into a register or memory operand after reload, emitted as an
;; "or" with the -1 constant and clobbering the flags.  The immediate is
;; accounted as 1 byte ("length_immediate").
2335 (define_insn "*mov<mode>_or"
2336 [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm")
2337 (match_operand:SWI248 1 "constm1_operand"))
2338 (clobber (reg:CC FLAGS_REG))]
2339 "reload_completed"
2340 "or{<imodesuffix>}\t{%1, %0|%0, %1}"
2341 [(set_attr "type" "alu1")
2342 (set_attr "mode" "<MODE>")
2343 (set_attr "length_immediate" "1")])
2344
;; XImode move.  At least one operand must be a register.
;;   Alternatives 0,1 (constant sources "C"/"BC"): type sselog1, output by
;;     standard_sse_constant_opcode.
;;   Alternatives 2,3 (register/memory load and store): type ssemov, output
;;     by ix86_output_ssemov.
2345 (define_insn "*movxi_internal_avx512f"
2346 [(set (match_operand:XI 0 "nonimmediate_operand" "=v,v ,v ,m")
2347 (match_operand:XI 1 "nonimmediate_or_sse_const_operand" " C,BC,vm,v"))]
2348 "TARGET_AVX512F && TARGET_EVEX512
2349 && (register_operand (operands[0], XImode)
2350 || register_operand (operands[1], XImode))"
2351 {
2352 switch (get_attr_type (insn))
2353 {
2354 case TYPE_SSELOG1:
2355 return standard_sse_constant_opcode (insn, operands);
2356
2357 case TYPE_SSEMOV:
2358 return ix86_output_ssemov (insn, operands);
2359
2360 default:
2361 gcc_unreachable ();
2362 }
2363 }
2364 [(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
2365 (set_attr "prefix" "evex")
2366 (set_attr "mode" "XI")])
2367
;; OImode move.  At least one operand must be a register.
;;   Alternatives 0,1 (constant sources "C"/"BC"): type sselog1, output by
;;     standard_sse_constant_opcode; alternative 1 additionally needs the
;;     avx2 isa.
;;   Alternatives 2,3 (register/memory load and store): type ssemov, output
;;     by ix86_output_ssemov.
2368 (define_insn "*movoi_internal_avx"
2369 [(set (match_operand:OI 0 "nonimmediate_operand" "=v,v ,v ,m")
2370 (match_operand:OI 1 "nonimmediate_or_sse_const_operand" " C,BC,vm,v"))]
2371 "TARGET_AVX
2372 && (register_operand (operands[0], OImode)
2373 || register_operand (operands[1], OImode))"
2374 {
2375 switch (get_attr_type (insn))
2376 {
2377 case TYPE_SSELOG1:
2378 return standard_sse_constant_opcode (insn, operands);
2379
2380 case TYPE_SSEMOV:
2381 return ix86_output_ssemov (insn, operands);
2382
2383 default:
2384 gcc_unreachable ();
2385 }
2386 }
2387 [(set_attr "isa" "*,avx2,*,*")
2388 (set_attr "type" "sselog1,sselog1,ssemov,ssemov")
2389 (set_attr "prefix" "vex")
2390 (set_attr "mode" "OI")])
2391
;; TImode move.  The eight alternatives are classified by the "type"
;; attribute below:
;;   0,1  multi   (output "#", split later); x64 only
;;   2,3  sselog1 (SSE constant via standard_sse_constant_opcode);
;;                alternative 3 needs sse2
;;   4,5  ssemov  (ix86_output_ssemov)
;;   6,7  multi   ("jc" <-> "Yd" moves, output "#"); x64 only, and preferred
;;                for speed only when the matching
;;                TARGET_INTER_UNIT_MOVES_{FROM,TO}_VEC tuning is set
;; The insn condition allows either a 64-bit move that is not
;; memory-to-memory, or an SSE move with at least one register operand.
2392 (define_insn "*movti_internal"
2393 [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v,v ,v ,m,?jc,?Yd")
2394 (match_operand:TI 1 "general_operand" "riFo,re,C,BC,vm,v,Yd,jc"))]
2395 "(TARGET_64BIT
2396 && !(MEM_P (operands[0]) && MEM_P (operands[1])))
2397 || (TARGET_SSE
2398 && nonimmediate_or_sse_const_operand (operands[1], TImode)
2399 && (register_operand (operands[0], TImode)
2400 || register_operand (operands[1], TImode)))"
2401 {
2402 switch (get_attr_type (insn))
2403 {
2404 case TYPE_MULTI:
2405 return "#";
2406
2407 case TYPE_SSELOG1:
2408 return standard_sse_constant_opcode (insn, operands);
2409
2410 case TYPE_SSEMOV:
2411 return ix86_output_ssemov (insn, operands);
2412
2413 default:
2414 gcc_unreachable ();
2415 }
2416 }
2417 [(set (attr "isa")
2418 (cond [(eq_attr "alternative" "0,1,6,7")
2419 (const_string "x64")
2420 (eq_attr "alternative" "3")
2421 (const_string "sse2")
2422 ]
2423 (const_string "*")))
2424 (set (attr "type")
2425 (cond [(eq_attr "alternative" "0,1,6,7")
2426 (const_string "multi")
2427 (eq_attr "alternative" "2,3")
2428 (const_string "sselog1")
2429 ]
2430 (const_string "ssemov")))
2431 (set (attr "prefix")
2432 (if_then_else (eq_attr "type" "sselog1,ssemov")
2433 (const_string "maybe_vex")
2434 (const_string "orig")))
2435 (set (attr "mode")
2436 (cond [(eq_attr "alternative" "0,1")
2437 (const_string "DI")
2438 (match_test "TARGET_AVX")
2439 (const_string "TI")
2440 (ior (not (match_test "TARGET_SSE2"))
2441 (match_test "optimize_function_for_size_p (cfun)"))
2442 (const_string "V4SF")
2443 (and (eq_attr "alternative" "5")
2444 (match_test "TARGET_SSE_TYPELESS_STORES"))
2445 (const_string "V4SF")
2446 ]
2447 (const_string "TI")))
2448 (set (attr "preferred_for_speed")
2449 (cond [(eq_attr "alternative" "6")
2450 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
2451 (eq_attr "alternative" "7")
2452 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
2453 ]
2454 (symbol_ref "true")))])
2455
;; 64-bit, SSE4.1, after reload: TImode move from a general register to an
;; SSE register.  The preparation code first emits a move of the DImode
;; low part.  The replacement pattern then merges the DImode high part of
;; the source (operand 3, duplicated to V2DI) into the V2DImode view of the
;; destination (operand 2) using vec_merge with mask 2.
2456 (define_split
2457 [(set (match_operand:TI 0 "sse_reg_operand")
2458 (match_operand:TI 1 "general_reg_operand"))]
2459 "TARGET_64BIT && TARGET_SSE4_1
2460 && reload_completed"
2461 [(set (match_dup 2)
2462 (vec_merge:V2DI
2463 (vec_duplicate:V2DI (match_dup 3))
2464 (match_dup 2)
2465 (const_int 2)))]
2466 {
2467 operands[2] = lowpart_subreg (V2DImode, operands[0], TImode);
2468 operands[3] = gen_highpart (DImode, operands[1]);
2469
2470 emit_move_insn (gen_lowpart (DImode, operands[0]),
2471 gen_lowpart (DImode, operands[1]));
2472 })
2473
;; DImode move.  The 28 alternatives are classified by the "type" attribute
;; below:
;;   0,1,17,18    multi   (output "#", split later); nox64 only
;;   2,3,4,5      imov, or lea when the destination is a register and the
;;                source is a pic_32bit_operand; x64 only.  Alternative 2
;;                has mode SI and is output as a 32-bit mov; alternative 4
;;                is output as movabs.
;;   6            mmx     (pxor of the register with itself)
;;   7-11         mmxmov
;;   12           sselog1 (standard_sse_constant_opcode)
;;   13-16,19,20  ssemov  (ix86_output_ssemov)
;;   21,22        ssecvt  (movq2dq / movdq2q)
;;   23-26        mskmov  (kmovq)
;;   27           msklog  (kxorq for 0, kxnorq for -1)
;; The two store alternatives 15 and 16 are mutually exclusive through the
;; "enabled" attribute: 16 is enabled only when TARGET_STV && TARGET_SSE2,
;; and 15 is disabled in exactly that case.
2474 (define_insn "*movdi_internal"
2475 [(set (match_operand:DI 0 "nonimmediate_operand"
2476 "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r,?*y,?Yv,?v,?v,m ,m,?jc,?*Yd,?r,?v,?*y,?*x,*k,*k ,*r,*m,*k")
2477 (match_operand:DI 1 "general_operand"
2478 "riFo,riF,Z,rem,i,re,C ,*y,Bk ,*y,*y,r ,C ,?v,Bk,?v,v,*Yd,jc ,?v,r ,*x ,*y ,*r,*kBk,*k,*k,CBC"))]
2479 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
2480 && ix86_hardreg_mov_ok (operands[0], operands[1])"
2481 {
2482 switch (get_attr_type (insn))
2483 {
2484 case TYPE_MSKMOV:
2485 return "kmovq\t{%1, %0|%0, %1}";
2486
2487 case TYPE_MSKLOG:
2488 if (operands[1] == const0_rtx)
2489 return "kxorq\t%0, %0, %0";
2490 else if (operands[1] == constm1_rtx)
2491 return "kxnorq\t%0, %0, %0";
2492 gcc_unreachable ();
2493
2494 case TYPE_MULTI:
2495 return "#";
2496
2497 case TYPE_MMX:
2498 return "pxor\t%0, %0";
2499
2500 case TYPE_MMXMOV:
2501 /* Handle broken assemblers that require movd instead of movq. */
2502 if (!HAVE_AS_IX86_INTERUNIT_MOVQ
2503 && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])))
2504 return "movd\t{%1, %0|%0, %1}";
2505 return "movq\t{%1, %0|%0, %1}";
2506
2507 case TYPE_SSELOG1:
2508 return standard_sse_constant_opcode (insn, operands);
2509
2510 case TYPE_SSEMOV:
2511 return ix86_output_ssemov (insn, operands);
2512
2513 case TYPE_SSECVT:
2514 if (SSE_REG_P (operands[0]))
2515 return "movq2dq\t{%1, %0|%0, %1}";
2516 else
2517 return "movdq2q\t{%1, %0|%0, %1}";
2518
2519 case TYPE_LEA:
2520 return "lea{q}\t{%E1, %0|%0, %E1}";
2521
2522 case TYPE_IMOV:
2523 gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1]));
2524 if (get_attr_mode (insn) == MODE_SI)
2525 return "mov{l}\t{%k1, %k0|%k0, %k1}";
2526 else if (which_alternative == 4)
2527 return "movabs{q}\t{%1, %0|%0, %1}";
2528 else if (ix86_use_lea_for_mov (insn, operands))
2529 return "lea{q}\t{%E1, %0|%0, %E1}";
2530 else
2531 return "mov{q}\t{%1, %0|%0, %1}";
2532
2533 default:
2534 gcc_unreachable ();
2535 }
2536 }
2537 [(set (attr "isa")
2538 (cond [(eq_attr "alternative" "0,1,17,18")
2539 (const_string "nox64")
2540 (eq_attr "alternative" "2,3,4,5,10,11,23,25")
2541 (const_string "x64")
2542 (eq_attr "alternative" "19,20")
2543 (const_string "x64_sse2")
2544 (eq_attr "alternative" "21,22")
2545 (const_string "sse2")
2546 ]
2547 (const_string "*")))
2548 (set (attr "type")
2549 (cond [(eq_attr "alternative" "0,1,17,18")
2550 (const_string "multi")
2551 (eq_attr "alternative" "6")
2552 (const_string "mmx")
2553 (eq_attr "alternative" "7,8,9,10,11")
2554 (const_string "mmxmov")
2555 (eq_attr "alternative" "12")
2556 (const_string "sselog1")
2557 (eq_attr "alternative" "13,14,15,16,19,20")
2558 (const_string "ssemov")
2559 (eq_attr "alternative" "21,22")
2560 (const_string "ssecvt")
2561 (eq_attr "alternative" "23,24,25,26")
2562 (const_string "mskmov")
2563 (eq_attr "alternative" "27")
2564 (const_string "msklog")
2565 (and (match_operand 0 "register_operand")
2566 (match_operand 1 "pic_32bit_operand"))
2567 (const_string "lea")
2568 ]
2569 (const_string "imov")))
2570 (set (attr "modrm")
2571 (if_then_else
2572 (and (eq_attr "alternative" "4") (eq_attr "type" "imov"))
2573 (const_string "0")
2574 (const_string "*")))
2575 (set (attr "length_immediate")
2576 (if_then_else
2577 (and (eq_attr "alternative" "4") (eq_attr "type" "imov"))
2578 (const_string "8")
2579 (const_string "*")))
2580 (set (attr "prefix_rex")
2581 (if_then_else
2582 (eq_attr "alternative" "10,11,19,20")
2583 (const_string "1")
2584 (const_string "*")))
2585 (set (attr "prefix")
2586 (if_then_else (eq_attr "type" "sselog1,ssemov")
2587 (const_string "maybe_vex")
2588 (const_string "orig")))
2589 (set (attr "prefix_data16")
2590 (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "DI"))
2591 (const_string "1")
2592 (const_string "*")))
2593 (set (attr "mode")
2594 (cond [(eq_attr "alternative" "2")
2595 (const_string "SI")
2596 (eq_attr "alternative" "12")
2597 (cond [(match_test "TARGET_AVX")
2598 (const_string "TI")
2599 (ior (not (match_test "TARGET_SSE2"))
2600 (match_test "optimize_function_for_size_p (cfun)"))
2601 (const_string "V4SF")
2602 ]
2603 (const_string "TI"))
2604 (eq_attr "alternative" "13")
2605 (cond [(match_test "TARGET_AVX512VL")
2606 (const_string "TI")
2607 (match_test "TARGET_AVX512F")
2608 (const_string "DF")
2609 (match_test "TARGET_AVX")
2610 (const_string "TI")
2611 (ior (not (match_test "TARGET_SSE2"))
2612 (match_test "optimize_function_for_size_p (cfun)"))
2613 (const_string "V4SF")
2614 ]
2615 (const_string "TI"))
2616
2617 (and (eq_attr "alternative" "14,15,16")
2618 (not (match_test "TARGET_SSE2")))
2619 (const_string "V2SF")
2620 ]
2621 (const_string "DI")))
2622 (set (attr "preferred_for_speed")
2623 (cond [(eq_attr "alternative" "10,17,19")
2624 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
2625 (eq_attr "alternative" "11,18,20")
2626 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
2627 ]
2628 (symbol_ref "true")))
2629 (set (attr "enabled")
2630 (cond [(eq_attr "alternative" "15")
2631 (if_then_else
2632 (match_test "TARGET_STV && TARGET_SSE2")
2633 (symbol_ref "false")
2634 (const_string "*"))
2635 (eq_attr "alternative" "16")
2636 (if_then_else
2637 (match_test "TARGET_STV && TARGET_SSE2")
2638 (symbol_ref "true")
2639 (symbol_ref "false"))
2640 ]
2641 (const_string "*")))])
2642
;; SSE4.1, after reload: double-word move from an SSE register to a general
;; register.  The preparation code first emits a move of the low part.  The
;; replacement pattern then sets the high part of the destination
;; (operand 2) from element 1 of the vector view of the source (operand 3)
;; with vec_select.
2643 (define_split
2644 [(set (match_operand:<DWI> 0 "general_reg_operand")
2645 (match_operand:<DWI> 1 "sse_reg_operand"))]
2646 "TARGET_SSE4_1
2647 && reload_completed"
2648 [(set (match_dup 2)
2649 (vec_select:DWIH
2650 (match_dup 3)
2651 (parallel [(const_int 1)])))]
2652 {
2653 operands[2] = gen_highpart (<MODE>mode, operands[0]);
2654 operands[3] = lowpart_subreg (<ssevecmode>mode, operands[1], <DWI>mode);
2655
2656 emit_move_insn (gen_lowpart (<MODE>mode, operands[0]),
2657 gen_lowpart (<MODE>mode, operands[1]));
2658 })
2659
;; After reload, a double-word move between a nonimmediate_gr_operand and a
;; general_gr_operand is handed to ix86_split_long_move.  The (const_int 0)
;; replacement template is only a placeholder: the preparation code ends
;; with DONE.
2660 (define_split
2661 [(set (match_operand:DWI 0 "nonimmediate_gr_operand")
2662 (match_operand:DWI 1 "general_gr_operand"))]
2663 "reload_completed"
2664 [(const_int 0)]
2665 "ix86_split_long_move (operands); DONE;")
2666
;; 32-bit, SSE4.1, after reload: DImode move from general registers to an
;; SSE register.  The preparation code first emits a move of the SImode low
;; part.  The replacement pattern then merges the SImode high part of the
;; source (operand 3, duplicated to V4SI) into the V4SImode view of the
;; destination (operand 2) using vec_merge with mask 2.
2667 (define_split
2668 [(set (match_operand:DI 0 "sse_reg_operand")
2669 (match_operand:DI 1 "general_reg_operand"))]
2670 "!TARGET_64BIT && TARGET_SSE4_1
2671 && reload_completed"
2672 [(set (match_dup 2)
2673 (vec_merge:V4SI
2674 (vec_duplicate:V4SI (match_dup 3))
2675 (match_dup 2)
2676 (const_int 2)))]
2677 {
2678 operands[2] = lowpart_subreg (V4SImode, operands[0], DImode);
2679 operands[3] = gen_highpart (SImode, operands[1]);
2680
2681 emit_move_insn (gen_lowpart (SImode, operands[0]),
2682 gen_lowpart (SImode, operands[1]));
2683 })
2684
2685 ;; movabsq $0x0012345678000000, %rax is longer
2686 ;; than movl $0x12345678, %eax; shlq $24, %rax.
;; Applies on 64-bit targets when optimizing the insn for size, the
;; destination is a legacy integer register and the flags are dead.  The
;; constant must be neither an x86_64_immediate_operand nor an
;; x86_64_zext_immediate_operand, and must have no bits above the low 32
;; once its trailing zero bits are shifted out.  The move is replaced by a
;; load of the shifted-down constant followed by a left shift by the
;; trailing-zero count (with a flags clobber).  The transformation FAILs
;; if the shifted-down constant matches ix86_endbr_immediate_operand.
2687 (define_peephole2
2688 [(set (match_operand:DI 0 "register_operand")
2689 (match_operand:DI 1 "const_int_operand"))]
2690 "TARGET_64BIT
2691 && optimize_insn_for_size_p ()
2692 && LEGACY_INT_REG_P (operands[0])
2693 && !x86_64_immediate_operand (operands[1], DImode)
2694 && !x86_64_zext_immediate_operand (operands[1], DImode)
2695 && !((UINTVAL (operands[1]) >> ctz_hwi (UINTVAL (operands[1])))
2696 & ~(HOST_WIDE_INT) 0xffffffff)
2697 && peep2_regno_dead_p (0, FLAGS_REG)"
2698 [(set (match_dup 0) (match_dup 1))
2699 (parallel [(set (match_dup 0) (ashift:DI (match_dup 0) (match_dup 2)))
2700 (clobber (reg:CC FLAGS_REG))])]
2701 {
2702 int shift = ctz_hwi (UINTVAL (operands[1]));
2703 rtx op1 = gen_int_mode (UINTVAL (operands[1]) >> shift, DImode);
2704 if (ix86_endbr_immediate_operand (op1, VOIDmode))
2705 FAIL;
2706 operands[1] = op1;
2707 operands[2] = gen_int_mode (shift, QImode);
2708 })
2709
;; SImode move.  The 18 alternatives are classified by the "type" attribute
;; below:
;;   0,1     imov, or lea when the destination is a register and the
;;           source is a pic_32bit_operand
;;   2       mmx     (pxor of the register with itself)
;;   3-7     mmxmov  (movq when the mode attribute is DI, i.e. alternative
;;                   3; movd otherwise)
;;   8       sselog1 (standard_sse_constant_opcode)
;;   9-13    ssemov  (ix86_output_ssemov); alternatives 12,13 need sse2
;;   14-16   mskmov  (kmovd)
;;   17      msklog  (kxord for 0, kxnord for -1)
;; Memory-to-memory moves are rejected by the insn condition.
2710 (define_insn "*movsi_internal"
2711 [(set (match_operand:SI 0 "nonimmediate_operand"
2712 "=r,m ,*y,*y,?*y,?m,?r,?*y,?Yv,?v,?v,m ,?r,?v,*k,*k ,*rm,*k")
2713 (match_operand:SI 1 "general_operand"
2714 "g ,re,C ,*y,Bk ,*y,*y,r ,C ,?v,Bk,?v,?v,r ,*r,*kBk,*k ,CBC"))]
2715 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
2716 && ix86_hardreg_mov_ok (operands[0], operands[1])"
2717 {
2718 switch (get_attr_type (insn))
2719 {
2720 case TYPE_SSELOG1:
2721 return standard_sse_constant_opcode (insn, operands);
2722
2723 case TYPE_MSKMOV:
2724 return "kmovd\t{%1, %0|%0, %1}";
2725
2726 case TYPE_MSKLOG:
2727 if (operands[1] == const0_rtx)
2728 return "kxord\t%0, %0, %0";
2729 else if (operands[1] == constm1_rtx)
2730 return "kxnord\t%0, %0, %0";
2731 gcc_unreachable ();
2732
2733 case TYPE_SSEMOV:
2734 return ix86_output_ssemov (insn, operands);
2735
2736 case TYPE_MMX:
2737 return "pxor\t%0, %0";
2738
2739 case TYPE_MMXMOV:
2740 switch (get_attr_mode (insn))
2741 {
2742 case MODE_DI:
2743 return "movq\t{%1, %0|%0, %1}";
2744 case MODE_SI:
2745 return "movd\t{%1, %0|%0, %1}";
2746
2747 default:
2748 gcc_unreachable ();
2749 }
2750
2751 case TYPE_LEA:
2752 return "lea{l}\t{%E1, %0|%0, %E1}";
2753
2754 case TYPE_IMOV:
2755 gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1]));
2756 if (ix86_use_lea_for_mov (insn, operands))
2757 return "lea{l}\t{%E1, %0|%0, %E1}";
2758 else
2759 return "mov{l}\t{%1, %0|%0, %1}";
2760
2761 default:
2762 gcc_unreachable ();
2763 }
2764 }
2765 [(set (attr "isa")
2766 (cond [(eq_attr "alternative" "12,13")
2767 (const_string "sse2")
2768 ]
2769 (const_string "*")))
2770 (set (attr "type")
2771 (cond [(eq_attr "alternative" "2")
2772 (const_string "mmx")
2773 (eq_attr "alternative" "3,4,5,6,7")
2774 (const_string "mmxmov")
2775 (eq_attr "alternative" "8")
2776 (const_string "sselog1")
2777 (eq_attr "alternative" "9,10,11,12,13")
2778 (const_string "ssemov")
2779 (eq_attr "alternative" "14,15,16")
2780 (const_string "mskmov")
2781 (eq_attr "alternative" "17")
2782 (const_string "msklog")
2783 (and (match_operand 0 "register_operand")
2784 (match_operand 1 "pic_32bit_operand"))
2785 (const_string "lea")
2786 ]
2787 (const_string "imov")))
2788 (set (attr "prefix")
2789 (if_then_else (eq_attr "type" "sselog1,ssemov")
2790 (const_string "maybe_vex")
2791 (const_string "orig")))
2792 (set (attr "prefix_data16")
2793 (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "SI"))
2794 (const_string "1")
2795 (const_string "*")))
2796 (set (attr "mode")
2797 (cond [(eq_attr "alternative" "2,3")
2798 (const_string "DI")
2799 (eq_attr "alternative" "8")
2800 (cond [(match_test "TARGET_AVX")
2801 (const_string "TI")
2802 (ior (not (match_test "TARGET_SSE2"))
2803 (match_test "optimize_function_for_size_p (cfun)"))
2804 (const_string "V4SF")
2805 ]
2806 (const_string "TI"))
2807 (eq_attr "alternative" "9")
2808 (cond [(match_test "TARGET_AVX512VL")
2809 (const_string "TI")
2810 (match_test "TARGET_AVX512F")
2811 (const_string "SF")
2812 (match_test "TARGET_AVX")
2813 (const_string "TI")
2814 (ior (not (match_test "TARGET_SSE2"))
2815 (match_test "optimize_function_for_size_p (cfun)"))
2816 (const_string "V4SF")
2817 ]
2818 (const_string "TI"))
2819
2820 (and (eq_attr "alternative" "10,11")
2821 (not (match_test "TARGET_SSE2")))
2822 (const_string "SF")
2823 ]
2824 (const_string "SI")))
2825 (set (attr "preferred_for_speed")
2826 (cond [(eq_attr "alternative" "6,12")
2827 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
2828 (eq_attr "alternative" "7,13")
2829 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
2830 ]
2831 (symbol_ref "true")))])
2832
2833 ;; With -Oz, transform mov $imm,reg to the shorter push $imm; pop reg.
;; Applies only when optimizing the insn for size with optimize_size > 1,
;; for a nonzero constant in [-128, 127], when the red zone is not in use
;; and the destination is not the stack pointer.  The move becomes a
;; word_mode store through a pre-decremented stack pointer followed by a
;; word_mode load through a post-incremented stack pointer.  A destination
;; in a narrower mode is re-created as a word_mode register with the same
;; register number.
2834 (define_peephole2
2835 [(set (match_operand:SWI248 0 "general_reg_operand")
2836 (match_operand:SWI248 1 "const_int_operand"))]
2837 "optimize_insn_for_size_p () && optimize_size > 1
2838 && operands[1] != const0_rtx
2839 && IN_RANGE (INTVAL (operands[1]), -128, 127)
2840 && !ix86_red_zone_used
2841 && REGNO (operands[0]) != SP_REG"
2842 [(set (match_dup 2) (match_dup 1))
2843 (set (match_dup 0) (match_dup 3))]
2844 {
2845 if (GET_MODE (operands[0]) != word_mode)
2846 operands[0] = gen_rtx_REG (word_mode, REGNO (operands[0]));
2847
2848 operands[2] = gen_rtx_MEM (word_mode,
2849 gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx));
2850 operands[3] = gen_rtx_MEM (word_mode,
2851 gen_rtx_POST_INC (Pmode, stack_pointer_rtx));
2852 })
2853
2854 ;; With -Oz, transform mov $0,mem to the shorter and $0,mem.
2855 ;; Likewise, transform mov $-1,mem to the shorter or $-1,mem.
;; Applies only for the constants 0 and -1, when optimizing the insn for
;; size with optimize_size > 1 and the flags are dead.  The plain store is
;; rewritten as the same set inside a parallel with a FLAGS_REG clobber,
;; which is the shape described by the *mov<mode>_and and *mov<mode>_or
;; patterns earlier in this file.
2856 (define_peephole2
2857 [(set (match_operand:SWI248 0 "memory_operand")
2858 (match_operand:SWI248 1 "const_int_operand"))]
2859 "(operands[1] == const0_rtx || operands[1] == constm1_rtx)
2860 && optimize_insn_for_size_p () && optimize_size > 1
2861 && peep2_regno_dead_p (0, FLAGS_REG)"
2862 [(parallel [(set (match_dup 0) (match_dup 1))
2863 (clobber (reg:CC FLAGS_REG))])])
2864
;; HImode move pattern with sixteen alternatives: general register, memory
;; and immediate moves (0-3), moves involving mask ("k") registers (4-8) and
;; moves involving vector registers (9-15).  The insn condition rejects
;; memory-to-memory moves.  The output code dispatches on the computed
;; "type" attribute; for the default integer case the "mode" attribute
;; selects between mov{l} on the 32-bit register names and mov{w}.
2865 (define_insn "*movhi_internal"
2866 [(set (match_operand:HI 0 "nonimmediate_operand"
2867 "=r,r,r,m ,*k,*k ,r ,m ,*k ,?r,?*v,*Yv,*v,*v,jm,m")
2868 (match_operand:HI 1 "general_operand"
2869 "r ,n,m,rn,r ,*km,*k,*k,CBC,*v,r ,C ,*v,m ,*x,*v"))]
2870 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
2871 && ix86_hardreg_mov_ok (operands[0], operands[1])"
2872 {
2873 switch (get_attr_type (insn))
2874 {
2875 case TYPE_IMOVX:
2876 /* movzwl is faster than movw on p2 due to partial word stalls,
2877 though not as fast as an aligned movl. */
2878 return "movz{wl|x}\t{%1, %k0|%k0, %1}";
2879
2880 case TYPE_MSKMOV:
2881 switch (which_alternative)
2882 {
2883 case 4:
2884 return "kmovw\t{%k1, %0|%0, %k1}";
2885 case 6:
2886 return "kmovw\t{%1, %k0|%k0, %1}";
2887 case 5:
2888 case 7:
2889 return "kmovw\t{%1, %0|%0, %1}";
2890 default:
2891 gcc_unreachable ();
2892 }
2893
2894 case TYPE_SSEMOV:
2895 return ix86_output_ssemov (insn, operands);
2896
2897 case TYPE_SSELOG1:
2898 if (satisfies_constraint_C (operands[1]))
2899 return standard_sse_constant_opcode (insn, operands);
2900
2901 if (SSE_REG_P (operands[0]))
2902 return "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}";
2903 else
2904 return "%vpextrw\t{$0, %1, %0|%0, %1, 0}";
2905
2906 case TYPE_MSKLOG:
2907 if (operands[1] == const0_rtx)
2908 return "kxorw\t%0, %0, %0";
2909 else if (operands[1] == constm1_rtx)
2910 return "kxnorw\t%0, %0, %0";
2911 gcc_unreachable ();
2912
2913 default:
2914 if (get_attr_mode (insn) == MODE_SI)
2915 return "mov{l}\t{%k1, %k0|%k0, %k1}";
2916 else
2917 return "mov{w}\t{%1, %0|%0, %1}";
2918 }
2919 }
;; Vector alternatives 9-13 are tagged sse2, 14 sse4_noavx and 15 avx.
2920 [(set (attr "isa")
2921 (cond [(eq_attr "alternative" "9,10,11,12,13")
2922 (const_string "sse2")
2923 (eq_attr "alternative" "14")
2924 (const_string "sse4_noavx")
2925 (eq_attr "alternative" "15")
2926 (const_string "avx")
2927 ]
2928 (const_string "*")))
2929 (set (attr "addr")
2930 (if_then_else (eq_attr "alternative" "14")
2931 (const_string "gpr16")
2932 (const_string "*")))
;; Instruction type.  This is what the switch in the output code tests, so
;; the order of the tests below decides which assembler template is used.
2933 (set (attr "type")
2934 (cond [(eq_attr "alternative" "4,5,6,7")
2935 (const_string "mskmov")
2936 (eq_attr "alternative" "8")
2937 (const_string "msklog")
2938 (eq_attr "alternative" "13,14,15")
2939 (if_then_else (match_test "TARGET_AVX512FP16")
2940 (const_string "ssemov")
2941 (const_string "sselog1"))
2942 (eq_attr "alternative" "11")
2943 (const_string "sselog1")
2944 (eq_attr "alternative" "9,10,12")
2945 (const_string "ssemov")
2946 (match_test "optimize_function_for_size_p (cfun)")
2947 (const_string "imov")
2948 (and (eq_attr "alternative" "0")
2949 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
2950 (not (match_test "TARGET_HIMODE_MATH"))))
2951 (const_string "imov")
2952 (and (eq_attr "alternative" "1,2")
2953 (match_operand:HI 1 "aligned_operand"))
2954 (const_string "imov")
2955 (and (match_test "TARGET_MOVX")
2956 (eq_attr "alternative" "0,2"))
2957 (const_string "imovx")
2958 ]
2959 (const_string "imov")))
2960 (set (attr "prefix")
2961 (cond [(eq_attr "alternative" "4,5,6,7,8")
2962 (const_string "vex")
2963 (eq_attr "alternative" "9,10,11,12,13,14,15")
2964 (const_string "maybe_evex")
2965 ]
2966 (const_string "orig")))
;; Operating mode.  For the integer alternatives, SI here makes the output
;; code above emit mov{l} instead of mov{w}.
2967 (set (attr "mode")
2968 (cond [(eq_attr "alternative" "9,10")
2969 (if_then_else (match_test "TARGET_AVX512FP16")
2970 (const_string "HI")
2971 (const_string "SI"))
2972 (eq_attr "alternative" "13,14,15")
2973 (if_then_else (match_test "TARGET_AVX512FP16")
2974 (const_string "HI")
2975 (const_string "TI"))
2976 (eq_attr "alternative" "11")
2977 (cond [(match_test "TARGET_AVX")
2978 (const_string "TI")
2979 (ior (not (match_test "TARGET_SSE2"))
2980 (match_test "optimize_function_for_size_p (cfun)"))
2981 (const_string "V4SF")
2982 ]
2983 (const_string "TI"))
2984 (eq_attr "alternative" "12")
2985 (cond [(match_test "TARGET_AVX512VL")
2986 (const_string "TI")
2987 (match_test "TARGET_AVX512FP16")
2988 (const_string "HF")
2989 (match_test "TARGET_AVX512F")
2990 (const_string "SF")
2991 (match_test "TARGET_AVX")
2992 (const_string "TI")
2993 (ior (not (match_test "TARGET_SSE2"))
2994 (match_test "optimize_function_for_size_p (cfun)"))
2995 (const_string "V4SF")
2996 ]
2997 (const_string "TI"))
2998 (eq_attr "type" "imovx")
2999 (const_string "SI")
3000 (and (eq_attr "alternative" "1,2")
3001 (match_operand:HI 1 "aligned_operand"))
3002 (const_string "SI")
3003 (and (eq_attr "alternative" "0")
3004 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
3005 (not (match_test "TARGET_HIMODE_MATH"))))
3006 (const_string "SI")
3007 ]
3008 (const_string "HI")))
;; The inter-unit alternatives (9: vector -> general register, 10: general
;; register -> vector) are only preferred for speed when the matching
;; TARGET_INTER_UNIT_MOVES_* test is true.
3009 (set (attr "preferred_for_speed")
3010 (cond [(eq_attr "alternative" "9")
3011 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
3012 (eq_attr "alternative" "10")
3013 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
3014 ]
3015 (symbol_ref "true")))])
3016
3017 ;; The situation is quite tricky about when to choose full sized (SImode)
3018 ;; moves over QImode moves. For Q_REG -> Q_REG moves we use full size only
3019 ;; for partial register dependency machines (such as AMD Athlon), where QImode
3020 ;; moves issue an extra dependency, and for partial register stall machines
3021 ;; that don't use QImode patterns (where a QImode move causes a stall on the
3022 ;; next instruction).
3023 ;;
3024 ;; For loads of Q_REG to NONQ_REG we use full sized moves, except on partial
3025 ;; register stall machines, where we use QImode instructions, since a
3026 ;; partial register stall can be caused there. Then we use movzx.
3027
;; QImode move pattern with sixteen alternatives: general register, memory
;; and immediate moves (0-8) and moves involving mask ("k") registers (9-15).
;; The insn condition rejects memory-to-memory moves.  The "type" attribute
;; selects between a plain mov, movz{bl|x} and the kmov / kxor / kxnor mask
;; forms; the "mode" attribute decides whether an integer move is emitted as
;; mov{b} or as mov{l}, and whether a mask move uses the "w" or "b" suffix.
3028 (define_insn "*movqi_internal"
3029 [(set (match_operand:QI 0 "nonimmediate_operand"
3030 "=Q,R,r,q,q,r,r ,?r,m ,*k,*k,*r,*m,*k,*k,*k")
3031 (match_operand:QI 1 "general_operand"
3032 "Q ,R,r,n,m,q,rn, m,qn,*r,*k,*k,*k,*m,C,BC"))]
3033 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
3034 && ix86_hardreg_mov_ok (operands[0], operands[1])"
3035
3036 {
3037 char buf[128];
3038 const char *ops;
3039 const char *suffix;
3040
3041 switch (get_attr_type (insn))
3042 {
3043 case TYPE_IMOVX:
3044 gcc_assert (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1]));
3045 return "movz{bl|x}\t{%1, %k0|%k0, %1}";
3046
3047 case TYPE_MSKMOV:
3048 switch (which_alternative)
3049 {
3050 case 9:
3051 ops = "kmov%s\t{%%k1, %%0|%%0, %%k1}";
3052 break;
3053 case 11:
3054 ops = "kmov%s\t{%%1, %%k0|%%k0, %%1}";
3055 break;
3056 case 12:
3057 case 13:
3058 gcc_assert (TARGET_AVX512DQ);
3059 /* FALLTHRU */
3060 case 10:
3061 ops = "kmov%s\t{%%1, %%0|%%0, %%1}";
3062 break;
3063 default:
3064 gcc_unreachable ();
3065 }
3066
3067 suffix = (get_attr_mode (insn) == MODE_HI) ? "w" : "b";
3068
3069 snprintf (buf, sizeof (buf), ops, suffix);
3070 output_asm_insn (buf, operands);
3071 return "";
3072
3073 case TYPE_MSKLOG:
3074 if (operands[1] == const0_rtx)
3075 {
3076 if (get_attr_mode (insn) == MODE_HI)
3077 return "kxorw\t%0, %0, %0";
3078 else
3079 return "kxorb\t%0, %0, %0";
3080 }
3081 else if (operands[1] == constm1_rtx)
3082 {
3083 gcc_assert (TARGET_AVX512DQ);
3084 return "kxnorb\t%0, %0, %0";
3085 }
3086 gcc_unreachable ();
3087
3088 default:
3089 if (get_attr_mode (insn) == MODE_SI)
3090 return "mov{l}\t{%k1, %k0|%k0, %k1}";
3091 else
3092 return "mov{b}\t{%1, %0|%0, %1}";
3093 }
3094 }
;; Alternatives 1 and 2 are tagged x64; the mask alternatives 12, 13 and 15
;; are tagged avx512dq, matching the gcc_assert (TARGET_AVX512DQ) checks in
;; the output code above.
3095 [(set (attr "isa")
3096 (cond [(eq_attr "alternative" "1,2")
3097 (const_string "x64")
3098 (eq_attr "alternative" "12,13,15")
3099 (const_string "avx512dq")
3100 ]
3101 (const_string "*")))
;; Instruction type.  This is what the switch in the output code tests, so
;; the order of the tests below decides which assembler template is used.
3102 (set (attr "type")
3103 (cond [(eq_attr "alternative" "9,10,11,12,13")
3104 (const_string "mskmov")
3105 (eq_attr "alternative" "14,15")
3106 (const_string "msklog")
3107 (and (eq_attr "alternative" "7")
3108 (not (match_operand:QI 1 "aligned_operand")))
3109 (const_string "imovx")
3110 (match_test "optimize_function_for_size_p (cfun)")
3111 (const_string "imov")
3112 (and (eq_attr "alternative" "5")
3113 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
3114 (not (match_test "TARGET_QIMODE_MATH"))))
3115 (const_string "imov")
3116 (eq_attr "alternative" "5,7")
3117 (const_string "imovx")
3118 (and (match_test "TARGET_MOVX")
3119 (eq_attr "alternative" "4"))
3120 (const_string "imovx")
3121 ]
3122 (const_string "imov")))
3123 (set (attr "prefix")
3124 (if_then_else (eq_attr "alternative" "9,10,11,12,13,14,15")
3125 (const_string "vex")
3126 (const_string "orig")))
;; Operating mode.  Mask alternatives 9, 10, 11 and 14 fall back to HI
;; (the "w" forms) when TARGET_AVX512DQ is not set.
3127 (set (attr "mode")
3128 (cond [(eq_attr "alternative" "5,6,7")
3129 (const_string "SI")
3130 (eq_attr "alternative" "8")
3131 (const_string "QI")
3132 (and (eq_attr "alternative" "9,10,11,14")
3133 (not (match_test "TARGET_AVX512DQ")))
3134 (const_string "HI")
3135 (eq_attr "type" "imovx")
3136 (const_string "SI")
3137 ;; For -Os, 8-bit immediates are always shorter than 32-bit
3138 ;; ones.
3139 (and (eq_attr "type" "imov")
3140 (and (eq_attr "alternative" "3")
3141 (match_test "optimize_function_for_size_p (cfun)")))
3142 (const_string "QI")
3143 ;; For -Os, movl where one or both operands are NON_Q_REGS
3144 ;; and both are LEGACY_REGS is shorter than movb.
3145 ;; Otherwise movb and movl sizes are the same, so decide purely
3146 ;; based on speed factors.
3147 (and (eq_attr "type" "imov")
3148 (and (eq_attr "alternative" "1")
3149 (match_test "optimize_function_for_size_p (cfun)")))
3150 (const_string "SI")
3151 (and (eq_attr "type" "imov")
3152 (and (eq_attr "alternative" "0,1,2,3")
3153 (and (match_test "TARGET_PARTIAL_REG_DEPENDENCY")
3154 (not (match_test "TARGET_PARTIAL_REG_STALL")))))
3155 (const_string "SI")
3156 ;; Avoid partial register stalls when not using QImode arithmetic
3157 (and (eq_attr "type" "imov")
3158 (and (eq_attr "alternative" "0,1,2,3")
3159 (and (match_test "TARGET_PARTIAL_REG_STALL")
3160 (not (match_test "TARGET_QIMODE_MATH")))))
3161 (const_string "SI")
3162 ]
3163 (const_string "QI")))])
3164
3165 /* Reload dislikes loading 0/-1 directly into mask registers.
3166 Try to tidy things up here. */
;; Matches an immediate load into a general register followed by a copy of
;; that register into a mask register.  It applies when the general register
;; is dead after the second insn and the immediate is either 0, or -1 with
;; <MODE_SIZE> greater than 1 or TARGET_AVX512DQ.  The pair is replaced by a
;; single set of the mask register from the immediate.
3167 (define_peephole2
3168 [(set (match_operand:SWI 0 "general_reg_operand")
3169 (match_operand:SWI 1 "immediate_operand"))
3170 (set (match_operand:SWI 2 "mask_reg_operand")
3171 (match_dup 0))]
3172 "peep2_reg_dead_p (2, operands[0])
3173 && (const0_operand (operands[1], <MODE>mode)
3174 || (constm1_operand (operands[1], <MODE>mode)
3175 && (<MODE_SIZE> > 1 || TARGET_AVX512DQ)))"
3176 [(set (match_dup 2) (match_dup 1))])
3177
3178 ;; Stores and loads of ax to an arbitrary constant address.
3179 ;; We fake a second form of the instruction to force reload to load the
3180 ;; address into a register when rax is not available.
;; Store to memory whose DImode address is operand 0.  Alternative 0 stores
;; the "a" register with movabs to an immediate address; alternative 1 is an
;; ordinary mov through an address held in a register.  Operand 0 only
;; matches the address, so the output code first replaces it with the full
;; MEM taken from the insn pattern.
3181 (define_insn "*movabs<mode>_1"
3182 [(set (mem:SWI1248x (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
3183 (match_operand:SWI1248x 1 "nonmemory_operand" "a,r<i>"))]
3184 "TARGET_LP64 && ix86_check_movabs (insn, 0)"
3185 {
3186 /* Recover the full memory rtx. */
3187 operands[0] = SET_DEST (PATTERN (insn));
3188 switch (which_alternative)
3189 {
3190 case 0:
3191 return "movabs{<imodesuffix>}\t{%1, %P0|<iptrsize> PTR [%P0], %1}";
3192 case 1:
3193 return "mov{<imodesuffix>}\t{%1, %0|%0, %1}";
3194 default:
3195 gcc_unreachable ();
3196 }
3197 }
3198 [(set_attr "type" "imov")
3199 (set_attr "modrm" "0,*")
3200 (set_attr "length_address" "8,0")
3201 (set_attr "length_immediate" "0,*")
3202 (set_attr "memory" "store")
3203 (set_attr "mode" "<MODE>")])
3204
;; Load from memory whose DImode address is operand 1.  Alternative 0 loads
;; the "a" register with movabs from an immediate address; alternative 1 is
;; an ordinary mov through an address held in a register.  Operand 1 only
;; matches the address, so the output code first replaces it with the full
;; MEM taken from the insn pattern.
3205 (define_insn "*movabs<mode>_2"
3206 [(set (match_operand:SWI1248x 0 "register_operand" "=a,r")
3207 (mem:SWI1248x (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
3208 "TARGET_LP64 && ix86_check_movabs (insn, 1)"
3209 {
3210 /* Recover the full memory rtx. */
3211 operands[1] = SET_SRC (PATTERN (insn));
3212 switch (which_alternative)
3213 {
3214 case 0:
3215 return "movabs{<imodesuffix>}\t{%P1, %0|%0, <iptrsize> PTR [%P1]}";
3216 case 1:
3217 return "mov{<imodesuffix>}\t{%1, %0|%0, %1}";
3218 default:
3219 gcc_unreachable ();
3220 }
3221 }
3222 [(set_attr "type" "imov")
3223 (set_attr "modrm" "0,*")
3224 (set_attr "length_address" "8,0")
3225 (set_attr "length_immediate" "0")
3226 (set_attr "memory" "load")
3227 (set_attr "mode" "<MODE>")])
3228
;; Exchange the contents of two SWI48 registers with a single xchg.  Both
;; operands are read and written ("+r").
3229 (define_insn "swap<mode>"
3230 [(set (match_operand:SWI48 0 "register_operand" "+r")
3231 (match_operand:SWI48 1 "register_operand" "+r"))
3232 (set (match_dup 1)
3233 (match_dup 0))]
3234 ""
3235 "xchg{<imodesuffix>}\t%1, %0"
3236 [(set_attr "type" "imov")
3237 (set_attr "mode" "<MODE>")
3238 (set_attr "pent_pair" "np")
3239 (set_attr "athlon_decode" "vector")
3240 (set_attr "amdfam10_decode" "double")
3241 (set_attr "bdver1_decode" "double")])
3242
;; Exchange the contents of two SWI12 registers.  Alternative 0 emits xchg
;; in the operand mode; alternative 1 emits xchg{l} on the 32-bit names of
;; the same registers.  Alternative 0 is marked not preferred for size, and
;; alternative 1 is preferred for speed only when TARGET_PARTIAL_REG_STALL
;; is false.
3243 (define_insn "*swap<mode>"
3244 [(set (match_operand:SWI12 0 "register_operand" "+<r>,r")
3245 (match_operand:SWI12 1 "register_operand" "+<r>,r"))
3246 (set (match_dup 1)
3247 (match_dup 0))]
3248 ""
3249 "@
3250 xchg{<imodesuffix>}\t%1, %0
3251 xchg{l}\t%k1, %k0"
3252 [(set_attr "type" "imov")
3253 (set_attr "mode" "<MODE>,SI")
3254 (set (attr "preferred_for_size")
3255 (cond [(eq_attr "alternative" "0")
3256 (symbol_ref "false")]
3257 (symbol_ref "true")))
3258 ;; Potential partial reg stall on alternative 1.
3259 (set (attr "preferred_for_speed")
3260 (cond [(eq_attr "alternative" "1")
3261 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
3262 (symbol_ref "true")))
3263 (set_attr "pent_pair" "np")
3264 (set_attr "athlon_decode" "vector")
3265 (set_attr "amdfam10_decode" "double")
3266 (set_attr "bdver1_decode" "double")])
3267
;; Matches the three-move sequence op0 = op1; op1 = op2; op2 = op0 on
;; general registers.  When op0 is dead after the third insn and the insn is
;; being optimized for size, the sequence is replaced by one parallel that
;; exchanges op1 and op2.
3268 (define_peephole2
3269 [(set (match_operand:SWI 0 "general_reg_operand")
3270 (match_operand:SWI 1 "general_reg_operand"))
3271 (set (match_dup 1)
3272 (match_operand:SWI 2 "general_reg_operand"))
3273 (set (match_dup 2) (match_dup 0))]
3274 "peep2_reg_dead_p (3, operands[0])
3275 && optimize_insn_for_size_p ()"
3276 [(parallel [(set (match_dup 1) (match_dup 2))
3277 (set (match_dup 2) (match_dup 1))])])
3278
3279 ;; Convert xchg with a REG_UNUSED note to a mov (variant #1).
;; This variant applies when operand 0 is dead after the exchange, so only
;; the set of operand 1 from operand 0 is kept.  The conversion is not done
;; when one of the registers is AX_REG, optimize_size is at least 2 and the
;; insn is optimized for size.
3280 (define_peephole2
3281 [(parallel [(set (match_operand:SWI 0 "general_reg_operand")
3282 (match_operand:SWI 1 "general_reg_operand"))
3283 (set (match_dup 1) (match_dup 0))])]
3284 "((REGNO (operands[0]) != AX_REG
3285 && REGNO (operands[1]) != AX_REG)
3286 || optimize_size < 2
3287 || !optimize_insn_for_size_p ())
3288 && peep2_reg_dead_p (1, operands[0])"
3289 [(set (match_dup 1) (match_dup 0))])
3290
3291 ;; Convert xchg with a REG_UNUSED note to a mov (variant #2).
;; This variant applies when operand 1 is dead after the exchange, so only
;; the set of operand 0 from operand 1 is kept.  The conversion is not done
;; when one of the registers is AX_REG, optimize_size is at least 2 and the
;; insn is optimized for size.
3292 (define_peephole2
3293 [(parallel [(set (match_operand:SWI 0 "general_reg_operand")
3294 (match_operand:SWI 1 "general_reg_operand"))
3295 (set (match_dup 1) (match_dup 0))])]
3296 "((REGNO (operands[0]) != AX_REG
3297 && REGNO (operands[1]) != AX_REG)
3298 || optimize_size < 2
3299 || !optimize_insn_for_size_p ())
3300 && peep2_reg_dead_p (1, operands[1])"
3301 [(set (match_dup 0) (match_dup 1))])
3302
3303 ;; Convert moves to/from AX_REG into xchg with -Oz.
;; Applies only when optimize_size is greater than 1, exactly one of the two
;; registers is AX_REG, the insn is optimized for size and the source
;; register is dead after the move, so clobbering it with the exchanged
;; value is harmless.
3304 (define_peephole2
3305 [(set (match_operand:SWI48 0 "general_reg_operand")
3306 (match_operand:SWI48 1 "general_reg_operand"))]
3307 "optimize_size > 1
3308 && ((REGNO (operands[0]) == AX_REG)
3309 != (REGNO (operands[1]) == AX_REG))
3310 && optimize_insn_for_size_p ()
3311 && peep2_reg_dead_p (1, operands[1])"
3312 [(parallel [(set (match_dup 0) (match_dup 1))
3313 (set (match_dup 1) (match_dup 0))])])
3314
;; Expander for a strict_low_part store into an SWI12 register.  It asserts
;; that operand 0 is a SUBREG and FAILs when TARGET_PARTIAL_REG_STALL is set
;; while the function is optimized for speed, or when the mode of the inner
;; register is not VALID_INT_MODE_P.  Otherwise the pattern is emitted as
;; written.
3315 (define_expand "movstrict<mode>"
3316 [(set (strict_low_part (match_operand:SWI12 0 "register_operand"))
3317 (match_operand:SWI12 1 "general_operand"))]
3318 ""
3319 {
3320 gcc_assert (SUBREG_P (operands[0]));
3321 if ((TARGET_PARTIAL_REG_STALL && optimize_function_for_speed_p (cfun))
3322 || !VALID_INT_MODE_P (GET_MODE (SUBREG_REG (operands[0]))))
3323 FAIL;
3324 })
3325
;; strict_low_part move into an SWI12 register from a register, memory or
;; immediate, emitted as a mov of the operand size.  Enabled when
;; TARGET_PARTIAL_REG_STALL is false or the function is optimized for size.
3326 (define_insn "*movstrict<mode>_1"
3327 [(set (strict_low_part
3328 (match_operand:SWI12 0 "register_operand" "+<r>"))
3329 (match_operand:SWI12 1 "general_operand" "<r>mn"))]
3330 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
3331 "mov{<imodesuffix>}\t{%1, %0|%0, %1}"
3332 [(set_attr "type" "imov")
3333 (set_attr "mode" "<MODE>")])
3334
;; strict_low_part store of constant zero into an SWI12 register, emitted as
;; an xor of the register with itself.  The pattern clobbers the flags
;; register and is only available once reload has completed.
3335 (define_insn "*movstrict<mode>_xor"
3336 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>"))
3337 (match_operand:SWI12 1 "const0_operand"))
3338 (clobber (reg:CC FLAGS_REG))]
3339 "reload_completed"
3340 "xor{<imodesuffix>}\t%0, %0"
3341 [(set_attr "type" "alu1")
3342 (set_attr "mode" "<MODE>")
3343 (set_attr "length_immediate" "0")])
3344
;; strict_low_part QImode store whose source is the QImode subreg of an
;; 8-bit extraction at bit position 8 from a "Q" register.  Emitted as
;; mov{b} from the %h (high byte) form of operand 1.  Enabled when
;; TARGET_PARTIAL_REG_STALL is false or the function is optimized for size.
3345 (define_insn "*movstrictqi_ext<mode>_1"
3346 [(set (strict_low_part
3347 (match_operand:QI 0 "register_operand" "+Q"))
3348 (subreg:QI
3349 (match_operator:SWI248 2 "extract_operator"
3350 [(match_operand 1 "int248_register_operand" "Q")
3351 (const_int 8)
3352 (const_int 8)]) 0))]
3353 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
3354 "mov{b}\t{%h1, %0|%0, %h1}"
3355 [(set_attr "type" "imov")
3356 (set_attr "mode" "QI")])
3357
;; Expander for sign_extract on SWI24 registers.  Only the case where both
;; constant operands (2 and 3) equal 8 is handled; anything else FAILs.  If
;; the source register number is at most LAST_VIRTUAL_REGISTER and is not
;; QI_REGNO_P, the source is first copied into a new register.
3358 (define_expand "extv<mode>"
3359 [(set (match_operand:SWI24 0 "register_operand")
3360 (sign_extract:SWI24 (match_operand:SWI24 1 "register_operand")
3361 (match_operand:QI 2 "const_int_operand")
3362 (match_operand:QI 3 "const_int_operand")))]
3363 ""
3364 {
3365 /* Handle extractions from %ah et al. */
3366 if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8)
3367 FAIL;
3368
3369 unsigned int regno = reg_or_subregno (operands[1]);
3370
3371 /* Be careful to expand only with registers having upper parts. */
3372 if (regno <= LAST_VIRTUAL_REGISTER && !QI_REGNO_P (regno))
3373 operands[1] = copy_to_reg (operands[1]);
3374 })
3375
;; Sign-extending extraction of 8 bits at bit position 8 from a "Q"
;; register.  Emitted as movs{bl|x} from the %h (high byte) form of operand 1
;; into the 32-bit name of operand 0.
3376 (define_insn "*extv<mode>"
3377 [(set (match_operand:SWI24 0 "register_operand" "=R")
3378 (sign_extract:SWI24 (match_operand 1 "int248_register_operand" "Q")
3379 (const_int 8)
3380 (const_int 8)))]
3381 ""
3382 "movs{bl|x}\t{%h1, %k0|%k0, %h1}"
3383 [(set_attr "type" "imovx")
3384 (set_attr "mode" "SI")])
3385
3386 ;; Split sign-extension of single least significant bit as and x,$1;neg x
;; The input is tied to the output register (constraint "0").  The insn is
;; never output directly ("#"); it is split, with an empty split condition,
;; into an AND with 1 followed by a NEG, each with its own flags clobber.
3387 (define_insn_and_split "*extv<mode>_1_0"
3388 [(set (match_operand:SWI48 0 "register_operand" "=r")
3389 (sign_extract:SWI48 (match_operand:SWI48 1 "register_operand" "0")
3390 (const_int 1)
3391 (const_int 0)))
3392 (clobber (reg:CC FLAGS_REG))]
3393 ""
3394 "#"
3395 ""
3396 [(parallel [(set (match_dup 0) (and:SWI48 (match_dup 1) (const_int 1)))
3397 (clobber (reg:CC FLAGS_REG))])
3398 (parallel [(set (match_dup 0) (neg:SWI48 (match_dup 0)))
3399 (clobber (reg:CC FLAGS_REG))])])
3400
;; Expander for zero_extract on SWI248 registers.  ix86_expand_pextr is
;; tried first and ends the expansion if it succeeds.  Otherwise only the
;; case where both constant operands (2 and 3) equal 8 is handled; anything
;; else FAILs.  If the source register number is at most
;; LAST_VIRTUAL_REGISTER and is not QI_REGNO_P, the source is first copied
;; into a new register.
3401 (define_expand "extzv<mode>"
3402 [(set (match_operand:SWI248 0 "register_operand")
3403 (zero_extract:SWI248 (match_operand:SWI248 1 "register_operand")
3404 (match_operand:QI 2 "const_int_operand")
3405 (match_operand:QI 3 "const_int_operand")))]
3406 ""
3407 {
3408 if (ix86_expand_pextr (operands))
3409 DONE;
3410
3411 /* Handle extractions from %ah et al. */
3412 if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8)
3413 FAIL;
3414
3415 unsigned int regno = reg_or_subregno (operands[1]);
3416
3417 /* Be careful to expand only with registers having upper parts. */
3418 if (regno <= LAST_VIRTUAL_REGISTER && !QI_REGNO_P (regno))
3419 operands[1] = copy_to_reg (operands[1]);
3420 })
3421
;; Zero-extending extraction of 8 bits at bit position 8 from a "Q"
;; register.  Emitted as movz{bl|x} from the %h (high byte) form of operand 1
;; into the 32-bit name of operand 0.
3422 (define_insn "*extzv<mode>"
3423 [(set (match_operand:SWI248 0 "register_operand" "=R")
3424 (zero_extract:SWI248 (match_operand 1 "int248_register_operand" "Q")
3425 (const_int 8)
3426 (const_int 8)))]
3427 ""
3428 "movz{bl|x}\t{%h1, %k0|%k0, %h1}"
3429 [(set_attr "type" "imovx")
3430 (set_attr "mode" "SI")])
3431
;; QImode result of an 8-bit extraction at bit position 8 from a "Q"
;; register, stored to a register or memory.  The "type" attribute is imovx
;; when operand 0 is a register and either is not a QIreg_operand or
;; TARGET_MOVX is set; in that case movz{bl|x} into the 32-bit register name
;; is emitted.  Otherwise a mov{b} from the %h form of operand 1 is used.
3432 (define_insn "*extzvqi"
3433 [(set (match_operand:QI 0 "nonimmediate_operand" "=QBn,?R")
3434 (subreg:QI
3435 (match_operator:SWI248 2 "extract_operator"
3436 [(match_operand 1 "int248_register_operand" "Q,Q")
3437 (const_int 8)
3438 (const_int 8)]) 0))]
3439 ""
3440 {
3441 switch (get_attr_type (insn))
3442 {
3443 case TYPE_IMOVX:
3444 return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
3445 default:
3446 return "mov{b}\t{%h1, %0|%0, %h1}";
3447 }
3448 }
3449 [(set_attr "addr" "gpr8,*")
3450 (set (attr "type")
3451 (if_then_else (and (match_operand:QI 0 "register_operand")
3452 (ior (not (match_operand:QI 0 "QIreg_operand"))
3453 (match_test "TARGET_MOVX")))
3454 (const_string "imovx")
3455 (const_string "imov")))
3456 (set (attr "mode")
3457 (if_then_else (eq_attr "type" "imovx")
3458 (const_string "SI")
3459 (const_string "QI")))])
3460
;; Expander for a bit-field insertion into an SWI248 register.
;; ix86_expand_pinsr is tried first and ends the expansion if it succeeds.
;; Otherwise only the case where both constant operands (1 and 2) equal 8 is
;; handled; anything else FAILs.  If the destination register number is at
;; most LAST_VIRTUAL_REGISTER and is not QI_REGNO_P, the insertion is done
;; on a copy via gen_insv_1 and the copy is then moved back to operand 0.
3461 (define_expand "insv<mode>"
3462 [(set (zero_extract:SWI248 (match_operand:SWI248 0 "register_operand")
3463 (match_operand:QI 1 "const_int_operand")
3464 (match_operand:QI 2 "const_int_operand"))
3465 (match_operand:SWI248 3 "register_operand"))]
3466 ""
3467 {
3468 rtx dst;
3469
3470 if (ix86_expand_pinsr (operands))
3471 DONE;
3472
3473 /* Handle insertions to %ah et al. */
3474 if (INTVAL (operands[1]) != 8 || INTVAL (operands[2]) != 8)
3475 FAIL;
3476
3477 unsigned int regno = reg_or_subregno (operands[0]);
3478
3479 /* Be careful to expand only with registers having upper parts. */
3480 if (regno <= LAST_VIRTUAL_REGISTER && !QI_REGNO_P (regno))
3481 dst = copy_to_reg (operands[0]);
3482 else
3483 dst = operands[0];
3484
3485 emit_insn (gen_insv_1 (<MODE>mode, dst, operands[3]));
3486
3487 /* Fix up the destination if needed. */
3488 if (dst != operands[0])
3489 emit_move_insn (operands[0], dst);
3490
3491 DONE;
3492 })
3493
;; Insert an SWI248 value into the 8-bit field at bit position 8 of a "Q"
;; register.  A CONST_INT source is first re-created in QImode with
;; gen_int_mode.  The insn is emitted as mov{b} from the byte form (%b) of
;; operand 1 into the %h (high byte) form of operand 0.
3494 (define_insn "@insv<mode>_1"
3495 [(set (zero_extract:SWI248
3496 (match_operand 0 "int248_register_operand" "+Q")
3497 (const_int 8)
3498 (const_int 8))
3499 (match_operand:SWI248 1 "general_operand" "QnBn"))]
3500 ""
3501 {
3502 if (CONST_INT_P (operands[1]))
3503 operands[1] = gen_int_mode (INTVAL (operands[1]), QImode);
3504 return "mov{b}\t{%b1, %h0|%h0, %b1}";
3505 }
3506 [(set_attr "addr" "gpr8")
3507 (set_attr "type" "imov")
3508 (set_attr "mode" "QI")])
3509
;; Insert a QImode operand, wrapped in an SWI248 subreg, into the 8-bit
;; field at bit position 8 of a "Q" register.  Emitted as mov{b} into the
;; %h (high byte) form of operand 0.
3510 (define_insn "*insvqi_1"
3511 [(set (zero_extract:SWI248
3512 (match_operand 0 "int248_register_operand" "+Q")
3513 (const_int 8)
3514 (const_int 8))
3515 (subreg:SWI248
3516 (match_operand:QI 1 "general_operand" "QnBn") 0))]
3517 ""
3518 "mov{b}\t{%1, %h0|%h0, %1}"
3519 [(set_attr "addr" "gpr8")
3520 (set_attr "type" "imov")
3521 (set_attr "mode" "QI")])
3522
3523 ;; Eliminate redundant insv, e.g. xorl %eax,%eax; movb $0, %ah
;; Matches a flags-clobbering set of a register to zero followed by an
;; insertion of zero into the 8-bit field at bit position 8 of the same
;; register (same REGNO).  Only the first insn is kept.
3524 (define_peephole2
3525 [(parallel [(set (match_operand:SWI48 0 "general_reg_operand")
3526 (const_int 0))
3527 (clobber (reg:CC FLAGS_REG))])
3528 (set (zero_extract:SWI248 (match_operand 1 "int248_register_operand")
3529 (const_int 8)
3530 (const_int 8))
3531 (const_int 0))]
3532 "REGNO (operands[0]) == REGNO (operands[1])"
3533 [(parallel [(set (match_operand:SWI48 0 "general_reg_operand")
3534 (const_int 0))
3535 (clobber (reg:CC FLAGS_REG))])])
3536
3537 ;; Combine movl followed by movb.
;; Matches a constant load into a register followed by a constant insertion
;; into the 8-bit field at bit position 8 of the same register (same REGNO).
;; Both are replaced by one load of the merged constant: the first constant
;; with bits 0xff00 cleared, OR-ed with the low 8 bits of the second
;; constant shifted left by 8.
3538 (define_peephole2
3539 [(set (match_operand:SWI48 0 "general_reg_operand")
3540 (match_operand:SWI48 1 "const_int_operand"))
3541 (set (zero_extract:SWI248 (match_operand 2 "int248_register_operand")
3542 (const_int 8)
3543 (const_int 8))
3544 (match_operand:SWI248 3 "const_int_operand"))]
3545 "REGNO (operands[0]) == REGNO (operands[2])"
3546 [(set (match_operand:SWI48 0 "general_reg_operand")
3547 (match_dup 4))]
3548 {
3549 HOST_WIDE_INT tmp = INTVAL (operands[1]) & ~(HOST_WIDE_INT)0xff00;
3550 tmp |= (INTVAL (operands[3]) & 0xff) << 8;
3551 operands[4] = gen_int_mode (tmp, <SWI48:MODE>mode);
3552 })
3553
;; Insert an 8-bit extraction at bit position 8 of one "Q" register into the
;; 8-bit field at bit position 8 of another.  Emitted as mov{b} between the
;; %h (high byte) forms of the two registers.
3554 (define_insn "*insvqi_2"
3555 [(set (zero_extract:SWI248
3556 (match_operand 0 "int248_register_operand" "+Q")
3557 (const_int 8)
3558 (const_int 8))
3559 (match_operator:SWI248 2 "extract_operator"
3560 [(match_operand 1 "int248_register_operand" "Q")
3561 (const_int 8)
3562 (const_int 8)]))]
3563 ""
3564 "mov{b}\t{%h1, %h0|%h0, %h1}"
3565 [(set_attr "type" "imov")
3566 (set_attr "mode" "QI")])
3567
;; Same insertion into the 8-bit field at bit position 8, but with the
;; source written as a right shift (any_shiftrt) of a "Q" register by 8.
;; Emitted as mov{b} between the %h (high byte) forms of the two registers.
3568 (define_insn "*insvqi_3"
3569 [(set (zero_extract:SWI248
3570 (match_operand 0 "int248_register_operand" "+Q")
3571 (const_int 8)
3572 (const_int 8))
3573 (any_shiftrt:SWI248
3574 (match_operand:SWI248 1 "register_operand" "Q")
3575 (const_int 8)))]
3576 ""
3577 "mov{b}\t{%h1, %h0|%h0, %h1}"
3578 [(set_attr "type" "imov")
3579 (set_attr "mode" "QI")])
3580
;; Code iterator over plus, ior and xor.
3581 (define_code_iterator any_or_plus [plus ior xor])
3582
;; TImode combination (plus, ior or xor) of operand 1 masked by constant
;; operand 3 with DImode operand 2 zero-extended and shifted left by 64.
;; The insn condition requires TARGET_64BIT and a two-element
;; CONST_WIDE_INT mask whose element 0 is -1 and element 1 is 0.  After
;; reload the insn is split by split_double_concat, which is passed the
;; DImode low part of operand 1 and operand 2.
3583 (define_insn_and_split "*insvti_highpart_1"
3584 [(set (match_operand:TI 0 "nonimmediate_operand" "=ro,r,r,&r")
3585 (any_or_plus:TI
3586 (and:TI
3587 (match_operand:TI 1 "nonimmediate_operand" "r,m,r,m")
3588 (match_operand:TI 3 "const_scalar_int_operand" "n,n,n,n"))
3589 (ashift:TI
3590 (zero_extend:TI
3591 (match_operand:DI 2 "nonimmediate_operand" "r,r,m,m"))
3592 (const_int 64))))]
3593 "TARGET_64BIT
3594 && CONST_WIDE_INT_P (operands[3])
3595 && CONST_WIDE_INT_NUNITS (operands[3]) == 2
3596 && CONST_WIDE_INT_ELT (operands[3], 0) == -1
3597 && CONST_WIDE_INT_ELT (operands[3], 1) == 0"
3598 "#"
3599 "&& reload_completed"
3600 [(const_int 0)]
3601 {
3602 operands[4] = gen_lowpart (DImode, operands[1]);
3603 split_double_concat (TImode, operands[0], operands[4], operands[2]);
3604 DONE;
3605 })
3606
;; TImode combination (plus, ior or xor) of operand 1 masked by constant
;; operand 3 with DImode operand 2 zero-extended.  The insn condition
;; requires TARGET_64BIT and a two-element CONST_WIDE_INT mask whose
;; element 0 is 0 and element 1 is -1.  After reload the insn is split by
;; split_double_concat, which is passed operand 2 and the DImode high part
;; of operand 1.
3607 (define_insn_and_split "*insvti_lowpart_1"
3608 [(set (match_operand:TI 0 "nonimmediate_operand" "=ro,r,r,&r")
3609 (any_or_plus:TI
3610 (and:TI
3611 (match_operand:TI 1 "nonimmediate_operand" "r,m,r,m")
3612 (match_operand:TI 3 "const_scalar_int_operand" "n,n,n,n"))
3613 (zero_extend:TI
3614 (match_operand:DI 2 "nonimmediate_operand" "r,r,m,m"))))]
3615 "TARGET_64BIT
3616 && CONST_WIDE_INT_P (operands[3])
3617 && CONST_WIDE_INT_NUNITS (operands[3]) == 2
3618 && CONST_WIDE_INT_ELT (operands[3], 0) == 0
3619 && CONST_WIDE_INT_ELT (operands[3], 1) == -1"
3620 "#"
3621 "&& reload_completed"
3622 [(const_int 0)]
3623 {
3624 operands[4] = gen_highpart (DImode, operands[1]);
3625 split_double_concat (TImode, operands[0], operands[2], operands[4]);
3626 DONE;
3627 })
3628
;; DImode counterpart for !TARGET_64BIT: combination (plus, ior or xor) of
;; operand 1 masked by the constant 0xffffffff00000000 with SImode operand 2
;; zero-extended.  After reload the insn is split by split_double_concat,
;; which is passed operand 2 and the SImode high part of operand 1.
3629 (define_insn_and_split "*insvdi_lowpart_1"
3630 [(set (match_operand:DI 0 "nonimmediate_operand" "=ro,r,r,&r")
3631 (any_or_plus:DI
3632 (and:DI
3633 (match_operand:DI 1 "nonimmediate_operand" "r,m,r,m")
3634 (match_operand:DI 3 "const_int_operand" "n,n,n,n"))
3635 (zero_extend:DI
3636 (match_operand:SI 2 "nonimmediate_operand" "r,r,m,m"))))]
3637 "!TARGET_64BIT
3638 && CONST_INT_P (operands[3])
3639 && UINTVAL (operands[3]) == 0xffffffff00000000ll"
3640 "#"
3641 "&& reload_completed"
3642 [(const_int 0)]
3643 {
3644 operands[4] = gen_highpart (SImode, operands[1]);
3645 split_double_concat (DImode, operands[0], operands[2], operands[4]);
3646 DONE;
3647 })
3648 \f
3649 ;; Floating point push instructions.
3650
;; Push of a TFmode value.  Alternative 0 takes a vector register;
;; alternative 1 (tagged x64) takes a general register, offsettable memory
;; or a "C" constant.  The insn always outputs "#", so it has to be split
;; before final output.
3651 (define_insn "*pushtf"
3652 [(set (match_operand:TF 0 "push_operand" "=<,<")
3653 (match_operand:TF 1 "general_no_elim_operand" "v,*roC"))]
3654 "TARGET_64BIT || TARGET_SSE"
3655 {
3656 /* This insn should be already split before reg-stack. */
3657 return "#";
3658 }
3659 [(set_attr "isa" "*,x64")
3660 (set_attr "type" "multi")
3661 (set_attr "unit" "sse,*")
3662 (set_attr "mode" "TF,DI")])
3663
3664 ;; %%% Kill this when call knows how to work this out.
;; After reload, with TARGET_SSE, split a TFmode push of an SSE register
;; into an explicit stack pointer adjustment by -16 followed by a store.
;; The store destination is operand 0 re-addressed at the stack pointer.
3665 (define_split
3666 [(set (match_operand:TF 0 "push_operand")
3667 (match_operand:TF 1 "sse_reg_operand"))]
3668 "TARGET_SSE && reload_completed"
3669 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -16)))
3670 (set (match_dup 0) (match_dup 1))]
3671 {
3672 /* Preserve memory attributes. */
3673 operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
3674 })
3675
;; Push of an XFmode value from an x87 register (alternative 0), a general
;; register (1, 2) or offsettable memory / constant (3 for nox64, 4 for
;; x64).  The insn always outputs "#", so it has to be split before final
;; output.  The "mode" attribute is XF for alternative 0 and DI or SI,
;; depending on TARGET_64BIT, for the others.  Alternative 1 is marked not
;; preferred for size.
3676 (define_insn "*pushxf"
3677 [(set (match_operand:XF 0 "push_operand" "=<,<,<,<,<")
3678 (match_operand:XF 1 "general_no_elim_operand" "f,r,*r,oF,oC"))]
3679 ""
3680 {
3681 /* This insn should be already split before reg-stack. */
3682 return "#";
3683 }
3684 [(set_attr "isa" "*,*,*,nox64,x64")
3685 (set_attr "type" "multi")
3686 (set_attr "unit" "i387,*,*,*,*")
3687 (set (attr "mode")
3688 (cond [(eq_attr "alternative" "1,2,3,4")
3689 (if_then_else (match_test "TARGET_64BIT")
3690 (const_string "DI")
3691 (const_string "SI"))
3692 ]
3693 (const_string "XF")))
3694 (set (attr "preferred_for_size")
3695 (cond [(eq_attr "alternative" "1")
3696 (symbol_ref "false")]
3697 (symbol_ref "true")))])
3698
3699 ;; %%% Kill this when call knows how to work this out.
;; After reload, split an XFmode push of an fp_register_operand into an
;; explicit stack pointer adjustment followed by a store.  The adjustment is
;; the negated PUSH_ROUNDING of the XFmode size; the store destination is
;; operand 0 re-addressed at the stack pointer.
3700 (define_split
3701 [(set (match_operand:XF 0 "push_operand")
3702 (match_operand:XF 1 "fp_register_operand"))]
3703 "reload_completed"
3704 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
3705 (set (match_dup 0) (match_dup 1))]
3706 {
3707 operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (XFmode)));
3708 /* Preserve memory attributes. */
3709 operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
3710 })
3711
;; Push of a DFmode value from an x87 register (alternative 0), a general
;; register (1, 2), offsettable memory / constant (3), the x64-only
;; register / memory / "C" constant form (4) or a vector register (5).  The
;; insn always outputs "#", so it has to be split before final output.
;; Alternative 1 is marked not preferred for size and is preferred for speed
;; only when TARGET_INTEGER_DFMODE_MOVES is true.
3712 (define_insn "*pushdf"
3713 [(set (match_operand:DF 0 "push_operand" "=<,<,<,<,<,<")
3714 (match_operand:DF 1 "general_no_elim_operand" "f,r,*r,oF,rmC,v"))]
3715 ""
3716 {
3717 /* This insn should be already split before reg-stack. */
3718 return "#";
3719 }
3720 [(set_attr "isa" "*,nox64,nox64,nox64,x64,sse2")
3721 (set_attr "type" "multi")
3722 (set_attr "unit" "i387,*,*,*,*,sse")
3723 (set_attr "mode" "DF,SI,SI,SI,DI,DF")
3724 (set (attr "preferred_for_size")
3725 (cond [(eq_attr "alternative" "1")
3726 (symbol_ref "false")]
3727 (symbol_ref "true")))
3728 (set (attr "preferred_for_speed")
3729 (cond [(eq_attr "alternative" "1")
3730 (symbol_ref "TARGET_INTEGER_DFMODE_MOVES")]
3731 (symbol_ref "true")))])
3732
3733 ;; %%% Kill this when call knows how to work this out.
;; After reload, split a DFmode push of an any_fp_register_operand into an
;; explicit stack pointer adjustment by -8 followed by a store.  The store
;; destination is operand 0 re-addressed at the stack pointer.
3734 (define_split
3735 [(set (match_operand:DF 0 "push_operand")
3736 (match_operand:DF 1 "any_fp_register_operand"))]
3737 "reload_completed"
3738 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8)))
3739 (set (match_dup 0) (match_dup 1))]
3740 {
3741 /* Preserve memory attributes. */
3742 operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
3743 })
3744
;; Mode iterator over HF and BF.
3745 (define_mode_iterator HFBF [HF BF])
3746
;; TARGET_64BIT push of an HF or BF value.  Alternative 0 (general
;; register) is emitted as push{q} of the 64-bit register name.
;; Alternative 1 (SSE register, tagged sse4) must never reach the output
;; code, which asserts that alternative 0 was chosen.
3747 (define_insn "*push<mode>_rex64"
3748 [(set (match_operand:HFBF 0 "push_operand" "=X,X")
3749 (match_operand:HFBF 1 "nonmemory_no_elim_operand" "r,x"))]
3750 "TARGET_64BIT"
3751 {
3752 /* Anything else should be already split before reg-stack. */
3753 gcc_assert (which_alternative == 0);
3754 return "push{q}\t%q1";
3755 }
3756 [(set_attr "isa" "*,sse4")
3757 (set_attr "type" "push,multi")
3758 (set_attr "mode" "DI,TI")])
3759
;; !TARGET_64BIT push of an HF or BF value.  Alternative 0 (general
;; register, memory or constant) is emitted as push{l} of the 32-bit form
;; of operand 1.  Alternative 1 (SSE register, tagged sse4) must never reach
;; the output code, which asserts that alternative 0 was chosen.
3760 (define_insn "*push<mode>"
3761 [(set (match_operand:HFBF 0 "push_operand" "=X,X")
3762 (match_operand:HFBF 1 "general_no_elim_operand" "rmF,x"))]
3763 "!TARGET_64BIT"
3764 {
3765 /* Anything else should be already split before reg-stack. */
3766 gcc_assert (which_alternative == 0);
3767 return "push{l}\t%k1";
3768 }
3769 [(set_attr "isa" "*,sse4")
3770 (set_attr "type" "push,multi")
3771 (set_attr "mode" "SI,TI")])
3772
;; push2: push two DImode registers with one instruction.  The pair is
;; modelled as a TImode push whose source is an UNSPEC_APXPUSH2 of the two
;; registers.  Requires TARGET_APX_PUSH2POP2.
3773 (define_insn "push2_di"
3774 [(set (match_operand:TI 0 "push_operand" "=<")
3775 (unspec:TI [(match_operand:DI 1 "register_operand" "r")
3776 (match_operand:DI 2 "register_operand" "r")]
3777 UNSPEC_APXPUSH2))]
3778 "TARGET_APX_PUSH2POP2"
3779 "push2\t{%2, %1|%1, %2}"
3780 [(set_attr "mode" "TI")
3781 (set_attr "type" "multi")
3782 (set_attr "prefix" "evex")])
3783
;; pop2: pop two DImode registers with one instruction.  Operand 0 is set
;; from an UNSPEC_APXPOP2_LOW of the TImode pop operand and operand 2 from
;; an UNSPEC_APXPOP2_HIGH placeholder.  Requires TARGET_APX_PUSH2POP2.
3784 (define_insn "pop2_di"
3785 [(parallel [(set (match_operand:DI 0 "register_operand" "=r")
3786 (unspec:DI [(match_operand:TI 1 "pop_operand" ">")]
3787 UNSPEC_APXPOP2_LOW))
3788 (set (match_operand:DI 2 "register_operand" "=r")
3789 (unspec:DI [(const_int 0)] UNSPEC_APXPOP2_HIGH))])]
3790 "TARGET_APX_PUSH2POP2"
3791 "pop2\t{%2, %0|%0, %2}"
3792 [(set_attr "mode" "TI")
3793 (set_attr "prefix" "evex")])
3794
;; pushp: push of a DImode register carrying the UNSPEC_APX_PPX marker.
;; The insn emits the "pushp" mnemonic, so it must only be available when
;; TARGET_APX_PPX is set, like the other PPX patterns in this file
;; (popp_di, push2p_di, pop2p_di).  The original TARGET_64BIT requirement is
;; kept, so the condition is strictly narrower than before.
3795 (define_insn "pushp_di"
3796 [(set (match_operand:DI 0 "push_operand" "=<")
3797 (match_operand:DI 1 "register_operand" "r"))
3798 (unspec:DI [(const_int 0)] UNSPEC_APX_PPX)]
3799 "TARGET_64BIT && TARGET_APX_PPX"
3800 "pushp\t%1"
3801 [(set_attr "mode" "DI")])
3802
;; popp: pop into a DImode register carrying the UNSPEC_APX_PPX marker.
;; Requires TARGET_APX_PPX.
3803 (define_insn "popp_di"
3804 [(set (match_operand:DI 0 "register_operand" "=r")
3805 (match_operand:DI 1 "pop_operand" ">"))
3806 (unspec:DI [(const_int 0)] UNSPEC_APX_PPX)]
3807 "TARGET_APX_PPX"
3808 "popp\t%0"
3809 [(set_attr "mode" "DI")])
3810
;; push2p: the two-register push (an UNSPEC_APXPUSH2 source, as in push2_di)
;; with the additional UNSPEC_APX_PPX marker.  Requires both
;; TARGET_APX_PUSH2POP2 and TARGET_APX_PPX.
3811 (define_insn "push2p_di"
3812 [(set (match_operand:TI 0 "push_operand" "=<")
3813 (unspec:TI [(match_operand:DI 1 "register_operand" "r")
3814 (match_operand:DI 2 "register_operand" "r")]
3815 UNSPEC_APXPUSH2))
3816 (unspec:DI [(const_int 0)] UNSPEC_APX_PPX)]
3817 "TARGET_APX_PUSH2POP2 && TARGET_APX_PPX"
3818 "push2p\t{%2, %1|%1, %2}"
3819 [(set_attr "mode" "TI")
3820 (set_attr "type" "multi")
3821 (set_attr "prefix" "evex")])
3822
;; pop2p: the two-register pop (UNSPEC_APXPOP2_LOW / UNSPEC_APXPOP2_HIGH, as
;; in pop2_di) with the additional UNSPEC_APX_PPX marker.  Requires both
;; TARGET_APX_PUSH2POP2 and TARGET_APX_PPX.
3823 (define_insn "pop2p_di"
3824 [(parallel [(set (match_operand:DI 0 "register_operand" "=r")
3825 (unspec:DI [(match_operand:TI 1 "pop_operand" ">")]
3826 UNSPEC_APXPOP2_LOW))
3827 (set (match_operand:DI 2 "register_operand" "=r")
3828 (unspec:DI [(const_int 0)] UNSPEC_APXPOP2_HIGH))
3829 (unspec:DI [(const_int 0)] UNSPEC_APX_PPX)])]
3830 "TARGET_APX_PUSH2POP2 && TARGET_APX_PPX"
3831 "pop2p\t{%2, %0|%0, %2}"
3832 [(set_attr "mode" "TI")
3833 (set_attr "prefix" "evex")])
3834
;; TARGET_64BIT push of an SFmode value.  Only alternative 1 (general
;; register or constant) is output directly, as push{q} of the 64-bit form
;; of operand 1.  The x87 (0) and vector register (2) alternatives output
;; "#" and have to be split.
3835 (define_insn "*pushsf_rex64"
3836 [(set (match_operand:SF 0 "push_operand" "=X,X,X")
3837 (match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,v"))]
3838 "TARGET_64BIT"
3839 {
3840 /* Anything else should be already split before reg-stack. */
3841 if (which_alternative != 1)
3842 return "#";
3843 return "push{q}\t%q1";
3844 }
3845 [(set_attr "type" "multi,push,multi")
3846 (set_attr "unit" "i387,*,*")
3847 (set_attr "mode" "SF,DI,SF")])
3848
;; !TARGET_64BIT push of an SFmode value.  Only alternative 1 (general
;; register, memory or constant) is output directly, as push{l}.  The x87
;; (0) and vector register (2) alternatives output "#" and have to be split.
3849 (define_insn "*pushsf"
3850 [(set (match_operand:SF 0 "push_operand" "=<,<,<")
3851 (match_operand:SF 1 "general_no_elim_operand" "f,rmF,v"))]
3852 "!TARGET_64BIT"
3853 {
3854 /* Anything else should be already split before reg-stack. */
3855 if (which_alternative != 1)
3856 return "#";
3857 return "push{l}\t%1";
3858 }
3859 [(set_attr "type" "multi,push,multi")
3860 (set_attr "unit" "i387,*,*")
3861 (set_attr "mode" "SF,SI,SF")])
3862
;; Mode iterator over SF, HF and BF.
3863 (define_mode_iterator MODESH [SF HF BF])
3864 ;; %%% Kill this when call knows how to work this out.
;; After reload, split an SF/HF/BF push of an any_fp_register_operand into
;; an explicit stack pointer adjustment followed by a store.  If the push
;; address is a PRE_DEC the adjustment is -4, and !TARGET_64BIT is asserted.
;; Otherwise the adjustment is taken from the address expression,
;; XEXP (XEXP (addr, 1), 1), which is asserted to be a CONST_INT.  The store
;; destination is operand 0 re-addressed at the stack pointer.
3865 (define_split
3866 [(set (match_operand:MODESH 0 "push_operand")
3867 (match_operand:MODESH 1 "any_fp_register_operand"))]
3868 "reload_completed"
3869 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
3870 (set (match_dup 0) (match_dup 1))]
3871 {
3872 rtx op = XEXP (operands[0], 0);
3873 if (GET_CODE (op) == PRE_DEC)
3874 {
3875 gcc_assert (!TARGET_64BIT);
3876 op = GEN_INT (-4);
3877 }
3878 else
3879 {
3880 op = XEXP (XEXP (op, 1), 1);
3881 gcc_assert (CONST_INT_P (op));
3882 }
3883 operands[2] = op;
3884 /* Preserve memory attributes. */
3885 operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
3886 })
3887
;; After reload, when find_constant_src returns non-null for an SFmode
;; push from memory, push the rtx returned by find_constant_src
;; (operand 2) instead of the memory operand.
3888 (define_split
3889 [(set (match_operand:SF 0 "push_operand")
3890 (match_operand:SF 1 "memory_operand"))]
3891 "reload_completed
3892 && find_constant_src (insn)"
3893 [(set (match_dup 0) (match_dup 2))]
3894 "operands[2] = find_constant_src (curr_insn);")
3895
;; After reload, a push of a general_gr_operand whose destination mode is
;; TFmode, XFmode or DFmode is handed to ix86_split_long_move, which
;; emits the replacement sequence itself (the template is a dummy).
3896 (define_split
3897 [(set (match_operand 0 "push_operand")
3898 (match_operand 1 "general_gr_operand"))]
3899 "reload_completed
3900 && (GET_MODE (operands[0]) == TFmode
3901 || GET_MODE (operands[0]) == XFmode
3902 || GET_MODE (operands[0]) == DFmode)"
3903 [(const_int 0)]
3904 "ix86_split_long_move (operands); DONE;")
3905 \f
3906 ;; Floating point move instructions.
3907
;; TFmode move expander, available when TARGET_64BIT || TARGET_SSE.
;; All expansion work is delegated to ix86_expand_move.
3908 (define_expand "movtf"
3909 [(set (match_operand:TF 0 "nonimmediate_operand")
3910 (match_operand:TF 1 "nonimmediate_operand"))]
3911 "TARGET_64BIT || TARGET_SSE"
3912 "ix86_expand_move (TFmode, operands); DONE;")
3913
;; Move expander for every mode of the X87MODEFH iterator.  It has no
;; enabling condition and delegates to ix86_expand_move.
3914 (define_expand "mov<mode>"
3915 [(set (match_operand:X87MODEFH 0 "nonimmediate_operand")
3916 (match_operand:X87MODEFH 1 "general_operand"))]
3917 ""
3918 "ix86_expand_move (<MODE>mode, operands); DONE;")
3919
;; TFmode move.  Five alternatives (destination <- source):
;;   0: v <- C       type sselog1
;;   1: v <- vm      type ssemov
;;   2: m <- v       type ssemov
;;   3: ?*r <- *roF  type multi, isa x64
;;   4: !o <- *rC    type multi, isa x64
;; Memory-to-memory moves are rejected.  A CONST_DOUBLE source is only
;; accepted while lra_in_progress or after reload, or when one of the
;; remaining clauses of the insn condition holds.  Output is selected by
;; the "type" attribute; the multi alternatives return "#".
3920 (define_insn "*movtf_internal"
3921 [(set (match_operand:TF 0 "nonimmediate_operand" "=v,v ,m,?*r ,!o")
3922 (match_operand:TF 1 "general_operand" "C ,vm,v,*roF,*rC"))]
3923 "(TARGET_64BIT || TARGET_SSE)
3924 && !(MEM_P (operands[0]) && MEM_P (operands[1]))
3925 && (lra_in_progress || reload_completed
3926 || !CONST_DOUBLE_P (operands[1])
3927 || (standard_sse_constant_p (operands[1], TFmode) == 1
3928 && !memory_operand (operands[0], TFmode))
3929 || (!TARGET_MEMORY_MISMATCH_STALL
3930 && memory_operand (operands[0], TFmode)))"
3931 {
3932 switch (get_attr_type (insn))
3933 {
3934 case TYPE_SSELOG1:
3935 return standard_sse_constant_opcode (insn, operands);
3936
3937 case TYPE_SSEMOV:
3938 return ix86_output_ssemov (insn, operands);
3939
3940 case TYPE_MULTI:
3941 return "#";
3942
3943 default:
3944 gcc_unreachable ();
3945 }
3946 }
3947 [(set_attr "isa" "*,*,*,x64,x64")
3948 (set_attr "type" "sselog1,ssemov,ssemov,multi,multi")
3949 (set (attr "prefix")
3950 (if_then_else (eq_attr "type" "sselog1,ssemov")
3951 (const_string "maybe_vex")
3952 (const_string "orig")))
3953 (set (attr "mode")
3954 (cond [(eq_attr "alternative" "3,4")
3955 (const_string "DI")
3956 (match_test "TARGET_AVX")
3957 (const_string "TI")
3958 (ior (not (match_test "TARGET_SSE2"))
3959 (match_test "optimize_function_for_size_p (cfun)"))
3960 (const_string "V4SF")
3961 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3962 (const_string "V4SF")
3963 (and (eq_attr "alternative" "2")
3964 (match_test "TARGET_SSE_TYPELESS_STORES"))
3965 (const_string "V4SF")
3966 ]
3967 (const_string "TI")))])
3968
;; After reload, a TFmode move between a nonimmediate_gr_operand and a
;; general_gr_operand is handed to ix86_split_long_move.
3969 (define_split
3970 [(set (match_operand:TF 0 "nonimmediate_gr_operand")
3971 (match_operand:TF 1 "general_gr_operand"))]
3972 "reload_completed"
3973 [(const_int 0)]
3974 "ix86_split_long_move (operands); DONE;")
3975
;; XFmode move.  Twelve alternatives (0-11):
;;   0-2   type fmov; alternative 2 ("G" source) emits the opcode from
;;         standard_80387_constant_opcode, the others go through
;;         output_387_reg_move.
;;   3-11  type multi; they return "#".
;; isa: alternatives 7,10 are nox64; 8,11 are x64.
;; enabled: alternatives 9,10,11 are used only when !TARGET_HARD_XF_REGS,
;; and in that case every other alternative is disabled.
3976 ;; Possible store forwarding (partial memory) stall
3977 ;; in alternatives 4, 6, 7 and 8.
3978 (define_insn "*movxf_internal"
3979 [(set (match_operand:XF 0 "nonimmediate_operand"
3980 "=f,m,f,?r ,!o,?*r ,!o,!o,!o,r ,o ,o")
3981 (match_operand:XF 1 "general_operand"
3982 "fm,f,G,roF,r ,*roF,*r,F ,C ,roF,rF,rC"))]
3983 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
3984 && (lra_in_progress || reload_completed
3985 || !CONST_DOUBLE_P (operands[1])
3986 || ((optimize_function_for_size_p (cfun)
3987 || (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC))
3988 && standard_80387_constant_p (operands[1]) > 0
3989 && !memory_operand (operands[0], XFmode))
3990 || (!TARGET_MEMORY_MISMATCH_STALL
3991 && memory_operand (operands[0], XFmode))
3992 || !TARGET_HARD_XF_REGS)"
3993 {
3994 switch (get_attr_type (insn))
3995 {
3996 case TYPE_FMOV:
3997 if (which_alternative == 2)
3998 return standard_80387_constant_opcode (operands[1]);
3999 return output_387_reg_move (insn, operands);
4000
4001 case TYPE_MULTI:
4002 return "#";
4003
4004 default:
4005 gcc_unreachable ();
4006 }
4007 }
4008 [(set (attr "isa")
4009 (cond [(eq_attr "alternative" "7,10")
4010 (const_string "nox64")
4011 (eq_attr "alternative" "8,11")
4012 (const_string "x64")
4013 ]
4014 (const_string "*")))
4015 (set (attr "type")
4016 (cond [(eq_attr "alternative" "3,4,5,6,7,8,9,10,11")
4017 (const_string "multi")
4018 ]
4019 (const_string "fmov")))
4020 (set (attr "mode")
4021 (cond [(eq_attr "alternative" "3,4,5,6,7,8,9,10,11")
4022 (if_then_else (match_test "TARGET_64BIT")
4023 (const_string "DI")
4024 (const_string "SI"))
4025 ]
4026 (const_string "XF")))
4027 (set (attr "preferred_for_size")
4028 (cond [(eq_attr "alternative" "3,4")
4029 (symbol_ref "false")]
4030 (symbol_ref "true")))
4031 (set (attr "enabled")
4032 (cond [(eq_attr "alternative" "9,10,11")
4033 (if_then_else
4034 (match_test "TARGET_HARD_XF_REGS")
4035 (symbol_ref "false")
4036 (const_string "*"))
4037 (not (match_test "TARGET_HARD_XF_REGS"))
4038 (symbol_ref "false")
4039 ]
4040 (const_string "*")))])
4041
;; After reload, an XFmode move between a nonimmediate_gr_operand and a
;; general_gr_operand is handed to ix86_split_long_move.
4042 (define_split
4043 [(set (match_operand:XF 0 "nonimmediate_gr_operand")
4044 (match_operand:XF 1 "general_gr_operand"))]
4045 "reload_completed"
4046 [(const_int 0)]
4047 "ix86_split_long_move (operands); DONE;")
4048
;; DFmode move.  Twenty-six alternatives (0-25), grouped by "type":
;;   0-2           fmov    (alternative 2 loads a standard x87 constant)
;;   3-7, 22, 23   multi   (isa nox64; output "#")
;;   8-11, 24, 25  imov    (isa x64; alternative 11 emits movabs{q})
;;   12, 16        sselog1 (standard SSE constant)
;;   13-15, 17-21  ssemov
;; Alternatives 12-15 need isa sse2, 20 and 21 need x64_sse2.
;; enabled: alternatives 22-25 are used only when !TARGET_HARD_DF_REGS,
;; and in that case every other alternative is disabled.
4049 ;; Possible store forwarding (partial memory) stall in alternatives 4, 6 and 7.
4050 (define_insn "*movdf_internal"
4051 [(set (match_operand:DF 0 "nonimmediate_operand"
4052 "=Yf*f,m ,Yf*f,?r ,!o,?*r ,!o,!o,?r,?m,?r,?r,Yv,v,v,m,*x,*x,*x,m ,?r,?v,r ,o ,r ,m")
4053 (match_operand:DF 1 "general_operand"
4054 "Yf*fm,Yf*f,G ,roF,r ,*roF,*r,F ,rm,rC,C ,F ,C ,v,m,v,C ,*x,m ,*x, v, r,roF,rF,rmF,rC"))]
4055 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
4056 && (lra_in_progress || reload_completed
4057 || !CONST_DOUBLE_P (operands[1])
4058 || ((optimize_function_for_size_p (cfun)
4059 || (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC))
4060 && IS_STACK_MODE (DFmode)
4061 && standard_80387_constant_p (operands[1]) > 0
4062 && !memory_operand (operands[0], DFmode))
4063 || (TARGET_SSE2 && TARGET_SSE_MATH
4064 && standard_sse_constant_p (operands[1], DFmode) == 1
4065 && !memory_operand (operands[0], DFmode))
4066 || ((TARGET_64BIT || !TARGET_MEMORY_MISMATCH_STALL)
4067 && memory_operand (operands[0], DFmode))
4068 || !TARGET_HARD_DF_REGS)"
4069 {
4070 switch (get_attr_type (insn))
4071 {
4072 case TYPE_FMOV:
4073 if (which_alternative == 2)
4074 return standard_80387_constant_opcode (operands[1]);
4075 return output_387_reg_move (insn, operands);
4076
4077 case TYPE_MULTI:
4078 return "#";
4079
4080 case TYPE_IMOV:
4081 if (get_attr_mode (insn) == MODE_SI)
4082 return "mov{l}\t{%1, %k0|%k0, %1}";
4083 else if (which_alternative == 11)
4084 return "movabs{q}\t{%1, %0|%0, %1}";
4085 else
4086 return "mov{q}\t{%1, %0|%0, %1}";
4087
4088 case TYPE_SSELOG1:
4089 return standard_sse_constant_opcode (insn, operands);
4090
4091 case TYPE_SSEMOV:
4092 return ix86_output_ssemov (insn, operands);
4093
4094 default:
4095 gcc_unreachable ();
4096 }
4097 }
4098 [(set (attr "isa")
4099 (cond [(eq_attr "alternative" "3,4,5,6,7,22,23")
4100 (const_string "nox64")
4101 (eq_attr "alternative" "8,9,10,11,24,25")
4102 (const_string "x64")
4103 (eq_attr "alternative" "12,13,14,15")
4104 (const_string "sse2")
4105 (eq_attr "alternative" "20,21")
4106 (const_string "x64_sse2")
4107 ]
4108 (const_string "*")))
4109 (set (attr "type")
4110 (cond [(eq_attr "alternative" "0,1,2")
4111 (const_string "fmov")
4112 (eq_attr "alternative" "3,4,5,6,7,22,23")
4113 (const_string "multi")
4114 (eq_attr "alternative" "8,9,10,11,24,25")
4115 (const_string "imov")
4116 (eq_attr "alternative" "12,16")
4117 (const_string "sselog1")
4118 ]
4119 (const_string "ssemov")))
4120 (set (attr "modrm")
4121 (if_then_else (eq_attr "alternative" "11")
4122 (const_string "0")
4123 (const_string "*")))
4124 (set (attr "length_immediate")
4125 (if_then_else (eq_attr "alternative" "11")
4126 (const_string "8")
4127 (const_string "*")))
4128 (set (attr "prefix")
4129 (if_then_else (eq_attr "type" "sselog1,ssemov")
4130 (const_string "maybe_vex")
4131 (const_string "orig")))
4132 (set (attr "prefix_data16")
4133 (if_then_else
4134 (ior (and (eq_attr "type" "ssemov") (eq_attr "mode" "DI"))
4135 (eq_attr "mode" "V1DF"))
4136 (const_string "1")
4137 (const_string "*")))
4138 (set (attr "mode")
4139 (cond [(eq_attr "alternative" "3,4,5,6,7,10,22,23")
4140 (const_string "SI")
4141 (eq_attr "alternative" "8,9,11,20,21,24,25")
4142 (const_string "DI")
4143
4144 /* xorps is one byte shorter for non-AVX targets. */
4145 (eq_attr "alternative" "12,16")
4146 (cond [(match_test "TARGET_AVX")
4147 (const_string "V2DF")
4148 (ior (not (match_test "TARGET_SSE2"))
4149 (match_test "optimize_function_for_size_p (cfun)"))
4150 (const_string "V4SF")
4151 (match_test "TARGET_SSE_LOAD0_BY_PXOR")
4152 (const_string "TI")
4153 ]
4154 (const_string "V2DF"))
4155
4156 /* For architectures resolving dependencies on
4157 whole SSE registers use movapd to break dependency
4158 chains, otherwise use short move to avoid extra work. */
4159
4160 /* movaps is one byte shorter for non-AVX targets. */
4161 (eq_attr "alternative" "13,17")
4162 (cond [(match_test "TARGET_AVX512VL")
4163 (const_string "V2DF")
4164 (match_test "TARGET_AVX512F")
4165 (const_string "DF")
4166 (match_test "TARGET_AVX")
4167 (const_string "V2DF")
4168 (ior (not (match_test "TARGET_SSE2"))
4169 (match_test "optimize_function_for_size_p (cfun)"))
4170 (const_string "V4SF")
4171 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
4172 (const_string "V4SF")
4173 (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
4174 (const_string "V2DF")
4175 ]
4176 (const_string "DF"))
4177
4178 /* For architectures resolving dependencies on register
4179 parts we may avoid extra work to zero out upper part
4180 of register. */
4181 (eq_attr "alternative" "14,18")
4182 (cond [(not (match_test "TARGET_SSE2"))
4183 (const_string "V2SF")
4184 (match_test "TARGET_AVX")
4185 (const_string "DF")
4186 (match_test "TARGET_SSE_SPLIT_REGS")
4187 (const_string "V1DF")
4188 ]
4189 (const_string "DF"))
4190
4191 (and (eq_attr "alternative" "15,19")
4192 (not (match_test "TARGET_SSE2")))
4193 (const_string "V2SF")
4194 ]
4195 (const_string "DF")))
4196 (set (attr "preferred_for_size")
4197 (cond [(eq_attr "alternative" "3,4")
4198 (symbol_ref "false")]
4199 (symbol_ref "true")))
4200 (set (attr "preferred_for_speed")
4201 (cond [(eq_attr "alternative" "3,4")
4202 (symbol_ref "TARGET_INTEGER_DFMODE_MOVES")
4203 (eq_attr "alternative" "20")
4204 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
4205 (eq_attr "alternative" "21")
4206 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
4207 ]
4208 (symbol_ref "true")))
4209 (set (attr "enabled")
4210 (cond [(eq_attr "alternative" "22,23,24,25")
4211 (if_then_else
4212 (match_test "TARGET_HARD_DF_REGS")
4213 (symbol_ref "false")
4214 (const_string "*"))
4215 (not (match_test "TARGET_HARD_DF_REGS"))
4216 (symbol_ref "false")
4217 ]
4218 (const_string "*")))])
4219
;; For !TARGET_64BIT only: after reload, a DFmode move between a
;; nonimmediate_gr_operand and a general_gr_operand is handed to
;; ix86_split_long_move.
4220 (define_split
4221 [(set (match_operand:DF 0 "nonimmediate_gr_operand")
4222 (match_operand:DF 1 "general_gr_operand"))]
4223 "!TARGET_64BIT && reload_completed"
4224 [(const_int 0)]
4225 "ix86_split_long_move (operands); DONE;")
4226
;; SFmode move.  Eighteen alternatives (0-17), grouped by "type":
;;   0-2          fmov    (alternative 2 loads a standard x87 constant)
;;   3, 4, 16, 17 imov    (mov{l})
;;   5            sselog1 (standard SSE constant)
;;   6-10         ssemov  (9 and 10 need isa sse2)
;;   11-15        mmxmov  (movq in DI mode, movd in SI mode)
;; enabled: alternatives 16 and 17 are used only when
;; !TARGET_HARD_SF_REGS, and in that case every other alternative is
;; disabled.
4227 (define_insn "*movsf_internal"
4228 [(set (match_operand:SF 0 "nonimmediate_operand"
4229 "=Yf*f,m ,Yf*f,?r ,?m,Yv,v,v,m,?r,?v,!*y,!*y,!m,!r,!*y,r ,m")
4230 (match_operand:SF 1 "general_operand"
4231 "Yf*fm,Yf*f,G ,rmF,rF,C ,v,m,v,v ,r ,*y ,m ,*y,*y,r ,rmF,rF"))]
4232 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
4233 && (lra_in_progress || reload_completed
4234 || !CONST_DOUBLE_P (operands[1])
4235 || ((optimize_function_for_size_p (cfun)
4236 || (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC))
4237 && IS_STACK_MODE (SFmode)
4238 && standard_80387_constant_p (operands[1]) > 0)
4239 || (TARGET_SSE && TARGET_SSE_MATH
4240 && standard_sse_constant_p (operands[1], SFmode) == 1)
4241 || memory_operand (operands[0], SFmode)
4242 || !TARGET_HARD_SF_REGS)"
4243 {
4244 switch (get_attr_type (insn))
4245 {
4246 case TYPE_FMOV:
4247 if (which_alternative == 2)
4248 return standard_80387_constant_opcode (operands[1]);
4249 return output_387_reg_move (insn, operands);
4250
4251 case TYPE_IMOV:
4252 return "mov{l}\t{%1, %0|%0, %1}";
4253
4254 case TYPE_SSELOG1:
4255 return standard_sse_constant_opcode (insn, operands);
4256
4257 case TYPE_SSEMOV:
4258 return ix86_output_ssemov (insn, operands);
4259
4260 case TYPE_MMXMOV:
4261 switch (get_attr_mode (insn))
4262 {
4263 case MODE_DI:
4264 return "movq\t{%1, %0|%0, %1}";
4265 case MODE_SI:
4266 return "movd\t{%1, %0|%0, %1}";
4267
4268 default:
4269 gcc_unreachable ();
4270 }
4271
4272 default:
4273 gcc_unreachable ();
4274 }
4275 }
4276 [(set (attr "isa")
4277 (cond [(eq_attr "alternative" "9,10")
4278 (const_string "sse2")
4279 ]
4280 (const_string "*")))
4281 (set (attr "type")
4282 (cond [(eq_attr "alternative" "0,1,2")
4283 (const_string "fmov")
4284 (eq_attr "alternative" "3,4,16,17")
4285 (const_string "imov")
4286 (eq_attr "alternative" "5")
4287 (const_string "sselog1")
4288 (eq_attr "alternative" "11,12,13,14,15")
4289 (const_string "mmxmov")
4290 ]
4291 (const_string "ssemov")))
4292 (set (attr "prefix")
4293 (if_then_else (eq_attr "type" "sselog1,ssemov")
4294 (const_string "maybe_vex")
4295 (const_string "orig")))
4296 (set (attr "prefix_data16")
4297 (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "SI"))
4298 (const_string "1")
4299 (const_string "*")))
4300 (set (attr "mode")
4301 (cond [(eq_attr "alternative" "3,4,9,10,12,13,14,15,16,17")
4302 (const_string "SI")
4303 (eq_attr "alternative" "11")
4304 (const_string "DI")
4305 (eq_attr "alternative" "5")
4306 (cond [(and (match_test "TARGET_AVX512F && TARGET_EVEX512")
4307 (not (match_test "TARGET_PREFER_AVX256")))
4308 (const_string "V16SF")
4309 (match_test "TARGET_AVX")
4310 (const_string "V4SF")
4311 (ior (not (match_test "TARGET_SSE2"))
4312 (match_test "optimize_function_for_size_p (cfun)"))
4313 (const_string "V4SF")
4314 (match_test "TARGET_SSE_LOAD0_BY_PXOR")
4315 (const_string "TI")
4316 ]
4317 (const_string "V4SF"))
4318
4319 /* For architectures resolving dependencies on
4320 whole SSE registers use APS move to break dependency
4321 chains, otherwise use short move to avoid extra work.
4322
4323 Do the same for architectures resolving dependencies on
4324 the parts. While in DF mode it is better to always handle
4325 just register parts, the SF mode is different due to lack
4326 of instructions to load just part of the register. It is
4327 better to maintain the whole registers in single format
4328 to avoid problems on using packed logical operations. */
4329 (eq_attr "alternative" "6")
4330 (cond [(match_test "TARGET_AVX512VL")
4331 (const_string "V4SF")
4332 (match_test "TARGET_AVX512F")
4333 (const_string "SF")
4334 (ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
4335 (match_test "TARGET_SSE_SPLIT_REGS"))
4336 (const_string "V4SF")
4337 ]
4338 (const_string "SF"))
4339 ]
4340 (const_string "SF")))
4341 (set (attr "preferred_for_speed")
4342 (cond [(eq_attr "alternative" "9,14")
4343 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
4344 (eq_attr "alternative" "10,15")
4345 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
4346 ]
4347 (symbol_ref "true")))
4348 (set (attr "enabled")
4349 (cond [(eq_attr "alternative" "16,17")
4350 (if_then_else
4351 (match_test "TARGET_HARD_SF_REGS")
4352 (symbol_ref "false")
4353 (const_string "*"))
4354 (not (match_test "TARGET_HARD_SF_REGS"))
4355 (symbol_ref "false")
4356 ]
4357 (const_string "*")))])
4358
;; Per-mode constraint fragment: "F" for HF, empty for BF.  It is spliced
;; into the source constraint of the store alternative ("r<hfbfconstf>")
;; of the HFBF move pattern that follows.
4359 (define_mode_attr hfbfconstf
4360 [(HF "F") (BF "")])
4361
;; Move pattern for the modes of the HFBF iterator.  Eleven alternatives
;; (0-10):
;;   0-3       general-register/memory moves; type imov or imovx,
;;             chosen by the "type" cond below
;;   4         Yv <- C, type sselog1 (standard SSE constant)
;;   5, 6, 9   type ssemov
;;   7, 8, 10  ssemov when TARGET_AVX512FP16, otherwise sselog1
;;             (emitted as pinsrw/pextrw with index 0)
;; isa: 4,5,6,9,10 need sse2; 7 is sse4_noavx (with addr gpr16); 8 is avx.
;; enabled: alternative 1 (constant "F" source) is disabled for BFmode.
4362 (define_insn "*mov<mode>_internal"
4363 [(set (match_operand:HFBF 0 "nonimmediate_operand"
4364 "=?r,?r,?r,?m ,Yv,v,?r,jm,m,?v,v")
4365 (match_operand:HFBF 1 "general_operand"
4366 "r ,F ,m ,r<hfbfconstf>,C ,v, v,v ,v,r ,m"))]
4367 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
4368 && (lra_in_progress
4369 || reload_completed
4370 || !CONST_DOUBLE_P (operands[1])
4371 || (TARGET_SSE2
4372 && standard_sse_constant_p (operands[1], <MODE>mode) == 1)
4373 || memory_operand (operands[0], <MODE>mode))"
4374 {
4375 switch (get_attr_type (insn))
4376 {
4377 case TYPE_IMOVX:
4378 /* movzwl is faster than movw on p2 due to partial word stalls,
4379 though not as fast as an aligned movl. */
4380 return "movz{wl|x}\t{%1, %k0|%k0, %1}";
4381
4382 case TYPE_SSEMOV:
4383 return ix86_output_ssemov (insn, operands);
4384
4385 case TYPE_SSELOG1:
4386 if (satisfies_constraint_C (operands[1]))
4387 return standard_sse_constant_opcode (insn, operands);
4388
4389 if (SSE_REG_P (operands[0]))
4390 return "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}";
4391 else
4392 return "%vpextrw\t{$0, %1, %0|%0, %1, 0}";
4393
4394 default:
4395 if (get_attr_mode (insn) == MODE_SI)
4396 return "mov{l}\t{%k1, %k0|%k0, %k1}";
4397 else
4398 return "mov{w}\t{%1, %0|%0, %1}";
4399 }
4400 }
4401 [(set (attr "isa")
4402 (cond [(eq_attr "alternative" "4,5,6,9,10")
4403 (const_string "sse2")
4404 (eq_attr "alternative" "7")
4405 (const_string "sse4_noavx")
4406 (eq_attr "alternative" "8")
4407 (const_string "avx")
4408 ]
4409 (const_string "*")))
4410 (set (attr "addr")
4411 (if_then_else (eq_attr "alternative" "7")
4412 (const_string "gpr16")
4413 (const_string "*")))
4414 (set (attr "type")
4415 (cond [(eq_attr "alternative" "4")
4416 (const_string "sselog1")
4417 (eq_attr "alternative" "5,6,9")
4418 (const_string "ssemov")
4419 (eq_attr "alternative" "7,8,10")
4420 (if_then_else
4421 (match_test ("TARGET_AVX512FP16"))
4422 (const_string "ssemov")
4423 (const_string "sselog1"))
4424 (match_test "optimize_function_for_size_p (cfun)")
4425 (const_string "imov")
4426 (and (eq_attr "alternative" "0")
4427 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
4428 (not (match_test "TARGET_HIMODE_MATH"))))
4429 (const_string "imov")
4430 (and (eq_attr "alternative" "1,2")
4431 (match_operand:HI 1 "aligned_operand"))
4432 (const_string "imov")
4433 (and (match_test "TARGET_MOVX")
4434 (eq_attr "alternative" "0,2"))
4435 (const_string "imovx")
4436 ]
4437 (const_string "imov")))
4438 (set (attr "prefix")
4439 (cond [(eq_attr "alternative" "4,5,6,7,8,9,10")
4440 (const_string "maybe_vex")
4441 ]
4442 (const_string "orig")))
4443 (set (attr "mode")
4444 (cond [(eq_attr "alternative" "4")
4445 (const_string "V4SF")
4446 (eq_attr "alternative" "6,9")
4447 (if_then_else
4448 (match_test "TARGET_AVX512FP16")
4449 (const_string "HI")
4450 (const_string "SI"))
4451 (eq_attr "alternative" "7,8,10")
4452 (if_then_else
4453 (match_test "TARGET_AVX512FP16")
4454 (const_string "HI")
4455 (const_string "TI"))
4456 (eq_attr "alternative" "5")
4457 (cond [(match_test "TARGET_AVX512VL")
4458 (const_string "V4SF")
4459 (match_test "TARGET_AVX512FP16")
4460 (const_string "HF")
4461 (match_test "TARGET_AVX512F")
4462 (const_string "SF")
4463 (match_test "TARGET_AVX")
4464 (const_string "V4SF")
4465 (ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
4466 (match_test "TARGET_SSE_SPLIT_REGS"))
4467 (const_string "V4SF")
4468 ]
4469 (const_string "SF"))
4470 (eq_attr "type" "imovx")
4471 (const_string "SI")
4472 (and (eq_attr "alternative" "1,2")
4473 (match_operand:HI 1 "aligned_operand"))
4474 (const_string "SI")
4475 (and (eq_attr "alternative" "0")
4476 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
4477 (not (match_test "TARGET_HIMODE_MATH"))))
4478 (const_string "SI")
4479 ]
4480 (const_string "HI")))
4481 (set (attr "enabled")
4482 (cond [(and (match_test "<MODE>mode == BFmode")
4483 (eq_attr "alternative" "1"))
4484 (symbol_ref "false")
4485 ]
4486 (const_string "*")))])
4487
;; After reload, a load of an FP register (TF, XF, DF or SF mode) from
;; memory is replaced by a set from the rtx returned by find_constant_src
;; whenever ix86_standard_x87sse_constant_load_p accepts the insn.
4488 (define_split
4489 [(set (match_operand 0 "any_fp_register_operand")
4490 (match_operand 1 "memory_operand"))]
4491 "reload_completed
4492 && (GET_MODE (operands[0]) == TFmode
4493 || GET_MODE (operands[0]) == XFmode
4494 || GET_MODE (operands[0]) == DFmode
4495 || GET_MODE (operands[0]) == SFmode)
4496 && ix86_standard_x87sse_constant_load_p (insn, operands[0])"
4497 [(set (match_dup 0) (match_dup 2))]
4498 "operands[2] = find_constant_src (curr_insn);")
4499
;; Same replacement for a float_extend of a memory operand into a TF, XF
;; or DF mode FP register: the result is a plain set from the rtx
;; returned by find_constant_src, with the float_extend dropped.
4500 (define_split
4501 [(set (match_operand 0 "any_fp_register_operand")
4502 (float_extend (match_operand 1 "memory_operand")))]
4503 "reload_completed
4504 && (GET_MODE (operands[0]) == TFmode
4505 || GET_MODE (operands[0]) == XFmode
4506 || GET_MODE (operands[0]) == DFmode)
4507 && ix86_standard_x87sse_constant_load_p (insn, operands[0])"
4508 [(set (match_dup 0) (match_dup 2))]
4509 "operands[2] = find_constant_src (curr_insn);")
4510
4511 ;; Split the load of -0.0 or -1.0 into fldz;fchs or fld1;fchs sequence
;; Applies after reload when standard_80387_constant_p returns 8 or 9 for
;; the immediate.  Operand 1 is replaced by CONST0_RTX when the constant
;; is negative zero and by CONST1_RTX otherwise; the second emitted insn
;; negates operand 0 in place.
4512 (define_split
4513 [(set (match_operand:X87MODEF 0 "fp_register_operand")
4514 (match_operand:X87MODEF 1 "immediate_operand"))]
4515 "reload_completed
4516 && (standard_80387_constant_p (operands[1]) == 8
4517 || standard_80387_constant_p (operands[1]) == 9)"
4518 [(set (match_dup 0)(match_dup 1))
4519 (set (match_dup 0)
4520 (neg:X87MODEF (match_dup 0)))]
4521 {
4522 if (real_isnegzero (CONST_DOUBLE_REAL_VALUE (operands[1])))
4523 operands[1] = CONST0_RTX (<MODE>mode);
4524 else
4525 operands[1] = CONST1_RTX (<MODE>mode);
4526 })
4527
;; Exchange of two XFmode "f" registers, emitted as fxch when
;; TARGET_80387.  The register named in the instruction is operand 1 when
;; operand 0 satisfies STACK_TOP_P, and operand 0 otherwise.
4528 (define_insn "*swapxf"
4529 [(set (match_operand:XF 0 "register_operand" "+f")
4530 (match_operand:XF 1 "register_operand" "+f"))
4531 (set (match_dup 1)
4532 (match_dup 0))]
4533 "TARGET_80387"
4534 {
4535 if (STACK_TOP_P (operands[0]))
4536 return "fxch\t%1";
4537 else
4538 return "fxch\t%0";
4539 }
4540 [(set_attr "type" "fxch")
4541 (set_attr "mode" "XF")])
4542 \f
4543
4544 ;; Zero extension instructions
4545
;; DImode -> TImode zero extension for TARGET_64BIT.  Always output as
;; "#" and split after reload: split_double_mode breaks TImode operand 0
;; into operands 3 and 4; operand 3 receives operand 1 and operand 4 is
;; set to zero.
4546 (define_insn_and_split "zero_extendditi2"
4547 [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o")
4548 (zero_extend:TI (match_operand:DI 1 "nonimmediate_operand" "rm,r")))]
4549 "TARGET_64BIT"
4550 "#"
4551 "&& reload_completed"
4552 [(set (match_dup 3) (match_dup 1))
4553 (set (match_dup 4) (const_int 0))]
4554 "split_double_mode (TImode, &operands[0], 1, &operands[3], &operands[4]);")
4555
;; SImode -> DImode zero-extension expander.  It has no condition and no
;; preparation code, so the template is emitted as written.
4556 (define_expand "zero_extendsidi2"
4557 [(set (match_operand:DI 0 "nonimmediate_operand")
4558 (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))])
4559
;; SImode -> DImode zero extension.  Fourteen alternatives (0-13),
;; grouped by "type":
;;   0, 1, 2, 4  multi   (output "#"; 0-2 are isa nox64)
;;   3           imovx   (isa x64; mov{l}, or lea{l} when
;;                        ix86_use_lea_for_mov says so)
;;   5, 6        mmxmov  (movd; mmx_isa native)
;;   7           ssemov when TARGET_64BIT, otherwise multi
;;   8-11        ssemov  (vmovd, or pmovzxdq for SSE reg <- SSE reg)
;;   12, 13      mskmov  (kmovd)
4560 (define_insn "*zero_extendsidi2"
4561 [(set (match_operand:DI 0 "nonimmediate_operand"
4562 "=r,?r,?o,r ,o,?*y,?!*y,$r,$v,$x,*x,*v,*r,*k")
4563 (zero_extend:DI
4564 (match_operand:SI 1 "x86_64_zext_operand"
4565 "0 ,rm,r ,rmWz,0,r ,m ,v ,r ,m ,*x,*v,*k,*km")))]
4566 ""
4567 {
4568 switch (get_attr_type (insn))
4569 {
4570 case TYPE_IMOVX:
4571 if (ix86_use_lea_for_mov (insn, operands))
4572 return "lea{l}\t{%E1, %k0|%k0, %E1}";
4573 else
4574 return "mov{l}\t{%1, %k0|%k0, %1}";
4575
4576 case TYPE_MULTI:
4577 return "#";
4578
4579 case TYPE_MMXMOV:
4580 return "movd\t{%1, %0|%0, %1}";
4581
4582 case TYPE_SSEMOV:
4583 if (SSE_REG_P (operands[0]) && SSE_REG_P (operands[1]))
4584 {
4585 if (EXT_REX_SSE_REG_P (operands[0])
4586 || EXT_REX_SSE_REG_P (operands[1]))
4587 return "vpmovzxdq\t{%t1, %g0|%g0, %t1}";
4588 else
4589 return "%vpmovzxdq\t{%1, %0|%0, %1}";
4590 }
4591
4592 if (GENERAL_REG_P (operands[0]))
4593 return "%vmovd\t{%1, %k0|%k0, %1}";
4594
4595 return "%vmovd\t{%1, %0|%0, %1}";
4596
4597 case TYPE_MSKMOV:
4598 return "kmovd\t{%1, %k0|%k0, %1}";
4599
4600 default:
4601 gcc_unreachable ();
4602 }
4603 }
4604 [(set (attr "isa")
4605 (cond [(eq_attr "alternative" "0,1,2")
4606 (const_string "nox64")
4607 (eq_attr "alternative" "3")
4608 (const_string "x64")
4609 (eq_attr "alternative" "7,8,9")
4610 (const_string "sse2")
4611 (eq_attr "alternative" "10")
4612 (const_string "sse4")
4613 (eq_attr "alternative" "11")
4614 (const_string "avx512f")
4615 (eq_attr "alternative" "12")
4616 (const_string "x64_avx512bw")
4617 (eq_attr "alternative" "13")
4618 (const_string "avx512bw")
4619 ]
4620 (const_string "*")))
4621 (set (attr "mmx_isa")
4622 (if_then_else (eq_attr "alternative" "5,6")
4623 (const_string "native")
4624 (const_string "*")))
4625 (set (attr "type")
4626 (cond [(eq_attr "alternative" "0,1,2,4")
4627 (const_string "multi")
4628 (eq_attr "alternative" "5,6")
4629 (const_string "mmxmov")
4630 (eq_attr "alternative" "7")
4631 (if_then_else (match_test "TARGET_64BIT")
4632 (const_string "ssemov")
4633 (const_string "multi"))
4634 (eq_attr "alternative" "8,9,10,11")
4635 (const_string "ssemov")
4636 (eq_attr "alternative" "12,13")
4637 (const_string "mskmov")
4638 ]
4639 (const_string "imovx")))
4640 (set (attr "prefix_extra")
4641 (if_then_else (eq_attr "alternative" "10,11")
4642 (const_string "1")
4643 (const_string "*")))
4644 (set (attr "prefix")
4645 (if_then_else (eq_attr "type" "ssemov")
4646 (const_string "maybe_vex")
4647 (const_string "orig")))
4648 (set (attr "prefix_0f")
4649 (if_then_else (eq_attr "type" "imovx")
4650 (const_string "0")
4651 (const_string "*")))
4652 (set (attr "mode")
4653 (cond [(eq_attr "alternative" "5,6")
4654 (const_string "DI")
4655 (and (eq_attr "alternative" "7")
4656 (match_test "TARGET_64BIT"))
4657 (const_string "TI")
4658 (eq_attr "alternative" "8,10,11")
4659 (const_string "TI")
4660 ]
4661 (const_string "SI")))
4662 (set (attr "preferred_for_speed")
4663 (cond [(eq_attr "alternative" "7")
4664 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
4665 (eq_attr "alternative" "5,8")
4666 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
4667 ]
4668 (symbol_ref "true")))])
4669
;; After reload, with both operands in memory, only the clearing of
;; operand 4 is emitted; no copy of operand 1 is generated.  Operands 3
;; and 4 come from split_double_mode on DImode operand 0.
;; NOTE(review): presumably this is the "o" <- "0" alternative of
;; *zero_extendsidi2, where the source already occupies the other half of
;; the destination -- confirm.
4670 (define_split
4671 [(set (match_operand:DI 0 "memory_operand")
4672 (zero_extend:DI (match_operand:SI 1 "memory_operand")))]
4673 "reload_completed"
4674 [(set (match_dup 4) (const_int 0))]
4675 "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
4676
;; For !TARGET_64BIT after reload, when destination and source are the
;; same general register number: only operand 4 (from split_double_mode
;; on operand 0) is set to zero; no copy is emitted.
4677 (define_split
4678 [(set (match_operand:DI 0 "general_reg_operand")
4679 (zero_extend:DI (match_operand:SI 1 "general_reg_operand")))]
4680 "!TARGET_64BIT && reload_completed
4681 && REGNO (operands[0]) == REGNO (operands[1])"
4682 [(set (match_dup 4) (const_int 0))]
4683 "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
4684
;; General !TARGET_64BIT split after reload (memory-to-memory excluded):
;; operand 0 is split into operands 3 and 4 by split_double_mode;
;; operand 3 receives operand 1 and operand 4 is set to zero.
4685 (define_split
4686 [(set (match_operand:DI 0 "nonimmediate_gr_operand")
4687 (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))]
4688 "!TARGET_64BIT && reload_completed
4689 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4690 [(set (match_dup 3) (match_dup 1))
4691 (set (match_dup 4) (const_int 0))]
4692 "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
4693
;; Maps an integer mode to the "isa" attribute value given to the kmov
;; alternatives of the zero_extend patterns that follow: avx512dq for QI,
;; avx512f for HI, avx512bw for SI and DI.
4694 (define_mode_attr kmov_isa
4695 [(QI "avx512dq") (HI "avx512f") (SI "avx512bw") (DI "avx512bw")])
4696
;; Zero extension from a SWI12 mode to DImode for TARGET_64BIT.
;; Alternative 0 is a movz into the %k0 form of the destination;
;; alternatives 1 and 2 are kmov with a mask-register source or
;; destination, gated by <kmov_isa>.
4697 (define_insn "zero_extend<mode>di2"
4698 [(set (match_operand:DI 0 "register_operand" "=r,*r,*k")
4699 (zero_extend:DI
4700 (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,*k,*km")))]
4701 "TARGET_64BIT"
4702 "@
4703 movz{<imodesuffix>l|x}\t{%1, %k0|%k0, %1}
4704 kmov<mskmodesuffix>\t{%1, %k0|%k0, %1}
4705 kmov<mskmodesuffix>\t{%1, %k0|%k0, %1}"
4706 [(set_attr "isa" "*,<kmov_isa>,<kmov_isa>")
4707 (set_attr "type" "imovx,mskmov,mskmov")
4708 (set_attr "mode" "SI,<MODE>,<MODE>")])
4709
;; Expander for zero extension from a SWI12 mode to SImode.  When
;; TARGET_ZERO_EXTEND_WITH_AND holds and the function is optimized for
;; speed, operand 1 is forced into a register and the
;; zero_extend<mode>si2_and pattern is emitted; otherwise the template is
;; emitted as written.
4710 (define_expand "zero_extend<mode>si2"
4711 [(set (match_operand:SI 0 "register_operand")
4712 (zero_extend:SI (match_operand:SWI12 1 "nonimmediate_operand")))]
4713 ""
4714 {
4715 if (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))
4716 {
4717 operands[1] = force_reg (<MODE>mode, operands[1]);
4718 emit_insn (gen_zero_extend<mode>si2_and (operands[0], operands[1]));
4719 DONE;
4720 }
4721 })
4722
;; Flags-clobbering zero extension from a SWI12 mode to SImode, used
;; when TARGET_ZERO_EXTEND_WITH_AND and optimizing for speed.  Output as
;; "#" and split after reload:
;;  - if operand 1 is not a register, or is a different register from
;;    operand 0: clear operand 0 via ix86_expand_clear, then store
;;    operand 1 into its low part with a strict_low_part set;
;;  - otherwise: AND operand 0 with GET_MODE_MASK (<MODE>mode).
4723 (define_insn_and_split "zero_extend<mode>si2_and"
4724 [(set (match_operand:SI 0 "register_operand" "=r,?&<r>")
4725 (zero_extend:SI
4726 (match_operand:SWI12 1 "nonimmediate_operand" "0,<r>m")))
4727 (clobber (reg:CC FLAGS_REG))]
4728 "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
4729 "#"
4730 "&& reload_completed"
4731 [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 2)))
4732 (clobber (reg:CC FLAGS_REG))])]
4733 {
4734 if (!REG_P (operands[1])
4735 || REGNO (operands[0]) != REGNO (operands[1]))
4736 {
4737 ix86_expand_clear (operands[0]);
4738
4739 gcc_assert (!TARGET_PARTIAL_REG_STALL);
4740 emit_insn (gen_rtx_SET
4741 (gen_rtx_STRICT_LOW_PART
4742 (VOIDmode, gen_lowpart (<MODE>mode, operands[0])),
4743 operands[1]));
4744 DONE;
4745 }
4746
4747 operands[2] = GEN_INT (GET_MODE_MASK (<MODE>mode));
4748 }
4749 [(set_attr "type" "alu1")
4750 (set_attr "mode" "SI")])
4751
;; Zero extension from a SWI12 mode to SImode for the case where the
;; AND-based variant does not apply (the condition is the exact negation
;; of that pattern's condition).  Alternative 0 is movz; alternatives 1
;; and 2 are kmov, gated by <kmov_isa>.
4752 (define_insn "*zero_extend<mode>si2"
4753 [(set (match_operand:SI 0 "register_operand" "=r,*r,*k")
4754 (zero_extend:SI
4755 (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,*k,*km")))]
4756 "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))"
4757 "@
4758 movz{<imodesuffix>l|x}\t{%1, %0|%0, %1}
4759 kmov<mskmodesuffix>\t{%1, %0|%0, %1}
4760 kmov<mskmodesuffix>\t{%1, %0|%0, %1}"
4761 [(set_attr "isa" "*,<kmov_isa>,<kmov_isa>")
4762 (set_attr "type" "imovx,mskmov,mskmov")
4763 (set_attr "mode" "SI,<MODE>,<MODE>")])
4764
;; Expander for QImode -> HImode zero extension.  When
;; TARGET_ZERO_EXTEND_WITH_AND holds and the function is optimized for
;; speed, operand 1 is forced into a register and zero_extendqihi2_and is
;; emitted; otherwise the template is emitted as written.
4765 (define_expand "zero_extendqihi2"
4766 [(set (match_operand:HI 0 "register_operand")
4767 (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand")))]
4768 ""
4769 {
4770 if (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))
4771 {
4772 operands[1] = force_reg (QImode, operands[1]);
4773 emit_insn (gen_zero_extendqihi2_and (operands[0], operands[1]));
4774 DONE;
4775 }
4776 })
4777
;; Flags-clobbering QImode -> HImode zero extension, used when
;; TARGET_ZERO_EXTEND_WITH_AND and optimizing for speed.  Output as "#"
;; and split after reload:
;;  - if operand 1 is not a register, or is a different register from
;;    operand 0: clear operand 0 via ix86_expand_clear, then store
;;    operand 1 into its QImode low part with a strict_low_part set;
;;  - otherwise: operand 0 is replaced by its SImode lowpart and ANDed
;;    with 255 in SImode.
4778 (define_insn_and_split "zero_extendqihi2_and"
4779 [(set (match_operand:HI 0 "register_operand" "=r,?&q")
4780 (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "0,qm")))
4781 (clobber (reg:CC FLAGS_REG))]
4782 "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
4783 "#"
4784 "&& reload_completed"
4785 [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 255)))
4786 (clobber (reg:CC FLAGS_REG))])]
4787 {
4788 if (!REG_P (operands[1])
4789 || REGNO (operands[0]) != REGNO (operands[1]))
4790 {
4791 ix86_expand_clear (operands[0]);
4792
4793 gcc_assert (!TARGET_PARTIAL_REG_STALL);
4794 emit_insn (gen_rtx_SET
4795 (gen_rtx_STRICT_LOW_PART
4796 (VOIDmode, gen_lowpart (QImode, operands[0])),
4797 operands[1]));
4798 DONE;
4799 }
4800
4801 operands[0] = gen_lowpart (SImode, operands[0]);
4802 }
4803 [(set_attr "type" "alu1")
4804 (set_attr "mode" "SI")])
4805
;; QImode -> HImode zero extension for the case where the AND-based
;; variant does not apply.  Alternative 0 is movz into the %k0 (SImode)
;; form of the destination; alternatives 1 and 2 are kmovb and need isa
;; avx512dq.
4806 ; zero extend to SImode to avoid partial register stalls
4807 (define_insn "*zero_extendqihi2"
4808 [(set (match_operand:HI 0 "register_operand" "=r,*r,*k")
4809 (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm,*k,*km")))]
4810 "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))"
4811 "@
4812 movz{bl|x}\t{%1, %k0|%k0, %1}
4813 kmovb\t{%1, %k0|%k0, %1}
4814 kmovb\t{%1, %0|%0, %1}"
4815 [(set_attr "isa" "*,avx512dq,avx512dq")
4816 (set_attr "type" "imovx,mskmov,mskmov")
4817 (set_attr "mode" "SI,QI,QI")])
4818
4819 ;; Transform xorl; mov[bw] (set strict_low_part) into movz[bw]l.
;; Matches a flags-clobbering clear of a SWI48 general register followed
;; by a strict_low_part store of a SWI12 value into the same register
;; number, and replaces the pair with one zero_extend.  It is skipped for
;; SImode when TARGET_ZERO_EXTEND_WITH_AND and optimizing for speed.
4820 (define_peephole2
4821 [(parallel [(set (match_operand:SWI48 0 "general_reg_operand")
4822 (const_int 0))
4823 (clobber (reg:CC FLAGS_REG))])
4824 (set (strict_low_part (match_operand:SWI12 1 "general_reg_operand"))
4825 (match_operand:SWI12 2 "nonimmediate_operand"))]
4826 "REGNO (operands[0]) == REGNO (operands[1])
4827 && (<SWI48:MODE>mode != SImode
4828 || !TARGET_ZERO_EXTEND_WITH_AND
4829 || !optimize_function_for_speed_p (cfun))"
4830 [(set (match_dup 0) (zero_extend:SWI48 (match_dup 2)))])
4831
4832 ;; Likewise, but preserving FLAGS_REG.
;; Here the clearing insn is a plain set to zero with no FLAGS_REG
;; clobber; the register-number check, the SImode exclusion and the
;; zero_extend replacement are the same as in the preceding peephole2.
4833 (define_peephole2
4834 [(set (match_operand:SWI48 0 "general_reg_operand") (const_int 0))
4835 (set (strict_low_part (match_operand:SWI12 1 "general_reg_operand"))
4836 (match_operand:SWI12 2 "nonimmediate_operand"))]
4837 "REGNO (operands[0]) == REGNO (operands[1])
4838 && (<SWI48:MODE>mode != SImode
4839 || !TARGET_ZERO_EXTEND_WITH_AND
4840 || !optimize_function_for_speed_p (cfun))"
4841 [(set (match_dup 0) (zero_extend:SWI48 (match_dup 2)))])
4842 \f
4843 ;; Sign extension instructions
4844
;; SImode -> DImode sign extension.  Without TARGET_64BIT the expansion is
;; delegated to gen_extendsidi2_1; otherwise the preparation code does nothing
;; and the expander's own set template is used.
4845 (define_expand "extendsidi2"
4846 [(set (match_operand:DI 0 "register_operand")
4847 (sign_extend:DI (match_operand:SI 1 "register_operand")))]
4848 ""
4849 {
4850 if (!TARGET_64BIT)
4851 {
4852 emit_insn (gen_extendsidi2_1 (operands[0], operands[1]));
4853 DONE;
4854 }
4855 })
4856
;; 64-bit only.  Alternative 0 ties the source to the destination in the "a"
;; register class and emits {cltq|cdqe} (no modrm byte); alternative 1 emits
;; movs{lq|x} from a register or memory source.
4857 (define_insn "*extendsidi2_rex64"
4858 [(set (match_operand:DI 0 "register_operand" "=*a,r")
4859 (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "*0,rm")))]
4860 "TARGET_64BIT"
4861 "@
4862 {cltq|cdqe}
4863 movs{lq|x}\t{%1, %0|%0, %1}"
4864 [(set_attr "type" "imovx")
4865 (set_attr "mode" "DI")
4866 (set_attr "prefix_0f" "0")
4867 (set_attr "modrm" "0,1")])
4868
;; 32-bit only SImode -> DImode sign extension.  The output template is "#",
;; so this insn is never printed directly and has to be split.  FLAGS_REG is
;; clobbered; the SImode scratch is a real early-clobber register only in the
;; last (memory destination) alternative.
4869 (define_insn "extendsidi2_1"
4870 [(set (match_operand:DI 0 "nonimmediate_operand" "=*A,r,?r,?*o")
4871 (sign_extend:DI (match_operand:SI 1 "register_operand" "0,0,r,r")))
4872 (clobber (reg:CC FLAGS_REG))
4873 (clobber (match_scratch:SI 2 "=X,X,X,&r"))]
4874 "!TARGET_64BIT"
4875 "#")
4876
;; 64-bit only DImode -> TImode sign extension.  The output template is "#",
;; so this insn is never printed directly and has to be split.  FLAGS_REG is
;; clobbered; the DImode scratch is a real early-clobber register only in the
;; last (memory destination) alternative.
4877 (define_insn "extendditi2"
4878 [(set (match_operand:TI 0 "nonimmediate_operand" "=*A,r,?r,?*o")
4879 (sign_extend:TI (match_operand:DI 1 "register_operand" "0,0,r,r")))
4880 (clobber (reg:CC FLAGS_REG))
4881 (clobber (match_scratch:DI 2 "=X,X,X,&r"))]
4882 "TARGET_64BIT"
4883 "#")
4884
4885 ;; Split the memory case. If the source register doesn't die, it will stay
4886 ;; this way, if it does die, following peephole2s take care of it.
4887 (define_split
4888 [(set (match_operand:<DWI> 0 "memory_operand")
4889 (sign_extend:<DWI> (match_operand:DWIH 1 "register_operand")))
4890 (clobber (reg:CC FLAGS_REG))
4891 (clobber (match_operand:DWIH 2 "register_operand"))]
4892 "reload_completed"
4893 [(const_int 0)]
4894 {
/* Shift count of (word width - 1), used for the arithmetic right shift
   that produces the sign-fill word.  */
4895 rtx bits = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
4896
/* operands[3] and operands[4] become the two word-sized halves of the
   double-word memory destination.  */
4897 split_double_mode (<DWI>mode, &operands[0], 1, &operands[3], &operands[4]);
4898
/* The first half is a plain copy of the source register.  */
4899 emit_move_insn (operands[3], operands[1]);
4900
4901 /* Generate a cltd if possible and doing so is profitable. */
4902 if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
4903 && REGNO (operands[1]) == AX_REG
4904 && REGNO (operands[2]) == DX_REG)
4905 {
4906 emit_insn (gen_ashr<mode>3_cvt (operands[2], operands[1], bits));
4907 }
4908 else
4909 {
/* Otherwise copy the source into the scratch and shift it in place.  */
4910 emit_move_insn (operands[2], operands[1]);
4911 emit_insn (gen_ashr<mode>3_cvt (operands[2], operands[2], bits));
4912 }
/* Store the sign-fill word from the scratch into the second half.  */
4913 emit_move_insn (operands[4], operands[2]);
4914 DONE;
4915 })
4916
4917 ;; Peepholes for the case where the source register does die, after
4918 ;; being split with the above splitter.
;; Matches store / copy / shift-by-(width-1) / store.  When the source
;; (operand 1) is dead once it has been copied and the scratch (operand 2) is
;; dead after the sequence, the copy is dropped and the shift is done in
;; operand 1 itself.  Operand 2 must not appear in the second store address.
4919 (define_peephole2
4920 [(set (match_operand:DWIH 0 "memory_operand")
4921 (match_operand:DWIH 1 "general_reg_operand"))
4922 (set (match_operand:DWIH 2 "general_reg_operand") (match_dup 1))
4923 (parallel [(set (match_dup 2)
4924 (ashiftrt:DWIH (match_dup 2)
4925 (match_operand 4 "const_int_operand")))
4926 (clobber (reg:CC FLAGS_REG))])
4927 (set (match_operand:DWIH 3 "memory_operand") (match_dup 2))]
4928 "REGNO (operands[1]) != REGNO (operands[2])
4929 && INTVAL (operands[4]) == (<MODE_SIZE> * BITS_PER_UNIT - 1)
4930 && peep2_reg_dead_p (2, operands[1])
4931 && peep2_reg_dead_p (4, operands[2])
4932 && !reg_mentioned_p (operands[2], operands[3])"
4933 [(set (match_dup 0) (match_dup 1))
4934 (parallel [(set (match_dup 1) (ashiftrt:DWIH (match_dup 1) (match_dup 4)))
4935 (clobber (reg:CC FLAGS_REG))])
4936 (set (match_dup 3) (match_dup 1))])
4937
;; Matches store / shift from AX into DX by (width-1) / store, i.e. the form
;; the splitter emits for the cltd case.  When not optimizing for size and
;; both registers are dead where the condition requires, the shift is redone
;; in place on operand 1 so operand 2 is no longer needed.
4938 (define_peephole2
4939 [(set (match_operand:DWIH 0 "memory_operand")
4940 (match_operand:DWIH 1 "general_reg_operand"))
4941 (parallel [(set (match_operand:DWIH 2 "general_reg_operand")
4942 (ashiftrt:DWIH (match_dup 1)
4943 (match_operand 4 "const_int_operand")))
4944 (clobber (reg:CC FLAGS_REG))])
4945 (set (match_operand:DWIH 3 "memory_operand") (match_dup 2))]
4946 "/* cltd is shorter than sarl $31, %eax */
4947 !optimize_function_for_size_p (cfun)
4948 && REGNO (operands[1]) == AX_REG
4949 && REGNO (operands[2]) == DX_REG
4950 && INTVAL (operands[4]) == (<MODE_SIZE> * BITS_PER_UNIT - 1)
4951 && peep2_reg_dead_p (2, operands[1])
4952 && peep2_reg_dead_p (3, operands[2])
4953 && !reg_mentioned_p (operands[2], operands[3])"
4954 [(set (match_dup 0) (match_dup 1))
4955 (parallel [(set (match_dup 1) (ashiftrt:DWIH (match_dup 1) (match_dup 4)))
4956 (clobber (reg:CC FLAGS_REG))])
4957 (set (match_dup 3) (match_dup 1))])
4958
4959 ;; Extend to register case. Optimize case where source and destination
4960 ;; registers match and cases where we can use cltd.
4961 (define_split
4962 [(set (match_operand:<DWI> 0 "register_operand")
4963 (sign_extend:<DWI> (match_operand:DWIH 1 "register_operand")))
4964 (clobber (reg:CC FLAGS_REG))
4965 (clobber (match_scratch:DWIH 2))]
4966 "reload_completed"
4967 [(const_int 0)]
4968 {
/* Shift count of (word width - 1), used for the arithmetic right shift
   that produces the sign-fill word.  */
4969 rtx bits = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
4970
/* operands[3] and operands[4] become the two word-sized halves of the
   double-word destination register.  */
4971 split_double_mode (<DWI>mode, &operands[0], 1, &operands[3], &operands[4]);
4972
/* Copy the source into the first half unless it already lives there.  */
4973 if (REGNO (operands[3]) != REGNO (operands[1]))
4974 emit_move_insn (operands[3], operands[1]);
4975
/* Prefer the first half as shift source when it is AX, so the cltd test
   below can succeed even if the original source was another register.  */
4976 rtx src = operands[1];
4977 if (REGNO (operands[3]) == AX_REG)
4978 src = operands[3];
4979
4980 /* Generate a cltd if possible and doing so is profitable. */
4981 if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
4982 && REGNO (src) == AX_REG
4983 && REGNO (operands[4]) == DX_REG)
4984 {
4985 emit_insn (gen_ashr<mode>3_cvt (operands[4], src, bits));
4986 DONE;
4987 }
4988
/* General case: get the value into the second half, then shift in place.  */
4989 if (REGNO (operands[4]) != REGNO (operands[1]))
4990 emit_move_insn (operands[4], operands[1]);
4991
4992 emit_insn (gen_ashr<mode>3_cvt (operands[4], operands[4], bits));
4993 DONE;
4994 })
4995
;; 64-bit only: sign-extend a SWI12 register or memory operand into a DImode
;; register with movs{<imodesuffix>q|x}.
4996 (define_insn "extend<mode>di2"
4997 [(set (match_operand:DI 0 "register_operand" "=r")
4998 (sign_extend:DI
4999 (match_operand:SWI12 1 "nonimmediate_operand" "<r>m")))]
5000 "TARGET_64BIT"
5001 "movs{<imodesuffix>q|x}\t{%1, %0|%0, %1}"
5002 [(set_attr "type" "imovx")
5003 (set_attr "mode" "DI")])
5004
;; HImode -> SImode sign extension.  The instruction is chosen from the
;; prefix_0f attribute: it is 0 only for alternative 0 (source tied to the
;; "a"-class destination) when the cpu attribute is not k6, which selects
;; {cwtl|cwde}; every other case emits movs{wl|x}.
5005 (define_insn "extendhisi2"
5006 [(set (match_operand:SI 0 "register_operand" "=*a,r")
5007 (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "*0,rm")))]
5008 ""
5009 {
5010 switch (get_attr_prefix_0f (insn))
5011 {
5012 case 0:
5013 return "{cwtl|cwde}";
5014 default:
5015 return "movs{wl|x}\t{%1, %0|%0, %1}";
5016 }
5017 }
5018 [(set_attr "type" "imovx")
5019 (set_attr "mode" "SI")
5020 (set (attr "prefix_0f")
5021 ;; movsx is short decodable while cwtl is vector decoded.
5022 (if_then_else (and (eq_attr "cpu" "!k6")
5023 (eq_attr "alternative" "0"))
5024 (const_string "0")
5025 (const_string "1")))
5026 (set (attr "znver1_decode")
5027 (if_then_else (eq_attr "prefix_0f" "0")
5028 (const_string "double")
5029 (const_string "direct")))
5030 (set (attr "modrm")
5031 (if_then_else (eq_attr "prefix_0f" "0")
5032 (const_string "0")
5033 (const_string "1")))])
5034
;; 64-bit only: HImode -> SImode sign extension whose result is zero-extended
;; to DImode.  Instruction selection via prefix_0f works as in extendhisi2;
;; the movs form writes the SImode view (%k0) of the destination.
5035 (define_insn "*extendhisi2_zext"
5036 [(set (match_operand:DI 0 "register_operand" "=*a,r")
5037 (zero_extend:DI
5038 (sign_extend:SI
5039 (match_operand:HI 1 "nonimmediate_operand" "*0,rm"))))]
5040 "TARGET_64BIT"
5041 {
5042 switch (get_attr_prefix_0f (insn))
5043 {
5044 case 0:
5045 return "{cwtl|cwde}";
5046 default:
5047 return "movs{wl|x}\t{%1, %k0|%k0, %1}";
5048 }
5049 }
5050 [(set_attr "type" "imovx")
5051 (set_attr "mode" "SI")
5052 (set (attr "prefix_0f")
5053 ;; movsx is short decodable while cwtl is vector decoded.
5054 (if_then_else (and (eq_attr "cpu" "!k6")
5055 (eq_attr "alternative" "0"))
5056 (const_string "0")
5057 (const_string "1")))
5058 (set (attr "modrm")
5059 (if_then_else (eq_attr "prefix_0f" "0")
5060 (const_string "0")
5061 (const_string "1")))])
5062
;; QImode -> SImode sign extension with movs{bl|x}; the source is a
;; "q"-class register or memory.
5063 (define_insn "extendqisi2"
5064 [(set (match_operand:SI 0 "register_operand" "=r")
5065 (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
5066 ""
5067 "movs{bl|x}\t{%1, %0|%0, %1}"
5068 [(set_attr "type" "imovx")
5069 (set_attr "mode" "SI")])
5070
;; 64-bit only: QImode -> SImode sign extension whose result is zero-extended
;; to DImode; emitted as movs{bl|x} into the SImode view (%k0) of operand 0.
5071 (define_insn "*extendqisi2_zext"
5072 [(set (match_operand:DI 0 "register_operand" "=r")
5073 (zero_extend:DI
5074 (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm"))))]
5075 "TARGET_64BIT"
5076 "movs{bl|x}\t{%1, %k0|%k0, %1}"
5077 [(set_attr "type" "imovx")
5078 (set_attr "mode" "SI")])
5079
;; QImode -> HImode sign extension.  prefix_0f is 0 only for alternative 0
;; (source tied to the "a"-class destination) when the cpu attribute is not
;; k6, which selects {cbtw|cbw}; every other case emits movs{bw|x}.
5080 (define_insn "extendqihi2"
5081 [(set (match_operand:HI 0 "register_operand" "=*a,r")
5082 (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "*0,qm")))]
5083 ""
5084 {
5085 switch (get_attr_prefix_0f (insn))
5086 {
5087 case 0:
5088 return "{cbtw|cbw}";
5089 default:
5090 return "movs{bw|x}\t{%1, %0|%0, %1}";
5091 }
5092 }
5093 [(set_attr "type" "imovx")
5094 (set_attr "mode" "HI")
5095 (set (attr "prefix_0f")
5096 ;; movsx is short decodable while cwtl is vector decoded.
;; NOTE(review): the line above names cwtl, but this pattern emits cbtw;
;; presumably the same decode argument is meant -- confirm.
5097 (if_then_else (and (eq_attr "cpu" "!k6")
5098 (eq_attr "alternative" "0"))
5099 (const_string "0")
5100 (const_string "1")))
5101 (set (attr "modrm")
5102 (if_then_else (eq_attr "prefix_0f" "0")
5103 (const_string "0")
5104 (const_string "1")))])
5105
;; Sign-extend the QImode subreg of an extract_operator applied to operand 1
;; with constants (8, 8) into a SWI24 register.  The source is printed with
;; the %h modifier and must be in class "Q"; the destination is in class "R".
5106 (define_insn "*extendqi<SWI24:mode>_ext_1"
5107 [(set (match_operand:SWI24 0 "register_operand" "=R")
5108 (sign_extend:SWI24
5109 (subreg:QI
5110 (match_operator:SWI248 2 "extract_operator"
5111 [(match_operand 1 "int248_register_operand" "Q")
5112 (const_int 8)
5113 (const_int 8)]) 0)))]
5114 ""
5115 "movs{b<SWI24:imodesuffix>|x}\t{%h1, %0|%0, %h1}"
5116 [(set_attr "type" "imovx")
5117 (set_attr "mode" "<SWI24:MODE>")])
5118 \f
5119 ;; Conversions between float and double.
5120
5121 ;; These are all no-ops in the model used for the 80387.
5122 ;; So just emit moves.
5123
5124 ;; %%% Kill these when call knows how to work out a DFmode push earlier.
;; After reload, split an SF -> DF extending push of an x87 register into an
;; explicit stack pointer adjustment by -8 followed by an extending store to
;; the new top of stack.
5125 (define_split
5126 [(set (match_operand:DF 0 "push_operand")
5127 (float_extend:DF (match_operand:SF 1 "fp_register_operand")))]
5128 "reload_completed"
5129 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8)))
5130 (set (mem:DF (reg:P SP_REG)) (float_extend:DF (match_dup 1)))])
5131
;; After reload, split a MODEF -> XF extending push of an x87 register into a
;; stack pointer adjustment by -GET_MODE_SIZE (XFmode) (operand 2) followed by
;; an extending store to the new top of stack.
5132 (define_split
5133 [(set (match_operand:XF 0 "push_operand")
5134 (float_extend:XF (match_operand:MODEF 1 "fp_register_operand")))]
5135 "reload_completed"
5136 [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
5137 (set (mem:XF (reg:P SP_REG)) (float_extend:XF (match_dup 1)))]
5138 "operands[2] = GEN_INT (-GET_MODE_SIZE (XFmode));")
5139
;; SF -> DF extension expander.  A CONST_DOUBLE source is either folded to a
;; DFmode constant and moved directly (standard x87 constants, when SSE2 is
;; unavailable or x87/SSE mixing is enabled) or forced into the constant pool.
5140 (define_expand "extendsfdf2"
5141 [(set (match_operand:DF 0 "nonimm_ssenomem_operand")
5142 (float_extend:DF (match_operand:SF 1 "general_operand")))]
5143 "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
5144 {
5145 /* ??? Needed for compress_float_constant since all fp constants
5146 are TARGET_LEGITIMATE_CONSTANT_P. */
5147 if (CONST_DOUBLE_P (operands[1]))
5148 {
5149 if ((!TARGET_SSE2 || TARGET_MIX_SSE_I387)
5150 && standard_80387_constant_p (operands[1]) > 0)
5151 {
/* Extend the constant at compile time and emit a plain DFmode move.  */
5152 operands[1] = simplify_const_unary_operation
5153 (FLOAT_EXTEND, DFmode, operands[1], SFmode);
5154 emit_move_insn_1 (operands[0], operands[1]);
5155 DONE;
5156 }
/* Otherwise load the SFmode constant from memory and extend it.  */
5157 operands[1] = validize_mem (force_const_mem (SFmode, operands[1]));
5158 }
5159 })
5160
;; SF -> DF extension.  Alternatives 0-1 are x87 moves, alternatives 2-3 emit
;; cvtss2sd.  Per the "enabled" attribute: with SSE2 and SSE math the x87
;; alternatives are available only under TARGET_MIX_SSE_I387 and the SSE ones
;; always; without SSE math only the x87 alternatives are enabled.
5161 (define_insn "*extendsfdf2"
5162 [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v,v")
5163 (float_extend:DF
5164 (match_operand:SF 1 "nonimmediate_operand" "fm,f,v,m")))]
5165 "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
5166 {
5167 switch (which_alternative)
5168 {
5169 case 0:
5170 case 1:
5171 return output_387_reg_move (insn, operands);
5172
5173 case 2:
5174 return "%vcvtss2sd\t{%d1, %0|%0, %d1}";
5175 case 3:
5176 return "%vcvtss2sd\t{%1, %d0|%d0, %1}";
5177
5178 default:
5179 gcc_unreachable ();
5180 }
5181 }
5182 [(set_attr "type" "fmov,fmov,ssecvt,ssecvt")
5183 (set_attr "avx_partial_xmm_update" "false,false,false,true")
5184 (set_attr "prefix" "orig,orig,maybe_vex,maybe_vex")
5185 (set_attr "mode" "SF,XF,DF,DF")
5186 (set (attr "enabled")
5187 (if_then_else
5188 (match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
5189 (if_then_else
5190 (eq_attr "alternative" "0,1")
5191 (symbol_ref "TARGET_MIX_SSE_I387")
5192 (symbol_ref "true"))
5193 (if_then_else
5194 (eq_attr "alternative" "0,1")
5195 (symbol_ref "true")
5196 (symbol_ref "false"))))])
5197
5198 /* For converting SF(xmm2) to DF(xmm1), use the following code instead of
5199 cvtss2sd:
5200 unpcklps xmm2,xmm2 ; packed conversion might crash on signaling NaNs
5201 cvtps2pd xmm2,xmm1
5202 We do the conversion post reload to avoid producing 128-bit spills
5203 that might lead to an ICE on 32-bit targets. The sequence is unlikely to
5204 combine anyway. */
5205 (define_split
5206 [(set (match_operand:DF 0 "sse_reg_operand")
5207 (float_extend:DF
5208 (match_operand:SF 1 "nonimmediate_operand")))]
5209 "TARGET_USE_VECTOR_FP_CONVERTS
5210 && optimize_insn_for_speed_p ()
5211 && reload_completed
5212 && (!EXT_REX_SSE_REG_P (operands[0])
5213 || TARGET_AVX512VL || TARGET_EVEX512)"
5214 [(set (match_dup 2)
5215 (float_extend:V2DF
5216 (vec_select:V2SF
5217 (match_dup 3)
5218 (parallel [(const_int 0) (const_int 1)]))))]
5219 {
/* operands[2] is the V2DF view of the destination (the result);
   operands[3] starts as its V4SF view (where the source gets unpacked).  */
5220 operands[2] = lowpart_subreg (V2DFmode, operands[0], DFmode);
5221 operands[3] = lowpart_subreg (V4SFmode, operands[0], DFmode);
5222 /* Use movss for loading from memory, unpcklps reg, reg for registers.
5223 Try to avoid move when unpacking can be done in source. */
5224 if (REG_P (operands[1]))
5225 {
5226 /* If it is unsafe to overwrite upper half of source, we need
5227 to move to destination and unpack there. */
5228 if (REGNO (operands[0]) != REGNO (operands[1])
5229 || (EXT_REX_SSE_REG_P (operands[1])
5230 && !TARGET_AVX512VL))
5231 {
5232 rtx tmp = lowpart_subreg (SFmode, operands[0], DFmode);
5233 emit_move_insn (tmp, operands[1]);
5234 }
5235 else
5236 operands[3] = lowpart_subreg (V4SFmode, operands[1], SFmode);
5237 /* FIXME: vec_interleave_lowv4sf for AVX512VL should allow
5238 =v, v, then vbroadcastss will be only needed for AVX512F without
5239 AVX512VL. */
5240 if (!EXT_REX_SSE_REGNO_P (REGNO (operands[3])))
5241 emit_insn (gen_vec_interleave_lowv4sf (operands[3], operands[3],
5242 operands[3]));
5243 else
5244 {
/* Extended SSE register: use the V16SF view and the vec_dup pattern
   in place of the V4SF interleave.  */
5245 rtx tmp = lowpart_subreg (V16SFmode, operands[3], V4SFmode);
5246 emit_insn (gen_avx512f_vec_dupv16sf_1 (tmp, tmp));
5247 }
5248 }
5249 else
/* Memory source: set element 0 of a zero V4SF vector to the value.  */
5250 emit_insn (gen_vec_setv4sf_0 (operands[3],
5251 CONST0_RTX (V4SFmode), operands[1]));
5252 })
5253
5254 ;; It's more profitable to split and then extend in the same register.
;; Under TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS (speed only), replace an
;; extend-from-memory with a load into the SFmode low part of the destination
;; (operand 2) followed by a register-to-register extend.
5255 (define_peephole2
5256 [(set (match_operand:DF 0 "sse_reg_operand")
5257 (float_extend:DF
5258 (match_operand:SF 1 "memory_operand")))]
5259 "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
5260 && optimize_insn_for_speed_p ()"
5261 [(set (match_dup 2) (match_dup 1))
5262 (set (match_dup 0) (float_extend:DF (match_dup 2)))]
5263 "operands[2] = lowpart_subreg (SFmode, operands[0], DFmode);")
5264
5265 ;; Break partial SSE register dependency stall. This splitter should split
5266 ;; late in the pass sequence (after register rename pass), so allocated
5267 ;; registers won't change anymore
5268
;; Non-AVX only, after epilogue generation, when optimizing for speed and the
;; source is not the destination register: rewrite the scalar extend as a
;; vec_merge into a V2DF destination that is zeroed first.
5269 (define_split
5270 [(set (match_operand:DF 0 "sse_reg_operand")
5271 (float_extend:DF
5272 (match_operand:SF 1 "nonimmediate_operand")))]
5273 "!TARGET_AVX
5274 && TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY
5275 && epilogue_completed
5276 && optimize_function_for_speed_p (cfun)
5277 && (!REG_P (operands[1])
5278 || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1])))
5279 && (!EXT_REX_SSE_REG_P (operands[0])
5280 || TARGET_AVX512VL)"
5281 [(set (match_dup 0)
5282 (vec_merge:V2DF
5283 (vec_duplicate:V2DF
5284 (float_extend:DF
5285 (match_dup 1)))
5286 (match_dup 0)
5287 (const_int 1)))]
5288 {
/* Switch operand 0 to its V2DF view and clear the whole register before
   the merge that writes element 0.  */
5289 operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode);
5290 emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
5291 })
5292
;; HF -> SF extension.  With AVX512FP16 the set template is used as is.
;; Otherwise the HF value is placed in element 0 of a zeroed V8HF vector,
;; converted through gen_vcvtph2ps, and the SFmode low part of the V4SF
;; result is moved to the destination.
5293 (define_expand "extendhfsf2"
5294 [(set (match_operand:SF 0 "register_operand")
5295 (float_extend:SF
5296 (match_operand:HF 1 "nonimmediate_operand")))]
5297 "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
5298 {
5299 if (!TARGET_AVX512FP16)
5300 {
5301 rtx res = gen_reg_rtx (V4SFmode);
5302 rtx tmp = gen_reg_rtx (V8HFmode);
5303 rtx zero = force_reg (V8HFmode, CONST0_RTX (V8HFmode));
5304
5305 emit_insn (gen_vec_setv8hf_0 (tmp, zero, operands[1]));
5306 emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp)));
5307 emit_move_insn (operands[0], gen_lowpart (SFmode, res));
5308 DONE;
5309 }
5310 })
5311
;; HF -> DF extension, AVX512FP16 only.  There is no preparation code, so the
;; set template is emitted unchanged.
5312 (define_expand "extendhfdf2"
5313 [(set (match_operand:DF 0 "register_operand")
5314 (float_extend:DF
5315 (match_operand:HF 1 "nonimmediate_operand")))]
5316 "TARGET_AVX512FP16")
5317
;; AVX512FP16 HF -> MODEF extension, emitted as vcvtsh2<ssemodesuffix> with
;; the destination register repeated as the first source operand.
5318 (define_insn "*extendhf<mode>2"
5319 [(set (match_operand:MODEF 0 "register_operand" "=v")
5320 (float_extend:MODEF
5321 (match_operand:HF 1 "nonimmediate_operand" "vm")))]
5322 "TARGET_AVX512FP16"
5323 "vcvtsh2<ssemodesuffix>\t{%1, %0, %0|%0, %0, %1}"
5324 [(set_attr "type" "ssecvt")
5325 (set_attr "prefix" "evex")
5326 (set_attr "mode" "<MODE>")])
5327
;; BF -> SF extension, expressed as UNSPEC_CVTBFSF rather than float_extend.
;; Offered only with SSE2 and when NaNs need not be honored for BFmode.
5328 (define_expand "extendbfsf2"
5329 [(set (match_operand:SF 0 "register_operand")
5330 (unspec:SF
5331 [(match_operand:BF 1 "register_operand")]
5332 UNSPEC_CVTBFSF))]
5333 "TARGET_SSE2 && !HONOR_NANS (BFmode)")
5334
5335 ;; Don't use float_extend since pslld doesn't raise
5336 ;; exceptions or turn a sNaN into a qNaN.
;; The conversion is a left shift by 16.  Alternative 0 is the in-place
;; non-AVX form, alternative 1 the AVX three-operand form, and alternative 2
;; prints both registers with the %g modifier and is enabled only for
;; AVX512F with EVEX512, without AVX512VL and without TARGET_PREFER_AVX256.
5337 (define_insn "extendbfsf2_1"
5338 [(set (match_operand:SF 0 "register_operand" "=x,Yv,v")
5339 (unspec:SF
5340 [(match_operand:BF 1 "register_operand" " 0,Yv,v")]
5341 UNSPEC_CVTBFSF))]
5342 "TARGET_SSE2"
5343 "@
5344 pslld\t{$16, %0|%0, 16}
5345 vpslld\t{$16, %1, %0|%0, %1, 16}
5346 vpslld\t{$16, %g1, %g0|%g0, %g1, 16}"
5347 [(set_attr "isa" "noavx,avx,*")
5348 (set_attr "type" "sseishft1")
5349 (set_attr "length_immediate" "1")
5350 (set_attr "prefix_data16" "1,*,*")
5351 (set_attr "prefix" "orig,maybe_evex,evex")
5352 (set_attr "mode" "TI,TI,XI")
5353 (set_attr "memory" "none")
5354 (set (attr "enabled")
5355 (if_then_else (eq_attr "alternative" "2")
5356 (symbol_ref "TARGET_AVX512F && TARGET_EVEX512
5357 && !TARGET_AVX512VL && !TARGET_PREFER_AVX256")
5358 (const_string "*")))])
5359
;; MODEF -> XF extension expander (x87 only).  A CONST_DOUBLE source is either
;; folded to an XFmode constant and moved directly (standard x87 constants) or
;; forced into the constant pool in its original mode.
5360 (define_expand "extend<mode>xf2"
5361 [(set (match_operand:XF 0 "nonimmediate_operand")
5362 (float_extend:XF (match_operand:MODEF 1 "general_operand")))]
5363 "TARGET_80387"
5364 {
5365 /* ??? Needed for compress_float_constant since all fp constants
5366 are TARGET_LEGITIMATE_CONSTANT_P. */
5367 if (CONST_DOUBLE_P (operands[1]))
5368 {
5369 if (standard_80387_constant_p (operands[1]) > 0)
5370 {
/* Extend the constant at compile time and emit a plain XFmode move.  */
5371 operands[1] = simplify_const_unary_operation
5372 (FLOAT_EXTEND, XFmode, operands[1], <MODE>mode);
5373 emit_move_insn_1 (operands[0], operands[1]);
5374 DONE;
5375 }
/* Otherwise load the constant from memory and extend it.  */
5376 operands[1] = validize_mem (force_const_mem (<MODE>mode, operands[1]));
5377 }
5378 })
5379
;; x87 MODEF -> XF extension; both alternatives (to an x87 register, or from
;; an x87 register to memory) are printed by output_387_reg_move.
5380 (define_insn "*extend<mode>xf2_i387"
5381 [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m")
5382 (float_extend:XF
5383 (match_operand:MODEF 1 "nonimmediate_operand" "fm,f")))]
5384 "TARGET_80387"
5385 "* return output_387_reg_move (insn, operands);"
5386 [(set_attr "type" "fmov")
5387 (set_attr "mode" "<MODE>,XF")])
5388
5389 ;; %%% This seems like bad news.
5390 ;; This cannot output into an f-reg because there is no way to be sure
5391 ;; of truncating in that case. Otherwise this is just like a simple move
5392 ;; insn. So we pretend we can output to a reg in order to get better
5393 ;; register preferencing, but we really use a stack slot.
5394
5395 ;; Conversion from DFmode to SFmode.
5396
;; DF -> SF truncation.  Alternatives 0-1 are x87 moves (to memory, to an x87
;; register), alternatives 2-3 emit cvtsd2ss.  Per the "enabled" attribute the
;; x87 register destination (alternative 1) always requires
;; flag_unsafe_math_optimizations; with SSE2 and SSE math both x87
;; alternatives also require TARGET_MIX_SSE_I387, and without SSE math the
;; SSE alternatives are disabled.
5397 (define_insn "truncdfsf2"
5398 [(set (match_operand:SF 0 "nonimm_ssenomem_operand" "=m,f,v,v")
5399 (float_truncate:SF
5400 (match_operand:DF 1 "register_ssemem_operand" "f,f,v,m")))]
5401 "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
5402 {
5403 switch (which_alternative)
5404 {
5405 case 0:
5406 case 1:
5407 return output_387_reg_move (insn, operands);
5408
5409 case 2:
5410 return "%vcvtsd2ss\t{%d1, %0|%0, %d1}";
5411 case 3:
5412 return "%vcvtsd2ss\t{%1, %d0|%d0, %1}";
5413
5414 default:
5415 gcc_unreachable ();
5416 }
5417 }
5418 [(set_attr "type" "fmov,fmov,ssecvt,ssecvt")
5419 (set_attr "avx_partial_xmm_update" "false,false,false,true")
5420 (set_attr "mode" "SF")
5421 (set (attr "enabled")
5422 (if_then_else
5423 (match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
5424 (cond [(eq_attr "alternative" "0")
5425 (symbol_ref "TARGET_MIX_SSE_I387")
5426 (eq_attr "alternative" "1")
5427 (symbol_ref "TARGET_MIX_SSE_I387
5428 && flag_unsafe_math_optimizations")
5429 ]
5430 (symbol_ref "true"))
5431 (cond [(eq_attr "alternative" "0")
5432 (symbol_ref "true")
5433 (eq_attr "alternative" "1")
5434 (symbol_ref "flag_unsafe_math_optimizations")
5435 ]
5436 (symbol_ref "false"))))])
5437
5438 /* For converting DF(xmm2) to SF(xmm1), use the following code instead of
5439 cvtsd2ss:
5440 unpcklpd xmm2,xmm2 ; packed conversion might crash on signaling NaNs
5441 cvtpd2ps xmm2,xmm1
5442 We do the conversion post reload to avoid producing 128-bit spills
5443 that might lead to an ICE on 32-bit targets. The sequence is unlikely to
5444 combine anyway. */
5445 (define_split
5446 [(set (match_operand:SF 0 "sse_reg_operand")
5447 (float_truncate:SF
5448 (match_operand:DF 1 "nonimmediate_operand")))]
5449 "TARGET_USE_VECTOR_FP_CONVERTS
5450 && optimize_insn_for_speed_p ()
5451 && reload_completed
5452 && (!EXT_REX_SSE_REG_P (operands[0])
5453 || TARGET_AVX512VL)"
5454 [(set (match_dup 2)
5455 (vec_concat:V4SF
5456 (float_truncate:V2SF
5457 (match_dup 4))
5458 (match_dup 3)))]
5459 {
/* operands[2] is the V4SF view of the destination (the result),
   operands[3] the zero upper half of the concat, and operands[4] starts
   as the V2DF view of the destination (input of the packed truncate).  */
5460 operands[2] = lowpart_subreg (V4SFmode, operands[0], SFmode);
5461 operands[3] = CONST0_RTX (V2SFmode);
5462 operands[4] = lowpart_subreg (V2DFmode, operands[0], SFmode);
5463 /* Use movsd for loading from memory, unpcklpd for registers.
5464 Try to avoid move when unpacking can be done in source, or SSE3
5465 movddup is available. */
5466 if (REG_P (operands[1]))
5467 {
5468 if ((!TARGET_SSE3 && REGNO (operands[0]) != REGNO (operands[1]))
5469 || (EXT_REX_SSE_REG_P (operands[1]) && !TARGET_AVX512VL))
5470 {
/* Copy the source into the destination and duplicate it there.  */
5471 rtx tmp = lowpart_subreg (DFmode, operands[0], SFmode);
5472 emit_move_insn (tmp, operands[1]);
5473 operands[1] = tmp;
5474 }
5475 else if (!TARGET_SSE3)
5476 operands[4] = lowpart_subreg (V2DFmode, operands[1], DFmode);
5477 emit_insn (gen_vec_dupv2df (operands[4], operands[1]));
5478 }
5479 else
/* Memory source: concatenate the loaded value with a zero DF.  */
5480 emit_insn (gen_vec_concatv2df (operands[4], operands[1],
5481 CONST0_RTX (DFmode)));
5482 })
5483
5484 ;; It's more profitable to split and then truncate in the same register.
;; Under TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS (speed only), replace a
;; truncate-from-memory with a load into the DFmode view of the destination
;; (operand 2) followed by a register-to-register truncate.
5485 (define_peephole2
5486 [(set (match_operand:SF 0 "sse_reg_operand")
5487 (float_truncate:SF
5488 (match_operand:DF 1 "memory_operand")))]
5489 "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
5490 && optimize_insn_for_speed_p ()"
5491 [(set (match_dup 2) (match_dup 1))
5492 (set (match_dup 0) (float_truncate:SF (match_dup 2)))]
5493 "operands[2] = lowpart_subreg (DFmode, operands[0], SFmode);")
5494
5495 ;; Break partial SSE register dependency stall. This splitter should split
5496 ;; late in the pass sequence (after register rename pass), so allocated
5497 ;; registers won't change anymore
5498
;; Non-AVX only, after epilogue generation, when optimizing for speed and the
;; source is not the destination register: rewrite the scalar truncate as a
;; vec_merge into a V4SF destination that is zeroed first.
5499 (define_split
5500 [(set (match_operand:SF 0 "sse_reg_operand")
5501 (float_truncate:SF
5502 (match_operand:DF 1 "nonimmediate_operand")))]
5503 "!TARGET_AVX
5504 && TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY
5505 && epilogue_completed
5506 && optimize_function_for_speed_p (cfun)
5507 && (!REG_P (operands[1])
5508 || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1])))
5509 && (!EXT_REX_SSE_REG_P (operands[0])
5510 || TARGET_AVX512VL)"
5511 [(set (match_dup 0)
5512 (vec_merge:V4SF
5513 (vec_duplicate:V4SF
5514 (float_truncate:SF
5515 (match_dup 1)))
5516 (match_dup 0)
5517 (const_int 1)))]
5518 {
/* Switch operand 0 to its V4SF view and clear the whole register before
   the merge that writes element 0.  */
5519 operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode);
5520 emit_move_insn (operands[0], CONST0_RTX (V4SFmode));
5521 })
5522
5523 ;; Conversion from XFmode to {SF,DF}mode
5524
;; x87 XF -> MODEF truncation, printed by output_387_reg_move.  The memory
;; destination (alternative 0) is always enabled; the x87 register destination
;; (alternative 1) only with flag_unsafe_math_optimizations.
5525 (define_insn "truncxf<mode>2"
5526 [(set (match_operand:MODEF 0 "nonimmediate_operand" "=m,f")
5527 (float_truncate:MODEF
5528 (match_operand:XF 1 "register_operand" "f,f")))]
5529 "TARGET_80387"
5530 "* return output_387_reg_move (insn, operands);"
5531 [(set_attr "type" "fmov")
5532 (set_attr "mode" "<MODE>")
5533 (set (attr "enabled")
5534 (cond [(eq_attr "alternative" "1")
5535 (symbol_ref "flag_unsafe_math_optimizations")
5536 ]
5537 (symbol_ref "true")))])
5538
5539 ;; Conversion from {SF,DF}mode to HFmode.
5540
;; SF -> HF truncation.  With AVX512FP16 the set template is used as is.
;; Otherwise the SF value is placed in element 0 of a zeroed V4SF vector,
;; converted through gen_vcvtps2ph into a V8HF temporary, and the HFmode low
;; part of that temporary is moved to the destination.
5541 (define_expand "truncsfhf2"
5542 [(set (match_operand:HF 0 "register_operand")
5543 (float_truncate:HF
5544 (match_operand:SF 1 "nonimmediate_operand")))]
5545 "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
5546 {
5547 if (!TARGET_AVX512FP16)
5548 {
5549 rtx res = gen_reg_rtx (V8HFmode);
5550 rtx tmp = gen_reg_rtx (V4SFmode);
5551 rtx zero = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
5552
5553 emit_insn (gen_vec_setv4sf_0 (tmp, zero, operands[1]));
/* NOTE(review): the immediate 4 presumably selects MXCSR-controlled
   rounding for vcvtps2ph -- confirm against the instruction reference.  */
5554 emit_insn (gen_vcvtps2ph (gen_lowpart (V8HImode, res), tmp, GEN_INT (4)));
5555 emit_move_insn (operands[0], gen_lowpart (HFmode, res));
5556 DONE;
5557 }
5558 })
5559
;; DF -> HF truncation, AVX512FP16 only.  There is no preparation code, so the
;; set template is emitted unchanged.
5560 (define_expand "truncdfhf2"
5561 [(set (match_operand:HF 0 "register_operand")
5562 (float_truncate:HF
5563 (match_operand:DF 1 "nonimmediate_operand")))]
5564 "TARGET_AVX512FP16")
5565
;; AVX512FP16 MODEF -> HF truncation, emitted as vcvt<ssemodesuffix>2sh with
;; the destination printed via the %d modifier.
5566 (define_insn "*trunc<mode>hf2"
5567 [(set (match_operand:HF 0 "register_operand" "=v")
5568 (float_truncate:HF
5569 (match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
5570 "TARGET_AVX512FP16"
5571 "vcvt<ssemodesuffix>2sh\t{%1, %d0|%d0, %1}"
5572 [(set_attr "type" "ssecvt")
5573 (set_attr "prefix" "evex")
5574 (set_attr "mode" "HF")])
5575
;; SF -> BF truncation via vcvtneps2bf16.  Requires AVX512BF16 with AVX512VL,
;; or AVXNECONVERT, and is only offered when NaNs need not be honored for
;; BFmode and flag_unsafe_math_optimizations is set.  Alternative 0 forces the
;; VEX encoding with the {vex} pseudo prefix.
5576 (define_insn "truncsfbf2"
5577 [(set (match_operand:BF 0 "register_operand" "=x, v")
5578 (float_truncate:BF
5579 (match_operand:SF 1 "register_operand" "x,v")))]
5580 "((TARGET_AVX512BF16 && TARGET_AVX512VL) || TARGET_AVXNECONVERT)
5581 && !HONOR_NANS (BFmode) && flag_unsafe_math_optimizations"
5582 "@
5583 %{vex%} vcvtneps2bf16\t{%1, %0|%0, %1}
5584 vcvtneps2bf16\t{%1, %0|%0, %1}"
5585 [(set_attr "isa" "avxneconvert,avx512bf16vl")
5586 (set_attr "prefix" "vex,evex")])
5587
5588 ;; Signed conversion to DImode.
5589
;; x87 XF -> DI signed conversion.  With TARGET_FISTTP the fisttp pattern is
;; emitted instead of the flags-clobbering template.
5590 (define_expand "fix_truncxfdi2"
5591 [(parallel [(set (match_operand:DI 0 "nonimmediate_operand")
5592 (fix:DI (match_operand:XF 1 "register_operand")))
5593 (clobber (reg:CC FLAGS_REG))])]
5594 "TARGET_80387"
5595 {
5596 if (TARGET_FISTTP)
5597 {
5598 emit_insn (gen_fix_truncdi_i387_fisttp (operands[0], operands[1]));
5599 DONE;
5600 }
5601 })
5602
;; MODEF -> DI signed conversion.  The fisttp pattern is used when
;; TARGET_FISTTP is set, except for 64-bit SSE float modes with SSE math.
;; Otherwise, 64-bit SSE float modes go through fix_trunc<mode>di_sse, using
;; a fresh pseudo when the destination is not a register.  Any remaining case
;; falls through to the flags-clobbering template.
5603 (define_expand "fix_trunc<mode>di2"
5604 [(parallel [(set (match_operand:DI 0 "nonimmediate_operand")
5605 (fix:DI (match_operand:MODEF 1 "register_operand")))
5606 (clobber (reg:CC FLAGS_REG))])]
5607 "TARGET_80387 || (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode))"
5608 {
5609 if (TARGET_FISTTP
5610 && !(TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
5611 {
5612 emit_insn (gen_fix_truncdi_i387_fisttp (operands[0], operands[1]));
5613 DONE;
5614 }
5615 if (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode))
5616 {
5617 rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (DImode);
5618 emit_insn (gen_fix_trunc<mode>di_sse (out, operands[1]));
5619 if (out != operands[0])
5620 emit_move_insn (operands[0], out);
5621 DONE;
5622 }
5623 })
5624
;; AVX512FP16 HF -> SWI48 conversion for both codes of the any_fix iterator,
;; emitted as vcvttsh2<fixsuffix>si.
5625 (define_insn "fix<fixunssuffix>_trunchf<mode>2"
5626 [(set (match_operand:SWI48 0 "register_operand" "=r")
5627 (any_fix:SWI48
5628 (match_operand:HF 1 "nonimmediate_operand" "vm")))]
5629 "TARGET_AVX512FP16"
5630 "vcvttsh2<fixsuffix>si\t{%1, %0|%0, %1}"
5631 [(set_attr "type" "sseicvt")
5632 (set_attr "prefix" "evex")
5633 (set_attr "mode" "<MODE>")])
5634
5635 ;; Signed conversion to SImode.
5636
;; x87 XF -> SI signed conversion.  With TARGET_FISTTP the fisttp pattern is
;; emitted instead of the flags-clobbering template.
5637 (define_expand "fix_truncxfsi2"
5638 [(parallel [(set (match_operand:SI 0 "nonimmediate_operand")
5639 (fix:SI (match_operand:XF 1 "register_operand")))
5640 (clobber (reg:CC FLAGS_REG))])]
5641 "TARGET_80387"
5642 {
5643 if (TARGET_FISTTP)
5644 {
5645 emit_insn (gen_fix_truncsi_i387_fisttp (operands[0], operands[1]));
5646 DONE;
5647 }
5648 })
5649
;; MODEF -> SI signed conversion.  The fisttp pattern is used when
;; TARGET_FISTTP is set, except for SSE float modes with SSE math.  Otherwise,
;; SSE float modes go through fix_trunc<mode>si_sse, using a fresh pseudo when
;; the destination is not a register.  Any remaining case falls through to the
;; flags-clobbering template.
5650 (define_expand "fix_trunc<mode>si2"
5651 [(parallel [(set (match_operand:SI 0 "nonimmediate_operand")
5652 (fix:SI (match_operand:MODEF 1 "register_operand")))
5653 (clobber (reg:CC FLAGS_REG))])]
5654 "TARGET_80387 || SSE_FLOAT_MODE_P (<MODE>mode)"
5655 {
5656 if (TARGET_FISTTP
5657 && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
5658 {
5659 emit_insn (gen_fix_truncsi_i387_fisttp (operands[0], operands[1]));
5660 DONE;
5661 }
5662 if (SSE_FLOAT_MODE_P (<MODE>mode))
5663 {
5664 rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SImode);
5665 emit_insn (gen_fix_trunc<mode>si_sse (out, operands[1]));
5666 if (out != operands[0])
5667 emit_move_insn (operands[0], out);
5668 DONE;
5669 }
5670 })
5671
5672 ;; Signed conversion to HImode.
5673
;; x87 float -> HI signed conversion.  For SSE float modes it is available
;; only when TARGET_FISTTP is set and SSE math is off.  With TARGET_FISTTP the
;; fisttp pattern is emitted instead of the flags-clobbering template.
5674 (define_expand "fix_trunc<mode>hi2"
5675 [(parallel [(set (match_operand:HI 0 "nonimmediate_operand")
5676 (fix:HI (match_operand:X87MODEF 1 "register_operand")))
5677 (clobber (reg:CC FLAGS_REG))])]
5678 "TARGET_80387
5679 && !(SSE_FLOAT_MODE_P (<MODE>mode) && (!TARGET_FISTTP || TARGET_SSE_MATH))"
5680 {
5681 if (TARGET_FISTTP)
5682 {
5683 emit_insn (gen_fix_trunchi_i387_fisttp (operands[0], operands[1]));
5684 DONE;
5685 }
5686 })
5687
5688 ;; Unsigned conversion to DImode
5689
;; MODEF -> DI unsigned conversion; 64-bit AVX512F with SSE math only,
;; emitted as vcvtt<ssemodesuffix>2usi.
5690 (define_insn "fixuns_trunc<mode>di2"
5691 [(set (match_operand:DI 0 "register_operand" "=r")
5692 (unsigned_fix:DI
5693 (match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
5694 "TARGET_64BIT && TARGET_AVX512F && TARGET_SSE_MATH"
5695 "vcvtt<ssemodesuffix>2usi\t{%1, %0|%0, %1}"
5696 [(set_attr "type" "sseicvt")
5697 (set_attr "prefix" "evex")
5698 (set_attr "mode" "DI")])
5699
5700 ;; Unsigned conversion to SImode.
5701
;; MODEF -> SI unsigned conversion.  With AVX512F the dedicated insn is
;; emitted.  Otherwise the expansion FAILs when optimizing for size, and else
;; builds a vector constant from 2^31 in operand 2 for the template's
;; (use ...) together with two vector-mode scratch clobbers.
5702 (define_expand "fixuns_trunc<mode>si2"
5703 [(parallel
5704 [(set (match_operand:SI 0 "register_operand")
5705 (unsigned_fix:SI
5706 (match_operand:MODEF 1 "nonimmediate_operand")))
5707 (use (match_dup 2))
5708 (clobber (scratch:<ssevecmode>))
5709 (clobber (scratch:<ssevecmode>))])]
5710 "(!TARGET_64BIT || TARGET_AVX512F) && TARGET_SSE2 && TARGET_SSE_MATH"
5711 {
5712 machine_mode mode = <MODE>mode;
5713 machine_mode vecmode = <ssevecmode>mode;
5714 REAL_VALUE_TYPE TWO31r;
5715 rtx two31;
5716
5717 if (TARGET_AVX512F)
5718 {
5719 emit_insn (gen_fixuns_trunc<mode>si2_avx512f (operands[0], operands[1]));
5720 DONE;
5721 }
5722
5723 if (optimize_insn_for_size_p ())
5724 FAIL;
5725
/* TWO31r = 1.0 scaled by 2^31, then turned into a vecmode constant that
   is forced into a register.  */
5726 real_ldexp (&TWO31r, &dconst1, 31);
5727 two31 = const_double_from_real_value (TWO31r, mode);
5728 two31 = ix86_build_const_vector (vecmode, true, two31);
5729 operands[2] = force_reg (vecmode, two31);
5730 })
5731
;; MODEF -> SI unsigned conversion for AVX512F with SSE math, emitted as
;; vcvtt<ssemodesuffix>2usi.
5732 (define_insn "fixuns_trunc<mode>si2_avx512f"
5733 [(set (match_operand:SI 0 "register_operand" "=r")
5734 (unsigned_fix:SI
5735 (match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
5736 "TARGET_AVX512F && TARGET_SSE_MATH"
5737 "vcvtt<ssemodesuffix>2usi\t{%1, %0|%0, %1}"
5738 [(set_attr "type" "sseicvt")
5739 (set_attr "prefix" "evex")
5740 (set_attr "mode" "SI")])
5741
;; 64-bit AVX512FP16: HF -> SI unsigned conversion whose result is
;; zero-extended to DImode; vcvttsh2usi writes the SImode view (%k0).
5742 (define_insn "*fixuns_trunchfsi2zext"
5743 [(set (match_operand:DI 0 "register_operand" "=r")
5744 (zero_extend:DI
5745 (unsigned_fix:SI
5746 (match_operand:HF 1 "nonimmediate_operand" "vm"))))]
5747 "TARGET_64BIT && TARGET_AVX512FP16"
5748 "vcvttsh2usi\t{%1, %k0|%k0, %1}"
5749 [(set_attr "type" "sseicvt")
5750 (set_attr "prefix" "evex")
5751 (set_attr "mode" "SI")])
5752
;; 64-bit AVX512F with SSE math: MODEF -> SI unsigned conversion whose result
;; is zero-extended to DImode; the instruction writes the SImode view (%k0).
5753 (define_insn "*fixuns_trunc<mode>si2_avx512f_zext"
5754 [(set (match_operand:DI 0 "register_operand" "=r")
5755 (zero_extend:DI
5756 (unsigned_fix:SI
5757 (match_operand:MODEF 1 "nonimmediate_operand" "vm"))))]
5758 "TARGET_64BIT && TARGET_AVX512F && TARGET_SSE_MATH"
5759 "vcvtt<ssemodesuffix>2usi\t{%1, %k0|%k0, %1}"
5760 [(set_attr "type" "sseicvt")
5761 (set_attr "prefix" "evex")
5762 (set_attr "mode" "SI")])
5763
;; 32-bit SSE2 MODEF -> SI unsigned conversion, available only when the
;; function is optimized for speed.  The output template is "#"; after reload
;; the insn is split by ix86_split_convert_uns_si_sse.  Operand 4 is the
;; vector constant supplied through (use ...); operands 1 and 2 are
;; vector-mode scratch registers.
5764 (define_insn_and_split "*fixuns_trunc<mode>_1"
5765 [(set (match_operand:SI 0 "register_operand" "=&x,&x")
5766 (unsigned_fix:SI
5767 (match_operand:MODEF 3 "nonimmediate_operand" "xm,xm")))
5768 (use (match_operand:<ssevecmode> 4 "nonimmediate_operand" "m,x"))
5769 (clobber (match_scratch:<ssevecmode> 1 "=x,&x"))
5770 (clobber (match_scratch:<ssevecmode> 2 "=x,x"))]
5771 "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
5772 && optimize_function_for_speed_p (cfun)"
5773 "#"
5774 "&& reload_completed"
5775 [(const_int 0)]
5776 {
5777 ix86_split_convert_uns_si_sse (operands);
5778 DONE;
5779 })
5780
5781 ;; Unsigned conversion to HImode.
5782 ;; Without these patterns, we'll try the unsigned SI conversion which
5783 ;; is complex for SSE, rather than the signed SI conversion, which isn't.
5784
;; Expand an unsigned HFmode -> HImode conversion as a signed (fix:SI)
;; conversion into a fresh SImode pseudo (operand 2), followed by a copy
;; of that pseudo's HImode subreg at offset 0 into operand 0.
;; Enabled for TARGET_AVX512FP16.
5785 (define_expand "fixuns_trunchfhi2"
5786 [(set (match_dup 2)
5787 (fix:SI (match_operand:HF 1 "nonimmediate_operand")))
5788 (set (match_operand:HI 0 "nonimmediate_operand")
5789 (subreg:HI (match_dup 2) 0))]
5790 "TARGET_AVX512FP16"
5791 "operands[2] = gen_reg_rtx (SImode);")
5792
;; Expand an unsigned MODEF -> HImode conversion as a signed (fix:SI)
;; conversion into a fresh SImode pseudo (operand 2), followed by a copy
;; of that pseudo's HImode subreg at offset 0 into operand 0.
;; Enabled when <MODE>mode is an SSE float mode and TARGET_SSE_MATH is set.
5793 (define_expand "fixuns_trunc<mode>hi2"
5794 [(set (match_dup 2)
5795 (fix:SI (match_operand:MODEF 1 "nonimmediate_operand")))
5796 (set (match_operand:HI 0 "nonimmediate_operand")
5797 (subreg:HI (match_dup 2) 0))]
5798 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
5799 "operands[2] = gen_reg_rtx (SImode);")
5800
5801 ;; When SSE is available, it is always faster to use it!
;; Signed truncating conversion MODEF -> SWI48 general register using
;; cvtt<ssemodesuffix>2si.  Alternative 0 reads the source from a vector
;; register ("v"), alternative 1 from memory ("m").
;; Enabled for SSE float modes when either TARGET_FISTTP is off or
;; TARGET_SSE_MATH is on.
5802 (define_insn "fix_trunc<MODEF:mode><SWI48:mode>_sse"
5803 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5804 (fix:SWI48 (match_operand:MODEF 1 "nonimmediate_operand" "v,m")))]
5805 "SSE_FLOAT_MODE_P (<MODEF:MODE>mode)
5806 && (!TARGET_FISTTP || TARGET_SSE_MATH)"
5807 "%vcvtt<MODEF:ssemodesuffix>2si<SWI48:rex64suffix>\t{%1, %0|%0, %1}"
5808 [(set_attr "type" "sseicvt")
5809 (set_attr "prefix" "maybe_vex")
;; prefix_rex is "1" for the DImode destination, default otherwise.
5810 (set (attr "prefix_rex")
5811 (if_then_else
5812 (match_test "<SWI48:MODE>mode == DImode")
5813 (const_string "1")
5814 (const_string "*")))
5815 (set_attr "mode" "<MODEF:MODE>")
;; Per-alternative decode classes (register source, memory source).
5816 (set_attr "athlon_decode" "double,vector")
5817 (set_attr "amdfam10_decode" "double,double")
5818 (set_attr "bdver1_decode" "double,double")])
5819
5820 ;; Avoid vector decoded forms of the instruction.
;; When a MODEF -> SWI48 fix reads its source from memory, and an SSE
;; scratch register (operand 2) is available, load the value into the
;; scratch first and convert from the register instead.
;; Applies under TARGET_AVOID_VECTOR_DECODE for SSE float modes when the
;; insn is optimized for speed.
5821 (define_peephole2
5822 [(match_scratch:MODEF 2 "x")
5823 (set (match_operand:SWI48 0 "register_operand")
5824 (fix:SWI48 (match_operand:MODEF 1 "memory_operand")))]
5825 "TARGET_AVOID_VECTOR_DECODE
5826 && SSE_FLOAT_MODE_P (<MODEF:MODE>mode)
5827 && optimize_insn_for_speed_p ()"
5828 [(set (match_dup 2) (match_dup 1))
5829 (set (match_dup 0) (fix:SWI48 (match_dup 2)))])
5830
;; x87 truncating conversion of an "f" register operand to an SWI248x
;; memory destination; the assembler text comes from
;; output_fix_trunc (insn, operands, true).  An XFmode "f" scratch is
;; clobbered.
;; Requires TARGET_FISTTP and an x87 float source mode.  The pattern is
;; disabled when the source mode is an SSE float mode, TARGET_SSE_MATH is
;; on, and either TARGET_64BIT holds or the destination is not DImode.
5831 (define_insn "fix_trunc<mode>_i387_fisttp"
5832 [(set (match_operand:SWI248x 0 "nonimmediate_operand" "=m")
5833 (fix:SWI248x (match_operand 1 "register_operand" "f")))
5834 (clobber (match_scratch:XF 2 "=&f"))]
5835 "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
5836 && TARGET_FISTTP
5837 && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
5838 && (TARGET_64BIT || <MODE>mode != DImode))
5839 && TARGET_SSE_MATH)"
5840 "* return output_fix_trunc (insn, operands, true);"
5841 [(set_attr "type" "fisttp")
5842 (set_attr "mode" "<MODE>")])
5843
5844 ;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description
5845 ;; of the machinery. Please note the clobber of FLAGS_REG. In i387 control
5846 ;; word calculation (inserted by LCM in mode switching pass), FLAGS_REG
5847 ;; clobbering insns can be used. Look at emit_i387_cw_initialization ()
5848 ;; function in i386.cc.
;; Pre-reload placeholder for the non-FISTTP x87 truncating conversion.
;; It matches only while ix86_pre_reload_split () holds and is split
;; unconditionally ("&& 1").  The split code:
;;   - sets ix86_optimize_mode_switching[I387_TRUNC],
;;   - obtains two HImode stack slots (SLOT_CW_STORED, SLOT_CW_TRUNC),
;;   - emits fix_trunc<mode>_i387 with those slots as operands 2 and 3.
5849 (define_insn_and_split "*fix_trunc<mode>_i387_1"
5850 [(set (match_operand:SWI248x 0 "nonimmediate_operand")
5851 (fix:SWI248x (match_operand 1 "register_operand")))
5852 (clobber (reg:CC FLAGS_REG))]
5853 "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
5854 && !TARGET_FISTTP
5855 && !(SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
5856 && (TARGET_64BIT || <MODE>mode != DImode))
5857 && ix86_pre_reload_split ()"
5858 "#"
5859 "&& 1"
5860 [(const_int 0)]
5861 {
5862 ix86_optimize_mode_switching[I387_TRUNC] = 1;
5863
5864 operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
5865 operands[3] = assign_386_stack_local (HImode, SLOT_CW_TRUNC);
5866
5867 emit_insn (gen_fix_trunc<mode>_i387 (operands[0], operands[1],
5868 operands[2], operands[3]));
5869 DONE;
5870 }
5871 [(set_attr "type" "fistp")
5872 (set_attr "i387_cw" "trunc")
5873 (set_attr "mode" "<MODE>")])
5874
;; x87 truncating conversion of an "f" register to a DImode memory
;; destination without FISTTP; text comes from
;; output_fix_trunc (insn, operands, false).
;;   operands 2, 3 - HImode memory operands that are only (use)d
;;                   (NOTE(review): presumably the saved and truncating
;;                   control words set up by *fix_trunc<mode>_i387_1)
;;   operand 4     - XFmode "f" scratch that is clobbered
;; Disabled on TARGET_64BIT when the source is an SSE float mode.
5875 (define_insn "fix_truncdi_i387"
5876 [(set (match_operand:DI 0 "nonimmediate_operand" "=m")
5877 (fix:DI (match_operand 1 "register_operand" "f")))
5878 (use (match_operand:HI 2 "memory_operand" "m"))
5879 (use (match_operand:HI 3 "memory_operand" "m"))
5880 (clobber (match_scratch:XF 4 "=&f"))]
5881 "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
5882 && !TARGET_FISTTP
5883 && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))"
5884 "* return output_fix_trunc (insn, operands, false);"
5885 [(set_attr "type" "fistp")
5886 (set_attr "i387_cw" "trunc")
5887 (set_attr "mode" "DI")])
5888
;; x87 truncating conversion of an "f" register to an SWI24 memory
;; destination without FISTTP; text comes from
;; output_fix_trunc (insn, operands, false).  Operands 2 and 3 are HImode
;; memory operands that are only (use)d.  Unlike fix_truncdi_i387 there is
;; no XFmode scratch.  Disabled when the source is an SSE float mode.
5889 (define_insn "fix_trunc<mode>_i387"
5890 [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m")
5891 (fix:SWI24 (match_operand 1 "register_operand" "f")))
5892 (use (match_operand:HI 2 "memory_operand" "m"))
5893 (use (match_operand:HI 3 "memory_operand" "m"))]
5894 "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
5895 && !TARGET_FISTTP
5896 && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
5897 "* return output_fix_trunc (insn, operands, false);"
5898 [(set_attr "type" "fistp")
5899 (set_attr "i387_cw" "trunc")
5900 (set_attr "mode" "<MODE>")])
5901
;; Emit fnstcw with an HImode memory destination.  The stored value is
;; represented as UNSPEC_FSTCW.  The length attribute is computed as the
;; default address length of the insn plus 2.  Requires TARGET_80387.
5902 (define_insn "x86_fnstcw_1"
5903 [(set (match_operand:HI 0 "memory_operand" "=m")
5904 (unspec:HI [(const_int 0)] UNSPEC_FSTCW))]
5905 "TARGET_80387"
5906 "fnstcw\t%0"
5907 [(set (attr "length")
5908 (symbol_ref "ix86_attr_length_address_default (insn) + 2"))
5909 (set_attr "mode" "HI")
5910 (set_attr "unit" "i387")
5911 (set_attr "bdver1_decode" "vector")])
5912 \f
5913 ;; Conversion between fixed point and floating point.
5914
5915 ;; Even though we only accept memory inputs, the backend _really_
5916 ;; wants to be able to do this between registers. Thankfully, LRA
5917 ;; will fix this up for us during register allocation.
5918
;; Convert an HImode operand (constraint "m") to an X87MODEF value in an
;; x87 register with fild.  Requires TARGET_80387.  When <MODE>mode is
;; handled by SSE math the pattern is only available under
;; TARGET_MIX_SSE_I387.
5919 (define_insn "floathi<mode>2"
5920 [(set (match_operand:X87MODEF 0 "register_operand" "=f")
5921 (float:X87MODEF (match_operand:HI 1 "nonimmediate_operand" "m")))]
5922 "TARGET_80387
5923 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
5924 || TARGET_MIX_SSE_I387)"
5925 "fild%Z1\t%1"
5926 [(set_attr "type" "fmov")
5927 (set_attr "mode" "<MODE>")
5928 (set_attr "znver1_decode" "double")
5929 (set_attr "fp_int_src" "true")])
5930
;; Convert an SWI48x integer operand (constraint "m") to XFmode in an x87
;; register with fild.  The only condition is TARGET_80387.
5931 (define_insn "float<SWI48x:mode>xf2"
5932 [(set (match_operand:XF 0 "register_operand" "=f")
5933 (float:XF (match_operand:SWI48x 1 "nonimmediate_operand" "m")))]
5934 "TARGET_80387"
5935 "fild%Z1\t%1"
5936 [(set_attr "type" "fmov")
5937 (set_attr "mode" "XF")
5938 (set_attr "znver1_decode" "double")
5939 (set_attr "fp_int_src" "true")])
5940
;; Expander for signed SWI48x -> MODEF conversion.  It has no preparation
;; code, so the pattern is emitted as written.  Available either through
;; the x87 (TARGET_80387 with X87_ENABLE_FLOAT for this mode pair) or
;; through SSE math; the SSE path accepts a DImode source only on
;; TARGET_64BIT.
5941 (define_expand "float<SWI48x:mode><MODEF:mode>2"
5942 [(set (match_operand:MODEF 0 "register_operand")
5943 (float:MODEF (match_operand:SWI48x 1 "nonimmediate_operand")))]
5944 "(TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI48x:MODE>mode))
5945 || (SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
5946 && ((<SWI48x:MODE>mode != DImode) || TARGET_64BIT))")
5947
;; Signed SWI48 -> MODEF conversion with three alternatives:
;;   0 - x87: fild from memory into an "f" register
;;   1 - SSE: cvtsi2<suffix> from a general register into a "v" register
;;   2 - SSE: cvtsi2<suffix> from memory into a "v" register
;; Which alternatives are usable is decided by the "enabled" attribute
;; below, not by the insn condition alone.
5948 (define_insn "*float<SWI48:mode><MODEF:mode>2"
5949 [(set (match_operand:MODEF 0 "register_operand" "=f,v,v")
5950 (float:MODEF
5951 (match_operand:SWI48 1 "nonimmediate_operand" "m,r,m")))]
5952 "(TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI48:MODE>mode))
5953 || (SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)"
5954 "@
5955 fild%Z1\t%1
5956 %vcvtsi2<MODEF:ssemodesuffix><SWI48:rex64suffix>\t{%1, %d0|%d0, %1}
5957 %vcvtsi2<MODEF:ssemodesuffix><SWI48:rex64suffix>\t{%1, %d0|%d0, %1}"
5958 [(set_attr "type" "fmov,sseicvt,sseicvt")
5959 (set_attr "avx_partial_xmm_update" "false,true,true")
5960 (set_attr "prefix" "orig,maybe_vex,maybe_vex")
5961 (set_attr "mode" "<MODEF:MODE>")
;; prefix_rex is "1" only for the SSE alternatives with a DImode source.
5962 (set (attr "prefix_rex")
5963 (if_then_else
5964 (and (eq_attr "prefix" "maybe_vex")
5965 (match_test "<SWI48:MODE>mode == DImode"))
5966 (const_string "1")
5967 (const_string "*")))
5968 (set_attr "unit" "i387,*,*")
5969 (set_attr "athlon_decode" "*,double,direct")
5970 (set_attr "amdfam10_decode" "*,vector,double")
5971 (set_attr "bdver1_decode" "*,double,direct")
5972 (set_attr "znver1_decode" "double,*,*")
5973 (set_attr "fp_int_src" "true")
;; With SSE math for this mode: the SSE alternatives are always enabled
;; and the x87 alternative only under TARGET_MIX_SSE_I387 with
;; X87_ENABLE_FLOAT.  Without SSE math: only the x87 alternative.
5974 (set (attr "enabled")
5975 (if_then_else
5976 (match_test ("SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH"))
5977 (if_then_else
5978 (eq_attr "alternative" "0")
5979 (symbol_ref "TARGET_MIX_SSE_I387
5980 && X87_ENABLE_FLOAT (<MODEF:MODE>mode,
5981 <SWI48:MODE>mode)")
5982 (symbol_ref "true"))
5983 (if_then_else
5984 (eq_attr "alternative" "0")
5985 (symbol_ref "true")
5986 (symbol_ref "false"))))
;; The general-register source (alternative 1) is preferred for speed
;; only when TARGET_INTER_UNIT_CONVERSIONS is set.
5987 (set (attr "preferred_for_speed")
5988 (cond [(eq_attr "alternative" "1")
5989 (symbol_ref "TARGET_INTER_UNIT_CONVERSIONS")]
5990 (symbol_ref "true")))])
5991
;; SWI48 -> HFmode conversion, generated once per code in the any_float
;; iterator (NOTE(review): presumably float and unsigned_float; the
;; iterator is defined outside this chunk).  Emits
;; vcvt<floatsuffix>si2sh<rex64suffix> from a register or memory source
;; into a "v" register.  Requires TARGET_AVX512FP16.
5992 (define_insn "float<floatunssuffix><mode>hf2"
5993 [(set (match_operand:HF 0 "register_operand" "=v")
5994 (any_float:HF
5995 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))]
5996 "TARGET_AVX512FP16"
5997 "vcvt<floatsuffix>si2sh<rex64suffix>\t{%1, %d0|%d0, %1}"
5998 [(set_attr "type" "sseicvt")
5999 (set_attr "prefix" "evex")
6000 (set_attr "mode" "HF")])
6001
;; DImode -> MODEF conversion through the x87 on 32-bit targets: fild from
;; a memory operand into an "f" register.  Requires !TARGET_64BIT,
;; TARGET_80387 and X87_ENABLE_FLOAT for (<MODEF:MODE>mode, DImode).
6002 (define_insn "*floatdi<MODEF:mode>2_i387"
6003 [(set (match_operand:MODEF 0 "register_operand" "=f")
6004 (float:MODEF (match_operand:DI 1 "nonimmediate_operand" "m")))]
6005 "!TARGET_64BIT
6006 && TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, DImode)"
6007 "fild%Z1\t%1"
6008 [(set_attr "type" "fmov")
6009 (set_attr "mode" "<MODEF:MODE>")
6010 (set_attr "znver1_decode" "double")
6011 (set_attr "fp_int_src" "true")])
6012
6013 ;; Try TARGET_USE_VECTOR_CONVERTS, but not so hard as to require extra memory
6014 ;; slots when !TARGET_INTER_UNIT_MOVES_TO_VEC disables the general_regs
6015 ;; alternative in sse2_loadld.
;; After reload, replace a scalar SImode -> MODEF conversion into an SSE
;; register by a packed conversion:
;;   1. view the destination as V4SImode (operand 4) and as
;;      <ssevecmode> (operand 3);
;;   2. sse2_loadld puts the SImode source into operand 4 on top of a
;;      zero vector;
;;   3. floatv4siv4sf2 (for V4SFmode) or sse2_cvtdq2pd (otherwise)
;;      converts operand 4 into operand 3.
;; A register source is accepted only with TARGET_INTER_UNIT_MOVES_TO_VEC;
;; an extended REX SSE destination only with TARGET_AVX512VL.
6016 (define_split
6017 [(set (match_operand:MODEF 0 "sse_reg_operand")
6018 (float:MODEF (match_operand:SI 1 "nonimmediate_operand")))]
6019 "TARGET_SSE2
6020 && TARGET_USE_VECTOR_CONVERTS
6021 && optimize_function_for_speed_p (cfun)
6022 && reload_completed
6023 && (MEM_P (operands[1]) || TARGET_INTER_UNIT_MOVES_TO_VEC)
6024 && (!EXT_REX_SSE_REG_P (operands[0])
6025 || TARGET_AVX512VL)"
6026 [(const_int 0)]
6027 {
6028 operands[3] = lowpart_subreg (<ssevecmode>mode, operands[0], <MODE>mode);
6029 operands[4] = lowpart_subreg (V4SImode, operands[0], <MODE>mode);
6030
6031 emit_insn (gen_sse2_loadld (operands[4],
6032 CONST0_RTX (V4SImode), operands[1]));
6033
6034 if (<ssevecmode>mode == V4SFmode)
6035 emit_insn (gen_floatv4siv4sf2 (operands[3], operands[4]));
6036 else
6037 emit_insn (gen_sse2_cvtdq2pd (operands[3], operands[4]));
6038 DONE;
6039 })
6040
6041 ;; Avoid store forwarding (partial memory) stall penalty
6042 ;; by passing DImode value through XMM registers.
6043
;; While pseudos can still be created, rewrite a DImode-register ->
;; X87MODEF conversion on 32-bit SSE2 targets into
;; floatdi<mode>2_i387_with_xmm, passing a DImode stack slot
;; (SLOT_FLOATxFDI_387) as its memory operand.
6044 (define_split
6045 [(set (match_operand:X87MODEF 0 "register_operand")
6046 (float:X87MODEF
6047 (match_operand:DI 1 "register_operand")))]
6048 "!TARGET_64BIT && TARGET_INTER_UNIT_MOVES_TO_VEC
6049 && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
6050 && TARGET_SSE2 && optimize_function_for_speed_p (cfun)
6051 && can_create_pseudo_p ()"
6052 [(const_int 0)]
6053 {
6054 rtx s = assign_386_stack_local (DImode, SLOT_FLOATxFDI_387);
6055 emit_insn (gen_floatdi<mode>2_i387_with_xmm (operands[0], operands[1], s));
6056 DONE;
6057 })
6058
;; DImode general-register pair -> X87MODEF conversion that routes the
;; value through an XMM register and a DImode memory slot.
;;   operand 1 - DImode source in general registers
;;   operand 2 - DImode memory slot (clobbered)
;;   operand 3 - V4SI scratch that receives the assembled 64-bit value
;;   operand 4 - second V4SI scratch, needed only by alternative 1
;;               (alternative 0 is marked isa "sse4" and uses "X")
;; After reload the preparation code builds the value in operand 3, then
;; the split pattern stores its DImode low part to operand 2 and converts
;; from that memory slot.
6059 (define_insn_and_split "floatdi<X87MODEF:mode>2_i387_with_xmm"
6060 [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
6061 (float:X87MODEF
6062 (match_operand:DI 1 "register_operand" "r,r")))
6063 (clobber (match_operand:DI 2 "memory_operand" "=m,m"))
6064 (clobber (match_scratch:V4SI 3 "=x,x"))
6065 (clobber (match_scratch:V4SI 4 "=X,x"))]
6066 "!TARGET_64BIT && TARGET_INTER_UNIT_MOVES_TO_VEC
6067 && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
6068 && TARGET_SSE2 && optimize_function_for_speed_p (cfun)"
6069 "#"
6070 "&& reload_completed"
6071 [(set (match_dup 2) (match_dup 3))
6072 (set (match_dup 0) (float:X87MODEF (match_dup 2)))]
6073 {
6074 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
6075 Assemble the 64-bit DImode value in an xmm register. */
6076 emit_insn (gen_sse2_loadld (operands[3], CONST0_RTX (V4SImode),
6077 gen_lowpart (SImode, operands[1])));
6078 if (TARGET_SSE4_1)
6079 emit_insn (gen_sse4_1_pinsrd (operands[3], operands[3],
6080 gen_highpart (SImode, operands[1]),
6081 GEN_INT (2)));
6082 else
6083 {
6084 emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode),
6085 gen_highpart (SImode, operands[1])));
6086 emit_insn (gen_vec_interleave_lowv4si (operands[3], operands[3],
6087 operands[4]));
6088 }
6089 operands[3] = gen_lowpart (DImode, operands[3]);
6090 }
6091 [(set_attr "isa" "sse4,*")
6092 (set_attr "type" "multi")
6093 (set_attr "mode" "<X87MODEF:MODE>")
6094 (set_attr "unit" "i387")
6095 (set_attr "fp_int_src" "true")])
6096
6097 ;; Break partial SSE register dependency stall. This splitter should split
6098 ;; late in the pass sequence (after register rename pass), so allocated
6099 ;; registers won't change anymore.
6100
;; For non-AVX targets with TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY,
;; once the epilogue has been generated: re-express the scalar
;; SWI48 -> MODEF conversion as a vec_merge of the converted value into
;; element 0 of the destination viewed in <MODEF:ssevecmode>.  The
;; preparation code first emits a move of the zero vector into that
;; destination.
6101 (define_split
6102 [(set (match_operand:MODEF 0 "sse_reg_operand")
6103 (float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))]
6104 "!TARGET_AVX
6105 && TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY
6106 && epilogue_completed
6107 && optimize_function_for_speed_p (cfun)
6108 && (!EXT_REX_SSE_REG_P (operands[0])
6109 || TARGET_AVX512VL)"
6110 [(set (match_dup 0)
6111 (vec_merge:<MODEF:ssevecmode>
6112 (vec_duplicate:<MODEF:ssevecmode>
6113 (float:MODEF
6114 (match_dup 1)))
6115 (match_dup 0)
6116 (const_int 1)))]
6117 {
6118 const machine_mode vmode = <MODEF:ssevecmode>mode;
6119
6120 operands[0] = lowpart_subreg (vmode, operands[0], <MODEF:MODE>mode);
6121 emit_move_insn (operands[0], CONST0_RTX (vmode));
6122 })
6123
;; Unsigned QImode/HImode (SWI12) -> MODEF conversion for 32-bit SSE math:
;; widen the source to SImode with convert_to_mode (..., 1) and then emit
;; the signed floatsi<MODEF:mode>2 conversion.
6124 (define_expand "floatuns<SWI12:mode><MODEF:mode>2"
6125 [(set (match_operand:MODEF 0 "register_operand")
6126 (unsigned_float:MODEF
6127 (match_operand:SWI12 1 "nonimmediate_operand")))]
6128 "!TARGET_64BIT
6129 && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH"
6130 {
6131 operands[1] = convert_to_mode (SImode, operands[1], 1);
6132 emit_insn (gen_floatsi<MODEF:mode>2 (operands[0], operands[1]));
6133 DONE;
6134 })
6135
;; Unsigned SWI48 -> MODEF conversion as a single
;; vcvtusi2<ssemodesuffix> instruction.  The template passes operand 0
;; both as destination and as the first source.  Requires TARGET_AVX512F
;; with TARGET_SSE_MATH.
6136 (define_insn "*floatuns<SWI48:mode><MODEF:mode>2_avx512"
6137 [(set (match_operand:MODEF 0 "register_operand" "=v")
6138 (unsigned_float:MODEF
6139 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))]
6140 "TARGET_AVX512F && TARGET_SSE_MATH"
6141 "vcvtusi2<MODEF:ssemodesuffix><SWI48:rex64suffix>\t{%1, %0, %0|%0, %0, %1}"
6142 [(set_attr "type" "sseicvt")
6143 (set_attr "avx_partial_xmm_update" "true")
6144 (set_attr "prefix" "evex")
6145 (set_attr "mode" "<MODEF:MODE>")])
6146
6147 ;; Avoid store forwarding (partial memory) stall penalty by extending
6148 ;; SImode value to DImode through XMM register instead of pushing two
6149 ;; SImode values to stack. Also note that fild loads from memory only.
6150
;; Unsigned SImode -> X87MODEF conversion for 32-bit x87 with SSE2.
;; After reload it splits into three sets:
;;   1. zero-extend the SImode source into the DImode "x" scratch
;;      (operand 3),
;;   2. store that scratch to the DImode memory slot (operand 2),
;;   3. signed float conversion from the memory slot into operand 0.
6151 (define_insn_and_split "floatunssi<mode>2_i387_with_xmm"
6152 [(set (match_operand:X87MODEF 0 "register_operand" "=f")
6153 (unsigned_float:X87MODEF
6154 (match_operand:SI 1 "nonimmediate_operand" "rm")))
6155 (clobber (match_operand:DI 2 "memory_operand" "=m"))
6156 (clobber (match_scratch:DI 3 "=x"))]
6157 "!TARGET_64BIT
6158 && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
6159 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
6160 "#"
6161 "&& reload_completed"
6162 [(set (match_dup 3) (zero_extend:DI (match_dup 1)))
6163 (set (match_dup 2) (match_dup 3))
6164 (set (match_dup 0)
6165 (float:X87MODEF (match_dup 2)))]
6166 ""
6167 [(set_attr "type" "multi")
6168 (set_attr "mode" "<MODE>")])
6169
;; Expander for unsigned SImode -> X87MODEF conversion.  Three outcomes:
;;   - <MODE>mode is not handled by SSE math: emit
;;     floatunssi<mode>2_i387_with_xmm with a DImode SLOT_TEMP stack slot;
;;   - SSE math without TARGET_AVX512F: call
;;     ix86_expand_convert_uns_si<mode>_sse;
;;   - otherwise fall through, so the unsigned_float pattern itself is
;;     emitted (NOTE(review): presumably matched by
;;     *floatuns<SWI48:mode><MODEF:mode>2_avx512).
6170 (define_expand "floatunssi<mode>2"
6171 [(set (match_operand:X87MODEF 0 "register_operand")
6172 (unsigned_float:X87MODEF
6173 (match_operand:SI 1 "nonimmediate_operand")))]
6174 "(!TARGET_64BIT
6175 && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
6176 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC)
6177 || ((!TARGET_64BIT || TARGET_AVX512F)
6178 && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
6179 {
6180 if (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
6181 {
6182 emit_insn (gen_floatunssi<mode>2_i387_with_xmm
6183 (operands[0], operands[1],
6184 assign_386_stack_local (DImode, SLOT_TEMP)));
6185 DONE;
6186 }
6187 if (!TARGET_AVX512F)
6188 {
6189 ix86_expand_convert_uns_si<mode>_sse (operands[0], operands[1]);
6190 DONE;
6191 }
6192 })
6193
;; Expander for unsigned DImode -> SFmode conversion on 64-bit SSE math
;; targets.  Without TARGET_AVX512F the work is delegated to
;; x86_emit_floatuns; with it, the unsigned_float pattern is emitted as
;; written.
6194 (define_expand "floatunsdisf2"
6195 [(set (match_operand:SF 0 "register_operand")
6196 (unsigned_float:SF
6197 (match_operand:DI 1 "nonimmediate_operand")))]
6198 "TARGET_64BIT && TARGET_SSE && TARGET_SSE_MATH"
6199 {
6200 if (!TARGET_AVX512F)
6201 {
6202 x86_emit_floatuns (operands);
6203 DONE;
6204 }
6205 })
6206
;; Expander for unsigned DImode -> DFmode conversion with SSE2 math.
;; Enabled for 64-bit AVX512F targets or whenever
;; TARGET_KEEPS_VECTOR_ALIGNED_STACK holds.  Three outcomes:
;;   - 32-bit: ix86_expand_convert_uns_didf_sse;
;;   - 64-bit without TARGET_AVX512F: x86_emit_floatuns;
;;   - 64-bit with TARGET_AVX512F: the pattern is emitted as written.
6207 (define_expand "floatunsdidf2"
6208 [(set (match_operand:DF 0 "register_operand")
6209 (unsigned_float:DF
6210 (match_operand:DI 1 "nonimmediate_operand")))]
6211 "((TARGET_64BIT && TARGET_AVX512F)
6212 || TARGET_KEEPS_VECTOR_ALIGNED_STACK)
6213 && TARGET_SSE2 && TARGET_SSE_MATH"
6214 {
6215 if (!TARGET_64BIT)
6216 {
6217 ix86_expand_convert_uns_didf_sse (operands[0], operands[1]);
6218 DONE;
6219 }
6220 if (!TARGET_AVX512F)
6221 {
6222 x86_emit_floatuns (operands);
6223 DONE;
6224 }
6225 })
6226 \f
6227 ;; Load effective address instructions
6228
;; Load an address (operand 1, address_no_seg_operand) into an SWI48
;; register with lea.  When the address satisfies SImode_address_operand
;; the insn must be on a 64-bit target (asserted), a 32-bit lea{l} is
;; emitted with operand 0 printed through %k, and the "mode" attribute is
;; SI; otherwise the lea uses <MODE>.
6229 (define_insn "*lea<mode>"
6230 [(set (match_operand:SWI48 0 "register_operand" "=r")
6231 (match_operand:SWI48 1 "address_no_seg_operand" "Ts"))]
6232 "ix86_hardreg_mov_ok (operands[0], operands[1])"
6233 {
6234 if (SImode_address_operand (operands[1], VOIDmode))
6235 {
6236 gcc_assert (TARGET_64BIT);
6237 return "lea{l}\t{%E1, %k0|%k0, %E1}";
6238 }
6239 else
6240 return "lea{<imodesuffix>}\t{%E1, %0|%0, %E1}";
6241 }
6242 [(set_attr "type" "lea")
6243 (set (attr "mode")
6244 (if_then_else
6245 (match_operand 1 "SImode_address_operand")
6246 (const_string "SI")
6247 (const_string "<MODE>")))])
6248
;; Replace an address load by the sequence produced by
;; ix86_split_lea_for_addr when FLAGS_REG is dead at this point and
;; ix86_avoid_lea_for_addr approves.  For an address that satisfies
;; SImode_address_operand the sequence is generated in SImode and the
;; result is then zero-extended back into the full-width destination.
6249 (define_peephole2
6250 [(set (match_operand:SWI48 0 "register_operand")
6251 (match_operand:SWI48 1 "address_no_seg_operand"))]
6252 "ix86_hardreg_mov_ok (operands[0], operands[1])
6253 && peep2_regno_dead_p (0, FLAGS_REG)
6254 && ix86_avoid_lea_for_addr (peep2_next_insn (0), operands)"
6255 [(const_int 0)]
6256 {
6257 machine_mode mode = <MODE>mode;
6258
6259 /* Emit all operations in SImode for zero-extended addresses. */
6260 if (SImode_address_operand (operands[1], VOIDmode))
6261 mode = SImode;
6262
6263 ix86_split_lea_for_addr (peep2_next_insn (0), operands, mode);
6264
6265 /* Zero-extend return register to DImode for zero-extended addresses. */
6266 if (mode != <MODE>mode)
6267 emit_insn (gen_zero_extendsidi2 (operands[0],
6268 gen_lowpart (mode, operands[0])));
6269
6270 DONE;
6271 })
6272
6273 ;; ix86_split_lea_for_addr emits the shifts as MULT to prevent them from being
6274 ;; peephole2 optimized back into a lea. Split that into the shift during
6275 ;; the following split pass.
;; After reload, turn an in-place multiplication of a general register by
;; a const1248_operand into a left shift of the same register.  The
;; preparation statement replaces the multiplier by its exact_log2.
6276 (define_split
6277 [(set (match_operand:SWI48 0 "general_reg_operand")
6278 (mult:SWI48 (match_dup 0) (match_operand:SWI48 1 "const1248_operand")))
6279 (clobber (reg:CC FLAGS_REG))]
6280 "reload_completed"
6281 [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))
6282 (clobber (reg:CC FLAGS_REG))])]
6283 "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
6284 \f
6285 ;; Add instructions
6286
;; Standard-name expander for integer addition over the SDWIM modes.
;; It is always enabled (empty condition) and hands the work to
;; ix86_expand_binary_operator with PLUS, passing TARGET_APX_NDD as the
;; last argument.
6287 (define_expand "add<mode>3"
6288 [(set (match_operand:SDWIM 0 "nonimmediate_operand")
6289 (plus:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")
6290 (match_operand:SDWIM 2 "<general_hilo_operand>")))]
6291 ""
6292 "ix86_expand_binary_operator (PLUS, <MODE>mode, operands,
6293 TARGET_APX_NDD); DONE;")
6294
;; Double-word (<DWI>) addition.  After reload it is split into an add of
;; the low halves that also sets the carry (CCC) and an add of the high
;; halves that includes that carry.
;; split_double_mode is called on operands 0..2; the code below then uses
;; operands[0..2] for the first add and operands[3..5] for the second
;; (NOTE(review): i.e. low halves and high halves respectively -- confirm
;; against split_double_mode).
;; Special case: when operands[2] (after splitting) is const0_rtx the
;; two-insn sequence is not emitted; see the C code for the moves/adds
;; that replace it.
;; Alternatives 2 and 3 are APX NDD forms (isa "apx_ndd").
6295 (define_insn_and_split "*add<dwi>3_doubleword"
6296 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,&r,&r")
6297 (plus:<DWI>
6298 (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0,ro,r")
6299 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o,r<di>,r")))
6300 (clobber (reg:CC FLAGS_REG))]
6301 "ix86_binary_operator_ok (PLUS, <DWI>mode, operands, TARGET_APX_NDD)"
6302 "#"
6303 "&& reload_completed"
6304 [(parallel [(set (reg:CCC FLAGS_REG)
6305 (compare:CCC
6306 (plus:DWIH (match_dup 1) (match_dup 2))
6307 (match_dup 1)))
6308 (set (match_dup 0)
6309 (plus:DWIH (match_dup 1) (match_dup 2)))])
6310 (parallel [(set (match_dup 3)
6311 (plus:DWIH
6312 (plus:DWIH
6313 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
6314 (match_dup 4))
6315 (match_dup 5)))
6316 (clobber (reg:CC FLAGS_REG))])]
6317 {
6318 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
6319 if (operands[2] == const0_rtx)
6320 {
6321 /* Under NDD op0 and op1 may not equal, do not delete insn then. */
6322 bool emit_insn_deleted_note_p = true;
6323 if (!rtx_equal_p (operands[0], operands[1]))
6324 {
6325 emit_move_insn (operands[0], operands[1]);
6326 emit_insn_deleted_note_p = false;
6327 }
6328 if (operands[5] != const0_rtx)
6329 ix86_expand_binary_operator (PLUS, <MODE>mode, &operands[3],
6330 TARGET_APX_NDD);
6331 else if (!rtx_equal_p (operands[3], operands[4]))
6332 emit_move_insn (operands[3], operands[4]);
6333 else if (emit_insn_deleted_note_p)
6334 emit_note (NOTE_INSN_DELETED);
6335 DONE;
6336 }
6337 }
6338 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
6339
;; Double-word addition where one addend is a zero-extended single-word
;; value (operand 2, mode DWIH) and the other a full <DWI> value
;; (operand 1).  After reload it splits into a single-word add of
;; operand 2 that sets the carry, and an add of the carry plus constant 0
;; for the other half.  Only operands 0 and 1 are passed to
;; split_double_mode (count 2), with the second halves stored in
;; operands[3] and operands[4].
;; Alternatives 2 and 3 are APX NDD forms (isa "apx_ndd").
6340 (define_insn_and_split "*add<dwi>3_doubleword_zext"
6341 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o,&r,&r")
6342 (plus:<DWI>
6343 (zero_extend:<DWI>
6344 (match_operand:DWIH 2 "nonimmediate_operand" "rm,r,rm,r"))
6345 (match_operand:<DWI> 1 "nonimmediate_operand" "0,0,r,m")))
6346 (clobber (reg:CC FLAGS_REG))]
6347 "ix86_binary_operator_ok (UNKNOWN, <DWI>mode, operands,
6348 TARGET_APX_NDD)"
6349 "#"
6350 "&& reload_completed"
6351 [(parallel [(set (reg:CCC FLAGS_REG)
6352 (compare:CCC
6353 (plus:DWIH (match_dup 1) (match_dup 2))
6354 (match_dup 1)))
6355 (set (match_dup 0)
6356 (plus:DWIH (match_dup 1) (match_dup 2)))])
6357 (parallel [(set (match_dup 3)
6358 (plus:DWIH
6359 (plus:DWIH
6360 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
6361 (match_dup 4))
6362 (const_int 0)))
6363 (clobber (reg:CC FLAGS_REG))])]
6364 "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[3]);"
6365 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
6366
;; Double-word addition of operand 1 (tied to the destination) and a
;; value built from two single-word operands: operand 2 zero-extended and
;; shifted left by operand 3, combined (any_or_plus) with zero-extended
;; operand 4.  The insn condition requires the shift count to equal the
;; single-word width in bits (<MODE_SIZE> * BITS_PER_UNIT).
;; After reload it splits into a single-word add of operand 4 that sets
;; the carry, followed by an add of carry + operands[6] + operand 2 into
;; operands[5]; operands[5] and operands[6] are the second halves of
;; operands 0 and 1 produced by split_double_mode.
6367 (define_insn_and_split "*add<dwi>3_doubleword_concat"
6368 [(set (match_operand:<DWI> 0 "register_operand" "=&r")
6369 (plus:<DWI>
6370 (any_or_plus:<DWI>
6371 (ashift:<DWI>
6372 (zero_extend:<DWI>
6373 (match_operand:DWIH 2 "nonimmediate_operand" "rm"))
6374 (match_operand:QI 3 "const_int_operand"))
6375 (zero_extend:<DWI>
6376 (match_operand:DWIH 4 "nonimmediate_operand" "rm")))
6377 (match_operand:<DWI> 1 "register_operand" "0")))
6378 (clobber (reg:CC FLAGS_REG))]
6379 "INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
6380 "#"
6381 "&& reload_completed"
6382 [(parallel [(set (reg:CCC FLAGS_REG)
6383 (compare:CCC
6384 (plus:DWIH (match_dup 1) (match_dup 4))
6385 (match_dup 1)))
6386 (set (match_dup 0)
6387 (plus:DWIH (match_dup 1) (match_dup 4)))])
6388 (parallel [(set (match_dup 5)
6389 (plus:DWIH
6390 (plus:DWIH
6391 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
6392 (match_dup 6))
6393 (match_dup 2)))
6394 (clobber (reg:CC FLAGS_REG))])]
6395 "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[5]);")
6396
;; Like *add<dwi>3_doubleword_concat, but the second addend is a
;; zero-extended single-word operand 1 instead of a full <DWI> value.
;; After reload it splits into four steps:
;;   1. copy operand 4 into operands[0],
;;   2. copy operand 2 into operands[5],
;;   3. add operand 1 into operands[0], setting the carry,
;;   4. add the carry (plus constant 0) into operands[5].
;; Only operand 0 is passed to split_double_mode (count 1); its second
;; half is stored in operands[5].
6397 (define_insn_and_split "*add<dwi>3_doubleword_concat_zext"
6398 [(set (match_operand:<DWI> 0 "register_operand" "=&r")
6399 (plus:<DWI>
6400 (any_or_plus:<DWI>
6401 (ashift:<DWI>
6402 (zero_extend:<DWI>
6403 (match_operand:DWIH 2 "nonimmediate_operand" "rm"))
6404 (match_operand:QI 3 "const_int_operand"))
6405 (zero_extend:<DWI>
6406 (match_operand:DWIH 4 "nonimmediate_operand" "rm")))
6407 (zero_extend:<DWI>
6408 (match_operand:DWIH 1 "nonimmediate_operand" "rm"))))
6409 (clobber (reg:CC FLAGS_REG))]
6410 "INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
6411 "#"
6412 "&& reload_completed"
6413 [(set (match_dup 0) (match_dup 4))
6414 (set (match_dup 5) (match_dup 2))
6415 (parallel [(set (reg:CCC FLAGS_REG)
6416 (compare:CCC
6417 (plus:DWIH (match_dup 0) (match_dup 1))
6418 (match_dup 0)))
6419 (set (match_dup 0)
6420 (plus:DWIH (match_dup 0) (match_dup 1)))])
6421 (parallel [(set (match_dup 5)
6422 (plus:DWIH
6423 (plus:DWIH
6424 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
6425 (match_dup 5))
6426 (const_int 0)))
6427 (clobber (reg:CC FLAGS_REG))])]
6428 "split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[5]);")
6429
;; SImode/DImode (SWI48) addition that clobbers the flags.
;; Eight alternatives:
;;   0-1 - two-operand add, destination tied to operand 1
;;   2   - destination tied to operand 2; the output code swaps
;;         operands 1 and 2 before printing
;;   3   - type "lea": prints "#", so the insn is expected to be split
;;   4-7 - APX NDD (isa "apx_ndd"): three-operand forms
;; The output code picks inc/dec for type "incdec", and otherwise sub
;; when x86_maybe_negate_const_int negates the constant, else add.
6430 (define_insn "*add<mode>_1"
6431 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r,r,r,r,r")
6432 (plus:SWI48
6433 (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r,rm,r,m,r")
6434 (match_operand:SWI48 2 "x86_64_general_operand" "re,BM,0,le,r,e,je,BM")))
6435 (clobber (reg:CC FLAGS_REG))]
6436 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands,
6437 TARGET_APX_NDD)"
6438 {
6439 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
6440 switch (get_attr_type (insn))
6441 {
6442 case TYPE_LEA:
6443 return "#";
6444
6445 case TYPE_INCDEC:
6446 if (operands[2] == const1_rtx)
6447 return use_ndd ? "inc{<imodesuffix>}\t{%1, %0|%0, %1}"
6448 : "inc{<imodesuffix>}\t%0";
6449 else
6450 {
6451 gcc_assert (operands[2] == constm1_rtx);
6452 return use_ndd ? "dec{<imodesuffix>}\t{%1, %0|%0, %1}"
6453 : "dec{<imodesuffix>}\t%0";
6454 }
6455
6456 default:
6457 /* For most processors, ADD is faster than LEA. This alternative
6458 was added to use ADD as much as possible. */
6459 if (which_alternative == 2)
6460 std::swap (operands[1], operands[2]);
6461
6462 if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
6463 return use_ndd ? "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
6464 : "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
6465
6466 return use_ndd ? "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
6467 : "add{<imodesuffix>}\t{%2, %0|%0, %2}";
6468 }
6469 }
6470 [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd,apx_ndd,apx_ndd")
;; Alternative 3 is always "lea"; otherwise "incdec" when operand 2 is an
;; incdec_operand, else "alu".
6471 (set (attr "type")
6472 (cond [(eq_attr "alternative" "3")
6473 (const_string "lea")
6474 (match_operand:SWI48 2 "incdec_operand")
6475 (const_string "incdec")
6476 ]
6477 (const_string "alu")))
;; For type "alu" with a const128_operand the immediate length is 1.
6478 (set (attr "length_immediate")
6479 (if_then_else
6480 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6481 (const_string "1")
6482 (const_string "*")))
6483 (set_attr "mode" "<MODE>")])
6484
6485 ;; It may seem that a nonimmediate operand is the proper one for operand 1.
6486 ;; The addsi_1 pattern allows a nonimmediate operand at that place and
6487 ;; we take care in ix86_binary_operator_ok to not allow two memory
6488 ;; operands so proper swapping will be done in reload. This allows
6489 ;; patterns constructed from addsi_1 to match.
6490
;; SImode addition whose result is zero-extended into a DImode register
;; (64-bit targets only); clobbers the flags.
;; Five alternatives:
;;   0   - two-operand add, destination tied to operand 1
;;   1   - destination tied to operand 2; the output code swaps
;;         operands 1 and 2 before printing
;;   2   - type "lea": prints "#", so the insn is expected to be split
;;   3-4 - APX NDD (isa "apx_ndd"): three-operand forms
;; Operand 0 is always printed through the %k modifier.
;; The NDD add/sub templates are spelled "{%2, %1, %k0|...}" so that the
;; operand separators match the other add patterns in this file.
6491 (define_insn "addsi_1_zext"
6492 [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r")
6493 (zero_extend:DI
6494 (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r,r,r,rm")
6495 (match_operand:SI 2 "x86_64_general_operand" "rBMe,0,le,rBMe,re"))))
6496 (clobber (reg:CC FLAGS_REG))]
6497 "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands,
6498 TARGET_APX_NDD)"
6499 {
6500 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
6501 switch (get_attr_type (insn))
6502 {
6503 case TYPE_LEA:
6504 return "#";
6505
6506 case TYPE_INCDEC:
6507 if (operands[2] == const1_rtx)
6508 return use_ndd ? "inc{l}\t{%1, %k0|%k0, %1}"
6509 : "inc{l}\t%k0";
6510 else
6511 {
6512 gcc_assert (operands[2] == constm1_rtx);
6513 return use_ndd ? "dec{l}\t{%1, %k0|%k0, %1}"
6514 : "dec{l}\t%k0";
6515 }
6516
6517 default:
6518 /* For most processors, ADD is faster than LEA. This alternative
6519 was added to use ADD as much as possible. */
6520 if (which_alternative == 1)
6521 std::swap (operands[1], operands[2]);
6522
6523 if (x86_maybe_negate_const_int (&operands[2], SImode))
6524 return use_ndd ? "sub{l}\t{%2, %1, %k0|%k0, %1, %2}"
6525 : "sub{l}\t{%2, %k0|%k0, %2}";
6526
6527 return use_ndd ? "add{l}\t{%2, %1, %k0|%k0, %1, %2}"
6528 : "add{l}\t{%2, %k0|%k0, %2}";
6529 }
6530 }
6531 [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd")
;; Alternative 2 is always "lea"; otherwise "incdec" when operand 2 is an
;; incdec_operand, else "alu".
6532 (set (attr "type")
6533 (cond [(eq_attr "alternative" "2")
6534 (const_string "lea")
6535 (match_operand:SI 2 "incdec_operand")
6536 (const_string "incdec")
6537 ]
6538 (const_string "alu")))
;; For type "alu" with a const128_operand the immediate length is 1.
6539 (set (attr "length_immediate")
6540 (if_then_else
6541 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6542 (const_string "1")
6543 (const_string "*")))
6544 (set_attr "mode" "SI")])
6545
;; HImode addition that clobbers the flags.
;; Six alternatives:
;;   0-1 - two-operand add, destination tied to operand 1
;;   2   - destination tied to operand 2; the output code swaps
;;         operands 1 and 2 before printing
;;   3   - type "lea" on "Yp" registers, mode attribute SI: prints "#"
;;   4-5 - APX NDD (isa "apx_ndd"): three-operand forms
6546 (define_insn "*addhi_1"
6547 [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r,Yp,r,r")
6548 (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,Yp,rm,r")
6549 (match_operand:HI 2 "general_operand" "rn,m,0,ln,rn,m")))
6550 (clobber (reg:CC FLAGS_REG))]
6551 "ix86_binary_operator_ok (PLUS, HImode, operands,
6552 TARGET_APX_NDD)"
6553 {
6554 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
6555 switch (get_attr_type (insn))
6556 {
6557 case TYPE_LEA:
6558 return "#";
6559
6560 case TYPE_INCDEC:
6561 if (operands[2] == const1_rtx)
6562 return use_ndd ? "inc{w}\t{%1, %0|%0, %1}" : "inc{w}\t%0";
6563 else
6564 {
6565 gcc_assert (operands[2] == constm1_rtx);
6566 return use_ndd ? "dec{w}\t{%1, %0|%0, %1}" : "dec{w}\t%0";
6567 }
6568
6569 default:
6570 /* For most processors, ADD is faster than LEA. This alternative
6571 was added to use ADD as much as possible. */
6572 if (which_alternative == 2)
6573 std::swap (operands[1], operands[2]);
6574
6575 if (x86_maybe_negate_const_int (&operands[2], HImode))
6576 return use_ndd ? "sub{w}\t{%2, %1, %0|%0, %1, %2}"
6577 : "sub{w}\t{%2, %0|%0, %2}";
6578
6579 return use_ndd ? "add{w}\t{%2, %1, %0|%0, %1, %2}"
6580 : "add{w}\t{%2, %0|%0, %2}";
6581 }
6582 }
6583 [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd")
;; Alternative 3 is always "lea"; otherwise "incdec" when operand 2 is an
;; incdec_operand, else "alu".
6584 (set (attr "type")
6585 (cond [(eq_attr "alternative" "3")
6586 (const_string "lea")
6587 (match_operand:HI 2 "incdec_operand")
6588 (const_string "incdec")
6589 ]
6590 (const_string "alu")))
;; For type "alu" with a const128_operand the immediate length is 1.
6591 (set (attr "length_immediate")
6592 (if_then_else
6593 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6594 (const_string "1")
6595 (const_string "*")))
6596 (set_attr "mode" "HI,HI,HI,SI,HI,HI")])
6597
;; QImode addition that clobbers the flags.
;; Eight alternatives (see the "mode" attribute for which ones widen):
;;   0-2 - byte operations on "q" registers or memory (mode QI);
;;         alternative 2 has the destination tied to operand 2
;;   3-4 - any general register, performed as 32-bit operations
;;         (mode SI); alternative 4 has the destination tied to operand 2
;;   5   - type "lea" on "Yp" registers (mode SI): prints "#"
;;   6-7 - APX NDD (isa "apx_ndd"): three-operand byte forms
;; In the output code "widen" is true whenever the mode attribute is not
;; QI; the non-NDD templates then use the {l} suffix and %k operands.
;; Operands 1 and 2 are swapped before printing for alternatives 2 and 4.
6598 (define_insn "*addqi_1"
6599 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,q,r,r,Yp,r,r")
6600 (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,Yp,rm,r")
6601 (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln,rn,m")))
6602 (clobber (reg:CC FLAGS_REG))]
6603 "ix86_binary_operator_ok (PLUS, QImode, operands, TARGET_APX_NDD)"
6604 {
6605 bool widen = (get_attr_mode (insn) != MODE_QI);
6606 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
6607 switch (get_attr_type (insn))
6608 {
6609 case TYPE_LEA:
6610 return "#";
6611
6612 case TYPE_INCDEC:
6613 if (operands[2] == const1_rtx)
6614 if (use_ndd)
6615 return "inc{b}\t{%1, %0|%0, %1}";
6616 else
6617 return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
6618 else
6619 {
6620 gcc_assert (operands[2] == constm1_rtx);
6621 if (use_ndd)
6622 return "dec{b}\t{%1, %0|%0, %1}";
6623 else
6624 return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
6625 }
6626
6627 default:
6628 /* For most processors, ADD is faster than LEA. These alternatives
6629 were added to use ADD as much as possible. */
6630 if (which_alternative == 2 || which_alternative == 4)
6631 std::swap (operands[1], operands[2]);
6632
6633 if (x86_maybe_negate_const_int (&operands[2], QImode))
6634 {
6635 if (use_ndd)
6636 return "sub{b}\t{%2, %1, %0|%0, %1, %2}";
6637 else
6638 return widen ? "sub{l}\t{%2, %k0|%k0, %2}"
6639 : "sub{b}\t{%2, %0|%0, %2}";
6640 }
6641 if (use_ndd)
6642 return "add{b}\t{%2, %1, %0|%0, %1, %2}";
6643 else
6644 return widen ? "add{l}\t{%k2, %k0|%k0, %k2}"
6645 : "add{b}\t{%2, %0|%0, %2}";
6646 }
6647 }
6648 [(set_attr "isa" "*,*,*,*,*,*,apx_ndd,apx_ndd")
;; Alternative 5 is always "lea"; otherwise "incdec" when operand 2 is an
;; incdec_operand, else "alu".
6649 (set (attr "type")
6650 (cond [(eq_attr "alternative" "5")
6651 (const_string "lea")
6652 (match_operand:QI 2 "incdec_operand")
6653 (const_string "incdec")
6654 ]
6655 (const_string "alu")))
;; For type "alu" with a const128_operand the immediate length is 1.
6656 (set (attr "length_immediate")
6657 (if_then_else
6658 (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
6659 (const_string "1")
6660 (const_string "*")))
6661 (set_attr "mode" "QI,QI,QI,SI,SI,SI,QI,QI")
6662 ;; Potential partial reg stall on alternatives 3 and 4.
6663 (set (attr "preferred_for_speed")
6664 (cond [(eq_attr "alternative" "3,4")
6665 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
6666 (symbol_ref "true")))])
6667
;; Add into the strict low part of a QImode/HImode register.
;; Alternative 0 is the destructive form and is printed directly.
;; Alternative 1 is needed to work around LRA limitation, see PR82524;
;; it is emitted as "#" and, after reload, split into a copy of operand 1
;; into the low part of operand 0 followed by the destructive add.
;;
;; The pattern iterates over SWI12, so the constant-negation helper and
;; the "incdec" attribute test use the iterator mode rather than a
;; hard-coded QImode, in line with the other iterator-based add patterns
;; in this file (e.g. *add<mode>_2).
(define_insn_and_split "*add<mode>_1_slp"
  [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
        (plus:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "%0,!<r>")
                    (match_operand:SWI12 2 "general_operand" "<r>mn,<r>mn")))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
{
  /* The non-tied alternative is always split after reload.  */
  if (which_alternative)
    return "#";

  switch (get_attr_type (insn))
    {
    case TYPE_INCDEC:
      if (operands[2] == const1_rtx)
        return "inc{<imodesuffix>}\t%0";
      else
        {
          gcc_assert (operands[2] == constm1_rtx);
          return "dec{<imodesuffix>}\t%0";
        }

    default:
      /* Pass the mode the operands really have, so that the helper
         judges the constant against the operand width.  */
      if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
        return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";

      return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
    }
}
  "&& reload_completed
   && !(rtx_equal_p (operands[0], operands[1])
        || rtx_equal_p (operands[0], operands[2]))"
  [(set (strict_low_part (match_dup 0)) (match_dup 1))
   (parallel
     [(set (strict_low_part (match_dup 0))
           (plus:SWI12 (match_dup 0) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set (attr "type")
     (if_then_else (match_operand:SWI12 2 "incdec_operand")
        (const_string "incdec")
        (const_string "alu")))
   (set_attr "mode" "<MODE>")])
6711
6712 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Adds the QImode subreg of an 8-bit field at bit 8 of operand 2 (matched
;; through "extract_operator") to operand 1 and stores the sum into the
;; strict low part of operand 0.  Alternative 0 has operand 1 tied to
;; operand 0 and prints a single add that names operand 2 through %h2.
;; Alternative 1 prints "#"; after reload, if operands 0 and 1 differ, it
;; is split into a copy of operand 1 into the low part of operand 0
;; followed by the tied form of the add.
6713 (define_insn_and_split "*addqi_ext<mode>_1_slp"
6714 [(set (strict_low_part (match_operand:QI 0 "register_operand" "+Q,&Q"))
6715 (plus:QI
6716 (subreg:QI
6717 (match_operator:SWI248 3 "extract_operator"
6718 [(match_operand 2 "int248_register_operand" "Q,Q")
6719 (const_int 8)
6720 (const_int 8)]) 0)
6721 (match_operand:QI 1 "nonimmediate_operand" "0,!qm")))
6722 (clobber (reg:CC FLAGS_REG))]
6723 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
6724 "@
6725 add{b}\t{%h2, %0|%0, %h2}
6726 #"
6727 "&& reload_completed
6728 && !rtx_equal_p (operands[0], operands[1])"
6729 [(set (strict_low_part (match_dup 0)) (match_dup 1))
6730 (parallel
6731 [(set (strict_low_part (match_dup 0))
6732 (plus:QI
6733 (subreg:QI
6734 (match_op_dup 3
6735 [(match_dup 2) (const_int 8) (const_int 8)]) 0)
6736 (match_dup 0)))
6737 (clobber (reg:CC FLAGS_REG))])]
6738 ""
6739 [(set_attr "type" "alu")
6740 (set_attr "mode" "QI")])
6741
;; Sum of two 8-bit fields at bit 8 (of operands 1 and 2) stored into the
;; strict low part of operand 0, which is an earlyclobber ("+&Q").  The
;; insn itself is always "#".  After reload it is split into two insns:
;; first the field of operand 2 is copied into the low part of operand 0,
;; then the field of operand 1 is added to it (with a flags clobber).
6742 (define_insn_and_split "*addqi_ext<mode>_2_slp"
6743 [(set (strict_low_part (match_operand:QI 0 "register_operand" "+&Q"))
6744 (plus:QI
6745 (subreg:QI
6746 (match_operator:SWI248 3 "extract_operator"
6747 [(match_operand 1 "int248_register_operand" "Q")
6748 (const_int 8)
6749 (const_int 8)]) 0)
6750 (subreg:QI
6751 (match_operator:SWI248 4 "extract_operator"
6752 [(match_operand 2 "int248_register_operand" "Q")
6753 (const_int 8)
6754 (const_int 8)]) 0)))
6755 (clobber (reg:CC FLAGS_REG))]
6756 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
6757 "#"
6758 "&& reload_completed"
6759 [(set (strict_low_part (match_dup 0))
6760 (subreg:QI
6761 (match_op_dup 4
6762 [(match_dup 2) (const_int 8) (const_int 8)]) 0))
6763 (parallel
6764 [(set (strict_low_part (match_dup 0))
6765 (plus:QI
6766 (subreg:QI
6767 (match_op_dup 3
6768 [(match_dup 1) (const_int 8) (const_int 8)]) 0)
6769 (match_dup 0)))
6770 (clobber (reg:CC FLAGS_REG))])]
6771 ""
6772 [(set_attr "type" "alu")
6773 (set_attr "mode" "QI")])
6774
6775 ;; Split non destructive adds if we cannot use lea.
;; Applies after reload when ix86_avoid_lea_for_add accepts the insn:
;; "op0 = op1 + op2" becomes a move "op0 = op1" followed by the
;; destructive add "op0 = op0 + op2", which keeps the flags clobber.
6776 (define_split
6777 [(set (match_operand:SWI48 0 "register_operand")
6778 (plus:SWI48 (match_operand:SWI48 1 "register_operand")
6779 (match_operand:SWI48 2 "x86_64_nonmemory_operand")))
6780 (clobber (reg:CC FLAGS_REG))]
6781 "reload_completed && ix86_avoid_lea_for_add (insn, operands)"
6782 [(set (match_dup 0) (match_dup 1))
6783 (parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 2)))
6784 (clobber (reg:CC FLAGS_REG))])])
6785
6786 ;; Split non destructive adds if we cannot use lea.
;; Zero-extending variant (64-bit targets only).  Operand 3 is created in
;; the preparation statement as the SImode low part of the DImode
;; destination; operand 1 is first copied into it and the zero-extended
;; add then uses it as its first source.
6787 (define_split
6788 [(set (match_operand:DI 0 "register_operand")
6789 (zero_extend:DI
6790 (plus:SI (match_operand:SI 1 "register_operand")
6791 (match_operand:SI 2 "x86_64_nonmemory_operand"))))
6792 (clobber (reg:CC FLAGS_REG))]
6793 "TARGET_64BIT
6794 && reload_completed && ix86_avoid_lea_for_add (insn, operands)"
6795 [(set (match_dup 3) (match_dup 1))
6796 (parallel [(set (match_dup 0)
6797 (zero_extend:DI (plus:SI (match_dup 3) (match_dup 2))))
6798 (clobber (reg:CC FLAGS_REG))])]
6799 "operands[3] = gen_lowpart (SImode, operands[0]);")
6800
;; Convert add to the lea pattern to avoid flags dependency.
;; After reload, when ix86_lea_for_add_ok accepts the insn, the add is
;; re-emitted as a plain set without the flags clobber.  The sum is
;; formed in <LEAMODE>; if that differs from the operand mode, all three
;; operands are replaced by their <LEAMODE> low parts first.
(define_split
  [(set (match_operand:SWI 0 "register_operand")
        (plus:SWI (match_operand:SWI 1 "register_operand")
                  (match_operand:SWI 2 "<nonmemory_operand>")))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed && ix86_lea_for_add_ok (insn, operands)"
  [(set (match_dup 0)
        (plus:<LEAMODE> (match_dup 1) (match_dup 2)))]
{
  if (<MODE>mode != <LEAMODE>mode)
    for (int opno = 0; opno < 3; opno++)
      operands[opno] = gen_lowpart (<LEAMODE>mode, operands[opno]);
})
6818
6819 ;; Convert add to the lea pattern to avoid flags dependency.
;; Zero-extending variant (64-bit targets only): the replacement is the
;; same zero-extended SImode sum, just without the flags clobber.
6820 (define_split
6821 [(set (match_operand:DI 0 "register_operand")
6822 (zero_extend:DI
6823 (plus:SI (match_operand:SI 1 "register_operand")
6824 (match_operand:SI 2 "x86_64_nonmemory_operand"))))
6825 (clobber (reg:CC FLAGS_REG))]
6826 "TARGET_64BIT && reload_completed && ix86_lea_for_add_ok (insn, operands)"
6827 [(set (match_dup 0)
6828 (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))])
6829
;; Add that also sets the flags from comparing the sum with zero
;; (CCGOCmode) and stores the sum in operand 0.  Alternatives 3 and 4 are
;; the apx_ndd three-operand forms.
(define_insn "*add<mode>_2"
  [(set (reg FLAGS_REG)
        (compare
          (plus:SWI
            (match_operand:SWI 1 "nonimmediate_operand" "%0,0,<r>,rm,r")
            (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,0,r<i>,<m>"))
          (const_int 0)))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,<r>,r,r")
        (plus:SWI (match_dup 1) (match_dup 2)))]
  "ix86_match_ccmode (insn, CCGOCmode)
   && ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
{
  const bool ndd_p = (get_attr_isa (insn) == ISA_APX_NDD);

  switch (get_attr_type (insn))
    {
    case TYPE_INCDEC:
      if (operands[2] != const1_rtx)
        {
          /* The only other increment/decrement constant is -1.  */
          gcc_assert (operands[2] == constm1_rtx);
          if (ndd_p)
            return "dec{<imodesuffix>}\t{%1, %0|%0, %1}";
          return "dec{<imodesuffix>}\t%0";
        }
      if (ndd_p)
        return "inc{<imodesuffix>}\t{%1, %0|%0, %1}";
      return "inc{<imodesuffix>}\t%0";

    default:
      /* Alternative 2 ties the destination to operand 2.  */
      if (which_alternative == 2)
        std::swap (operands[1], operands[2]);

      if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
        {
          if (ndd_p)
            return "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
          return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
        }

      if (ndd_p)
        return "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
      return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
    }
}
  [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd")
   (set (attr "type")
      (if_then_else (match_operand:SWI 2 "incdec_operand")
        (const_string "incdec")
        (const_string "alu")))
   (set (attr "length_immediate")
      (if_then_else
        (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
        (const_string "1")
        (const_string "*")))
   (set_attr "mode" "<MODE>")])
6879
;; See comment for addsi_1_zext why we do use nonimmediate_operand
;; SImode add that sets the flags (CCGOCmode) and whose result is
;; zero-extended into a DImode register; the destination is therefore
;; printed with %k0.  Alternatives 2 and 3 are the apx_ndd forms.
(define_insn "*addsi_2_zext"
  [(set (reg FLAGS_REG)
        (compare
          (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r,r,rm")
                   (match_operand:SI 2 "x86_64_general_operand" "rBMe,0,rBMe,re"))
          (const_int 0)))
   (set (match_operand:DI 0 "register_operand" "=r,r,r,r")
        (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
   && ix86_binary_operator_ok (PLUS, SImode, operands, TARGET_APX_NDD)"
{
  const bool ndd_p = (get_attr_isa (insn) == ISA_APX_NDD);

  switch (get_attr_type (insn))
    {
    case TYPE_INCDEC:
      if (operands[2] != const1_rtx)
        {
          gcc_assert (operands[2] == constm1_rtx);
          if (ndd_p)
            return "dec{l}\t{%1, %k0|%k0, %1}";
          return "dec{l}\t%k0";
        }
      if (ndd_p)
        return "inc{l}\t{%1, %k0|%k0, %1}";
      return "inc{l}\t%k0";

    default:
      /* Alternative 1 ties the destination to operand 2.  */
      if (which_alternative == 1)
        std::swap (operands[1], operands[2]);

      if (x86_maybe_negate_const_int (&operands[2], SImode))
        {
          if (ndd_p)
            return "sub{l}\t{%2, %1, %k0|%k0, %1, %2}";
          return "sub{l}\t{%2, %k0|%k0, %2}";
        }

      if (ndd_p)
        return "add{l}\t{%2, %1, %k0|%k0, %1, %2}";
      return "add{l}\t{%2, %k0|%k0, %2}";
    }
}
  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
   (set (attr "type")
      (if_then_else (match_operand:SI 2 "incdec_operand")
        (const_string "incdec")
        (const_string "alu")))
   (set (attr "length_immediate")
      (if_then_else
        (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
        (const_string "1")
        (const_string "*")))
   (set_attr "mode" "SI")])
6929
;; Flags-only add: matches a CCZmode compare of the negated operand 2
;; against operand 1.  The sum itself only lands in a scratch register
;; (operand 0).  Alternatives 2 and 3 are the apx_ndd forms.
(define_insn "*add<mode>_3"
  [(set (reg FLAGS_REG)
        (compare
          (neg:SWI (match_operand:SWI 2 "<general_operand>" "<g>,0,<g>,re"))
          (match_operand:SWI 1 "nonimmediate_operand" "%0,<r>,r,rm")))
   (clobber (match_scratch:SWI 0 "=<r>,<r>,r,r"))]
  "ix86_match_ccmode (insn, CCZmode)
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
{
  const bool ndd_p = (get_attr_isa (insn) == ISA_APX_NDD);

  switch (get_attr_type (insn))
    {
    case TYPE_INCDEC:
      if (operands[2] != const1_rtx)
        {
          gcc_assert (operands[2] == constm1_rtx);
          if (ndd_p)
            return "dec{<imodesuffix>}\t{%1, %0|%0, %1}";
          return "dec{<imodesuffix>}\t%0";
        }
      if (ndd_p)
        return "inc{<imodesuffix>}\t{%1, %0|%0, %1}";
      return "inc{<imodesuffix>}\t%0";

    default:
      /* Alternative 1 ties the scratch to operand 2.  */
      if (which_alternative == 1)
        std::swap (operands[1], operands[2]);

      if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
        {
          if (ndd_p)
            return "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
          return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
        }

      if (ndd_p)
        return "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
      return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
    }
}
  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
   (set (attr "type")
      (if_then_else (match_operand:SWI 2 "incdec_operand")
        (const_string "incdec")
        (const_string "alu")))
   (set (attr "length_immediate")
      (if_then_else
        (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
        (const_string "1")
        (const_string "*")))
   (set_attr "mode" "<MODE>")])
6976
;; See comment for addsi_1_zext why we do use nonimmediate_operand
;; CCZmode compare of the negated operand 2 against operand 1, combined
;; with storing the zero-extended SImode sum in a DImode register
;; (printed with %k0).  Alternatives 2 and 3 are the apx_ndd forms.
(define_insn "*addsi_3_zext"
  [(set (reg FLAGS_REG)
        (compare
          (neg:SI (match_operand:SI 2 "x86_64_general_operand" "rBMe,0,rBMe,re"))
          (match_operand:SI 1 "nonimmediate_operand" "%0,r,r,rm")))
   (set (match_operand:DI 0 "register_operand" "=r,r,r,r")
        (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
  "TARGET_64BIT && ix86_match_ccmode (insn, CCZmode)
   && ix86_binary_operator_ok (PLUS, SImode, operands, TARGET_APX_NDD)"
{
  const bool ndd_p = (get_attr_isa (insn) == ISA_APX_NDD);

  switch (get_attr_type (insn))
    {
    case TYPE_INCDEC:
      if (operands[2] != const1_rtx)
        {
          gcc_assert (operands[2] == constm1_rtx);
          if (ndd_p)
            return "dec{l}\t{%1, %k0|%k0, %1}";
          return "dec{l}\t%k0";
        }
      if (ndd_p)
        return "inc{l}\t{%1, %k0|%k0, %1}";
      return "inc{l}\t%k0";

    default:
      /* Alternative 1 ties the destination to operand 2.  */
      if (which_alternative == 1)
        std::swap (operands[1], operands[2]);

      if (x86_maybe_negate_const_int (&operands[2], SImode))
        {
          if (ndd_p)
            return "sub{l}\t{%2, %1, %k0|%k0, %1, %2}";
          return "sub{l}\t{%2, %k0|%k0, %2}";
        }

      if (ndd_p)
        return "add{l}\t{%2, %1, %k0|%k0, %1, %2}";
      return "add{l}\t{%2, %k0|%k0, %2}";
    }
}
  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
   (set (attr "type")
      (if_then_else (match_operand:SI 2 "incdec_operand")
        (const_string "incdec")
        (const_string "alu")))
   (set (attr "length_immediate")
      (if_then_else
        (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
        (const_string "1")
        (const_string "*")))
   (set_attr "mode" "SI")])
7023
; For comparisons against 1, -1 and 128, we may generate better code
; by converting cmp to add, inc or dec, as done by peephole2; this is the
; pattern that is matched then.  A general immediate cannot be accepted,
; because the result is wrong when the operation overflows.  The carry
; flag is also reversed compared to cmp, so the conversion is valid only
; for comparisons that do not depend on it.
;
; The pattern is a compare against operand 2, so the printed operation
; is the opposite of the constant: -1 prints inc, 1 prints dec, a
; constant negated by x86_maybe_negate_const_int prints add, and
; anything else prints sub.

(define_insn "*adddi_4"
  [(set (reg FLAGS_REG)
        (compare
          (match_operand:DI 1 "nonimmediate_operand" "0,rm")
          (match_operand:DI 2 "x86_64_immediate_operand" "e,e")))
   (clobber (match_scratch:DI 0 "=r,r"))]
  "TARGET_64BIT
   && ix86_match_ccmode (insn, CCGCmode)"
{
  const bool ndd_p = (get_attr_isa (insn) == ISA_APX_NDD);

  switch (get_attr_type (insn))
    {
    case TYPE_INCDEC:
      if (operands[2] != constm1_rtx)
        {
          gcc_assert (operands[2] == const1_rtx);
          if (ndd_p)
            return "dec{q}\t{%1, %0|%0, %1}";
          return "dec{q}\t%0";
        }
      if (ndd_p)
        return "inc{q}\t{%1, %0|%0, %1}";
      return "inc{q}\t%0";

    default:
      if (x86_maybe_negate_const_int (&operands[2], DImode))
        {
          if (ndd_p)
            return "add{q}\t{%2, %1, %0|%0, %1, %2}";
          return "add{q}\t{%2, %0|%0, %2}";
        }

      if (ndd_p)
        return "sub{q}\t{%2, %1, %0|%0, %1, %2}";
      return "sub{q}\t{%2, %0|%0, %2}";
    }
}
  [(set_attr "isa" "*,apx_ndd")
   (set (attr "type")
      (if_then_else (match_operand:DI 2 "incdec_operand")
        (const_string "incdec")
        (const_string "alu")))
   (set (attr "length_immediate")
      (if_then_else
        (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
        (const_string "1")
        (const_string "*")))
   (set_attr "mode" "DI")])
7072
; For comparisons against 1, -1 and 128, we may generate better code
; by converting cmp to add, inc or dec, as done by peephole2; this is the
; pattern that is matched then.  A general immediate cannot be accepted,
; because the result is wrong when the operation overflows.  The carry
; flag is also reversed compared to cmp, so the conversion is valid only
; for comparisons that do not depend on it.
;
; As the pattern is a compare against operand 2, -1 prints inc, 1 prints
; dec, a constant negated by x86_maybe_negate_const_int prints add, and
; anything else prints sub.

(define_insn "*add<mode>_4"
  [(set (reg FLAGS_REG)
        (compare
          (match_operand:SWI124 1 "nonimmediate_operand" "0,rm")
          (match_operand:SWI124 2 "const_int_operand")))
   (clobber (match_scratch:SWI124 0 "=<r>,r"))]
  "ix86_match_ccmode (insn, CCGCmode)"
{
  const bool ndd_p = (get_attr_isa (insn) == ISA_APX_NDD);

  switch (get_attr_type (insn))
    {
    case TYPE_INCDEC:
      if (operands[2] != constm1_rtx)
        {
          gcc_assert (operands[2] == const1_rtx);
          if (ndd_p)
            return "dec{<imodesuffix>}\t{%1, %0|%0, %1}";
          return "dec{<imodesuffix>}\t%0";
        }
      if (ndd_p)
        return "inc{<imodesuffix>}\t{%1, %0|%0, %1}";
      return "inc{<imodesuffix>}\t%0";

    default:
      if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
        {
          if (ndd_p)
            return "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
          return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
        }

      if (ndd_p)
        return "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
      return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
    }
}
  [(set_attr "isa" "*,apx_ndd")
   (set (attr "type")
      (if_then_else (match_operand:<MODE> 2 "incdec_operand")
        (const_string "incdec")
        (const_string "alu")))
   (set (attr "length_immediate")
      (if_then_else
        (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
        (const_string "1")
        (const_string "*")))
   (set_attr "mode" "<MODE>")])
7122
;; Flags-only add: sets the flags from comparing the sum with zero
;; (CCGOCmode); the sum itself only lands in a scratch register
;; (operand 0).  Alternatives 2 and 3 are the apx_ndd forms.
(define_insn "*add<mode>_5"
  [(set (reg FLAGS_REG)
        (compare
          (plus:SWI
            (match_operand:SWI 1 "nonimmediate_operand" "%0,<r>,r,rm")
            (match_operand:SWI 2 "<general_operand>" "<g>,0,<g>,re"))
          (const_int 0)))
   (clobber (match_scratch:SWI 0 "=<r>,<r>,r,r"))]
  "ix86_match_ccmode (insn, CCGOCmode)
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
{
  const bool ndd_p = (get_attr_isa (insn) == ISA_APX_NDD);

  switch (get_attr_type (insn))
    {
    case TYPE_INCDEC:
      if (operands[2] != const1_rtx)
        {
          gcc_assert (operands[2] == constm1_rtx);
          if (ndd_p)
            return "dec{<imodesuffix>}\t{%1, %0|%0, %1}";
          return "dec{<imodesuffix>}\t%0";
        }
      if (ndd_p)
        return "inc{<imodesuffix>}\t{%1, %0|%0, %1}";
      return "inc{<imodesuffix>}\t%0";

    default:
      /* Alternative 1 ties the scratch to operand 2.  */
      if (which_alternative == 1)
        std::swap (operands[1], operands[2]);

      if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
        {
          if (ndd_p)
            return "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
          return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
        }

      if (ndd_p)
        return "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
      return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
    }
}
  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
   (set (attr "type")
      (if_then_else (match_operand:SWI 2 "incdec_operand")
        (const_string "incdec")
        (const_string "alu")))
   (set (attr "length_immediate")
      (if_then_else
        (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
        (const_string "1")
        (const_string "*")))
   (set_attr "mode" "<MODE>")])
7171
;; QImode add of an 8-bit field at bit 8 of operand 2 (matched through
;; "extract_operator", printed with %h2) to operand 1, which is tied to
;; the destination.  There is no insn condition, so the pattern is always
;; available.
7172 (define_insn "*addqi_ext<mode>_0"
7173 [(set (match_operand:QI 0 "nonimmediate_operand" "=QBn")
7174 (plus:QI
7175 (subreg:QI
7176 (match_operator:SWI248 3 "extract_operator"
7177 [(match_operand 2 "int248_register_operand" "Q")
7178 (const_int 8)
7179 (const_int 8)]) 0)
7180 (match_operand:QI 1 "nonimmediate_operand" "0")))
7181 (clobber (reg:CC FLAGS_REG))]
7182 ""
7183 "add{b}\t{%h2, %0|%0, %h2}"
7184 [(set_attr "addr" "gpr8")
7185 (set_attr "type" "alu")
7186 (set_attr "mode" "QI")])
7187
;; QImode sum of two 8-bit fields at bit 8 (of operands 1 and 2) into an
;; earlyclobber register destination.  The insn is always "#"; after
;; reload it is split into a move of the operand-2 field into operand 0
;; followed by an add of the operand-1 field to operand 0.
7188 (define_insn_and_split "*addqi_ext2<mode>_0"
7189 [(set (match_operand:QI 0 "register_operand" "=&Q")
7190 (plus:QI
7191 (subreg:QI
7192 (match_operator:SWI248 3 "extract_operator"
7193 [(match_operand 1 "int248_register_operand" "Q")
7194 (const_int 8)
7195 (const_int 8)]) 0)
7196 (subreg:QI
7197 (match_operator:SWI248 4 "extract_operator"
7198 [(match_operand 2 "int248_register_operand" "Q")
7199 (const_int 8)
7200 (const_int 8)]) 0)))
7201 (clobber (reg:CC FLAGS_REG))]
7202 ""
7203 "#"
7204 "&& reload_completed"
7205 [(set (match_dup 0)
7206 (subreg:QI
7207 (match_op_dup 4
7208 [(match_dup 2) (const_int 8) (const_int 8)]) 0))
7209 (parallel
7210 [(set (match_dup 0)
7211 (plus:QI
7212 (subreg:QI
7213 (match_op_dup 3
7214 [(match_dup 1) (const_int 8) (const_int 8)]) 0)
7215 (match_dup 0)))
7216 (clobber (reg:CC FLAGS_REG))])]
7217 ""
7218 [(set_attr "type" "alu")
7219 (set_attr "mode" "QI")])
7220
;; Named expander (no condition, no preparation code) that builds the RTL
;; for adding a constant (operand 2) to the 8-bit field at bit 8 of HImode
;; operand 1 and writing the result to the same field of HImode operand 0,
;; with a flags clobber.
7221 (define_expand "addqi_ext_1"
7222 [(parallel
7223 [(set (zero_extract:HI (match_operand:HI 0 "register_operand")
7224 (const_int 8)
7225 (const_int 8))
7226 (subreg:HI
7227 (plus:QI
7228 (subreg:QI
7229 (zero_extract:HI (match_operand:HI 1 "register_operand")
7230 (const_int 8)
7231 (const_int 8)) 0)
7232 (match_operand:QI 2 "const_int_operand")) 0))
7233 (clobber (reg:CC FLAGS_REG))])])
7234
;; Add operand 2 to the 8-bit field at bit 8 of operand 1 and store the
;; result in the same field of operand 0.
;; Alternative 1 is needed to work around LRA limitation, see PR82524;
;; it is emitted as "#" and, after reload, split into a copy of the field
;; from operand 1 to operand 0 followed by the tied form of the add.
(define_insn_and_split "*addqi_ext<mode>_1"
  [(set (zero_extract:SWI248
          (match_operand 0 "int248_register_operand" "+Q,&Q")
          (const_int 8)
          (const_int 8))
        (subreg:SWI248
          (plus:QI
            (subreg:QI
              (match_operator:SWI248 3 "extract_operator"
                [(match_operand 1 "int248_register_operand" "0,!Q")
                 (const_int 8)
                 (const_int 8)]) 0)
            (match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0))
   (clobber (reg:CC FLAGS_REG))]
  ""
{
  /* Only the tied alternative is printed directly.  */
  if (which_alternative != 0)
    return "#";

  if (get_attr_type (insn) != TYPE_INCDEC)
    return "add{b}\t{%2, %h0|%h0, %2}";

  if (operands[2] == const1_rtx)
    return "inc{b}\t%h0";

  gcc_assert (operands[2] == constm1_rtx);
  return "dec{b}\t%h0";
}
  "reload_completed
   && !rtx_equal_p (operands[0], operands[1])"
  [(set (zero_extract:SWI248
          (match_dup 0) (const_int 8) (const_int 8))
        (zero_extract:SWI248
          (match_dup 1) (const_int 8) (const_int 8)))
   (parallel
     [(set (zero_extract:SWI248
             (match_dup 0) (const_int 8) (const_int 8))
           (subreg:SWI248
             (plus:QI
               (subreg:QI
                 (match_op_dup 3
                   [(match_dup 0) (const_int 8) (const_int 8)]) 0)
               (match_dup 2)) 0))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "addr" "gpr8")
   (set (attr "type")
     (if_then_else (match_operand:QI 2 "incdec_operand")
        (const_string "incdec")
        (const_string "alu")))
   (set_attr "mode" "QI")])
7293
7294 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Add or subtract (the "plusminus" code iterator) the 8-bit fields at
;; bit 8 of operands 1 and 2 and store the result in the same field of
;; operand 0.  Alternative 0 ties operand 1 to operand 0 and prints one
;; instruction on %h0 and %h2.  Alternative 1 prints "#"; after reload it
;; is split into a field copy from operand 1 to operand 0 followed by the
;; tied form.  The split is skipped when operand 0 already equals operand
;; 1 or, for PLUS only, operand 2.
7295 (define_insn_and_split "*<insn>qi_ext<mode>_2"
7296 [(set (zero_extract:SWI248
7297 (match_operand 0 "int248_register_operand" "+Q,&Q")
7298 (const_int 8)
7299 (const_int 8))
7300 (subreg:SWI248
7301 (plusminus:QI
7302 (subreg:QI
7303 (match_operator:SWI248 3 "extract_operator"
7304 [(match_operand 1 "int248_register_operand" "<comm>0,!Q")
7305 (const_int 8)
7306 (const_int 8)]) 0)
7307 (subreg:QI
7308 (match_operator:SWI248 4 "extract_operator"
7309 [(match_operand 2 "int248_register_operand" "Q,Q")
7310 (const_int 8)
7311 (const_int 8)]) 0)) 0))
7312 (clobber (reg:CC FLAGS_REG))]
7313 ""
7314 "@
7315 <insn>{b}\t{%h2, %h0|%h0, %h2}
7316 #"
7317 "reload_completed
7318 && !(rtx_equal_p (operands[0], operands[1])
7319 || (<CODE> == PLUS && rtx_equal_p (operands[0], operands[2])))"
7320 [(set (zero_extract:SWI248
7321 (match_dup 0) (const_int 8) (const_int 8))
7322 (zero_extract:SWI248
7323 (match_dup 1) (const_int 8) (const_int 8)))
7324 (parallel
7325 [(set (zero_extract:SWI248
7326 (match_dup 0) (const_int 8) (const_int 8))
7327 (subreg:SWI248
7328 (plusminus:QI
7329 (subreg:QI
7330 (match_op_dup 3
7331 [(match_dup 0) (const_int 8) (const_int 8)]) 0)
7332 (subreg:QI
7333 (match_op_dup 4
7334 [(match_dup 2) (const_int 8) (const_int 8)]) 0)) 0))
7335 (clobber (reg:CC FLAGS_REG))])]
7336 ""
7337 [(set_attr "type" "alu")
7338 (set_attr "mode" "QI")])
7339
7340 ;; Like DWI, but use POImode instead of OImode.
;; Maps each mode to the mode of twice its width; the addv<mode>4
;; expander below uses it for the widened overflow comparison.
7341 (define_mode_attr DPWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI") (TI "POI")])
7342
;; Add with jump on overflow.
;; Operand 0 receives operand 1 + operand 2, and control transfers to the
;; label in operand 3 according to the CCO flags comparison below.  The
;; flags are set by comparing the sum computed in the double-width
;; <DPWI> mode with the sign-extended narrow sum.  Operand 4 is the
;; double-width form of operand 2: the constant itself when operand 2 is
;; a scalar integer constant, otherwise its sign extension.
(define_expand "addv<mode>4"
  [(parallel [(set (reg:CCO FLAGS_REG)
                   (eq:CCO
                     (plus:<DPWI>
                       (sign_extend:<DPWI>
                         (match_operand:SWIDWI 1 "nonimmediate_operand"))
                       (match_dup 4))
                     (sign_extend:<DPWI>
                       (plus:SWIDWI (match_dup 1)
                                    (match_operand:SWIDWI 2
                                      "<general_hilo_operand>")))))
              (set (match_operand:SWIDWI 0 "register_operand")
                   (plus:SWIDWI (match_dup 1) (match_dup 2)))])
   (set (pc) (if_then_else
               (eq (reg:CCO FLAGS_REG) (const_int 0))
               (label_ref (match_operand 3))
               (pc)))]
  ""
{
  ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
  operands[4] = (CONST_SCALAR_INT_P (operands[2])
                 ? operands[2]
                 : gen_rtx_SIGN_EXTEND (<DPWI>mode, operands[2]));
})
7369
;; Overflow-checking add: sets the CCO flags by comparing the sum of the
;; sign-extended inputs, computed in the double-width <DWI> mode, with the
;; sign extension of the narrow sum, and stores the narrow sum in operand
;; 0.  Alternatives 0-1 are the two-operand forms, alternatives 2-3 the
;; apx_ndd three-operand forms.
7370 (define_insn "*addv<mode>4"
7371 [(set (reg:CCO FLAGS_REG)
7372 (eq:CCO (plus:<DWI>
7373 (sign_extend:<DWI>
7374 (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r"))
7375 (sign_extend:<DWI>
7376 (match_operand:SWI 2 "<general_sext_operand>" "<r>We,m,rWe,m")))
7377 (sign_extend:<DWI>
7378 (plus:SWI (match_dup 1) (match_dup 2)))))
7379 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
7380 (plus:SWI (match_dup 1) (match_dup 2)))]
7381 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
7382 "@
7383 add{<imodesuffix>}\t{%2, %0|%0, %2}
7384 add{<imodesuffix>}\t{%2, %0|%0, %2}
7385 add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
7386 add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
7387 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
7388 (set_attr "type" "alu")
7389 (set_attr "mode" "<MODE>")])
7390
;; Overflow-checking add of an immediate.  Operand 2 is the immediate in
;; the operand mode and operand 3 the same constant in the double-width
;; <DWI> mode; the insn condition requires both to be CONST_INTs with
;; equal INTVAL.  Alternative 1 is the apx_ndd three-operand form.
;; length_immediate is 1 for constants in [-128, 127], otherwise 4 when
;; <MODE_SIZE> is 8, otherwise <MODE_SIZE>.
7391 (define_insn "addv<mode>4_1"
7392 [(set (reg:CCO FLAGS_REG)
7393 (eq:CCO (plus:<DWI>
7394 (sign_extend:<DWI>
7395 (match_operand:SWI 1 "nonimmediate_operand" "0,rm"))
7396 (match_operand:<DWI> 3 "const_int_operand"))
7397 (sign_extend:<DWI>
7398 (plus:SWI
7399 (match_dup 1)
7400 (match_operand:SWI 2 "x86_64_immediate_operand" "<i>,<i>")))))
7401 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
7402 (plus:SWI (match_dup 1) (match_dup 2)))]
7403 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)
7404 && CONST_INT_P (operands[2])
7405 && INTVAL (operands[2]) == INTVAL (operands[3])"
7406 "@
7407 add{<imodesuffix>}\t{%2, %0|%0, %2}
7408 add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
7409 [(set_attr "isa" "*,apx_ndd")
7410 (set_attr "type" "alu")
7411 (set_attr "mode" "<MODE>")
7412 (set (attr "length_immediate")
7413 (cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
7414 (const_string "1")
7415 (match_test "<MODE_SIZE> == 8")
7416 (const_string "4")]
7417 (const_string "<MODE_SIZE>")))])
7418
7419 ;; Quad word integer modes as mode attribute.
;; Maps a word mode to the mode four times as wide; the doubleword addv
;; patterns below use it for their widened overflow comparison.
7420 (define_mode_attr QPWI [(SI "TI") (DI "POI")])
7421
;; Overflow-checking add on double-word (<DWI>) operands.  The insn is
;; always "#" and is split after reload into two parallels:
;;   1. an add of operands 0-2 in the half-width DWIH mode that also sets
;;      CCC by comparing the sum with operand 1;
;;   2. an add-with-carry of operands 3-5 in DWIH mode that sets CCO.
;; The preparation statement calls split_double_mode on the three
;; original operands; presumably this leaves the low halves in
;; operands[0..2] and the high halves in operands[3..5] -- confirm against
;; split_double_mode.  Alternatives 2 and 3 are the apx_ndd forms with an
;; earlyclobber destination.
7422 (define_insn_and_split "*addv<dwi>4_doubleword"
7423 [(set (reg:CCO FLAGS_REG)
7424 (eq:CCO
7425 (plus:<QPWI>
7426 (sign_extend:<QPWI>
7427 (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0,ro,r"))
7428 (sign_extend:<QPWI>
7429 (match_operand:<DWI> 2 "nonimmediate_operand" "r,o,r,o")))
7430 (sign_extend:<QPWI>
7431 (plus:<DWI> (match_dup 1) (match_dup 2)))))
7432 (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,&r,&r")
7433 (plus:<DWI> (match_dup 1) (match_dup 2)))]
7434 "ix86_binary_operator_ok (PLUS, <DWI>mode, operands, TARGET_APX_NDD)"
7435 "#"
7436 "&& reload_completed"
7437 [(parallel [(set (reg:CCC FLAGS_REG)
7438 (compare:CCC
7439 (plus:DWIH (match_dup 1) (match_dup 2))
7440 (match_dup 1)))
7441 (set (match_dup 0)
7442 (plus:DWIH (match_dup 1) (match_dup 2)))])
7443 (parallel [(set (reg:CCO FLAGS_REG)
7444 (eq:CCO
7445 (plus:<DWI>
7446 (plus:<DWI>
7447 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))
7448 (sign_extend:<DWI> (match_dup 4)))
7449 (sign_extend:<DWI> (match_dup 5)))
7450 (sign_extend:<DWI>
7451 (plus:DWIH
7452 (plus:DWIH
7453 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
7454 (match_dup 4))
7455 (match_dup 5)))))
7456 (set (match_dup 3)
7457 (plus:DWIH
7458 (plus:DWIH
7459 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
7460 (match_dup 4))
7461 (match_dup 5)))])]
7462 {
7463 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
7464 }
7465 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
7466
;; Overflow-checking double-word add of a constant.  Operand 2 is the
;; constant in <DWI> mode and operand 3 the same constant in <QPWI> mode;
;; the insn condition requires operand 2 to be a scalar integer constant
;; that is rtx_equal_p to operand 3.  The insn is always "#" and is split
;; after reload into a half-width add that sets CCC followed by an
;; add-with-carry that sets CCO, as in *addv<dwi>4_doubleword, except
;; that the high part of the constant (operand 5) is used directly
;; instead of being sign-extended.  Alternative 1 is the apx_ndd form.
7467 (define_insn_and_split "*addv<dwi>4_doubleword_1"
7468 [(set (reg:CCO FLAGS_REG)
7469 (eq:CCO
7470 (plus:<QPWI>
7471 (sign_extend:<QPWI>
7472 (match_operand:<DWI> 1 "nonimmediate_operand" "%0,rm"))
7473 (match_operand:<QPWI> 3 "const_scalar_int_operand" "n,n"))
7474 (sign_extend:<QPWI>
7475 (plus:<DWI>
7476 (match_dup 1)
7477 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "<di>,<di>")))))
7478 (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,&r")
7479 (plus:<DWI> (match_dup 1) (match_dup 2)))]
7480 "ix86_binary_operator_ok (PLUS, <DWI>mode, operands, TARGET_APX_NDD)
7481 && CONST_SCALAR_INT_P (operands[2])
7482 && rtx_equal_p (operands[2], operands[3])"
7483 "#"
7484 "&& reload_completed"
7485 [(parallel [(set (reg:CCC FLAGS_REG)
7486 (compare:CCC
7487 (plus:DWIH (match_dup 1) (match_dup 2))
7488 (match_dup 1)))
7489 (set (match_dup 0)
7490 (plus:DWIH (match_dup 1) (match_dup 2)))])
7491 (parallel [(set (reg:CCO FLAGS_REG)
7492 (eq:CCO
7493 (plus:<DWI>
7494 (plus:<DWI>
7495 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))
7496 (sign_extend:<DWI> (match_dup 4)))
7497 (match_dup 5))
7498 (sign_extend:<DWI>
7499 (plus:DWIH
7500 (plus:DWIH
7501 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
7502 (match_dup 4))
7503 (match_dup 5)))))
7504 (set (match_dup 3)
7505 (plus:DWIH
7506 (plus:DWIH
7507 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
7508 (match_dup 4))
7509 (match_dup 5)))])]
7510 {
/* Presumably leaves the low halves in operands[0..2] and the high halves
   in operands[3..5] -- confirm against split_double_mode.  */
7511 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
/* When operands[2] is zero after the split, the template above is not
   used: operand 1 is moved to operand 0 if they differ, then a single
   addv<mode>4_1 is emitted on operands 3-5.  */
7512 if (operands[2] == const0_rtx)
7513 {
7514 if (!rtx_equal_p (operands[0], operands[1]))
7515 emit_move_insn (operands[0], operands[1]);
7516 emit_insn (gen_addv<mode>4_1 (operands[3], operands[4], operands[5],
7517 operands[5]));
7518 DONE;
7519 }
7520 }
7521 [(set_attr "isa" "*,apx_ndd")])
7522
;; Add-with-carry that sets the CCO flags: the carry (operators 4 and 5,
;; both applied to the flags register in operand 3) plus operands 1 and 2
;; is computed in the double-width <DWI> mode from sign-extended inputs
;; and compared with the sign extension of the narrow sum; the narrow sum
;; is stored in operand 0.  Printed as adc; alternatives 2 and 3 are the
;; apx_ndd three-operand forms.
7523 (define_insn "*addv<mode>4_overflow_1"
7524 [(set (reg:CCO FLAGS_REG)
7525 (eq:CCO
7526 (plus:<DWI>
7527 (plus:<DWI>
7528 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
7529 [(match_operand 3 "flags_reg_operand") (const_int 0)])
7530 (sign_extend:<DWI>
7531 (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r")))
7532 (sign_extend:<DWI>
7533 (match_operand:SWI 2 "<general_sext_operand>" "rWe,m,rWe,m")))
7534 (sign_extend:<DWI>
7535 (plus:SWI
7536 (plus:SWI
7537 (match_operator:SWI 5 "ix86_carry_flag_operator"
7538 [(match_dup 3) (const_int 0)])
7539 (match_dup 1))
7540 (match_dup 2)))))
7541 (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r,r,r")
7542 (plus:SWI
7543 (plus:SWI
7544 (match_op_dup 5 [(match_dup 3) (const_int 0)])
7545 (match_dup 1))
7546 (match_dup 2)))]
7547 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
7548 "@
7549 adc{<imodesuffix>}\t{%2, %0|%0, %2}
7550 adc{<imodesuffix>}\t{%2, %0|%0, %2}
7551 adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
7552 adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
7553 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
7554 (set_attr "type" "alu")
7555 (set_attr "mode" "<MODE>")])
7556
;; Immediate variant of *addv<mode>4_overflow_1.  Operand 2 is the
;; immediate in the operand mode and operand 6 the same constant in the
;; double-width <DWI> mode; the insn condition requires both to be
;; CONST_INTs with equal INTVAL.  Alternative 1 is the apx_ndd form.
;; length_immediate is 1 for constants in [-128, 127] and 4 otherwise.
;; NOTE(review): unlike addv<mode>4_1, the fallback length here does not
;; depend on <MODE_SIZE> -- confirm this is intended for HImode.
7557 (define_insn "*addv<mode>4_overflow_2"
7558 [(set (reg:CCO FLAGS_REG)
7559 (eq:CCO
7560 (plus:<DWI>
7561 (plus:<DWI>
7562 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
7563 [(match_operand 3 "flags_reg_operand") (const_int 0)])
7564 (sign_extend:<DWI>
7565 (match_operand:SWI 1 "nonimmediate_operand" "%0,rm")))
7566 (match_operand:<DWI> 6 "const_int_operand" "n,n"))
7567 (sign_extend:<DWI>
7568 (plus:SWI
7569 (plus:SWI
7570 (match_operator:SWI 5 "ix86_carry_flag_operator"
7571 [(match_dup 3) (const_int 0)])
7572 (match_dup 1))
7573 (match_operand:SWI 2 "x86_64_immediate_operand" "e,e")))))
7574 (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r")
7575 (plus:SWI
7576 (plus:SWI
7577 (match_op_dup 5 [(match_dup 3) (const_int 0)])
7578 (match_dup 1))
7579 (match_dup 2)))]
7580 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)
7581 && CONST_INT_P (operands[2])
7582 && INTVAL (operands[2]) == INTVAL (operands[6])"
7583 "@
7584 adc{<imodesuffix>}\t{%2, %0|%0, %2}
7585 adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
7586 [(set_attr "isa" "*,apx_ndd")
7587 (set_attr "type" "alu")
7588 (set_attr "mode" "<MODE>")
7589 (set (attr "length_immediate")
7590 (if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
7591 (const_string "1")
7592 (const_string "4")))])
7593
;; Unsigned add with jump on carry.  Operand 0 receives operand 1 +
;; operand 2 while CCC is set by comparing the sum with operand 1; the
;; jump to the label in operand 3 is taken when the "ltu" test on CCC
;; holds.  The preparation statement only runs
;; ix86_fixup_binary_operands_no_copy on the operands.
7594 (define_expand "uaddv<mode>4"
7595 [(parallel [(set (reg:CCC FLAGS_REG)
7596 (compare:CCC
7597 (plus:SWIDWI
7598 (match_operand:SWIDWI 1 "nonimmediate_operand")
7599 (match_operand:SWIDWI 2 "<general_hilo_operand>"))
7600 (match_dup 1)))
7601 (set (match_operand:SWIDWI 0 "register_operand")
7602 (plus:SWIDWI (match_dup 1) (match_dup 2)))])
7603 (set (pc) (if_then_else
7604 (ltu (reg:CCC FLAGS_REG) (const_int 0))
7605 (label_ref (match_operand 3))
7606 (pc)))]
7607 ""
7608 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
7609
7610 ;; The lea patterns for modes less than 32 bits need to be matched by
7611 ;; several insns converted to real lea by splitters.
7612
;; SWI12 form of reg + reg + immediate.  Always output as "#"; after reload
;; it is split into the same sum in SImode, using the SImode low parts of
;; all four operands.  Attributes mark it as an SImode "lea".
7613 (define_insn_and_split "*lea<mode>_general_1"
7614 [(set (match_operand:SWI12 0 "register_operand" "=r")
7615 (plus:SWI12
7616 (plus:SWI12 (match_operand:SWI12 1 "register_no_SP_operand" "l")
7617 (match_operand:SWI12 2 "register_operand" "r"))
7618 (match_operand:SWI12 3 "immediate_operand" "i")))]
7619 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
7620 "#"
7621 "&& reload_completed"
7622 [(set (match_dup 0)
7623 (plus:SI
7624 (plus:SI (match_dup 1) (match_dup 2))
7625 (match_dup 3)))]
7626 {
7627 operands[0] = gen_lowpart (SImode, operands[0]);
7628 operands[1] = gen_lowpart (SImode, operands[1]);
7629 operands[2] = gen_lowpart (SImode, operands[2]);
7630 operands[3] = gen_lowpart (SImode, operands[3]);
7631 }
7632 [(set_attr "type" "lea")
7633 (set_attr "mode" "SI")])
7634
;; SWI12 form of reg * {2,4,8} + reg-or-immediate (scale matched by
;; const248_operand).  Split after reload into the SImode equivalent;
;; operand 2, the scale constant, is reused unchanged.
7635 (define_insn_and_split "*lea<mode>_general_2"
7636 [(set (match_operand:SWI12 0 "register_operand" "=r")
7637 (plus:SWI12
7638 (mult:SWI12 (match_operand:SWI12 1 "register_no_SP_operand" "l")
7639 (match_operand 2 "const248_operand" "n"))
7640 (match_operand:SWI12 3 "nonmemory_operand" "ri")))]
7641 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
7642 "#"
7643 "&& reload_completed"
7644 [(set (match_dup 0)
7645 (plus:SI
7646 (mult:SI (match_dup 1) (match_dup 2))
7647 (match_dup 3)))]
7648 {
7649 operands[0] = gen_lowpart (SImode, operands[0]);
7650 operands[1] = gen_lowpart (SImode, operands[1]);
7651 operands[3] = gen_lowpart (SImode, operands[3]);
7652 }
7653 [(set_attr "type" "lea")
7654 (set_attr "mode" "SI")])
7655
;; Same as *lea<mode>_general_2 but with the scale written as a left shift
;; (count matched by const123_operand) instead of a multiply.  Split after
;; reload into the SImode ashift + plus form; the shift count is unchanged.
7656 (define_insn_and_split "*lea<mode>_general_2b"
7657 [(set (match_operand:SWI12 0 "register_operand" "=r")
7658 (plus:SWI12
7659 (ashift:SWI12 (match_operand:SWI12 1 "register_no_SP_operand" "l")
7660 (match_operand 2 "const123_operand" "n"))
7661 (match_operand:SWI12 3 "nonmemory_operand" "ri")))]
7662 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
7663 "#"
7664 "&& reload_completed"
7665 [(set (match_dup 0)
7666 (plus:SI
7667 (ashift:SI (match_dup 1) (match_dup 2))
7668 (match_dup 3)))]
7669 {
7670 operands[0] = gen_lowpart (SImode, operands[0]);
7671 operands[1] = gen_lowpart (SImode, operands[1]);
7672 operands[3] = gen_lowpart (SImode, operands[3]);
7673 }
7674 [(set_attr "type" "lea")
7675 (set_attr "mode" "SI")])
7676
;; SWI12 form of reg * {2,4,8} + reg + immediate.  Split after reload into
;; the SImode equivalent; operands 0, 1, 3 and 4 are replaced by their
;; SImode low parts, the scale (operand 2) is kept as is.
7677 (define_insn_and_split "*lea<mode>_general_3"
7678 [(set (match_operand:SWI12 0 "register_operand" "=r")
7679 (plus:SWI12
7680 (plus:SWI12
7681 (mult:SWI12 (match_operand:SWI12 1 "register_no_SP_operand" "l")
7682 (match_operand 2 "const248_operand" "n"))
7683 (match_operand:SWI12 3 "register_operand" "r"))
7684 (match_operand:SWI12 4 "immediate_operand" "i")))]
7685 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
7686 "#"
7687 "&& reload_completed"
7688 [(set (match_dup 0)
7689 (plus:SI
7690 (plus:SI
7691 (mult:SI (match_dup 1) (match_dup 2))
7692 (match_dup 3))
7693 (match_dup 4)))]
7694 {
7695 operands[0] = gen_lowpart (SImode, operands[0]);
7696 operands[1] = gen_lowpart (SImode, operands[1]);
7697 operands[3] = gen_lowpart (SImode, operands[3]);
7698 operands[4] = gen_lowpart (SImode, operands[4]);
7699 }
7700 [(set_attr "type" "lea")
7701 (set_attr "mode" "SI")])
7702
;; Same as *lea<mode>_general_3 but with the scale written as a left shift
;; (count matched by const123_operand).  Split after reload into the SImode
;; (reg << n) + reg + immediate form.
7703 (define_insn_and_split "*lea<mode>_general_3b"
7704 [(set (match_operand:SWI12 0 "register_operand" "=r")
7705 (plus:SWI12
7706 (plus:SWI12
7707 (ashift:SWI12 (match_operand:SWI12 1 "register_no_SP_operand" "l")
7708 (match_operand 2 "const123_operand" "n"))
7709 (match_operand:SWI12 3 "register_operand" "r"))
7710 (match_operand:SWI12 4 "immediate_operand" "i")))]
7711 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
7712 "#"
7713 "&& reload_completed"
7714 [(set (match_dup 0)
7715 (plus:SI
7716 (plus:SI
7717 (ashift:SI (match_dup 1) (match_dup 2))
7718 (match_dup 3))
7719 (match_dup 4)))]
7720 {
7721 operands[0] = gen_lowpart (SImode, operands[0]);
7722 operands[1] = gen_lowpart (SImode, operands[1]);
7723 operands[3] = gen_lowpart (SImode, operands[3]);
7724 operands[4] = gen_lowpart (SImode, operands[4]);
7725 }
7726 [(set_attr "type" "lea")
7727 (set_attr "mode" "SI")])
7728
;; SWI12 any_or of (reg << n) with a constant.  The insn condition requires
;; the constant, taken as unsigned, to be below 1 << n, i.e. all of its set
;; bits lie under the bits produced by the shift.  After reload the insn is
;; split into an SImode reg * (1 << n) + constant; the preparation code
;; turns the shift count in operand 2 into the multiplier.
7729 (define_insn_and_split "*lea<mode>_general_4"
7730 [(set (match_operand:SWI12 0 "register_operand" "=r")
7731 (any_or:SWI12
7732 (ashift:SWI12
7733 (match_operand:SWI12 1 "register_no_SP_operand" "l")
7734 (match_operand 2 "const_0_to_3_operand"))
7735 (match_operand 3 "const_int_operand")))]
7736 "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
7737 && ((unsigned HOST_WIDE_INT) INTVAL (operands[3])
7738 < (HOST_WIDE_INT_1U << INTVAL (operands[2])))"
7739 "#"
7740 "&& reload_completed"
7741 [(set (match_dup 0)
7742 (plus:SI
7743 (mult:SI (match_dup 1) (match_dup 2))
7744 (match_dup 3)))]
7745 {
7746 operands[0] = gen_lowpart (SImode, operands[0]);
7747 operands[1] = gen_lowpart (SImode, operands[1]);
7748 operands[2] = GEN_INT (1 << INTVAL (operands[2]));
7749 }
7750 [(set_attr "type" "lea")
7751 (set_attr "mode" "SI")])
7752
;; SWI48 counterpart of the pattern above: any_or of (reg << n) with a
;; constant below 1 << n.  Split after reload into reg * (1 << n) + constant
;; in the same mode; no lowpart conversion is done, only operand 2 is
;; rewritten from shift count to multiplier.
7753 (define_insn_and_split "*lea<mode>_general_4"
7754 [(set (match_operand:SWI48 0 "register_operand" "=r")
7755 (any_or:SWI48
7756 (ashift:SWI48
7757 (match_operand:SWI48 1 "register_no_SP_operand" "l")
7758 (match_operand 2 "const_0_to_3_operand"))
7759 (match_operand 3 "const_int_operand")))]
7760 "(unsigned HOST_WIDE_INT) INTVAL (operands[3])
7761 < (HOST_WIDE_INT_1U << INTVAL (operands[2]))"
7762 "#"
7763 "&& reload_completed"
7764 [(set (match_dup 0)
7765 (plus:SWI48
7766 (mult:SWI48 (match_dup 1) (match_dup 2))
7767 (match_dup 3)))]
7768 "operands[2] = GEN_INT (1 << INTVAL (operands[2]));"
7769 [(set_attr "type" "lea")
7770 (set_attr "mode" "<MODE>")])
7771 \f
7772 ;; Subtract instructions
7773
;; Named subtract expander for all SDWIM modes.  The RTL template is only
;; used for operand matching: the preparation code hands everything to
;; ix86_expand_binary_operator (MINUS, ...) and finishes with DONE.
7774 (define_expand "sub<mode>3"
7775 [(set (match_operand:SDWIM 0 "nonimmediate_operand")
7776 (minus:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")
7777 (match_operand:SDWIM 2 "<general_hilo_operand>")))]
7778 ""
7779 "ix86_expand_binary_operator (MINUS, <MODE>mode, operands,
7780 TARGET_APX_NDD); DONE;")
7781
;; Double-word subtract.  Split after reload into two DWIH operations:
;; a flag-setting subtract on operands 0-2, then a subtract that also
;; removes the borrow (ltu on FLAGS_REG) on operands 3-5.  split_double_mode
;; fills operands 0-2 and 3-5 with the two halves (presumably low and high,
;; respectively -- confirm against split_double_mode).
;; Special case: when the first half of operand 2 is const0_rtx, no borrow
;; can arise, so the first half is just a move (skipped if source and
;; destination are equal) and the second half is expanded as a plain MINUS.
7782 (define_insn_and_split "*sub<dwi>3_doubleword"
7783 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,&r,&r")
7784 (minus:<DWI>
7785 (match_operand:<DWI> 1 "nonimmediate_operand" "0,0,ro,r")
7786 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o,r<di>,o")))
7787 (clobber (reg:CC FLAGS_REG))]
7788 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
7789 TARGET_APX_NDD)"
7790 "#"
7791 "&& reload_completed"
7792 [(parallel [(set (reg:CC FLAGS_REG)
7793 (compare:CC (match_dup 1) (match_dup 2)))
7794 (set (match_dup 0)
7795 (minus:DWIH (match_dup 1) (match_dup 2)))])
7796 (parallel [(set (match_dup 3)
7797 (minus:DWIH
7798 (minus:DWIH
7799 (match_dup 4)
7800 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
7801 (match_dup 5)))
7802 (clobber (reg:CC FLAGS_REG))])]
7803 {
7804 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
7805 if (operands[2] == const0_rtx)
7806 {
7807 if (!rtx_equal_p (operands[0], operands[1]))
7808 emit_move_insn (operands[0], operands[1]);
7809 ix86_expand_binary_operator (MINUS, <MODE>mode, &operands[3],
7810 TARGET_APX_NDD);
7811 DONE;
7812 }
7813 }
7814 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
7815
;; Double-word subtract of a zero-extended single-word value.  Split after
;; reload into a flag-setting DWIH subtract of operand 2 from the first half
;; of operand 1, followed by a subtract of only the borrow (the subtrahend is
;; const_int 0) from the second half.  Only operands 0 and 1 are split
;; (count 2); operand 2 is already DWIH.
7816 (define_insn_and_split "*sub<dwi>3_doubleword_zext"
7817 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o,&r,&r")
7818 (minus:<DWI>
7819 (match_operand:<DWI> 1 "nonimmediate_operand" "0,0,r,o")
7820 (zero_extend:<DWI>
7821 (match_operand:DWIH 2 "nonimmediate_operand" "rm,r,rm,r"))))
7822 (clobber (reg:CC FLAGS_REG))]
7823 "ix86_binary_operator_ok (UNKNOWN, <DWI>mode, operands,
7824 TARGET_APX_NDD)"
7825 "#"
7826 "&& reload_completed"
7827 [(parallel [(set (reg:CC FLAGS_REG)
7828 (compare:CC (match_dup 1) (match_dup 2)))
7829 (set (match_dup 0)
7830 (minus:DWIH (match_dup 1) (match_dup 2)))])
7831 (parallel [(set (match_dup 3)
7832 (minus:DWIH
7833 (minus:DWIH
7834 (match_dup 4)
7835 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
7836 (const_int 0)))
7837 (clobber (reg:CC FLAGS_REG))])]
7838 "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[3]);"
7839 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
7840
;; Basic integer subtract that clobbers FLAGS_REG.  Alternatives 0 and 1 are
;; the two-operand form (operand 1 tied to operand 0); alternatives 2 and 3
;; are the three-operand form and are tagged isa "apx_ndd".
7841 (define_insn "*sub<mode>_1"
7842 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
7843 (minus:SWI
7844 (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r")
7845 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>")))
7846 (clobber (reg:CC FLAGS_REG))]
7847 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
7848 TARGET_APX_NDD)"
7849 "@
7850 sub{<imodesuffix>}\t{%2, %0|%0, %2}
7851 sub{<imodesuffix>}\t{%2, %0|%0, %2}
7852 sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
7853 sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
7854 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
7855 (set_attr "type" "alu")
7856 (set_attr "mode" "SI")])
7857
;; SImode subtract whose result is zero-extended into a DImode register.
;; Only valid for TARGET_64BIT.  The destination is printed with %k0 so a
;; single 32-bit sub{l} is emitted; alternatives 1 and 2 are the
;; three-operand form tagged isa "apx_ndd".
7858 (define_insn "*subsi_1_zext"
7859 [(set (match_operand:DI 0 "register_operand" "=r,r,r")
7860 (zero_extend:DI
7861 (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,r,rm")
7862 (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re"))))
7863 (clobber (reg:CC FLAGS_REG))]
7864 "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands,
7865 TARGET_APX_NDD)"
7866 "@
7867 sub{l}\t{%2, %k0|%k0, %2}
7868 sub{l}\t{%2, %1, %k0|%k0, %1, %2}
7869 sub{l}\t{%2, %1, %k0|%k0, %1, %2}"
7870 [(set_attr "isa" "*,apx_ndd,apx_ndd")
7871 (set_attr "type" "alu")
7872 (set_attr "mode" "SI")])
7873
7874 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Subtract into the strict_low_part of operand 0 (SWI12 modes).
;; Alternative 0 has operand 1 tied to operand 0 and emits sub directly.
;; Alternative 1 emits "#"; after reload, when operands 0 and 1 differ, it
;; is split into a strict_low_part copy of operand 1 into operand 0 followed
;; by the in-place subtract of operand 2.
7875 (define_insn_and_split "*sub<mode>_1_slp"
7876 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
7877 (minus:SWI12 (match_operand:SWI12 1 "register_operand" "0,!<r>")
7878 (match_operand:SWI12 2 "general_operand" "<r>mn,<r>mn")))
7879 (clobber (reg:CC FLAGS_REG))]
7880 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
7881 "@
7882 sub{<imodesuffix>}\t{%2, %0|%0, %2}
7883 #"
7884 "&& reload_completed
7885 && !(rtx_equal_p (operands[0], operands[1]))"
7886 [(set (strict_low_part (match_dup 0)) (match_dup 1))
7887 (parallel
7888 [(set (strict_low_part (match_dup 0))
7889 (minus:SWI12 (match_dup 0) (match_dup 2)))
7890 (clobber (reg:CC FLAGS_REG))])]
7891 ""
7892 [(set_attr "type" "alu")
7893 (set_attr "mode" "<MODE>")])
7894
7895 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; QImode subtract into the strict_low_part of operand 0, where the
;; subtrahend is the 8-bit field at bit offset 8 of operand 2 (printed with
;; %h2).  Alternative 1 emits "#" and is split after reload, when operands 0
;; and 1 differ, into a strict_low_part copy of operand 1 followed by the
;; in-place subtract.
7896 (define_insn_and_split "*subqi_ext<mode>_1_slp"
7897 [(set (strict_low_part (match_operand:QI 0 "register_operand" "+Q,&Q"))
7898 (minus:QI
7899 (match_operand:QI 1 "nonimmediate_operand" "0,!qm")
7900 (subreg:QI
7901 (match_operator:SWI248 3 "extract_operator"
7902 [(match_operand 2 "int248_register_operand" "Q,Q")
7903 (const_int 8)
7904 (const_int 8)]) 0)))
7905 (clobber (reg:CC FLAGS_REG))]
7906 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
7907 "@
7908 sub{b}\t{%h2, %0|%0, %h2}
7909 #"
7910 "&& reload_completed
7911 && !rtx_equal_p (operands[0], operands[1])"
7912 [(set (strict_low_part (match_dup 0)) (match_dup 1))
7913 (parallel
7914 [(set (strict_low_part (match_dup 0))
7915 (minus:QI
7916 (match_dup 0)
7917 (subreg:QI
7918 (match_op_dup 3
7919 [(match_dup 2) (const_int 8) (const_int 8)]) 0)))
7920 (clobber (reg:CC FLAGS_REG))])]
7921 ""
7922 [(set_attr "type" "alu")
7923 (set_attr "mode" "QI")])
7924
;; QImode subtract into the strict_low_part of operand 0 where both the
;; minuend and the subtrahend are 8-bit fields at bit offset 8 (of operands
;; 1 and 2).  Always "#": after reload it becomes a strict_low_part copy of
;; the first extracted field into operand 0, then an in-place subtract of
;; the second extracted field.
7925 (define_insn_and_split "*subqi_ext<mode>_2_slp"
7926 [(set (strict_low_part (match_operand:QI 0 "register_operand" "+&Q"))
7927 (minus:QI
7928 (subreg:QI
7929 (match_operator:SWI248 3 "extract_operator"
7930 [(match_operand 1 "int248_register_operand" "Q")
7931 (const_int 8)
7932 (const_int 8)]) 0)
7933 (subreg:QI
7934 (match_operator:SWI248 4 "extract_operator"
7935 [(match_operand 2 "int248_register_operand" "Q")
7936 (const_int 8)
7937 (const_int 8)]) 0)))
7938 (clobber (reg:CC FLAGS_REG))]
7939 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
7940 "#"
7941 "&& reload_completed"
7942 [(set (strict_low_part (match_dup 0))
7943 (subreg:QI
7944 (match_op_dup 3
7945 [(match_dup 1) (const_int 8) (const_int 8)]) 0))
7946 (parallel
7947 [(set (strict_low_part (match_dup 0))
7948 (minus:QI
7949 (match_dup 0)
7950 (subreg:QI
7951 (match_op_dup 4
7952 [(match_dup 2) (const_int 8) (const_int 8)]) 0)))
7953 (clobber (reg:CC FLAGS_REG))])]
7954 ""
7955 [(set_attr "type" "alu")
7956 (set_attr "mode" "QI")])
7957
;; Subtract that also sets FLAGS_REG from comparing the difference with
;; zero.  Only matches when ix86_match_ccmode accepts CCGOCmode for the
;; insn.  Alternatives 2 and 3 are the three-operand form tagged isa
;; "apx_ndd".
7958 (define_insn "*sub<mode>_2"
7959 [(set (reg FLAGS_REG)
7960 (compare
7961 (minus:SWI
7962 (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r")
7963 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>"))
7964 (const_int 0)))
7965 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
7966 (minus:SWI (match_dup 1) (match_dup 2)))]
7967 "ix86_match_ccmode (insn, CCGOCmode)
7968 && ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
7969 TARGET_APX_NDD)"
7970 "@
7971 sub{<imodesuffix>}\t{%2, %0|%0, %2}
7972 sub{<imodesuffix>}\t{%2, %0|%0, %2}
7973 sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
7974 sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
7975 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
7976 (set_attr "type" "alu")
7977 (set_attr "mode" "<MODE>")])
7978
;; 64-bit-only variant of *sub<mode>_2 for SImode: sets FLAGS_REG from the
;; SImode difference compared with zero and stores the zero-extended result
;; in a DImode register (printed as %k0).
7979 (define_insn "*subsi_2_zext"
7980 [(set (reg FLAGS_REG)
7981 (compare
7982 (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,r,rm")
7983 (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re"))
7984 (const_int 0)))
7985 (set (match_operand:DI 0 "register_operand" "=r,r,r")
7986 (zero_extend:DI
7987 (minus:SI (match_dup 1)
7988 (match_dup 2))))]
7989 "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
7990 && ix86_binary_operator_ok (MINUS, SImode, operands,
7991 TARGET_APX_NDD)"
7992 "@
7993 sub{l}\t{%2, %k0|%k0, %2}
7994 sub{l}\t{%2, %1, %k0|%k0, %1, %2}
7995 sub{l}\t{%2, %1, %k0|%k0, %1, %2}"
7996 [(set_attr "isa" "*,apx_ndd,apx_ndd")
7997 (set_attr "type" "alu")
7998 (set_attr "mode" "SI")])
7999
;; QImode subtract of the 8-bit field at bit offset 8 of operand 2 (printed
;; with %h2) from operand 1, which is tied to the destination.  The "addr"
;; attribute is set to "gpr8".
8000 (define_insn "*subqi_ext<mode>_0"
8001 [(set (match_operand:QI 0 "nonimmediate_operand" "=QBn")
8002 (minus:QI
8003 (match_operand:QI 1 "nonimmediate_operand" "0")
8004 (subreg:QI
8005 (match_operator:SWI248 3 "extract_operator"
8006 [(match_operand 2 "int248_register_operand" "Q")
8007 (const_int 8)
8008 (const_int 8)]) 0)))
8009 (clobber (reg:CC FLAGS_REG))]
8010 ""
8011 "sub{b}\t{%h2, %0|%0, %h2}"
8012 [(set_attr "addr" "gpr8")
8013 (set_attr "type" "alu")
8014 (set_attr "mode" "QI")])
8015
;; QImode difference of two 8-bit fields at bit offset 8 (of operands 1 and
;; 2) into an early-clobber Q register.  Always "#": after reload it becomes
;; a move of the first extracted field into operand 0, then an in-place
;; subtract of the second extracted field.
8016 (define_insn_and_split "*subqi_ext2<mode>_0"
8017 [(set (match_operand:QI 0 "register_operand" "=&Q")
8018 (minus:QI
8019 (subreg:QI
8020 (match_operator:SWI248 3 "extract_operator"
8021 [(match_operand 1 "int248_register_operand" "Q")
8022 (const_int 8)
8023 (const_int 8)]) 0)
8024 (subreg:QI
8025 (match_operator:SWI248 4 "extract_operator"
8026 [(match_operand 2 "int248_register_operand" "Q")
8027 (const_int 8)
8028 (const_int 8)]) 0)))
8029 (clobber (reg:CC FLAGS_REG))]
8030 ""
8031 "#"
8032 "&& reload_completed"
8033 [(set (match_dup 0)
8034 (subreg:QI
8035 (match_op_dup 3
8036 [(match_dup 1) (const_int 8) (const_int 8)]) 0))
8037 (parallel
8038 [(set (match_dup 0)
8039 (minus:QI
8040 (match_dup 0)
8041 (subreg:QI
8042 (match_op_dup 4
8043 [(match_dup 2) (const_int 8) (const_int 8)]) 0)))
8044 (clobber (reg:CC FLAGS_REG))])]
8045 ""
8046 [(set_attr "type" "alu")
8047 (set_attr "mode" "QI")])
8048
8049 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Subtract operand 2 from the 8-bit field at bit offset 8 of operand 1 and
;; store the result into the same field of operand 0 (printed with %h0).
;; Alternative 0 has operand 1 tied to operand 0.  Alternative 1 emits "#";
;; after reload, when operands 0 and 1 differ, it is split into a copy of
;; the field from operand 1 to operand 0 followed by the in-place subtract.
8050 (define_insn_and_split "*subqi_ext<mode>_1"
8051 [(set (zero_extract:SWI248
8052 (match_operand 0 "int248_register_operand" "+Q,&Q")
8053 (const_int 8)
8054 (const_int 8))
8055 (subreg:SWI248
8056 (minus:QI
8057 (subreg:QI
8058 (match_operator:SWI248 3 "extract_operator"
8059 [(match_operand 1 "int248_register_operand" "0,!Q")
8060 (const_int 8)
8061 (const_int 8)]) 0)
8062 (match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0))
8063 (clobber (reg:CC FLAGS_REG))]
8064 ""
8065 "@
8066 sub{b}\t{%2, %h0|%h0, %2}
8067 #"
8068 "reload_completed
8069 && !(rtx_equal_p (operands[0], operands[1]))"
8070 [(set (zero_extract:SWI248
8071 (match_dup 0) (const_int 8) (const_int 8))
8072 (zero_extract:SWI248
8073 (match_dup 1) (const_int 8) (const_int 8)))
8074 (parallel
8075 [(set (zero_extract:SWI248
8076 (match_dup 0) (const_int 8) (const_int 8))
8077 (subreg:SWI248
8078 (minus:QI
8079 (subreg:QI
8080 (match_op_dup 3
8081 [(match_dup 0) (const_int 8) (const_int 8)]) 0)
8082 (match_dup 2)) 0))
8083 (clobber (reg:CC FLAGS_REG))])]
8084 ""
8085 [(set_attr "addr" "gpr8")
8086 (set_attr "type" "alu")
8087 (set_attr "mode" "QI")])
8088
8089 ;; Subtract with jump on overflow.
;; The flags are set in CCO mode by comparing the subtraction done in the
;; wider <DPWI> mode with the sign-extended narrow result; the jump to label
;; operand 3 is taken on (eq (reg:CCO) 0).  Operand 4 is the wide form of
;; operand 2: the constant itself when operand 2 is a constant scalar
;; integer, otherwise a SIGN_EXTEND of it.
8090 (define_expand "subv<mode>4"
8091 [(parallel [(set (reg:CCO FLAGS_REG)
8092 (eq:CCO
8093 (minus:<DPWI>
8094 (sign_extend:<DPWI>
8095 (match_operand:SWIDWI 1 "nonimmediate_operand"))
8096 (match_dup 4))
8097 (sign_extend:<DPWI>
8098 (minus:SWIDWI (match_dup 1)
8099 (match_operand:SWIDWI 2
8100 "<general_hilo_operand>")))))
8101 (set (match_operand:SWIDWI 0 "register_operand")
8102 (minus:SWIDWI (match_dup 1) (match_dup 2)))])
8103 (set (pc) (if_then_else
8104 (eq (reg:CCO FLAGS_REG) (const_int 0))
8105 (label_ref (match_operand 3))
8106 (pc)))]
8107 ""
8108 {
8109 ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands,
8110 TARGET_APX_NDD);
8111 if (CONST_SCALAR_INT_P (operands[2]))
8112 operands[4] = operands[2];
8113 else
8114 operands[4] = gen_rtx_SIGN_EXTEND (<DPWI>mode, operands[2]);
8115 })
8116
;; Overflow-checking subtract with a non-constant subtrahend: FLAGS_REG is
;; set in CCO mode from the comparison of the double-width difference of the
;; sign-extended operands with the sign-extended narrow difference, and the
;; narrow difference is stored in operand 0.  Emitted as a plain sub;
;; alternatives 2 and 3 are the three-operand form tagged isa "apx_ndd".
8117 (define_insn "*subv<mode>4"
8118 [(set (reg:CCO FLAGS_REG)
8119 (eq:CCO (minus:<DWI>
8120 (sign_extend:<DWI>
8121 (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r"))
8122 (sign_extend:<DWI>
8123 (match_operand:SWI 2 "<general_sext_operand>" "<r>We,m,rWe,m")))
8124 (sign_extend:<DWI>
8125 (minus:SWI (match_dup 1) (match_dup 2)))))
8126 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
8127 (minus:SWI (match_dup 1) (match_dup 2)))]
8128 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
8129 TARGET_APX_NDD)"
8130 "@
8131 sub{<imodesuffix>}\t{%2, %0|%0, %2}
8132 sub{<imodesuffix>}\t{%2, %0|%0, %2}
8133 sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
8134 sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
8135 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
8136 (set_attr "type" "alu")
8137 (set_attr "mode" "<MODE>")])
8138
;; Overflow-checking subtract of an immediate.  Operand 3 is the constant as
;; it appears in the double-width subtraction; the insn condition requires
;; operand 2 to be a CONST_INT with the same INTVAL.  length_immediate is 1
;; when the value is in [-128, 127], 4 when <MODE_SIZE> is 8, and
;; <MODE_SIZE> otherwise.
8139 (define_insn "subv<mode>4_1"
8140 [(set (reg:CCO FLAGS_REG)
8141 (eq:CCO (minus:<DWI>
8142 (sign_extend:<DWI>
8143 (match_operand:SWI 1 "nonimmediate_operand" "0,rm"))
8144 (match_operand:<DWI> 3 "const_int_operand"))
8145 (sign_extend:<DWI>
8146 (minus:SWI
8147 (match_dup 1)
8148 (match_operand:SWI 2 "x86_64_immediate_operand" "<i>,<i>")))))
8149 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
8150 (minus:SWI (match_dup 1) (match_dup 2)))]
8151 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
8152 TARGET_APX_NDD)
8153 && CONST_INT_P (operands[2])
8154 && INTVAL (operands[2]) == INTVAL (operands[3])"
8155 "@
8156 sub{<imodesuffix>}\t{%2, %0|%0, %2}
8157 sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
8158 [(set_attr "isa" "*,apx_ndd")
8159 (set_attr "type" "alu")
8160 (set_attr "mode" "<MODE>")
8161 (set (attr "length_immediate")
8162 (cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
8163 (const_string "1")
8164 (match_test "<MODE_SIZE> == 8")
8165 (const_string "4")]
8166 (const_string "<MODE_SIZE>")))])
8167
;; Double-word overflow-checking subtract.  Split after reload into:
;;   1. a flag-setting DWIH subtract on operands 0-2 (the first halves);
;;   2. a DWIH subtract-with-borrow on operands 3-5 (the second halves) that
;;      also sets FLAGS_REG in CCO mode, in the shape matched by the
;;      *subv<mode>4_overflow_1 pattern.
;; split_double_mode produces the two sets of half operands.
8168 (define_insn_and_split "*subv<dwi>4_doubleword"
8169 [(set (reg:CCO FLAGS_REG)
8170 (eq:CCO
8171 (minus:<QPWI>
8172 (sign_extend:<QPWI>
8173 (match_operand:<DWI> 1 "nonimmediate_operand" "0,0,ro,r"))
8174 (sign_extend:<QPWI>
8175 (match_operand:<DWI> 2 "nonimmediate_operand" "r,o,r,o")))
8176 (sign_extend:<QPWI>
8177 (minus:<DWI> (match_dup 1) (match_dup 2)))))
8178 (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,&r,&r")
8179 (minus:<DWI> (match_dup 1) (match_dup 2)))]
8180 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
8181 TARGET_APX_NDD)"
8182 "#"
8183 "&& reload_completed"
8184 [(parallel [(set (reg:CC FLAGS_REG)
8185 (compare:CC (match_dup 1) (match_dup 2)))
8186 (set (match_dup 0)
8187 (minus:DWIH (match_dup 1) (match_dup 2)))])
8188 (parallel [(set (reg:CCO FLAGS_REG)
8189 (eq:CCO
8190 (minus:<DWI>
8191 (minus:<DWI>
8192 (sign_extend:<DWI> (match_dup 4))
8193 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0)))
8194 (sign_extend:<DWI> (match_dup 5)))
8195 (sign_extend:<DWI>
8196 (minus:DWIH
8197 (minus:DWIH
8198 (match_dup 4)
8199 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
8200 (match_dup 5)))))
8201 (set (match_dup 3)
8202 (minus:DWIH
8203 (minus:DWIH
8204 (match_dup 4)
8205 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
8206 (match_dup 5)))])]
8207 {
8208 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
8209 }
8210 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
8211
;; Double-word overflow-checking subtract of a constant.  The insn condition
;; requires operand 2 to be a constant scalar integer equal (rtx_equal_p) to
;; operand 3, the constant used in the wide subtraction.  Split after reload
;; like *subv<dwi>4_doubleword, except that the second half uses the
;; constant half (operand 5) directly instead of a sign_extend.
;; Special case: when the first half of the constant is const0_rtx, the
;; first half is only a move (skipped if source and destination are equal)
;; and the second half is emitted through gen_subv<mode>4_1.
8212 (define_insn_and_split "*subv<dwi>4_doubleword_1"
8213 [(set (reg:CCO FLAGS_REG)
8214 (eq:CCO
8215 (minus:<QPWI>
8216 (sign_extend:<QPWI>
8217 (match_operand:<DWI> 1 "nonimmediate_operand" "0,ro"))
8218 (match_operand:<QPWI> 3 "const_scalar_int_operand"))
8219 (sign_extend:<QPWI>
8220 (minus:<DWI>
8221 (match_dup 1)
8222 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "<di>,<di>")))))
8223 (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,&r")
8224 (minus:<DWI> (match_dup 1) (match_dup 2)))]
8225 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
8226 TARGET_APX_NDD)
8227 && CONST_SCALAR_INT_P (operands[2])
8228 && rtx_equal_p (operands[2], operands[3])"
8229 "#"
8230 "&& reload_completed"
8231 [(parallel [(set (reg:CC FLAGS_REG)
8232 (compare:CC (match_dup 1) (match_dup 2)))
8233 (set (match_dup 0)
8234 (minus:DWIH (match_dup 1) (match_dup 2)))])
8235 (parallel [(set (reg:CCO FLAGS_REG)
8236 (eq:CCO
8237 (minus:<DWI>
8238 (minus:<DWI>
8239 (sign_extend:<DWI> (match_dup 4))
8240 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0)))
8241 (match_dup 5))
8242 (sign_extend:<DWI>
8243 (minus:DWIH
8244 (minus:DWIH
8245 (match_dup 4)
8246 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
8247 (match_dup 5)))))
8248 (set (match_dup 3)
8249 (minus:DWIH
8250 (minus:DWIH
8251 (match_dup 4)
8252 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)))
8253 (match_dup 5)))])]
8254 {
8255 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
8256 if (operands[2] == const0_rtx)
8257 {
8258 if (!rtx_equal_p (operands[0], operands[1]))
8259 emit_move_insn (operands[0], operands[1]);
8260 emit_insn (gen_subv<mode>4_1 (operands[3], operands[4], operands[5],
8261 operands[5]));
8262 DONE;
8263 }
8264 }
8265 [(set_attr "isa" "*,apx_ndd")])
8266
;; Subtract-with-borrow (sbb) that also sets FLAGS_REG in CCO mode: computes
;; (operand 1 - carry) - operand 2, where the carry is the
;; ix86_carry_flag_operator applied to flags operand 3.  Alternatives 2 and 3
;; are the three-operand form tagged isa "apx_ndd".
;; Operand 1 must not carry the '%' (commutative) constraint modifier:
;; subtraction is not commutative, so operands 1 and 2 may never be
;; interchanged to satisfy constraints.
8267 (define_insn "*subv<mode>4_overflow_1"
8268 [(set (reg:CCO FLAGS_REG)
8269 (eq:CCO
8270 (minus:<DWI>
8271 (minus:<DWI>
8272 (sign_extend:<DWI>
8273 (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r"))
8274 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
8275 [(match_operand 3 "flags_reg_operand") (const_int 0)]))
8276 (sign_extend:<DWI>
8277 (match_operand:SWI 2 "<general_sext_operand>" "rWe,m,rWe,m")))
8278 (sign_extend:<DWI>
8279 (minus:SWI
8280 (minus:SWI
8281 (match_dup 1)
8282 (match_operator:SWI 5 "ix86_carry_flag_operator"
8283 [(match_dup 3) (const_int 0)]))
8284 (match_dup 2)))))
8285 (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r,r,r")
8286 (minus:SWI
8287 (minus:SWI
8288 (match_dup 1)
8289 (match_op_dup 5 [(match_dup 3) (const_int 0)]))
8290 (match_dup 2)))]
8291 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
8292 TARGET_APX_NDD)"
8293 "@
8294 sbb{<imodesuffix>}\t{%2, %0|%0, %2}
8295 sbb{<imodesuffix>}\t{%2, %0|%0, %2}
8296 sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
8297 sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
8298 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
8299 (set_attr "type" "alu")
8300 (set_attr "mode" "<MODE>")])
8301
;; Subtract-with-borrow (sbb) of an immediate that also sets FLAGS_REG in
;; CCO mode.  Operand 6 is the constant as it appears in the double-width
;; subtraction; the insn condition requires operand 2 to be a CONST_INT with
;; the same INTVAL.  Alternative 1 is the three-operand form tagged isa
;; "apx_ndd".
;; Operand 1 must not carry the '%' (commutative) constraint modifier:
;; subtraction is not commutative, so operands 1 and 2 may never be
;; interchanged to satisfy constraints.
8302 (define_insn "*subv<mode>4_overflow_2"
8303 [(set (reg:CCO FLAGS_REG)
8304 (eq:CCO
8305 (minus:<DWI>
8306 (minus:<DWI>
8307 (sign_extend:<DWI>
8308 (match_operand:SWI 1 "nonimmediate_operand" "0,rm"))
8309 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
8310 [(match_operand 3 "flags_reg_operand") (const_int 0)]))
8311 (match_operand:<DWI> 6 "const_int_operand" "n,n"))
8312 (sign_extend:<DWI>
8313 (minus:SWI
8314 (minus:SWI
8315 (match_dup 1)
8316 (match_operator:SWI 5 "ix86_carry_flag_operator"
8317 [(match_dup 3) (const_int 0)]))
8318 (match_operand:SWI 2 "x86_64_immediate_operand" "e,e")))))
8319 (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r")
8320 (minus:SWI
8321 (minus:SWI
8322 (match_dup 1)
8323 (match_op_dup 5 [(match_dup 3) (const_int 0)]))
8324 (match_dup 2)))]
8325 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
8326 TARGET_APX_NDD)
8327 && CONST_INT_P (operands[2])
8328 && INTVAL (operands[2]) == INTVAL (operands[6])"
8329 "@
8330 sbb{<imodesuffix>}\t{%2, %0|%0, %2}
8331 sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
8332 [(set_attr "isa" "*,apx_ndd")
8333 (set_attr "type" "alu")
8334 (set_attr "mode" "<MODE>")
8335 (set (attr "length_immediate")
8336 (if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
8337 (const_string "1")
8338 (const_string "4")))])
8339
;; Unsigned subtract with jump on borrow.  The subtract sets FLAGS_REG in CC
;; mode from comparing operand 1 with operand 2; the branch to label operand
;; 3 is taken when that comparison is LTU.  Operands are fixed up (without a
;; copy) by ix86_fixup_binary_operands_no_copy before the pattern is emitted.
8340 (define_expand "usubv<mode>4"
8341 [(parallel [(set (reg:CC FLAGS_REG)
8342 (compare:CC
8343 (match_operand:SWI 1 "nonimmediate_operand")
8344 (match_operand:SWI 2 "<general_operand>")))
8345 (set (match_operand:SWI 0 "register_operand")
8346 (minus:SWI (match_dup 1) (match_dup 2)))])
8347 (set (pc) (if_then_else
8348 (ltu (reg:CC FLAGS_REG) (const_int 0))
8349 (label_ref (match_operand 3))
8350 (pc)))]
8351 ""
8352 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands,
8353 TARGET_APX_NDD);")
8354
;; Subtract that also sets FLAGS_REG from a direct compare of operand 1 with
;; operand 2 (rather than of the difference with zero).  Only matches when
;; ix86_match_ccmode accepts CCmode for the insn.  Alternatives 2 and 3 are
;; the three-operand form tagged isa "apx_ndd".
;; The output constraints mirror *sub<mode>_2: alternative 1 is a plain
;; register ("<r>"); an immediate constraint has no meaning on an output.
8355 (define_insn "*sub<mode>_3"
8356 [(set (reg FLAGS_REG)
8357 (compare (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r")
8358 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>")))
8359 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
8360 (minus:SWI (match_dup 1) (match_dup 2)))]
8361 "ix86_match_ccmode (insn, CCmode)
8362 && ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
8363 TARGET_APX_NDD)"
8364 "@
8365 sub{<imodesuffix>}\t{%2, %0|%0, %2}
8366 sub{<imodesuffix>}\t{%2, %0|%0, %2}
8367 sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
8368 sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
8369 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
8370 (set_attr "type" "alu")
8371 (set_attr "mode" "<MODE>")])
8372
;; A flag-setting in-place subtract whose insn carries a REG_UNUSED note for
;; the destination register is replaced by just the compare that sets
;; FLAGS_REG; the subtraction result is dropped.
8373 (define_peephole2
8374 [(parallel
8375 [(set (reg:CC FLAGS_REG)
8376 (compare:CC (match_operand:SWI 0 "general_reg_operand")
8377 (match_operand:SWI 1 "general_gr_operand")))
8378 (set (match_dup 0)
8379 (minus:SWI (match_dup 0) (match_dup 1)))])]
8380 "find_regno_note (peep2_next_insn (0), REG_UNUSED, REGNO (operands[0])) != 0"
8381 [(set (reg:CC FLAGS_REG)
8382 (compare:CC (match_dup 0) (match_dup 1)))])
8383
;; Turn
;;   reg = mem1;  reg = reg - mem2 (setting flags);  mem1 = reg
;; into
;;   reg = mem2;  mem1 = mem1 - reg (setting flags)
;; Requires TARGET_READ_MODIFY_WRITE or optimizing for size,
;; peep2_reg_dead_p (3, reg), and that reg is not mentioned in either
;; memory operand.
8384 (define_peephole2
8385 [(set (match_operand:SWI 0 "general_reg_operand")
8386 (match_operand:SWI 1 "memory_operand"))
8387 (parallel [(set (reg:CC FLAGS_REG)
8388 (compare:CC (match_dup 0)
8389 (match_operand:SWI 2 "memory_operand")))
8390 (set (match_dup 0)
8391 (minus:SWI (match_dup 0) (match_dup 2)))])
8392 (set (match_dup 1) (match_dup 0))]
8393 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
8394 && peep2_reg_dead_p (3, operands[0])
8395 && !reg_overlap_mentioned_p (operands[0], operands[1])
8396 && !reg_overlap_mentioned_p (operands[0], operands[2])"
8397 [(set (match_dup 0) (match_dup 2))
8398 (parallel [(set (reg:CC FLAGS_REG)
8399 (compare:CC (match_dup 1) (match_dup 0)))
8400 (set (match_dup 1)
8401 (minus:SWI (match_dup 1) (match_dup 0)))])])
8402
8403 ;; decl %eax; cmpl $-1, %eax; jne .Lxx; can be optimized into
8404 ;; subl $1, %eax; jnc .Lxx;
;; Matches three insns: reg += -1 (flags clobbered), a CCZ compare of reg
;; with -1, and a conditional jump on that compare.  Guarded by
;; peep2_regno_dead_p (3, FLAGS_REG).  The replacement is a flag-setting
;; subtract of 1 followed by a jump whose condition (operand 3) is built in
;; the preparation code: GEU when the original comparison code was NE, LTU
;; otherwise.
8405 (define_peephole2
8406 [(parallel
8407 [(set (match_operand:SWI 0 "general_reg_operand")
8408 (plus:SWI (match_dup 0) (const_int -1)))
8409 (clobber (reg FLAGS_REG))])
8410 (set (reg:CCZ FLAGS_REG)
8411 (compare:CCZ (match_dup 0) (const_int -1)))
8412 (set (pc)
8413 (if_then_else (match_operator 1 "bt_comparison_operator"
8414 [(reg:CCZ FLAGS_REG) (const_int 0)])
8415 (match_operand 2)
8416 (pc)))]
8417 "peep2_regno_dead_p (3, FLAGS_REG)"
8418 [(parallel
8419 [(set (reg:CC FLAGS_REG)
8420 (compare:CC (match_dup 0) (const_int 1)))
8421 (set (match_dup 0)
8422 (minus:SWI (match_dup 0) (const_int 1)))])
8423 (set (pc)
8424 (if_then_else (match_dup 3)
8425 (match_dup 2)
8426 (pc)))]
8427 {
8428 rtx cc = gen_rtx_REG (CCmode, FLAGS_REG);
8429 operands[3] = gen_rtx_fmt_ee (GET_CODE (operands[1]) == NE
8430 ? GEU : LTU, VOIDmode, cc, const0_rtx);
8431 })
8432
8433 ;; Help combine use borrow flag to test for -1 after dec (add $-1).
;; Conditional select between (operand 2 - 1) and operand 3, keyed on a
;; comparison of operand 2 with zero.  Requires TARGET_CMOVE.  Split after
;; reload into a flag-setting subtract of 1 into operand 0, then a
;; conditional move between operand 0 and operand 3.  The cmov condition
;; (operand 4) tests FLAGS_REG in CCCmode: GEU when the original comparison
;; code was NE, LTU otherwise.
8434 (define_insn_and_split "*dec_cmov<mode>"
8435 [(set (match_operand:SWI248 0 "register_operand" "=r")
8436 (if_then_else:SWI248
8437 (match_operator 1 "bt_comparison_operator"
8438 [(match_operand:SWI248 2 "register_operand" "0") (const_int 0)])
8439 (plus:SWI248 (match_dup 2) (const_int -1))
8440 (match_operand:SWI248 3 "nonimmediate_operand" "rm")))
8441 (clobber (reg:CC FLAGS_REG))]
8442 "TARGET_CMOVE"
8443 "#"
8444 "&& reload_completed"
8445 [(parallel [(set (reg:CC FLAGS_REG)
8446 (compare:CC (match_dup 2) (const_int 1)))
8447 (set (match_dup 0) (minus:SWI248 (match_dup 2) (const_int 1)))])
8448 (set (match_dup 0)
8449 (if_then_else:SWI248 (match_dup 4) (match_dup 0) (match_dup 3)))]
8450 {
8451 rtx cc = gen_rtx_REG (CCCmode, FLAGS_REG);
8452 operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[1]) == NE
8453 ? GEU : LTU, VOIDmode, cc, const0_rtx);
8454 })
8455
;; 64-bit-only variant of *sub<mode>_3 for SImode: sets FLAGS_REG from a
;; direct compare of operand 1 with operand 2 and stores the zero-extended
;; SImode difference in a DImode register.  Alternative 0 prints operand 1,
;; which its "0" constraint ties to the destination register.
8456 (define_insn "*subsi_3_zext"
8457 [(set (reg FLAGS_REG)
8458 (compare (match_operand:SI 1 "nonimmediate_operand" "0,r,rm")
8459 (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re")))
8460 (set (match_operand:DI 0 "register_operand" "=r,r,r")
8461 (zero_extend:DI
8462 (minus:SI (match_dup 1)
8463 (match_dup 2))))]
8464 "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)
8465 && ix86_binary_operator_ok (MINUS, SImode, operands,
8466 TARGET_APX_NDD)"
8467 "@
8468 sub{l}\t{%2, %1|%1, %2}
8469 sub{l}\t{%2, %1, %k0|%k0, %1, %2}
8470 sub{l}\t{%2, %1, %k0|%k0, %1, %2}"
8471 [(set_attr "isa" "*,apx_ndd,apx_ndd")
8472 (set_attr "type" "alu")
8473 (set_attr "mode" "SI")])
8474 \f
8475 ;; Add with carry and subtract with borrow
8476
;; Add with carry: operand 0 = carry + operand 1 + operand 2, where the
;; carry is the ix86_carry_flag_operator (operand 4) applied to flags
;; operand 3.  FLAGS_REG is clobbered.  Alternatives 2 and 3 are the
;; three-operand form tagged isa "apx_ndd".  The "use_carry" attribute is
;; set and "pent_pair" is "pu".
8477 (define_insn "@add<mode>3_carry"
8478 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
8479 (plus:SWI
8480 (plus:SWI
8481 (match_operator:SWI 4 "ix86_carry_flag_operator"
8482 [(match_operand 3 "flags_reg_operand") (const_int 0)])
8483 (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r"))
8484 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>")))
8485 (clobber (reg:CC FLAGS_REG))]
8486 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
8487 "@
8488 adc{<imodesuffix>}\t{%2, %0|%0, %2}
8489 adc{<imodesuffix>}\t{%2, %0|%0, %2}
8490 adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
8491 adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
8492 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
8493 (set_attr "type" "alu")
8494 (set_attr "use_carry" "1")
8495 (set_attr "pent_pair" "pu")
8496 (set_attr "mode" "<MODE>")])
8497
;; Turn
;;   reg = mem1;  reg = carry + reg + mem2;  mem1 = reg
;; into
;;   reg = mem2;  mem1 = carry + mem1 + reg
;; Requires TARGET_READ_MODIFY_WRITE or optimizing for size,
;; peep2_reg_dead_p (3, reg), and that reg is not mentioned in either
;; memory operand.
8498 (define_peephole2
8499 [(set (match_operand:SWI 0 "general_reg_operand")
8500 (match_operand:SWI 1 "memory_operand"))
8501 (parallel [(set (match_dup 0)
8502 (plus:SWI
8503 (plus:SWI
8504 (match_operator:SWI 4 "ix86_carry_flag_operator"
8505 [(match_operand 3 "flags_reg_operand")
8506 (const_int 0)])
8507 (match_dup 0))
8508 (match_operand:SWI 2 "memory_operand")))
8509 (clobber (reg:CC FLAGS_REG))])
8510 (set (match_dup 1) (match_dup 0))]
8511 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
8512 && peep2_reg_dead_p (3, operands[0])
8513 && !reg_overlap_mentioned_p (operands[0], operands[1])
8514 && !reg_overlap_mentioned_p (operands[0], operands[2])"
8515 [(set (match_dup 0) (match_dup 2))
8516 (parallel [(set (match_dup 1)
8517 (plus:SWI (plus:SWI (match_op_dup 4
8518 [(match_dup 3) (const_int 0)])
8519 (match_dup 1))
8520 (match_dup 0)))
8521 (clobber (reg:CC FLAGS_REG))])])
8522
;; Same transformation as the preceding add-with-carry peephole, but for
;; a four-insn sequence in which the sum is first copied to a second
;; register (operand 5) and that copy is what gets stored back to mem1:
;;   reg0 = mem1;  reg0 = carry + reg0 + mem2;  reg5 = reg0;  mem1 = reg5
;; The replacement is again "reg0 = mem2; mem1 = carry + mem1 + reg0".
;; Both reg0 and reg5 must pass peep2_reg_dead_p, and neither may be
;; mentioned in mem1 (reg0 additionally not in mem2).
8523 (define_peephole2
8524 [(set (match_operand:SWI 0 "general_reg_operand")
8525 (match_operand:SWI 1 "memory_operand"))
8526 (parallel [(set (match_dup 0)
8527 (plus:SWI
8528 (plus:SWI
8529 (match_operator:SWI 4 "ix86_carry_flag_operator"
8530 [(match_operand 3 "flags_reg_operand")
8531 (const_int 0)])
8532 (match_dup 0))
8533 (match_operand:SWI 2 "memory_operand")))
8534 (clobber (reg:CC FLAGS_REG))])
8535 (set (match_operand:SWI 5 "general_reg_operand") (match_dup 0))
8536 (set (match_dup 1) (match_dup 5))]
8537 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
8538 && peep2_reg_dead_p (3, operands[0])
8539 && peep2_reg_dead_p (4, operands[5])
8540 && !reg_overlap_mentioned_p (operands[0], operands[1])
8541 && !reg_overlap_mentioned_p (operands[0], operands[2])
8542 && !reg_overlap_mentioned_p (operands[5], operands[1])"
8543 [(set (match_dup 0) (match_dup 2))
8544 (parallel [(set (match_dup 1)
8545 (plus:SWI (plus:SWI (match_op_dup 4
8546 [(match_dup 3) (const_int 0)])
8547 (match_dup 1))
8548 (match_dup 0)))
8549 (clobber (reg:CC FLAGS_REG))])])
8550
;; Add only the carry to operand 1: operand 0 = carry test + operand 1,
;; emitted as "adc $0".  Operand 1 is tied to operand 0; a memory
;; destination is accepted only when it is rtx_equal_p to operand 1.
8551 (define_insn "*add<mode>3_carry_0"
8552 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
8553 (plus:SWI
8554 (match_operator:SWI 2 "ix86_carry_flag_operator"
8555 [(reg FLAGS_REG) (const_int 0)])
8556 (match_operand:SWI 1 "nonimmediate_operand" "0")))
8557 (clobber (reg:CC FLAGS_REG))]
8558 "!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[1])"
8559 "adc{<imodesuffix>}\t{$0, %0|%0, 0}"
8560 [(set_attr "type" "alu")
8561 (set_attr "use_carry" "1")
8562 (set_attr "pent_pair" "pu")
8563 (set_attr "mode" "<MODE>")])
8564
;; Variant of *add<mode>3_carry_0 whose operator 2 is matched by
;; ix86_carry_flag_unset_operator instead of ix86_carry_flag_operator,
;; and which is emitted as "sbb $-1" rather than "adc $0".
;; NOTE(review): presumably the operator is 1 when the carry is clear, so
;; x + (1 - CF) == x - (-1) - CF; confirm against the predicate definition.
8565 (define_insn "*add<mode>3_carry_0r"
8566 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
8567 (plus:SWI
8568 (match_operator:SWI 2 "ix86_carry_flag_unset_operator"
8569 [(reg FLAGS_REG) (const_int 0)])
8570 (match_operand:SWI 1 "nonimmediate_operand" "0")))
8571 (clobber (reg:CC FLAGS_REG))]
8572 "!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[1])"
8573 "sbb{<imodesuffix>}\t{$-1, %0|%0, -1}"
8574 [(set_attr "type" "alu")
8575 (set_attr "use_carry" "1")
8576 (set_attr "pent_pair" "pu")
8577 (set_attr "mode" "<MODE>")])
8578
;; 64-bit-only SImode add with carry whose result is zero-extended into
;; DImode operand 0: operand 0 = zext (carry test + operand 1 + operand 2).
;; Alternative 0 is the two-operand ADC; alternatives 1-2 print three
;; operands and carry the "apx_ndd" isa.  Clobbers the flags register.
8579 (define_insn "*addsi3_carry_zext"
8580 [(set (match_operand:DI 0 "register_operand" "=r,r,r")
8581 (zero_extend:DI
8582 (plus:SI
8583 (plus:SI (match_operator:SI 3 "ix86_carry_flag_operator"
8584 [(reg FLAGS_REG) (const_int 0)])
8585 (match_operand:SI 1 "nonimmediate_operand" "%0,r,rm"))
8586 (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re"))))
8587 (clobber (reg:CC FLAGS_REG))]
8588 "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands,
8589 TARGET_APX_NDD)"
8590 "@
8591 adc{l}\t{%2, %k0|%k0, %2}
8592 adc{l}\t{%2, %1, %k0|%k0, %1, %2}
8593 adc{l}\t{%2, %1, %k0|%k0, %1, %2}"
8594 [(set_attr "isa" "*,apx_ndd,apx_ndd")
8595 (set_attr "type" "alu")
8596 (set_attr "use_carry" "1")
8597 (set_attr "pent_pair" "pu")
8598 (set_attr "mode" "SI")])
8599
;; 64-bit-only: operand 0 = zext (carry test + SImode operand 1), emitted
;; as "adc $0".  Alternative 0 ties operand 1 to operand 0; alternative 1
;; prints operand 1 as a separate source and carries the "apx_ndd" isa.
8600 (define_insn "*addsi3_carry_zext_0"
8601 [(set (match_operand:DI 0 "register_operand" "=r,r")
8602 (zero_extend:DI
8603 (plus:SI (match_operator:SI 2 "ix86_carry_flag_operator"
8604 [(reg FLAGS_REG) (const_int 0)])
8605 (match_operand:SI 1 "nonimmediate_operand" "0,rm"))))
8606 (clobber (reg:CC FLAGS_REG))]
8607 "TARGET_64BIT"
8608 "@
8609 adc{l}\t{$0, %k0|%k0, 0}
8610 adc{l}\t{$0, %1, %k0|%k0, %1, 0}"
8611 [(set_attr "isa" "*,apx_ndd")
8612 (set_attr "type" "alu")
8613 (set_attr "use_carry" "1")
8614 (set_attr "pent_pair" "pu")
8615 (set_attr "mode" "SI")])
8616
;; Variant of *addsi3_carry_zext_0 whose operator 2 is matched by
;; ix86_carry_flag_unset_operator; emitted as "sbb $-1" instead of
;; "adc $0".  Alternative 1 is the three-operand form with the "apx_ndd"
;; isa.
;; NOTE(review): relies on x + (1 - CF) == x - (-1) - CF, assuming the
;; operator yields 1 when the carry is clear; confirm against the predicate.
8617 (define_insn "*addsi3_carry_zext_0r"
8618 [(set (match_operand:DI 0 "register_operand" "=r,r")
8619 (zero_extend:DI
8620 (plus:SI (match_operator:SI 2 "ix86_carry_flag_unset_operator"
8621 [(reg FLAGS_REG) (const_int 0)])
8622 (match_operand:SI 1 "nonimmediate_operand" "0,rm"))))
8623 (clobber (reg:CC FLAGS_REG))]
8624 "TARGET_64BIT"
8625 "@
8626 sbb{l}\t{$-1, %k0|%k0, -1}
8627 sbb{l}\t{$-1, %1, %k0|%k0, %1, -1}"
8628 [(set_attr "isa" "*,apx_ndd")
8629 (set_attr "type" "alu")
8630 (set_attr "use_carry" "1")
8631 (set_attr "pent_pair" "pu")
8632 (set_attr "mode" "SI")])
8633
8634 ;; There is no point in generating the ADCX instruction: ADC is shorter and faster.
8635
;; Add with carry-in that also produces a carry-out in CCCmode.
;; The flags are set by comparing, in double-width <DWI> mode,
;;   zext (carry + operand 1 + operand 2)  with  zext (operand 2) + carry
;; and operand 0 receives carry + operand 1 + operand 2.
;; Operand 3 is the flags register; operators 4 and 5 are the carry test
;; in <DWI> mode and in the SWI48 mode respectively.
;; Alternatives 2-3 print three operands and carry the "apx_ndd" isa.
8636 (define_insn "addcarry<mode>"
8637 [(set (reg:CCC FLAGS_REG)
8638 (compare:CCC
8639 (zero_extend:<DWI>
8640 (plus:SWI48
8641 (plus:SWI48
8642 (match_operator:SWI48 5 "ix86_carry_flag_operator"
8643 [(match_operand 3 "flags_reg_operand") (const_int 0)])
8644 (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,rm,r"))
8645 (match_operand:SWI48 2 "nonimmediate_operand" "r,rm,r,m")))
8646 (plus:<DWI>
8647 (zero_extend:<DWI> (match_dup 2))
8648 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
8649 [(match_dup 3) (const_int 0)]))))
8650 (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r")
8651 (plus:SWI48 (plus:SWI48 (match_op_dup 5
8652 [(match_dup 3) (const_int 0)])
8653 (match_dup 1))
8654 (match_dup 2)))]
8655 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
8656 "@
8657 adc{<imodesuffix>}\t{%2, %0|%0, %2}
8658 adc{<imodesuffix>}\t{%2, %0|%0, %2}
8659 adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
8660 adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
8661 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
8662 (set_attr "type" "alu")
8663 (set_attr "use_carry" "1")
8664 (set_attr "pent_pair" "pu")
8665 (set_attr "mode" "<MODE>")])
8666
;; Convert the two-insn sequence
;;   {flags, reg0} = addcarry (reg0, mem1);  mem1 = reg0
;; into a single addcarry whose destination is mem1 and whose other
;; addend is reg0.  In the replacement the roles of operands 0 and 1 are
;; swapped throughout, including in the <DWI> compare arm.
;; Applied only when TARGET_READ_MODIFY_WRITE holds or when optimizing
;; for size, reg0 passes peep2_reg_dead_p, and reg0 is not mentioned in
;; mem1.
8667 (define_peephole2
8668 [(parallel [(set (reg:CCC FLAGS_REG)
8669 (compare:CCC
8670 (zero_extend:<DWI>
8671 (plus:SWI48
8672 (plus:SWI48
8673 (match_operator:SWI48 4 "ix86_carry_flag_operator"
8674 [(match_operand 2 "flags_reg_operand")
8675 (const_int 0)])
8676 (match_operand:SWI48 0 "general_reg_operand"))
8677 (match_operand:SWI48 1 "memory_operand")))
8678 (plus:<DWI>
8679 (zero_extend:<DWI> (match_dup 1))
8680 (match_operator:<DWI> 3 "ix86_carry_flag_operator"
8681 [(match_dup 2) (const_int 0)]))))
8682 (set (match_dup 0)
8683 (plus:SWI48 (plus:SWI48 (match_op_dup 4
8684 [(match_dup 2) (const_int 0)])
8685 (match_dup 0))
8686 (match_dup 1)))])
8687 (set (match_dup 1) (match_dup 0))]
8688 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
8689 && peep2_reg_dead_p (2, operands[0])
8690 && !reg_overlap_mentioned_p (operands[0], operands[1])"
8691 [(parallel [(set (reg:CCC FLAGS_REG)
8692 (compare:CCC
8693 (zero_extend:<DWI>
8694 (plus:SWI48
8695 (plus:SWI48
8696 (match_op_dup 4
8697 [(match_dup 2) (const_int 0)])
8698 (match_dup 1))
8699 (match_dup 0)))
8700 (plus:<DWI>
8701 (zero_extend:<DWI> (match_dup 0))
8702 (match_op_dup 3
8703 [(match_dup 2) (const_int 0)]))))
8704 (set (match_dup 1)
8705 (plus:SWI48 (plus:SWI48 (match_op_dup 4
8706 [(match_dup 2) (const_int 0)])
8707 (match_dup 1))
8708 (match_dup 0)))])])
8709
;; Convert the three-insn sequence
;;   reg0 = mem1;  {flags, reg0} = addcarry (reg0, mem2);  mem1 = reg0
;; into
;;   reg0 = mem2;  {flags, mem1} = addcarry (mem1, reg0)
;; Operand 3 is the flags register; operators 4 and 5 are the <DWI> and
;; SWI48 carry tests.  Applied only when TARGET_READ_MODIFY_WRITE holds or
;; when optimizing for size, reg0 passes peep2_reg_dead_p, and reg0 is
;; mentioned in neither memory operand.
8710 (define_peephole2
8711 [(set (match_operand:SWI48 0 "general_reg_operand")
8712 (match_operand:SWI48 1 "memory_operand"))
8713 (parallel [(set (reg:CCC FLAGS_REG)
8714 (compare:CCC
8715 (zero_extend:<DWI>
8716 (plus:SWI48
8717 (plus:SWI48
8718 (match_operator:SWI48 5 "ix86_carry_flag_operator"
8719 [(match_operand 3 "flags_reg_operand")
8720 (const_int 0)])
8721 (match_dup 0))
8722 (match_operand:SWI48 2 "memory_operand")))
8723 (plus:<DWI>
8724 (zero_extend:<DWI> (match_dup 2))
8725 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
8726 [(match_dup 3) (const_int 0)]))))
8727 (set (match_dup 0)
8728 (plus:SWI48 (plus:SWI48 (match_op_dup 5
8729 [(match_dup 3) (const_int 0)])
8730 (match_dup 0))
8731 (match_dup 2)))])
8732 (set (match_dup 1) (match_dup 0))]
8733 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
8734 && peep2_reg_dead_p (3, operands[0])
8735 && !reg_overlap_mentioned_p (operands[0], operands[1])
8736 && !reg_overlap_mentioned_p (operands[0], operands[2])"
8737 [(set (match_dup 0) (match_dup 2))
8738 (parallel [(set (reg:CCC FLAGS_REG)
8739 (compare:CCC
8740 (zero_extend:<DWI>
8741 (plus:SWI48
8742 (plus:SWI48
8743 (match_op_dup 5
8744 [(match_dup 3) (const_int 0)])
8745 (match_dup 1))
8746 (match_dup 0)))
8747 (plus:<DWI>
8748 (zero_extend:<DWI> (match_dup 0))
8749 (match_op_dup 4
8750 [(match_dup 3) (const_int 0)]))))
8751 (set (match_dup 1)
8752 (plus:SWI48 (plus:SWI48 (match_op_dup 5
8753 [(match_dup 3) (const_int 0)])
8754 (match_dup 1))
8755 (match_dup 0)))])])
8756
;; Like the two-insn addcarry/store peephole, but for a four-insn
;; sequence in which the carry-out is read between the add and the store:
;;   {flags, reg0} = addcarry (reg0, mem1)
;;   reg5 (QI) = ltu (flags);  reg6 = zext (reg5)
;;   mem1 = reg0
;; The replacement performs the addcarry directly into mem1 and then
;; re-emits the setcc and zero-extend after it.
;; Besides the usual TARGET_READ_MODIFY_WRITE / size check and the
;; peep2_reg_dead_p check on reg0, operands 5 and 6 must not overlap reg0
;; and must not be mentioned in mem1.
8757 (define_peephole2
8758 [(parallel [(set (reg:CCC FLAGS_REG)
8759 (compare:CCC
8760 (zero_extend:<DWI>
8761 (plus:SWI48
8762 (plus:SWI48
8763 (match_operator:SWI48 4 "ix86_carry_flag_operator"
8764 [(match_operand 2 "flags_reg_operand")
8765 (const_int 0)])
8766 (match_operand:SWI48 0 "general_reg_operand"))
8767 (match_operand:SWI48 1 "memory_operand")))
8768 (plus:<DWI>
8769 (zero_extend:<DWI> (match_dup 1))
8770 (match_operator:<DWI> 3 "ix86_carry_flag_operator"
8771 [(match_dup 2) (const_int 0)]))))
8772 (set (match_dup 0)
8773 (plus:SWI48 (plus:SWI48 (match_op_dup 4
8774 [(match_dup 2) (const_int 0)])
8775 (match_dup 0))
8776 (match_dup 1)))])
8777 (set (match_operand:QI 5 "general_reg_operand")
8778 (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
8779 (set (match_operand:SWI48 6 "general_reg_operand")
8780 (zero_extend:SWI48 (match_dup 5)))
8781 (set (match_dup 1) (match_dup 0))]
8782 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
8783 && peep2_reg_dead_p (4, operands[0])
8784 && !reg_overlap_mentioned_p (operands[0], operands[1])
8785 && !reg_overlap_mentioned_p (operands[0], operands[5])
8786 && !reg_overlap_mentioned_p (operands[5], operands[1])
8787 && !reg_overlap_mentioned_p (operands[0], operands[6])
8788 && !reg_overlap_mentioned_p (operands[6], operands[1])"
8789 [(parallel [(set (reg:CCC FLAGS_REG)
8790 (compare:CCC
8791 (zero_extend:<DWI>
8792 (plus:SWI48
8793 (plus:SWI48
8794 (match_op_dup 4
8795 [(match_dup 2) (const_int 0)])
8796 (match_dup 1))
8797 (match_dup 0)))
8798 (plus:<DWI>
8799 (zero_extend:<DWI> (match_dup 0))
8800 (match_op_dup 3
8801 [(match_dup 2) (const_int 0)]))))
8802 (set (match_dup 1)
8803 (plus:SWI48 (plus:SWI48 (match_op_dup 4
8804 [(match_dup 2) (const_int 0)])
8805 (match_dup 1))
8806 (match_dup 0)))])
8807 (set (match_dup 5) (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
8808 (set (match_dup 6) (zero_extend:SWI48 (match_dup 5)))])
8809
;; Expander for an add with no incoming carry that still produces a
;; carry-out: the CCCmode flags are set by comparing operand 1 + operand 2
;; with operand 1, and operand 0 receives the sum.  The uaddc<mode>5
;; expander calls this when its carry-in operand is const0_rtx.
8810 (define_expand "addcarry<mode>_0"
8811 [(parallel
8812 [(set (reg:CCC FLAGS_REG)
8813 (compare:CCC
8814 (plus:SWI48
8815 (match_operand:SWI48 1 "nonimmediate_operand")
8816 (match_operand:SWI48 2 "x86_64_general_operand"))
8817 (match_dup 1)))
8818 (set (match_operand:SWI48 0 "nonimmediate_operand")
8819 (plus:SWI48 (match_dup 1) (match_dup 2)))])]
8820 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands,
8821 TARGET_APX_NDD)")
8822
;; Immediate form of addcarry<mode>: operand 2 is an immediate, and the
;; <DWI> compare arm holds a separate constant, operand 6, in place of
;; (zero_extend operand 2).  The insn condition requires operand 2 to be
;; a CONST_INT and checks that operand 6 is operand 2 zero-extended from
;; <MODE>mode to <DWI>mode: a CONST_INT when <MODE>mode is SImode or the
;; value is non-negative, otherwise a two-element CONST_WIDE_INT whose
;; high element is zero.
;; Alternative 1 is the three-operand form with the "apx_ndd" isa.
;; "length_immediate" is 1 when operand 2 is within [-128, 127], else 4.
8823 (define_insn "*addcarry<mode>_1"
8824 [(set (reg:CCC FLAGS_REG)
8825 (compare:CCC
8826 (zero_extend:<DWI>
8827 (plus:SWI48
8828 (plus:SWI48
8829 (match_operator:SWI48 5 "ix86_carry_flag_operator"
8830 [(match_operand 3 "flags_reg_operand") (const_int 0)])
8831 (match_operand:SWI48 1 "nonimmediate_operand" "%0,rm"))
8832 (match_operand:SWI48 2 "x86_64_immediate_operand" "e,e")))
8833 (plus:<DWI>
8834 (match_operand:<DWI> 6 "const_scalar_int_operand")
8835 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
8836 [(match_dup 3) (const_int 0)]))))
8837 (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
8838 (plus:SWI48 (plus:SWI48 (match_op_dup 5
8839 [(match_dup 3) (const_int 0)])
8840 (match_dup 1))
8841 (match_dup 2)))]
8842 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)
8843 && CONST_INT_P (operands[2])
8844 /* Check that operands[6] is operands[2] zero extended from
8845 <MODE>mode to <DWI>mode. */
8846 && ((<MODE>mode == SImode || INTVAL (operands[2]) >= 0)
8847 ? (CONST_INT_P (operands[6])
8848 && UINTVAL (operands[6]) == (UINTVAL (operands[2])
8849 & GET_MODE_MASK (<MODE>mode)))
8850 : (CONST_WIDE_INT_P (operands[6])
8851 && CONST_WIDE_INT_NUNITS (operands[6]) == 2
8852 && ((unsigned HOST_WIDE_INT) CONST_WIDE_INT_ELT (operands[6], 0)
8853 == UINTVAL (operands[2]))
8854 && CONST_WIDE_INT_ELT (operands[6], 1) == 0))"
8855 "@
8856 adc{<imodesuffix>}\t{%2, %0|%0, %2}
8857 adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
8858 [(set_attr "isa" "*,apx_ndd")
8859 (set_attr "type" "alu")
8860 (set_attr "use_carry" "1")
8861 (set_attr "pent_pair" "pu")
8862 (set_attr "mode" "<MODE>")
8863 (set (attr "length_immediate")
8864 (if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
8865 (const_string "1")
8866 (const_string "4")))])
8867
;; Subtract with borrow: operand 0 = operand 1 - (carry test, operator 4
;; applied to flags operand 3) - operand 2.  The flags register is
;; clobbered.  Alternatives 0-1 are the two-operand SBB (operand 1 tied to
;; operand 0); alternatives 2-3 print three operands and carry the
;; "apx_ndd" isa.  Unlike the add pattern, operand 1 has no "%"
;; (commutative) marker.
8868 (define_insn "@sub<mode>3_carry"
8869 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
8870 (minus:SWI
8871 (minus:SWI
8872 (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r")
8873 (match_operator:SWI 4 "ix86_carry_flag_operator"
8874 [(match_operand 3 "flags_reg_operand") (const_int 0)]))
8875 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>")))
8876 (clobber (reg:CC FLAGS_REG))]
8877 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
8878 TARGET_APX_NDD)"
8879 "@
8880 sbb{<imodesuffix>}\t{%2, %0|%0, %2}
8881 sbb{<imodesuffix>}\t{%2, %0|%0, %2}
8882 sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
8883 sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
8884 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
8885 (set_attr "type" "alu")
8886 (set_attr "use_carry" "1")
8887 (set_attr "pent_pair" "pu")
8888 (set_attr "mode" "<MODE>")])
8889
;; Convert the three-insn sequence
;;   reg0 = mem1;  reg0 = reg0 - carry - mem2;  mem1 = reg0
;; into
;;   reg0 = mem2;  mem1 = mem1 - carry - reg0
;; i.e. a subtract-with-borrow that reads and writes mem1 directly.
;; Applied only when TARGET_READ_MODIFY_WRITE holds or when optimizing
;; for size, reg0 passes peep2_reg_dead_p, and reg0 is mentioned in
;; neither memory operand.
8890 (define_peephole2
8891 [(set (match_operand:SWI 0 "general_reg_operand")
8892 (match_operand:SWI 1 "memory_operand"))
8893 (parallel [(set (match_dup 0)
8894 (minus:SWI
8895 (minus:SWI
8896 (match_dup 0)
8897 (match_operator:SWI 4 "ix86_carry_flag_operator"
8898 [(match_operand 3 "flags_reg_operand")
8899 (const_int 0)]))
8900 (match_operand:SWI 2 "memory_operand")))
8901 (clobber (reg:CC FLAGS_REG))])
8902 (set (match_dup 1) (match_dup 0))]
8903 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
8904 && peep2_reg_dead_p (3, operands[0])
8905 && !reg_overlap_mentioned_p (operands[0], operands[1])
8906 && !reg_overlap_mentioned_p (operands[0], operands[2])"
8907 [(set (match_dup 0) (match_dup 2))
8908 (parallel [(set (match_dup 1)
8909 (minus:SWI (minus:SWI (match_dup 1)
8910 (match_op_dup 4
8911 [(match_dup 3) (const_int 0)]))
8912 (match_dup 0)))
8913 (clobber (reg:CC FLAGS_REG))])])
8914
;; Same transformation as the preceding subtract-with-borrow peephole,
;; but for a four-insn sequence in which the difference is first copied
;; to a second register (operand 5) that is then stored to mem1:
;;   reg0 = mem1;  reg0 = reg0 - carry - mem2;  reg5 = reg0;  mem1 = reg5
;; The replacement is "reg0 = mem2; mem1 = mem1 - carry - reg0".
;; Both reg0 and reg5 must pass peep2_reg_dead_p, and neither may be
;; mentioned in mem1 (reg0 additionally not in mem2).
8915 (define_peephole2
8916 [(set (match_operand:SWI 0 "general_reg_operand")
8917 (match_operand:SWI 1 "memory_operand"))
8918 (parallel [(set (match_dup 0)
8919 (minus:SWI
8920 (minus:SWI
8921 (match_dup 0)
8922 (match_operator:SWI 4 "ix86_carry_flag_operator"
8923 [(match_operand 3 "flags_reg_operand")
8924 (const_int 0)]))
8925 (match_operand:SWI 2 "memory_operand")))
8926 (clobber (reg:CC FLAGS_REG))])
8927 (set (match_operand:SWI 5 "general_reg_operand") (match_dup 0))
8928 (set (match_dup 1) (match_dup 5))]
8929 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
8930 && peep2_reg_dead_p (3, operands[0])
8931 && peep2_reg_dead_p (4, operands[5])
8932 && !reg_overlap_mentioned_p (operands[0], operands[1])
8933 && !reg_overlap_mentioned_p (operands[0], operands[2])
8934 && !reg_overlap_mentioned_p (operands[5], operands[1])"
8935 [(set (match_dup 0) (match_dup 2))
8936 (parallel [(set (match_dup 1)
8937 (minus:SWI (minus:SWI (match_dup 1)
8938 (match_op_dup 4
8939 [(match_dup 3) (const_int 0)]))
8940 (match_dup 0)))
8941 (clobber (reg:CC FLAGS_REG))])])
8942
;; Subtract only the borrow from operand 1: operand 0 = operand 1 - carry
;; test, emitted as "sbb $0".  Operand 1 is tied to operand 0; a memory
;; destination is accepted only when it is rtx_equal_p to operand 1.
8943 (define_insn "*sub<mode>3_carry_0"
8944 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
8945 (minus:SWI
8946 (match_operand:SWI 1 "nonimmediate_operand" "0")
8947 (match_operator:SWI 2 "ix86_carry_flag_operator"
8948 [(reg FLAGS_REG) (const_int 0)])))
8949 (clobber (reg:CC FLAGS_REG))]
8950 "!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[1])"
8951 "sbb{<imodesuffix>}\t{$0, %0|%0, 0}"
8952 [(set_attr "type" "alu")
8953 (set_attr "use_carry" "1")
8954 (set_attr "pent_pair" "pu")
8955 (set_attr "mode" "<MODE>")])
8956
;; Variant of *sub<mode>3_carry_0 whose operator 2 is matched by
;; ix86_carry_flag_unset_operator; emitted as "adc $-1" instead of
;; "sbb $0".
;; NOTE(review): relies on x - (1 - CF) == x + (-1) + CF, assuming the
;; operator yields 1 when the carry is clear; confirm against the predicate.
8957 (define_insn "*sub<mode>3_carry_0r"
8958 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
8959 (minus:SWI
8960 (match_operand:SWI 1 "nonimmediate_operand" "0")
8961 (match_operator:SWI 2 "ix86_carry_flag_unset_operator"
8962 [(reg FLAGS_REG) (const_int 0)])))
8963 (clobber (reg:CC FLAGS_REG))]
8964 "!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[1])"
8965 "adc{<imodesuffix>}\t{$-1, %0|%0, -1}"
8966 [(set_attr "type" "alu")
8967 (set_attr "use_carry" "1")
8968 (set_attr "pent_pair" "pu")
8969 (set_attr "mode" "<MODE>")])
8970
;; 64-bit-only SImode subtract with borrow whose result is zero-extended
;; into DImode operand 0:
;;   operand 0 = zext (operand 1 - carry test - operand 2).
;; Alternative 0 is the two-operand SBB; alternatives 1-2 print three
;; operands and carry the "apx_ndd" isa.  Clobbers the flags register.
8971 (define_insn "*subsi3_carry_zext"
8972 [(set (match_operand:DI 0 "register_operand" "=r,r,r")
8973 (zero_extend:DI
8974 (minus:SI
8975 (minus:SI
8976 (match_operand:SI 1 "nonimmediate_operand" "0,r,rm")
8977 (match_operator:SI 3 "ix86_carry_flag_operator"
8978 [(reg FLAGS_REG) (const_int 0)]))
8979 (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re"))))
8980 (clobber (reg:CC FLAGS_REG))]
8981 "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands,
8982 TARGET_APX_NDD)"
8983 "@
8984 sbb{l}\t{%2, %k0|%k0, %2}
8985 sbb{l}\t{%2, %1, %k0|%k0, %1, %2}
8986 sbb{l}\t{%2, %1, %k0|%k0, %1, %2}"
8987 [(set_attr "isa" "*,apx_ndd,apx_ndd")
8988 (set_attr "type" "alu")
8989 (set_attr "use_carry" "1")
8990 (set_attr "pent_pair" "pu")
8991 (set_attr "mode" "SI")])
8992
;; 64-bit-only: operand 0 = zext (SImode operand 1 - carry test), emitted
;; as "sbb $0".  Only a single two-operand alternative exists here
;; (operand 1 is a register tied to operand 0); there is no "apx_ndd"
;; alternative, unlike *addsi3_carry_zext_0.
8993 (define_insn "*subsi3_carry_zext_0"
8994 [(set (match_operand:DI 0 "register_operand" "=r")
8995 (zero_extend:DI
8996 (minus:SI
8997 (match_operand:SI 1 "register_operand" "0")
8998 (match_operator:SI 2 "ix86_carry_flag_operator"
8999 [(reg FLAGS_REG) (const_int 0)]))))
9000 (clobber (reg:CC FLAGS_REG))]
9001 "TARGET_64BIT"
9002 "sbb{l}\t{$0, %k0|%k0, 0}"
9003 [(set_attr "type" "alu")
9004 (set_attr "use_carry" "1")
9005 (set_attr "pent_pair" "pu")
9006 (set_attr "mode" "SI")])
9007
;; Variant of *subsi3_carry_zext_0 whose operator 2 is matched by
;; ix86_carry_flag_unset_operator; emitted as "adc $-1" instead of
;; "sbb $0".  Single two-operand alternative only.
;; NOTE(review): relies on x - (1 - CF) == x + (-1) + CF, assuming the
;; operator yields 1 when the carry is clear; confirm against the predicate.
9008 (define_insn "*subsi3_carry_zext_0r"
9009 [(set (match_operand:DI 0 "register_operand" "=r")
9010 (zero_extend:DI
9011 (minus:SI
9012 (match_operand:SI 1 "register_operand" "0")
9013 (match_operator:SI 2 "ix86_carry_flag_unset_operator"
9014 [(reg FLAGS_REG) (const_int 0)]))))
9015 (clobber (reg:CC FLAGS_REG))]
9016 "TARGET_64BIT"
9017 "adc{l}\t{$-1, %k0|%k0, -1}"
9018 [(set_attr "type" "alu")
9019 (set_attr "use_carry" "1")
9020 (set_attr "pent_pair" "pu")
9021 (set_attr "mode" "SI")])
9022
;; Flags-only subtract with borrow.  The CCCmode flags are set by
;; comparing, in <DWI> mode, zext (operand 1) with
;; (ltu on the flags) + zext (operand 2).  The arithmetic result is not
;; kept: operand 0 is a scratch register that is merely clobbered, and
;; operand 1 is tied to it.
9023 (define_insn "@sub<mode>3_carry_ccc"
9024 [(set (reg:CCC FLAGS_REG)
9025 (compare:CCC
9026 (zero_extend:<DWI> (match_operand:DWIH 1 "register_operand" "0"))
9027 (plus:<DWI>
9028 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))
9029 (zero_extend:<DWI>
9030 (match_operand:DWIH 2 "x86_64_sext_operand" "rmWe")))))
9031 (clobber (match_scratch:DWIH 0 "=r"))]
9032 ""
9033 "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
9034 [(set_attr "type" "alu")
9035 (set_attr "mode" "<MODE>")])
9036
;; Immediate form of the flags-only subtract with borrow: operand 2 is a
;; <DWI>-mode constant (predicate x86_64_dwzext_immediate_operand) that
;; stands where the other pattern has (zero_extend operand 2).  At output
;; time the lowpart of that constant in <MODE>mode is computed with
;; simplify_subreg into operands[3], which is what the SBB prints.
;; Operand 0 is a clobbered scratch tied to operand 1.
9037 (define_insn "*sub<mode>3_carry_ccc_1"
9038 [(set (reg:CCC FLAGS_REG)
9039 (compare:CCC
9040 (zero_extend:<DWI> (match_operand:DWIH 1 "register_operand" "0"))
9041 (plus:<DWI>
9042 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0))
9043 (match_operand:<DWI> 2 "x86_64_dwzext_immediate_operand" "Wf"))))
9044 (clobber (match_scratch:DWIH 0 "=r"))]
9045 ""
9046 {
9047 operands[3] = simplify_subreg (<MODE>mode, operands[2], <DWI>mode, 0);
9048 return "sbb{<imodesuffix>}\t{%3, %0|%0, %3}";
9049 }
9050 [(set_attr "type" "alu")
9051 (set_attr "mode" "<MODE>")])
9052
9053 ;; The sign flag is set from the
9054 ;; (compare (match_dup 1) (plus:DWIH (ltu:DWIH ...) (match_dup 2)))
9055 ;; result, the overflow flag likewise, but the overflow flag is also
9056 ;; set if the (plus:DWIH (ltu:DWIH ...) (match_dup 2)) overflows.
;; Flags-only SBB in CCGZmode.  The flag computation is represented as an
;; UNSPEC_SBB over operand 1, operand 2 and the incoming carry (ltu on the
;; flags) rather than as a compare.  Operand 0 is a clobbered scratch tied
;; to operand 1.
9057 (define_insn "@sub<mode>3_carry_ccgz"
9058 [(set (reg:CCGZ FLAGS_REG)
9059 (unspec:CCGZ [(match_operand:DWIH 1 "register_operand" "0")
9060 (match_operand:DWIH 2 "x86_64_general_operand" "rBMe")
9061 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))]
9062 UNSPEC_SBB))
9063 (clobber (match_scratch:DWIH 0 "=r"))]
9064 ""
9065 "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
9066 [(set_attr "type" "alu")
9067 (set_attr "mode" "<MODE>")])
9068
;; Subtract with borrow-in that also produces a borrow-out in CCCmode.
;; The flags are set by comparing, in double-width <DWI> mode,
;;   zext (operand 1)  with  carry + zext (operand 2)
;; and operand 0 receives operand 1 - carry - operand 2.
;; Operand 3 is the flags register; operators 4 and 5 are the carry test
;; in <DWI> mode and in the SWI48 mode respectively.
;; Alternatives 2-3 print three operands and carry the "apx_ndd" isa.
9069 (define_insn "subborrow<mode>"
9070 [(set (reg:CCC FLAGS_REG)
9071 (compare:CCC
9072 (zero_extend:<DWI>
9073 (match_operand:SWI48 1 "nonimmediate_operand" "0,0,r,rm"))
9074 (plus:<DWI>
9075 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
9076 [(match_operand 3 "flags_reg_operand") (const_int 0)])
9077 (zero_extend:<DWI>
9078 (match_operand:SWI48 2 "nonimmediate_operand" "r,rm,rm,r")))))
9079 (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r")
9080 (minus:SWI48 (minus:SWI48
9081 (match_dup 1)
9082 (match_operator:SWI48 5 "ix86_carry_flag_operator"
9083 [(match_dup 3) (const_int 0)]))
9084 (match_dup 2)))]
9085 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
9086 TARGET_APX_NDD)"
9087 "@
9088 sbb{<imodesuffix>}\t{%2, %0|%0, %2}
9089 sbb{<imodesuffix>}\t{%2, %0|%0, %2}
9090 sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
9091 sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
9092 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
9093 (set_attr "type" "alu")
9094 (set_attr "use_carry" "1")
9095 (set_attr "pent_pair" "pu")
9096 (set_attr "mode" "<MODE>")])
9097
;; Convert the three-insn sequence
;;   reg0 = mem1;  {flags, reg0} = subborrow (reg0, mem2);  mem1 = reg0
;; into
;;   reg0 = mem2;  {flags, mem1} = subborrow (mem1, reg0)
;; Applied only when TARGET_READ_MODIFY_WRITE holds or when optimizing
;; for size, reg0 passes peep2_reg_dead_p, and reg0 is mentioned in
;; neither memory operand.
9098 (define_peephole2
9099 [(set (match_operand:SWI48 0 "general_reg_operand")
9100 (match_operand:SWI48 1 "memory_operand"))
9101 (parallel [(set (reg:CCC FLAGS_REG)
9102 (compare:CCC
9103 (zero_extend:<DWI> (match_dup 0))
9104 (plus:<DWI>
9105 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
9106 [(match_operand 3 "flags_reg_operand") (const_int 0)])
9107 (zero_extend:<DWI>
9108 (match_operand:SWI48 2 "memory_operand")))))
9109 (set (match_dup 0)
9110 (minus:SWI48
9111 (minus:SWI48
9112 (match_dup 0)
9113 (match_operator:SWI48 5 "ix86_carry_flag_operator"
9114 [(match_dup 3) (const_int 0)]))
9115 (match_dup 2)))])
9116 (set (match_dup 1) (match_dup 0))]
9117 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
9118 && peep2_reg_dead_p (3, operands[0])
9119 && !reg_overlap_mentioned_p (operands[0], operands[1])
9120 && !reg_overlap_mentioned_p (operands[0], operands[2])"
9121 [(set (match_dup 0) (match_dup 2))
9122 (parallel [(set (reg:CCC FLAGS_REG)
9123 (compare:CCC
9124 (zero_extend:<DWI> (match_dup 1))
9125 (plus:<DWI> (match_op_dup 4
9126 [(match_dup 3) (const_int 0)])
9127 (zero_extend:<DWI> (match_dup 0)))))
9128 (set (match_dup 1)
9129 (minus:SWI48 (minus:SWI48 (match_dup 1)
9130 (match_op_dup 5
9131 [(match_dup 3) (const_int 0)]))
9132 (match_dup 0)))])])
9133
;; Four-insn form: both subborrow inputs are loaded from memory first
;; (reg6 = mem7; reg8 = mem9), then a register-register subborrow writes
;; reg0, and reg0 is stored to mem1.  The condition accepts either load
;; order:
;;   - reg6 is reg0: mem7 must equal mem1 and reg8 must be reg2, so mem9
;;     is the subtrahend's memory;
;;   - otherwise reg8 is reg0, mem9 equals mem1 and reg6 is reg2, so mem7
;;     is the subtrahend's memory.
;; The replacement loads the subtrahend into reg0 and performs the
;; subborrow directly on mem1.  The replacement template always reads
;; operand 9, so the preparation code copies operands[7] into operands[9]
;; in the second case.
9134 (define_peephole2
9135 [(set (match_operand:SWI48 6 "general_reg_operand")
9136 (match_operand:SWI48 7 "memory_operand"))
9137 (set (match_operand:SWI48 8 "general_reg_operand")
9138 (match_operand:SWI48 9 "memory_operand"))
9139 (parallel [(set (reg:CCC FLAGS_REG)
9140 (compare:CCC
9141 (zero_extend:<DWI>
9142 (match_operand:SWI48 0 "general_reg_operand"))
9143 (plus:<DWI>
9144 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
9145 [(match_operand 3 "flags_reg_operand") (const_int 0)])
9146 (zero_extend:<DWI>
9147 (match_operand:SWI48 2 "general_reg_operand")))))
9148 (set (match_dup 0)
9149 (minus:SWI48
9150 (minus:SWI48
9151 (match_dup 0)
9152 (match_operator:SWI48 5 "ix86_carry_flag_operator"
9153 [(match_dup 3) (const_int 0)]))
9154 (match_dup 2)))])
9155 (set (match_operand:SWI48 1 "memory_operand") (match_dup 0))]
9156 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
9157 && peep2_reg_dead_p (4, operands[0])
9158 && peep2_reg_dead_p (3, operands[2])
9159 && !reg_overlap_mentioned_p (operands[0], operands[1])
9160 && !reg_overlap_mentioned_p (operands[2], operands[1])
9161 && !reg_overlap_mentioned_p (operands[6], operands[9])
9162 && (rtx_equal_p (operands[6], operands[0])
9163 ? (rtx_equal_p (operands[7], operands[1])
9164 && rtx_equal_p (operands[8], operands[2]))
9165 : (rtx_equal_p (operands[8], operands[0])
9166 && rtx_equal_p (operands[9], operands[1])
9167 && rtx_equal_p (operands[6], operands[2])))"
9168 [(set (match_dup 0) (match_dup 9))
9169 (parallel [(set (reg:CCC FLAGS_REG)
9170 (compare:CCC
9171 (zero_extend:<DWI> (match_dup 1))
9172 (plus:<DWI> (match_op_dup 4
9173 [(match_dup 3) (const_int 0)])
9174 (zero_extend:<DWI> (match_dup 0)))))
9175 (set (match_dup 1)
9176 (minus:SWI48 (minus:SWI48 (match_dup 1)
9177 (match_op_dup 5
9178 [(match_dup 3) (const_int 0)]))
9179 (match_dup 0)))])]
9180 {
9181 if (!rtx_equal_p (operands[6], operands[0]))
9182 operands[9] = operands[7];
9183 })
9184
;; Six-insn form of the two-load subborrow peephole: between the
;; register-register subborrow and the store to mem1, the borrow-out is
;; read with a setcc into QImode operand 10 and zero-extended into
;; operand 11.  As in the four-insn form, the condition accepts either
;; load order, and the preparation code copies operands[7] into
;; operands[9] when the first load was the subtrahend.
;; The replacement loads the subtrahend into reg0, performs the subborrow
;; directly on mem1, and re-emits the setcc and zero-extend after it.
;; Operands 10 and 11 must not overlap reg0 and must not be mentioned in
;; mem1.
9185 (define_peephole2
9186 [(set (match_operand:SWI48 6 "general_reg_operand")
9187 (match_operand:SWI48 7 "memory_operand"))
9188 (set (match_operand:SWI48 8 "general_reg_operand")
9189 (match_operand:SWI48 9 "memory_operand"))
9190 (parallel [(set (reg:CCC FLAGS_REG)
9191 (compare:CCC
9192 (zero_extend:<DWI>
9193 (match_operand:SWI48 0 "general_reg_operand"))
9194 (plus:<DWI>
9195 (match_operator:<DWI> 4 "ix86_carry_flag_operator"
9196 [(match_operand 3 "flags_reg_operand") (const_int 0)])
9197 (zero_extend:<DWI>
9198 (match_operand:SWI48 2 "general_reg_operand")))))
9199 (set (match_dup 0)
9200 (minus:SWI48
9201 (minus:SWI48
9202 (match_dup 0)
9203 (match_operator:SWI48 5 "ix86_carry_flag_operator"
9204 [(match_dup 3) (const_int 0)]))
9205 (match_dup 2)))])
9206 (set (match_operand:QI 10 "general_reg_operand")
9207 (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
9208 (set (match_operand:SWI48 11 "general_reg_operand")
9209 (zero_extend:SWI48 (match_dup 10)))
9210 (set (match_operand:SWI48 1 "memory_operand") (match_dup 0))]
9211 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
9212 && peep2_reg_dead_p (6, operands[0])
9213 && peep2_reg_dead_p (3, operands[2])
9214 && !reg_overlap_mentioned_p (operands[0], operands[1])
9215 && !reg_overlap_mentioned_p (operands[2], operands[1])
9216 && !reg_overlap_mentioned_p (operands[6], operands[9])
9217 && !reg_overlap_mentioned_p (operands[0], operands[10])
9218 && !reg_overlap_mentioned_p (operands[10], operands[1])
9219 && !reg_overlap_mentioned_p (operands[0], operands[11])
9220 && !reg_overlap_mentioned_p (operands[11], operands[1])
9221 && (rtx_equal_p (operands[6], operands[0])
9222 ? (rtx_equal_p (operands[7], operands[1])
9223 && rtx_equal_p (operands[8], operands[2]))
9224 : (rtx_equal_p (operands[8], operands[0])
9225 && rtx_equal_p (operands[9], operands[1])
9226 && rtx_equal_p (operands[6], operands[2])))"
9227 [(set (match_dup 0) (match_dup 9))
9228 (parallel [(set (reg:CCC FLAGS_REG)
9229 (compare:CCC
9230 (zero_extend:<DWI> (match_dup 1))
9231 (plus:<DWI> (match_op_dup 4
9232 [(match_dup 3) (const_int 0)])
9233 (zero_extend:<DWI> (match_dup 0)))))
9234 (set (match_dup 1)
9235 (minus:SWI48 (minus:SWI48 (match_dup 1)
9236 (match_op_dup 5
9237 [(match_dup 3) (const_int 0)]))
9238 (match_dup 0)))])
9239 (set (match_dup 10) (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
9240 (set (match_dup 11) (zero_extend:SWI48 (match_dup 10)))]
9241 {
9242 if (!rtx_equal_p (operands[6], operands[0]))
9243 operands[9] = operands[7];
9244 })
9245
;; Expander for a subtract with no incoming borrow that still sets the
;; flags: the CCmode flags are set by comparing operand 1 with operand 2,
;; and operand 0 receives operand 1 - operand 2.  The usubc<mode>5
;; expander calls this when its carry-in operand is const0_rtx.
;; Note the flags mode here is CC, whereas addcarry<mode>_0 uses CCC.
9246 (define_expand "subborrow<mode>_0"
9247 [(parallel
9248 [(set (reg:CC FLAGS_REG)
9249 (compare:CC
9250 (match_operand:SWI48 1 "nonimmediate_operand")
9251 (match_operand:SWI48 2 "<general_operand>")))
9252 (set (match_operand:SWI48 0 "register_operand")
9253 (minus:SWI48 (match_dup 1) (match_dup 2)))])]
9254 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
9255 TARGET_APX_NDD)")
9256
;; Unsigned add with carry-in and carry-out.
;;   operand 0: sum                operand 1: carry-out
;;   operands 2, 3: addends        operand 4: carry-in
;; If the carry-in is const0_rtx, addcarry<mode>_0 is emitted.  Otherwise
;; ix86_expand_carry (operands[4]) is called first and addcarry<mode> is
;; emitted with LTU tests of the CCCmode flags register in <DWI> and
;; <MODE> modes.  In both cases the carry-out is then materialized by
;; setting a fresh QImode pseudo from LTU on the flags and zero-extending
;; it into operand 1.
9257 (define_expand "uaddc<mode>5"
9258 [(match_operand:SWI48 0 "register_operand")
9259 (match_operand:SWI48 1 "register_operand")
9260 (match_operand:SWI48 2 "register_operand")
9261 (match_operand:SWI48 3 "register_operand")
9262 (match_operand:SWI48 4 "nonmemory_operand")]
9263 ""
9264 {
9265 rtx cf = gen_rtx_REG (CCCmode, FLAGS_REG), pat, pat2;
9266 if (operands[4] == const0_rtx)
9267 emit_insn (gen_addcarry<mode>_0 (operands[0], operands[2], operands[3]));
9268 else
9269 {
9270 ix86_expand_carry (operands[4]);
9271 pat = gen_rtx_LTU (<DWI>mode, cf, const0_rtx);
9272 pat2 = gen_rtx_LTU (<MODE>mode, cf, const0_rtx);
9273 emit_insn (gen_addcarry<mode> (operands[0], operands[2], operands[3],
9274 cf, pat, pat2));
9275 }
9276 rtx cc = gen_reg_rtx (QImode);
9277 pat = gen_rtx_LTU (QImode, cf, const0_rtx);
9278 emit_insn (gen_rtx_SET (cc, pat));
9279 emit_insn (gen_zero_extendqi<mode>2 (operands[1], cc));
9280 DONE;
9281 })
9282
;; Unsigned subtract with borrow-in and borrow-out.
;;   operand 0: difference         operand 1: borrow-out
;;   operand 2: minuend            operand 3: subtrahend
;;   operand 4: borrow-in
;; If the borrow-in is const0_rtx, subborrow<mode>_0 is emitted and the
;; flags register is referenced in CCmode.  Otherwise ix86_expand_carry
;; (operands[4]) is called first, subborrow<mode> is emitted, and the
;; flags register is referenced in CCCmode.  In both cases the borrow-out
;; is then materialized by setting a fresh QImode pseudo from LTU on the
;; flags and zero-extending it into operand 1.
9283 (define_expand "usubc<mode>5"
9284 [(match_operand:SWI48 0 "register_operand")
9285 (match_operand:SWI48 1 "register_operand")
9286 (match_operand:SWI48 2 "register_operand")
9287 (match_operand:SWI48 3 "register_operand")
9288 (match_operand:SWI48 4 "nonmemory_operand")]
9289 ""
9290 {
9291 rtx cf, pat, pat2;
9292 if (operands[4] == const0_rtx)
9293 {
9294 cf = gen_rtx_REG (CCmode, FLAGS_REG);
9295 emit_insn (gen_subborrow<mode>_0 (operands[0], operands[2],
9296 operands[3]));
9297 }
9298 else
9299 {
9300 cf = gen_rtx_REG (CCCmode, FLAGS_REG);
9301 ix86_expand_carry (operands[4]);
9302 pat = gen_rtx_LTU (<DWI>mode, cf, const0_rtx);
9303 pat2 = gen_rtx_LTU (<MODE>mode, cf, const0_rtx);
9304 emit_insn (gen_subborrow<mode> (operands[0], operands[2], operands[3],
9305 cf, pat, pat2));
9306 }
9307 rtx cc = gen_reg_rtx (QImode);
9308 pat = gen_rtx_LTU (QImode, cf, const0_rtx);
9309 emit_insn (gen_rtx_SET (cc, pat));
9310 emit_insn (gen_zero_extendqi<mode>2 (operands[1], cc));
9311 DONE;
9312 })
9313
;; Mode iterator over the two flags modes CC and CCC.  The splitters that
;; follow use it as the mode of the flags register they read.
9314 (define_mode_iterator CC_CCC [CC CCC])
9315
9316 ;; Pre-reload splitter to optimize
9317 ;; *setcc_qi followed by *addqi3_cconly_overflow_1 with the same QI
9318 ;; operand and no intervening flags modifications into nothing.
;; The pattern sets the CCCmode flags from comparing the negated GEU test
;; of the flags with the LTU test of the same flags.  It matches only
;; while ix86_pre_reload_split () holds, is never output as is ("#"), and
;; is split unconditionally ("&& 1") into no insns: the split code only
;; emits a NOTE_INSN_DELETED note.
9319 (define_insn_and_split "*setcc_qi_addqi3_cconly_overflow_1_<mode>"
9320 [(set (reg:CCC FLAGS_REG)
9321 (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
9322 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))))]
9323 "ix86_pre_reload_split ()"
9324 "#"
9325 "&& 1"
9326 [(const_int 0)]
9327 "emit_note (NOTE_INSN_DELETED); DONE;")
9328
9329 ;; Set the carry flag from the carry flag.
;; A plain copy of the CCCmode flags register onto itself.  It matches
;; only while ix86_pre_reload_split () holds and is split unconditionally
;; into no insns (only a NOTE_INSN_DELETED note is emitted).
9330 (define_insn_and_split "*setccc"
9331 [(set (reg:CCC FLAGS_REG)
9332 (reg:CCC FLAGS_REG))]
9333 "ix86_pre_reload_split ()"
9334 "#"
9335 "&& 1"
9336 [(const_int 0)]
9337 "emit_note (NOTE_INSN_DELETED); DONE;")
9338
9339 ;; Set the carry flag from the carry flag.
;; Here the CCCmode flags are set from an LTU test of the flags register
;; (in CC or CCC mode, via the CC_CCC iterator).  It matches only while
;; ix86_pre_reload_split () holds and is split unconditionally into no
;; insns (only a NOTE_INSN_DELETED note is emitted).
9340 (define_insn_and_split "*setcc_qi_negqi_ccc_1_<mode>"
9341 [(set (reg:CCC FLAGS_REG)
9342 (ltu:CCC (reg:CC_CCC FLAGS_REG) (const_int 0)))]
9343 "ix86_pre_reload_split ()"
9344 "#"
9345 "&& 1"
9346 [(const_int 0)]
9347 "emit_note (NOTE_INSN_DELETED); DONE;")
9348
9349 ;; Set the carry flag from the carry flag.
;; Here the CCCmode flags are set from an UNSPEC_CC_NE whose operands are
;; the QImode LTU test of the flags register and (const_int 0).  It
;; matches only while ix86_pre_reload_split () holds and is split
;; unconditionally into no insns (only a NOTE_INSN_DELETED note is
;; emitted).
9350 (define_insn_and_split "*setcc_qi_negqi_ccc_2_<mode>"
9351 [(set (reg:CCC FLAGS_REG)
9352 (unspec:CCC [(ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
9353 (const_int 0)] UNSPEC_CC_NE))]
9354 "ix86_pre_reload_split ()"
9355 "#"
9356 "&& 1"
9357 [(const_int 0)]
9358 "emit_note (NOTE_INSN_DELETED); DONE;")
9359 \f
9360 ;; Overflow setting add instructions
9361
;; Expander for a QImode add whose only result is the CCCmode flags: they
;; are set by comparing operand 0 + operand 1 with operand 0, and the sum
;; itself goes to a clobbered QImode scratch.  The expander is rejected
;; when both operands are memory.
9362 (define_expand "addqi3_cconly_overflow"
9363 [(parallel
9364 [(set (reg:CCC FLAGS_REG)
9365 (compare:CCC
9366 (plus:QI
9367 (match_operand:QI 0 "nonimmediate_operand")
9368 (match_operand:QI 1 "general_operand"))
9369 (match_dup 0)))
9370 (clobber (scratch:QI))])]
9371 "!(MEM_P (operands[0]) && MEM_P (operands[1]))")
9372
;; Add that is kept only for its carry: the CCCmode flags are set by
;; comparing operand 1 + operand 2 with operand 1, and the sum lands in
;; scratch operand 0.  Alternative 0 ties operand 1 to the scratch;
;; alternatives 1-2 print three operands and carry the "apx_ndd" isa.
;; The insn is rejected when operands 1 and 2 are both memory.
9373 (define_insn "*add<mode>3_cconly_overflow_1"
9374 [(set (reg:CCC FLAGS_REG)
9375 (compare:CCC
9376 (plus:SWI
9377 (match_operand:SWI 1 "nonimmediate_operand" "%0,r,rm")
9378 (match_operand:SWI 2 "<general_operand>" "<g>,<g>,re"))
9379 (match_dup 1)))
9380 (clobber (match_scratch:SWI 0 "=<r>,r,r"))]
9381 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
9382 "@
9383 add{<imodesuffix>}\t{%2, %0|%0, %2}
9384 add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
9385 add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
9386 [(set_attr "isa" "*,apx_ndd,apx_ndd")
9387 (set_attr "type" "alu")
9388 (set_attr "mode" "<MODE>")])
9389
;; Add that sets the carry flag (sum compared against operand 1) and also
;; stores the sum in operand 0.  Alternatives 2 and 3 use the three-operand
;; output form and are enabled only for the apx_ndd isa.
9390 (define_insn "@add<mode>3_cc_overflow_1"
9391 [(set (reg:CCC FLAGS_REG)
9392 (compare:CCC
9393 (plus:SWI
9394 (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r")
9395 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>"))
9396 (match_dup 1)))
9397 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
9398 (plus:SWI (match_dup 1) (match_dup 2)))]
9399 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
9400 "@
9401 add{<imodesuffix>}\t{%2, %0|%0, %2}
9402 add{<imodesuffix>}\t{%2, %0|%0, %2}
9403 add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
9404 add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
9405 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
9406 (set_attr "type" "alu")
9407 (set_attr "mode" "<MODE>")])
9408
;; Combine a carry-setting add of memory operand 1 into register operand 0,
;; followed by a store of that register back to operand 1, into a single
;; carry-setting add of the register into the memory operand.  Requires
;; TARGET_READ_MODIFY_WRITE or optimizing for size, the register to be dead
;; after the store, and the register not to be mentioned in the address.
;; The replacement compares the sum against operand 1 instead of operand 0;
;; both are addends of the same sum.
9409 (define_peephole2
9410 [(parallel [(set (reg:CCC FLAGS_REG)
9411 (compare:CCC
9412 (plus:SWI (match_operand:SWI 0 "general_reg_operand")
9413 (match_operand:SWI 1 "memory_operand"))
9414 (match_dup 0)))
9415 (set (match_dup 0) (plus:SWI (match_dup 0) (match_dup 1)))])
9416 (set (match_dup 1) (match_dup 0))]
9417 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
9418 && peep2_reg_dead_p (2, operands[0])
9419 && !reg_overlap_mentioned_p (operands[0], operands[1])"
9420 [(parallel [(set (reg:CCC FLAGS_REG)
9421 (compare:CCC
9422 (plus:SWI (match_dup 1) (match_dup 0))
9423 (match_dup 1)))
9424 (set (match_dup 1) (plus:SWI (match_dup 1) (match_dup 0)))])])
9425
;; Three-insn form: load memory operand 1 into register operand 0, add
;; memory operand 2 to it with the carry flag set, then store the register
;; back to operand 1.  Replaced by a load of operand 2 into the register
;; followed by a carry-setting add of the register into memory operand 1.
;; Requires TARGET_READ_MODIFY_WRITE or optimizing for size, the register
;; to be dead after the store, and the register not to be mentioned in
;; either memory operand.
9426 (define_peephole2
9427 [(set (match_operand:SWI 0 "general_reg_operand")
9428 (match_operand:SWI 1 "memory_operand"))
9429 (parallel [(set (reg:CCC FLAGS_REG)
9430 (compare:CCC
9431 (plus:SWI (match_dup 0)
9432 (match_operand:SWI 2 "memory_operand"))
9433 (match_dup 0)))
9434 (set (match_dup 0) (plus:SWI (match_dup 0) (match_dup 2)))])
9435 (set (match_dup 1) (match_dup 0))]
9436 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
9437 && peep2_reg_dead_p (3, operands[0])
9438 && !reg_overlap_mentioned_p (operands[0], operands[1])
9439 && !reg_overlap_mentioned_p (operands[0], operands[2])"
9440 [(set (match_dup 0) (match_dup 2))
9441 (parallel [(set (reg:CCC FLAGS_REG)
9442 (compare:CCC
9443 (plus:SWI (match_dup 1) (match_dup 0))
9444 (match_dup 1)))
9445 (set (match_dup 1) (plus:SWI (match_dup 1) (match_dup 0)))])])
9446
;; SImode add that sets the carry flag (sum compared against operand 1) and
;; stores the sum zero-extended to DImode in operand 0.  TARGET_64BIT only;
;; the destination is printed with the %k modifier.
9447 (define_insn "*addsi3_zext_cc_overflow_1"
9448 [(set (reg:CCC FLAGS_REG)
9449 (compare:CCC
9450 (plus:SI
9451 (match_operand:SI 1 "nonimmediate_operand" "%0,r,rm")
9452 (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re"))
9453 (match_dup 1)))
9454 (set (match_operand:DI 0 "register_operand" "=r,r,r")
9455 (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
9456 "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands,
9457 TARGET_APX_NDD)"
9458 "@
9459 add{l}\t{%2, %k0|%k0, %2}
9460 add{l}\t{%2, %1, %k0|%k0, %1, %2}
9461 add{l}\t{%2, %1, %k0|%k0, %1, %2}"
9462 [(set_attr "isa" "*,apx_ndd,apx_ndd")
9463 (set_attr "type" "alu")
9464 (set_attr "mode" "SI")])
9465
;; Same as *add<mode>3_cconly_overflow_1, except that the sum is compared
;; against operand 2 instead of operand 1.
9466 (define_insn "*add<mode>3_cconly_overflow_2"
9467 [(set (reg:CCC FLAGS_REG)
9468 (compare:CCC
9469 (plus:SWI
9470 (match_operand:SWI 1 "nonimmediate_operand" "%0,r,rm")
9471 (match_operand:SWI 2 "<general_operand>" "<g>,<g>,re"))
9472 (match_dup 2)))
9473 (clobber (match_scratch:SWI 0 "=<r>,r,r"))]
9474 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
9475 "@
9476 add{<imodesuffix>}\t{%2, %0|%0, %2}
9477 add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
9478 add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
9479 [(set_attr "isa" "*,apx_ndd,apx_ndd")
9480 (set_attr "type" "alu")
9481 (set_attr "mode" "<MODE>")])
9482
;; Same as @add<mode>3_cc_overflow_1, except that the sum is compared
;; against operand 2 instead of operand 1.
9483 (define_insn "*add<mode>3_cc_overflow_2"
9484 [(set (reg:CCC FLAGS_REG)
9485 (compare:CCC
9486 (plus:SWI
9487 (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r")
9488 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>"))
9489 (match_dup 2)))
9490 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
9491 (plus:SWI (match_dup 1) (match_dup 2)))]
9492 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
9493 "@
9494 add{<imodesuffix>}\t{%2, %0|%0, %2}
9495 add{<imodesuffix>}\t{%2, %0|%0, %2}
9496 add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
9497 add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
9498 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
9499 (set_attr "type" "alu")
9500 (set_attr "mode" "<MODE>")])
9501
;; Same as *addsi3_zext_cc_overflow_1, except that the sum is compared
;; against operand 2 instead of operand 1.
9502 (define_insn "*addsi3_zext_cc_overflow_2"
9503 [(set (reg:CCC FLAGS_REG)
9504 (compare:CCC
9505 (plus:SI
9506 (match_operand:SI 1 "nonimmediate_operand" "%0,r,rm")
9507 (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re"))
9508 (match_dup 2)))
9509 (set (match_operand:DI 0 "register_operand" "=r,r,r")
9510 (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
9511 "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands,
9512 TARGET_APX_NDD)"
9513 "@
9514 add{l}\t{%2, %k0|%k0, %2}
9515 add{l}\t{%2, %1, %k0|%k0, %1, %2}
9516 add{l}\t{%2, %1, %k0|%k0, %1, %2}"
9517 [(set_attr "isa" "*,apx_ndd,apx_ndd")
9518 (set_attr "type" "alu")
9519 (set_attr "mode" "SI")])
9520
;; Double-word add that sets the carry flag (sum compared against
;; operand 1) and stores the sum in operand 0.  After reload it is split
;; into a carry-setting add of one pair of halves followed by a
;; carry-setting add-with-carry of the other pair.
;; split_double_mode is called on operands 0-2, writing one set of halves
;; back to operands[0..2] and the other to operands[3..5] (presumably low
;; and high halves respectively -- confirm against split_double_mode).
;; If operands[2] is const0_rtx after the split, the first add is replaced
;; by a plain move (when source and destination differ) and
;; gen_addcarry<mode>_0 is emitted for operands 3-5.  Operand 6 is
;; operands[5] zero-extended to <DWI>mode, simplified when it is a
;; CONST_INT.
9521 (define_insn_and_split "*add<dwi>3_doubleword_cc_overflow_1"
9522 [(set (reg:CCC FLAGS_REG)
9523 (compare:CCC
9524 (plus:<DWI>
9525 (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0,ro,r")
9526 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o,r<di>,o"))
9527 (match_dup 1)))
9528 (set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,&r,&r")
9529 (plus:<DWI> (match_dup 1) (match_dup 2)))]
9530 "ix86_binary_operator_ok (PLUS, <DWI>mode, operands, TARGET_APX_NDD)"
9531 "#"
9532 "&& reload_completed"
9533 [(parallel [(set (reg:CCC FLAGS_REG)
9534 (compare:CCC
9535 (plus:DWIH (match_dup 1) (match_dup 2))
9536 (match_dup 1)))
9537 (set (match_dup 0)
9538 (plus:DWIH (match_dup 1) (match_dup 2)))])
9539 (parallel [(set (reg:CCC FLAGS_REG)
9540 (compare:CCC
9541 (zero_extend:<DWI>
9542 (plus:DWIH
9543 (plus:DWIH
9544 (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
9545 (match_dup 4))
9546 (match_dup 5)))
9547 (plus:<DWI>
9548 (match_dup 6)
9549 (ltu:<DWI> (reg:CC FLAGS_REG) (const_int 0)))))
9550 (set (match_dup 3)
9551 (plus:DWIH
9552 (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
9553 (match_dup 4))
9554 (match_dup 5)))])]
9555 {
9556 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
9557 if (operands[2] == const0_rtx)
9558 {
9559 if (!rtx_equal_p (operands[0], operands[1]))
9560 emit_move_insn (operands[0], operands[1]);
9561 emit_insn (gen_addcarry<mode>_0 (operands[3], operands[4], operands[5]));
9562 DONE;
9563 }
9564 if (CONST_INT_P (operands[5]))
9565 operands[6] = simplify_unary_operation (ZERO_EXTEND, <DWI>mode,
9566 operands[5], <MODE>mode);
9567 else
9568 operands[6] = gen_rtx_ZERO_EXTEND (<DWI>mode, operands[5]);
9569 }
9570 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
9571
9572 ;; x == 0 with zero flag test can be done also as x < 1U with carry flag
9573 ;; test, where the latter is preferable if we have some carry consuming
9574 ;; instruction.
9575 ;; For x != 0, we need to use x < 1U with negation of carry, i.e.
9576 ;; + (1 - CF).
;; op0 = (op3 == 0) + op1 + op2.  Split into a compare of op3 with 1
;; followed by an add of op1 and op2 that also adds the resulting carry
;; (the LTU test of the flags).
9577 (define_insn_and_split "*add<mode>3_eq"
9578 [(set (match_operand:SWI 0 "nonimmediate_operand")
9579 (plus:SWI
9580 (plus:SWI
9581 (eq:SWI (match_operand 3 "int_nonimmediate_operand") (const_int 0))
9582 (match_operand:SWI 1 "nonimmediate_operand"))
9583 (match_operand:SWI 2 "<general_operand>")))
9584 (clobber (reg:CC FLAGS_REG))]
9585 "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)
9586 && ix86_pre_reload_split ()"
9587 "#"
9588 "&& 1"
9589 [(set (reg:CC FLAGS_REG)
9590 (compare:CC (match_dup 3) (const_int 1)))
9591 (parallel [(set (match_dup 0)
9592 (plus:SWI
9593 (plus:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))
9594 (match_dup 1))
9595 (match_dup 2)))
9596 (clobber (reg:CC FLAGS_REG))])])
9597
;; op0 = (op3 != 0) + op1 + C for a CONST_INT C.  After comparing op3 with
;; 1 the carry is (op3 == 0), so the result is op1 - carry - ~C; the split
;; replaces operand 2 with ~C.  For DImode the new constant is generated
;; in SImode, and C == -0x80000000 is rejected by the insn condition.
9598 (define_insn_and_split "*add<mode>3_ne"
9599 [(set (match_operand:SWI 0 "nonimmediate_operand")
9600 (plus:SWI
9601 (plus:SWI
9602 (ne:SWI (match_operand 3 "int_nonimmediate_operand") (const_int 0))
9603 (match_operand:SWI 1 "nonimmediate_operand"))
9604 (match_operand:SWI 2 "<immediate_operand>")))
9605 (clobber (reg:CC FLAGS_REG))]
9606 "CONST_INT_P (operands[2])
9607 && (<MODE>mode != DImode
9608 || INTVAL (operands[2]) != HOST_WIDE_INT_C (-0x80000000))
9609 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands,
9610 TARGET_APX_NDD)
9611 && ix86_pre_reload_split ()"
9612 "#"
9613 "&& 1"
9614 [(set (reg:CC FLAGS_REG)
9615 (compare:CC (match_dup 3) (const_int 1)))
9616 (parallel [(set (match_dup 0)
9617 (minus:SWI
9618 (minus:SWI (match_dup 1)
9619 (ltu:SWI (reg:CC FLAGS_REG) (const_int 0)))
9620 (match_dup 2)))
9621 (clobber (reg:CC FLAGS_REG))])]
9622 {
9623 operands[2] = gen_int_mode (~INTVAL (operands[2]),
9624 <MODE>mode == DImode ? SImode : <MODE>mode);
9625 })
9626
;; op0 = (op2 == 0) + op1.  Split into a compare of op2 with 1 followed by
;; an add of the resulting carry to op1.  Operand 1 is forced into a
;; register first if it is not a nonimmediate_operand.
9627 (define_insn_and_split "*add<mode>3_eq_0"
9628 [(set (match_operand:SWI 0 "nonimmediate_operand")
9629 (plus:SWI
9630 (eq:SWI (match_operand 2 "int_nonimmediate_operand") (const_int 0))
9631 (match_operand:SWI 1 "<general_operand>")))
9632 (clobber (reg:CC FLAGS_REG))]
9633 "ix86_unary_operator_ok (PLUS, <MODE>mode, operands)
9634 && ix86_pre_reload_split ()"
9635 "#"
9636 "&& 1"
9637 [(set (reg:CC FLAGS_REG)
9638 (compare:CC (match_dup 2) (const_int 1)))
9639 (parallel [(set (match_dup 0)
9640 (plus:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))
9641 (match_dup 1)))
9642 (clobber (reg:CC FLAGS_REG))])]
9643 {
9644 if (!nonimmediate_operand (operands[1], <MODE>mode))
9645 operands[1] = force_reg (<MODE>mode, operands[1]);
9646 })
9647
;; op0 = (op2 != 0) + op1.  After comparing op2 with 1 the carry is
;; (op2 == 0), so the result is computed as op1 - carry - (-1).  Operand 1
;; is forced into a register first if it is not a nonimmediate_operand.
9648 (define_insn_and_split "*add<mode>3_ne_0"
9649 [(set (match_operand:SWI 0 "nonimmediate_operand")
9650 (plus:SWI
9651 (ne:SWI (match_operand 2 "int_nonimmediate_operand") (const_int 0))
9652 (match_operand:SWI 1 "<general_operand>")))
9653 (clobber (reg:CC FLAGS_REG))]
9654 "ix86_unary_operator_ok (PLUS, <MODE>mode, operands)
9655 && ix86_pre_reload_split ()"
9656 "#"
9657 "&& 1"
9658 [(set (reg:CC FLAGS_REG)
9659 (compare:CC (match_dup 2) (const_int 1)))
9660 (parallel [(set (match_dup 0)
9661 (minus:SWI (minus:SWI
9662 (match_dup 1)
9663 (ltu:SWI (reg:CC FLAGS_REG) (const_int 0)))
9664 (const_int -1)))
9665 (clobber (reg:CC FLAGS_REG))])]
9666 {
9667 if (!nonimmediate_operand (operands[1], <MODE>mode))
9668 operands[1] = force_reg (<MODE>mode, operands[1]);
9669 })
9670
;; op0 = op1 - (op3 == 0) - op2.  Split into a compare of op3 with 1
;; followed by a subtract of the resulting carry and op2 from op1.
9671 (define_insn_and_split "*sub<mode>3_eq"
9672 [(set (match_operand:SWI 0 "nonimmediate_operand")
9673 (minus:SWI
9674 (minus:SWI
9675 (match_operand:SWI 1 "nonimmediate_operand")
9676 (eq:SWI (match_operand 3 "int_nonimmediate_operand")
9677 (const_int 0)))
9678 (match_operand:SWI 2 "<general_operand>")))
9679 (clobber (reg:CC FLAGS_REG))]
9680 "ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
9681 TARGET_APX_NDD)
9682 && ix86_pre_reload_split ()"
9683 "#"
9684 "&& 1"
9685 [(set (reg:CC FLAGS_REG)
9686 (compare:CC (match_dup 3) (const_int 1)))
9687 (parallel [(set (match_dup 0)
9688 (minus:SWI
9689 (minus:SWI (match_dup 1)
9690 (ltu:SWI (reg:CC FLAGS_REG) (const_int 0)))
9691 (match_dup 2)))
9692 (clobber (reg:CC FLAGS_REG))])])
9693
;; op0 = op1 - (op3 != 0) + C for a CONST_INT C.  After comparing op3 with
;; 1 the carry is (op3 == 0), so the result is carry + op1 + (C - 1); the
;; split replaces operand 2 with C - 1.  For DImode the new constant is
;; generated in SImode, and C == -0x80000000 is rejected by the insn
;; condition.
9694 (define_insn_and_split "*sub<mode>3_ne"
9695 [(set (match_operand:SWI 0 "nonimmediate_operand")
9696 (plus:SWI
9697 (minus:SWI
9698 (match_operand:SWI 1 "nonimmediate_operand")
9699 (ne:SWI (match_operand 3 "int_nonimmediate_operand")
9700 (const_int 0)))
9701 (match_operand:SWI 2 "<immediate_operand>")))
9702 (clobber (reg:CC FLAGS_REG))]
9703 "CONST_INT_P (operands[2])
9704 && (<MODE>mode != DImode
9705 || INTVAL (operands[2]) != HOST_WIDE_INT_C (-0x80000000))
9706 && ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
9707 TARGET_APX_NDD)
9708 && ix86_pre_reload_split ()"
9709 "#"
9710 "&& 1"
9711 [(set (reg:CC FLAGS_REG)
9712 (compare:CC (match_dup 3) (const_int 1)))
9713 (parallel [(set (match_dup 0)
9714 (plus:SWI
9715 (plus:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))
9716 (match_dup 1))
9717 (match_dup 2)))
9718 (clobber (reg:CC FLAGS_REG))])]
9719 {
9720 operands[2] = gen_int_mode (INTVAL (operands[2]) - 1,
9721 <MODE>mode == DImode ? SImode : <MODE>mode);
9722 })
9723
;; op0 = op1 - (op3 == 0) + C for a CONST_INT C.  Split into a compare of
;; op3 with 1 followed by op1 - carry - (-C); the split replaces operand 2
;; with -C.  For DImode the new constant is generated in SImode, and
;; C == -0x80000000 is rejected by the insn condition.
9724 (define_insn_and_split "*sub<mode>3_eq_1"
9725 [(set (match_operand:SWI 0 "nonimmediate_operand")
9726 (plus:SWI
9727 (minus:SWI
9728 (match_operand:SWI 1 "nonimmediate_operand")
9729 (eq:SWI (match_operand 3 "int_nonimmediate_operand")
9730 (const_int 0)))
9731 (match_operand:SWI 2 "<immediate_operand>")))
9732 (clobber (reg:CC FLAGS_REG))]
9733 "CONST_INT_P (operands[2])
9734 && (<MODE>mode != DImode
9735 || INTVAL (operands[2]) != HOST_WIDE_INT_C (-0x80000000))
9736 && ix86_binary_operator_ok (MINUS, <MODE>mode, operands,
9737 TARGET_APX_NDD)
9738 && ix86_pre_reload_split ()"
9739 "#"
9740 "&& 1"
9741 [(set (reg:CC FLAGS_REG)
9742 (compare:CC (match_dup 3) (const_int 1)))
9743 (parallel [(set (match_dup 0)
9744 (minus:SWI
9745 (minus:SWI (match_dup 1)
9746 (ltu:SWI (reg:CC FLAGS_REG) (const_int 0)))
9747 (match_dup 2)))
9748 (clobber (reg:CC FLAGS_REG))])]
9749 {
9750 operands[2] = gen_int_mode (-INTVAL (operands[2]),
9751 <MODE>mode == DImode ? SImode : <MODE>mode);
9752 })
9753
;; op0 = op1 - (op2 == 0).  Split into a compare of op2 with 1 followed by
;; a subtract of the resulting carry from op1.  Operand 1 is forced into a
;; register first if it is not a nonimmediate_operand.
9754 (define_insn_and_split "*sub<mode>3_eq_0"
9755 [(set (match_operand:SWI 0 "nonimmediate_operand")
9756 (minus:SWI
9757 (match_operand:SWI 1 "<general_operand>")
9758 (eq:SWI (match_operand 2 "int_nonimmediate_operand") (const_int 0))))
9759 (clobber (reg:CC FLAGS_REG))]
9760 "ix86_unary_operator_ok (MINUS, <MODE>mode, operands)
9761 && ix86_pre_reload_split ()"
9762 "#"
9763 "&& 1"
9764 [(set (reg:CC FLAGS_REG)
9765 (compare:CC (match_dup 2) (const_int 1)))
9766 (parallel [(set (match_dup 0)
9767 (minus:SWI (match_dup 1)
9768 (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))))
9769 (clobber (reg:CC FLAGS_REG))])]
9770 {
9771 if (!nonimmediate_operand (operands[1], <MODE>mode))
9772 operands[1] = force_reg (<MODE>mode, operands[1]);
9773 })
9774
;; op0 = op1 - (op2 != 0).  After comparing op2 with 1 the carry is
;; (op2 == 0), so the result is computed as carry + op1 + (-1).  Operand 1
;; is forced into a register first if it is not a nonimmediate_operand.
9775 (define_insn_and_split "*sub<mode>3_ne_0"
9776 [(set (match_operand:SWI 0 "nonimmediate_operand")
9777 (minus:SWI
9778 (match_operand:SWI 1 "<general_operand>")
9779 (ne:SWI (match_operand 2 "int_nonimmediate_operand") (const_int 0))))
9780 (clobber (reg:CC FLAGS_REG))]
9781 "ix86_unary_operator_ok (MINUS, <MODE>mode, operands)
9782 && ix86_pre_reload_split ()"
9783 "#"
9784 "&& 1"
9785 [(set (reg:CC FLAGS_REG)
9786 (compare:CC (match_dup 2) (const_int 1)))
9787 (parallel [(set (match_dup 0)
9788 (plus:SWI (plus:SWI
9789 (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))
9790 (match_dup 1))
9791 (const_int -1)))
9792 (clobber (reg:CC FLAGS_REG))])]
9793 {
9794 if (!nonimmediate_operand (operands[1], <MODE>mode))
9795 operands[1] = force_reg (<MODE>mode, operands[1]);
9796 })
9797
9798 ;; The patterns that match these are at the end of this file.
9799
;; XFmode add/subtract (plusminus) expander; all three operands are
;; registers.  Requires TARGET_80387.
9800 (define_expand "<insn>xf3"
9801 [(set (match_operand:XF 0 "register_operand")
9802 (plusminus:XF
9803 (match_operand:XF 1 "register_operand")
9804 (match_operand:XF 2 "register_operand")))]
9805 "TARGET_80387")
9806
;; HFmode add/subtract (plusminus) expander; operand 2 may be a register
;; or memory.  Requires TARGET_AVX512FP16.
9807 (define_expand "<insn>hf3"
9808 [(set (match_operand:HF 0 "register_operand")
9809 (plusminus:HF
9810 (match_operand:HF 1 "register_operand")
9811 (match_operand:HF 2 "nonimmediate_operand")))]
9812 "TARGET_AVX512FP16")
9813
;; MODEF add/subtract (plusminus) expander.  Available when x87 arithmetic
;; is enabled for the mode, or when the mode is an SSE float mode and
;; TARGET_SSE_MATH is set.
9814 (define_expand "<insn><mode>3"
9815 [(set (match_operand:MODEF 0 "register_operand")
9816 (plusminus:MODEF
9817 (match_operand:MODEF 1 "register_operand")
9818 (match_operand:MODEF 2 "nonimmediate_operand")))]
9819 "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode))
9820 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)")
9821 \f
9822 ;; Multiply instructions
9823
;; Multiply expander for the SWIM248 modes; the generated insn clobbers
;; FLAGS_REG.
9824 (define_expand "mul<mode>3"
9825 [(parallel [(set (match_operand:SWIM248 0 "register_operand")
9826 (mult:SWIM248
9827 (match_operand:SWIM248 1 "register_operand")
9828 (match_operand:SWIM248 2 "<general_operand>")))
9829 (clobber (reg:CC FLAGS_REG))])])
9830
;; QImode multiply expander; only available with TARGET_QIMODE_MATH.  The
;; generated insn clobbers FLAGS_REG.
9831 (define_expand "mulqi3"
9832 [(parallel [(set (match_operand:QI 0 "register_operand")
9833 (mult:QI
9834 (match_operand:QI 1 "register_operand")
9835 (match_operand:QI 2 "nonimmediate_operand")))
9836 (clobber (reg:CC FLAGS_REG))])]
9837 "TARGET_QIMODE_MATH")
9838
9839 ;; On AMDFAM10
9840 ;; IMUL reg32/64, reg32/64, imm8 Direct
9841 ;; IMUL reg32/64, mem32/64, imm8 VectorPath
9842 ;; IMUL reg32/64, reg32/64, imm32 Direct
9843 ;; IMUL reg32/64, mem32/64, imm32 VectorPath
9844 ;; IMUL reg32/64, reg32/64 Direct
9845 ;; IMUL reg32/64, mem32/64 Direct
9846 ;;
9847 ;; On BDVER1, all above IMULs use DirectPath
9848 ;;
9849 ;; On AMDFAM10
9850 ;; IMUL reg16, reg16, imm8 VectorPath
9851 ;; IMUL reg16, mem16, imm8 VectorPath
9852 ;; IMUL reg16, reg16, imm16 VectorPath
9853 ;; IMUL reg16, mem16, imm16 VectorPath
9854 ;; IMUL reg16, reg16 Direct
9855 ;; IMUL reg16, mem16 Direct
9856 ;;
9857 ;; On BDVER1, all HI MULs use DoublePath
9858
;; imul for the SWIM248 modes.  Alternatives 0 and 1 are the three-operand
;; forms (constraints K and <i> on operand 2); alternative 2 is the
;; two-operand form with operand 1 tied to the destination.  The decode
;; attributes depend on the alternative, on HImode, and on whether
;; operand 1 is memory.
9859 (define_insn "*mul<mode>3_1"
9860 [(set (match_operand:SWIM248 0 "register_operand" "=r,r,r")
9861 (mult:SWIM248
9862 (match_operand:SWIM248 1 "nonimmediate_operand" "%rm,rm,0")
9863 (match_operand:SWIM248 2 "<general_operand>" "K,<i>,<m>r")))
9864 (clobber (reg:CC FLAGS_REG))]
9865 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
9866 "@
9867 imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
9868 imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
9869 imul{<imodesuffix>}\t{%2, %0|%0, %2}"
9870 [(set_attr "type" "imul")
9871 (set_attr "prefix_0f" "0,0,1")
9872 (set (attr "athlon_decode")
9873 (cond [(eq_attr "cpu" "athlon")
9874 (const_string "vector")
9875 (eq_attr "alternative" "1")
9876 (const_string "vector")
9877 (and (eq_attr "alternative" "2")
9878 (ior (match_test "<MODE>mode == HImode")
9879 (match_operand 1 "memory_operand")))
9880 (const_string "vector")]
9881 (const_string "direct")))
9882 (set (attr "amdfam10_decode")
9883 (cond [(and (eq_attr "alternative" "0,1")
9884 (ior (match_test "<MODE>mode == HImode")
9885 (match_operand 1 "memory_operand")))
9886 (const_string "vector")]
9887 (const_string "direct")))
9888 (set (attr "bdver1_decode")
9889 (if_then_else
9890 (match_test "<MODE>mode == HImode")
9891 (const_string "double")
9892 (const_string "direct")))
9893 (set_attr "mode" "<MODE>")])
9894
;; SImode imul whose result is zero-extended to DImode in operand 0.
;; TARGET_64BIT only; the destination is printed with the %k modifier.
;; Alternatives 0 and 1 are three-operand forms, alternative 2 is the
;; two-operand form.
9895 (define_insn "*mulsi3_1_zext"
9896 [(set (match_operand:DI 0 "register_operand" "=r,r,r")
9897 (zero_extend:DI
9898 (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0")
9899 (match_operand:SI 2 "x86_64_general_operand" "K,e,BMr"))))
9900 (clobber (reg:CC FLAGS_REG))]
9901 "TARGET_64BIT
9902 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9903 "@
9904 imul{l}\t{%2, %1, %k0|%k0, %1, %2}
9905 imul{l}\t{%2, %1, %k0|%k0, %1, %2}
9906 imul{l}\t{%2, %k0|%k0, %2}"
9907 [(set_attr "type" "imul")
9908 (set_attr "prefix_0f" "0,0,1")
9909 (set (attr "athlon_decode")
9910 (cond [(eq_attr "cpu" "athlon")
9911 (const_string "vector")
9912 (eq_attr "alternative" "1")
9913 (const_string "vector")
9914 (and (eq_attr "alternative" "2")
9915 (match_operand 1 "memory_operand"))
9916 (const_string "vector")]
9917 (const_string "direct")))
9918 (set (attr "amdfam10_decode")
9919 (cond [(and (eq_attr "alternative" "0,1")
9920 (match_operand 1 "memory_operand"))
9921 (const_string "vector")]
9922 (const_string "direct")))
9923 (set_attr "bdver1_decode" "direct")
9924 (set_attr "mode" "SI")])
9925
9926 ;;On AMDFAM10 and BDVER1
9927 ;; MUL reg8 Direct
9928 ;; MUL mem8 Direct
9929
;; QImode multiply emitted as the one-operand "mul{b}".  Operand 0 has
;; constraint "a" and operand 1 is tied to it, so only operand 2 appears
;; in the output template.  Requires TARGET_QIMODE_MATH.
9930 (define_insn "*mulqi3_1"
9931 [(set (match_operand:QI 0 "register_operand" "=a")
9932 (mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
9933 (match_operand:QI 2 "nonimmediate_operand" "qm")))
9934 (clobber (reg:CC FLAGS_REG))]
9935 "TARGET_QIMODE_MATH
9936 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9937 "mul{b}\t%2"
9938 [(set_attr "type" "imul")
9939 (set_attr "length_immediate" "0")
9940 (set (attr "athlon_decode")
9941 (if_then_else (eq_attr "cpu" "athlon")
9942 (const_string "vector")
9943 (const_string "direct")))
9944 (set_attr "amdfam10_decode" "direct")
9945 (set_attr "bdver1_decode" "direct")
9946 (set_attr "mode" "QI")])
9947
9948 ;; Multiply with jump on overflow.
;; FLAGS_REG (CCO mode) is set from an EQ of the double-width product of
;; the sign-extended operands and the sign-extended single-width product;
;; the product itself is stored in operand 0.  The jump to label operand 3
;; is then conditional on (eq (reg:CCO FLAGS_REG) 0).
;; Operand 4 is operand 2 itself when that is a CONST_INT, otherwise
;; operand 2 sign-extended to <DWI>mode.
9949 (define_expand "mulv<mode>4"
9950 [(parallel [(set (reg:CCO FLAGS_REG)
9951 (eq:CCO (mult:<DWI>
9952 (sign_extend:<DWI>
9953 (match_operand:SWI248 1 "register_operand"))
9954 (match_dup 4))
9955 (sign_extend:<DWI>
9956 (mult:SWI248 (match_dup 1)
9957 (match_operand:SWI248 2
9958 "<general_operand>")))))
9959 (set (match_operand:SWI248 0 "register_operand")
9960 (mult:SWI248 (match_dup 1) (match_dup 2)))])
9961 (set (pc) (if_then_else
9962 (eq (reg:CCO FLAGS_REG) (const_int 0))
9963 (label_ref (match_operand 3))
9964 (pc)))]
9965 ""
9966 {
9967 if (CONST_INT_P (operands[2]))
9968 operands[4] = operands[2];
9969 else
9970 operands[4] = gen_rtx_SIGN_EXTEND (<DWI>mode, operands[2]);
9971 })
9972
;; Overflow-checking signed multiply for the SWI48 modes, with both
;; operands sign-extended in the double-width product.  Sets FLAGS_REG in
;; CCO mode and stores the product in operand 0.  Alternative 0 is the
;; three-operand imul, alternative 1 the two-operand form.
9973 (define_insn "*mulv<mode>4"
9974 [(set (reg:CCO FLAGS_REG)
9975 (eq:CCO (mult:<DWI>
9976 (sign_extend:<DWI>
9977 (match_operand:SWI48 1 "nonimmediate_operand" "%rm,0"))
9978 (sign_extend:<DWI>
9979 (match_operand:SWI48 2 "x86_64_sext_operand" "We,mr")))
9980 (sign_extend:<DWI>
9981 (mult:SWI48 (match_dup 1) (match_dup 2)))))
9982 (set (match_operand:SWI48 0 "register_operand" "=r,r")
9983 (mult:SWI48 (match_dup 1) (match_dup 2)))]
9984 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
9985 "@
9986 imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
9987 imul{<imodesuffix>}\t{%2, %0|%0, %2}"
9988 [(set_attr "type" "imul")
9989 (set_attr "prefix_0f" "0,1")
9990 (set (attr "athlon_decode")
9991 (cond [(eq_attr "cpu" "athlon")
9992 (const_string "vector")
9993 (eq_attr "alternative" "0")
9994 (const_string "vector")
9995 (and (eq_attr "alternative" "1")
9996 (match_operand 1 "memory_operand"))
9997 (const_string "vector")]
9998 (const_string "direct")))
9999 (set (attr "amdfam10_decode")
10000 (cond [(and (eq_attr "alternative" "1")
10001 (match_operand 1 "memory_operand"))
10002 (const_string "vector")]
10003 (const_string "direct")))
10004 (set_attr "bdver1_decode" "direct")
10005 (set_attr "mode" "<MODE>")])
10006
;; HImode overflow-checking signed multiply: the SImode product of the
;; sign-extended operands is compared with the sign-extended HImode
;; product.  Emitted as the two-operand "imul{w}" with operand 1 tied to
;; the destination.
10007 (define_insn "*mulvhi4"
10008 [(set (reg:CCO FLAGS_REG)
10009 (eq:CCO (mult:SI
10010 (sign_extend:SI
10011 (match_operand:HI 1 "nonimmediate_operand" "%0"))
10012 (sign_extend:SI
10013 (match_operand:HI 2 "nonimmediate_operand" "mr")))
10014 (sign_extend:SI
10015 (mult:HI (match_dup 1) (match_dup 2)))))
10016 (set (match_operand:HI 0 "register_operand" "=r")
10017 (mult:HI (match_dup 1) (match_dup 2)))]
10018 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
10019 "imul{w}\t{%2, %0|%0, %2}"
10020 [(set_attr "type" "imul")
10021 (set_attr "prefix_0f" "1")
10022 (set_attr "athlon_decode" "vector")
10023 (set_attr "amdfam10_decode" "direct")
10024 (set_attr "bdver1_decode" "double")
10025 (set_attr "mode" "HI")])
10026
;; Overflow-checking signed multiply by a constant.  Operand 3 is the
;; constant as it appears in the double-width product and operand 2 the
;; same constant in the single-width product; the insn condition requires
;; operand 2 to be a CONST_INT with the same INTVAL as operand 3.  Always
;; emitted as the three-operand imul.  length_immediate is 1 for
;; alternative 0, 4 when <MODE_SIZE> is 8, and <MODE_SIZE> otherwise.
10027 (define_insn "*mulv<mode>4_1"
10028 [(set (reg:CCO FLAGS_REG)
10029 (eq:CCO (mult:<DWI>
10030 (sign_extend:<DWI>
10031 (match_operand:SWI248 1 "nonimmediate_operand" "rm,rm"))
10032 (match_operand:<DWI> 3 "const_int_operand" "K,i"))
10033 (sign_extend:<DWI>
10034 (mult:SWI248 (match_dup 1)
10035 (match_operand:SWI248 2
10036 "<immediate_operand>" "K,<i>")))))
10037 (set (match_operand:SWI248 0 "register_operand" "=r,r")
10038 (mult:SWI248 (match_dup 1) (match_dup 2)))]
10039 "!(MEM_P (operands[1]) && MEM_P (operands[2]))
10040 && CONST_INT_P (operands[2])
10041 && INTVAL (operands[2]) == INTVAL (operands[3])"
10042 "imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
10043 [(set_attr "type" "imul")
10044 (set (attr "prefix_0f")
10045 (if_then_else
10046 (match_test "<MODE>mode == HImode")
10047 (const_string "0")
10048 (const_string "*")))
10049 (set (attr "athlon_decode")
10050 (cond [(eq_attr "cpu" "athlon")
10051 (const_string "vector")
10052 (eq_attr "alternative" "1")
10053 (const_string "vector")]
10054 (const_string "direct")))
10055 (set (attr "amdfam10_decode")
10056 (cond [(ior (match_test "<MODE>mode == HImode")
10057 (match_operand 1 "memory_operand"))
10058 (const_string "vector")]
10059 (const_string "direct")))
10060 (set (attr "bdver1_decode")
10061 (if_then_else
10062 (match_test "<MODE>mode == HImode")
10063 (const_string "double")
10064 (const_string "direct")))
10065 (set_attr "mode" "<MODE>")
10066 (set (attr "length_immediate")
10067 (cond [(eq_attr "alternative" "0")
10068 (const_string "1")
10069 (match_test "<MODE_SIZE> == 8")
10070 (const_string "4")]
10071 (const_string "<MODE_SIZE>")))])
10072
;; Unsigned counterpart of mulv<mode>4: the operands are zero-extended in
;; the double-width product, the multiply additionally clobbers a scratch,
;; and the jump to label operand 3 is conditional on
;; (eq (reg:CCO FLAGS_REG) 0).  If both inputs are memory, operand 1 is
;; forced into a register.
10073 (define_expand "umulv<mode>4"
10074 [(parallel [(set (reg:CCO FLAGS_REG)
10075 (eq:CCO (mult:<DWI>
10076 (zero_extend:<DWI>
10077 (match_operand:SWI248 1
10078 "nonimmediate_operand"))
10079 (zero_extend:<DWI>
10080 (match_operand:SWI248 2
10081 "nonimmediate_operand")))
10082 (zero_extend:<DWI>
10083 (mult:SWI248 (match_dup 1) (match_dup 2)))))
10084 (set (match_operand:SWI248 0 "register_operand")
10085 (mult:SWI248 (match_dup 1) (match_dup 2)))
10086 (clobber (scratch:SWI248))])
10087 (set (pc) (if_then_else
10088 (eq (reg:CCO FLAGS_REG) (const_int 0))
10089 (label_ref (match_operand 3))
10090 (pc)))]
10091 ""
10092 {
10093 if (MEM_P (operands[1]) && MEM_P (operands[2]))
10094 operands[1] = force_reg (<MODE>mode, operands[1]);
10095 })
10096
;; Overflow-checking unsigned multiply emitted as the one-operand "mul".
;; The product goes to operand 0 (constraint "a", with operand 1 tied to
;; it) and a scratch with constraint "d" is clobbered.
10097 (define_insn "*umulv<mode>4"
10098 [(set (reg:CCO FLAGS_REG)
10099 (eq:CCO (mult:<DWI>
10100 (zero_extend:<DWI>
10101 (match_operand:SWI248 1 "nonimmediate_operand" "%0"))
10102 (zero_extend:<DWI>
10103 (match_operand:SWI248 2 "nonimmediate_operand" "rm")))
10104 (zero_extend:<DWI>
10105 (mult:SWI248 (match_dup 1) (match_dup 2)))))
10106 (set (match_operand:SWI248 0 "register_operand" "=a")
10107 (mult:SWI248 (match_dup 1) (match_dup 2)))
10108 (clobber (match_scratch:SWI248 3 "=d"))]
10109 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
10110 "mul{<imodesuffix>}\t%2"
10111 [(set_attr "type" "imul")
10112 (set_attr "length_immediate" "0")
10113 (set (attr "athlon_decode")
10114 (if_then_else (eq_attr "cpu" "athlon")
10115 (const_string "vector")
10116 (const_string "double")))
10117 (set_attr "amdfam10_decode" "double")
10118 (set_attr "bdver1_decode" "direct")
10119 (set_attr "mode" "<MODE>")])
10120
;; QImode overflow-checking multiply expander, signed or unsigned via
;; any_extend, with the jump to label operand 3 conditional on
;; (eq (reg:CCO FLAGS_REG) 0).  Requires TARGET_QIMODE_MATH.  If both
;; inputs are memory, operand 1 is forced into a register.
10121 (define_expand "<u>mulvqi4"
10122 [(parallel [(set (reg:CCO FLAGS_REG)
10123 (eq:CCO (mult:HI
10124 (any_extend:HI
10125 (match_operand:QI 1 "nonimmediate_operand"))
10126 (any_extend:HI
10127 (match_operand:QI 2 "nonimmediate_operand")))
10128 (any_extend:HI
10129 (mult:QI (match_dup 1) (match_dup 2)))))
10130 (set (match_operand:QI 0 "register_operand")
10131 (mult:QI (match_dup 1) (match_dup 2)))])
10132 (set (pc) (if_then_else
10133 (eq (reg:CCO FLAGS_REG) (const_int 0))
10134 (label_ref (match_operand 3))
10135 (pc)))]
10136 "TARGET_QIMODE_MATH"
10137 {
10138 if (MEM_P (operands[1]) && MEM_P (operands[2]))
10139 operands[1] = force_reg (QImode, operands[1]);
10140 })
10141
;; QImode overflow-checking multiply emitted as the one-operand
;; "<sgnprefix>mul{b}".  The product goes to operand 0 (constraint "a"),
;; with operand 1 tied to it.  Requires TARGET_QIMODE_MATH.
10142 (define_insn "*<u>mulvqi4"
10143 [(set (reg:CCO FLAGS_REG)
10144 (eq:CCO (mult:HI
10145 (any_extend:HI
10146 (match_operand:QI 1 "nonimmediate_operand" "%0"))
10147 (any_extend:HI
10148 (match_operand:QI 2 "nonimmediate_operand" "qm")))
10149 (any_extend:HI
10150 (mult:QI (match_dup 1) (match_dup 2)))))
10151 (set (match_operand:QI 0 "register_operand" "=a")
10152 (mult:QI (match_dup 1) (match_dup 2)))]
10153 "TARGET_QIMODE_MATH
10154 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10155 "<sgnprefix>mul{b}\t%2"
10156 [(set_attr "type" "imul")
10157 (set_attr "length_immediate" "0")
10158 (set (attr "athlon_decode")
10159 (if_then_else (eq_attr "cpu" "athlon")
10160 (const_string "vector")
10161 (const_string "direct")))
10162 (set_attr "amdfam10_decode" "direct")
10163 (set_attr "bdver1_decode" "direct")
10164 (set_attr "mode" "QI")])
10165
;; Widening multiply expander: two DWIH operands are sign- or zero-extended
;; (any_extend) and multiplied into a <DWI> result.  The generated insn
;; clobbers FLAGS_REG.
10166 (define_expand "<u>mul<mode><dwi>3"
10167 [(parallel [(set (match_operand:<DWI> 0 "register_operand")
10168 (mult:<DWI>
10169 (any_extend:<DWI>
10170 (match_operand:DWIH 1 "register_operand"))
10171 (any_extend:<DWI>
10172 (match_operand:DWIH 2 "nonimmediate_operand"))))
10173 (clobber (reg:CC FLAGS_REG))])])
10174
;; Widening QImode -> HImode multiply expander, signed or unsigned via
;; any_extend.  Requires TARGET_QIMODE_MATH; the generated insn clobbers
;; FLAGS_REG.
10175 (define_expand "<u>mulqihi3"
10176 [(parallel [(set (match_operand:HI 0 "register_operand")
10177 (mult:HI
10178 (any_extend:HI
10179 (match_operand:QI 1 "register_operand"))
10180 (any_extend:HI
10181 (match_operand:QI 2 "nonimmediate_operand"))))
10182 (clobber (reg:CC FLAGS_REG))])]
10183 "TARGET_QIMODE_MATH")
10184
;; BMI2 mulx: operand 0 receives the DWIH product of operands 2 and 3 and
;; operand 1 receives its unsigned high part (umul_highpart).  Operand 2
;; has constraint "d" and does not appear in the output template.  The
;; pattern has no FLAGS_REG clobber.
10185 (define_insn "*bmi2_umul<mode><dwi>3_1"
10186 [(set (match_operand:DWIH 0 "register_operand" "=r")
10187 (mult:DWIH
10188 (match_operand:DWIH 2 "register_operand" "%d")
10189 (match_operand:DWIH 3 "nonimmediate_operand" "rm")))
10190 (set (match_operand:DWIH 1 "register_operand" "=r")
10191 (umul_highpart:DWIH (match_dup 2) (match_dup 3)))]
10192 "TARGET_BMI2"
10193 "mulx\t{%3, %0, %1|%1, %0, %3}"
10194 [(set_attr "type" "imulx")
10195 (set_attr "prefix" "vex")
10196 (set_attr "mode" "<MODE>")])
10197
10198 ;; Tweak *bmi2_umul<mode><dwi>3_1 to eliminate following mov.
;; Matches the mulx pattern followed by a register copy (operand 4 from
;; operand 5) whose source is one of the two mulx destinations and is dead
;; after the copy.  The copy is dropped and that destination is replaced by
;; operand 4.  The condition also requires that the other mulx destination
;; is not the same register as operand 4.
10199 (define_peephole2
10200 [(parallel [(set (match_operand:DWIH 0 "general_reg_operand")
10201 (mult:DWIH (match_operand:DWIH 2 "register_operand")
10202 (match_operand:DWIH 3 "nonimmediate_operand")))
10203 (set (match_operand:DWIH 1 "general_reg_operand")
10204 (umul_highpart:DWIH (match_dup 2) (match_dup 3)))])
10205 (set (match_operand:DWIH 4 "general_reg_operand")
10206 (match_operand:DWIH 5 "general_reg_operand"))]
10207 "TARGET_BMI2
10208 && ((REGNO (operands[5]) == REGNO (operands[0])
10209 && REGNO (operands[1]) != REGNO (operands[4]))
10210 || (REGNO (operands[5]) == REGNO (operands[1])
10211 && REGNO (operands[0]) != REGNO (operands[4])))
10212 && peep2_reg_dead_p (2, operands[5])"
10213 [(parallel [(set (match_dup 0) (mult:DWIH (match_dup 2) (match_dup 3)))
10214 (set (match_dup 1)
10215 (umul_highpart:DWIH (match_dup 2) (match_dup 3)))])]
10216 {
10217 if (REGNO (operands[5]) == REGNO (operands[0]))
10218 operands[0] = operands[4];
10219 else
10220 operands[1] = operands[4];
10221 })
10222
;; Unsigned widening multiply.  Alternative 0 (isa bmi2, type imulx)
;; outputs "#" and therefore has to be split; alternative 1 is the
;; one-operand "mul" with constraint "A" on the result and "a" on
;; operand 1.
10223 (define_insn "*umul<mode><dwi>3_1"
10224 [(set (match_operand:<DWI> 0 "register_operand" "=r,A")
10225 (mult:<DWI>
10226 (zero_extend:<DWI>
10227 (match_operand:DWIH 1 "register_operand" "%d,a"))
10228 (zero_extend:<DWI>
10229 (match_operand:DWIH 2 "nonimmediate_operand" "rm,rm"))))
10230 (clobber (reg:CC FLAGS_REG))]
10231 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
10232 "@
10233 #
10234 mul{<imodesuffix>}\t%2"
10235 [(set_attr "isa" "bmi2,*")
10236 (set_attr "type" "imulx,imul")
10237 (set_attr "length_immediate" "*,0")
10238 (set (attr "athlon_decode")
10239 (cond [(eq_attr "alternative" "1")
10240 (if_then_else (eq_attr "cpu" "athlon")
10241 (const_string "vector")
10242 (const_string "double"))]
10243 (const_string "*")))
10244 (set_attr "amdfam10_decode" "*,double")
10245 (set_attr "bdver1_decode" "*,direct")
10246 (set_attr "prefix" "vex,orig")
10247 (set_attr "mode" "<MODE>")])
10248
10249 ;; Convert mul to the mulx pattern to avoid flags dependency.
;; Applies after reload when TARGET_BMI2 is set and operand 1 is in
;; DX_REG.  Operands 3 and 4 are the two halves of operand 0 produced by
;; split_double_mode; the replacement has no FLAGS_REG clobber.  No other
;; operand is needed by the replacement template.
10250 (define_split
10251 [(set (match_operand:<DWI> 0 "register_operand")
10252 (mult:<DWI>
10253 (zero_extend:<DWI>
10254 (match_operand:DWIH 1 "register_operand"))
10255 (zero_extend:<DWI>
10256 (match_operand:DWIH 2 "nonimmediate_operand"))))
10257 (clobber (reg:CC FLAGS_REG))]
10258 "TARGET_BMI2 && reload_completed
10259 && REGNO (operands[1]) == DX_REG"
10260 [(parallel [(set (match_dup 3)
10261 (mult:DWIH (match_dup 1) (match_dup 2)))
10262 (set (match_dup 4)
10263 (umul_highpart:DWIH (match_dup 1) (match_dup 2)))])]
10264 {
10265 split_double_mode (<DWI>mode, &operands[0], 1, &operands[3], &operands[4]);
10268 })
10269
;; Signed widening multiply emitted as the one-operand "imul", with
;; constraint "A" on the <DWI> result and "a" on operand 1.
10270 (define_insn "*mul<mode><dwi>3_1"
10271 [(set (match_operand:<DWI> 0 "register_operand" "=A")
10272 (mult:<DWI>
10273 (sign_extend:<DWI>
10274 (match_operand:DWIH 1 "register_operand" "%a"))
10275 (sign_extend:<DWI>
10276 (match_operand:DWIH 2 "nonimmediate_operand" "rm"))))
10277 (clobber (reg:CC FLAGS_REG))]
10278 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
10279 "imul{<imodesuffix>}\t%2"
10280 [(set_attr "type" "imul")
10281 (set_attr "length_immediate" "0")
10282 (set (attr "athlon_decode")
10283 (if_then_else (eq_attr "cpu" "athlon")
10284 (const_string "vector")
10285 (const_string "double")))
10286 (set_attr "amdfam10_decode" "double")
10287 (set_attr "bdver1_decode" "direct")
10288 (set_attr "mode" "<MODE>")])
10289
;; QImode x QImode -> HImode widening multiply, signed or unsigned via the
;; any_extend iterator.  The HImode result uses constraint 'a' and operand 1
;; is tied to it ('%0'); operand 2 is a byte register or memory ('qm').
;; Requires TARGET_QIMODE_MATH, rejects two memory inputs, and clobbers
;; FLAGS_REG.  Emits <sgnprefix>mul{b} on operand 2.
10290 (define_insn "*<u>mulqihi3_1"
10291 [(set (match_operand:HI 0 "register_operand" "=a")
10292 (mult:HI
10293 (any_extend:HI
10294 (match_operand:QI 1 "register_operand" "%0"))
10295 (any_extend:HI
10296 (match_operand:QI 2 "nonimmediate_operand" "qm"))))
10297 (clobber (reg:CC FLAGS_REG))]
10298 "TARGET_QIMODE_MATH
10299 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10300 "<sgnprefix>mul{b}\t%2"
10301 [(set_attr "type" "imul")
10302 (set_attr "length_immediate" "0")
10303 (set (attr "athlon_decode")
10304 (if_then_else (eq_attr "cpu" "athlon")
10305 (const_string "vector")
10306 (const_string "direct")))
10307 (set_attr "amdfam10_decode" "direct")
10308 (set_attr "bdver1_decode" "direct")
10309 (set_attr "mode" "QI")])
10310
10311 ;; Widening multiplication peephole2s to tweak register allocation.
10312 ;; mov imm,%rdx; mov %rdi,%rax; mulq %rdx -> mov imm,%rax; mulq %rdi
;; Matches three insns: load immediate (operand 1) into register operand 0;
;; copy register operand 3 into register operand 2; unsigned widening
;; multiply of operand 2 by operand 0 into operand 4.  Rewrites them as:
;; load the immediate straight into operand 2, then multiply operand 2 by
;; operand 3, so the register copy disappears.
;; Guards: operand 3 is not AX_REG; operand 0 differs from operands 2 and
;; 3; and operand 0 is either the same register number as operand 4, is
;; DX_REG, or is reported dead by peep2_reg_dead_p (3, ...).
10313 (define_peephole2
10314 [(set (match_operand:DWIH 0 "general_reg_operand")
10315 (match_operand:DWIH 1 "immediate_operand"))
10316 (set (match_operand:DWIH 2 "general_reg_operand")
10317 (match_operand:DWIH 3 "general_reg_operand"))
10318 (parallel [(set (match_operand:<DWI> 4 "general_reg_operand")
10319 (mult:<DWI> (zero_extend:<DWI> (match_dup 2))
10320 (zero_extend:<DWI> (match_dup 0))))
10321 (clobber (reg:CC FLAGS_REG))])]
10322 "REGNO (operands[3]) != AX_REG
10323 && REGNO (operands[0]) != REGNO (operands[2])
10324 && REGNO (operands[0]) != REGNO (operands[3])
10325 && (REGNO (operands[0]) == REGNO (operands[4])
10326 || REGNO (operands[0]) == DX_REG
10327 || peep2_reg_dead_p (3, operands[0]))"
10328 [(set (match_dup 2) (match_dup 1))
10329 (parallel [(set (match_dup 4)
10330 (mult:<DWI> (zero_extend:<DWI> (match_dup 2))
10331 (zero_extend:<DWI> (match_dup 3))))
10332 (clobber (reg:CC FLAGS_REG))])])
10333
10334 ;; mov imm,%rax; mov %rdi,%rdx; mulx %rax -> mov imm,%rdx; mulx %rdi
;; Same rewrite for the two-output mult/umul_highpart parallel (no
;; FLAGS_REG clobber): load immediate into operand 0, copy operand 3 into
;; operand 2, multiply operand 2 by operand 0 into operands 4 and 5
;; becomes load immediate into operand 2, multiply operand 2 by operand 3.
;; Guards: operand 3 is not DX_REG; operand 0 differs from operands 2 and
;; 3; and each of operands 0 and 2 is either one of the two result
;; registers (4 or 5) or is reported dead by peep2_reg_dead_p (3, ...).
10335 (define_peephole2
10336 [(set (match_operand:DWIH 0 "general_reg_operand")
10337 (match_operand:DWIH 1 "immediate_operand"))
10338 (set (match_operand:DWIH 2 "general_reg_operand")
10339 (match_operand:DWIH 3 "general_reg_operand"))
10340 (parallel [(set (match_operand:DWIH 4 "general_reg_operand")
10341 (mult:DWIH (match_dup 2) (match_dup 0)))
10342 (set (match_operand:DWIH 5 "general_reg_operand")
10343 (umul_highpart:DWIH (match_dup 2) (match_dup 0)))])]
10344 "REGNO (operands[3]) != DX_REG
10345 && REGNO (operands[0]) != REGNO (operands[2])
10346 && REGNO (operands[0]) != REGNO (operands[3])
10347 && (REGNO (operands[0]) == REGNO (operands[4])
10348 || REGNO (operands[0]) == REGNO (operands[5])
10349 || peep2_reg_dead_p (3, operands[0]))
10350 && (REGNO (operands[2]) == REGNO (operands[4])
10351 || REGNO (operands[2]) == REGNO (operands[5])
10352 || peep2_reg_dead_p (3, operands[2]))"
10353 [(set (match_dup 2) (match_dup 1))
10354 (parallel [(set (match_dup 4)
10355 (mult:DWIH (match_dup 2) (match_dup 3)))
10356 (set (match_dup 5)
10357 (umul_highpart:DWIH (match_dup 2) (match_dup 3)))])])
10358
10359 ;; Highpart multiplication patterns
;; Named high-part multiply for DWIH modes, signed or unsigned via the
;; any_mul_highpart iterator.  Result uses constraint 'd', operand 1 uses
;; 'a' (commutative with operand 2).  The scratch operand 3 is tied to
;; operand 1 ('=1') and clobbered, as is FLAGS_REG.  Emits the one-operand
;; <sgnprefix>mul on operand 2.
10360 (define_insn "<s>mul<mode>3_highpart"
10361 [(set (match_operand:DWIH 0 "register_operand" "=d")
10362 (any_mul_highpart:DWIH
10363 (match_operand:DWIH 1 "register_operand" "%a")
10364 (match_operand:DWIH 2 "nonimmediate_operand" "rm")))
10365 (clobber (match_scratch:DWIH 3 "=1"))
10366 (clobber (reg:CC FLAGS_REG))]
10367 ""
10368 "<sgnprefix>mul{<imodesuffix>}\t%2"
10369 [(set_attr "type" "imul")
10370 (set_attr "length_immediate" "0")
10371 (set (attr "athlon_decode")
10372 (if_then_else (eq_attr "cpu" "athlon")
10373 (const_string "vector")
10374 (const_string "double")))
10375 (set_attr "amdfam10_decode" "double")
10376 (set_attr "bdver1_decode" "direct")
10377 (set_attr "mode" "<MODE>")])
10378
;; SImode high-part multiply (any_mul_highpart form) whose result is
;; zero-extended into a DImode destination.  TARGET_64BIT only.  Register
;; usage mirrors the plain high-part pattern: result 'd', operand 1 'a',
;; scratch operand 3 tied to operand 1, FLAGS_REG clobbered.
10379 (define_insn "*<s>mulsi3_highpart_zext"
10380 [(set (match_operand:DI 0 "register_operand" "=d")
10381 (zero_extend:DI
10382 (any_mul_highpart:SI
10383 (match_operand:SI 1 "register_operand" "%a")
10384 (match_operand:SI 2 "nonimmediate_operand" "rm"))))
10385 (clobber (match_scratch:SI 3 "=1"))
10386 (clobber (reg:CC FLAGS_REG))]
10387 "TARGET_64BIT"
10388 "<sgnprefix>mul{l}\t%2"
10389 [(set_attr "type" "imul")
10390 (set_attr "length_immediate" "0")
10391 (set (attr "athlon_decode")
10392 (if_then_else (eq_attr "cpu" "athlon")
10393 (const_string "vector")
10394 (const_string "double")))
10395 (set_attr "amdfam10_decode" "double")
10396 (set_attr "bdver1_decode" "direct")
10397 (set_attr "mode" "SI")])
10398
;; DImode high-part multiply written in its explicit form: extend both
;; inputs to TImode, multiply, shift right by 64 and truncate to DImode.
;; TARGET_64BIT only; rejects two memory inputs.  Result 'd', operand 1
;; 'a', scratch operand 3 tied to operand 1, FLAGS_REG clobbered.
10399 (define_insn "*<s>muldi3_highpart_1"
10400 [(set (match_operand:DI 0 "register_operand" "=d")
10401 (truncate:DI
10402 (lshiftrt:TI
10403 (mult:TI
10404 (any_extend:TI
10405 (match_operand:DI 1 "nonimmediate_operand" "%a"))
10406 (any_extend:TI
10407 (match_operand:DI 2 "nonimmediate_operand" "rm")))
10408 (const_int 64))))
10409 (clobber (match_scratch:DI 3 "=1"))
10410 (clobber (reg:CC FLAGS_REG))]
10411 "TARGET_64BIT
10412 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10413 "<sgnprefix>mul{q}\t%2"
10414 [(set_attr "type" "imul")
10415 (set_attr "length_immediate" "0")
10416 (set (attr "athlon_decode")
10417 (if_then_else (eq_attr "cpu" "athlon")
10418 (const_string "vector")
10419 (const_string "double")))
10420 (set_attr "amdfam10_decode" "double")
10421 (set_attr "bdver1_decode" "direct")
10422 (set_attr "mode" "DI")])
10423
;; SImode high-part multiply in explicit extend/mult/shift-by-32/truncate
;; form, with the SImode result zero-extended into a DImode destination.
;; TARGET_64BIT only; rejects two memory inputs.  Shares its (anonymous,
;; '*'-prefixed) name with the any_mul_highpart variant of this pattern.
10424 (define_insn "*<s>mulsi3_highpart_zext"
10425 [(set (match_operand:DI 0 "register_operand" "=d")
10426 (zero_extend:DI (truncate:SI
10427 (lshiftrt:DI
10428 (mult:DI (any_extend:DI
10429 (match_operand:SI 1 "nonimmediate_operand" "%a"))
10430 (any_extend:DI
10431 (match_operand:SI 2 "nonimmediate_operand" "rm")))
10432 (const_int 32)))))
10433 (clobber (match_scratch:SI 3 "=1"))
10434 (clobber (reg:CC FLAGS_REG))]
10435 "TARGET_64BIT
10436 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10437 "<sgnprefix>mul{l}\t%2"
10438 [(set_attr "type" "imul")
10439 (set_attr "length_immediate" "0")
10440 (set (attr "athlon_decode")
10441 (if_then_else (eq_attr "cpu" "athlon")
10442 (const_string "vector")
10443 (const_string "double")))
10444 (set_attr "amdfam10_decode" "double")
10445 (set_attr "bdver1_decode" "direct")
10446 (set_attr "mode" "SI")])
10447
;; SImode high-part multiply in explicit form: extend both inputs to
;; DImode, multiply, shift right by 32 and truncate to SImode.  No target
;; restriction beyond rejecting two memory inputs.  Result 'd', operand 1
;; 'a', scratch operand 3 tied to operand 1, FLAGS_REG clobbered.
10448 (define_insn "*<s>mulsi3_highpart_1"
10449 [(set (match_operand:SI 0 "register_operand" "=d")
10450 (truncate:SI
10451 (lshiftrt:DI
10452 (mult:DI
10453 (any_extend:DI
10454 (match_operand:SI 1 "nonimmediate_operand" "%a"))
10455 (any_extend:DI
10456 (match_operand:SI 2 "nonimmediate_operand" "rm")))
10457 (const_int 32))))
10458 (clobber (match_scratch:SI 3 "=1"))
10459 (clobber (reg:CC FLAGS_REG))]
10460 "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
10461 "<sgnprefix>mul{l}\t%2"
10462 [(set_attr "type" "imul")
10463 (set_attr "length_immediate" "0")
10464 (set (attr "athlon_decode")
10465 (if_then_else (eq_attr "cpu" "athlon")
10466 (const_string "vector")
10467 (const_string "double")))
10468 (set_attr "amdfam10_decode" "double")
10469 (set_attr "bdver1_decode" "direct")
10470 (set_attr "mode" "SI")])
10471
10472 ;; Highpart multiplication peephole2s to tweak register allocation.
10473 ;; mov imm,%rdx; mov %rdi,%rax; imulq %rdx -> mov imm,%rax; imulq %rdi
;; Matches: load immediate (operand 1) into operand 0; copy operand 3 into
;; operand 2; high-part multiply of operand 2 by operand 0 into operand 4
;; (clobbering operand 2 and FLAGS_REG).  Rewrites to: load the immediate
;; into operand 2, then high-part multiply operand 2 by operand 3.
;; Guards: operand 3 is not AX_REG; operand 0 differs from operands 2 and
;; 3; operand 0 is the result register or is reported dead by
;; peep2_reg_dead_p (3, ...).
10474 (define_peephole2
10475 [(set (match_operand:SWI48 0 "general_reg_operand")
10476 (match_operand:SWI48 1 "immediate_operand"))
10477 (set (match_operand:SWI48 2 "general_reg_operand")
10478 (match_operand:SWI48 3 "general_reg_operand"))
10479 (parallel [(set (match_operand:SWI48 4 "general_reg_operand")
10480 (any_mul_highpart:SWI48 (match_dup 2) (match_dup 0)))
10481 (clobber (match_dup 2))
10482 (clobber (reg:CC FLAGS_REG))])]
10483 "REGNO (operands[3]) != AX_REG
10484 && REGNO (operands[0]) != REGNO (operands[2])
10485 && REGNO (operands[0]) != REGNO (operands[3])
10486 && (REGNO (operands[0]) == REGNO (operands[4])
10487 || peep2_reg_dead_p (3, operands[0]))"
10488 [(set (match_dup 2) (match_dup 1))
10489 (parallel [(set (match_dup 4)
10490 (any_mul_highpart:SWI48 (match_dup 2) (match_dup 3)))
10491 (clobber (match_dup 2))
10492 (clobber (reg:CC FLAGS_REG))])])
10493
;; Zero-extending variant of the high-part multiply peephole: the SImode
;; high-part result is zero-extended into the DImode operand 4.
;; TARGET_64BIT only.  Unlike the SWI48 variant, this one also requires
;; operand 2 and operand 3 to be different registers.
10494 (define_peephole2
10495 [(set (match_operand:SI 0 "general_reg_operand")
10496 (match_operand:SI 1 "immediate_operand"))
10497 (set (match_operand:SI 2 "general_reg_operand")
10498 (match_operand:SI 3 "general_reg_operand"))
10499 (parallel [(set (match_operand:DI 4 "general_reg_operand")
10500 (zero_extend:DI
10501 (any_mul_highpart:SI (match_dup 2) (match_dup 0))))
10502 (clobber (match_dup 2))
10503 (clobber (reg:CC FLAGS_REG))])]
10504 "TARGET_64BIT
10505 && REGNO (operands[3]) != AX_REG
10506 && REGNO (operands[0]) != REGNO (operands[2])
10507 && REGNO (operands[2]) != REGNO (operands[3])
10508 && REGNO (operands[0]) != REGNO (operands[3])
10509 && (REGNO (operands[0]) == REGNO (operands[4])
10510 || peep2_reg_dead_p (3, operands[0]))"
10511 [(set (match_dup 2) (match_dup 1))
10512 (parallel [(set (match_dup 4)
10513 (zero_extend:DI
10514 (any_mul_highpart:SI (match_dup 2) (match_dup 3))))
10515 (clobber (match_dup 2))
10516 (clobber (reg:CC FLAGS_REG))])])
10517
10518 ;; The patterns that match these are at the end of this file.
10519
;; XFmode multiply expander; all three operands must be registers.
;; Available when TARGET_80387.  No preparation code: the template is
;; emitted as written.
10520 (define_expand "mulxf3"
10521 [(set (match_operand:XF 0 "register_operand")
10522 (mult:XF (match_operand:XF 1 "register_operand")
10523 (match_operand:XF 2 "register_operand")))]
10524 "TARGET_80387")
10525
;; HFmode multiply expander; operand 2 may be a register or memory.
;; Available when TARGET_AVX512FP16.
10526 (define_expand "mulhf3"
10527 [(set (match_operand:HF 0 "register_operand")
10528 (mult:HF (match_operand:HF 1 "register_operand")
10529 (match_operand:HF 2 "nonimmediate_operand")))]
10530 "TARGET_AVX512FP16")
10531
;; Multiply expander for the MODEF modes.  Enabled either for x87
;; arithmetic (TARGET_80387 with X87_ENABLE_ARITH for the mode) or for SSE
;; math (SSE_FLOAT_MODE_P for the mode with TARGET_SSE_MATH).
10532 (define_expand "mul<mode>3"
10533 [(set (match_operand:MODEF 0 "register_operand")
10534 (mult:MODEF (match_operand:MODEF 1 "register_operand")
10535 (match_operand:MODEF 2 "nonimmediate_operand")))]
10536 "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode))
10537 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)")
10538 \f
10539 ;; Divide instructions
10540
10541 ;; The patterns that match these are at the end of this file.
10542
;; XFmode divide expander; all three operands must be registers.
;; Available when TARGET_80387.
10543 (define_expand "divxf3"
10544 [(set (match_operand:XF 0 "register_operand")
10545 (div:XF (match_operand:XF 1 "register_operand")
10546 (match_operand:XF 2 "register_operand")))]
10547 "TARGET_80387")
10548
10549 /* There is no more precision loss than Newton-Raphson approximation
10550 when using HFmode rcp/rsqrt, so do the transformation directly under
10551 TARGET_RECIP_DIV and fast-math. */
;; HFmode divide expander (TARGET_AVX512FP16).  When TARGET_RECIP_DIV is
;; set, the insn is optimized for speed, and the finite-math-only,
;; no-trapping-math and unsafe-math-optimizations flags all hold, the
;; division is emitted as a reciprocal (gen_rcphf2) of operand 2, forced
;; into a register, followed by a multiply (gen_mulhf3) with operand 1.
;; Otherwise the preparation code falls through and the div:HF template
;; is emitted.
10552 (define_expand "divhf3"
10553 [(set (match_operand:HF 0 "register_operand")
10554 (div:HF (match_operand:HF 1 "register_operand")
10555 (match_operand:HF 2 "nonimmediate_operand")))]
10556 "TARGET_AVX512FP16"
10557 {
10558 if (TARGET_RECIP_DIV
10559 && optimize_insn_for_speed_p ()
10560 && flag_finite_math_only && !flag_trapping_math
10561 && flag_unsafe_math_optimizations)
10562 {
10563 rtx op = gen_reg_rtx (HFmode);
10564 operands[2] = force_reg (HFmode, operands[2]);
10565 emit_insn (gen_rcphf2 (op, operands[2]));
10566 emit_insn (gen_mulhf3 (operands[0], operands[1], op));
10567 DONE;
10568 }
10569 })
10570
;; Divide expander for the MODEF modes, enabled for x87 arithmetic or SSE
;; math.  For SFmode with TARGET_SSE and TARGET_SSE_MATH, TARGET_RECIP_DIV,
;; optimization for speed, and the finite-math-only / no-trapping-math /
;; unsafe-math-optimizations flags, the expansion is delegated to
;; ix86_emit_swdivsf.  In every other case the div:MODEF template is
;; emitted.
10571 (define_expand "div<mode>3"
10572 [(set (match_operand:MODEF 0 "register_operand")
10573 (div:MODEF (match_operand:MODEF 1 "register_operand")
10574 (match_operand:MODEF 2 "nonimmediate_operand")))]
10575 "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode))
10576 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
10577 {
10578 if (<MODE>mode == SFmode
10579 && TARGET_SSE && TARGET_SSE_MATH
10580 && TARGET_RECIP_DIV
10581 && optimize_insn_for_speed_p ()
10582 && flag_finite_math_only && !flag_trapping_math
10583 && flag_unsafe_math_optimizations)
10584 {
10585 ix86_emit_swdivsf (operands[0], operands[1],
10586 operands[2], SFmode);
10587 DONE;
10588 }
10589 })
10590 \f
10591 ;; Divmod instructions.
10592
;; any_div instantiates a pattern once for signed division (div) and once
;; for unsigned division (udiv); paired_mod maps each of those codes to
;; the matching remainder code (mod / umod).
10593 (define_code_iterator any_div [div udiv])
10594 (define_code_attr paired_mod [(div "mod") (udiv "umod")])
10595
;; Combined quotient/remainder expander for the SWIM248 modes, signed and
;; unsigned.  Operand 0 receives the quotient and operand 3 the remainder
;; of operand 1 divided by operand 2; FLAGS_REG is clobbered.  No
;; condition and no preparation code: the parallel is emitted as written.
10596 (define_expand "<u>divmod<mode>4"
10597 [(parallel [(set (match_operand:SWIM248 0 "register_operand")
10598 (any_div:SWIM248
10599 (match_operand:SWIM248 1 "register_operand")
10600 (match_operand:SWIM248 2 "nonimmediate_operand")))
10601 (set (match_operand:SWIM248 3 "register_operand")
10602 (<paired_mod>:SWIM248 (match_dup 1) (match_dup 2)))
10603 (clobber (reg:CC FLAGS_REG))])])
10604
10605 ;; Split with 8bit unsigned divide:
10606 ;; if (dividend and divisor are in [0-255])
10607 ;; use 8bit unsigned integer divide
10608 ;; else
10609 ;; use original integer divide
;; SWI48 quotient (operand 0) / remainder (operand 1) parallel.  Active
;; only while pseudos can still be created, with TARGET_USE_8BIT_IDIV and
;; TARGET_QIMODE_MATH, and not when optimizing the insn for size.  The
;; replacement template is a placeholder; all code is emitted by
;; ix86_split_idivmod, which is passed the mode, the operands and
;; <u_bool>.
10610 (define_split
10611 [(set (match_operand:SWI48 0 "register_operand")
10612 (any_div:SWI48 (match_operand:SWI48 2 "register_operand")
10613 (match_operand:SWI48 3 "nonimmediate_operand")))
10614 (set (match_operand:SWI48 1 "register_operand")
10615 (<paired_mod>:SWI48 (match_dup 2) (match_dup 3)))
10616 (clobber (reg:CC FLAGS_REG))]
10617 "TARGET_USE_8BIT_IDIV
10618 && TARGET_QIMODE_MATH
10619 && can_create_pseudo_p ()
10620 && !optimize_insn_for_size_p ()"
10621 [(const_int 0)]
10622 "ix86_split_idivmod (<MODE>mode, operands, <u_bool>); DONE;")
10623
;; Variant of the 8-bit divide split in which the SImode quotient is
;; zero-extended into a DImode operand 0; the remainder (operand 1) stays
;; SImode.  TARGET_64BIT only; otherwise the same conditions, and the
;; work is again delegated to ix86_split_idivmod with SImode.
10624 (define_split
10625 [(set (match_operand:DI 0 "register_operand")
10626 (zero_extend:DI
10627 (any_div:SI (match_operand:SI 2 "register_operand")
10628 (match_operand:SI 3 "nonimmediate_operand"))))
10629 (set (match_operand:SI 1 "register_operand")
10630 (<paired_mod>:SI (match_dup 2) (match_dup 3)))
10631 (clobber (reg:CC FLAGS_REG))]
10632 "TARGET_64BIT
10633 && TARGET_USE_8BIT_IDIV
10634 && TARGET_QIMODE_MATH
10635 && can_create_pseudo_p ()
10636 && !optimize_insn_for_size_p ()"
10637 [(const_int 0)]
10638 "ix86_split_idivmod (SImode, operands, <u_bool>); DONE;")
10639
;; Variant of the 8-bit divide split in which the SImode remainder is
;; zero-extended into a DImode operand 1 and listed first; the quotient
;; (operand 0) stays SImode.  TARGET_64BIT only; delegated to
;; ix86_split_idivmod with SImode.
10640 (define_split
10641 [(set (match_operand:DI 1 "register_operand")
10642 (zero_extend:DI
10643 (<paired_mod>:SI (match_operand:SI 2 "register_operand")
10644 (match_operand:SI 3 "nonimmediate_operand"))))
10645 (set (match_operand:SI 0 "register_operand")
10646 (any_div:SI (match_dup 2) (match_dup 3)))
10647 (clobber (reg:CC FLAGS_REG))]
10648 "TARGET_64BIT
10649 && TARGET_USE_8BIT_IDIV
10650 && TARGET_QIMODE_MATH
10651 && can_create_pseudo_p ()
10652 && !optimize_insn_for_size_p ()"
10653 [(const_int 0)]
10654 "ix86_split_idivmod (SImode, operands, <u_bool>); DONE;")
10655
;; Signed SWI48 divmod carrying an UNSPEC_DIV_ALREADY_SPLIT marker.
;; Quotient: operand 0 ('a', tied to the dividend operand 2); remainder:
;; operand 1 (earlyclobber 'd').  Output is '#', so it must be split.
;; After reload it becomes (1) an arithmetic right shift by bitsize-1
;; whose result is written to operand 1, and (2) the div/mod parallel
;; that additionally uses operand 1.  The shift source (operand 4) is
;; operand 2 when optimizing for size or TARGET_USE_CLTD; otherwise
;; operand 2 is first copied into operand 1 and shifted in place.
10656 (define_insn_and_split "divmod<mode>4_1"
10657 [(set (match_operand:SWI48 0 "register_operand" "=a")
10658 (div:SWI48 (match_operand:SWI48 2 "register_operand" "0")
10659 (match_operand:SWI48 3 "nonimmediate_operand" "rm")))
10660 (set (match_operand:SWI48 1 "register_operand" "=&d")
10661 (mod:SWI48 (match_dup 2) (match_dup 3)))
10662 (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
10663 (clobber (reg:CC FLAGS_REG))]
10664 ""
10665 "#"
10666 "reload_completed"
10667 [(parallel [(set (match_dup 1)
10668 (ashiftrt:SWI48 (match_dup 4) (match_dup 5)))
10669 (clobber (reg:CC FLAGS_REG))])
10670 (parallel [(set (match_dup 0)
10671 (div:SWI48 (match_dup 2) (match_dup 3)))
10672 (set (match_dup 1)
10673 (mod:SWI48 (match_dup 2) (match_dup 3)))
10674 (use (match_dup 1))
10675 (clobber (reg:CC FLAGS_REG))])]
10676 {
10677 operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
10678
10679 if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
10680 operands[4] = operands[2];
10681 else
10682 {
10683 /* Avoid use of cltd in favor of a mov+shift. */
10684 emit_move_insn (operands[1], operands[2]);
10685 operands[4] = operands[1];
10686 }
10687 }
10688 [(set_attr "type" "multi")
10689 (set_attr "mode" "<MODE>")])
10690
;; Unsigned SWI48 divmod carrying an UNSPEC_DIV_ALREADY_SPLIT marker.
;; Same operand layout as the signed form.  After reload it splits into
;; a store of constant 0 to operand 1 followed by the udiv/umod parallel
;; that uses operand 1.
10691 (define_insn_and_split "udivmod<mode>4_1"
10692 [(set (match_operand:SWI48 0 "register_operand" "=a")
10693 (udiv:SWI48 (match_operand:SWI48 2 "register_operand" "0")
10694 (match_operand:SWI48 3 "nonimmediate_operand" "rm")))
10695 (set (match_operand:SWI48 1 "register_operand" "=&d")
10696 (umod:SWI48 (match_dup 2) (match_dup 3)))
10697 (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
10698 (clobber (reg:CC FLAGS_REG))]
10699 ""
10700 "#"
10701 "reload_completed"
10702 [(set (match_dup 1) (const_int 0))
10703 (parallel [(set (match_dup 0)
10704 (udiv:SWI48 (match_dup 2) (match_dup 3)))
10705 (set (match_dup 1)
10706 (umod:SWI48 (match_dup 2) (match_dup 3)))
10707 (use (match_dup 1))
10708 (clobber (reg:CC FLAGS_REG))])]
10709 ""
10710 [(set_attr "type" "multi")
10711 (set_attr "mode" "<MODE>")])
10712
;; Signed SImode divmod with the quotient zero-extended into a DImode
;; operand 0, carrying an UNSPEC_DIV_ALREADY_SPLIT marker.  TARGET_64BIT
;; only.  After reload it splits into an arithmetic right shift by 31
;; written to operand 1 and then the div/mod parallel that uses operand
;; 1.  The shift source is chosen as in the non-extending form: operand
;; 2 directly, or a copy of it in operand 1.
10713 (define_insn_and_split "divmodsi4_zext_1"
10714 [(set (match_operand:DI 0 "register_operand" "=a")
10715 (zero_extend:DI
10716 (div:SI (match_operand:SI 2 "register_operand" "0")
10717 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
10718 (set (match_operand:SI 1 "register_operand" "=&d")
10719 (mod:SI (match_dup 2) (match_dup 3)))
10720 (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
10721 (clobber (reg:CC FLAGS_REG))]
10722 "TARGET_64BIT"
10723 "#"
10724 "&& reload_completed"
10725 [(parallel [(set (match_dup 1)
10726 (ashiftrt:SI (match_dup 4) (match_dup 5)))
10727 (clobber (reg:CC FLAGS_REG))])
10728 (parallel [(set (match_dup 0)
10729 (zero_extend:DI (div:SI (match_dup 2) (match_dup 3))))
10730 (set (match_dup 1)
10731 (mod:SI (match_dup 2) (match_dup 3)))
10732 (use (match_dup 1))
10733 (clobber (reg:CC FLAGS_REG))])]
10734 {
10735 operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
10736
10737 if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
10738 operands[4] = operands[2];
10739 else
10740 {
10741 /* Avoid use of cltd in favor of a mov+shift. */
10742 emit_move_insn (operands[1], operands[2]);
10743 operands[4] = operands[1];
10744 }
10745 }
10746 [(set_attr "type" "multi")
10747 (set_attr "mode" "SI")])
10748
;; Unsigned SImode divmod with the quotient zero-extended into a DImode
;; operand 0, carrying an UNSPEC_DIV_ALREADY_SPLIT marker.  TARGET_64BIT
;; only.  After reload it splits into a store of constant 0 to operand 1
;; followed by the udiv/umod parallel that uses operand 1.
10749 (define_insn_and_split "udivmodsi4_zext_1"
10750 [(set (match_operand:DI 0 "register_operand" "=a")
10751 (zero_extend:DI
10752 (udiv:SI (match_operand:SI 2 "register_operand" "0")
10753 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
10754 (set (match_operand:SI 1 "register_operand" "=&d")
10755 (umod:SI (match_dup 2) (match_dup 3)))
10756 (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
10757 (clobber (reg:CC FLAGS_REG))]
10758 "TARGET_64BIT"
10759 "#"
10760 "&& reload_completed"
10761 [(set (match_dup 1) (const_int 0))
10762 (parallel [(set (match_dup 0)
10763 (zero_extend:DI (udiv:SI (match_dup 2) (match_dup 3))))
10764 (set (match_dup 1)
10765 (umod:SI (match_dup 2) (match_dup 3)))
10766 (use (match_dup 1))
10767 (clobber (reg:CC FLAGS_REG))])]
10768 ""
10769 [(set_attr "type" "multi")
10770 (set_attr "mode" "SI")])
10771
;; Signed SImode divmod with the remainder zero-extended into a DImode
;; operand 1, carrying an UNSPEC_DIV_ALREADY_SPLIT marker.  TARGET_64BIT
;; only.  Because operand 1 is DImode, the split works on operand 6, the
;; SImode lowpart of operand 1: the arithmetic right shift by 31 is
;; written to operand 6, which the div/mod parallel then uses.
10772 (define_insn_and_split "divmodsi4_zext_2"
10773 [(set (match_operand:DI 1 "register_operand" "=&d")
10774 (zero_extend:DI
10775 (mod:SI (match_operand:SI 2 "register_operand" "0")
10776 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
10777 (set (match_operand:SI 0 "register_operand" "=a")
10778 (div:SI (match_dup 2) (match_dup 3)))
10779 (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
10780 (clobber (reg:CC FLAGS_REG))]
10781 "TARGET_64BIT"
10782 "#"
10783 "&& reload_completed"
10784 [(parallel [(set (match_dup 6)
10785 (ashiftrt:SI (match_dup 4) (match_dup 5)))
10786 (clobber (reg:CC FLAGS_REG))])
10787 (parallel [(set (match_dup 1)
10788 (zero_extend:DI (mod:SI (match_dup 2) (match_dup 3))))
10789 (set (match_dup 0)
10790 (div:SI (match_dup 2) (match_dup 3)))
10791 (use (match_dup 6))
10792 (clobber (reg:CC FLAGS_REG))])]
10793 {
10794 operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
10795 operands[6] = gen_lowpart (SImode, operands[1]);
10796
10797 if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
10798 operands[4] = operands[2];
10799 else
10800 {
10801 /* Avoid use of cltd in favor of a mov+shift. */
10802 emit_move_insn (operands[6], operands[2]);
10803 operands[4] = operands[6];
10804 }
10805 }
10806 [(set_attr "type" "multi")
10807 (set_attr "mode" "SI")])
10808
;; Unsigned SImode divmod with the remainder zero-extended into a DImode
;; operand 1, carrying an UNSPEC_DIV_ALREADY_SPLIT marker.  TARGET_64BIT
;; only.  After reload, operand 4 (the SImode lowpart of operand 1) is
;; set to 0 and then used by the umod/udiv parallel.
10809 (define_insn_and_split "udivmodsi4_zext_2"
10810 [(set (match_operand:DI 1 "register_operand" "=&d")
10811 (zero_extend:DI
10812 (umod:SI (match_operand:SI 2 "register_operand" "0")
10813 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
10814 (set (match_operand:SI 0 "register_operand" "=a")
10815 (udiv:SI (match_dup 2) (match_dup 3)))
10816 (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
10817 (clobber (reg:CC FLAGS_REG))]
10818 "TARGET_64BIT"
10819 "#"
10820 "&& reload_completed"
10821 [(set (match_dup 4) (const_int 0))
10822 (parallel [(set (match_dup 1)
10823 (zero_extend:DI (umod:SI (match_dup 2) (match_dup 3))))
10824 (set (match_dup 0)
10825 (udiv:SI (match_dup 2) (match_dup 3)))
10826 (use (match_dup 4))
10827 (clobber (reg:CC FLAGS_REG))])]
10828 "operands[4] = gen_lowpart (SImode, operands[1]);"
10829 [(set_attr "type" "multi")
10830 (set_attr "mode" "SI")])
10831
;; Signed divmod for the SWIM248 modes, without the already-split marker.
;; Quotient: operand 0 ('a', tied to operand 2); remainder: operand 1
;; (earlyclobber 'd').  After reload it splits into an arithmetic right
;; shift by bitsize-1 written to operand 1, followed by the div/mod
;; parallel that uses operand 1.  The shift reads operand 2 directly only
;; for non-HImode when optimizing for size or TARGET_USE_CLTD; HImode
;; always takes the copy-then-shift path.
10832 (define_insn_and_split "*divmod<mode>4"
10833 [(set (match_operand:SWIM248 0 "register_operand" "=a")
10834 (div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
10835 (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
10836 (set (match_operand:SWIM248 1 "register_operand" "=&d")
10837 (mod:SWIM248 (match_dup 2) (match_dup 3)))
10838 (clobber (reg:CC FLAGS_REG))]
10839 ""
10840 "#"
10841 "reload_completed"
10842 [(parallel [(set (match_dup 1)
10843 (ashiftrt:SWIM248 (match_dup 4) (match_dup 5)))
10844 (clobber (reg:CC FLAGS_REG))])
10845 (parallel [(set (match_dup 0)
10846 (div:SWIM248 (match_dup 2) (match_dup 3)))
10847 (set (match_dup 1)
10848 (mod:SWIM248 (match_dup 2) (match_dup 3)))
10849 (use (match_dup 1))
10850 (clobber (reg:CC FLAGS_REG))])]
10851 {
10852 operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
10853
10854 if (<MODE>mode != HImode
10855 && (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD))
10856 operands[4] = operands[2];
10857 else
10858 {
10859 /* Avoid use of cltd in favor of a mov+shift. */
10860 emit_move_insn (operands[1], operands[2]);
10861 operands[4] = operands[1];
10862 }
10863 }
10864 [(set_attr "type" "multi")
10865 (set_attr "mode" "<MODE>")])
10866
;; Unsigned divmod for the SWIM248 modes, without the already-split
;; marker.  After reload it splits into a store of constant 0 to operand
;; 1 followed by the udiv/umod parallel that uses operand 1.
10867 (define_insn_and_split "*udivmod<mode>4"
10868 [(set (match_operand:SWIM248 0 "register_operand" "=a")
10869 (udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
10870 (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
10871 (set (match_operand:SWIM248 1 "register_operand" "=&d")
10872 (umod:SWIM248 (match_dup 2) (match_dup 3)))
10873 (clobber (reg:CC FLAGS_REG))]
10874 ""
10875 "#"
10876 "reload_completed"
10877 [(set (match_dup 1) (const_int 0))
10878 (parallel [(set (match_dup 0)
10879 (udiv:SWIM248 (match_dup 2) (match_dup 3)))
10880 (set (match_dup 1)
10881 (umod:SWIM248 (match_dup 2) (match_dup 3)))
10882 (use (match_dup 1))
10883 (clobber (reg:CC FLAGS_REG))])]
10884 ""
10885 [(set_attr "type" "multi")
10886 (set_attr "mode" "<MODE>")])
10887
10888 ;; Optimize division or modulo by constant power of 2, if the constant
10889 ;; materializes only after expansion.
;; Unsigned divmod by a constant divisor (operand 3) whose exact_log2 lies
;; in [1, 31].  After reload it splits into three insns: copy the
;; dividend (operand 2) into the remainder register (operand 1); shift
;; operand 2 right logically by log2 into the quotient (operand 0); mask
;; operand 1 with (1 << log2) - 1.
10890 (define_insn_and_split "*udivmod<mode>4_pow2"
10891 [(set (match_operand:SWI48 0 "register_operand" "=r")
10892 (udiv:SWI48 (match_operand:SWI48 2 "register_operand" "0")
10893 (match_operand:SWI48 3 "const_int_operand")))
10894 (set (match_operand:SWI48 1 "register_operand" "=r")
10895 (umod:SWI48 (match_dup 2) (match_dup 3)))
10896 (clobber (reg:CC FLAGS_REG))]
10897 "IN_RANGE (exact_log2 (UINTVAL (operands[3])), 1, 31)"
10898 "#"
10899 "&& reload_completed"
10900 [(set (match_dup 1) (match_dup 2))
10901 (parallel [(set (match_dup 0) (lshiftrt:<MODE> (match_dup 2) (match_dup 4)))
10902 (clobber (reg:CC FLAGS_REG))])
10903 (parallel [(set (match_dup 1) (and:<MODE> (match_dup 1) (match_dup 5)))
10904 (clobber (reg:CC FLAGS_REG))])]
10905 {
10906 int v = exact_log2 (UINTVAL (operands[3]));
10907 operands[4] = GEN_INT (v);
10908 operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1);
10909 }
10910 [(set_attr "type" "multi")
10911 (set_attr "mode" "<MODE>")])
10912
;; Signed SImode divmod with the quotient zero-extended into a DImode
;; operand 0, without the already-split marker.  TARGET_64BIT only.
;; After reload it splits into an arithmetic right shift by 31 written to
;; operand 1, followed by the div/mod parallel that uses operand 1.
10913 (define_insn_and_split "*divmodsi4_zext_1"
10914 [(set (match_operand:DI 0 "register_operand" "=a")
10915 (zero_extend:DI
10916 (div:SI (match_operand:SI 2 "register_operand" "0")
10917 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
10918 (set (match_operand:SI 1 "register_operand" "=&d")
10919 (mod:SI (match_dup 2) (match_dup 3)))
10920 (clobber (reg:CC FLAGS_REG))]
10921 "TARGET_64BIT"
10922 "#"
10923 "&& reload_completed"
10924 [(parallel [(set (match_dup 1)
10925 (ashiftrt:SI (match_dup 4) (match_dup 5)))
10926 (clobber (reg:CC FLAGS_REG))])
10927 (parallel [(set (match_dup 0)
10928 (zero_extend:DI (div:SI (match_dup 2) (match_dup 3))))
10929 (set (match_dup 1)
10930 (mod:SI (match_dup 2) (match_dup 3)))
10931 (use (match_dup 1))
10932 (clobber (reg:CC FLAGS_REG))])]
10933 {
10934 operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
10935
10936 if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
10937 operands[4] = operands[2];
10938 else
10939 {
10940 /* Avoid use of cltd in favor of a mov+shift. */
10941 emit_move_insn (operands[1], operands[2]);
10942 operands[4] = operands[1];
10943 }
10944 }
10945 [(set_attr "type" "multi")
10946 (set_attr "mode" "SI")])
10947
;; Unsigned SImode divmod with the quotient zero-extended into a DImode
;; operand 0, without the already-split marker.  TARGET_64BIT only.
;; After reload it splits into a store of constant 0 to operand 1
;; followed by the udiv/umod parallel that uses operand 1.
10948 (define_insn_and_split "*udivmodsi4_zext_1"
10949 [(set (match_operand:DI 0 "register_operand" "=a")
10950 (zero_extend:DI
10951 (udiv:SI (match_operand:SI 2 "register_operand" "0")
10952 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
10953 (set (match_operand:SI 1 "register_operand" "=&d")
10954 (umod:SI (match_dup 2) (match_dup 3)))
10955 (clobber (reg:CC FLAGS_REG))]
10956 "TARGET_64BIT"
10957 "#"
10958 "&& reload_completed"
10959 [(set (match_dup 1) (const_int 0))
10960 (parallel [(set (match_dup 0)
10961 (zero_extend:DI (udiv:SI (match_dup 2) (match_dup 3))))
10962 (set (match_dup 1)
10963 (umod:SI (match_dup 2) (match_dup 3)))
10964 (use (match_dup 1))
10965 (clobber (reg:CC FLAGS_REG))])]
10966 ""
10967 [(set_attr "type" "multi")
10968 (set_attr "mode" "SI")])
10969
;; Unsigned SImode divmod by a constant divisor whose exact_log2 lies in
;; [1, 31], with the quotient zero-extended into a DImode operand 0.
;; TARGET_64BIT only.  After reload it splits into: copy operand 2 into
;; operand 1; zero-extended logical right shift of operand 2 by log2 into
;; operand 0; mask operand 1 with (1 << log2) - 1.
10970 (define_insn_and_split "*udivmodsi4_pow2_zext_1"
10971 [(set (match_operand:DI 0 "register_operand" "=r")
10972 (zero_extend:DI
10973 (udiv:SI (match_operand:SI 2 "register_operand" "0")
10974 (match_operand:SI 3 "const_int_operand"))))
10975 (set (match_operand:SI 1 "register_operand" "=r")
10976 (umod:SI (match_dup 2) (match_dup 3)))
10977 (clobber (reg:CC FLAGS_REG))]
10978 "TARGET_64BIT
10979 && IN_RANGE (exact_log2 (UINTVAL (operands[3])), 1, 31)"
10980 "#"
10981 "&& reload_completed"
10982 [(set (match_dup 1) (match_dup 2))
10983 (parallel [(set (match_dup 0)
10984 (zero_extend:DI (lshiftrt:SI (match_dup 2) (match_dup 4))))
10985 (clobber (reg:CC FLAGS_REG))])
10986 (parallel [(set (match_dup 1) (and:SI (match_dup 1) (match_dup 5)))
10987 (clobber (reg:CC FLAGS_REG))])]
10988 {
10989 int v = exact_log2 (UINTVAL (operands[3]));
10990 operands[4] = GEN_INT (v);
10991 operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1);
10992 }
10993 [(set_attr "type" "multi")
10994 (set_attr "mode" "SI")])
10995
;; Signed SImode divmod with the remainder zero-extended into a DImode
;; operand 1, without the already-split marker.  TARGET_64BIT only.
;; The split works on operand 6, the SImode lowpart of operand 1: the
;; arithmetic right shift by 31 is written to operand 6, which the
;; div/mod parallel then uses.
10996 (define_insn_and_split "*divmodsi4_zext_2"
10997 [(set (match_operand:DI 1 "register_operand" "=&d")
10998 (zero_extend:DI
10999 (mod:SI (match_operand:SI 2 "register_operand" "0")
11000 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
11001 (set (match_operand:SI 0 "register_operand" "=a")
11002 (div:SI (match_dup 2) (match_dup 3)))
11003 (clobber (reg:CC FLAGS_REG))]
11004 "TARGET_64BIT"
11005 "#"
11006 "&& reload_completed"
11007 [(parallel [(set (match_dup 6)
11008 (ashiftrt:SI (match_dup 4) (match_dup 5)))
11009 (clobber (reg:CC FLAGS_REG))])
11010 (parallel [(set (match_dup 1)
11011 (zero_extend:DI (mod:SI (match_dup 2) (match_dup 3))))
11012 (set (match_dup 0)
11013 (div:SI (match_dup 2) (match_dup 3)))
11014 (use (match_dup 6))
11015 (clobber (reg:CC FLAGS_REG))])]
11016 {
11017 operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
11018 operands[6] = gen_lowpart (SImode, operands[1]);
11019
11020 if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
11021 operands[4] = operands[2];
11022 else
11023 {
11024 /* Avoid use of cltd in favor of a mov+shift. */
11025 emit_move_insn (operands[6], operands[2]);
11026 operands[4] = operands[6];
11027 }
11028 }
11029 [(set_attr "type" "multi")
11030 (set_attr "mode" "SI")])
11031
;; Unsigned SImode divmod with the remainder zero-extended into a DImode
;; operand 1, without the already-split marker.  TARGET_64BIT only.
;; After reload, operand 4 (the SImode lowpart of operand 1) is set to 0
;; and then used by the umod/udiv parallel.
11032 (define_insn_and_split "*udivmodsi4_zext_2"
11033 [(set (match_operand:DI 1 "register_operand" "=&d")
11034 (zero_extend:DI
11035 (umod:SI (match_operand:SI 2 "register_operand" "0")
11036 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
11037 (set (match_operand:SI 0 "register_operand" "=a")
11038 (udiv:SI (match_dup 2) (match_dup 3)))
11039 (clobber (reg:CC FLAGS_REG))]
11040 "TARGET_64BIT"
11041 "#"
11042 "&& reload_completed"
11043 [(set (match_dup 4) (const_int 0))
11044 (parallel [(set (match_dup 1)
11045 (zero_extend:DI (umod:SI (match_dup 2) (match_dup 3))))
11046 (set (match_dup 0)
11047 (udiv:SI (match_dup 2) (match_dup 3)))
11048 (use (match_dup 4))
11049 (clobber (reg:CC FLAGS_REG))])]
11050 "operands[4] = gen_lowpart (SImode, operands[1]);"
11051 [(set_attr "type" "multi")
11052 (set_attr "mode" "SI")])
11053
;; Unsigned SImode divmod by a constant divisor whose exact_log2 lies in
;; [1, 31], with the remainder zero-extended into a DImode operand 1.
;; TARGET_64BIT only.  After reload it splits into: copy operand 2 into
;; operand 1; logical right shift of operand 2 by log2 into operand 0;
;; zero-extended AND of operand 1 with (1 << log2) - 1 back into
;; operand 1.
11054 (define_insn_and_split "*udivmodsi4_pow2_zext_2"
11055 [(set (match_operand:DI 1 "register_operand" "=r")
11056 (zero_extend:DI
11057 (umod:SI (match_operand:SI 2 "register_operand" "0")
11058 (match_operand:SI 3 "const_int_operand"))))
11059 (set (match_operand:SI 0 "register_operand" "=r")
11060 (udiv:SI (match_dup 2) (match_dup 3)))
11061 (clobber (reg:CC FLAGS_REG))]
11062 "TARGET_64BIT
11063 && IN_RANGE (exact_log2 (UINTVAL (operands[3])), 1, 31)"
11064 "#"
11065 "&& reload_completed"
11066 [(set (match_dup 1) (match_dup 2))
11067 (parallel [(set (match_dup 0) (lshiftrt:SI (match_dup 2) (match_dup 4)))
11068 (clobber (reg:CC FLAGS_REG))])
11069 (parallel [(set (match_dup 1)
11070 (zero_extend:DI (and:SI (match_dup 1) (match_dup 5))))
11071 (clobber (reg:CC FLAGS_REG))])]
11072 {
11073 int v = exact_log2 (UINTVAL (operands[3]));
11074 operands[4] = GEN_INT (v);
11075 operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1);
11076 }
11077 [(set_attr "type" "multi")
11078 (set_attr "mode" "SI")])
11079
;; The divide instruction proper for the SWIM248 modes, signed or
;; unsigned.  Quotient: operand 0 ('a', tied to the dividend operand 2);
;; remainder: operand 1 ('d').  Operand 4 is a used input tied to operand
;; 1, i.e. a value that must already be in that register.  Emits
;; <sgnprefix>div on operand 3 and clobbers FLAGS_REG.
11080 (define_insn "*<u>divmod<mode>4_noext"
11081 [(set (match_operand:SWIM248 0 "register_operand" "=a")
11082 (any_div:SWIM248
11083 (match_operand:SWIM248 2 "register_operand" "0")
11084 (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
11085 (set (match_operand:SWIM248 1 "register_operand" "=d")
11086 (<paired_mod>:SWIM248 (match_dup 2) (match_dup 3)))
11087 (use (match_operand:SWIM248 4 "register_operand" "1"))
11088 (clobber (reg:CC FLAGS_REG))]
11089 ""
11090 "<sgnprefix>div{<imodesuffix>}\t%3"
11091 [(set_attr "type" "idiv")
11092 (set_attr "mode" "<MODE>")])
11093
;; SImode divide instruction whose quotient is zero-extended into a
;; DImode operand 0; the remainder (operand 1) stays SImode.
;; TARGET_64BIT only.  Operand 4 is a used input tied to operand 1.
;; Emits <sgnprefix>div{l} on operand 3.
11094 (define_insn "*<u>divmodsi4_noext_zext_1"
11095 [(set (match_operand:DI 0 "register_operand" "=a")
11096 (zero_extend:DI
11097 (any_div:SI (match_operand:SI 2 "register_operand" "0")
11098 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
11099 (set (match_operand:SI 1 "register_operand" "=d")
11100 (<paired_mod>:SI (match_dup 2) (match_dup 3)))
11101 (use (match_operand:SI 4 "register_operand" "1"))
11102 (clobber (reg:CC FLAGS_REG))]
11103 "TARGET_64BIT"
11104 "<sgnprefix>div{l}\t%3"
11105 [(set_attr "type" "idiv")
11106 (set_attr "mode" "SI")])
11107
;; SImode divide instruction whose remainder is zero-extended into a
;; DImode operand 1; the quotient (operand 0) stays SImode.
;; TARGET_64BIT only.  Operand 4 is a used input tied to operand 1.
;; Emits <sgnprefix>div{l} on operand 3.
11108 (define_insn "*<u>divmodsi4_noext_zext_2"
11109 [(set (match_operand:DI 1 "register_operand" "=d")
11110 (zero_extend:DI
11111 (<paired_mod>:SI (match_operand:SI 2 "register_operand" "0")
11112 (match_operand:SI 3 "nonimmediate_operand" "rm"))))
11113 (set (match_operand:SI 0 "register_operand" "=a")
11114 (any_div:SI (match_dup 2) (match_dup 3)))
11115 (use (match_operand:SI 4 "register_operand" "1"))
11116 (clobber (reg:CC FLAGS_REG))]
11117 "TARGET_64BIT"
11118 "<sgnprefix>div{l}\t%3"
11119 [(set_attr "type" "idiv")
11120 (set_attr "mode" "SI")])
11121
11122 ;; Avoid sign-extension (using cdq) for constant numerators.
;; Signed SImode divide/modulo with a CONST_INT dividend (operand 2).
;; Both outputs are early-clobber ("=&a", "=&d").  Only enabled when the
;; function is not optimized for size.  After reload the insn splits into:
;;   1. operand 0 <- the constant dividend,
;;   2. operand 1 <- operand 4 (-1 if the constant is negative, else 0),
;;   3. the divide parallel, which (use)s operand 1.
11123 (define_insn_and_split "*divmodsi4_const"
11124 [(set (match_operand:SI 0 "register_operand" "=&a")
11125 (div:SI (match_operand:SI 2 "const_int_operand")
11126 (match_operand:SI 3 "nonimmediate_operand" "rm")))
11127 (set (match_operand:SI 1 "register_operand" "=&d")
11128 (mod:SI (match_dup 2) (match_dup 3)))
11129 (clobber (reg:CC FLAGS_REG))]
11130 "!optimize_function_for_size_p (cfun)"
11131 "#"
11132 "&& reload_completed"
11133 [(set (match_dup 0) (match_dup 2))
11134 (set (match_dup 1) (match_dup 4))
11135 (parallel [(set (match_dup 0)
11136 (div:SI (match_dup 0) (match_dup 3)))
11137 (set (match_dup 1)
11138 (mod:SI (match_dup 0) (match_dup 3)))
11139 (use (match_dup 1))
11140 (clobber (reg:CC FLAGS_REG))])]
11141 {
/* Operand 4 is the value loaded into operand 1 before the divide:
   all ones for a negative dividend, zero otherwise.  */
11142 operands[4] = INTVAL (operands[2]) < 0 ? constm1_rtx : const0_rtx;
11143 }
11144 [(set_attr "type" "multi")
11145 (set_attr "mode" "SI")])
11146
;; Expander for signed QImode divide/modulo, enabled under
;; TARGET_QIMODE_MATH.  Operand 0 receives the quotient and operand 3 the
;; remainder of operand 1 / operand 2.  The expansion sign-extends the
;; dividend to HImode, emits divmodhiqi3 into an HImode temporary, and
;; then copies the two result bytes out of it, attaching REG_EQUAL notes
;; (MOD resp. DIV) to the copies.
11147 (define_expand "divmodqi4"
11148 [(parallel [(set (match_operand:QI 0 "register_operand")
11149 (div:QI
11150 (match_operand:QI 1 "register_operand")
11151 (match_operand:QI 2 "nonimmediate_operand")))
11152 (set (match_operand:QI 3 "register_operand")
11153 (mod:QI (match_dup 1) (match_dup 2)))
11154 (clobber (reg:CC FLAGS_REG))])]
11155 "TARGET_QIMODE_MATH"
11156 {
11157 rtx div, mod;
11158 rtx tmp0, tmp1;
11159
/* tmp0 receives the combined HImode result, tmp1 the extended dividend.  */
11160 tmp0 = gen_reg_rtx (HImode);
11161 tmp1 = gen_reg_rtx (HImode);
11162
11163 /* Extend operands[1] to HImode. Generate 8bit divide. Result is in AX. */
11164 emit_insn (gen_extendqihi2 (tmp1, operands[1]));
11165 emit_insn (gen_divmodhiqi3 (tmp0, tmp1, operands[2]));
11166
11167 /* Extract remainder from AH. */
/* tmp1 is reused here as the 8-bit field at bit position 8 of tmp0.  */
11168 tmp1 = gen_rtx_ZERO_EXTRACT (HImode, tmp0, GEN_INT (8), GEN_INT (8));
11169 tmp1 = lowpart_subreg (QImode, tmp1, HImode);
11170 rtx_insn *insn = emit_move_insn (operands[3], tmp1);
11171
/* Record that the copied value equals operands[1] MOD operands[2].  */
11172 mod = gen_rtx_MOD (QImode, operands[1], operands[2]);
11173 set_unique_reg_note (insn, REG_EQUAL, mod);
11174
11175 /* Extract quotient from AL. */
11176 insn = emit_move_insn (operands[0], gen_lowpart (QImode, tmp0));
11177
/* Record that the copied value equals operands[1] DIV operands[2].  */
11178 div = gen_rtx_DIV (QImode, operands[1], operands[2]);
11179 set_unique_reg_note (insn, REG_EQUAL, div);
11180
11181 DONE;
11182 })
11183
;; Expander for unsigned QImode divide/modulo, enabled under
;; TARGET_QIMODE_MATH.  Same structure as divmodqi4, but the dividend is
;; zero-extended to HImode, udivmodhiqi3 is emitted, and the REG_EQUAL
;; notes use UMOD resp. UDIV.  Operand 0 receives the quotient and
;; operand 3 the remainder of operand 1 / operand 2.
11184 (define_expand "udivmodqi4"
11185 [(parallel [(set (match_operand:QI 0 "register_operand")
11186 (udiv:QI
11187 (match_operand:QI 1 "register_operand")
11188 (match_operand:QI 2 "nonimmediate_operand")))
11189 (set (match_operand:QI 3 "register_operand")
11190 (umod:QI (match_dup 1) (match_dup 2)))
11191 (clobber (reg:CC FLAGS_REG))])]
11192 "TARGET_QIMODE_MATH"
11193 {
11194 rtx div, mod;
11195 rtx tmp0, tmp1;
11196
/* tmp0 receives the combined HImode result, tmp1 the extended dividend.  */
11197 tmp0 = gen_reg_rtx (HImode);
11198 tmp1 = gen_reg_rtx (HImode);
11199
11200 /* Extend operands[1] to HImode. Generate 8bit divide. Result is in AX. */
11201 emit_insn (gen_zero_extendqihi2 (tmp1, operands[1]));
11202 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, operands[2]));
11203
11204 /* Extract remainder from AH. */
/* tmp1 is reused here as the 8-bit field at bit position 8 of tmp0.  */
11205 tmp1 = gen_rtx_ZERO_EXTRACT (HImode, tmp0, GEN_INT (8), GEN_INT (8));
11206 tmp1 = lowpart_subreg (QImode, tmp1, HImode);
11207 rtx_insn *insn = emit_move_insn (operands[3], tmp1);
11208
/* Record that the copied value equals operands[1] UMOD operands[2].  */
11209 mod = gen_rtx_UMOD (QImode, operands[1], operands[2]);
11210 set_unique_reg_note (insn, REG_EQUAL, mod);
11211
11212 /* Extract quotient from AL. */
11213 insn = emit_move_insn (operands[0], gen_lowpart (QImode, tmp0));
11214
/* Record that the copied value equals operands[1] UDIV operands[2].  */
11215 div = gen_rtx_UDIV (QImode, operands[1], operands[2]);
11216 set_unique_reg_note (insn, REG_EQUAL, div);
11217
11218 DONE;
11219 })
11220
11221 ;; Divide AX by r/m8, with result stored in
11222 ;; AL <- Quotient
11223 ;; AH <- Remainder
11224 ;; Change div/mod to HImode and extend the second argument to HImode
11225 ;; so that mode of div/mod matches with mode of arguments. Otherwise
11226 ;; combine may fail.
;; The HImode result (operand 0, "a" register) is described as
;;   (remainder truncated to QI, zero-extended, shifted left by 8)
;;   IOR (quotient truncated to QI, zero-extended).
;; Operand 1 (HImode dividend) is tied to the output; operand 2 is the
;; QImode divisor, extended to HImode by any_extend.  Emits
;; "<sgnprefix>div{b}" on operand 2 and requires TARGET_QIMODE_MATH.
;; NOTE(review): the RTL uses the mod/div codes for both instances of the
;; any_extend iterator, including the zero_extend one -- confirm this is
;; intended rather than umod/udiv for the unsigned variant.
11227 (define_insn "<u>divmodhiqi3"
11228 [(set (match_operand:HI 0 "register_operand" "=a")
11229 (ior:HI
11230 (ashift:HI
11231 (zero_extend:HI
11232 (truncate:QI
11233 (mod:HI (match_operand:HI 1 "register_operand" "0")
11234 (any_extend:HI
11235 (match_operand:QI 2 "nonimmediate_operand" "qm")))))
11236 (const_int 8))
11237 (zero_extend:HI
11238 (truncate:QI
11239 (div:HI (match_dup 1) (any_extend:HI (match_dup 2)))))))
11240 (clobber (reg:CC FLAGS_REG))]
11241 "TARGET_QIMODE_MATH"
11242 "<sgnprefix>div{b}\t%2"
11243 [(set_attr "type" "idiv")
11244 (set_attr "mode" "QI")])
11245
11246 ;; We cannot use div/idiv for double division, because it causes
11247 ;; "division by zero" on the overflow and that's not what we expect
11248 ;; from truncate. Because true (non truncating) double division is
11249 ;; never generated, we can't create this insn anyway.
11250 ;
11251 ;(define_insn ""
11252 ; [(set (match_operand:SI 0 "register_operand" "=a")
11253 ; (truncate:SI
11254 ; (udiv:DI (match_operand:DI 1 "register_operand" "A")
11255 ; (zero_extend:DI
11256 ; (match_operand:SI 2 "nonimmediate_operand" "rm")))))
11257 ; (set (match_operand:SI 3 "register_operand" "=d")
11258 ; (truncate:SI
11259 ; (umod:DI (match_dup 1) (zero_extend:DI (match_dup 2)))))
11260 ; (clobber (reg:CC FLAGS_REG))]
11261 ; ""
11262 ; "div{l}\t{%2, %0|%0, %2}"
11263 ; [(set_attr "type" "idiv")])
11264 \f
11265 ;;- Logical AND instructions
11266
11267 ;; On Pentium, "test imm, reg" is pairable only with eax, ax, and al.
11268 ;; Note that this excludes ah.
11269
;; Expander that sets FLAGS_REG in CCNOmode from comparing
;; (operand 0 AND operand 1) with zero, for the SWI48 modes.  It has no
;; condition and no preparation statements; the pattern is emitted as is.
11270 (define_expand "@test<mode>_ccno_1"
11271 [(set (reg:CCNO FLAGS_REG)
11272 (compare:CCNO
11273 (and:SWI48
11274 (match_operand:SWI48 0 "nonimmediate_operand")
11275 (match_operand:SWI48 1 "<nonmemory_szext_operand>"))
11276 (const_int 0)))])
11277
;; Expander that sets FLAGS_REG in CCZmode from comparing the QImode
;; value (operand 0 AND operand 1) with zero.  It has no condition and no
;; preparation statements; the pattern is emitted as is.
11278 (define_expand "testqi_ccz_1"
11279 [(set (reg:CCZ FLAGS_REG)
11280 (compare:CCZ
11281 (and:QI
11282 (match_operand:QI 0 "nonimmediate_operand")
11283 (match_operand:QI 1 "nonmemory_operand"))
11284 (const_int 0)))])
11285
;; DImode test (AND compared with zero), TARGET_64BIT only.
;; Alternative 0 (operand 1 matches constraint "Z") is emitted as a 32-bit
;; "test{l}" using the %k operand modifier; alternative 1 is a full
;; "test{q}".  The insn condition selects which flags mode the user of the
;; flags must be compatible with: CCZmode when operand 1 satisfies "Z" and
;; either is not a CONST_INT or has the SImode sign bit known set,
;; otherwise CCNOmode.
11286 (define_insn "*testdi_1"
11287 [(set (reg FLAGS_REG)
11288 (compare
11289 (and:DI
11290 (match_operand:DI 0 "nonimmediate_operand" "%r,rm")
11291 (match_operand:DI 1 "x86_64_szext_nonmemory_operand" "Z,re"))
11292 (const_int 0)))]
11293 "TARGET_64BIT
11294 && ix86_match_ccmode
11295 (insn,
11296 /* If we are going to emit testl instead of testq, and the operands[1]
11297 constant might have the SImode sign bit set, make sure the sign
11298 flag isn't tested, because the instruction will set the sign flag
11299 based on bit 31 rather than bit 63. If it isn't CONST_INT,
11300 conservatively assume it might have bit 31 set. */
11301 (satisfies_constraint_Z (operands[1])
11302 && (!CONST_INT_P (operands[1])
11303 || val_signbit_known_set_p (SImode, INTVAL (operands[1]))))
11304 ? CCZmode : CCNOmode)"
11305 "@
11306 test{l}\t{%k1, %k0|%k0, %k1}
11307 test{q}\t{%1, %0|%0, %1}"
11308 [(set_attr "type" "test")
11309 (set_attr "mode" "SI,DI")])
11310
;; QImode test that may be emitted as a 32-bit test.
;; The "mode" attribute is SI for alternative 2, and also when optimizing
;; the insn for size with operand 0 an ext_QIreg_operand and operand 1 a
;; constant in 0..127; otherwise it is QI.  In the SI case "test{l}" is
;; emitted on the %k form of operand 0, and a negative constant operand 1
;; is first masked to its low 8 bits.  The insn condition requires
;; CCNOmode when operand 1 is a non-negative CONST_INT and CCZmode
;; otherwise.
11311 (define_insn "*testqi_1_maybe_si"
11312 [(set (reg FLAGS_REG)
11313 (compare
11314 (and:QI
11315 (match_operand:QI 0 "nonimmediate_operand" "%qm,qm,r")
11316 (match_operand:QI 1 "nonmemory_operand" "q,n,n"))
11317 (const_int 0)))]
11318 "ix86_match_ccmode (insn,
11319 CONST_INT_P (operands[1])
11320 && INTVAL (operands[1]) >= 0 ? CCNOmode : CCZmode)"
11321 {
11322 if (get_attr_mode (insn) == MODE_SI)
11323 {
/* Keep only the low byte of a negative constant before printing it
   as the immediate of the 32-bit test.  */
11324 if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) < 0)
11325 operands[1] = GEN_INT (INTVAL (operands[1]) & 0xff);
11326 return "test{l}\t{%1, %k0|%k0, %1}";
11327 }
11328 return "test{b}\t{%1, %0|%0, %1}";
11329 }
11330 [(set_attr "type" "test")
11331 (set (attr "mode")
11332 (cond [(eq_attr "alternative" "2")
11333 (const_string "SI")
11334 (and (match_test "optimize_insn_for_size_p ()")
11335 (and (match_operand 0 "ext_QIreg_operand")
11336 (match_operand 1 "const_0_to_127_operand")))
11337 (const_string "SI")
11338 ]
11339 (const_string "QI")))
11340 (set_attr "pent_pair" "uv,np,np")])
11341
;; Generic test (AND compared with zero) for the SWI124 modes.  Requires
;; the flags user to be compatible with CCNOmode.  Three alternatives:
;; register operand 1, immediate with operand 0 in the "a" register
;; (discouraged for register preferencing by "*"), and immediate with a
;; general register/memory operand 0; "pent_pair" differs per alternative.
11342 (define_insn "*test<mode>_1"
11343 [(set (reg FLAGS_REG)
11344 (compare
11345 (and:SWI124
11346 (match_operand:SWI124 0 "nonimmediate_operand" "%<r>m,*a,<r>m")
11347 (match_operand:SWI124 1 "<nonmemory_szext_operand>" "<r>,<i>,<i>"))
11348 (const_int 0)))]
11349 "ix86_match_ccmode (insn, CCNOmode)"
11350 "test{<imodesuffix>}\t{%1, %0|%0, %1}"
11351 [(set_attr "type" "test")
11352 (set_attr "mode" "<MODE>")
11353 (set_attr "pent_pair" "uv,uv,np")])
11354
;; Expander that sets FLAGS_REG in CCNOmode from comparing with zero the
;; QImode AND of (a) the 8-bit field at bit position 8 of HImode register
;; operand 0, taken as a QImode subreg, and (b) the constant operand 1.
;; It has no condition and no preparation statements.
11355 (define_expand "testqi_ext_1_ccno"
11356 [(set (reg:CCNO FLAGS_REG)
11357 (compare:CCNO
11358 (and:QI
11359 (subreg:QI
11360 (zero_extract:HI
11361 (match_operand:HI 0 "register_operand")
11362 (const_int 8)
11363 (const_int 8)) 0)
11364 (match_operand:QI 1 "const_int_operand"))
11365 (const_int 0)))])
11366
;; Byte test of the 8-bit field at bit position 8 of register operand 0
;; (matched through the "extract_operator" operator 2, constraint "Q")
;; against the QImode operand 1.  Operand 0 is printed with the %h
;; modifier -- presumably the high-byte register name (TODO confirm in
;; the operand-printing code).  Requires CCNOmode compatibility.
11367 (define_insn "*testqi_ext<mode>_1"
11368 [(set (reg FLAGS_REG)
11369 (compare
11370 (and:QI
11371 (subreg:QI
11372 (match_operator:SWI248 2 "extract_operator"
11373 [(match_operand 0 "int248_register_operand" "Q")
11374 (const_int 8)
11375 (const_int 8)]) 0)
11376 (match_operand:QI 1 "general_operand" "QnBn"))
11377 (const_int 0)))]
11378 "ix86_match_ccmode (insn, CCNOmode)"
11379 "test{b}\t{%1, %h0|%h0, %1}"
11380 [(set_attr "addr" "gpr8")
11381 (set_attr "type" "test")
11382 (set_attr "mode" "QI")])
11383
;; Byte test where both inputs are the 8-bit field at bit position 8 of a
;; "Q"-class register (operands 0 and 1, matched through operators 2 and
;; 3).  Both are printed with the %h modifier -- presumably the high-byte
;; register names (TODO confirm).  Requires CCNOmode compatibility.
11384 (define_insn "*testqi_ext<mode>_2"
11385 [(set (reg FLAGS_REG)
11386 (compare
11387 (and:QI
11388 (subreg:QI
11389 (match_operator:SWI248 2 "extract_operator"
11390 [(match_operand 0 "int248_register_operand" "Q")
11391 (const_int 8)
11392 (const_int 8)]) 0)
11393 (subreg:QI
11394 (match_operator:SWI248 3 "extract_operator"
11395 [(match_operand 1 "int248_register_operand" "Q")
11396 (const_int 8)
11397 (const_int 8)]) 0))
11398 (const_int 0)))]
11399 "ix86_match_ccmode (insn, CCNOmode)"
11400 "test{b}\t{%h1, %h0|%h0, %h1}"
11401 [(set_attr "type" "test")
11402 (set_attr "mode" "QI")])
11403
11404 ;; Provide a *testti instruction that STV can implement using ptest.
11405 ;; This pattern splits into *andti3_doubleword and *cmpti_doubleword.
;; TImode test for zero (CCZmode), available on TARGET_64BIT while
;; ix86_pre_reload_split () holds.  It always splits ("&& 1") into a
;; TImode AND with a flags clobber, writing a fresh pseudo (operand 2),
;; followed by a CCZ compare of that pseudo with zero.
11406 (define_insn_and_split "*testti_doubleword"
11407 [(set (reg:CCZ FLAGS_REG)
11408 (compare:CCZ
11409 (and:TI (match_operand:TI 0 "register_operand")
11410 (match_operand:TI 1 "general_operand"))
11411 (const_int 0)))]
11412 "TARGET_64BIT
11413 && ix86_pre_reload_split ()"
11414 "#"
11415 "&& 1"
11416 [(parallel [(set (match_dup 2) (and:TI (match_dup 0) (match_dup 1)))
11417 (clobber (reg:CC FLAGS_REG))])
11418 (set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 2) (const_int 0)))]
11419 {
/* Fresh pseudo that holds the AND result.  */
11420 operands[2] = gen_reg_rtx (TImode);
/* Operand 1 was matched only as general_operand; load it into a register
   when it is not acceptable to x86_64_hilo_general_operand.  */
11421 if (!x86_64_hilo_general_operand (operands[1], TImode))
11422 operands[1] = force_reg (TImode, operands[1]);
11423 })
11424
11425 ;; Combine likes to form bit extractions for some tests. Humor it.
;; Matches a flags-setting compare of a zero_extract (value operand 2,
;; length operand 3, position operand 4) with zero and always splits it
;; ("&& 1") into a compare of (AND value mask) with zero, where the mask
;; covers LEN bits starting at POS.  The insn condition restricts POS/LEN
;; and picks CCZmode or CCNOmode as explained in the inline comments; the
;; split body additionally chooses the mode in which the AND is done.
11426 (define_insn_and_split "*testqi_ext_3"
11427 [(set (match_operand 0 "flags_reg_operand")
11428 (match_operator 1 "compare_operator"
11429 [(zero_extract:SWI248
11430 (match_operand 2 "int_nonimmediate_operand" "rm")
11431 (match_operand:QI 3 "const_int_operand")
11432 (match_operand:QI 4 "const_int_operand"))
11433 (const_int 0)]))]
11434 "/* Ensure that resulting mask is zero or sign extended operand. */
11435 INTVAL (operands[4]) >= 0
11436 && ((INTVAL (operands[3]) > 0
11437 && INTVAL (operands[3]) + INTVAL (operands[4]) <= 32)
11438 || (<MODE>mode == DImode
11439 && INTVAL (operands[3]) > 32
11440 && INTVAL (operands[3]) + INTVAL (operands[4]) == 64))
11441 && ix86_match_ccmode (insn,
11442 /* If zero_extract mode precision is the same
11443 as len, the SF of the zero_extract
11444 comparison will be the most significant
11445 extracted bit, but this could be matched
11446 after splitting only for pos 0 len all bits
11447 trivial extractions. Require CCZmode. */
11448 (GET_MODE_PRECISION (<MODE>mode)
11449 == INTVAL (operands[3]))
11450 /* Otherwise, require CCZmode if we'd use a mask
11451 with the most significant bit set and can't
11452 widen it to wider mode. *testdi_1 also
11453 requires CCZmode if the mask has bit
11454 31 set and all bits above it clear. */
11455 || (INTVAL (operands[3]) + INTVAL (operands[4])
11456 >= 32)
11457 /* We can't widen also if val is not a REG. */
11458 || (INTVAL (operands[3]) + INTVAL (operands[4])
11459 == GET_MODE_PRECISION (GET_MODE (operands[2]))
11460 && !register_operand (operands[2],
11461 GET_MODE (operands[2])))
11462 /* And we shouldn't widen if
11463 TARGET_PARTIAL_REG_STALL. */
11464 || (TARGET_PARTIAL_REG_STALL
11465 && (INTVAL (operands[3]) + INTVAL (operands[4])
11466 >= (paradoxical_subreg_p (operands[2])
11467 && (GET_MODE_CLASS
11468 (GET_MODE (SUBREG_REG (operands[2])))
11469 == MODE_INT)
11470 ? GET_MODE_PRECISION
11471 (GET_MODE (SUBREG_REG (operands[2])))
11472 : GET_MODE_PRECISION
11473 (GET_MODE (operands[2])))))
11474 ? CCZmode : CCNOmode)"
11475 "#"
11476 "&& 1"
11477 [(set (match_dup 0) (match_op_dup 1 [(match_dup 2) (const_int 0)]))]
11478 {
/* val/mode track the value being tested and the mode of the final AND;
   len and pos describe the extracted bit-field.  */
11479 rtx val = operands[2];
11480 HOST_WIDE_INT len = INTVAL (operands[3]);
11481 HOST_WIDE_INT pos = INTVAL (operands[4]);
11482 machine_mode mode = GET_MODE (val);
11483
11484 if (SUBREG_P (val))
11485 {
11486 machine_mode submode = GET_MODE (SUBREG_REG (val));
11487
11488 /* Narrow paradoxical subregs to prevent partial register stalls. */
11489 if (GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (submode)
11490 && GET_MODE_CLASS (submode) == MODE_INT
11491 && (GET_MODE (operands[0]) == CCZmode
11492 || pos + len < GET_MODE_PRECISION (submode)
11493 || REG_P (SUBREG_REG (val))))
11494 {
11495 val = SUBREG_REG (val);
11496 mode = submode;
11497 }
11498 }
11499
11500 /* Small HImode tests can be converted to QImode. */
11501 if (pos + len <= 8
11502 && register_operand (val, HImode))
11503 {
11504 rtx nval = gen_lowpart (QImode, val);
11505 if (!MEM_P (nval)
11506 || GET_MODE (operands[0]) == CCZmode
11507 || pos + len < 8)
11508 {
11509 val = nval;
11510 mode = QImode;
11511 }
11512 }
11513
11514 gcc_assert (pos + len <= GET_MODE_PRECISION (mode));
11515
11516 /* If the mask would have the sign bit set in the mode we want to
11517 do the comparison in, and the user is interested in more than just
11518 the zero flag, then we must widen the target mode. */
11519 if (pos + len == GET_MODE_PRECISION (mode)
11520 && GET_MODE (operands[0]) != CCZmode)
11521 {
11522 gcc_assert (pos + len < 32 && !MEM_P (val));
11523 mode = SImode;
11524 val = gen_lowpart (mode, val);
11525 }
11526
/* Mask of LEN bits starting at bit POS, in the precision of the
   chosen mode.  */
11527 wide_int mask
11528 = wi::shifted_mask (pos, len, false, GET_MODE_PRECISION (mode));
11529
11530 operands[2] = gen_rtx_AND (mode, val, immed_wide_int_const (mask, mode));
11531 })
11532
11533 ;; Split and;cmp (as optimized by combine) into not;test
11534 ;; Except when TARGET_BMI provides andn (*andn_<mode>_ccno).
;; CCZ test of ((NOT operand 0) AND operand 1) against zero, for the SWI
;; modes.  Matches only while ix86_pre_reload_split () holds and when
;; either TARGET_BMI is off or operand 1 is not a REG.  It always splits
;; into a NOT of operand 0 into a fresh pseudo (operand 2), followed by a
;; CCZ compare of (operand 2 AND operand 1) with zero.
11535 (define_insn_and_split "*test<mode>_not"
11536 [(set (reg:CCZ FLAGS_REG)
11537 (compare:CCZ
11538 (and:SWI
11539 (not:SWI (match_operand:SWI 0 "register_operand"))
11540 (match_operand:SWI 1 "<nonmemory_szext_operand>"))
11541 (const_int 0)))]
11542 "ix86_pre_reload_split ()
11543 && (!TARGET_BMI || !REG_P (operands[1]))"
11544 "#"
11545 "&& 1"
11546 [(set (match_dup 2) (not:SWI (match_dup 0)))
11547 (set (reg:CCZ FLAGS_REG)
11548 (compare:CCZ (and:SWI (match_dup 2) (match_dup 1))
11549 (const_int 0)))]
11550 "operands[2] = gen_reg_rtx (<MODE>mode);")
11551
11552 ;; Split and;cmp (as optimized by combine) into andn;cmp $0
;; Double-word (DWI) CCZ test of ((NOT operand 0) AND operand 1) against
;; zero.  Matches only while ix86_pre_reload_split () holds and always
;; splits into the and-not operation (with a flags clobber) writing a
;; fresh pseudo (operand 2), followed by a CCZ compare of that pseudo with
;; zero.  Operand 0 is forced into a register first.
11553 (define_insn_and_split "*test<mode>_not_doubleword"
11554 [(set (reg:CCZ FLAGS_REG)
11555 (compare:CCZ
11556 (and:DWI
11557 (not:DWI (match_operand:DWI 0 "nonimmediate_operand"))
11558 (match_operand:DWI 1 "nonimmediate_operand"))
11559 (const_int 0)))]
11560 "ix86_pre_reload_split ()"
11561 "#"
11562 "&& 1"
11563 [(parallel
11564 [(set (match_dup 2) (and:DWI (not:DWI (match_dup 0)) (match_dup 1)))
11565 (clobber (reg:CC FLAGS_REG))])
11566 (set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 2) (const_int 0)))]
11567 {
/* The pattern accepts memory for operand 0; the split uses a register.  */
11568 operands[0] = force_reg (<MODE>mode, operands[0]);
11569 operands[2] = gen_reg_rtx (<MODE>mode);
11570 })
11571
11572 ;; Convert HImode/SImode test instructions with immediate to QImode ones.
11573 ;; i386 cannot encode test with an 8-bit sign-extended immediate, so
11574 ;; this is a relatively important trick.
11575 ;; Do the conversion only post-reload to avoid limiting the register class
11576 ;; to QI regs.
;; High-byte case: after reload, a non-QImode test of a QIreg_operand
;; against a constant whose set bits all lie in bits 8..15 (when the flags
;; user is CCZmode-compatible) or in bits 8..14 (CCNOmode) is rewritten as
;; a QImode AND of the 8-bit field at bit position 8 of the register with
;; the constant shifted right by 8.
11577 (define_split
11578 [(set (match_operand 0 "flags_reg_operand")
11579 (match_operator 1 "compare_operator"
11580 [(and (match_operand 2 "QIreg_operand")
11581 (match_operand 3 "const_int_operand"))
11582 (const_int 0)]))]
11583 "reload_completed
11584 && GET_MODE (operands[2]) != QImode
11585 && ((ix86_match_ccmode (insn, CCZmode)
11586 && !(INTVAL (operands[3]) & ~(255 << 8)))
11587 || (ix86_match_ccmode (insn, CCNOmode)
11588 && !(INTVAL (operands[3]) & ~(127 << 8))))"
11589 [(set (match_dup 0)
11590 (match_op_dup 1
11591 [(and:QI
11592 (subreg:QI
11593 (zero_extract:HI (match_dup 2)
11594 (const_int 8)
11595 (const_int 8)) 0)
11596 (match_dup 3))
11597 (const_int 0)]))]
11598 {
/* View the register in HImode for the zero_extract and move the
   constant's bits 8..15 down into a QImode immediate.  */
11599 operands[2] = gen_lowpart (HImode, operands[2]);
11600 operands[3] = gen_int_mode (INTVAL (operands[3]) >> 8, QImode);
11601 })
11602
;; Low-byte case: after reload, a non-QImode test of operand 2 (either not
;; a REG, or a register satisfying ANY_QI_REG_P) against a constant whose
;; set bits all lie in bits 0..7 (when the flags user is
;; CCZmode-compatible) or in bits 0..6 (CCNOmode) is rewritten as a QImode
;; AND on the low part of operand 2.
11603 (define_split
11604 [(set (match_operand 0 "flags_reg_operand")
11605 (match_operator 1 "compare_operator"
11606 [(and (match_operand 2 "nonimmediate_operand")
11607 (match_operand 3 "const_int_operand"))
11608 (const_int 0)]))]
11609 "reload_completed
11610 && GET_MODE (operands[2]) != QImode
11611 && (!REG_P (operands[2]) || ANY_QI_REG_P (operands[2]))
11612 && ((ix86_match_ccmode (insn, CCZmode)
11613 && !(INTVAL (operands[3]) & ~255))
11614 || (ix86_match_ccmode (insn, CCNOmode)
11615 && !(INTVAL (operands[3]) & ~127)))"
11616 [(set (match_dup 0)
11617 (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3))
11618 (const_int 0)]))]
11619 {
/* Re-express both operands in QImode.  */
11620 operands[2] = gen_lowpart (QImode, operands[2]);
11621 operands[3] = gen_int_mode (INTVAL (operands[3]), QImode);
11622 })
11623
11624 ;; Narrow test instructions with immediate operands that test
11625 ;; memory locations for zero. E.g. testl $0x00aa0000, mem can be
11626 ;; converted to testb $0xaa, mem+2. Reject volatile locations and
11627 ;; targets where reading (possibly unaligned) part of memory
11628 ;; location after a large write to the same address causes
11629 ;; store-to-load forwarding stall.
;; Peephole for a CCZ test of a non-volatile memory operand against a
;; constant, enabled when !TARGET_PARTIAL_MEMORY_READ_STALL.  The body
;; finds the lowest byte of the constant that contains a set bit, shifts
;; the constant down to that byte, picks the narrowest of QI/HI/SI/DImode
;; that holds the remaining significant bits, and FAILs unless that mode
;; is strictly narrower than the original.  The replacement is a test of
;; the memory re-addressed at that byte offset in the narrower mode.
11630 (define_peephole2
11631 [(set (reg:CCZ FLAGS_REG)
11632 (compare:CCZ
11633 (and:SWI248 (match_operand:SWI248 0 "memory_operand")
11634 (match_operand 1 "const_int_operand"))
11635 (const_int 0)))]
11636 "!TARGET_PARTIAL_MEMORY_READ_STALL && !MEM_VOLATILE_P (operands[0])"
11637 [(set (reg:CCZ FLAGS_REG)
11638 (compare:CCZ (match_dup 2) (const_int 0)))]
11639 {
11640 unsigned HOST_WIDE_INT ival = UINTVAL (operands[1]);
11641 int first_nonzero_byte, bitsize;
11642 rtx new_addr, new_const;
11643 machine_mode new_mode;
11644
/* An all-zero mask has no set bit to locate; leave the insn alone.  */
11645 if (ival == 0)
11646 FAIL;
11647
11648 /* Clear bits outside mode width. */
11649 ival &= GET_MODE_MASK (<MODE>mode);
11650
/* Byte index of the lowest set bit of the mask.  */
11651 first_nonzero_byte = ctz_hwi (ival) / BITS_PER_UNIT;
11652
/* Drop the all-zero low bytes.  */
11653 ival >>= first_nonzero_byte * BITS_PER_UNIT;
11654
/* Number of bits up to and including the highest set bit.  */
11655 bitsize = sizeof (ival) * BITS_PER_UNIT - clz_hwi (ival);
11656
11657 if (bitsize <= GET_MODE_BITSIZE (QImode))
11658 new_mode = QImode;
11659 else if (bitsize <= GET_MODE_BITSIZE (HImode))
11660 new_mode = HImode;
11661 else if (bitsize <= GET_MODE_BITSIZE (SImode))
11662 new_mode = SImode;
11663 else
11664 new_mode = DImode;
11665
/* Only worthwhile when the access actually gets narrower.  */
11666 if (GET_MODE_SIZE (new_mode) >= GET_MODE_SIZE (<MODE>mode))
11667 FAIL;
11668
11669 new_addr = adjust_address (operands[0], new_mode, first_nonzero_byte);
11670 new_const = gen_int_mode (ival, new_mode);
11671
11672 operands[2] = gen_rtx_AND (new_mode, new_addr, new_const);
11673 })
11674
11675 ;; %%% This used to optimize known byte-wide and operations to memory,
11676 ;; and sometimes to QImode registers. If this is considered useful,
11677 ;; it should be done with splitters.
11678
;; Expander for AND in all SDWIM modes.
;;  - For modes wider than a word, operand 2 is forced into a register
;;    unless it satisfies x86_64_hilo_general_operand.
;;  - For modes no wider than a word, when operand 2 is a CONST_INT equal
;;    to the SImode, HImode or QImode mask, operand 0 is a register, and
;;    it is not the case that TARGET_ZERO_EXTEND_WITH_AND holds while
;;    optimizing for speed, the AND is emitted as an extension of the low
;;    part of operand 1 instead (gen_extend_insn with final argument 1,
;;    presumably selecting zero-extension -- TODO confirm).
;;  - Otherwise ix86_expand_binary_operator emits the AND.
11679 (define_expand "and<mode>3"
11680 [(set (match_operand:SDWIM 0 "nonimmediate_operand")
11681 (and:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")
11682 (match_operand:SDWIM 2 "<general_szext_operand>")))]
11683 ""
11684 {
/* MODE stays <MODE>mode unless the mask selects a narrower source mode
   for the extension form below.  */
11685 machine_mode mode = <MODE>mode;
11686
11687 if (GET_MODE_SIZE (<MODE>mode) > UNITS_PER_WORD
11688 && !x86_64_hilo_general_operand (operands[2], <MODE>mode))
11689 operands[2] = force_reg (<MODE>mode, operands[2]);
11690
11691 if (GET_MODE_SIZE (<MODE>mode) <= UNITS_PER_WORD
11692 && const_int_operand (operands[2], <MODE>mode)
11693 && register_operand (operands[0], <MODE>mode)
11694 && !(TARGET_ZERO_EXTEND_WITH_AND
11695 && optimize_function_for_speed_p (cfun)))
11696 {
11697 unsigned HOST_WIDE_INT ival = UINTVAL (operands[2]);
11698
11699 if (ival == GET_MODE_MASK (SImode))
11700 mode = SImode;
11701 else if (ival == GET_MODE_MASK (HImode))
11702 mode = HImode;
11703 else if (ival == GET_MODE_MASK (QImode))
11704 mode = QImode;
11705 }
11706
11707 if (mode != <MODE>mode)
11708 emit_insn (gen_extend_insn
11709 (operands[0], gen_lowpart (mode, operands[1]),
11710 <MODE>mode, mode, 1));
11711 else
11712 ix86_expand_binary_operator (AND, <MODE>mode, operands,
11713 TARGET_APX_NDD);
11714
11715 DONE;
11716 })
11717
;; Double-word AND.  Always output as "#" and split after reload into
;; operations on the two halves.  split_double_mode rewrites the three
;; operands into two half-width groups, operands[0..2] and operands[3..5].
;; For each group: AND with 0 becomes a move of 0; AND with -1 becomes a
;; move of the source (skipped when source and destination are equal);
;; anything else goes through ix86_expand_binary_operator.  When both
;; groups turn out to need no instruction, a NOTE_INSN_DELETED is emitted
;; instead (presumably so the split result is not empty -- TODO confirm).
;; Alternatives 2 and 3 are restricted to the "apx_ndd" isa.
11718 (define_insn_and_split "*and<dwi>3_doubleword"
11719 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,&r,&r")
11720 (and:<DWI>
11721 (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0,ro,r")
11722 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o,r<di>,o")))
11723 (clobber (reg:CC FLAGS_REG))]
11724 "ix86_binary_operator_ok (AND, <DWI>mode, operands, TARGET_APX_NDD)"
11725 "#"
11726 "&& reload_completed"
11727 [(const_int:DWIH 0)]
11728 {
/* Set when the first half required no instruction at all.  */
11729 bool emit_insn_deleted_note_p = false;
11730
11731 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
11732
/* First half: operands[0] = operands[1] & operands[2].  */
11733 if (operands[2] == const0_rtx)
11734 emit_move_insn (operands[0], const0_rtx);
11735 else if (operands[2] == constm1_rtx)
11736 {
11737 if (!rtx_equal_p (operands[0], operands[1]))
11738 emit_move_insn (operands[0], operands[1]);
11739 else
11740 emit_insn_deleted_note_p = true;
11741 }
11742 else
11743 ix86_expand_binary_operator (AND, <MODE>mode, &operands[0],
11744 TARGET_APX_NDD);
11745
/* Second half: operands[3] = operands[4] & operands[5].  */
11746 if (operands[5] == const0_rtx)
11747 emit_move_insn (operands[3], const0_rtx);
11748 else if (operands[5] == constm1_rtx)
11749 {
11750 if (!rtx_equal_p (operands[3], operands[4]))
11751 emit_move_insn (operands[3], operands[4]);
11752 else if (emit_insn_deleted_note_p)
11753 emit_note (NOTE_INSN_DELETED);
11754 }
11755 else
11756 ix86_expand_binary_operator (AND, <MODE>mode, &operands[3],
11757 TARGET_APX_NDD);
11758
11759 DONE;
11760 }
11761 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
11762
;; DImode AND for TARGET_64BIT, eight alternatives:
;;   0-1  operand 2 matches "Z": 32-bit "and{l}" on the %k operand forms
;;        (alternative 1 is the three-operand apx_ndd form);
;;   2-3  two-operand "and{q}";
;;   4-5  three-operand "and{q}" (apx_ndd);
;;   6    operand 2 matches "L": output "#", type imovx;
;;   7    mask registers ("k"): output "#", type msklog, isa avx512bw.
;; "prefix_rex" is set to 1 for the imovx alternative when operand 2 is
;; 0xff and operand 1 is an ext_QIreg_operand.
11763 (define_insn "*anddi_1"
11764 [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm,r,r,r,r,?k")
11765 (and:DI
11766 (match_operand:DI 1 "nonimmediate_operand" "%0,r,0,0,rm,r,qm,k")
11767 (match_operand:DI 2 "x86_64_szext_general_operand" "Z,Z,re,m,re,m,L,k")))
11768 (clobber (reg:CC FLAGS_REG))]
11769 "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands,
11770 TARGET_APX_NDD)"
11771 "@
11772 and{l}\t{%k2, %k0|%k0, %k2}
11773 and{l}\t{%k2, %k1, %k0|%k0, %k1, %k2}
11774 and{q}\t{%2, %0|%0, %2}
11775 and{q}\t{%2, %0|%0, %2}
11776 and{q}\t{%2, %1, %0|%0, %1, %2}
11777 and{q}\t{%2, %1, %0|%0, %1, %2}
11778 #
11779 #"
11780 [(set_attr "isa" "x64,apx_ndd,x64,x64,apx_ndd,apx_ndd,x64,avx512bw")
11781 (set_attr "type" "alu,alu,alu,alu,alu,alu,imovx,msklog")
11782 (set_attr "length_immediate" "*,*,*,*,*,*,0,*")
11783 (set (attr "prefix_rex")
11784 (if_then_else
11785 (and (eq_attr "type" "imovx")
11786 (and (match_test "INTVAL (operands[2]) == 0xff")
11787 (match_operand 1 "ext_QIreg_operand")))
11788 (const_string "1")
11789 (const_string "*")))
11790 (set_attr "mode" "SI,SI,DI,DI,DI,DI,SI,DI")])
11791
;; DImode AND with a constant whose complement is a single bit at
;; position 31..63 (exact_log2 (~operand 2) in that range), i.e. the AND
;; clears exactly one such bit.  Requires TARGET_64BIT and TARGET_USE_BT.
;; After reload it splits into a store of zero into the 1-bit zero_extract
;; of operand 0 at the bit position computed into operand 3.
11792 (define_insn_and_split "*anddi_1_btr"
11793 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
11794 (and:DI
11795 (match_operand:DI 1 "nonimmediate_operand" "%0")
11796 (match_operand:DI 2 "const_int_operand" "n")))
11797 (clobber (reg:CC FLAGS_REG))]
11798 "TARGET_64BIT && TARGET_USE_BT
11799 && ix86_binary_operator_ok (AND, DImode, operands)
11800 && IN_RANGE (exact_log2 (~INTVAL (operands[2])), 31, 63)"
11801 "#"
11802 "&& reload_completed"
11803 [(parallel [(set (zero_extract:DI (match_dup 0)
11804 (const_int 1)
11805 (match_dup 3))
11806 (const_int 0))
11807 (clobber (reg:CC FLAGS_REG))])]
11808 "operands[3] = GEN_INT (exact_log2 (~INTVAL (operands[2])));"
11809 [(set_attr "type" "alu1")
11810 (set_attr "prefix_0f" "1")
11811 (set_attr "znver1_decode" "double")
11812 (set_attr "mode" "DI")])
11813
11814 ;; Turn *anddi_1 into *andsi_1_zext if possible.
;; On TARGET_64BIT, a DImode AND of (subreg:DI of an SImode register) with
;; an x86_64_zext_immediate_operand is rewritten as the zero-extension of
;; an SImode AND.  The preparation code re-expresses operand 2 in SImode:
;; SYMBOL_REF/LABEL_REF and CONST operands are copied and re-moded in
;; place of the copy, everything else goes through gen_lowpart.
11815 (define_split
11816 [(set (match_operand:DI 0 "register_operand")
11817 (and:DI (subreg:DI (match_operand:SI 1 "register_operand") 0)
11818 (match_operand:DI 2 "x86_64_zext_immediate_operand")))
11819 (clobber (reg:CC FLAGS_REG))]
11820 "TARGET_64BIT"
11821 [(parallel [(set (match_dup 0)
11822 (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))
11823 (clobber (reg:CC FLAGS_REG))])]
11824 {
11825 if (GET_CODE (operands[2]) == SYMBOL_REF
11826 || GET_CODE (operands[2]) == LABEL_REF)
11827 {
/* Change the mode on a copy, not on the original rtx.  */
11828 operands[2] = shallow_copy_rtx (operands[2]);
11829 PUT_MODE (operands[2], SImode);
11830 }
11831 else if (GET_CODE (operands[2]) == CONST)
11832 {
11833 /* (const:DI (plus:DI (symbol_ref:DI ("...")) (const_int N))) */
/* Re-mode the CONST, the PLUS inside it and the symbol inside that.  */
11834 operands[2] = copy_rtx (operands[2]);
11835 PUT_MODE (operands[2], SImode);
11836 PUT_MODE (XEXP (operands[2], 0), SImode);
11837 PUT_MODE (XEXP (XEXP (operands[2], 0), 0), SImode);
11838 }
11839 else
11840 operands[2] = gen_lowpart (SImode, operands[2]);
11841 })
11842
11843 ;; See the comment for addsi_1_zext for why we use nonimmediate_operand
;; SImode AND whose result is zero-extended into a DImode register
;; (TARGET_64BIT).  All alternatives emit "and{l}" writing the %k form of
;; operand 0: alternative 0 is the two-operand form with operand 1 tied to
;; the output; alternatives 1 and 2 are three-operand forms restricted to
;; the "apx_ndd" isa.
11844 (define_insn "*andsi_1_zext"
11845 [(set (match_operand:DI 0 "register_operand" "=r,r,r")
11846 (zero_extend:DI
11847 (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,rm,r")
11848 (match_operand:SI 2 "x86_64_general_operand" "rBMe,re,BM"))))
11849 (clobber (reg:CC FLAGS_REG))]
11850 "TARGET_64BIT && ix86_binary_operator_ok (AND, SImode, operands,
11851 TARGET_APX_NDD)"
11852 "@
11853 and{l}\t{%2, %k0|%k0, %2}
11854 and{l}\t{%2, %1, %k0|%k0, %1, %2}
11855 and{l}\t{%2, %1, %k0|%k0, %1, %2}"
11856 [(set_attr "type" "alu")
11857 (set_attr "isa" "*,apx_ndd,apx_ndd")
11858 (set_attr "mode" "SI")])
11859
;; AND for the SWI24 modes, six alternatives:
;;   0-1  two-operand "and";
;;   2-3  three-operand "and" (isa apx_ndd);
;;   4    operand 2 matches "L", destination class "Ya": output "#",
;;        type imovx, mode SI;
;;   5    mask registers ("k"): output "#", type msklog; isa is avx512bw
;;        when the mode attribute is SI and avx512f otherwise.
;; "prefix_rex" is set to 1 for the imovx alternative when operand 2 is
;; 0xff and operand 1 is an ext_QIreg_operand.
11860 (define_insn "*and<mode>_1"
11861 [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,r,r,r,Ya,?k")
11862 (and:SWI24 (match_operand:SWI24 1 "nonimmediate_operand" "%0,0,rm,r,qm,k")
11863 (match_operand:SWI24 2 "<general_operand>" "r<i>,<m>,r<i>,<m>,L,k")))
11864 (clobber (reg:CC FLAGS_REG))]
11865 "ix86_binary_operator_ok (AND, <MODE>mode, operands, TARGET_APX_NDD)"
11866 "@
11867 and{<imodesuffix>}\t{%2, %0|%0, %2}
11868 and{<imodesuffix>}\t{%2, %0|%0, %2}
11869 and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
11870 and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
11871 #
11872 #"
11873 [(set (attr "isa")
11874 (cond [(eq_attr "alternative" "2,3")
11875 (const_string "apx_ndd")
11876 (eq_attr "alternative" "5")
11877 (if_then_else (eq_attr "mode" "SI")
11878 (const_string "avx512bw")
11879 (const_string "avx512f"))
11880 ]
11881 (const_string "*")))
11882 (set_attr "type" "alu,alu,alu,alu,imovx,msklog")
11883 (set_attr "length_immediate" "*,*,*,*,0,*")
11884 (set (attr "prefix_rex")
11885 (if_then_else
11886 (and (eq_attr "type" "imovx")
11887 (and (match_test "INTVAL (operands[2]) == 0xff")
11888 (match_operand 1 "ext_QIreg_operand")))
11889 (const_string "1")
11890 (const_string "*")))
11891 (set_attr "mode" "<MODE>,<MODE>,<MODE>,<MODE>,SI,<MODE>")])
11892
;; QImode AND, six alternatives:
;;   0-1  two-operand "and{b}";
;;   2    two-operand "and{l}" on the %k operand forms (mode SI; not
;;        preferred for speed when TARGET_PARTIAL_REG_STALL);
;;   3-4  three-operand "and{b}" (isa apx_ndd);
;;   5    mask registers ("k"): output "#", type msklog; its mode
;;        attribute is HI when !TARGET_AVX512DQ and QI otherwise.
11893 (define_insn "*andqi_1"
11894 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k")
11895 (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k")
11896 (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k")))
11897 (clobber (reg:CC FLAGS_REG))]
11898 "ix86_binary_operator_ok (AND, QImode, operands, TARGET_APX_NDD)"
11899 "@
11900 and{b}\t{%2, %0|%0, %2}
11901 and{b}\t{%2, %0|%0, %2}
11902 and{l}\t{%k2, %k0|%k0, %k2}
11903 and{b}\t{%2, %1, %0|%0, %1, %2}
11904 and{b}\t{%2, %1, %0|%0, %1, %2}
11905 #"
11906 [(set_attr "type" "alu,alu,alu,alu,alu,msklog")
11907 (set_attr "isa" "*,*,*,apx_ndd,apx_ndd,*")
11908 (set (attr "mode")
11909 (cond [(eq_attr "alternative" "2")
11910 (const_string "SI")
11911 (and (eq_attr "alternative" "5")
11912 (match_test "!TARGET_AVX512DQ"))
11913 (const_string "HI")
11914 ]
11915 (const_string "QI")))
11916 ;; Potential partial reg stall on alternative 2.
11917 (set (attr "preferred_for_speed")
11918 (cond [(eq_attr "alternative" "2")
11919 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
11920 (symbol_ref "true")))])
11921
11922 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Logic operation (any_logic) on the strict low part of a register, for
;; the SWI12 modes.  Enabled when !TARGET_PARTIAL_REG_STALL or when
;; optimizing the function for size.  Alternative 0 (operand 1 tied to the
;; destination) emits the instruction directly.  Alternative 1 is output
;; as "#"; after reload, when operand 0 equals neither operand 1 nor
;; operand 2, it is split into a move of operand 1 into the strict low
;; part of operand 0 followed by the operation on operand 0 and operand 2.
11923 (define_insn_and_split "*<code><mode>_1_slp"
11924 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
11925 (any_logic:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "%0,!<r>")
11926 (match_operand:SWI12 2 "general_operand" "<r>mn,<r>mn")))
11927 (clobber (reg:CC FLAGS_REG))]
11928 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
11929 "@
11930 <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
11931 #"
11932 "&& reload_completed
11933 && !(rtx_equal_p (operands[0], operands[1])
11934 || rtx_equal_p (operands[0], operands[2]))"
11935 [(set (strict_low_part (match_dup 0)) (match_dup 1))
11936 (parallel
11937 [(set (strict_low_part (match_dup 0))
11938 (any_logic:SWI12 (match_dup 0) (match_dup 2)))
11939 (clobber (reg:CC FLAGS_REG))])]
11940 ""
11941 [(set_attr "type" "alu")
11942 (set_attr "mode" "<MODE>")])
11943
11944 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; QImode logic operation on the strict low part of operand 0 where one
;; input is the 8-bit field at bit position 8 of register operand 2
;; (matched through operator 3) and the other is operand 1.  Enabled when
;; !TARGET_PARTIAL_REG_STALL or when optimizing the function for size.
;; Alternative 0 (operand 1 tied to the destination) emits the instruction
;; directly.  Alternative 1 is output as "#"; after reload, when operand 0
;; differs from operand 1, it is split into a move of operand 1 into the
;; strict low part of operand 0 followed by the operation on the extracted
;; field and operand 0.
11945 (define_insn_and_split "*<code>qi_ext<mode>_1_slp"
11946 [(set (strict_low_part (match_operand:QI 0 "register_operand" "+Q,&Q"))
11947 (any_logic:QI
11948 (subreg:QI
11949 (match_operator:SWI248 3 "extract_operator"
11950 [(match_operand 2 "int248_register_operand" "Q,Q")
11951 (const_int 8)
11952 (const_int 8)]) 0)
11953 (match_operand:QI 1 "nonimmediate_operand" "0,!qm")))
11954 (clobber (reg:CC FLAGS_REG))]
11955 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
11956 "@
11957 <logic>{b}\t{%h2, %0|%0, %h2}
11958 #"
11959 "&& reload_completed
11960 && !rtx_equal_p (operands[0], operands[1])"
11961 [(set (strict_low_part (match_dup 0)) (match_dup 1))
11962 (parallel
11963 [(set (strict_low_part (match_dup 0))
11964 (any_logic:QI
11965 (subreg:QI
11966 (match_op_dup 3
11967 [(match_dup 2) (const_int 8) (const_int 8)]) 0)
11968 (match_dup 0)))
11969 (clobber (reg:CC FLAGS_REG))])]
11970 ""
11971 [(set_attr "type" "alu")
11972 (set_attr "mode" "QI")])
11973
;; QImode logic operation on the strict low part of operand 0 where both
;; inputs are the 8-bit field at bit position 8 of a register (operands 1
;; and 2, matched through operators 3 and 4).  Enabled when
;; !TARGET_PARTIAL_REG_STALL or when optimizing the function for size.
;; Always output as "#"; after reload it is split into a copy of the field
;; of operand 2 into the strict low part of operand 0, followed by the
;; operation on the field of operand 1 and operand 0.
11974 (define_insn_and_split "*<code>qi_ext<mode>_2_slp"
11975 [(set (strict_low_part (match_operand:QI 0 "register_operand" "+&Q"))
11976 (any_logic:QI
11977 (subreg:QI
11978 (match_operator:SWI248 3 "extract_operator"
11979 [(match_operand 1 "int248_register_operand" "Q")
11980 (const_int 8)
11981 (const_int 8)]) 0)
11982 (subreg:QI
11983 (match_operator:SWI248 4 "extract_operator"
11984 [(match_operand 2 "int248_register_operand" "Q")
11985 (const_int 8)
11986 (const_int 8)]) 0)))
11987 (clobber (reg:CC FLAGS_REG))]
11988 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
11989 "#"
11990 "&& reload_completed"
11991 [(set (strict_low_part (match_dup 0))
11992 (subreg:QI
11993 (match_op_dup 4
11994 [(match_dup 2) (const_int 8) (const_int 8)]) 0))
11995 (parallel
11996 [(set (strict_low_part (match_dup 0))
11997 (any_logic:QI
11998 (subreg:QI
11999 (match_op_dup 3
12000 [(match_dup 1) (const_int 8) (const_int 8)]) 0)
12001 (match_dup 0)))
12002 (clobber (reg:CC FLAGS_REG))])]
12003 ""
12004 [(set_attr "type" "alu")
12005 (set_attr "mode" "QI")])
12006
;; After reload, an AND of operand 1 with the SImode, HImode or QImode
;; mask into a register that is not the same hard register as operand 1
;; (or where operand 1 is not a REG at all) is replaced by an extension of
;; the matching low part of operand 1 (gen_extend_insn with final
;; argument 1, presumably selecting zero-extension -- TODO confirm).
;; Destinations narrower than SImode are widened to their SImode low part
;; first.
12007 (define_split
12008 [(set (match_operand:SWI248 0 "register_operand")
12009 (and:SWI248 (match_operand:SWI248 1 "nonimmediate_operand")
12010 (match_operand:SWI248 2 "const_int_operand")))
12011 (clobber (reg:CC FLAGS_REG))]
12012 "reload_completed
12013 && (!REG_P (operands[1])
12014 || REGNO (operands[0]) != REGNO (operands[1]))
12015 && (UINTVAL (operands[2]) == GET_MODE_MASK (SImode)
12016 || UINTVAL (operands[2]) == GET_MODE_MASK (HImode)
12017 || UINTVAL (operands[2]) == GET_MODE_MASK (QImode))"
12018 [(const_int 0)]
12019 {
12020 unsigned HOST_WIDE_INT ival = UINTVAL (operands[2]);
12021 machine_mode mode;
12022
/* MODE is the source mode of the extension, chosen from the mask.  The
   split condition guarantees that one of the three cases applies.  */
12023 if (ival == GET_MODE_MASK (SImode))
12024 mode = SImode;
12025 else if (ival == GET_MODE_MASK (HImode))
12026 mode = HImode;
12027 else if (ival == GET_MODE_MASK (QImode))
12028 mode = QImode;
12029 else
12030 gcc_unreachable ();
12031
12032 /* Zero extend to SImode to avoid partial register stalls. */
12033 if (<MODE_SIZE> < GET_MODE_SIZE (SImode))
12034 operands[0] = gen_lowpart (SImode, operands[0]);
12035
12036 emit_insn (gen_extend_insn
12037 (operands[0], gen_lowpart (mode, operands[1]),
12038 GET_MODE (operands[0]), mode, 1));
12039 DONE;
12040 })
12041
;; In-place AND of a register with -65536: replace it with a
;; strict_low_part store of zero into the HImode low part of the register.
;; Note that the replacement has no FLAGS_REG clobber.
12042 (define_split
12043 [(set (match_operand:SWI48 0 "register_operand")
12044 (and:SWI48 (match_dup 0)
12045 (const_int -65536)))
12046 (clobber (reg:CC FLAGS_REG))]
12047 "(TARGET_FAST_PREFIX && !TARGET_PARTIAL_REG_STALL)
12048 || optimize_function_for_size_p (cfun)"
12049 [(set (strict_low_part (match_dup 1)) (const_int 0))]
12050 "operands[1] = gen_lowpart (HImode, operands[0]);")
12051
;; In-place AND of a register with -256: after reload, replace it with a
;; strict_low_part store of zero into the QImode low part of the register.
12052 (define_split
12053 [(set (match_operand:SWI248 0 "any_QIreg_operand")
12054 (and:SWI248 (match_dup 0)
12055 (const_int -256)))
12056 (clobber (reg:CC FLAGS_REG))]
12057 "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
12058 && reload_completed"
12059 [(set (strict_low_part (match_dup 1)) (const_int 0))]
12060 "operands[1] = gen_lowpart (QImode, operands[0]);")
12061
;; In-place AND of a register with -65281 (every bit set except bits 8..15):
;; after reload, clear the 8-bit field at bit 8 by XORing that field with
;; itself.  Operand 0 is narrowed to HImode for the zero_extract.
12062 (define_split
12063 [(set (match_operand:SWI248 0 "QIreg_operand")
12064 (and:SWI248 (match_dup 0)
12065 (const_int -65281)))
12066 (clobber (reg:CC FLAGS_REG))]
12067 "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
12068 && reload_completed"
12069 [(parallel
12070 [(set (zero_extract:HI (match_dup 0)
12071 (const_int 8)
12072 (const_int 8))
12073 (subreg:HI
12074 (xor:QI
12075 (subreg:QI
12076 (zero_extract:HI (match_dup 0)
12077 (const_int 8)
12078 (const_int 8)) 0)
12079 (subreg:QI
12080 (zero_extract:HI (match_dup 0)
12081 (const_int 8)
12082 (const_int 8)) 0)) 0))
12083 (clobber (reg:CC FLAGS_REG))])]
12084 "operands[0] = gen_lowpart (HImode, operands[0]);")
12085
;; DImode AND that also sets the flags register from comparing the result
;; with zero.  Alternatives 0 and 3 accept a "Z" operand 2 and are emitted
;; as and{l} with %k operand modifiers (mode attribute SI); alternatives
;; 3-5 are the three-operand forms enabled for the apx_ndd isa.
12086 (define_insn "*anddi_2"
12087 [(set (reg FLAGS_REG)
12088 (compare
12089 (and:DI
12090 (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,r,rm,r")
12091 (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,m,Z,re,m"))
12092 (const_int 0)))
12093 (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r,r,r")
12094 (and:DI (match_dup 1) (match_dup 2)))]
12095 "TARGET_64BIT
12096 && ix86_match_ccmode
12097 (insn,
12098 /* If we are going to emit andl instead of andq, and the operands[2]
12099 constant might have the SImode sign bit set, make sure the sign
12100 flag isn't tested, because the instruction will set the sign flag
12101 based on bit 31 rather than bit 63. If it isn't CONST_INT,
12102 conservatively assume it might have bit 31 set. */
12103 (satisfies_constraint_Z (operands[2])
12104 && (!CONST_INT_P (operands[2])
12105 || val_signbit_known_set_p (SImode, INTVAL (operands[2]))))
12106 ? CCZmode : CCNOmode)
12107 && ix86_binary_operator_ok (AND, DImode, operands, TARGET_APX_NDD)"
12108 "@
12109 and{l}\t{%k2, %k0|%k0, %k2}
12110 and{q}\t{%2, %0|%0, %2}
12111 and{q}\t{%2, %0|%0, %2}
12112 and{l}\t{%k2, %k1, %k0|%k0, %k1, %k2}
12113 and{q}\t{%2, %1, %0|%0, %1, %2}
12114 and{q}\t{%2, %1, %0|%0, %1, %2}"
12115 [(set_attr "type" "alu")
12116 (set_attr "isa" "*,*,*,apx_ndd,apx_ndd,apx_ndd")
12117 (set_attr "mode" "SI,DI,DI,SI,DI,DI")])
12118
;; SImode AND that sets the flags register from comparing the result with
;; zero and whose result is zero-extended into a DImode register.
;; Alternatives 1 and 2 are the three-operand forms enabled for apx_ndd.
12119 ;; See comment for addsi_1_zext why we do use nonimmediate_operand
12120 (define_insn "*andsi_2_zext"
12121 [(set (reg FLAGS_REG)
12122 (compare (and:SI
12123 (match_operand:SI 1 "nonimmediate_operand" "%0,rm,r")
12124 (match_operand:SI 2 "x86_64_general_operand" "rBMe,re,BM"))
12125 (const_int 0)))
12126 (set (match_operand:DI 0 "register_operand" "=r,r,r")
12127 (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))]
12128 "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
12129 && ix86_binary_operator_ok (AND, SImode, operands, TARGET_APX_NDD)"
12130 "@
12131 and{l}\t{%2, %k0|%k0, %2}
12132 and{l}\t{%2, %1, %k0|%k0, %1, %2}
12133 and{l}\t{%2, %1, %k0|%k0, %1, %2}"
12134 [(set_attr "type" "alu")
12135 (set_attr "isa" "*,apx_ndd,apx_ndd")
12136 (set_attr "mode" "SI")])
12137
;; QImode AND that also sets the flags register.  When the "mode" attribute
;; is SI (alternative 2, or when optimizing for size with an
;; ext_QIreg_operand destination and a const_0_to_127_operand operand 2)
;; the insn is emitted as and{l} on the %k form of operand 0, after masking
;; a negative constant operand 2 down to its low 8 bits.  Alternatives 3
;; and 4 are the three-operand forms enabled for apx_ndd.
;; The required flags mode is CCNOmode only for a non-negative constant
;; operand 2, otherwise CCZmode.
12138 (define_insn "*andqi_2_maybe_si"
12139 [(set (reg FLAGS_REG)
12140 (compare (and:QI
12141 (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r")
12142 (match_operand:QI 2 "general_operand" "qn,m,n,rn,m"))
12143 (const_int 0)))
12144 (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r")
12145 (and:QI (match_dup 1) (match_dup 2)))]
12146 "ix86_binary_operator_ok (AND, QImode, operands, TARGET_APX_NDD)
12147 && ix86_match_ccmode (insn,
12148 CONST_INT_P (operands[2])
12149 && INTVAL (operands[2]) >= 0 ? CCNOmode : CCZmode)"
12150 {
12151 if (get_attr_mode (insn) == MODE_SI)
12152 {
12153 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0)
12154 operands[2] = GEN_INT (INTVAL (operands[2]) & 0xff);
12155 return "and{l}\t{%2, %k0|%k0, %2}";
12156 }
12157 if (which_alternative > 2)
12158 return "and{b}\t{%2, %1, %0|%0, %1, %2}";
12159 return "and{b}\t{%2, %0|%0, %2}";
12160 }
12161 [(set_attr "type" "alu")
12162 (set_attr "isa" "*,*,*,apx_ndd,apx_ndd")
12163 (set (attr "mode")
12164 (cond [(eq_attr "alternative" "3,4")
12165 (const_string "QI")
12166 (eq_attr "alternative" "2")
12167 (const_string "SI")
12168 (and (match_test "optimize_insn_for_size_p ()")
12169 (and (match_operand 0 "ext_QIreg_operand")
12170 (match_operand 2 "const_0_to_127_operand")))
12171 (const_string "SI")
12172 ]
12173 (const_string "QI")))
12174 ;; Potential partial reg stall on alternative 2.
12175 (set (attr "preferred_for_speed")
12176 (cond [(eq_attr "alternative" "2")
12177 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
12178 (symbol_ref "true")))])
12179
;; AND for the modes of the SWI124 iterator that also sets the flags
;; register from comparing the result with zero (CCNOmode).  Alternatives
;; 0 and 1 are the two-operand forms; alternatives 2 and 3 are the
;; three-operand forms enabled for apx_ndd.
12180 (define_insn "*and<mode>_2"
12181 [(set (reg FLAGS_REG)
12182 (compare (and:SWI124
12183 (match_operand:SWI124 1 "nonimmediate_operand" "%0,0,rm,r")
12184 (match_operand:SWI124 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>"))
12185 (const_int 0)))
12186 (set (match_operand:SWI124 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
12187 (and:SWI124 (match_dup 1) (match_dup 2)))]
12188 "ix86_match_ccmode (insn, CCNOmode)
12189 && ix86_binary_operator_ok (AND, <MODE>mode, operands,
12190 TARGET_APX_NDD)"
12191 "@
12192 and{<imodesuffix>}\t{%2, %0|%0, %2}
12193 and{<imodesuffix>}\t{%2, %0|%0, %2}
12194 and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
12195 and{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
12196 [(set_attr "type" "alu")
12197 (set_attr "isa" "*,*,apx_ndd,apx_ndd")
12198 (set_attr "mode" "<MODE>")])
12199
;; QImode logic operation between operand 1 (tied to operand 0 by the "0"
;; constraint) and the 8-bit field at bit 8 of register operand 2, which
;; is printed with the %h modifier.
12200 (define_insn "*<code>qi_ext<mode>_0"
12201 [(set (match_operand:QI 0 "nonimmediate_operand" "=QBn")
12202 (any_logic:QI
12203 (subreg:QI
12204 (match_operator:SWI248 3 "extract_operator"
12205 [(match_operand 2 "int248_register_operand" "Q")
12206 (const_int 8)
12207 (const_int 8)]) 0)
12208 (match_operand:QI 1 "nonimmediate_operand" "0")))
12209 (clobber (reg:CC FLAGS_REG))]
12210 ""
12211 "<logic>{b}\t{%h2, %0|%0, %h2}"
12212 [(set_attr "addr" "gpr8")
12213 (set_attr "type" "alu")
12214 (set_attr "mode" "QI")])
12215
;; QImode logic operation of the 8-bit fields at bit 8 of operands 1 and 2,
;; stored into the earlyclobbered QImode register operand 0.  Always output
;; as "#": after reload it is split into a copy of operand 2's field into
;; operand 0, followed by the logic operation of operand 1's field with
;; operand 0.
12216 (define_insn_and_split "*<code>qi_ext2<mode>_0"
12217 [(set (match_operand:QI 0 "register_operand" "=&Q")
12218 (any_logic:QI
12219 (subreg:QI
12220 (match_operator:SWI248 3 "extract_operator"
12221 [(match_operand 1 "int248_register_operand" "Q")
12222 (const_int 8)
12223 (const_int 8)]) 0)
12224 (subreg:QI
12225 (match_operator:SWI248 4 "extract_operator"
12226 [(match_operand 2 "int248_register_operand" "Q")
12227 (const_int 8)
12228 (const_int 8)]) 0)))
12229 (clobber (reg:CC FLAGS_REG))]
12230 ""
12231 "#"
12232 "&& reload_completed"
12233 [(set (match_dup 0)
12234 (subreg:QI
12235 (match_op_dup 4
12236 [(match_dup 2) (const_int 8) (const_int 8)]) 0))
12237 (parallel
12238 [(set (match_dup 0)
12239 (any_logic:QI
12240 (subreg:QI
12241 (match_op_dup 3
12242 [(match_dup 1) (const_int 8) (const_int 8)]) 0)
12243 (match_dup 0)))
12244 (clobber (reg:CC FLAGS_REG))])]
12245 ""
12246 [(set_attr "type" "alu")
12247 (set_attr "mode" "QI")])
12248
;; Expander: AND the 8-bit field at bit 8 of HImode register operand 1 with
;; the constant operand 2 and store the result into the same field of
;; HImode register operand 0, clobbering the flags register.
12249 (define_expand "andqi_ext_1"
12250 [(parallel
12251 [(set (zero_extract:HI (match_operand:HI 0 "register_operand")
12252 (const_int 8)
12253 (const_int 8))
12254 (subreg:HI
12255 (and:QI
12256 (subreg:QI
12257 (zero_extract:HI (match_operand:HI 1 "register_operand")
12258 (const_int 8)
12259 (const_int 8)) 0)
12260 (match_operand:QI 2 "const_int_operand")) 0))
12261 (clobber (reg:CC FLAGS_REG))])])
12262
;; Logic operation between the 8-bit field at bit 8 of operand 1 and the
;; QImode operand 2, stored into the same field of operand 0.
;; When operands 0 and 1 differ after reload, the insn is split into a copy
;; of operand 1's field into operand 0 followed by the same operation
;; reading and writing operand 0 only.
12263 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
12264 (define_insn_and_split "*<code>qi_ext<mode>_1"
12265 [(set (zero_extract:SWI248
12266 (match_operand 0 "int248_register_operand" "+Q,&Q")
12267 (const_int 8)
12268 (const_int 8))
12269 (subreg:SWI248
12270 (any_logic:QI
12271 (subreg:QI
12272 (match_operator:SWI248 3 "extract_operator"
12273 [(match_operand 1 "int248_register_operand" "0,!Q")
12274 (const_int 8)
12275 (const_int 8)]) 0)
12276 (match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0))
12277 (clobber (reg:CC FLAGS_REG))]
12278 ""
12279 "@
12280 <logic>{b}\t{%2, %h0|%h0, %2}
12281 #"
12282 "reload_completed
12283 && !(rtx_equal_p (operands[0], operands[1]))"
12284 [(set (zero_extract:SWI248
12285 (match_dup 0) (const_int 8) (const_int 8))
12286 (zero_extract:SWI248
12287 (match_dup 1) (const_int 8) (const_int 8)))
12288 (parallel
12289 [(set (zero_extract:SWI248
12290 (match_dup 0) (const_int 8) (const_int 8))
12291 (subreg:SWI248
12292 (any_logic:QI
12293 (subreg:QI
12294 (match_op_dup 3
12295 [(match_dup 0) (const_int 8) (const_int 8)]) 0)
12296 (match_dup 2)) 0))
12297 (clobber (reg:CC FLAGS_REG))])]
12298 ""
12299 [(set_attr "addr" "gpr8")
12300 (set_attr "type" "alu")
12301 (set_attr "mode" "QI")])
12302
;; Flag-setting variant of the pattern above: apply the logic operation to
;; the 8-bit field at bit 8 of operand 1 and to operand 2, compare the
;; result with zero, and store it into the same field of operand 0.  Both
;; sets of the pattern read operand 1.
;; When operands 0 and 1 differ after reload, the insn is split: operand 1's
;; field is first copied into operand 0, and the remaining parallel then
;; reads and writes operand 0 only in both of its sets, so that it is
;; recognized by alternative 0 of this same pattern.
12303 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
12304 (define_insn_and_split "*<code>qi_ext<mode>_1_cc"
12305 [(set (match_operand 4 "flags_reg_operand")
12306 (match_operator 5 "compare_operator"
12307 [(any_logic:QI
12308 (subreg:QI
12309 (match_operator:SWI248 3 "extract_operator"
12310 [(match_operand 1 "int248_register_operand" "0,!Q")
12311 (const_int 8)
12312 (const_int 8)]) 0)
12313 (match_operand:QI 2 "general_operand" "QnBn,QnBn"))
12314 (const_int 0)]))
12315 (set (zero_extract:SWI248
12316 (match_operand 0 "int248_register_operand" "+Q,&Q")
12317 (const_int 8)
12318 (const_int 8))
12319 (subreg:SWI248
12320 (any_logic:QI
12321 (subreg:QI
12322 (match_op_dup 3
12323 [(match_dup 1) (const_int 8) (const_int 8)]) 0)
12324 (match_dup 2)) 0))]
12325 "ix86_match_ccmode (insn, CCNOmode)"
12326 "@
12327 <logic>{b}\t{%2, %h0|%h0, %2}
12328 #"
12329 "&& reload_completed
12330 && !(rtx_equal_p (operands[0], operands[1]))"
12331 [(set (zero_extract:SWI248
12332 (match_dup 0) (const_int 8) (const_int 8))
12333 (zero_extract:SWI248
12334 (match_dup 1) (const_int 8) (const_int 8)))
12335 (parallel
12336 [(set (match_dup 4)
12337 (match_op_dup 5
12338 [(any_logic:QI
12339 (subreg:QI
12340 (match_op_dup 3
12341 [(match_dup 0) (const_int 8) (const_int 8)]) 0)
12342 (match_dup 2))
12343 (const_int 0)]))
12344 (set (zero_extract:SWI248
12345 (match_dup 0) (const_int 8) (const_int 8))
12346 (subreg:SWI248
12347 (any_logic:QI
12348 (subreg:QI
12349 (match_op_dup 3
12350 [(match_dup 0) (const_int 8) (const_int 8)]) 0)
12351 (match_dup 2)) 0))])]
12352 ""
12353 [(set_attr "addr" "gpr8")
12354 (set_attr "type" "alu")
12355 (set_attr "mode" "QI")])
12356
;; Logic operation between the 8-bit fields at bit 8 of operands 1 and 2,
;; stored into the same field of operand 0.
;; When operand 0 equals neither operand 1 nor operand 2 after reload, the
;; insn is split into a copy of operand 1's field into operand 0 followed
;; by the operation between operand 0's and operand 2's fields.
12357 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
12358 (define_insn_and_split "*<code>qi_ext<mode>_2"
12359 [(set (zero_extract:SWI248
12360 (match_operand 0 "int248_register_operand" "+Q,&Q")
12361 (const_int 8)
12362 (const_int 8))
12363 (subreg:SWI248
12364 (any_logic:QI
12365 (subreg:QI
12366 (match_operator:SWI248 3 "extract_operator"
12367 [(match_operand 1 "int248_register_operand" "%0,!Q")
12368 (const_int 8)
12369 (const_int 8)]) 0)
12370 (subreg:QI
12371 (match_operator:SWI248 4 "extract_operator"
12372 [(match_operand 2 "int248_register_operand" "Q,Q")
12373 (const_int 8)
12374 (const_int 8)]) 0)) 0))
12375 (clobber (reg:CC FLAGS_REG))]
12376 ""
12377 "@
12378 <logic>{b}\t{%h2, %h0|%h0, %h2}
12379 #"
12380 "reload_completed
12381 && !(rtx_equal_p (operands[0], operands[1])
12382 || rtx_equal_p (operands[0], operands[2]))"
12383 [(set (zero_extract:SWI248
12384 (match_dup 0) (const_int 8) (const_int 8))
12385 (zero_extract:SWI248
12386 (match_dup 1) (const_int 8) (const_int 8)))
12387 (parallel
12388 [(set (zero_extract:SWI248
12389 (match_dup 0) (const_int 8) (const_int 8))
12390 (subreg:SWI248
12391 (any_logic:QI
12392 (subreg:QI
12393 (match_op_dup 3
12394 [(match_dup 0) (const_int 8) (const_int 8)]) 0)
12395 (subreg:QI
12396 (match_op_dup 4
12397 [(match_dup 2) (const_int 8) (const_int 8)]) 0)) 0))
12398 (clobber (reg:CC FLAGS_REG))])]
12399 ""
12400 [(set_attr "type" "alu")
12401 (set_attr "mode" "QI")])
12402
;; Like the pattern above, but the logic operation is applied to the whole
;; registers operands 1 and 2 (which must have the same mode) and the 8-bit
;; field at bit 8 is extracted from its result before being stored into the
;; same field of operand 0.
;; When operand 0 equals neither operand 1 nor operand 2 after reload, the
;; insn is split into a copy of operand 1's field into operand 0 followed
;; by the operation on operand 0 (as operand 4, in operand 1's mode) and
;; operand 2.
12403 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
12404 (define_insn_and_split "*<code>qi_ext<mode>_3"
12405 [(set (zero_extract:SWI248
12406 (match_operand 0 "int248_register_operand" "+Q,&Q")
12407 (const_int 8)
12408 (const_int 8))
12409 (match_operator:SWI248 3 "extract_operator"
12410 [(any_logic
12411 (match_operand 1 "int248_register_operand" "%0,!Q")
12412 (match_operand 2 "int248_register_operand" "Q,Q"))
12413 (const_int 8)
12414 (const_int 8)]))
12415 (clobber (reg:CC FLAGS_REG))]
12416 "GET_MODE (operands[1]) == GET_MODE (operands[2])"
12417 "@
12418 <logic>{b}\t{%h2, %h0|%h0, %h2}
12419 #"
12420 "&& reload_completed
12421 && !(rtx_equal_p (operands[0], operands[1])
12422 || rtx_equal_p (operands[0], operands[2]))"
12423 [(set (zero_extract:SWI248
12424 (match_dup 0) (const_int 8) (const_int 8))
12425 (zero_extract:SWI248
12426 (match_dup 1) (const_int 8) (const_int 8)))
12427 (parallel
12428 [(set (zero_extract:SWI248
12429 (match_dup 0) (const_int 8) (const_int 8))
12430 (match_op_dup 3
12431 [(any_logic (match_dup 4) (match_dup 2))
12432 (const_int 8) (const_int 8)]))
12433 (clobber (reg:CC FLAGS_REG))])]
12434 "operands[4] = gen_lowpart (GET_MODE (operands[1]), operands[0]);"
12435 [(set_attr "type" "alu")
12436 (set_attr "mode" "QI")])
12437
12438 ;; Convert wide AND instructions with immediate operand to shorter QImode
12439 ;; equivalents when possible.
12440 ;; Don't do the splitting with memory operands, since it introduces risk
12441 ;; of memory mismatch stalls. We may want to do the splitting for optimizing
12442 ;; for size, but that can (should?) be handled by generic code instead.
12443 ;; Don't do the splitting for APX NDD as NDD does not support *h registers.
;; This split handles constants in which every bit outside bits 8..15 is
;; set, so that only the 8-bit field at bit 8 is affected; the QImode
;; immediate of the new insn is the constant shifted right by 8.
12444 (define_split
12445 [(set (match_operand:SWI248 0 "QIreg_operand")
12446 (and:SWI248 (match_operand:SWI248 1 "register_operand")
12447 (match_operand:SWI248 2 "const_int_operand")))
12448 (clobber (reg:CC FLAGS_REG))]
12449 "reload_completed
12450 && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
12451 && !(~INTVAL (operands[2]) & ~(255 << 8))
12452 && !(TARGET_APX_NDD && REGNO (operands[0]) != REGNO (operands[1]))"
12453 [(parallel
12454 [(set (zero_extract:HI (match_dup 0)
12455 (const_int 8)
12456 (const_int 8))
12457 (subreg:HI
12458 (and:QI
12459 (subreg:QI
12460 (zero_extract:HI (match_dup 1)
12461 (const_int 8)
12462 (const_int 8)) 0)
12463 (match_dup 2)) 0))
12464 (clobber (reg:CC FLAGS_REG))])]
12465 {
12466 operands[0] = gen_lowpart (HImode, operands[0]);
12467 operands[1] = gen_lowpart (HImode, operands[1]);
12468 operands[2] = gen_int_mode (INTVAL (operands[2]) >> 8, QImode);
12469 })
12470
12471 ;; Since AND can be encoded with sign extended immediate, this is only
12472 ;; profitable when 7th bit is not set.
;; This split handles constants in which every bit outside bits 0..7 is set
;; and bit 7 is clear: after reload the AND becomes a QImode AND stored
;; through strict_low_part.  With APX NDD it is only done when operands 0
;; and 1 are equal.
12473 (define_split
12474 [(set (match_operand:SWI248 0 "any_QIreg_operand")
12475 (and:SWI248 (match_operand:SWI248 1 "general_operand")
12476 (match_operand:SWI248 2 "const_int_operand")))
12477 (clobber (reg:CC FLAGS_REG))]
12478 "reload_completed
12479 && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
12480 && !(~INTVAL (operands[2]) & ~255)
12481 && !(INTVAL (operands[2]) & 128)
12482 && !(TARGET_APX_NDD
12483 && !rtx_equal_p (operands[0], operands[1]))"
12484 [(parallel [(set (strict_low_part (match_dup 0))
12485 (and:QI (match_dup 1)
12486 (match_dup 2)))
12487 (clobber (reg:CC FLAGS_REG))])]
12488 {
12489 operands[0] = gen_lowpart (QImode, operands[0]);
12490 operands[1] = gen_lowpart (QImode, operands[1]);
12491 operands[2] = gen_int_mode (INTVAL (operands[2]), QImode);
12492 })
12493
;; Double-word AND-NOT (~operand 1 & operand 2) when TARGET_BMI is enabled.
;; Always output as "#": after reload, split_double_mode splits operands
;; 0-2 into two halves each (the second halves become operands 3-5) and
;; one DWIH-mode AND-NOT is emitted per half.
12494 (define_insn_and_split "*andn<dwi>3_doubleword_bmi"
12495 [(set (match_operand:<DWI> 0 "register_operand" "=&r,r,r")
12496 (and:<DWI>
12497 (not:<DWI> (match_operand:<DWI> 1 "register_operand" "r,0,r"))
12498 (match_operand:<DWI> 2 "nonimmediate_operand" "ro,ro,0")))
12499 (clobber (reg:CC FLAGS_REG))]
12500 "TARGET_BMI"
12501 "#"
12502 "&& reload_completed"
12503 [(parallel [(set (match_dup 0)
12504 (and:DWIH (not:DWIH (match_dup 1)) (match_dup 2)))
12505 (clobber (reg:CC FLAGS_REG))])
12506 (parallel [(set (match_dup 3)
12507 (and:DWIH (not:DWIH (match_dup 4)) (match_dup 5)))
12508 (clobber (reg:CC FLAGS_REG))])]
12509 "split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);")
12510
;; Double-word AND-NOT (~operand 1 & operand 2) when TARGET_BMI is not
;; enabled.  Only valid while ix86_pre_reload_split () holds; it is split
;; unconditionally into a NOT of operand 1 into a fresh pseudo register
;; (operand 3) followed by an AND of that register with operand 2.
12511 (define_insn_and_split "*andn<mode>3_doubleword"
12512 [(set (match_operand:DWI 0 "register_operand")
12513 (and:DWI
12514 (not:DWI (match_operand:DWI 1 "register_operand"))
12515 (match_operand:DWI 2 "nonimmediate_operand")))
12516 (clobber (reg:CC FLAGS_REG))]
12517 "!TARGET_BMI
12518 && ix86_pre_reload_split ()"
12519 "#"
12520 "&& 1"
12521 [(set (match_dup 3) (not:DWI (match_dup 1)))
12522 (parallel [(set (match_dup 0)
12523 (and:DWI (match_dup 3) (match_dup 2)))
12524 (clobber (reg:CC FLAGS_REG))])]
12525 "operands[3] = gen_reg_rtx (<MODE>mode);")
12526
;; AND-NOT (~operand 1 & operand 2) for the modes of the SWI48 iterator.
;; Alternatives 0 and 1 (isa bmi) emit andn with a register or memory
;; operand 2; alternative 2 (isa avx512bw, "k" constraints, type msklog)
;; is output as "#".
12527 (define_insn "*andn<mode>_1"
12528 [(set (match_operand:SWI48 0 "register_operand" "=r,r,?k")
12529 (and:SWI48
12530 (not:SWI48 (match_operand:SWI48 1 "register_operand" "r,r,k"))
12531 (match_operand:SWI48 2 "nonimmediate_operand" "r,m,k")))
12532 (clobber (reg:CC FLAGS_REG))]
12533 "TARGET_BMI || TARGET_AVX512BW"
12534 "@
12535 andn\t{%2, %1, %0|%0, %1, %2}
12536 andn\t{%2, %1, %0|%0, %1, %2}
12537 #"
12538 [(set_attr "isa" "bmi,bmi,avx512bw")
12539 (set_attr "type" "bitmanip,bitmanip,msklog")
12540 (set_attr "btver2_decode" "direct, double,*")
12541 (set_attr "mode" "<MODE>")])
12542
;; AND-NOT (~operand 1 & operand 2) for the modes of the SWI12 iterator,
;; register operands only.  Alternative 0 (isa bmi) emits andn on the %k
;; forms of the operands and has mode attribute SI; alternative 1 (isa
;; avx512f, "k" constraints, type msklog) is output as "#" and has mode
;; attribute HI when TARGET_AVX512DQ is not enabled.
12543 (define_insn "*andn<mode>_1"
12544 [(set (match_operand:SWI12 0 "register_operand" "=r,?k")
12545 (and:SWI12
12546 (not:SWI12 (match_operand:SWI12 1 "register_operand" "r,k"))
12547 (match_operand:SWI12 2 "register_operand" "r,k")))
12548 (clobber (reg:CC FLAGS_REG))]
12549 "TARGET_BMI || TARGET_AVX512BW"
12550 "@
12551 andn\t{%k2, %k1, %k0|%k0, %k1, %k2}
12552 #"
12553 [(set_attr "isa" "bmi,avx512f")
12554 (set_attr "type" "bitmanip,msklog")
12555 (set_attr "btver2_decode" "direct,*")
12556 (set (attr "mode")
12557 (cond [(eq_attr "alternative" "0")
12558 (const_string "SI")
12559 (and (eq_attr "alternative" "1")
12560 (match_test "!TARGET_AVX512DQ"))
12561 (const_string "HI")
12562 ]
12563 (const_string "<MODE>")))])
12564
;; BMI andn used only for its effect on the flags register: the flags are
;; set from comparing ~operand 1 & operand 2 with zero (CCNOmode), and the
;; arithmetic result is written to a scratch register (operand 0).
12565 (define_insn "*andn_<mode>_ccno"
12566 [(set (reg FLAGS_REG)
12567 (compare
12568 (and:SWI48
12569 (not:SWI48 (match_operand:SWI48 1 "register_operand" "r,r"))
12570 (match_operand:SWI48 2 "nonimmediate_operand" "r,m"))
12571 (const_int 0)))
12572 (clobber (match_scratch:SWI48 0 "=r,r"))]
12573 "TARGET_BMI && ix86_match_ccmode (insn, CCNOmode)"
12574 "andn\t{%2, %1, %0|%0, %1, %2}"
12575 [(set_attr "type" "bitmanip")
12576 (set_attr "btver2_decode" "direct, double")
12577 (set_attr "mode" "<MODE>")])
12578
12579 ;; Split *andnsi_1 after reload with -Oz when not;and is shorter.
;; Applies only when operands 0 and 1 are the same legacy integer register,
;; operand 2 is not a REX integer register and does not overlap operand 0.
;; The register is complemented in place and then ANDed with operand 2.
12580 (define_split
12581 [(set (match_operand:SI 0 "register_operand")
12582 (and:SI (not:SI (match_operand:SI 1 "register_operand"))
12583 (match_operand:SI 2 "nonimmediate_operand")))
12584 (clobber (reg:CC FLAGS_REG))]
12585 "reload_completed
12586 && optimize_insn_for_size_p () && optimize_size > 1
12587 && REGNO (operands[0]) == REGNO (operands[1])
12588 && LEGACY_INT_REG_P (operands[0])
12589 && !REX_INT_REG_P (operands[2])
12590 && !reg_overlap_mentioned_p (operands[0], operands[2])"
12591 [(set (match_dup 0) (not:SI (match_dup 1)))
12592 (parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 2)))
12593 (clobber (reg:CC FLAGS_REG))])])
12594
12595 ;; Split *andn_si_ccno with -Oz when not;test is shorter.
;; The matched insn clobbers operand 2, so it can be complemented in place.
;; Applies only when operand 2 is a legacy integer register, operand 3 is
;; not a REX integer register and does not overlap operand 2.  The flags
;; are then set from comparing operand 3 AND the complemented operand 2.
12596 (define_split
12597 [(set (match_operand 0 "flags_reg_operand")
12598 (match_operator 1 "compare_operator"
12599 [(and:SI (not:SI (match_operand:SI 2 "general_reg_operand"))
12600 (match_operand:SI 3 "nonimmediate_operand"))
12601 (const_int 0)]))
12602 (clobber (match_dup 2))]
12603 "reload_completed
12604 && optimize_insn_for_size_p () && optimize_size > 1
12605 && LEGACY_INT_REG_P (operands[2])
12606 && !REX_INT_REG_P (operands[3])
12607 && !reg_overlap_mentioned_p (operands[2], operands[3])"
12608 [(set (match_dup 2) (not:SI (match_dup 2)))
12609 (set (match_dup 0) (match_op_dup 1
12610 [(and:SI (match_dup 3) (match_dup 2))
12611 (const_int 0)]))])
12612
12613 ;; Variant 1 of 4: Split ((A | B) ^ A) ^ C as (B & ~A) ^ C.
;; A is operand 1, B is operand 2 and C is operand 3.  Requires TARGET_BMI;
;; the AND-NOT result goes into a fresh pseudo register (operand 4).
12614 (define_split
12615 [(set (match_operand:SWI48 0 "register_operand")
12616 (xor:SWI48
12617 (xor:SWI48
12618 (ior:SWI48 (match_operand:SWI48 1 "register_operand")
12619 (match_operand:SWI48 2 "nonimmediate_operand"))
12620 (match_dup 1))
12621 (match_operand:SWI48 3 "nonimmediate_operand")))
12622 (clobber (reg:CC FLAGS_REG))]
12623 "TARGET_BMI"
12624 [(parallel
12625 [(set (match_dup 4) (and:SWI48 (not:SWI48 (match_dup 1)) (match_dup 2)))
12626 (clobber (reg:CC FLAGS_REG))])
12627 (parallel
12628 [(set (match_dup 0) (xor:SWI48 (match_dup 4) (match_dup 3)))
12629 (clobber (reg:CC FLAGS_REG))])]
12630 "operands[4] = gen_reg_rtx (<MODE>mode);")
12631
12632 ;; Variant 2 of 4: Split ((A | B) ^ B) ^ C as (A & ~B) ^ C.
;; A is operand 1, B is operand 2 and C is operand 3.  B is complemented
;; here, so it must be a register.  Requires TARGET_BMI; the AND-NOT result
;; goes into a fresh pseudo register (operand 4).
12633 (define_split
12634 [(set (match_operand:SWI48 0 "register_operand")
12635 (xor:SWI48
12636 (xor:SWI48
12637 (ior:SWI48 (match_operand:SWI48 1 "register_operand")
12638 (match_operand:SWI48 2 "register_operand"))
12639 (match_dup 2))
12640 (match_operand:SWI48 3 "nonimmediate_operand")))
12641 (clobber (reg:CC FLAGS_REG))]
12642 "TARGET_BMI"
12643 [(parallel
12644 [(set (match_dup 4) (and:SWI48 (not:SWI48 (match_dup 2)) (match_dup 1)))
12645 (clobber (reg:CC FLAGS_REG))])
12646 (parallel
12647 [(set (match_dup 0) (xor:SWI48 (match_dup 4) (match_dup 3)))
12648 (clobber (reg:CC FLAGS_REG))])]
12649 "operands[4] = gen_reg_rtx (<MODE>mode);")
12650
12651 ;; Variant 3 of 4: Split ((A | B) ^ C) ^ A as (B & ~A) ^ C.
;; A is operand 1, B is operand 2 and C is operand 3.  Requires TARGET_BMI;
;; the AND-NOT result goes into a fresh pseudo register (operand 4).
12652 (define_split
12653 [(set (match_operand:SWI48 0 "register_operand")
12654 (xor:SWI48
12655 (xor:SWI48
12656 (ior:SWI48 (match_operand:SWI48 1 "register_operand")
12657 (match_operand:SWI48 2 "nonimmediate_operand"))
12658 (match_operand:SWI48 3 "nonimmediate_operand"))
12659 (match_dup 1)))
12660 (clobber (reg:CC FLAGS_REG))]
12661 "TARGET_BMI"
12662 [(parallel
12663 [(set (match_dup 4) (and:SWI48 (not:SWI48 (match_dup 1)) (match_dup 2)))
12664 (clobber (reg:CC FLAGS_REG))])
12665 (parallel
12666 [(set (match_dup 0) (xor:SWI48 (match_dup 4) (match_dup 3)))
12667 (clobber (reg:CC FLAGS_REG))])]
12668 "operands[4] = gen_reg_rtx (<MODE>mode);")
12669
12670 ;; Variant 4 of 4: Split ((A | B) ^ C) ^ B as (A & ~B) ^ C.
;; A is operand 1, B is operand 2 and C is operand 3.  B is complemented
;; here, so it must be a register.  Requires TARGET_BMI; the AND-NOT result
;; goes into a fresh pseudo register (operand 4).
12671 (define_split
12672 [(set (match_operand:SWI48 0 "register_operand")
12673 (xor:SWI48
12674 (xor:SWI48
12675 (ior:SWI48 (match_operand:SWI48 1 "register_operand")
12676 (match_operand:SWI48 2 "register_operand"))
12677 (match_operand:SWI48 3 "nonimmediate_operand"))
12678 (match_dup 2)))
12679 (clobber (reg:CC FLAGS_REG))]
12680 "TARGET_BMI"
12681 [(parallel
12682 [(set (match_dup 4) (and:SWI48 (not:SWI48 (match_dup 2)) (match_dup 1)))
12683 (clobber (reg:CC FLAGS_REG))])
12684 (parallel
12685 [(set (match_dup 0) (xor:SWI48 (match_dup 4) (match_dup 3)))
12686 (clobber (reg:CC FLAGS_REG))])]
12687 "operands[4] = gen_reg_rtx (<MODE>mode);")
12688 \f
12689 ;; Logical inclusive and exclusive OR instructions
12690
12691 ;; %%% This used to optimize known byte-wide and operations to memory.
12692 ;; If this is considered useful, it should be done with splitters.
12693
;; Named expander for the any_or operations in the modes of the SDWIM
;; iterator.  For modes wider than a word, operand 2 is forced into a
;; register unless it satisfies x86_64_hilo_general_operand; the expansion
;; itself is delegated to ix86_expand_binary_operator.
12694 (define_expand "<code><mode>3"
12695 [(set (match_operand:SDWIM 0 "nonimmediate_operand")
12696 (any_or:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")
12697 (match_operand:SDWIM 2 "<general_operand>")))]
12698 ""
12699 {
12700 if (GET_MODE_SIZE (<MODE>mode) > UNITS_PER_WORD
12701 && !x86_64_hilo_general_operand (operands[2], <MODE>mode))
12702 operands[2] = force_reg (<MODE>mode, operands[2]);
12703
12704 ix86_expand_binary_operator (<CODE>, <MODE>mode, operands,
12705 TARGET_APX_NDD);
12706 DONE;
12707 })
12708
;; Double-word any_or operation.  Always output as "#": after reload,
;; split_double_mode splits operands 0-2 into two halves each (the second
;; halves become operands 3-5) and each half is expanded separately.
;; For each half, a constant 0 operand becomes a move (or nothing when
;; source and destination are equal), a constant -1 operand becomes a move
;; of -1 for IOR or a NOT otherwise, and anything else becomes the regular
;; binary operation.
12709 (define_insn_and_split "*<code><dwi>3_doubleword"
12710 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,&r,&r")
12711 (any_or:<DWI>
12712 (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0,ro,r")
12713 (match_operand:<DWI> 2 "x86_64_hilo_general_operand" "r<di>,o,r<di>,o")))
12714 (clobber (reg:CC FLAGS_REG))]
12715 "ix86_binary_operator_ok (<CODE>, <DWI>mode, operands,
12716 TARGET_APX_NDD)"
12717 "#"
12718 "&& reload_completed"
12719 [(const_int:DWIH 0)]
12720 {
12721 /* This insn may disappear completely when operands[2] == const0_rtx
12722 and operands[0] == operands[1], which requires a NOTE_INSN_DELETED. */
12723 bool emit_insn_deleted_note_p = false;
12724
12725 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
12726
/* First half: operands[0] = operands[1] <CODE> operands[2].  */
12727 if (operands[2] == const0_rtx)
12728 {
12729 if (!rtx_equal_p (operands[0], operands[1]))
12730 emit_move_insn (operands[0], operands[1]);
12731 else
12732 emit_insn_deleted_note_p = true;
12733 }
12734 else if (operands[2] == constm1_rtx)
12735 {
12736 if (<CODE> == IOR)
12737 emit_move_insn (operands[0], constm1_rtx);
12738 else
12739 ix86_expand_unary_operator (NOT, <MODE>mode, &operands[0],
12740 TARGET_APX_NDD);
12741 }
12742 else
12743 ix86_expand_binary_operator (<CODE>, <MODE>mode, &operands[0],
12744 TARGET_APX_NDD);
12745
/* Second half: operands[3] = operands[4] <CODE> operands[5].  The deleted
   note is only emitted when neither half produced an insn.  */
12746 if (operands[5] == const0_rtx)
12747 {
12748 if (!rtx_equal_p (operands[3], operands[4]))
12749 emit_move_insn (operands[3], operands[4]);
12750 else if (emit_insn_deleted_note_p)
12751 emit_note (NOTE_INSN_DELETED);
12752 }
12753 else if (operands[5] == constm1_rtx)
12754 {
12755 if (<CODE> == IOR)
12756 emit_move_insn (operands[3], constm1_rtx);
12757 else
12758 ix86_expand_unary_operator (NOT, <MODE>mode, &operands[3],
12759 TARGET_APX_NDD);
12760 }
12761 else
12762 ix86_expand_binary_operator (<CODE>, <MODE>mode, &operands[3],
12763 TARGET_APX_NDD);
12764
12765 DONE;
12766 }
12767 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
12768
;; any_or operation for the modes of the SWI248 iterator, clobbering the
;; flags register.  Alternatives 0 and 1 are the two-operand forms,
;; alternatives 2 and 3 the three-operand forms enabled for apx_ndd, and
;; alternative 4 ("k" constraints, type msklog) is output as "#".
12769 (define_insn "*<code><mode>_1"
12770 [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,r,r,?k")
12771 (any_or:SWI248
12772 (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,rm,r,k")
12773 (match_operand:SWI248 2 "<general_operand>" "r<i>,<m>,r<i>,<m>,k")))
12774 (clobber (reg:CC FLAGS_REG))]
12775 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands,
12776 TARGET_APX_NDD)"
12777 "@
12778 <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
12779 <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
12780 <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
12781 <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
12782 #"
12783 [(set_attr "isa" "*,*,apx_ndd,apx_ndd,<kmov_isa>")
12784 (set_attr "type" "alu, alu, alu, alu, msklog")
12785 (set_attr "mode" "<MODE>")])
12786
;; Complement of an XOR, ~(operand 1 ^ operand 2), for the modes of the
;; SWI248 iterator.  Always output as "#": after reload it is split into an
;; XOR into operand 0 followed by a NOT of operand 0, except when operand 0
;; is a mask register, in which case a single kxnor insn is emitted.
12787 (define_insn_and_split "*notxor<mode>_1"
12788 [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,r,r,?k")
12789 (not:SWI248
12790 (xor:SWI248
12791 (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,rm,r,k")
12792 (match_operand:SWI248 2 "<general_operand>" "r<i>,<m>,r<i>,<m>,k"))))
12793 (clobber (reg:CC FLAGS_REG))]
12794 "ix86_binary_operator_ok (XOR, <MODE>mode, operands, TARGET_APX_NDD)"
12795 "#"
12796 "&& reload_completed"
12797 [(parallel
12798 [(set (match_dup 0)
12799 (xor:SWI248 (match_dup 1) (match_dup 2)))
12800 (clobber (reg:CC FLAGS_REG))])
12801 (set (match_dup 0)
12802 (not:SWI248 (match_dup 0)))]
12803 {
12804 if (MASK_REG_P (operands[0]))
12805 {
12806 emit_insn (gen_kxnor<mode> (operands[0], operands[1], operands[2]));
12807 DONE;
12808 }
12809 }
12810 [(set_attr "isa" "*,*,apx_ndd,apx_ndd,<kmov_isa>")
12811 (set_attr "type" "alu, alu, alu, alu, msklog")
12812 (set_attr "mode" "<MODE>")])
12813
;; In-place DImode IOR with a constant that is a single set bit whose index
;; (exact_log2 of the constant) lies in 31..63.  Always output as "#":
;; after reload it is split into a store of 1 into the one-bit zero_extract
;; of operand 0 at that bit index (operand 3).
12814 (define_insn_and_split "*iordi_1_bts"
12815 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
12816 (ior:DI
12817 (match_operand:DI 1 "nonimmediate_operand" "%0")
12818 (match_operand:DI 2 "const_int_operand" "n")))
12819 (clobber (reg:CC FLAGS_REG))]
12820 "TARGET_64BIT && TARGET_USE_BT
12821 && ix86_binary_operator_ok (IOR, DImode, operands)
12822 && IN_RANGE (exact_log2 (INTVAL (operands[2])), 31, 63)"
12823 "#"
12824 "&& reload_completed"
12825 [(parallel [(set (zero_extract:DI (match_dup 0)
12826 (const_int 1)
12827 (match_dup 3))
12828 (const_int 1))
12829 (clobber (reg:CC FLAGS_REG))])]
12830 "operands[3] = GEN_INT (exact_log2 (INTVAL (operands[2])));"
12831 [(set_attr "type" "alu1")
12832 (set_attr "prefix_0f" "1")
12833 (set_attr "znver1_decode" "double")
12834 (set_attr "mode" "DI")])
12835
;; In-place DImode XOR with a constant that is a single set bit whose index
;; (exact_log2 of the constant) lies in 31..63.  Always output as "#":
;; after reload it is split into a complement of the one-bit zero_extract
;; of operand 0 at that bit index (operand 3).
12836 (define_insn_and_split "*xordi_1_btc"
12837 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
12838 (xor:DI
12839 (match_operand:DI 1 "nonimmediate_operand" "%0")
12840 (match_operand:DI 2 "const_int_operand" "n")))
12841 (clobber (reg:CC FLAGS_REG))]
12842 "TARGET_64BIT && TARGET_USE_BT
12843 && ix86_binary_operator_ok (XOR, DImode, operands)
12844 && IN_RANGE (exact_log2 (INTVAL (operands[2])), 31, 63)"
12845 "#"
12846 "&& reload_completed"
12847 [(parallel [(set (zero_extract:DI (match_dup 0)
12848 (const_int 1)
12849 (match_dup 3))
12850 (not:DI (zero_extract:DI (match_dup 0)
12851 (const_int 1)
12852 (match_dup 3))))
12853 (clobber (reg:CC FLAGS_REG))])]
12854 "operands[3] = GEN_INT (exact_log2 (INTVAL (operands[2])));"
12855 [(set_attr "type" "alu1")
12856 (set_attr "prefix_0f" "1")
12857 (set_attr "znver1_decode" "double")
12858 (set_attr "mode" "DI")])
12859
12860 ;; Optimize a ^ ((a ^ b) & mask) to (~mask & a) | (b & mask)
;; Here a is operand 1, b is operand 2 and mask is operand 3.  Requires
;; TARGET_BMI and is only valid while ix86_pre_reload_split () holds.
;; The split forces a and mask into registers and uses two fresh pseudo
;; registers (operands 4 and 5) for the two AND results that are then ORed
;; into operand 0.
12861 (define_insn_and_split "*xor2andn"
12862 [(set (match_operand:SWI248 0 "register_operand")
12863 (xor:SWI248
12864 (and:SWI248
12865 (xor:SWI248
12866 (match_operand:SWI248 1 "nonimmediate_operand")
12867 (match_operand:SWI248 2 "nonimmediate_operand"))
12868 (match_operand:SWI248 3 "nonimmediate_operand"))
12869 (match_dup 1)))
12870 (clobber (reg:CC FLAGS_REG))]
12871 "TARGET_BMI && ix86_pre_reload_split ()"
12872 "#"
12873 "&& 1"
12874 [(parallel [(set (match_dup 4)
12875 (and:SWI248
12876 (not:SWI248
12877 (match_dup 3))
12878 (match_dup 1)))
12879 (clobber (reg:CC FLAGS_REG))])
12880 (parallel [(set (match_dup 5)
12881 (and:SWI248
12882 (match_dup 3)
12883 (match_dup 2)))
12884 (clobber (reg:CC FLAGS_REG))])
12885 (parallel [(set (match_dup 0)
12886 (ior:SWI248
12887 (match_dup 4)
12888 (match_dup 5)))
12889 (clobber (reg:CC FLAGS_REG))])]
12890 {
12891 operands[1] = force_reg (<MODE>mode, operands[1]);
12892 operands[3] = force_reg (<MODE>mode, operands[3]);
12893 operands[4] = gen_reg_rtx (<MODE>mode);
12894 operands[5] = gen_reg_rtx (<MODE>mode);
12895 })
12896
;; SImode any_or operation whose result is zero-extended into a DImode
;; register (64-bit only).  Alternative 0 is the two-operand form;
;; alternatives 1 and 2 are the three-operand forms enabled for apx_ndd.
12897 ;; See comment for addsi_1_zext why we do use nonimmediate_operand
12898 (define_insn "*<code>si_1_zext"
12899 [(set (match_operand:DI 0 "register_operand" "=r,r,r")
12900 (zero_extend:DI
12901 (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0,rm,r")
12902 (match_operand:SI 2 "x86_64_general_operand" "rBMe,re,BM"))))
12903 (clobber (reg:CC FLAGS_REG))]
12904 "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands,
12905 TARGET_APX_NDD)"
12906 "@
12907 <logic>{l}\t{%2, %k0|%k0, %2}
12908 <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}
12909 <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}"
12910 [(set_attr "type" "alu")
12911 (set_attr "isa" "*,apx_ndd,apx_ndd")
12912 (set_attr "mode" "SI")])
12913
;; DImode any_or of a zero-extended SImode operand 1 with an immediate
;; operand 2 that satisfies x86_64_zext_immediate_operand (constraint Z).
;; It is output as an "l"-suffixed logic instruction writing %k0 (mode
;; attribute SI).  Requires TARGET_64BIT; alternative 1 is the three-operand
;; form marked isa apx_ndd.
12914 (define_insn "*<code>si_1_zext_imm"
12915 [(set (match_operand:DI 0 "register_operand" "=r,r")
12916 (any_or:DI
12917 (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "%0,rm"))
12918 (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z,Z")))
12919 (clobber (reg:CC FLAGS_REG))]
12920 "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands,
12921 TARGET_APX_NDD)"
12922 "@
12923 <logic>{l}\t{%2, %k0|%k0, %2}
12924 <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}"
12925 [(set_attr "type" "alu")
12926 (set_attr "isa" "*,apx_ndd")
12927 (set_attr "mode" "SI")])
12928
;; QImode any_or operation; FLAGS_REG is clobbered.
;; Alternatives:
;;   0, 1 - "b"-suffixed two-operand forms with operand 1 tied to operand 0.
;;   2    - "l"-suffixed form on %k0/%k2 (mode attribute SI).
;;   3, 4 - "b"-suffixed three-operand forms, isa apx_ndd.
;;   5    - mask registers (constraint k), isa avx512f, type msklog; the
;;          template is "#" (presumably split by a pattern elsewhere in
;;          the file) and the mode attribute is HI when !TARGET_AVX512DQ.
12929 (define_insn "*<code>qi_1"
12930 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k")
12931 (any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k")
12932 (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k")))
12933 (clobber (reg:CC FLAGS_REG))]
12934 "ix86_binary_operator_ok (<CODE>, QImode, operands, TARGET_APX_NDD)"
12935 "@
12936 <logic>{b}\t{%2, %0|%0, %2}
12937 <logic>{b}\t{%2, %0|%0, %2}
12938 <logic>{l}\t{%k2, %k0|%k0, %k2}
12939 <logic>{b}\t{%2, %1, %0|%0, %1, %2}
12940 <logic>{b}\t{%2, %1, %0|%0, %1, %2}
12941 #"
12942 [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,avx512f")
12943 (set_attr "type" "alu,alu,alu,alu,alu,msklog")
12944 (set (attr "mode")
12945 (cond [(eq_attr "alternative" "2")
12946 (const_string "SI")
12947 (and (eq_attr "alternative" "5")
12948 (match_test "!TARGET_AVX512DQ"))
12949 (const_string "HI")
12950 ]
12951 (const_string "QI")))
12952 ;; Potential partial reg stall on alternative 2.
12953 (set (attr "preferred_for_speed")
12954 (cond [(eq_attr "alternative" "2")
12955 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
12956 (symbol_ref "true")))])
12957
;; QImode ~(operand 1 ^ operand 2); FLAGS_REG is clobbered.
;; The output template is "#" for every alternative.  After reload the insn
;; is split into a flag-clobbering XOR into operand 0 followed by a NOT of
;; operand 0.  When operand 0 is a mask register the preparation code emits
;; gen_kxnorqi instead and finishes with DONE.
;; The constraints and attributes mirror the six alternatives used by
;; "*<code>qi_1".
12958 (define_insn_and_split "*notxorqi_1"
12959 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k")
12960 (not:QI
12961 (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k")
12962 (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k"))))
12963 (clobber (reg:CC FLAGS_REG))]
12964 "ix86_binary_operator_ok (XOR, QImode, operands, TARGET_APX_NDD)"
12965 "#"
12966 "&& reload_completed"
12967 [(parallel
12968 [(set (match_dup 0)
12969 (xor:QI (match_dup 1) (match_dup 2)))
12970 (clobber (reg:CC FLAGS_REG))])
12971 (set (match_dup 0)
12972 (not:QI (match_dup 0)))]
12973 {
12974 if (mask_reg_operand (operands[0], QImode))
12975 {
12976 emit_insn (gen_kxnorqi (operands[0], operands[1], operands[2]));
12977 DONE;
12978 }
12979 }
12980 [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,avx512f")
12981 (set_attr "type" "alu,alu,alu,alu,alu,msklog")
12982 (set (attr "mode")
12983 (cond [(eq_attr "alternative" "2")
12984 (const_string "SI")
12985 (and (eq_attr "alternative" "5")
12986 (match_test "!TARGET_AVX512DQ"))
12987 (const_string "HI")
12988 ]
12989 (const_string "QI")))
12990 ;; Potential partial reg stall on alternative 2.
12991 (set (attr "preferred_for_speed")
12992 (cond [(eq_attr "alternative" "2")
12993 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
12994 (symbol_ref "true")))])
12995
12996 ;; convert (sign_extend:WIDE (any_logic:NARROW (memory, immediate)))
12997 ;; to (any_logic:WIDE (sign_extend (memory)), (sign_extend (immediate))).
12998 ;; This eliminates sign extension after logic operation.
;; QImode source variant: the memory operand 1 is sign-extended into a new
;; pseudo (operand 3), then the logic operation is done in the wide mode
;; using the same const_int operand 2.  The split has no extra condition.
12999
13000 (define_split
13001 [(set (match_operand:SWI248 0 "register_operand")
13002 (sign_extend:SWI248
13003 (any_logic:QI (match_operand:QI 1 "memory_operand")
13004 (match_operand:QI 2 "const_int_operand"))))]
13005 ""
13006 [(set (match_dup 3) (sign_extend:SWI248 (match_dup 1)))
13007 (set (match_dup 0) (any_logic:SWI248 (match_dup 3) (match_dup 2)))]
13008 "operands[3] = gen_reg_rtx (<MODE>mode);")
13009
;; HImode source variant of the sign_extend/logic split: sign-extend the
;; memory operand 1 into a new pseudo (operand 3), then apply the logic
;; operation in the SWI48 destination mode with const_int operand 2.
13010 (define_split
13011 [(set (match_operand:SWI48 0 "register_operand")
13012 (sign_extend:SWI48
13013 (any_logic:HI (match_operand:HI 1 "memory_operand")
13014 (match_operand:HI 2 "const_int_operand"))))]
13015 ""
13016 [(set (match_dup 3) (sign_extend:SWI48 (match_dup 1)))
13017 (set (match_dup 0) (any_logic:SWI48 (match_dup 3) (match_dup 2)))]
13018 "operands[3] = gen_reg_rtx (<MODE>mode);")
13019
;; SImode-to-DImode variant of the sign_extend/logic split; only enabled
;; for TARGET_64BIT.  Operand 3 is a new DImode pseudo that receives the
;; sign-extended memory operand 1 before the DImode logic operation.
13020 (define_split
13021 [(set (match_operand:DI 0 "register_operand")
13022 (sign_extend:DI
13023 (any_logic:SI (match_operand:SI 1 "memory_operand")
13024 (match_operand:SI 2 "const_int_operand"))))]
13025 "TARGET_64BIT"
13026 [(set (match_dup 3) (sign_extend:DI (match_dup 1)))
13027 (set (match_dup 0) (any_logic:DI (match_dup 3) (match_dup 2)))]
13028 "operands[3] = gen_reg_rtx (DImode);")
13029
;; any_or operation that also sets FLAGS_REG from comparing the result
;; with zero, in addition to storing the result in operand 0.  Valid only
;; when ix86_match_ccmode (insn, CCNOmode) holds.  Alternatives 0 and 1 tie
;; operand 1 to the destination; alternatives 2 and 3 are three-operand
;; forms marked isa apx_ndd.
13030 (define_insn "*<code><mode>_2"
13031 [(set (reg FLAGS_REG)
13032 (compare (any_or:SWI
13033 (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r")
13034 (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>"))
13035 (const_int 0)))
13036 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r")
13037 (any_or:SWI (match_dup 1) (match_dup 2)))]
13038 "ix86_match_ccmode (insn, CCNOmode)
13039 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands,
13040 TARGET_APX_NDD)"
13041 "@
13042 <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
13043 <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
13044 <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
13045 <logic>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
13046 [(set_attr "type" "alu")
13047 (set_attr "isa" "*,*,apx_ndd,apx_ndd")
13048 (set_attr "mode" "<MODE>")])
13049
13050 ;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
13051 ;; ??? Special case for immediate operand is missing - it is tricky.
;; Flag-setting counterpart of "*<code>si_1_zext": FLAGS_REG is set from
;; comparing the SImode any_or result with zero, and the zero-extended
;; result is stored in the DImode operand 0.  Requires TARGET_64BIT and
;; ix86_match_ccmode (insn, CCNOmode).
13052 (define_insn "*<code>si_2_zext"
13053 [(set (reg FLAGS_REG)
13054 (compare (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0,rm,r")
13055 (match_operand:SI 2 "x86_64_general_operand" "rBMe,re,BM"))
13056 (const_int 0)))
13057 (set (match_operand:DI 0 "register_operand" "=r,r,r")
13058 (zero_extend:DI (any_or:SI (match_dup 1) (match_dup 2))))]
13059 "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
13060 && ix86_binary_operator_ok (<CODE>, SImode, operands,
13061 TARGET_APX_NDD)"
13062 "@
13063 <logic>{l}\t{%2, %k0|%k0, %2}
13064 <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}
13065 <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}"
13066 [(set_attr "type" "alu")
13067 (set_attr "isa" "*,apx_ndd,apx_ndd")
13068 (set_attr "mode" "SI")])
13069
;; Flag-setting any_or with an x86_64_zext_immediate_operand (constraint Z).
;; FLAGS_REG is set from the SImode operation compared with zero, while
;; operand 0 receives the DImode any_or of the zero-extended operand 1 and
;; operand 2.  Requires TARGET_64BIT and ix86_match_ccmode (insn, CCNOmode);
;; alternative 1 is the three-operand form marked isa apx_ndd.
13070 (define_insn "*<code>si_2_zext_imm"
13071 [(set (reg FLAGS_REG)
13072 (compare (any_or:SI
13073 (match_operand:SI 1 "nonimmediate_operand" "%0,rm")
13074 (match_operand:SI 2 "x86_64_zext_immediate_operand" "Z,Z"))
13075 (const_int 0)))
13076 (set (match_operand:DI 0 "register_operand" "=r,r")
13077 (any_or:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
13078 "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
13079 && ix86_binary_operator_ok (<CODE>, SImode, operands,
13080 TARGET_APX_NDD)"
13081 "@
13082 <logic>{l}\t{%2, %k0|%k0, %2}
13083 <logic>{l}\t{%2, %1, %k0|%k0, %1, %2}"
13084 [(set_attr "type" "alu")
13085 (set_attr "isa" "*,apx_ndd")
13086 (set_attr "mode" "SI")])
13087
;; Flags-only any_or: FLAGS_REG is set from comparing the result with zero
;; and the result itself goes to a clobbered scratch (operand 0), to which
;; operand 1 is tied.  Valid when ix86_match_ccmode (insn, CCNOmode) holds
;; and operands 1 and 2 are not both memory.
13088 (define_insn "*<code><mode>_3"
13089 [(set (reg FLAGS_REG)
13090 (compare (any_or:SWI
13091 (match_operand:SWI 1 "nonimmediate_operand" "%0")
13092 (match_operand:SWI 2 "<general_operand>" "<g>"))
13093 (const_int 0)))
13094 (clobber (match_scratch:SWI 0 "=<r>"))]
13095 "ix86_match_ccmode (insn, CCNOmode)
13096 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13097 "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
13098 [(set_attr "type" "alu")
13099 (set_attr "mode" "<MODE>")])
13100
13101 ;; Convert wide OR instructions with immediate operand to shorter QImode
13102 ;; equivalents when possible.
13103 ;; Don't do the splitting with memory operands, since it introduces risk
13104 ;; of memory mismatch stalls. We may want to do the splitting for optimizing
13105 ;; for size, but that can (should?) be handled by generic code instead.
13106 ;; Don't do the splitting for APX NDD as NDD does not support *h registers.
;; This split handles constants whose set bits all lie in bits 8..15
;; (INTVAL & ~(255 << 8) is zero).  It runs after reload, and only when
;; !TARGET_PARTIAL_REG_STALL or when optimizing the function for size.
;; Under TARGET_APX_NDD it still applies if operands 0 and 1 are the same
;; register.  The replacement operates on the 8-bit field at bit offset 8
;; of the HImode low parts of operands 0 and 1, with the constant shifted
;; right by 8 and converted to QImode.  A zero constant is handled in the
;; preparation code by emitting a plain move (or a deleted note when the
;; operands are equal) and finishing with DONE.
13107 (define_split
13108 [(set (match_operand:SWI248 0 "QIreg_operand")
13109 (any_or:SWI248 (match_operand:SWI248 1 "register_operand")
13110 (match_operand:SWI248 2 "const_int_operand")))
13111 (clobber (reg:CC FLAGS_REG))]
13112 "reload_completed
13113 && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
13114 && !(INTVAL (operands[2]) & ~(255 << 8))
13115 && !(TARGET_APX_NDD && REGNO (operands[0]) != REGNO (operands[1]))"
13116 [(parallel
13117 [(set (zero_extract:HI (match_dup 0)
13118 (const_int 8)
13119 (const_int 8))
13120 (subreg:HI
13121 (any_or:QI
13122 (subreg:QI
13123 (zero_extract:HI (match_dup 1)
13124 (const_int 8)
13125 (const_int 8)) 0)
13126 (match_dup 2)) 0))
13127 (clobber (reg:CC FLAGS_REG))])]
13128 {
13129 /* Handle the case where INTVAL (operands[2]) == 0. */
13130 if (operands[2] == const0_rtx)
13131 {
13132 if (!rtx_equal_p (operands[0], operands[1]))
13133 emit_move_insn (operands[0], operands[1]);
13134 else
13135 emit_note (NOTE_INSN_DELETED);
13136 DONE;
13137 }
13138 operands[0] = gen_lowpart (HImode, operands[0]);
13139 operands[1] = gen_lowpart (HImode, operands[1]);
13140 operands[2] = gen_int_mode (INTVAL (operands[2]) >> 8, QImode);
13141 })
13142
13143 ;; Since OR can be encoded with sign extended immediate, this is only
13144 ;; profitable when 7th bit is set.
;; The condition requires the constant to fit in the low 8 bits
;; (INTVAL & ~255 is zero) with the bit of value 128 set.  The split runs
;; after reload, only when !TARGET_PARTIAL_REG_STALL or when optimizing the
;; function for size, and under TARGET_APX_NDD only if operands 0 and 1 are
;; rtx_equal_p.  The replacement is a QImode operation storing through
;; strict_low_part into the QImode low part of operand 0.
13145 (define_split
13146 [(set (match_operand:SWI248 0 "any_QIreg_operand")
13147 (any_or:SWI248 (match_operand:SWI248 1 "general_operand")
13148 (match_operand:SWI248 2 "const_int_operand")))
13149 (clobber (reg:CC FLAGS_REG))]
13150 "reload_completed
13151 && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
13152 && !(INTVAL (operands[2]) & ~255)
13153 && (INTVAL (operands[2]) & 128)
13154 && !(TARGET_APX_NDD
13155 && !rtx_equal_p (operands[0], operands[1]))"
13156 [(parallel [(set (strict_low_part (match_dup 0))
13157 (any_or:QI (match_dup 1)
13158 (match_dup 2)))
13159 (clobber (reg:CC FLAGS_REG))])]
13160 {
13161 operands[0] = gen_lowpart (QImode, operands[0]);
13162 operands[1] = gen_lowpart (QImode, operands[1]);
13163 operands[2] = gen_int_mode (INTVAL (operands[2]), QImode);
13164 })
13165
;; Expander for a flag-setting XOR on the 8-bit field at bit offset 8 of an
;; HImode register.  The field of operand 1 is XORed with the const_int
;; operand 2; the result is compared with zero to set FLAGS_REG in CCNOmode
;; and is stored into the same field of operand 0.
13166 (define_expand "xorqi_ext_1_cc"
13167 [(parallel
13168 [(set (reg:CCNO FLAGS_REG)
13169 (compare:CCNO
13170 (xor:QI
13171 (subreg:QI
13172 (zero_extract:HI (match_operand:HI 1 "register_operand")
13173 (const_int 8)
13174 (const_int 8)) 0)
13175 (match_operand:QI 2 "const_int_operand"))
13176 (const_int 0)))
13177 (set (zero_extract:HI (match_operand:HI 0 "register_operand")
13178 (const_int 8)
13179 (const_int 8))
13180 (subreg:HI
13181 (xor:QI
13182 (subreg:QI
13183 (zero_extract:HI (match_dup 1)
13184 (const_int 8)
13185 (const_int 8)) 0)
13186 (match_dup 2)) 0))])])
13187
13188 ;; Peephole2 rega = 0; rega op= regb into rega = regb.
;; "op" is any code of the any_or_plus iterator.  Both matched insns clobber
;; FLAGS_REG; the replacement is a plain set without a flags clobber.
;; The condition rejects the case where operand 1 mentions operand 0.
13189 (define_peephole2
13190 [(parallel [(set (match_operand:SWI 0 "general_reg_operand")
13191 (const_int 0))
13192 (clobber (reg:CC FLAGS_REG))])
13193 (parallel [(set (match_dup 0)
13194 (any_or_plus:SWI (match_dup 0)
13195 (match_operand:SWI 1 "<general_operand>")))
13196 (clobber (reg:CC FLAGS_REG))])]
13197 "!reg_mentioned_p (operands[0], operands[1])"
13198 [(set (match_dup 0) (match_dup 1))])
13199
13200 ;; Peephole2 dead instruction in rega = 0; rega op= rega.
;; The second insn (any_or_plus of the just-cleared register with itself)
;; is dropped; only the flag-clobbering clear of operand 0 is re-emitted.
13201 (define_peephole2
13202 [(parallel [(set (match_operand:SWI 0 "general_reg_operand")
13203 (const_int 0))
13204 (clobber (reg:CC FLAGS_REG))])
13205 (parallel [(set (match_dup 0)
13206 (any_or_plus:SWI (match_dup 0) (match_dup 0)))
13207 (clobber (reg:CC FLAGS_REG))])]
13208 ""
13209 [(parallel [(set (match_dup 0) (const_int 0))
13210 (clobber (reg:CC FLAGS_REG))])])
13211
13212 ;; Split DST = (HI<<32)|LO early to minimize register usage.
;; Form 1: (operand 1 << operand 2) combined via any_or_plus with the
;; zero-extended operand 3, where operand 1 is already in the double-word
;; mode.  The shift count must equal <MODE_SIZE> * BITS_PER_UNIT.
;; After reload, split_double_concat is called with operand 3 and the
;; <MODE>mode low part of operand 1.
13213 (define_insn_and_split "*concat<mode><dwi>3_1"
13214 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
13215 (any_or_plus:<DWI>
13216 (ashift:<DWI> (match_operand:<DWI> 1 "register_operand" "r,r")
13217 (match_operand:QI 2 "const_int_operand"))
13218 (zero_extend:<DWI>
13219 (match_operand:DWIH 3 "nonimmediate_operand" "r,m"))))]
13220 "INTVAL (operands[2]) == <MODE_SIZE> * BITS_PER_UNIT"
13221 "#"
13222 "&& reload_completed"
13223 [(const_int 0)]
13224 {
13225 split_double_concat (<DWI>mode, operands[0], operands[3],
13226 gen_lowpart (<MODE>mode, operands[1]));
13227 DONE;
13228 })
13229
;; Form 2: same as "*concat<mode><dwi>3_1" with the any_or_plus operands in
;; the other order: zero-extended operand 1 first, then the double-word
;; operand 2 shifted left by operand 3 (== <MODE_SIZE> * BITS_PER_UNIT).
;; After reload, split_double_concat is called with operand 1 and the
;; <MODE>mode low part of operand 2.
13230 (define_insn_and_split "*concat<mode><dwi>3_2"
13231 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
13232 (any_or_plus:<DWI>
13233 (zero_extend:<DWI>
13234 (match_operand:DWIH 1 "nonimmediate_operand" "r,m"))
13235 (ashift:<DWI> (match_operand:<DWI> 2 "register_operand" "r,r")
13236 (match_operand:QI 3 "const_int_operand"))))]
13237 "INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
13238 "#"
13239 "&& reload_completed"
13240 [(const_int 0)]
13241 {
13242 split_double_concat (<DWI>mode, operands[0], operands[1],
13243 gen_lowpart (<MODE>mode, operands[2]));
13244 DONE;
13245 })
13246
;; Form 3: both halves are zero-extended DWIH operands; operand 1 is the
;; shifted one (shift count operand 2 == <MODE_SIZE> * BITS_PER_UNIT) and
;; operand 3 the unshifted one.  Alternatives 3 and 4 are marked isa x64;
;; alternative 4 uses constraint x for operands 0 and 1 with operand 3 tied
;; to operand 0.  After reload, an SSE destination is handled by emitting
;; gen_vec_concatv2di on a V2DImode view of operand 0; otherwise
;; split_double_concat is called with operands 3 and 1.
13247 (define_insn_and_split "*concat<mode><dwi>3_3"
13248 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,r,&r,x")
13249 (any_or_plus:<DWI>
13250 (ashift:<DWI>
13251 (zero_extend:<DWI>
13252 (match_operand:DWIH 1 "nonimmediate_operand" "r,m,r,m,x"))
13253 (match_operand:QI 2 "const_int_operand"))
13254 (zero_extend:<DWI>
13255 (match_operand:DWIH 3 "nonimmediate_operand" "r,r,m,m,0"))))]
13256 "INTVAL (operands[2]) == <MODE_SIZE> * BITS_PER_UNIT"
13257 "#"
13258 "&& reload_completed"
13259 [(const_int 0)]
13260 {
13261 if (SSE_REG_P (operands[0]))
13262 {
13263 rtx tmp = gen_rtx_REG (V2DImode, REGNO (operands[0]));
13264 emit_insn (gen_vec_concatv2di (tmp, operands[3], operands[1]));
13265 }
13266 else
13267 split_double_concat (<DWI>mode, operands[0], operands[3], operands[1]);
13268 DONE;
13269 }
13270 [(set_attr "isa" "*,*,*,x64,x64")])
13271
;; Form 4: like "*concat<mode><dwi>3_3" with the any_or_plus operands in
;; the other order: zero-extended operand 1 is unshifted and zero-extended
;; operand 2 is shifted by operand 3 (== <MODE_SIZE> * BITS_PER_UNIT).
;; There is no SSE alternative here; alternative 3 is marked isa x64.
;; After reload, split_double_concat is called with operands 1 and 2.
13272 (define_insn_and_split "*concat<mode><dwi>3_4"
13273 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r,r,&r")
13274 (any_or_plus:<DWI>
13275 (zero_extend:<DWI>
13276 (match_operand:DWIH 1 "nonimmediate_operand" "r,m,r,m"))
13277 (ashift:<DWI>
13278 (zero_extend:<DWI>
13279 (match_operand:DWIH 2 "nonimmediate_operand" "r,r,m,m"))
13280 (match_operand:QI 3 "const_int_operand"))))]
13281 "INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
13282 "#"
13283 "&& reload_completed"
13284 [(const_int 0)]
13285 {
13286 split_double_concat (<DWI>mode, operands[0], operands[1], operands[2]);
13287 DONE;
13288 }
13289 [(set_attr "isa" "*,*,*,x64")])
13290
;; Form 5: (operand 1 << operand 2) combined via any_or_plus with a
;; constant operand 3, iterating over the double-word mode DWI itself.
;; The shift count must equal <MODE_SIZE> * BITS_PER_UNIT / 2.
;; Constant requirements, as spelled out in the condition:
;;   - DImode: a CONST_INT with no bits set outside GET_MODE_MASK (SImode);
;;   - otherwise: a non-negative CONST_INT, or a two-element
;;     CONST_WIDE_INT whose element 1 is zero;
;;   - the CONST_INT (or element 0 of the CONST_WIDE_INT) must not satisfy
;;     ix86_endbr_immediate_operand.
;; After reload, the <HALF>mode subreg at offset 0 of operand 3 and the
;; <HALF>mode low part of operand 1 are passed to split_double_concat.
13291 (define_insn_and_split "*concat<half><mode>3_5"
13292 [(set (match_operand:DWI 0 "nonimmediate_operand" "=r,o,o")
13293 (any_or_plus:DWI
13294 (ashift:DWI (match_operand:DWI 1 "register_operand" "r,r,r")
13295 (match_operand:QI 2 "const_int_operand"))
13296 (match_operand:DWI 3 "const_scalar_int_operand" "n,n,Wd")))]
13297 "INTVAL (operands[2]) == <MODE_SIZE> * BITS_PER_UNIT / 2
13298 && (<MODE>mode == DImode
13299 ? CONST_INT_P (operands[3])
13300 && (UINTVAL (operands[3]) & ~GET_MODE_MASK (SImode)) == 0
13301 : CONST_INT_P (operands[3])
13302 ? INTVAL (operands[3]) >= 0
13303 : CONST_WIDE_INT_NUNITS (operands[3]) == 2
13304 && CONST_WIDE_INT_ELT (operands[3], 1) == 0)
13305 && !(CONST_INT_P (operands[3])
13306 ? ix86_endbr_immediate_operand (operands[3], VOIDmode)
13307 : ix86_endbr_immediate_operand (GEN_INT (CONST_WIDE_INT_ELT (operands[3],
13308 0)),
13309 VOIDmode))"
13310 "#"
13311 "&& reload_completed"
13312 [(const_int 0)]
13313 {
13314 rtx op3 = simplify_subreg (<HALF>mode, operands[3], <MODE>mode, 0);
13315 split_double_concat (<MODE>mode, operands[0], op3,
13316 gen_lowpart (<HALF>mode, operands[1]));
13317 DONE;
13318 }
13319 [(set_attr "isa" "*,nox64,x64")])
13320
;; Form 6: (zero-extended operand 1 << operand 2) combined via any_or_plus
;; with a constant operand 3.  The shift count must equal
;; <MODE_SIZE> * BITS_PER_UNIT.  The constant is restricted exactly as in
;; "*concat<half><mode>3_5", with <DWI>mode taking the place of <MODE>mode
;; in the DImode test.  After reload, the <MODE>mode subreg at offset 0 of
;; operand 3 and operand 1 are passed to split_double_concat.
13321 (define_insn_and_split "*concat<mode><dwi>3_6"
13322 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o,o,r")
13323 (any_or_plus:<DWI>
13324 (ashift:<DWI>
13325 (zero_extend:<DWI>
13326 (match_operand:DWIH 1 "nonimmediate_operand" "r,r,r,m"))
13327 (match_operand:QI 2 "const_int_operand"))
13328 (match_operand:<DWI> 3 "const_scalar_int_operand" "n,n,Wd,n")))]
13329 "INTVAL (operands[2]) == <MODE_SIZE> * BITS_PER_UNIT
13330 && (<DWI>mode == DImode
13331 ? CONST_INT_P (operands[3])
13332 && (UINTVAL (operands[3]) & ~GET_MODE_MASK (SImode)) == 0
13333 : CONST_INT_P (operands[3])
13334 ? INTVAL (operands[3]) >= 0
13335 : CONST_WIDE_INT_NUNITS (operands[3]) == 2
13336 && CONST_WIDE_INT_ELT (operands[3], 1) == 0)
13337 && !(CONST_INT_P (operands[3])
13338 ? ix86_endbr_immediate_operand (operands[3], VOIDmode)
13339 : ix86_endbr_immediate_operand (GEN_INT (CONST_WIDE_INT_ELT (operands[3],
13340 0)),
13341 VOIDmode))"
13342 "#"
13343 "&& reload_completed"
13344 [(const_int 0)]
13345 {
13346 rtx op3 = simplify_subreg (<MODE>mode, operands[3], <DWI>mode, 0);
13347 split_double_concat (<DWI>mode, operands[0], op3, operands[1]);
13348 DONE;
13349 }
13350 [(set_attr "isa" "*,nox64,x64,*")])
13351
;; Form 7: zero-extended operand 1 combined via any_or_plus with a constant
;; operand 2 (no shift in the pattern).  The constant must have an all-zero
;; low part:
;;   - DImode: a CONST_INT with (value & GET_MODE_MASK (SImode)) == 0 that
;;     does not satisfy ix86_endbr_immediate_operand;
;;   - otherwise: a two-element CONST_WIDE_INT with element 0 equal to zero
;;     whose element 1 does not satisfy ix86_endbr_immediate_operand.
;; After reload, the upper part of the constant (INTVAL >> 32 for DImode,
;; element 1 otherwise) is converted to <MODE>mode and passed, together
;; with operand 1, to split_double_concat.
13352 (define_insn_and_split "*concat<mode><dwi>3_7"
13353 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o,o,r")
13354 (any_or_plus:<DWI>
13355 (zero_extend:<DWI>
13356 (match_operand:DWIH 1 "nonimmediate_operand" "r,r,r,m"))
13357 (match_operand:<DWI> 2 "const_scalar_int_operand" "n,n,Wd,n")))]
13358 "<DWI>mode == DImode
13359 ? CONST_INT_P (operands[2])
13360 && (UINTVAL (operands[2]) & GET_MODE_MASK (SImode)) == 0
13361 && !ix86_endbr_immediate_operand (operands[2], VOIDmode)
13362 : CONST_WIDE_INT_P (operands[2])
13363 && CONST_WIDE_INT_NUNITS (operands[2]) == 2
13364 && CONST_WIDE_INT_ELT (operands[2], 0) == 0
13365 && !ix86_endbr_immediate_operand (GEN_INT (CONST_WIDE_INT_ELT (operands[2],
13366 1)),
13367 VOIDmode)"
13368 "#"
13369 "&& reload_completed"
13370 [(const_int 0)]
13371 {
13372 rtx op2;
13373 if (<DWI>mode == DImode)
13374 op2 = gen_int_mode (INTVAL (operands[2]) >> 32, <MODE>mode);
13375 else
13376 op2 = gen_int_mode (CONST_WIDE_INT_ELT (operands[2], 1), <MODE>mode);
13377 split_double_concat (<DWI>mode, operands[0], operands[1], op2);
13378 DONE;
13379 }
13380 [(set_attr "isa" "*,nox64,x64,*")])
13381 \f
13382 ;; Negation instructions
13383
;; Named expander for integer negation over the SDWIM modes.  It has no
;; condition and hands all the work to ix86_expand_unary_operator, passing
;; TARGET_APX_NDD as the last argument, then finishes with DONE.
13384 (define_expand "neg<mode>2"
13385 [(set (match_operand:SDWIM 0 "nonimmediate_operand")
13386 (neg:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")))]
13387 ""
13388 "ix86_expand_unary_operator (NEG, <MODE>mode, operands,
13389 TARGET_APX_NDD); DONE;")
13390
;; Double-word negation, split after reload into three DWIH insns:
;;   1. negate operand 1 into operand 0 while setting FLAGS_REG in CCCmode
;;      (UNSPEC_CC_NE);
;;   2. operand 2 = carry + operand 3 + 0, clobbering flags;
;;   3. negate operand 2 in place, clobbering flags.
;; split_double_mode rewrites the operand array before the template is
;; used; presumably operands 0/1 become the low halves and operands 2/3
;; the high halves of the original operands 0/1 -- confirm against the
;; helper.  Alternative 1 (early-clobber register destination, source not
;; tied) is marked isa apx_ndd.
13391 (define_insn_and_split "*neg<dwi>2_doubleword"
13392 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,&r")
13393 (neg:<DWI> (match_operand:<DWI> 1 "nonimmediate_operand" "0,ro")))
13394 (clobber (reg:CC FLAGS_REG))]
13395 "ix86_unary_operator_ok (NEG, <DWI>mode, operands, TARGET_APX_NDD)"
13396 "#"
13397 "&& reload_completed"
13398 [(parallel
13399 [(set (reg:CCC FLAGS_REG)
13400 (unspec:CCC [(match_dup 1) (const_int 0)] UNSPEC_CC_NE))
13401 (set (match_dup 0) (neg:DWIH (match_dup 1)))])
13402 (parallel
13403 [(set (match_dup 2)
13404 (plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
13405 (match_dup 3))
13406 (const_int 0)))
13407 (clobber (reg:CC FLAGS_REG))])
13408 (parallel
13409 [(set (match_dup 2)
13410 (neg:DWIH (match_dup 2)))
13411 (clobber (reg:CC FLAGS_REG))])]
13412 "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);"
13413 [(set_attr "isa" "*,apx_ndd")])
13414
13415 ;; Convert:
13416 ;; mov %esi, %edx
13417 ;; negl %eax
13418 ;; adcl $0, %edx
13419 ;; negl %edx
13420 ;; to:
13421 ;; xorl %edx, %edx
13422 ;; negl %eax
13423 ;; sbbl %esi, %edx
;; Operand 0 is the copied-into register (%edx above), operand 1 the copy
;; source (%esi) and operand 2 the register negated with CCC flags (%eax).
;; The condition requires operands 0 and 2 to be different registers and
;; operand 1 to mention neither of them.  The preparation statement calls
;; ix86_expand_clear on operand 0, and the replacement template then
;; re-emits the flag-setting negation followed by the subtract-with-borrow
;; of operand 1 from operand 0.
13424
13425 (define_peephole2
13426 [(set (match_operand:SWI48 0 "general_reg_operand")
13427 (match_operand:SWI48 1 "nonimmediate_gr_operand"))
13428 (parallel
13429 [(set (reg:CCC FLAGS_REG)
13430 (unspec:CCC [(match_operand:SWI48 2 "general_reg_operand")
13431 (const_int 0)] UNSPEC_CC_NE))
13432 (set (match_dup 2) (neg:SWI48 (match_dup 2)))])
13433 (parallel
13434 [(set (match_dup 0)
13435 (plus:SWI48 (plus:SWI48
13436 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0))
13437 (match_dup 0))
13438 (const_int 0)))
13439 (clobber (reg:CC FLAGS_REG))])
13440 (parallel
13441 [(set (match_dup 0)
13442 (neg:SWI48 (match_dup 0)))
13443 (clobber (reg:CC FLAGS_REG))])]
13444 "REGNO (operands[0]) != REGNO (operands[2])
13445 && !reg_mentioned_p (operands[0], operands[1])
13446 && !reg_mentioned_p (operands[2], operands[1])"
13447 [(parallel
13448 [(set (reg:CCC FLAGS_REG)
13449 (unspec:CCC [(match_dup 2) (const_int 0)] UNSPEC_CC_NE))
13450 (set (match_dup 2) (neg:SWI48 (match_dup 2)))])
13451 (parallel
13452 [(set (match_dup 0)
13453 (minus:SWI48 (minus:SWI48
13454 (match_dup 0)
13455 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0)))
13456 (match_dup 1)))
13457 (clobber (reg:CC FLAGS_REG))])]
13458 "ix86_expand_clear (operands[0]);")
13459
13460 ;; Convert:
13461 ;; xorl %edx, %edx
13462 ;; negl %eax
13463 ;; adcl $0, %edx
13464 ;; negl %edx
13465 ;; to:
13466 ;; negl %eax
13467 ;; sbbl %edx, %edx // *x86_mov<mode>cc_0_m1
;; Operand 0 is the cleared register (%edx above) and operand 1 the
;; register negated with CCC flags (%eax); they must be different
;; registers.  The replacement keeps the flag-setting negation and sets
;; operand 0 to -1 when the carry condition (ltu) holds and to 0 otherwise.
13468
13469 (define_peephole2
13470 [(parallel
13471 [(set (match_operand:SWI48 0 "general_reg_operand") (const_int 0))
13472 (clobber (reg:CC FLAGS_REG))])
13473 (parallel
13474 [(set (reg:CCC FLAGS_REG)
13475 (unspec:CCC [(match_operand:SWI48 1 "general_reg_operand")
13476 (const_int 0)] UNSPEC_CC_NE))
13477 (set (match_dup 1) (neg:SWI48 (match_dup 1)))])
13478 (parallel
13479 [(set (match_dup 0)
13480 (plus:SWI48 (plus:SWI48
13481 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0))
13482 (match_dup 0))
13483 (const_int 0)))
13484 (clobber (reg:CC FLAGS_REG))])
13485 (parallel
13486 [(set (match_dup 0)
13487 (neg:SWI48 (match_dup 0)))
13488 (clobber (reg:CC FLAGS_REG))])]
13489 "REGNO (operands[0]) != REGNO (operands[1])"
13490 [(parallel
13491 [(set (reg:CCC FLAGS_REG)
13492 (unspec:CCC [(match_dup 1) (const_int 0)] UNSPEC_CC_NE))
13493 (set (match_dup 1) (neg:SWI48 (match_dup 1)))])
13494 (parallel
13495 [(set (match_dup 0)
13496 (if_then_else:SWI48 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0))
13497 (const_int -1)
13498 (const_int 0)))
13499 (clobber (reg:CC FLAGS_REG))])])
13500
;; Plain integer negation over the SWI modes; FLAGS_REG is clobbered.
;; Alternative 0 negates in place (operand 1 tied to operand 0) and prints
;; one operand; alternative 1 prints separate source and destination and is
;; marked isa apx_ndd.
13501 (define_insn "*neg<mode>_1"
13502 [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
13503 (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm")))
13504 (clobber (reg:CC FLAGS_REG))]
13505 "ix86_unary_operator_ok (NEG, <MODE>mode, operands, TARGET_APX_NDD)"
13506 "@
13507 neg{<imodesuffix>}\t%0
13508 neg{<imodesuffix>}\t{%1, %0|%0, %1}"
13509 [(set_attr "type" "negnot")
13510 (set_attr "isa" "*,apx_ndd")
13511 (set_attr "mode" "<MODE>")])
13512
;; SImode negation whose result is zero-extended into a DImode register;
;; FLAGS_REG is clobbered.  Requires TARGET_64BIT.  Operands are printed
;; with the %k modifier; alternative 1 is the two-operand-print form marked
;; isa apx_ndd.
13513 (define_insn "*negsi_1_zext"
13514 [(set (match_operand:DI 0 "register_operand" "=r,r")
13515 (zero_extend:DI
13516 (neg:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm"))))
13517 (clobber (reg:CC FLAGS_REG))]
13518 "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands,
13519 TARGET_APX_NDD)"
13520 "@
13521 neg{l}\t%k0
13522 neg{l}\t{%k1, %k0|%k0, %k1}"
13523 [(set_attr "type" "negnot")
13524 (set_attr "isa" "*,apx_ndd")
13525 (set_attr "mode" "SI")])
13526
13527 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Negation stored through strict_low_part for the SWI12 modes; enabled
;; when !TARGET_PARTIAL_REG_STALL or when optimizing the function for size.
;; Alternative 0 negates in place.  Alternative 1 (template "#") is split
;; after reload, when operands 0 and 1 differ, into a strict_low_part copy
;; of operand 1 into operand 0 followed by an in-place negation.
13528 (define_insn_and_split "*neg<mode>_1_slp"
13529 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
13530 (neg:SWI12 (match_operand:SWI12 1 "register_operand" "0,!<r>")))
13531 (clobber (reg:CC FLAGS_REG))]
13532 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
13533 "@
13534 neg{<imodesuffix>}\t%0
13535 #"
13536 "&& reload_completed
13537 && !(rtx_equal_p (operands[0], operands[1]))"
13538 [(set (strict_low_part (match_dup 0)) (match_dup 1))
13539 (parallel
13540 [(set (strict_low_part (match_dup 0))
13541 (neg:SWI12 (match_dup 0)))
13542 (clobber (reg:CC FLAGS_REG))])]
13543 ""
13544 [(set_attr "type" "negnot")
13545 (set_attr "mode" "<MODE>")])
13546
;; Negation that also sets FLAGS_REG from comparing the negated value with
;; zero.  Valid when ix86_match_ccmode (insn, CCGOCmode) holds.
;; Alternative 1 is the separate-source form marked isa apx_ndd.
13547 (define_insn "*neg<mode>_2"
13548 [(set (reg FLAGS_REG)
13549 (compare
13550 (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm"))
13551 (const_int 0)))
13552 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
13553 (neg:SWI (match_dup 1)))]
13554 "ix86_match_ccmode (insn, CCGOCmode)
13555 && ix86_unary_operator_ok (NEG, <MODE>mode, operands,
13556 TARGET_APX_NDD)"
13557 "@
13558 neg{<imodesuffix>}\t%0
13559 neg{<imodesuffix>}\t{%1, %0|%0, %1}"
13560 [(set_attr "type" "negnot")
13561 (set_attr "isa" "*,apx_ndd")
13562 (set_attr "mode" "<MODE>")])
13563
;; Flag-setting SImode negation with the result zero-extended into a DImode
;; register.  Requires TARGET_64BIT and ix86_match_ccmode (insn, CCGOCmode).
;; Note that alternative 1 prints the source as %1 here, whereas
;; "*negsi_1_zext" prints it as %k1; operand 1 is SImode in both patterns.
13564 (define_insn "*negsi_2_zext"
13565 [(set (reg FLAGS_REG)
13566 (compare
13567 (neg:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm"))
13568 (const_int 0)))
13569 (set (match_operand:DI 0 "register_operand" "=r,r")
13570 (zero_extend:DI
13571 (neg:SI (match_dup 1))))]
13572 "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
13573 && ix86_unary_operator_ok (NEG, SImode, operands,
13574 TARGET_APX_NDD)"
13575 "@
13576 neg{l}\t%k0
13577 neg{l}\t{%1, %k0|%k0, %1}"
13578 [(set_attr "type" "negnot")
13579 (set_attr "isa" "*,apx_ndd")
13580 (set_attr "mode" "SI")])
13581
;; Negation that sets FLAGS_REG in CCCmode via UNSPEC_CC_NE on
;; (operand 1, 0) and stores the negated value in operand 0.  The insn
;; condition is empty; alternative 1 is marked isa apx_ndd.
13582 (define_insn "*neg<mode>_ccc_1"
13583 [(set (reg:CCC FLAGS_REG)
13584 (unspec:CCC
13585 [(match_operand:SWI 1 "nonimmediate_operand" "0,rm")
13586 (const_int 0)] UNSPEC_CC_NE))
13587 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
13588 (neg:SWI (match_dup 1)))]
13589 ""
13590 "@
13591 neg{<imodesuffix>}\t%0
13592 neg{<imodesuffix>}\t{%1, %0|%0, %1}"
13593 [(set_attr "type" "negnot")
13594 (set_attr "isa" "*,apx_ndd")
13595 (set_attr "mode" "<MODE>")])
13596
;; Flags-only variant of "*neg<mode>_ccc_1": FLAGS_REG is set in CCCmode
;; via UNSPEC_CC_NE and the negated value goes to a clobbered scratch
;; (operand 0).  Alternative 1 is marked isa apx_ndd.
13597 (define_insn "*neg<mode>_ccc_2"
13598 [(set (reg:CCC FLAGS_REG)
13599 (unspec:CCC
13600 [(match_operand:SWI 1 "nonimmediate_operand" "0,rm")
13601 (const_int 0)] UNSPEC_CC_NE))
13602 (clobber (match_scratch:SWI 0 "=<r>,r"))]
13603 ""
13604 "@
13605 neg{<imodesuffix>}\t%0
13606 neg{<imodesuffix>}\t{%1, %0|%0, %1}"
13607 [(set_attr "type" "negnot")
13608 (set_attr "isa" "*,apx_ndd")
13609 (set_attr "mode" "<MODE>")])
13610
;; Named expander (SWI48 modes, register operands only) producing the same
;; RTL shape as "*neg<mode>_ccc_1": CCC flags from UNSPEC_CC_NE on
;; (operand 1, 0) in parallel with operand 0 = -operand 1.
13611 (define_expand "x86_neg<mode>_ccc"
13612 [(parallel
13613 [(set (reg:CCC FLAGS_REG)
13614 (unspec:CCC [(match_operand:SWI48 1 "register_operand")
13615 (const_int 0)] UNSPEC_CC_NE))
13616 (set (match_operand:SWI48 0 "register_operand")
13617 (neg:SWI48 (match_dup 1)))])])
13618
13619 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Negate the 8-bit field at bit offset 8 of operand 1 (matched through
;; extract_operator) and store it into the same field of operand 0;
;; FLAGS_REG is clobbered.  Alternative 0 works in place and prints %h0.
;; Alternative 1 (template "#") is split after reload, when operands 0 and
;; 1 differ, into a field-to-field copy followed by an in-place negation.
;; The insn condition is empty and the split condition is written without
;; a leading "&&".
13620 (define_insn_and_split "*negqi_ext<mode>_1"
13621 [(set (zero_extract:SWI248
13622 (match_operand 0 "int248_register_operand" "+Q,&Q")
13623 (const_int 8)
13624 (const_int 8))
13625 (subreg:SWI248
13626 (neg:QI
13627 (subreg:QI
13628 (match_operator:SWI248 2 "extract_operator"
13629 [(match_operand 1 "int248_register_operand" "0,!Q")
13630 (const_int 8)
13631 (const_int 8)]) 0)) 0))
13632 (clobber (reg:CC FLAGS_REG))]
13633 ""
13634 "@
13635 neg{b}\t%h0
13636 #"
13637 "reload_completed
13638 && !(rtx_equal_p (operands[0], operands[1]))"
13639 [(set (zero_extract:SWI248
13640 (match_dup 0) (const_int 8) (const_int 8))
13641 (zero_extract:SWI248
13642 (match_dup 1) (const_int 8) (const_int 8)))
13643 (parallel
13644 [(set (zero_extract:SWI248
13645 (match_dup 0) (const_int 8) (const_int 8))
13646 (subreg:SWI248
13647 (neg:QI
13648 (subreg:QI
13649 (match_op_dup 2
13650 [(match_dup 0) (const_int 8) (const_int 8)]) 0)) 0))
13651 (clobber (reg:CC FLAGS_REG))])]
13652 ""
13653 [(set_attr "type" "negnot")
13654 (set_attr "mode" "QI")])
13655
13656 ;; Negate with jump on overflow.
;; Operand 0 = -operand 1, with FLAGS_REG set in CCOmode via UNSPEC_CC_NE
;; on (operand 1, operand 3), followed by a conditional jump to the label
;; in operand 2.  The preparation code sets operand 3 to the constant with
;; only the top bit of <MODE>mode set
;; (1 << (GET_MODE_BITSIZE (<MODE>mode) - 1)).
13657 (define_expand "negv<mode>3"
13658 [(parallel [(set (reg:CCO FLAGS_REG)
13659 (unspec:CCO
13660 [(match_operand:SWI 1 "register_operand")
13661 (match_dup 3)] UNSPEC_CC_NE))
13662 (set (match_operand:SWI 0 "register_operand")
13663 (neg:SWI (match_dup 1)))])
13664 (set (pc) (if_then_else
13665 (eq (reg:CCO FLAGS_REG) (const_int 0))
13666 (label_ref (match_operand 2))
13667 (pc)))]
13668 ""
13669 {
13670 operands[3]
13671 = gen_int_mode (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (<MODE>mode) - 1),
13672 <MODE>mode);
13673 })
13674
;; Insn matching the flag-setting negation generated by "negv<mode>3".
;; Operand 2 must be a const_int for which mode_signbit_p (<MODE>mode, ...)
;; holds.  Only the in-place form exists (operand 1 tied to operand 0), and
;; ix86_unary_operator_ok is called without the extra NDD argument.
13675 (define_insn "*negv<mode>3"
13676 [(set (reg:CCO FLAGS_REG)
13677 (unspec:CCO [(match_operand:SWI 1 "nonimmediate_operand" "0")
13678 (match_operand:SWI 2 "const_int_operand")]
13679 UNSPEC_CC_NE))
13680 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
13681 (neg:SWI (match_dup 1)))]
13682 "ix86_unary_operator_ok (NEG, <MODE>mode, operands)
13683 && mode_signbit_p (<MODE>mode, operands[2])"
13684 "neg{<imodesuffix>}\t%0"
13685 [(set_attr "type" "negnot")
13686 (set_attr "mode" "<MODE>")])
13687
13688 ;; Optimize *negsi_1 followed by *cmpsi_ccno_1 (PR target/91384)
;; Matches: copy operand 1 into operand 0, negate operand 0 (clobbering
;; flags), then compare operand 1 with zero in CCZmode.  The replacement
;; keeps the copy and uses a single negation that both sets the CCZ flags
;; from the negated value and stores it in operand 0, so the separate
;; compare of operand 1 disappears.
13689 (define_peephole2
13690 [(set (match_operand:SWI 0 "general_reg_operand")
13691 (match_operand:SWI 1 "general_reg_operand"))
13692 (parallel [(set (match_dup 0) (neg:SWI (match_dup 0)))
13693 (clobber (reg:CC FLAGS_REG))])
13694 (set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 1) (const_int 0)))]
13695 ""
13696 [(set (match_dup 0) (match_dup 1))
13697 (parallel [(set (reg:CCZ FLAGS_REG)
13698 (compare:CCZ (neg:SWI (match_dup 0)) (const_int 0)))
13699 (set (match_dup 0) (neg:SWI (match_dup 0)))])])
13700
13701 ;; Special expand pattern to handle integer mode abs
;; Enabled when TARGET_CMOVE holds, and for QImode only if
;; !TARGET_PARTIAL_REG_STALL.  When TARGET_EXPAND_ABS is set the
;; preparation code open-codes the operation as an arithmetic right shift
;; by (precision - 1), an XOR and a subtraction (see the formula in the C
;; comment below), moves the result into operand 0 if it ended up elsewhere
;; and finishes with DONE.  Otherwise the abs RTL with a FLAGS_REG clobber
;; from the template is emitted.
13702
13703 (define_expand "abs<mode>2"
13704 [(parallel
13705 [(set (match_operand:SDWIM 0 "register_operand")
13706 (abs:SDWIM
13707 (match_operand:SDWIM 1 "general_operand")))
13708 (clobber (reg:CC FLAGS_REG))])]
13709 "TARGET_CMOVE
13710 && (<MODE>mode != QImode || !TARGET_PARTIAL_REG_STALL)"
13711 {
13712 if (TARGET_EXPAND_ABS)
13713 {
13714 machine_mode mode = <MODE>mode;
13715 operands[1] = force_reg (mode, operands[1]);
13716
13717 /* Generate rtx abs using:
13718 abs (x) = (((signed) x >> (W-1)) ^ x) - ((signed) x >> (W-1)) */
13719
13720 rtx shift_amount = gen_int_mode (GET_MODE_PRECISION (mode) - 1, QImode);
13721 rtx shift_dst = expand_simple_binop (mode, ASHIFTRT, operands[1],
13722 shift_amount, NULL_RTX,
13723 0, OPTAB_DIRECT);
13724 rtx xor_dst = expand_simple_binop (mode, XOR, shift_dst, operands[1],
13725 operands[0], 0, OPTAB_DIRECT);
13726 rtx minus_dst = expand_simple_binop (mode, MINUS, xor_dst, shift_dst,
13727 operands[0], 0, OPTAB_DIRECT);
13728 if (!rtx_equal_p (minus_dst, operands[0]))
13729 emit_move_insn (operands[0], minus_dst);
13730 DONE;
13731 }
13732 })
13733
;; Double-word abs, available when TARGET_CMOVE holds and
;; ix86_pre_reload_split () is true; split condition "&& 1".
;; The replacement sequence on DWIH halves is:
;;   1. operand 2 = -operand 1, setting CCC flags (UNSPEC_CC_NE);
;;   2. operand 5 = carry + operand 4 + 0;
;;   3. operand 5 = -operand 5, setting CCGOC flags;
;;   4. operand 0 = (flags ge 0) ? operand 2 : operand 1;
;;   5. operand 3 = (flags ge 0) ? operand 5 : operand 4.
;; The preparation code forces operand 1 into a register, creates a new
;; double-word pseudo for operand 2 and calls split_double_mode on three
;; operands; presumably operands 0-2 then hold the low halves and operands
;; 3-5 the high halves -- confirm against the helper.
13734 (define_insn_and_split "*abs<dwi>2_doubleword"
13735 [(set (match_operand:<DWI> 0 "register_operand")
13736 (abs:<DWI>
13737 (match_operand:<DWI> 1 "general_operand")))
13738 (clobber (reg:CC FLAGS_REG))]
13739 "TARGET_CMOVE
13740 && ix86_pre_reload_split ()"
13741 "#"
13742 "&& 1"
13743 [(parallel
13744 [(set (reg:CCC FLAGS_REG)
13745 (unspec:CCC [(match_dup 1) (const_int 0)] UNSPEC_CC_NE))
13746 (set (match_dup 2) (neg:DWIH (match_dup 1)))])
13747 (parallel
13748 [(set (match_dup 5)
13749 (plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
13750 (match_dup 4))
13751 (const_int 0)))
13752 (clobber (reg:CC FLAGS_REG))])
13753 (parallel
13754 [(set (reg:CCGOC FLAGS_REG)
13755 (compare:CCGOC
13756 (neg:DWIH (match_dup 5))
13757 (const_int 0)))
13758 (set (match_dup 5)
13759 (neg:DWIH (match_dup 5)))])
13760 (set (match_dup 0)
13761 (if_then_else:DWIH
13762 (ge (reg:CCGOC FLAGS_REG) (const_int 0))
13763 (match_dup 2)
13764 (match_dup 1)))
13765 (set (match_dup 3)
13766 (if_then_else:DWIH
13767 (ge (reg:CCGOC FLAGS_REG) (const_int 0))
13768 (match_dup 5)
13769 (match_dup 4)))]
13770 {
13771 operands[1] = force_reg (<DWI>mode, operands[1]);
13772 operands[2] = gen_reg_rtx (<DWI>mode);
13773
13774 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
13775 })
13776
;; Double-word negated abs, -(abs (operand 1)).  The structure is the same
;; as "*abs<dwi>2_doubleword" except that the two final conditional moves
;; test "lt" instead of "ge", so the negated halves (operands 2 and 5) are
;; selected when the CCGOC flags compare less than zero.
;; The preparation code forces operand 1 into a register, creates a new
;; double-word pseudo for operand 2 and calls split_double_mode on three
;; operands; presumably operands 0-2 then hold the low halves and operands
;; 3-5 the high halves -- confirm against the helper.
13777 (define_insn_and_split "*nabs<dwi>2_doubleword"
13778 [(set (match_operand:<DWI> 0 "register_operand")
13779 (neg:<DWI>
13780 (abs:<DWI>
13781 (match_operand:<DWI> 1 "general_operand"))))
13782 (clobber (reg:CC FLAGS_REG))]
13783 "TARGET_CMOVE
13784 && ix86_pre_reload_split ()"
13785 "#"
13786 "&& 1"
13787 [(parallel
13788 [(set (reg:CCC FLAGS_REG)
13789 (unspec:CCC [(match_dup 1) (const_int 0)] UNSPEC_CC_NE))
13790 (set (match_dup 2) (neg:DWIH (match_dup 1)))])
13791 (parallel
13792 [(set (match_dup 5)
13793 (plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
13794 (match_dup 4))
13795 (const_int 0)))
13796 (clobber (reg:CC FLAGS_REG))])
13797 (parallel
13798 [(set (reg:CCGOC FLAGS_REG)
13799 (compare:CCGOC
13800 (neg:DWIH (match_dup 5))
13801 (const_int 0)))
13802 (set (match_dup 5)
13803 (neg:DWIH (match_dup 5)))])
13804 (set (match_dup 0)
13805 (if_then_else:DWIH
13806 (lt (reg:CCGOC FLAGS_REG) (const_int 0))
13807 (match_dup 2)
13808 (match_dup 1)))
13809 (set (match_dup 3)
13810 (if_then_else:DWIH
13811 (lt (reg:CCGOC FLAGS_REG) (const_int 0))
13812 (match_dup 5)
13813 (match_dup 4)))]
13814 {
13815 operands[1] = force_reg (<DWI>mode, operands[1]);
13816 operands[2] = gen_reg_rtx (<DWI>mode);
13817
13818 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
13819 })
13820
;; Absolute value of a single-word (SWI) integer.  Requires TARGET_CMOVE and
;; ix86_pre_reload_split (); QImode is additionally rejected when
;; TARGET_PARTIAL_REG_STALL is set.  Output as "#" and always split into:
;;   1. operand 2 = -operand 1, setting the CCGOC flags from that negation;
;;   2. operand 0 = operand 2 if the flags compare "ge", else operand 1.
;; The preparation code forces operand 1 into a register and makes operand 2
;; a fresh pseudo of the same mode.
13821 (define_insn_and_split "*abs<mode>2_1"
13822 [(set (match_operand:SWI 0 "register_operand")
13823 (abs:SWI
13824 (match_operand:SWI 1 "general_operand")))
13825 (clobber (reg:CC FLAGS_REG))]
13826 "TARGET_CMOVE
13827 && (<MODE>mode != QImode || !TARGET_PARTIAL_REG_STALL)
13828 && ix86_pre_reload_split ()"
13829 "#"
13830 "&& 1"
13831 [(parallel
13832 [(set (reg:CCGOC FLAGS_REG)
13833 (compare:CCGOC
13834 (neg:SWI (match_dup 1))
13835 (const_int 0)))
13836 (set (match_dup 2)
13837 (neg:SWI (match_dup 1)))])
13838 (set (match_dup 0)
13839 (if_then_else:SWI
13840 (ge (reg:CCGOC FLAGS_REG) (const_int 0))
13841 (match_dup 2)
13842 (match_dup 1)))]
13843 {
13844 operands[1] = force_reg (<MODE>mode, operands[1]);
13845 operands[2] = gen_reg_rtx (<MODE>mode);
13846 })
13847
;; Negated absolute value, (neg (abs x)), of a single-word (SWI) integer.
;; Same condition and split shape as *abs<mode>2_1: operand 2 receives
;; -operand 1 while the CCGOC flags are set from that negation.  Here the
;; final if_then_else selects operand 2 when the flags compare "lt" and
;; operand 1 otherwise.
13848 (define_insn_and_split "*nabs<mode>2_1"
13849 [(set (match_operand:SWI 0 "register_operand")
13850 (neg:SWI
13851 (abs:SWI
13852 (match_operand:SWI 1 "general_operand"))))
13853 (clobber (reg:CC FLAGS_REG))]
13854 "TARGET_CMOVE
13855 && (<MODE>mode != QImode || !TARGET_PARTIAL_REG_STALL)
13856 && ix86_pre_reload_split ()"
13857 "#"
13858 "&& 1"
13859 [(parallel
13860 [(set (reg:CCGOC FLAGS_REG)
13861 (compare:CCGOC
13862 (neg:SWI (match_dup 1))
13863 (const_int 0)))
13864 (set (match_dup 2)
13865 (neg:SWI (match_dup 1)))])
13866 (set (match_dup 0)
13867 (if_then_else:SWI
13868 (lt (reg:CCGOC FLAGS_REG) (const_int 0))
13869 (match_dup 2)
13870 (match_dup 1)))]
13871 {
13872 operands[1] = force_reg (<MODE>mode, operands[1]);
13873 operands[2] = gen_reg_rtx (<MODE>mode);
13874 })
13875
;; Expander for the absneg codes on TFmode registers, available with
;; TARGET_SSE.  All work is delegated to ix86_expand_fp_absneg_operator;
;; the RTL template itself is never emitted because the code ends in DONE.
13876 (define_expand "<code>tf2"
13877 [(set (match_operand:TF 0 "register_operand")
13878 (absneg:TF (match_operand:TF 1 "register_operand")))]
13879 "TARGET_SSE"
13880 "ix86_expand_fp_absneg_operator (<CODE>, TFmode, operands); DONE;")
13881
;; TFmode absneg carrying an extra (use ...) of operand 2.  Output as "#" and
;; split once reload has completed into a single <absneg_op>:TF operation on
;; operands 1 and 2.  Operand 2 is presumably the sign-bit mask built by the
;; expander -- TODO confirm.
;; Before the replacement is emitted the two inputs may be swapped:
;;   - with TARGET_AVX, when operand 1 is a MEM;
;;   - without AVX, when operand 0 matches operand 2.
;; Alternatives 0-1 are the "noavx" forms, alternatives 2-3 the "avx" forms.
13882 (define_insn_and_split "*<code>tf2_1"
13883 [(set (match_operand:TF 0 "register_operand" "=x,x,Yv,Yv")
13884 (absneg:TF
13885 (match_operand:TF 1 "vector_operand" "0,xBm,Yv,m")))
13886 (use (match_operand:TF 2 "vector_operand" "xBm,0,Yvm,Yv"))]
13887 "TARGET_SSE"
13888 "#"
13889 "&& reload_completed"
13890 [(set (match_dup 0)
13891 (<absneg_op>:TF (match_dup 1) (match_dup 2)))]
13892 {
13893 if (TARGET_AVX)
13894 {
13895 if (MEM_P (operands[1]))
13896 std::swap (operands[1], operands[2]);
13897 }
13898 else
13899 {
13900 if (operands_match_p (operands[0], operands[2]))
13901 std::swap (operands[1], operands[2]);
13902 }
13903 }
13904 [(set_attr "isa" "noavx,noavx,avx,avx")])
13905
;; TFmode negated absolute value, (neg (abs x)), carrying a (use ...) of
;; operand 2.  Output as "#" and split after reload into a single ior:TF of
;; operands 1 and 2.  Operand 2 is presumably a sign-bit mask -- TODO
;; confirm.  The operand swapping rules are the same as in *<code>tf2_1:
;; with TARGET_AVX swap when operand 1 is a MEM, otherwise swap when
;; operand 0 matches operand 2.
13906 (define_insn_and_split "*nabstf2_1"
13907 [(set (match_operand:TF 0 "register_operand" "=x,x,Yv,Yv")
13908 (neg:TF
13909 (abs:TF
13910 (match_operand:TF 1 "vector_operand" "0,xBm,Yv,m"))))
13911 (use (match_operand:TF 2 "vector_operand" "xBm,0,Yvm,Yv"))]
13912 "TARGET_SSE"
13913 "#"
13914 "&& reload_completed"
13915 [(set (match_dup 0)
13916 (ior:TF (match_dup 1) (match_dup 2)))]
13917 {
13918 if (TARGET_AVX)
13919 {
13920 if (MEM_P (operands[1]))
13921 std::swap (operands[1], operands[2]);
13922 }
13923 else
13924 {
13925 if (operands_match_p (operands[0], operands[2]))
13926 std::swap (operands[1], operands[2]);
13927 }
13928 }
13929 [(set_attr "isa" "noavx,noavx,avx,avx")])
13930
;; Expander for the absneg codes on HFmode registers, available with
;; TARGET_AVX512FP16.  Delegates to ix86_expand_fp_absneg_operator and
;; finishes with DONE, so the template is not emitted as written.
13931 (define_expand "<code>hf2"
13932 [(set (match_operand:HF 0 "register_operand")
13933 (absneg:HF (match_operand:HF 1 "register_operand")))]
13934 "TARGET_AVX512FP16"
13935 "ix86_expand_fp_absneg_operator (<CODE>, HFmode, operands); DONE;")
13936
;; Expander for the absneg codes on the X87MODEF modes.  Enabled when
;; TARGET_80387 is set, or when the mode is an SSE float mode and
;; TARGET_SSE_MATH is set.  Delegates to ix86_expand_fp_absneg_operator and
;; finishes with DONE.
13937 (define_expand "<code><mode>2"
13938 [(set (match_operand:X87MODEF 0 "register_operand")
13939 (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand")))]
13940 "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
13941 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
13942
13943 ;; Changing of sign for FP values is doable using integer unit too.
;; In-place absneg with a flags clobber, used when TARGET_80387 is set and
;; the mode is not handled by SSE math.  Two alternatives, both tying the
;; input to the output: "f" and the disparaged "!r".  Always output as "#",
;; so it relies on the define_splits that follow it in this file.
13944 (define_insn "*<code><mode>2_i387_1"
13945 [(set (match_operand:X87MODEF 0 "register_operand" "=f,!r")
13946 (absneg:X87MODEF
13947 (match_operand:X87MODEF 1 "register_operand" "0,0")))
13948 (clobber (reg:CC FLAGS_REG))]
13949 "TARGET_80387 && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
13950 "#")
13951
;; After reload, when both operands satisfy fp_register_operand, rewrite the
;; absneg-with-flags-clobber form as the same set without the clobber.
13952 (define_split
13953 [(set (match_operand:X87MODEF 0 "fp_register_operand")
13954 (absneg:X87MODEF (match_operand:X87MODEF 1 "fp_register_operand")))
13955 (clobber (reg:CC FLAGS_REG))]
13956 "TARGET_80387 && reload_completed"
13957 [(set (match_dup 0) (absneg:X87MODEF (match_dup 1)))])
13958
;; After reload, when both operands satisfy general_reg_operand, hand the
;; absneg over to ix86_split_fp_absneg_operator, which emits the replacement
;; itself (the template is just a (const_int 0) placeholder).
13959 (define_split
13960 [(set (match_operand:X87MODEF 0 "general_reg_operand")
13961 (absneg:X87MODEF (match_operand:X87MODEF 1 "general_reg_operand")))
13962 (clobber (reg:CC FLAGS_REG))]
13963 "TARGET_80387 && reload_completed"
13964 [(const_int 0)]
13965 "ix86_split_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
13966
;; HFmode absneg with a (use ...) of a V8HF operand 2 and a flags clobber,
;; available with TARGET_AVX512FP16.  Output as "#" and split after reload
;; into one <absneg_op>:V8HF operation; operands 0 and 1 are first replaced
;; by their V8HFmode lowpart subregs.  Operand 2 is presumably the sign-bit
;; mask vector -- TODO confirm.
13967 (define_insn_and_split "*<code>hf2_1"
13968 [(set (match_operand:HF 0 "register_operand" "=Yv")
13969 (absneg:HF
13970 (match_operand:HF 1 "register_operand" "Yv")))
13971 (use (match_operand:V8HF 2 "vector_operand" "Yvm"))
13972 (clobber (reg:CC FLAGS_REG))]
13973 "TARGET_AVX512FP16"
13974 "#"
13975 "&& reload_completed"
13976 [(set (match_dup 0)
13977 (<absneg_op>:V8HF (match_dup 1) (match_dup 2)))]
13978 {
13979 operands[0] = lowpart_subreg (V8HFmode, operands[0], HFmode);
13980 operands[1] = lowpart_subreg (V8HFmode, operands[1], HFmode);
13981 })
13982
;; MODEF absneg with a (use ...) of a vector operand 2 and a flags clobber.
;; Five alternatives: 0-2 use SSE register classes (x / Yv), 3 uses "f" and
;; 4 the disparaged "!r"; alternatives 3 and 4 accept anything ("X") for
;; operand 2.  Always output as "#"; the define_splits that follow it in
;; this file provide the real code.
;; The "enabled" attribute selects the usable alternatives:
;;   - SSE float mode with TARGET_SSE_MATH: alternatives 3 and 4 are enabled
;;     only under TARGET_MIX_SSE_I387, the others keep the default ("*");
;;   - otherwise: only alternatives 3 and 4 are enabled.
13983 (define_insn "*<code><mode>2_1"
13984 [(set (match_operand:MODEF 0 "register_operand" "=x,x,Yv,f,!r")
13985 (absneg:MODEF
13986 (match_operand:MODEF 1 "register_operand" "0,x,Yv,0,0")))
13987 (use (match_operand:<ssevecmode> 2 "vector_operand" "xBm,0,Yvm,X,X"))
13988 (clobber (reg:CC FLAGS_REG))]
13989 "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
13990 "#"
13991 [(set_attr "isa" "noavx,noavx,avx,*,*")
13992 (set (attr "enabled")
13993 (if_then_else
13994 (match_test ("SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"))
13995 (if_then_else
13996 (eq_attr "alternative" "3,4")
13997 (symbol_ref "TARGET_MIX_SSE_I387")
13998 (const_string "*"))
13999 (if_then_else
14000 (eq_attr "alternative" "3,4")
14001 (symbol_ref "true")
14002 (symbol_ref "false"))))])
14003
;; After reload, when both operands satisfy sse_reg_operand and SSE math is
;; in use for the mode, turn the MODEF absneg into one <absneg_op> in the
;; vector mode <ssevecmodef>.  Operands 0 and 1 are replaced by their
;; vector-mode lowpart subregs.  Without AVX, operands 1 and 2 are swapped
;; when operand 0 matches operand 2.
14004 (define_split
14005 [(set (match_operand:MODEF 0 "sse_reg_operand")
14006 (absneg:MODEF
14007 (match_operand:MODEF 1 "sse_reg_operand")))
14008 (use (match_operand:<ssevecmodef> 2 "vector_operand"))
14009 (clobber (reg:CC FLAGS_REG))]
14010 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
14011 && reload_completed"
14012 [(set (match_dup 0)
14013 (<absneg_op>:<ssevecmodef> (match_dup 1) (match_dup 2)))]
14014 {
14015 machine_mode mode = <MODE>mode;
14016 machine_mode vmode = <ssevecmodef>mode;
14017
14018 operands[0] = lowpart_subreg (vmode, operands[0], mode);
14019 operands[1] = lowpart_subreg (vmode, operands[1], mode);
14020
14021 if (!TARGET_AVX && operands_match_p (operands[0], operands[2]))
14022 std::swap (operands[1], operands[2]);
14023 })
14024
;; After reload, when both operands satisfy fp_register_operand, reduce the
;; MODEF absneg to the bare set: both the (use ...) of operand 2 and the
;; flags clobber are dropped.
14025 (define_split
14026 [(set (match_operand:MODEF 0 "fp_register_operand")
14027 (absneg:MODEF (match_operand:MODEF 1 "fp_register_operand")))
14028 (use (match_operand 2))
14029 (clobber (reg:CC FLAGS_REG))]
14030 "TARGET_80387 && reload_completed"
14031 [(set (match_dup 0) (absneg:MODEF (match_dup 1)))])
14032
;; After reload, when both operands satisfy general_reg_operand, let
;; ix86_split_fp_absneg_operator emit the replacement for the MODEF absneg
;; (the template is just a (const_int 0) placeholder).
14033 (define_split
14034 [(set (match_operand:MODEF 0 "general_reg_operand")
14035 (absneg:MODEF (match_operand:MODEF 1 "general_reg_operand")))
14036 (use (match_operand 2))
14037 (clobber (reg:CC FLAGS_REG))]
14038 "TARGET_80387 && reload_completed"
14039 [(const_int 0)]
14040 "ix86_split_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
14041
;; MODEF negated absolute value, (neg (abs x)), for SSE math, with a
;; (use ...) of a vector operand 2 and no flags clobber.  Output as "#" and
;; split after reload into one ior in the vector mode <ssevecmodef>, using
;; vector-mode lowpart subregs of operands 0 and 1.  Without AVX, operands 1
;; and 2 are swapped when operand 0 matches operand 2.  Operand 2 is
;; presumably a sign-bit mask -- TODO confirm.
14042 (define_insn_and_split "*nabs<mode>2_1"
14043 [(set (match_operand:MODEF 0 "register_operand" "=x,x,Yv")
14044 (neg:MODEF
14045 (abs:MODEF
14046 (match_operand:MODEF 1 "register_operand" "0,x,Yv"))))
14047 (use (match_operand:<ssevecmode> 2 "vector_operand" "xBm,0,Yvm"))]
14048 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
14049 "#"
14050 "&& reload_completed"
14051 [(set (match_dup 0)
14052 (ior:<ssevecmodef> (match_dup 1) (match_dup 2)))]
14053 {
14054 machine_mode mode = <MODE>mode;
14055 machine_mode vmode = <ssevecmodef>mode;
14056
14057 operands[0] = lowpart_subreg (vmode, operands[0], mode);
14058 operands[1] = lowpart_subreg (vmode, operands[1], mode);
14059
14060 if (!TARGET_AVX && operands_match_p (operands[0], operands[2]))
14061 std::swap (operands[1], operands[2]);
14062 }
14063 [(set_attr "isa" "noavx,noavx,avx")])
14064
14065 ;; Conditionalize these after reload. If they match before reload, we
14066 ;; lose the clobber and ability to use integer instructions.
14067
;; Final clobber-free form of absneg on an "f" register, with the input tied
;; to the output.  Only recognized once reload has completed; emits
;; <absneg_mnemonic> and is classified as type "fsgn".
14068 (define_insn "*<code><mode>2_i387"
14069 [(set (match_operand:X87MODEF 0 "register_operand" "=f")
14070 (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand" "0")))]
14071 "TARGET_80387 && reload_completed"
14072 "<absneg_mnemonic>"
14073 [(set_attr "type" "fsgn")
14074 (set_attr "mode" "<MODE>")])
14075
14076 ;; Copysign instructions
14077
;; copysign expander for the SSEMODEF modes.  Operand 1 may be any
;; nonmemory operand, operands 0 and 2 must be registers.  Enabled for SSE
;; float modes under TARGET_SSE_MATH, for TFmode under TARGET_SSE and for
;; HFmode under TARGET_AVX512FP16.  All code is generated by
;; ix86_expand_copysign.
14078 (define_expand "copysign<mode>3"
14079 [(match_operand:SSEMODEF 0 "register_operand")
14080 (match_operand:SSEMODEF 1 "nonmemory_operand")
14081 (match_operand:SSEMODEF 2 "register_operand")]
14082 "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
14083 || (TARGET_SSE && (<MODE>mode == TFmode))
14084 || (TARGET_AVX512FP16 && (<MODE>mode ==HFmode))"
14085 "ix86_expand_copysign (operands); DONE;")
14086
;; xorsign expander for the MODEFH modes; all three operands are registers.
;; When operands 1 and 2 are the same rtx the expansion is just
;; abs<mode>2 of operand 1; otherwise ix86_expand_xorsign generates the
;; code.
14087 (define_expand "xorsign<mode>3"
14088 [(match_operand:MODEFH 0 "register_operand")
14089 (match_operand:MODEFH 1 "register_operand")
14090 (match_operand:MODEFH 2 "register_operand")]
14091 "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
14092 || <MODE>mode == HFmode"
14093 {
14094 if (rtx_equal_p (operands[1], operands[2]))
14095 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
14096 else
14097 ix86_expand_xorsign (operands);
14098 DONE;
14099 })
14100 \f
14101 ;; One complement instructions
14102
;; Unconditional expander for bitwise NOT on the SDWIM modes.  Delegates to
;; ix86_expand_unary_operator, passing TARGET_APX_NDD as the last argument.
14103 (define_expand "one_cmpl<mode>2"
14104 [(set (match_operand:SDWIM 0 "nonimmediate_operand")
14105 (not:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")))]
14106 ""
14107 "ix86_expand_unary_operator (NOT, <MODE>mode, operands,
14108 TARGET_APX_NDD); DONE;")
14109
;; Bitwise NOT of a double-word (<DWI>) value.  Alternative 0 ties the input
;; to the output; alternative 1 (isa "apx_ndd") uses an earlyclobbered
;; register output with a separate input.  Output as "#" and split after
;; reload into two DWIH NOTs: operand 0 from operand 1 and operand 2 from
;; operand 3.  split_double_mode is called on operands 0..1; presumably it
;; leaves the low halves in operands[0..1] and puts the high halves in
;; operands[2..3] -- confirm against split_double_mode.
14110 (define_insn_and_split "*one_cmpl<dwi>2_doubleword"
14111 [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,&r")
14112 (not:<DWI> (match_operand:<DWI> 1 "nonimmediate_operand" "0,ro")))]
14113 "ix86_unary_operator_ok (NOT, <DWI>mode, operands, TARGET_APX_NDD)"
14114 "#"
14115 "&& reload_completed"
14116 [(set (match_dup 0)
14117 (not:DWIH (match_dup 1)))
14118 (set (match_dup 2)
14119 (not:DWIH (match_dup 3)))]
14120 "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);"
14121 [(set_attr "isa" "*,apx_ndd")])
14122
;; Bitwise NOT for the SWI248 modes, three alternatives:
;;   0: in place on a register or memory operand (one-operand "not");
;;   1: isa "apx_ndd", register destination with a separate reg/mem source;
;;   2: mask ("k") registers, isa <kmov_isa>, output as "#" and typed
;;      "msklog".
14123 (define_insn "*one_cmpl<mode>2_1"
14124 [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,?k")
14125 (not:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "0,rm,k")))]
14126 "ix86_unary_operator_ok (NOT, <MODE>mode, operands, TARGET_APX_NDD)"
14127 "@
14128 not{<imodesuffix>}\t%0
14129 not{<imodesuffix>}\t{%1, %0|%0, %1}
14130 #"
14131 [(set_attr "isa" "*,apx_ndd,<kmov_isa>")
14132 (set_attr "type" "negnot,negnot,msklog")
14133 (set_attr "mode" "<MODE>")])
14134
;; SImode bitwise NOT whose result is zero-extended into a DImode register;
;; 64-bit targets only.  Alternatives 0 and 1 emit "not{l}" on the SImode
;; view (%k0) of the destination, in place (isa "x64") or with a separate
;; source (isa "apx_ndd").  Alternative 2 handles mask ("k") registers
;; (isa "avx512bw") and is output as "#".
14135 (define_insn "*one_cmplsi2_1_zext"
14136 [(set (match_operand:DI 0 "register_operand" "=r,r,?k")
14137 (zero_extend:DI
14138 (not:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm,k"))))]
14139 "TARGET_64BIT && ix86_unary_operator_ok (NOT, SImode, operands,
14140 TARGET_APX_NDD)"
14141 "@
14142 not{l}\t%k0
14143 not{l}\t{%1, %k0|%k0, %1}
14144 #"
14145 [(set_attr "isa" "x64,apx_ndd,avx512bw")
14146 (set_attr "type" "negnot,negnot,msklog")
14147 (set_attr "mode" "SI,SI,SI")])
14148
;; QImode bitwise NOT, four alternatives:
;;   0: in-place "not{b}" on a "q" register or memory;
;;   1: in-place "not{l}" on the SImode view (%k0) of a register;
;;   2: isa "apx_ndd", "not{b}" with a separate reg/mem source;
;;   3: mask ("k") registers, isa "avx512f", output as "#".
;; The "mode" attribute is SI for alternative 1, HI for alternative 3 when
;; TARGET_AVX512DQ is not set, and QI otherwise.
14149 (define_insn "*one_cmplqi2_1"
14150 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,r,?k")
14151 (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,rm,k")))]
14152 "ix86_unary_operator_ok (NOT, QImode, operands, TARGET_APX_NDD)"
14153 "@
14154 not{b}\t%0
14155 not{l}\t%k0
14156 not{b}\t{%1, %0|%0, %1}
14157 #"
14158 [(set_attr "isa" "*,*,apx_ndd,avx512f")
14159 (set_attr "type" "negnot,negnot,negnot,msklog")
14160 (set (attr "mode")
14161 (cond [(eq_attr "alternative" "1")
14162 (const_string "SI")
14163 (and (eq_attr "alternative" "3")
14164 (match_test "!TARGET_AVX512DQ"))
14165 (const_string "HI")
14166 ]
14167 (const_string "QI")))
14168 ;; Potential partial reg stall on alternative 1.
14169 (set (attr "preferred_for_speed")
14170 (cond [(eq_attr "alternative" "1")
14171 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
14172 (symbol_ref "true")))])
14173
14174 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Bitwise NOT written through strict_low_part of a register, for the SWI12
;; modes.  Enabled unless TARGET_PARTIAL_REG_STALL is set, or always when
;; optimizing the function for size.  Alternative 0 is the in-place "not".
;; Alternative 1 (distinct source register) is output as "#" and, after
;; reload and only if the operands really differ, is split into a
;; strict_low_part copy of operand 1 into operand 0 followed by an in-place
;; NOT of operand 0.
14175 (define_insn_and_split "*one_cmpl<mode>_1_slp"
14176 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
14177 (not:SWI12 (match_operand:SWI12 1 "register_operand" "0,!<r>")))]
14178 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
14179 "@
14180 not{<imodesuffix>}\t%0
14181 #"
14182 "&& reload_completed
14183 && !(rtx_equal_p (operands[0], operands[1]))"
14184 [(set (strict_low_part (match_dup 0)) (match_dup 1))
14185 (set (strict_low_part (match_dup 0))
14186 (not:SWI12 (match_dup 0)))]
14187 ""
14188 [(set_attr "type" "negnot")
14189 (set_attr "mode" "<MODE>")])
14190
;; Bitwise NOT that also sets the flags register from comparing the result
;; with zero.  Requires ix86_match_ccmode (insn, CCNOmode).  Alternative 0 is
;; in place, alternative 1 (isa "apx_ndd") has a separate source.  Always
;; output as "#"; the define_split that follows it in this file rewrites
;; this shape as an XOR with -1.
14191 (define_insn "*one_cmpl<mode>2_2"
14192 [(set (reg FLAGS_REG)
14193 (compare (not:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm"))
14194 (const_int 0)))
14195 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
14196 (not:SWI (match_dup 1)))]
14197 "ix86_match_ccmode (insn, CCNOmode)
14198 && ix86_unary_operator_ok (NOT, <MODE>mode, operands,
14199 TARGET_APX_NDD)"
14200 "#"
14201 [(set_attr "type" "alu1")
14202 (set_attr "isa" "*,apx_ndd")
14203 (set_attr "mode" "<MODE>")])
14204
;; Rewrite "compare (not x) with 0 and store (not x)" as the same parallel
;; with (xor x -1) in place of (not x), in both the compare and the store.
;; The comparison operator (operand 2) and the flags register (operand 0)
;; are carried over unchanged.
14205 (define_split
14206 [(set (match_operand 0 "flags_reg_operand")
14207 (match_operator 2 "compare_operator"
14208 [(not:SWI (match_operand:SWI 3 "nonimmediate_operand"))
14209 (const_int 0)]))
14210 (set (match_operand:SWI 1 "nonimmediate_operand")
14211 (not:SWI (match_dup 3)))]
14212 "ix86_match_ccmode (insn, CCNOmode)"
14213 [(parallel [(set (match_dup 0)
14214 (match_op_dup 2 [(xor:SWI (match_dup 3) (const_int -1))
14215 (const_int 0)]))
14216 (set (match_dup 1)
14217 (xor:SWI (match_dup 3) (const_int -1)))])])
14218
;; SImode bitwise NOT that sets the flags from comparing the result with
;; zero and stores the zero-extended result in a DImode register; 64-bit
;; targets only, and ix86_match_ccmode (insn, CCNOmode) must hold.  Always
;; output as "#"; the define_split that follows it in this file rewrites
;; this shape as an XOR with -1.
14219 (define_insn "*one_cmplsi2_2_zext"
14220 [(set (reg FLAGS_REG)
14221 (compare (not:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm"))
14222 (const_int 0)))
14223 (set (match_operand:DI 0 "register_operand" "=r,r")
14224 (zero_extend:DI (not:SI (match_dup 1))))]
14225 "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
14226 && ix86_unary_operator_ok (NOT, SImode, operands, TARGET_APX_NDD)"
14227 "#"
14228 [(set_attr "type" "alu1")
14229 (set_attr "isa" "*,apx_ndd")
14230 (set_attr "mode" "SI")])
14231
;; Zero-extending counterpart of the NOT-to-XOR split: "compare (not:SI x)
;; with 0 and store zero_extend:DI (not:SI x)" becomes the same parallel
;; with (xor:SI x -1) in place of (not:SI x).
14232 (define_split
14233 [(set (match_operand 0 "flags_reg_operand")
14234 (match_operator 2 "compare_operator"
14235 [(not:SI (match_operand:SI 3 "nonimmediate_operand"))
14236 (const_int 0)]))
14237 (set (match_operand:DI 1 "register_operand")
14238 (zero_extend:DI (not:SI (match_dup 3))))]
14239 "ix86_match_ccmode (insn, CCNOmode)"
14240 [(parallel [(set (match_dup 0)
14241 (match_op_dup 2 [(xor:SI (match_dup 3) (const_int -1))
14242 (const_int 0)]))
14243 (set (match_dup 1)
14244 (zero_extend:DI (xor:SI (match_dup 3) (const_int -1))))])])
14245
14246 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Bitwise NOT of the 8-bit field at bit position 8 of a "Q" register: the
;; destination is a zero_extract of size 8 at position 8, the source is the
;; QImode NOT of the same field extracted from operand 1.  Alternative 0
;; (source tied to destination) emits "not{b}" on %h0.  Alternative 1 is
;; output as "#" and, after reload and only if the registers really differ,
;; is split into a copy of the field from operand 1 into operand 0 followed
;; by the in-place NOT of operand 0's field.
14247 (define_insn_and_split "*one_cmplqi_ext<mode>_1"
14248 [(set (zero_extract:SWI248
14249 (match_operand 0 "int248_register_operand" "+Q,&Q")
14250 (const_int 8)
14251 (const_int 8))
14252 (subreg:SWI248
14253 (not:QI
14254 (subreg:QI
14255 (match_operator:SWI248 2 "extract_operator"
14256 [(match_operand 1 "int248_register_operand" "0,!Q")
14257 (const_int 8)
14258 (const_int 8)]) 0)) 0))]
14259 ""
14260 "@
14261 not{b}\t%h0
14262 #"
14263 "reload_completed
14264 && !(rtx_equal_p (operands[0], operands[1]))"
14265 [(set (zero_extract:SWI248
14266 (match_dup 0) (const_int 8) (const_int 8))
14267 (zero_extract:SWI248
14268 (match_dup 1) (const_int 8) (const_int 8)))
14269 (set (zero_extract:SWI248
14270 (match_dup 0) (const_int 8) (const_int 8))
14271 (subreg:SWI248
14272 (not:QI
14273 (subreg:QI
14274 (match_op_dup 2
14275 [(match_dup 0) (const_int 8) (const_int 8)]) 0)) 0))]
14276 ""
14277 [(set_attr "type" "negnot")
14278 (set_attr "mode" "QI")])
14279 \f
14280 ;; Shift instructions
14281
14282 ;; DImode shifts are implemented using the i386 "shift double" opcode,
14283 ;; which is written as "sh[lr]d[lw] imm,reg,reg/mem". If the shift count
14284 ;; is variable, then the count is in %cl and the "imm" operand is dropped
14285 ;; from the assembler input.
14286 ;;
14287 ;; This instruction shifts the target reg/mem as usual, but instead of
14288 ;; shifting in zeros, bits are shifted in from reg operand. If the insn
14289 ;; is a left shift double, bits are taken from the high order bits of
14290 ;; reg, else if the insn is a shift right double, bits are taken from the
14291 ;; low order bits of reg. So if %eax is "1234" and %edx is "5678" (one digit per byte),
14292 ;; "shldl $8,%edx,%eax" leaves %edx unchanged and sets %eax to "2345".
14293 ;;
14294 ;; Since sh[lr]d does not change the `reg' operand, that is done
14295 ;; separately, making all shifts emit pairs of shift double and normal
14296 ;; shift. Since sh[lr]d does not shift more than 31 bits, and we wish to
14297 ;; support a 63 bit shift, each shift where the count is in a reg expands
14298 ;; to a pair of shifts, a branch, a shift by 32 and a label.
14299 ;;
14300 ;; If the shift count is a constant, we need never emit more than one
14301 ;; shift pair, instead using moves and sign extension for counts greater
14302 ;; than 31.
14303
;; Unconditional expander for left shifts on the SDWIM modes; the shift
;; count is a QImode nonmemory operand.  Delegates to
;; ix86_expand_binary_operator, passing TARGET_APX_NDD as the last argument.
14304 (define_expand "ashl<mode>3"
14305 [(set (match_operand:SDWIM 0 "<shift_operand>")
14306 (ashift:SDWIM (match_operand:SDWIM 1 "<ashl_input_operand>")
14307 (match_operand:QI 2 "nonmemory_operand")))]
14308 ""
14309 "ix86_expand_binary_operator (ASHIFT, <MODE>mode, operands,
14310 TARGET_APX_NDD); DONE;")
14311
;; Double-word left shift whose count is (subreg:QI (and count mask)).
;; Accepted only before reload (ix86_pre_reload_split) and only when the
;; constant mask (operand 3) either has the <MODE_SIZE> * BITS_PER_UNIT bit
;; clear, or has every bit below 2 * <MODE_SIZE> * BITS_PER_UNIT set.
;; Always split ("&& 1"); the preparation code distinguishes:
;;   - mask has the <MODE_SIZE> * BITS_PER_UNIT bit set: emit a plain
;;     ashl<dwi>3_doubleword with the QImode low part of the count, DONE;
;;   - otherwise: split operands 0 and 1 into halves (operands 4/5 and 6/7),
;;     set operand 8 to <MODE_SIZE> * BITS_PER_UNIT - 1 and operand 9 to
;;     <MODE_SIZE> * BITS_PER_UNIT, and, if the mask does not already cover
;;     all bits below <MODE_SIZE> * BITS_PER_UNIT, AND the count with the
;;     relevant mask bits into a new pseudo.  Operand 7 is copied into
;;     operand 6 when they differ, then the template emits a shift-double
;;     shaped set of operand 6 followed by a plain shift of operand 5 into
;;     operand 4.
;; Presumably operands 4/5 are the low halves and 6/7 the high halves --
;; confirm against split_double_mode.
14312 (define_insn_and_split "*ashl<dwi>3_doubleword_mask"
14313 [(set (match_operand:<DWI> 0 "register_operand")
14314 (ashift:<DWI>
14315 (match_operand:<DWI> 1 "register_operand")
14316 (subreg:QI
14317 (and
14318 (match_operand 2 "int248_register_operand" "c")
14319 (match_operand 3 "const_int_operand")) 0)))
14320 (clobber (reg:CC FLAGS_REG))]
14321 "((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0
14322 || ((INTVAL (operands[3]) & (2 * <MODE_SIZE> * BITS_PER_UNIT - 1))
14323 == (2 * <MODE_SIZE> * BITS_PER_UNIT - 1)))
14324 && ix86_pre_reload_split ()"
14325 "#"
14326 "&& 1"
14327 [(parallel
14328 [(set (match_dup 6)
14329 (ior:DWIH (ashift:DWIH (match_dup 6)
14330 (and:QI (match_dup 2) (match_dup 8)))
14331 (subreg:DWIH
14332 (lshiftrt:<DWI> (zero_extend:<DWI> (match_dup 5))
14333 (minus:QI (match_dup 9)
14334 (and:QI (match_dup 2) (match_dup 8)))) 0)))
14335 (clobber (reg:CC FLAGS_REG))])
14336 (parallel
14337 [(set (match_dup 4)
14338 (ashift:DWIH (match_dup 5) (match_dup 2)))
14339 (clobber (reg:CC FLAGS_REG))])]
14340 {
14341 if ((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) != 0)
14342 {
14343 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
14344 operands[2] = gen_lowpart (QImode, operands[2]);
14345 emit_insn (gen_ashl<dwi>3_doubleword (operands[0], operands[1],
14346 operands[2]));
14347 DONE;
14348 }
14349
14350 split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);
14351
14352 operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
14353 operands[9] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
14354
14355 if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
14356 != ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
14357 {
14358 rtx xops[3];
14359 xops[0] = gen_reg_rtx (GET_MODE (operands[2]));
14360 xops[1] = operands[2];
14361 xops[2] = GEN_INT (INTVAL (operands[3])
14362 & ((<MODE_SIZE> * BITS_PER_UNIT) - 1));
14363 ix86_expand_binary_operator (AND, GET_MODE (operands[2]), xops);
14364 operands[2] = xops[0];
14365 }
14366
14367 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
14368 operands[2] = gen_lowpart (QImode, operands[2]);
14369
14370 if (!rtx_equal_p (operands[6], operands[7]))
14371 emit_move_insn (operands[6], operands[7]);
14372 })
14373
;; Variant of *ashl<dwi>3_doubleword_mask where the masked count is already
;; a QImode (and:QI count mask) rather than a subreg of a wider AND.  The
;; matching condition and the split template are the same.  In the
;; preparation code:
;;   - if the mask has the <MODE_SIZE> * BITS_PER_UNIT bit set, a plain
;;     ashl<dwi>3_doubleword is emitted with the unmasked count, DONE;
;;   - otherwise the operands are split into halves, operands 8 and 9 are
;;     set to <MODE_SIZE> * BITS_PER_UNIT - 1 and <MODE_SIZE> * BITS_PER_UNIT,
;;     and if the mask does not cover all bits below
;;     <MODE_SIZE> * BITS_PER_UNIT the count is ANDed with operand 3 into a
;;     new QImode pseudo via andqi3.  Operand 7 is copied into operand 6
;;     when they differ.
14374 (define_insn_and_split "*ashl<dwi>3_doubleword_mask_1"
14375 [(set (match_operand:<DWI> 0 "register_operand")
14376 (ashift:<DWI>
14377 (match_operand:<DWI> 1 "register_operand")
14378 (and:QI
14379 (match_operand:QI 2 "register_operand" "c")
14380 (match_operand:QI 3 "const_int_operand"))))
14381 (clobber (reg:CC FLAGS_REG))]
14382 "((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0
14383 || ((INTVAL (operands[3]) & (2 * <MODE_SIZE> * BITS_PER_UNIT - 1))
14384 == (2 * <MODE_SIZE> * BITS_PER_UNIT - 1)))
14385 && ix86_pre_reload_split ()"
14386 "#"
14387 "&& 1"
14388 [(parallel
14389 [(set (match_dup 6)
14390 (ior:DWIH (ashift:DWIH (match_dup 6)
14391 (and:QI (match_dup 2) (match_dup 8)))
14392 (subreg:DWIH
14393 (lshiftrt:<DWI> (zero_extend:<DWI> (match_dup 5))
14394 (minus:QI (match_dup 9)
14395 (and:QI (match_dup 2) (match_dup 8)))) 0)))
14396 (clobber (reg:CC FLAGS_REG))])
14397 (parallel
14398 [(set (match_dup 4)
14399 (ashift:DWIH (match_dup 5) (match_dup 2)))
14400 (clobber (reg:CC FLAGS_REG))])]
14401 {
14402 if ((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) != 0)
14403 {
14404 emit_insn (gen_ashl<dwi>3_doubleword (operands[0], operands[1],
14405 operands[2]));
14406 DONE;
14407 }
14408
14409 split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);
14410
14411 operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
14412 operands[9] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
14413
14414 if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
14415 != ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
14416 {
14417 rtx tem = gen_reg_rtx (QImode);
14418 emit_insn (gen_andqi3 (tem, operands[2], operands[3]));
14419 operands[2] = tem;
14420 }
14421
14422 if (!rtx_equal_p (operands[6], operands[7]))
14423 emit_move_insn (operands[6], operands[7]);
14424 })
14425
;; Named double-word left shift with a flags clobber.  The destination is an
;; earlyclobbered register in both alternatives; alternative 0 ties the
;; input to it (or takes a constant, "0n"), alternative 1 (isa "apx_ndd")
;; takes a separate input register.  Always output as "#", type "multi".
14426 (define_insn "ashl<mode>3_doubleword"
14427 [(set (match_operand:DWI 0 "register_operand" "=&r,&r")
14428 (ashift:DWI (match_operand:DWI 1 "reg_or_pm1_operand" "0n,r")
14429 (match_operand:QI 2 "nonmemory_operand" "<S>c,<S>c")))
14430 (clobber (reg:CC FLAGS_REG))]
14431 ""
14432 "#"
14433 [(set_attr "type" "multi")
14434 (set_attr "isa" "*,apx_ndd")])
14435
;; Split a double-word left shift once epilogue_completed is set.  With
;; TARGET_APX_NDD, a register source that differs from the destination goes
;; through ix86_split_ashl_ndd; every other case goes through
;; ix86_split_ashl.  No scratch register is passed (NULL_RTX).
14436 (define_split
14437 [(set (match_operand:DWI 0 "register_operand")
14438 (ashift:DWI (match_operand:DWI 1 "nonmemory_operand")
14439 (match_operand:QI 2 "nonmemory_operand")))
14440 (clobber (reg:CC FLAGS_REG))]
14441 "epilogue_completed"
14442 [(const_int 0)]
14443 {
14444 if (TARGET_APX_NDD
14445 && !rtx_equal_p (operands[0], operands[1])
14446 && REG_P (operands[1]))
14447 ix86_split_ashl_ndd (operands, NULL_RTX);
14448 else
14449 ix86_split_ashl (operands, NULL_RTX, <MODE>mode);
14450 DONE;
14451 })
14452
14453 ;; By default we don't ask for a scratch register, because when DWImode
14454 ;; values are manipulated, registers are already at a premium. But if
14455 ;; we have one handy, we won't turn it away.
14456
;; Peephole for a double-word left shift when a DWIH scratch register
;; (operand 3) is available and TARGET_CMOVE is set.  It makes the same
;; choice between ix86_split_ashl_ndd and ix86_split_ashl as the
;; define_split above, but passes the scratch register instead of NULL_RTX.
14457 (define_peephole2
14458 [(match_scratch:DWIH 3 "r")
14459 (parallel [(set (match_operand:<DWI> 0 "register_operand")
14460 (ashift:<DWI>
14461 (match_operand:<DWI> 1 "nonmemory_operand")
14462 (match_operand:QI 2 "nonmemory_operand")))
14463 (clobber (reg:CC FLAGS_REG))])
14464 (match_dup 3)]
14465 "TARGET_CMOVE"
14466 [(const_int 0)]
14467 {
14468 if (TARGET_APX_NDD
14469 && !rtx_equal_p (operands[0], operands[1])
14470 && (REG_P (operands[1])))
14471 ix86_split_ashl_ndd (operands, operands[3]);
14472 else
14473 ix86_split_ashl (operands, operands[3], <DWI>mode);
14474 DONE;
14475 })
14476
;; Double-word left shift of an extended (any_extend) single-word value by a
;; constant count of at least <MODE_SIZE> * BITS_PER_UNIT and less than
;; twice that.  Output as "#" and split after reload:
;;   - operand 0 is split into two halves (operands 0 and 3);
;;   - bits = count - <MODE_SIZE> * BITS_PER_UNIT;
;;   - if bits is 0, operand 1 is simply moved into operand 3 (unless they
;;     are already equal);
;;   - otherwise operand 3 = source << bits, where the source is operand 1
;;     directly under TARGET_APX_NDD, or operand 3 after copying operand 1
;;     into it;
;;   - finally operand 0 is cleared with ix86_expand_clear.
;; Presumably operand 3 is the high half and operand 0 the low half after
;; split_double_mode -- confirm against split_double_mode.
14477 (define_insn_and_split "*ashl<dwi>3_doubleword_highpart"
14478 [(set (match_operand:<DWI> 0 "register_operand" "=r")
14479 (ashift:<DWI>
14480 (any_extend:<DWI> (match_operand:DWIH 1 "nonimmediate_operand" "rm"))
14481 (match_operand:QI 2 "const_int_operand")))
14482 (clobber (reg:CC FLAGS_REG))]
14483 "INTVAL (operands[2]) >= <MODE_SIZE> * BITS_PER_UNIT
14484 && INTVAL (operands[2]) < <MODE_SIZE> * BITS_PER_UNIT * 2"
14485 "#"
14486 "&& reload_completed"
14487 [(const_int 0)]
14488 {
14489 split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[3]);
14490 int bits = INTVAL (operands[2]) - (<MODE_SIZE> * BITS_PER_UNIT);
14491 bool op_equal_p = rtx_equal_p (operands[3], operands[1]);
14492 if (bits == 0)
14493 {
14494 if (!op_equal_p)
14495 emit_move_insn (operands[3], operands[1]);
14496 }
14497 else
14498 {
14499 if (!op_equal_p && !TARGET_APX_NDD)
14500 emit_move_insn (operands[3], operands[1]);
14501 rtx op_tmp = TARGET_APX_NDD ? operands[1] : operands[3];
14502 emit_insn (gen_ashl<mode>3 (operands[3], op_tmp, GEN_INT (bits)));
14503 }
14504 ix86_expand_clear (operands[0]);
14505 DONE;
14506 })
14507
;; 64-bit shift-left-double, in place on operand 0 (register or memory):
;;   op0 = (op0 << (op2 & 63))
;;         | lowpart ((zero_extend:TI op1) >> (64 - (op2 & 63)))
;; Operand 1 is a register; the count (operand 2) uses the "Jc" constraint.
;; Clobbers the flags and is only available with TARGET_64BIT.
14508 (define_insn "x86_64_shld"
14509 [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
14510 (ior:DI (ashift:DI (match_dup 0)
14511 (and:QI (match_operand:QI 2 "nonmemory_operand" "Jc")
14512 (const_int 63)))
14513 (subreg:DI
14514 (lshiftrt:TI
14515 (zero_extend:TI
14516 (match_operand:DI 1 "register_operand" "r"))
14517 (minus:QI (const_int 64)
14518 (and:QI (match_dup 2) (const_int 63)))) 0)))
14519 (clobber (reg:CC FLAGS_REG))]
14520 "TARGET_64BIT"
14521 "shld{q}\t{%s2%1, %0|%0, %1, %2}"
14522 [(set_attr "type" "ishift")
14523 (set_attr "prefix_0f" "1")
14524 (set_attr "mode" "DI")
14525 (set_attr "athlon_decode" "vector")
14526 (set_attr "amdfam10_decode" "vector")
14527 (set_attr "bdver1_decode" "vector")])
14528
;; Three-operand (TARGET_APX_NDD) form of the 64-bit shift-left-double: the
;; result goes to a separate register operand 0 instead of overwriting the
;; shifted value:
;;   op0 = (op1 << (op3 & 63))
;;         | lowpart ((zero_extend:TI op2) >> (64 - (op3 & 63)))
;; Operand 1 may be a register or memory, operand 2 is a register and the
;; count (operand 3) uses the "Jc" constraint.  Clobbers the flags.
14529 (define_insn "x86_64_shld_ndd"
14530 [(set (match_operand:DI 0 "register_operand" "=r")
14531 (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
14532 (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc")
14533 (const_int 63)))
14534 (subreg:DI
14535 (lshiftrt:TI
14536 (zero_extend:TI
14537 (match_operand:DI 2 "register_operand" "r"))
14538 (minus:QI (const_int 64)
14539 (and:QI (match_dup 3) (const_int 63)))) 0)))
14540 (clobber (reg:CC FLAGS_REG))]
14541 "TARGET_APX_NDD"
14542 "shld{q}\t{%s3%2, %1, %0|%0, %1, %2, %3}"
14543 [(set_attr "type" "ishift")
14544 (set_attr "mode" "DI")])
14545
;; Constant-count form of the in-place 64-bit shift-left-double.  The left
;; shift count (operand 2, 0..63) and the right shift count (operand 3,
;; 0..255) are separate constants; the insn condition requires
;; operand 3 == 64 - operand 2.  Only operand 2 appears in the emitted
;; "shld{q}" instruction.
14546 (define_insn "x86_64_shld_1"
14547 [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
14548 (ior:DI (ashift:DI (match_dup 0)
14549 (match_operand:QI 2 "const_0_to_63_operand"))
14550 (subreg:DI
14551 (lshiftrt:TI
14552 (zero_extend:TI
14553 (match_operand:DI 1 "register_operand" "r"))
14554 (match_operand:QI 3 "const_0_to_255_operand")) 0)))
14555 (clobber (reg:CC FLAGS_REG))]
14556 "TARGET_64BIT
14557 && INTVAL (operands[3]) == 64 - INTVAL (operands[2])"
14558 "shld{q}\t{%2, %1, %0|%0, %1, %2}"
14559 [(set_attr "type" "ishift")
14560 (set_attr "prefix_0f" "1")
14561 (set_attr "mode" "DI")
14562 (set_attr "length_immediate" "1")
14563 (set_attr "athlon_decode" "vector")
14564 (set_attr "amdfam10_decode" "vector")
14565 (set_attr "bdver1_decode" "vector")])
14566
;; Constant-count, three-operand (TARGET_APX_NDD) form of the 64-bit
;; shift-left-double.  The left shift count is operand 3 (0..63) and the
;; right shift count operand 4 (0..255); the insn condition requires
;; operand 4 == 64 - operand 3.  The result is written to register
;; operand 0; operand 1 may be a register or memory.
14567 (define_insn "x86_64_shld_ndd_1"
14568 [(set (match_operand:DI 0 "register_operand" "=r")
14569 (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
14570 (match_operand:QI 3 "const_0_to_63_operand"))
14571 (subreg:DI
14572 (lshiftrt:TI
14573 (zero_extend:TI
14574 (match_operand:DI 2 "register_operand" "r"))
14575 (match_operand:QI 4 "const_0_to_255_operand")) 0)))
14576 (clobber (reg:CC FLAGS_REG))]
14577 "TARGET_APX_NDD
14578 && INTVAL (operands[4]) == 64 - INTVAL (operands[3])"
14579 "shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14580 [(set_attr "type" "ishift")
14581 (set_attr "mode" "DI")
14582 (set_attr "length_immediate" "1")])
14583
14584
;; Recognize (op4 << op2) | (op1 >> op3) on plain DImode values (no
;; zero_extend/subreg wrapping) with constant counts satisfying
;; op3 == 64 - op2, before reload, and always split it into one of the
;; shift-double insns:
;;   - op4 is the destination: in-place x86_64_shld_1, op1 forced to a reg;
;;   - op1 is the destination: in-place x86_64_shrd_1, op4 forced to a reg;
;;   - TARGET_APX_NDD: compute into a new pseudo with x86_64_shld_ndd_1, or
;;     with x86_64_shrd_ndd_1 when op1 (but not op4) is a MEM; op1 is forced
;;     to a register when op4 is a MEM; then move the pseudo to op0;
;;   - otherwise: copy op4 into a new pseudo, apply x86_64_shld_1 to it with
;;     op1 forced to a register, and move the pseudo to op0.
14585 (define_insn_and_split "*x86_64_shld_shrd_1_nozext"
14586 [(set (match_operand:DI 0 "nonimmediate_operand")
14587 (ior:DI (ashift:DI (match_operand:DI 4 "nonimmediate_operand")
14588 (match_operand:QI 2 "const_0_to_63_operand"))
14589 (lshiftrt:DI
14590 (match_operand:DI 1 "nonimmediate_operand")
14591 (match_operand:QI 3 "const_0_to_63_operand"))))
14592 (clobber (reg:CC FLAGS_REG))]
14593 "TARGET_64BIT
14594 && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
14595 && ix86_pre_reload_split ()"
14596 "#"
14597 "&& 1"
14598 [(const_int 0)]
14599 {
14600 if (rtx_equal_p (operands[4], operands[0]))
14601 {
14602 operands[1] = force_reg (DImode, operands[1]);
14603 emit_insn (gen_x86_64_shld_1 (operands[0], operands[1], operands[2], operands[3]));
14604 }
14605 else if (rtx_equal_p (operands[1], operands[0]))
14606 {
14607 operands[4] = force_reg (DImode, operands[4]);
14608 emit_insn (gen_x86_64_shrd_1 (operands[0], operands[4], operands[3], operands[2]));
14609 }
14610 else if (TARGET_APX_NDD)
14611 {
14612 rtx tmp = gen_reg_rtx (DImode);
14613 if (MEM_P (operands[4]))
14614 {
14615 operands[1] = force_reg (DImode, operands[1]);
14616 emit_insn (gen_x86_64_shld_ndd_1 (tmp, operands[4], operands[1],
14617 operands[2], operands[3]));
14618 }
14619 else if (MEM_P (operands[1]))
14620 emit_insn (gen_x86_64_shrd_ndd_1 (tmp, operands[1], operands[4],
14621 operands[3], operands[2]));
14622 else
14623 emit_insn (gen_x86_64_shld_ndd_1 (tmp, operands[4], operands[1],
14624 operands[2], operands[3]));
14625 emit_move_insn (operands[0], tmp);
14626 }
14627 else
14628 {
14629 operands[1] = force_reg (DImode, operands[1]);
14630 rtx tmp = gen_reg_rtx (DImode);
14631 emit_move_insn (tmp, operands[4]);
14632 emit_insn (gen_x86_64_shld_1 (tmp, operands[1], operands[2], operands[3]));
14633 emit_move_insn (operands[0], tmp);
14634 }
14635 DONE;
14636 })
14637
;; Recognize the unmasked in-place form
;;   op0 = (op0 << op2) | (op1 >> (64 - op2))
;; before reload and always split it into the canonical masked
;; shift-double shape: the count is wrapped in (and:QI ... 63) and the
;; right-shifted value in zero_extend:TI / subreg:DI, which is the shape
;; the x86_64_shld insn matches.
14638 (define_insn_and_split "*x86_64_shld_2"
14639 [(set (match_operand:DI 0 "nonimmediate_operand")
14640 (ior:DI (ashift:DI (match_dup 0)
14641 (match_operand:QI 2 "nonmemory_operand"))
14642 (lshiftrt:DI (match_operand:DI 1 "register_operand")
14643 (minus:QI (const_int 64) (match_dup 2)))))
14644 (clobber (reg:CC FLAGS_REG))]
14645 "TARGET_64BIT && ix86_pre_reload_split ()"
14646 "#"
14647 "&& 1"
14648 [(parallel [(set (match_dup 0)
14649 (ior:DI (ashift:DI (match_dup 0)
14650 (and:QI (match_dup 2) (const_int 63)))
14651 (subreg:DI
14652 (lshiftrt:TI
14653 (zero_extend:TI (match_dup 1))
14654 (minus:QI (const_int 64)
14655 (and:QI (match_dup 2)
14656 (const_int 63)))) 0)))
14657 (clobber (reg:CC FLAGS_REG))])])
14658
;; TARGET_APX_NDD counterpart of *x86_64_shld_2: recognize the unmasked
;; three-operand form
;;   op0 = (op1 << op3) | (op2 >> (64 - op3))
;; before reload and always split it.  The replacement computes the masked
;; shift-double shape into a new DImode pseudo (operand 4) and sets
;; operand 0 from that pseudo.
;; NOTE(review): the final (set (match_dup 0) (match_dup 4)) sits inside the
;; same parallel as the shift and the flags clobber, and the preparation
;; code copies operand 0 into the new pseudo before the sequence is
;; emitted -- confirm that both are intended.
14659 (define_insn_and_split "*x86_64_shld_ndd_2"
14660 [(set (match_operand:DI 0 "nonimmediate_operand")
14661 (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand")
14662 (match_operand:QI 3 "nonmemory_operand"))
14663 (lshiftrt:DI (match_operand:DI 2 "register_operand")
14664 (minus:QI (const_int 64) (match_dup 3)))))
14665 (clobber (reg:CC FLAGS_REG))]
14666 "TARGET_APX_NDD
14667 && ix86_pre_reload_split ()"
14668 "#"
14669 "&& 1"
14670 [(parallel [(set (match_dup 4)
14671 (ior:DI (ashift:DI (match_dup 1)
14672 (and:QI (match_dup 3) (const_int 63)))
14673 (subreg:DI
14674 (lshiftrt:TI
14675 (zero_extend:TI (match_dup 2))
14676 (minus:QI (const_int 64)
14677 (and:QI (match_dup 3)
14678 (const_int 63)))) 0)))
14679 (clobber (reg:CC FLAGS_REG))
14680 (set (match_dup 0) (match_dup 4))])]
14681 {
14682 operands[4] = gen_reg_rtx (DImode);
14683 emit_move_insn (operands[4], operands[0]);
14684 })
14685
;; SImode double shift left with a count taken modulo 32:
;;   op0 = (op0 << (op2 & 31))
;;         | lowpart ((zero_extend:DI op1) >> (32 - (op2 & 31)))
;; Operand 0 is read and written in place ("+r*m"), operand 1 must be a
;; register and operand 2 is the count (constraint "Ic").  The flags
;; register is clobbered.  The insn condition is empty.
14686 (define_insn "x86_shld"
14687 [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
14688 (ior:SI (ashift:SI (match_dup 0)
14689 (and:QI (match_operand:QI 2 "nonmemory_operand" "Ic")
14690 (const_int 31)))
14691 (subreg:SI
14692 (lshiftrt:DI
14693 (zero_extend:DI
14694 (match_operand:SI 1 "register_operand" "r"))
14695 (minus:QI (const_int 32)
14696 (and:QI (match_dup 2) (const_int 31)))) 0)))
14697 (clobber (reg:CC FLAGS_REG))]
14698 ""
14699 "shld{l}\t{%s2%1, %0|%0, %1, %2}"
14700 [(set_attr "type" "ishift")
14701 (set_attr "prefix_0f" "1")
14702 (set_attr "mode" "SI")
14703 (set_attr "pent_pair" "np")
14704 (set_attr "athlon_decode" "vector")
14705 (set_attr "amdfam10_decode" "vector")
14706 (set_attr "bdver1_decode" "vector")])
14707
;; Three-operand (TARGET_APX_NDD) form of the SImode double shift left:
;;   op0 = (op1 << (op3 & 31))
;;         | lowpart ((zero_extend:DI op2) >> (32 - (op3 & 31)))
;; The destination (operand 0, "=r") is separate from the left-shifted
;; source (operand 1, "rm"); operand 2 is a register and operand 3 is the
;; count (constraint "Ic").  The flags register is clobbered.
;; NOTE(review): operand 0 uses predicate "nonimmediate_operand" although its
;; only constraint is "=r"; x86_shld_ndd_1 uses "register_operand" for the
;; same position -- confirm whether the difference is intended.
14708 (define_insn "x86_shld_ndd"
14709 [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
14710 (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
14711 (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic")
14712 (const_int 31)))
14713 (subreg:SI
14714 (lshiftrt:DI
14715 (zero_extend:DI
14716 (match_operand:SI 2 "register_operand" "r"))
14717 (minus:QI (const_int 32)
14718 (and:QI (match_dup 3) (const_int 31)))) 0)))
14719 (clobber (reg:CC FLAGS_REG))]
14720 "TARGET_APX_NDD"
14721 "shld{l}\t{%s3%2, %1, %0|%0, %1, %2, %3}"
14722 [(set_attr "type" "ishift")
14723 (set_attr "mode" "SI")])
14724
14725
;; SImode double shift left by a constant count.  Operand 2 is the left-shift
;; count (0..31) and operand 3 the complementary right-shift count (0..63)
;; applied to the DImode zero-extension of operand 1; the insn condition
;; requires operand 3 == 32 - operand 2.  Operand 0 is updated in place and
;; the flags register is clobbered.  Only operand 2 appears in the output
;; template.
14726 (define_insn "x86_shld_1"
14727 [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
14728 (ior:SI (ashift:SI (match_dup 0)
14729 (match_operand:QI 2 "const_0_to_31_operand"))
14730 (subreg:SI
14731 (lshiftrt:DI
14732 (zero_extend:DI
14733 (match_operand:SI 1 "register_operand" "r"))
14734 (match_operand:QI 3 "const_0_to_63_operand")) 0)))
14735 (clobber (reg:CC FLAGS_REG))]
14736 "INTVAL (operands[3]) == 32 - INTVAL (operands[2])"
14737 "shld{l}\t{%2, %1, %0|%0, %1, %2}"
14738 [(set_attr "type" "ishift")
14739 (set_attr "prefix_0f" "1")
14740 (set_attr "length_immediate" "1")
14741 (set_attr "mode" "SI")
14742 (set_attr "pent_pair" "np")
14743 (set_attr "athlon_decode" "vector")
14744 (set_attr "amdfam10_decode" "vector")
14745 (set_attr "bdver1_decode" "vector")])
14746
;; Three-operand (TARGET_APX_NDD) SImode double shift left by a constant
;; count.  Operand 0 (a register) receives operand 1 ("rm") shifted left by
;; operand 3 (0..31), ORed with the low part of the DImode zero-extension of
;; operand 2 shifted right by operand 4 (0..63).  The insn condition requires
;; operand 4 == 32 - operand 3; only operand 3 appears in the output
;; template.  The flags register is clobbered.
14747 (define_insn "x86_shld_ndd_1"
14748 [(set (match_operand:SI 0 "register_operand" "=r")
14749 (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
14750 (match_operand:QI 3 "const_0_to_31_operand"))
14751 (subreg:SI
14752 (lshiftrt:DI
14753 (zero_extend:DI
14754 (match_operand:SI 2 "register_operand" "r"))
14755 (match_operand:QI 4 "const_0_to_63_operand")) 0)))
14756 (clobber (reg:CC FLAGS_REG))]
14757 "TARGET_APX_NDD
14758 && INTVAL (operands[4]) == 32 - INTVAL (operands[3])"
14759 "shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14760 [(set_attr "type" "ishift")
14761 (set_attr "length_immediate" "1")
14762 (set_attr "mode" "SI")])
14763
14764
;; Pre-reload splitter for an SImode funnel shift by constants written
;; without the zero-extension used by the shld/shrd insns:
;;   op0 = (op4 << op2) | (op1 >> op3),  with op3 == 32 - op2.
;; It is always split ("&& 1") and the replacement is emitted by hand:
;;  - when op4 is op0, a shld on op0 with op1 forced into a register;
;;  - when op1 is op0, a shrd on op0 with op4 forced into a register;
;;  - otherwise the result is built in a fresh pseudo and copied to op0,
;;    using the three-operand NDD insns when TARGET_APX_NDD and a
;;    copy of op4 followed by a two-operand shld when not.
(define_insn_and_split "*x86_shld_shrd_1_nozext"
  [(set (match_operand:SI 0 "nonimmediate_operand")
        (ior:SI (ashift:SI (match_operand:SI 4 "nonimmediate_operand")
                           (match_operand:QI 2 "const_0_to_31_operand"))
                (lshiftrt:SI
                  (match_operand:SI 1 "nonimmediate_operand")
                  (match_operand:QI 3 "const_0_to_31_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "INTVAL (operands[3]) == 32 - INTVAL (operands[2])
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx rsrc = operands[1];       /* Value that is shifted right.  */
  rtx lsrc = operands[4];       /* Value that is shifted left.  */
  rtx lcount = operands[2];
  rtx rcount = operands[3];

  if (rtx_equal_p (lsrc, dest))
    {
      /* Destination already holds the left-shifted value: shld in place.  */
      rsrc = force_reg (SImode, rsrc);
      emit_insn (gen_x86_shld_1 (dest, rsrc, lcount, rcount));
    }
  else if (rtx_equal_p (rsrc, dest))
    {
      /* Destination already holds the right-shifted value: shrd in place.  */
      lsrc = force_reg (SImode, lsrc);
      emit_insn (gen_x86_shrd_1 (dest, lsrc, rcount, lcount));
    }
  else if (!TARGET_APX_NDD)
    {
      /* Two-operand form only: copy the left-shifted value into a
         temporary, shift there, then store the result.  */
      rsrc = force_reg (SImode, rsrc);
      rtx result = gen_reg_rtx (SImode);
      emit_move_insn (result, lsrc);
      emit_insn (gen_x86_shld_1 (result, rsrc, lcount, rcount));
      emit_move_insn (dest, result);
    }
  else
    {
      /* Three-operand form: pick shld or shrd depending on which source
         is in memory.  */
      rtx result = gen_reg_rtx (SImode);
      if (MEM_P (lsrc))
        {
          rsrc = force_reg (SImode, rsrc);
          emit_insn (gen_x86_shld_ndd_1 (result, lsrc, rsrc,
                                         lcount, rcount));
        }
      else if (MEM_P (rsrc))
        emit_insn (gen_x86_shrd_ndd_1 (result, rsrc, lsrc,
                                       rcount, lcount));
      else
        emit_insn (gen_x86_shld_ndd_1 (result, lsrc, rsrc,
                                       lcount, rcount));
      emit_move_insn (dest, result);
    }
  DONE;
})
14816
;; Pre-reload splitter for an SImode double shift written with an explicit
;; "32 - count" right-shift amount:
;;   op0 = (op0 << op2) | (op1 >> (32 - op2))
;; It is always split ("&& 1") into the form with the count masked by 31 and
;; the right shift applied to the DImode zero-extension of operand 1, which
;; is the shape of the x86_shld insn.
;; NOTE(review): the condition requires TARGET_64BIT although every mode
;; involved is SImode/DImode -- confirm whether that restriction is intended.
14817 (define_insn_and_split "*x86_shld_2"
14818 [(set (match_operand:SI 0 "nonimmediate_operand")
14819 (ior:SI (ashift:SI (match_dup 0)
14820 (match_operand:QI 2 "nonmemory_operand"))
14821 (lshiftrt:SI (match_operand:SI 1 "register_operand")
14822 (minus:QI (const_int 32) (match_dup 2)))))
14823 (clobber (reg:CC FLAGS_REG))]
14824 "TARGET_64BIT && ix86_pre_reload_split ()"
14825 "#"
14826 "&& 1"
14827 [(parallel [(set (match_dup 0)
14828 (ior:SI (ashift:SI (match_dup 0)
14829 (and:QI (match_dup 2) (const_int 31)))
14830 (subreg:SI
14831 (lshiftrt:DI
14832 (zero_extend:DI (match_dup 1))
14833 (minus:QI (const_int 32)
14834 (and:QI (match_dup 2)
14835 (const_int 31)))) 0)))
14836 (clobber (reg:CC FLAGS_REG))])])
14837
;; Pre-reload splitter for the three-operand (TARGET_APX_NDD) SImode double
;; shift written with an explicit "32 - count" right-shift amount:
;;   op0 = (op1 << op3) | (op2 >> (32 - op3))
;; The shift is computed into a fresh pseudo (operand 4) in the shape of the
;; x86_shld_ndd insn (count masked by 31, right shift applied to the DImode
;; zero-extension of operand 2); the pseudo is then copied to operand 0.
;;
;; The final copy must be an insn of its own.  It used to be a third element
;; of the PARALLEL, which produced a (set, clobber, set) parallel that
;; x86_shld_ndd, a (set, clobber) pattern, cannot match.
(define_insn_and_split "*x86_shld_ndd_2"
  [(set (match_operand:SI 0 "nonimmediate_operand")
        (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand")
                           (match_operand:QI 3 "nonmemory_operand"))
                (lshiftrt:SI (match_operand:SI 2 "register_operand")
                             (minus:QI (const_int 32) (match_dup 3)))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_APX_NDD
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel [(set (match_dup 4)
                   (ior:SI (ashift:SI (match_dup 1)
                                      (and:QI (match_dup 3) (const_int 31)))
                           (subreg:SI
                             (lshiftrt:DI
                               (zero_extend:DI (match_dup 2))
                               (minus:QI (const_int 32)
                                         (and:QI (match_dup 3)
                                                 (const_int 31)))) 0)))
              (clobber (reg:CC FLAGS_REG))])
   (set (match_dup 0) (match_dup 4))]
{
  operands[4] = gen_reg_rtx (SImode);
  /* Kept from the original.  NOTE(review): the shift emitted next sets
     operand 4 in full, so this preload looks redundant -- confirm before
     removing it.  */
  emit_move_insn (operands[4], operands[0]);
})
14864
;; Conditional-move adjustment step, available only with TARGET_CMOVE.
;; Operand 4 is set by the preparation statement to the bit size of <MODE>.
;; The expansion emits three sets:
;;   1. flags (CCZ) = compare ((op2 & op4), 0)
;;   2. op0 = (flags != 0) ? op1 : op0
;;   3. op1 = (flags != 0) ? op3 : op1
14865 (define_expand "@x86_shift<mode>_adj_1"
14866 [(set (reg:CCZ FLAGS_REG)
14867 (compare:CCZ (and:QI (match_operand:QI 2 "register_operand")
14868 (match_dup 4))
14869 (const_int 0)))
14870 (set (match_operand:SWI48 0 "register_operand")
14871 (if_then_else:SWI48 (ne (reg:CCZ FLAGS_REG) (const_int 0))
14872 (match_operand:SWI48 1 "register_operand")
14873 (match_dup 0)))
14874 (set (match_dup 1)
14875 (if_then_else:SWI48 (ne (reg:CCZ FLAGS_REG) (const_int 0))
14876 (match_operand:SWI48 3 "register_operand")
14877 (match_dup 1)))]
14878 "TARGET_CMOVE"
14879 "operands[4] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));")
14880
;; Branching adjustment step with an empty condition.  It tests operand 2
;; against the bit size of <MODE>; when the test result is zero it jumps over
;; the adjustment, otherwise it copies operand 1 into operand 0 and clears
;; operand 1.
(define_expand "@x86_shift<mode>_adj_2"
  [(use (match_operand:SWI48 0 "register_operand"))
   (use (match_operand:SWI48 1 "register_operand"))
   (use (match_operand:QI 2 "register_operand"))]
  ""
{
  rtx_code_label *skip = gen_label_rtx ();

  /* Set the flags from operand 2 and the mode's bit size.  */
  rtx bitsize = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
  emit_insn (gen_testqi_ccz_1 (operands[2], bitsize));

  /* Jump to SKIP when the flags compare equal to zero.  */
  rtx flags = gen_rtx_REG (CCZmode, FLAGS_REG);
  rtx is_zero = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
  rtx branch = gen_rtx_IF_THEN_ELSE (VOIDmode, is_zero,
                                     gen_rtx_LABEL_REF (VOIDmode, skip),
                                     pc_rtx);
  rtx jump = emit_jump_insn (gen_rtx_SET (pc_rtx, branch));
  JUMP_LABEL (jump) = skip;

  /* Fall-through path: move operand 1 to operand 0, then zero operand 1.  */
  emit_move_insn (operands[0], operands[1]);
  ix86_expand_clear (operands[1]);

  emit_label (skip);
  LABEL_NUSES (skip) = 1;

  DONE;
})
14909
14910 ;; Avoid useless masking of count operand.
;; Matches a SWI48 left shift whose count is the QImode lowpart of
;; (and op2 op3), where the constant op3 has all of the low
;; GET_MODE_BITSIZE (<MODE>mode) - 1 mask bits set.  It is always split
;; ("&& 1") before reload into a plain shift by operand 2, dropping the AND.
;; The preparation statement forces operand 2 into a register and takes its
;; QImode lowpart.
14911 (define_insn_and_split "*ashl<mode>3_mask"
14912 [(set (match_operand:SWI48 0 "nonimmediate_operand")
14913 (ashift:SWI48
14914 (match_operand:SWI48 1 "nonimmediate_operand")
14915 (subreg:QI
14916 (and
14917 (match_operand 2 "int248_register_operand" "c,r")
14918 (match_operand 3 "const_int_operand")) 0)))
14919 (clobber (reg:CC FLAGS_REG))]
14920 "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)
14921 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
14922 == GET_MODE_BITSIZE (<MODE>mode)-1
14923 && ix86_pre_reload_split ()"
14924 "#"
14925 "&& 1"
14926 [(parallel
14927 [(set (match_dup 0)
14928 (ashift:SWI48 (match_dup 1)
14929 (match_dup 2)))
14930 (clobber (reg:CC FLAGS_REG))])]
14931 {
14932 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
14933 operands[2] = gen_lowpart (QImode, operands[2]);
14934 }
14935 [(set_attr "isa" "*,bmi2")])
14936
;; Variant of the count-mask removal where the AND is already in QImode:
;; the count is (and:QI op2 op3) with op2 a QImode register.  When the
;; constant op3 has all of the low GET_MODE_BITSIZE (<MODE>mode) - 1 mask
;; bits set, the insn is split before reload into a plain shift by operand 2.
;; No preparation code is needed.
14937 (define_insn_and_split "*ashl<mode>3_mask_1"
14938 [(set (match_operand:SWI48 0 "nonimmediate_operand")
14939 (ashift:SWI48
14940 (match_operand:SWI48 1 "nonimmediate_operand")
14941 (and:QI
14942 (match_operand:QI 2 "register_operand" "c,r")
14943 (match_operand:QI 3 "const_int_operand"))))
14944 (clobber (reg:CC FLAGS_REG))]
14945 "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)
14946 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
14947 == GET_MODE_BITSIZE (<MODE>mode)-1
14948 && ix86_pre_reload_split ()"
14949 "#"
14950 "&& 1"
14951 [(parallel
14952 [(set (match_dup 0)
14953 (ashift:SWI48 (match_dup 1)
14954 (match_dup 2)))
14955 (clobber (reg:CC FLAGS_REG))])]
14956 ""
14957 [(set_attr "isa" "*,bmi2")])
14958
;; BMI2 shlx: SWI48 left shift with a separate destination register, a
;; register-or-memory source and a register count in the same mode as the
;; data.  Unlike the other shift insns here, the pattern has no flags
;; clobber.
14959 (define_insn "*bmi2_ashl<mode>3_1"
14960 [(set (match_operand:SWI48 0 "register_operand" "=r")
14961 (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
14962 (match_operand:SWI48 2 "register_operand" "r")))]
14963 "TARGET_BMI2"
14964 "shlx\t{%2, %1, %0|%0, %1, %2}"
14965 [(set_attr "type" "ishiftx")
14966 (set_attr "mode" "<MODE>")])
14967
;; Left shift of a SWI48 value by a QImode count, clobbering the flags.
;; Alternatives, in order:
;;   0: two-operand shift in place (operand 1 tied to operand 0);
;;   1: type "lea", output as "#";
;;   2: type "ishiftx" (isa bmi2), output as "#";
;;   3: mask registers (isa avx512bw), type "msklog", output as "#";
;;   4: three-operand shift (isa apx_ndd).
;; A shift by 1 in alternative 0 is emitted as an add of the register to
;; itself when TARGET_DOUBLE_WITH_ADD and operand 0 is a register.
;;
;; In the "type" attribute every alternative-specific test comes before the
;; generic "alu" test.  Alternative 3 used to be tested after it; because a
;; mask register also satisfies register_operand, a mask shift by 1 could be
;; typed "alu" and reach the add template.  *ashlhi3_1 and *ashlqi3_1
;; already test their mask alternative first.
(define_insn "*ashl<mode>3_1"
  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k,r")
        (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k,rm")
                      (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r,<KS>,c<S>")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands,
                            TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  switch (get_attr_type (insn))
    {
    case TYPE_LEA:
    case TYPE_ISHIFTX:
    case TYPE_MSKLOG:
      /* These alternatives are not output directly.  */
      return "#";

    case TYPE_ALU:
      /* Doubling: only valid for an in-place shift by one.  */
      gcc_assert (operands[2] == const1_rtx);
      gcc_assert (rtx_equal_p (operands[0], operands[1]));
      return "add{<imodesuffix>}\t%0, %0";

    default:
      if (operands[2] == const1_rtx
          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
          /* For NDD form instructions related to TARGET_SHIFT1, the $1
             immediate do not need to be omitted as assembler will map it
             to use shorter encoding. */
          && !use_ndd)
        return "sal{<imodesuffix>}\t%0";
      else
        return use_ndd ? "sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
                       : "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
    }
}
  [(set_attr "isa" "*,*,bmi2,avx512bw,apx_ndd")
   (set (attr "type")
     (cond [(eq_attr "alternative" "1")
              (const_string "lea")
            (eq_attr "alternative" "2")
              (const_string "ishiftx")
            (eq_attr "alternative" "3")
              (const_string "msklog")
            (eq_attr "alternative" "4")
              (const_string "ishift")
            (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
                      (match_operand 0 "register_operand"))
                 (match_operand 2 "const1_operand"))
              (const_string "alu")
           ]
           (const_string "ishift")))
   (set (attr "length_immediate")
     (if_then_else
       (ior (eq_attr "type" "alu")
            (and (eq_attr "type" "ishift")
                 (and (match_operand 2 "const1_operand")
                      (ior (match_test "TARGET_SHIFT1")
                           (match_test "optimize_function_for_size_p (cfun)")))))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])
15028
15029 ;; Convert shift to the shiftx pattern to avoid flags dependency.
;; After reload, with TARGET_BMI2, a flag-clobbering SWI48 left shift into a
;; register by a register count is rewritten as the same shift without the
;; flags clobber.  The QImode count is converted to its <MODE>mode lowpart,
;; the count mode used by *bmi2_ashl<mode>3_1.
15030 (define_split
15031 [(set (match_operand:SWI48 0 "register_operand")
15032 (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
15033 (match_operand:QI 2 "register_operand")))
15034 (clobber (reg:CC FLAGS_REG))]
15035 "TARGET_BMI2 && reload_completed"
15036 [(set (match_dup 0)
15037 (ashift:SWI48 (match_dup 1) (match_dup 2)))]
15038 "operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
15039
;; BMI2 shlx for an SImode left shift whose result is zero-extended to
;; DImode.  The destination is printed with the %k modifier in the output
;; template.  Requires TARGET_64BIT and TARGET_BMI2; the pattern has no flags
;; clobber.
15040 (define_insn "*bmi2_ashlsi3_1_zext"
15041 [(set (match_operand:DI 0 "register_operand" "=r")
15042 (zero_extend:DI
15043 (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
15044 (match_operand:SI 2 "register_operand" "r"))))]
15045 "TARGET_64BIT && TARGET_BMI2"
15046 "shlx\t{%2, %1, %k0|%k0, %1, %2}"
15047 [(set_attr "type" "ishiftx")
15048 (set_attr "mode" "SI")])
15049
;; SImode left shift zero-extended into a DImode register, clobbering the
;; flags (TARGET_64BIT only).  Alternatives, in order:
;;   0: two-operand shift in place (operand 1 tied to operand 0);
;;   1: type "lea", output as "#";
;;   2: type "ishiftx" (isa bmi2), output as "#";
;;   3: three-operand shift (isa apx_ndd).
;; The "type" attribute tests alternatives 1, 2 and 3 before the generic
;; "alu" test, so only alternative 0 can be emitted as an add.
15050 (define_insn "*ashlsi3_1_zext"
15051 [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
15052 (zero_extend:DI
15053 (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l,rm,rm")
15054 (match_operand:QI 2 "nonmemory_operand" "cI,M,r,cI"))))
15055 (clobber (reg:CC FLAGS_REG))]
15056 "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands,
15057 TARGET_APX_NDD)"
15058 {
15059 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
15060 switch (get_attr_type (insn))
15061 {
15062 case TYPE_LEA:
15063 case TYPE_ISHIFTX:
15064 return "#";
15065
15066 case TYPE_ALU:
15067 gcc_assert (operands[2] == const1_rtx);
15068 return "add{l}\t%k0, %k0";
15069
15070 default:
/* The one-operand "sal" spelling is used only for a shift by 1 in the
   non-NDD alternative; the NDD alternative always prints all operands.  */
15071 if (operands[2] == const1_rtx
15072 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
15073 && !use_ndd)
15074 return "sal{l}\t%k0";
15075 else
15076 return use_ndd ? "sal{l}\t{%2, %1, %k0|%k0, %1, %2}"
15077 : "sal{l}\t{%2, %k0|%k0, %2}";
15078 }
15079 }
15080 [(set_attr "isa" "*,*,bmi2,apx_ndd")
15081 (set (attr "type")
15082 (cond [(eq_attr "alternative" "1")
15083 (const_string "lea")
15084 (eq_attr "alternative" "2")
15085 (const_string "ishiftx")
15086 (eq_attr "alternative" "3")
15087 (const_string "ishift")
15088 (and (match_test "TARGET_DOUBLE_WITH_ADD")
15089 (match_operand 2 "const1_operand"))
15090 (const_string "alu")
15091 ]
15092 (const_string "ishift")))
15093 (set (attr "length_immediate")
15094 (if_then_else
15095 (ior (eq_attr "type" "alu")
15096 (and (eq_attr "type" "ishift")
15097 (and (match_operand 2 "const1_operand")
15098 (ior (match_test "TARGET_SHIFT1")
15099 (match_test "optimize_function_for_size_p (cfun)")))))
15100 (const_string "0")
15101 (const_string "*")))
15102 (set_attr "mode" "SI")])
15103
15104 ;; Convert shift to the shiftx pattern to avoid flags dependency.
;; Zero-extended counterpart of the shiftx conversion: after reload, with
;; TARGET_64BIT and TARGET_BMI2, the flags clobber is dropped and the QImode
;; register count is replaced by its SImode lowpart, the count mode used by
;; *bmi2_ashlsi3_1_zext.
15105 (define_split
15106 [(set (match_operand:DI 0 "register_operand")
15107 (zero_extend:DI
15108 (ashift:SI (match_operand:SI 1 "nonimmediate_operand")
15109 (match_operand:QI 2 "register_operand"))))
15110 (clobber (reg:CC FLAGS_REG))]
15111 "TARGET_64BIT && TARGET_BMI2 && reload_completed"
15112 [(set (match_dup 0)
15113 (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
15114 "operands[2] = gen_lowpart (SImode, operands[2]);")
15115
;; HImode left shift by a QImode count, clobbering the flags.
;; Alternatives, in order:
;;   0: two-operand shift in place (operand 1 tied to operand 0);
;;   1: type "lea", output as "#" (its "mode" attribute is SI);
;;   2: mask registers (isa avx512f), type "msklog", output as "#";
;;   3: three-operand shift (isa apx_ndd).
;; The "type" attribute tests alternatives 1, 2 and 3 before the generic
;; "alu" test, so only alternative 0 can be emitted as an add.
15116 (define_insn "*ashlhi3_1"
15117 [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k,r")
15118 (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k,rm")
15119 (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww,cI")))
15120 (clobber (reg:CC FLAGS_REG))]
15121 "ix86_binary_operator_ok (ASHIFT, HImode, operands,
15122 TARGET_APX_NDD)"
15123 {
15124 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
15125 switch (get_attr_type (insn))
15126 {
15127 case TYPE_LEA:
15128 case TYPE_MSKLOG:
15129 return "#";
15130
15131 case TYPE_ALU:
15132 gcc_assert (operands[2] == const1_rtx);
15133 return "add{w}\t%0, %0";
15134
15135 default:
/* The one-operand "sal" spelling is used only for a shift by 1 in the
   non-NDD alternative; the NDD alternative always prints all operands.  */
15136 if (operands[2] == const1_rtx
15137 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
15138 && !use_ndd)
15139 return "sal{w}\t%0";
15140 else
15141 return use_ndd ? "sal{w}\t{%2, %1, %0|%0, %1, %2}"
15142 : "sal{w}\t{%2, %0|%0, %2}";
15143 }
15144 }
15145 [(set_attr "isa" "*,*,avx512f,apx_ndd")
15146 (set (attr "type")
15147 (cond [(eq_attr "alternative" "1")
15148 (const_string "lea")
15149 (eq_attr "alternative" "2")
15150 (const_string "msklog")
15151 (eq_attr "alternative" "3")
15152 (const_string "ishift")
15153 (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
15154 (match_operand 0 "register_operand"))
15155 (match_operand 2 "const1_operand"))
15156 (const_string "alu")
15157 ]
15158 (const_string "ishift")))
15159 (set (attr "length_immediate")
15160 (if_then_else
15161 (ior (eq_attr "type" "alu")
15162 (and (eq_attr "type" "ishift")
15163 (and (match_operand 2 "const1_operand")
15164 (ior (match_test "TARGET_SHIFT1")
15165 (match_test "optimize_function_for_size_p (cfun)")))))
15166 (const_string "0")
15167 (const_string "*")))
15168 (set_attr "mode" "HI,SI,HI,HI")])
15169
;; QImode left shift by a QImode count, clobbering the flags.
;; Alternatives, in order:
;;   0: two-operand shift in place on a "qm" operand, QImode;
;;   1: two-operand shift in place on any "r" register, done in SImode
;;      (the "mode" attribute is SI and the template uses %k0);
;;   2: type "lea", output as "#";
;;   3: mask registers (isa avx512dq), type "msklog", output as "#";
;;   4: three-operand shift (isa apx_ndd).
;; The add form for a shift by 1 uses the 32-bit register when operand 1 is a
;; register for which ANY_QI_REGNO_P is false.
;; NOTE(review): the "preferred_for_speed" condition covers alternatives 1
;; and 4, while the comment next to it mentions only alternative 1.
15170 (define_insn "*ashlqi3_1"
15171 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k,r")
15172 (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k,rm")
15173 (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb,cI")))
15174 (clobber (reg:CC FLAGS_REG))]
15175 "ix86_binary_operator_ok (ASHIFT, QImode, operands,
15176 TARGET_APX_NDD)"
15177 {
15178 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
15179 switch (get_attr_type (insn))
15180 {
15181 case TYPE_LEA:
15182 case TYPE_MSKLOG:
15183 return "#";
15184
15185 case TYPE_ALU:
15186 gcc_assert (operands[2] == const1_rtx);
15187 if (REG_P (operands[1]) && !ANY_QI_REGNO_P (REGNO (operands[1])))
15188 return "add{l}\t%k0, %k0";
15189 else
15190 return "add{b}\t%0, %0";
15191
15192 default:
15193 if (operands[2] == const1_rtx
15194 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
15195 && !use_ndd)
15196 {
15197 if (get_attr_mode (insn) == MODE_SI)
15198 return "sal{l}\t%k0";
15199 else
15200 return "sal{b}\t%0";
15201 }
15202 else
15203 {
15204 if (get_attr_mode (insn) == MODE_SI)
15205 return "sal{l}\t{%2, %k0|%k0, %2}";
15206 else
15207 return use_ndd ? "sal{b}\t{%2, %1, %0|%0, %1, %2}"
15208 : "sal{b}\t{%2, %0|%0, %2}";
15209 }
15210 }
15211 }
15212 [(set_attr "isa" "*,*,*,avx512dq,apx_ndd")
15213 (set (attr "type")
15214 (cond [(eq_attr "alternative" "2")
15215 (const_string "lea")
15216 (eq_attr "alternative" "3")
15217 (const_string "msklog")
15218 (eq_attr "alternative" "4")
15219 (const_string "ishift")
15220 (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
15221 (match_operand 0 "register_operand"))
15222 (match_operand 2 "const1_operand"))
15223 (const_string "alu")
15224 ]
15225 (const_string "ishift")))
15226 (set (attr "length_immediate")
15227 (if_then_else
15228 (ior (eq_attr "type" "alu")
15229 (and (eq_attr "type" "ishift")
15230 (and (match_operand 2 "const1_operand")
15231 (ior (match_test "TARGET_SHIFT1")
15232 (match_test "optimize_function_for_size_p (cfun)")))))
15233 (const_string "0")
15234 (const_string "*")))
15235 (set_attr "mode" "QI,SI,SI,QI,QI")
15236 ;; Potential partial reg stall on alternative 1.
15237 (set (attr "preferred_for_speed")
15238 (cond [(eq_attr "alternative" "1,4")
15239 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
15240 (symbol_ref "true")))])
15241
15242 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Left shift that writes only the low part (strict_low_part) of a SWI12
;; register, clobbering the flags.  Alternative 0 shifts in place (operand 1
;; tied to operand 0).  Alternative 1 takes the source from a different
;; register, is output as "#", and is split after reload -- when operands 0
;; and 1 differ -- into a copy of operand 1 into the low part of operand 0
;; followed by the in-place shift.  The insn is enabled when
;; !TARGET_PARTIAL_REG_STALL or when optimizing the function for size.
15243 (define_insn_and_split "*ashl<mode>3_1_slp"
15244 [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
15245 (ashift:SWI12 (match_operand:SWI12 1 "register_operand" "0,!<r>")
15246 (match_operand:QI 2 "nonmemory_operand" "cI,cI")))
15247 (clobber (reg:CC FLAGS_REG))]
15248 "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
15249 {
15250 if (which_alternative)
15251 return "#";
15252
15253 switch (get_attr_type (insn))
15254 {
15255 case TYPE_ALU:
15256 gcc_assert (operands[2] == const1_rtx);
15257 return "add{<imodesuffix>}\t%0, %0";
15258
15259 default:
15260 if (operands[2] == const1_rtx
15261 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
15262 return "sal{<imodesuffix>}\t%0";
15263 else
15264 return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
15265 }
15266 }
15267 "&& reload_completed
15268 && !(rtx_equal_p (operands[0], operands[1]))"
15269 [(set (strict_low_part (match_dup 0)) (match_dup 1))
15270 (parallel
15271 [(set (strict_low_part (match_dup 0))
15272 (ashift:SWI12 (match_dup 0) (match_dup 2)))
15273 (clobber (reg:CC FLAGS_REG))])]
15274 ""
15275 [(set (attr "type")
15276 (cond [(and (match_test "TARGET_DOUBLE_WITH_ADD")
15277 (match_operand 2 "const1_operand"))
15278 (const_string "alu")
15279 ]
15280 (const_string "ishift")))
15281 (set (attr "length_immediate")
15282 (if_then_else
15283 (ior (eq_attr "type" "alu")
15284 (and (eq_attr "type" "ishift")
15285 (and (match_operand 2 "const1_operand")
15286 (ior (match_test "TARGET_SHIFT1")
15287 (match_test "optimize_function_for_size_p (cfun)")))))
15288 (const_string "0")
15289 (const_string "*")))
15290 (set_attr "mode" "<MODE>")])
15291
15292 ;; Convert ashift to the lea pattern to avoid flags dependency.
;; After reload, a flag-clobbering left shift by 0..3 from one general
;; register into a different one is rewritten as a multiplication by
;; 1 << count in <LEAMODE>, without the flags clobber.  When <MODE> differs
;; from <LEAMODE>, both registers are replaced by their <LEAMODE> lowparts.
15293 (define_split
15294 [(set (match_operand:SWI 0 "general_reg_operand")
15295 (ashift:SWI (match_operand:SWI 1 "index_reg_operand")
15296 (match_operand 2 "const_0_to_3_operand")))
15297 (clobber (reg:CC FLAGS_REG))]
15298 "reload_completed
15299 && REGNO (operands[0]) != REGNO (operands[1])"
15300 [(set (match_dup 0)
15301 (mult:<LEAMODE> (match_dup 1) (match_dup 2)))]
15302 {
15303 if (<MODE>mode != <LEAMODE>mode)
15304 {
15305 operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]);
15306 operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]);
15307 }
/* Turn the shift count into the equivalent multiplier.  */
15308 operands[2] = GEN_INT (1 << INTVAL (operands[2]));
15309 })
15310
15311 ;; Convert ashift to the lea pattern to avoid flags dependency.
;; Zero-extended counterpart of the lea conversion (TARGET_64BIT only): after
;; reload, an SImode left shift by 0..3 whose result is zero-extended into a
;; different DImode register becomes a zero-extended SImode multiplication by
;; 1 << count, without the flags clobber.
15312 (define_split
15313 [(set (match_operand:DI 0 "general_reg_operand")
15314 (zero_extend:DI
15315 (ashift:SI (match_operand:SI 1 "index_reg_operand")
15316 (match_operand 2 "const_0_to_3_operand"))))
15317 (clobber (reg:CC FLAGS_REG))]
15318 "TARGET_64BIT && reload_completed
15319 && REGNO (operands[0]) != REGNO (operands[1])"
15320 [(set (match_dup 0)
15321 (zero_extend:DI (mult:SI (match_dup 1) (match_dup 2))))]
15322 {
15323 operands[1] = gen_lowpart (SImode, operands[1]);
/* Turn the shift count into the equivalent multiplier.  */
15324 operands[2] = GEN_INT (1 << INTVAL (operands[2]));
15325 })
15326
15327 ;; This pattern can't accept a variable shift count, since shifts by
15328 ;; zero don't affect the flags. We assume that shifts by constant
15329 ;; zero are optimized away.
;; Left shift by an immediate count that both stores the result in operand 0
;; and sets the flags from comparing the result with zero.  Alternative 0
;; shifts in place; alternative 1 is the three-operand form (isa apx_ndd).
;; The insn is enabled when optimizing for size, when
;; !TARGET_PARTIAL_FLAG_REG_STALL, or for a shift by 1 with TARGET_SHIFT1 or
;; (TARGET_DOUBLE_WITH_ADD and a register destination); in addition the flags
;; use must match CCGOCmode and ix86_binary_operator_ok must accept the
;; operands.
15330 (define_insn "*ashl<mode>3_cmp"
15331 [(set (reg FLAGS_REG)
15332 (compare
15333 (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm")
15334 (match_operand:QI 2 "<shift_immediate_operand>" "<S>,<S>"))
15335 (const_int 0)))
15336 (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
15337 (ashift:SWI (match_dup 1) (match_dup 2)))]
15338 "(optimize_function_for_size_p (cfun)
15339 || !TARGET_PARTIAL_FLAG_REG_STALL
15340 || (operands[2] == const1_rtx
15341 && (TARGET_SHIFT1
15342 || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))
15343 && ix86_match_ccmode (insn, CCGOCmode)
15344 && ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands,
15345 TARGET_APX_NDD)"
15346 {
15347 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
15348 switch (get_attr_type (insn))
15349 {
15350 case TYPE_ALU:
15351 gcc_assert (operands[2] == const1_rtx);
15352 return "add{<imodesuffix>}\t%0, %0";
15353
15354 default:
15355 if (operands[2] == const1_rtx
15356 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
15357 && !use_ndd)
15358 return "sal{<imodesuffix>}\t%0";
15359 else
15360 return use_ndd ? "sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
15361 : "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
15362 }
15363 }
15364 [(set_attr "isa" "*,apx_ndd")
15365 (set (attr "type")
15366 (cond [(eq_attr "alternative" "1")
15367 (const_string "ishift")
15368 (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
15369 (match_operand 0 "register_operand"))
15370 (match_operand 2 "const1_operand"))
15371 (const_string "alu")
15372 ]
15373 (const_string "ishift")))
15374 (set (attr "length_immediate")
15375 (if_then_else
15376 (ior (eq_attr "type" "alu")
15377 (and (eq_attr "type" "ishift")
15378 (and (match_operand 2 "const1_operand")
15379 (ior (match_test "TARGET_SHIFT1")
15380 (match_test "optimize_function_for_size_p (cfun)")))))
15381 (const_string "0")
15382 (const_string "*")))
15383 (set_attr "mode" "<MODE>")])
15384
;; SImode left shift by a constant 1..31 that sets the flags from comparing
;; the SImode result with zero and stores the result zero-extended in a
;; DImode register (TARGET_64BIT only).  Alternative 0 shifts in place;
;; alternative 1 is the three-operand form (isa apx_ndd).  The enabling
;; condition mirrors *ashl<mode>3_cmp, except that the TARGET_DOUBLE_WITH_ADD
;; case has no register test because operand 0 is always a register here.
15385 (define_insn "*ashlsi3_cmp_zext"
15386 [(set (reg FLAGS_REG)
15387 (compare
15388 (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
15389 (match_operand:QI 2 "const_1_to_31_operand"))
15390 (const_int 0)))
15391 (set (match_operand:DI 0 "register_operand" "=r,r")
15392 (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
15393 "TARGET_64BIT
15394 && (optimize_function_for_size_p (cfun)
15395 || !TARGET_PARTIAL_FLAG_REG_STALL
15396 || (operands[2] == const1_rtx
15397 && (TARGET_SHIFT1
15398 || TARGET_DOUBLE_WITH_ADD)))
15399 && ix86_match_ccmode (insn, CCGOCmode)
15400 && ix86_binary_operator_ok (ASHIFT, SImode, operands,
15401 TARGET_APX_NDD)"
15402 {
15403 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
15404 switch (get_attr_type (insn))
15405 {
15406 case TYPE_ALU:
15407 gcc_assert (operands[2] == const1_rtx);
15408 return "add{l}\t%k0, %k0";
15409
15410 default:
15411 if (operands[2] == const1_rtx
15412 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
15413 && !use_ndd)
15414 return "sal{l}\t%k0";
15415 else
15416 return use_ndd ? "sal{l}\t{%2, %1, %k0|%k0, %1, %2}"
15417 : "sal{l}\t{%2, %k0|%k0, %2}";
15418 }
15419 }
15420 [(set_attr "isa" "*,apx_ndd")
15421 (set (attr "type")
15422 (cond [(eq_attr "alternative" "1")
15423 (const_string "ishift")
15424 (and (match_test "TARGET_DOUBLE_WITH_ADD")
15425 (match_operand 2 "const1_operand"))
15426 (const_string "alu")
15427 ]
15428 (const_string "ishift")))
15429 (set (attr "length_immediate")
15430 (if_then_else
15431 (ior (eq_attr "type" "alu")
15432 (and (eq_attr "type" "ishift")
15433 (and (match_operand 2 "const1_operand")
15434 (ior (match_test "TARGET_SHIFT1")
15435 (match_test "optimize_function_for_size_p (cfun)")))))
15436 (const_string "0")
15437 (const_string "*")))
15438 (set_attr "mode" "SI")])
15439
;; Left shift by an immediate count performed only for its effect on the
;; flags: the flags are set from comparing the shifted value with zero and
;; the shifted value itself goes to a scratch register (operand 0).
;; Alternative 0 ties operand 1 to the scratch; alternative 1 is the
;; three-operand form (isa apx_ndd).  Unlike *ashl<mode>3_cmp, the condition
;; does not call ix86_binary_operator_ok.
15440 (define_insn "*ashl<mode>3_cconly"
15441 [(set (reg FLAGS_REG)
15442 (compare
15443 (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm")
15444 (match_operand:QI 2 "<shift_immediate_operand>" "<S>,<S>"))
15445 (const_int 0)))
15446 (clobber (match_scratch:SWI 0 "=<r>,r"))]
15447 "(optimize_function_for_size_p (cfun)
15448 || !TARGET_PARTIAL_FLAG_REG_STALL
15449 || (operands[2] == const1_rtx
15450 && (TARGET_SHIFT1
15451 || TARGET_DOUBLE_WITH_ADD)))
15452 && ix86_match_ccmode (insn, CCGOCmode)"
15453 {
15454 bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
15455 switch (get_attr_type (insn))
15456 {
15457 case TYPE_ALU:
15458 gcc_assert (operands[2] == const1_rtx);
15459 return "add{<imodesuffix>}\t%0, %0";
15460
15461 default:
15462 if (operands[2] == const1_rtx
15463 && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
15464 && !use_ndd)
15465 return "sal{<imodesuffix>}\t%0";
15466 else
15467 return use_ndd ? "sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
15468 : "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
15469 }
15470 }
15471 [(set_attr "isa" "*,apx_ndd")
15472 (set (attr "type")
15473 (cond [(eq_attr "alternative" "1")
15474 (const_string "ishift")
15475 (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
15476 (match_operand 0 "register_operand"))
15477 (match_operand 2 "const1_operand"))
15478 (const_string "alu")
15479 ]
15480 (const_string "ishift")))
15481 (set (attr "length_immediate")
15482 (if_then_else
15483 (ior (eq_attr "type" "alu")
15484 (and (eq_attr "type" "ishift")
15485 (and (match_operand 2 "const1_operand")
15486 (ior (match_test "TARGET_SHIFT1")
15487 (match_test "optimize_function_for_size_p (cfun)")))))
15488 (const_string "0")
15489 (const_string "*")))
15490 (set_attr "mode" "<MODE>")])
15491
15492 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Left shift of the byte held in bits 8..15 of operand 1, stored into bits
;; 8..15 of operand 0, clobbering the flags.  Alternative 0 works in place
;; (operand 1 tied to operand 0).  Alternative 1 takes the source from a
;; different register, is output as "#", and is split after reload into
;;   1. a copy of bits 8..15 of operand 1 into bits 8..15 of operand 0,
;;   2. the in-place shift on operand 0.
;;
;; The copy in step 1 must read bits 8..15 of operand 1, i.e. a zero_extract
;; of it.  It used to be a plain (match_dup 1), which stores the LOW byte of
;; operand 1 into the high byte of operand 0, so the shift then operated on
;; the wrong byte.
;; NOTE(review): this relies on an insn outside this excerpt that matches a
;; high-byte to high-byte copy -- confirm it exists in this file.
(define_insn_and_split "*ashlqi_ext<mode>_1"
  [(set (zero_extract:SWI248
          (match_operand 0 "int248_register_operand" "+Q,&Q")
          (const_int 8)
          (const_int 8))
        (subreg:SWI248
          (ashift:QI
            (subreg:QI
              (match_operator:SWI248 3 "extract_operator"
                [(match_operand 1 "int248_register_operand" "0,!Q")
                 (const_int 8)
                 (const_int 8)]) 0)
            (match_operand:QI 2 "nonmemory_operand" "cI,cI")) 0))
   (clobber (reg:CC FLAGS_REG))]
  ""
{
  /* Alternative 1 is never output directly; it is split after reload.  */
  if (which_alternative)
    return "#";

  switch (get_attr_type (insn))
    {
    case TYPE_ALU:
      gcc_assert (operands[2] == const1_rtx);
      return "add{b}\t%h0, %h0";

    default:
      if (operands[2] == const1_rtx
          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
        return "sal{b}\t%h0";
      else
        return "sal{b}\t{%2, %h0|%h0, %2}";
    }
}
  "reload_completed
   && !(rtx_equal_p (operands[0], operands[1]))"
  [(set (zero_extract:SWI248
          (match_dup 0) (const_int 8) (const_int 8))
        (zero_extract:SWI248
          (match_dup 1) (const_int 8) (const_int 8)))
   (parallel
     [(set (zero_extract:SWI248
             (match_dup 0) (const_int 8) (const_int 8))
           (subreg:SWI248
             (ashift:QI
               (subreg:QI
                 (match_op_dup 3
                   [(match_dup 0) (const_int 8) (const_int 8)]) 0)
               (match_dup 2)) 0))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set (attr "type")
     (cond [(and (match_test "TARGET_DOUBLE_WITH_ADD")
                 (match_operand 2 "const1_operand"))
              (const_string "alu")
           ]
           (const_string "ishift")))
   (set (attr "length_immediate")
     (if_then_else
       (ior (eq_attr "type" "alu")
            (and (eq_attr "type" "ishift")
                 (and (match_operand 2 "const1_operand")
                      (ior (match_test "TARGET_SHIFT1")
                           (match_test "optimize_function_for_size_p (cfun)")))))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "QI")])
15558
15559 ;; See comment above `ashl<mode>3' about how this works.
15560
;; Named expander for the right shifts covered by the any_shiftrt iterator,
;; in every SDWIM mode, with a QImode count.  All of the work is delegated to
;; ix86_expand_binary_operator, which is passed TARGET_APX_NDD as its last
;; argument; the RTL template itself is not emitted (DONE).
15561 (define_expand "<insn><mode>3"
15562 [(set (match_operand:SDWIM 0 "<shift_operand>")
15563 (any_shiftrt:SDWIM (match_operand:SDWIM 1 "<shift_operand>")
15564 (match_operand:QI 2 "nonmemory_operand")))]
15565 ""
15566 "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands,
15567 TARGET_APX_NDD); DONE;")
15568
15569 ;; Avoid useless masking of count operand.
;; Right-shift counterpart of *ashl<mode>3_mask: matches a SWI48 any_shiftrt
;; whose count is the QImode lowpart of (and op2 op3), where the constant op3
;; has all of the low GET_MODE_BITSIZE (<MODE>mode) - 1 mask bits set.  It is
;; always split ("&& 1") before reload into a plain shift by operand 2.  The
;; preparation statement forces operand 2 into a register and takes its
;; QImode lowpart.
15570 (define_insn_and_split "*<insn><mode>3_mask"
15571 [(set (match_operand:SWI48 0 "nonimmediate_operand")
15572 (any_shiftrt:SWI48
15573 (match_operand:SWI48 1 "nonimmediate_operand")
15574 (subreg:QI
15575 (and
15576 (match_operand 2 "int248_register_operand" "c,r")
15577 (match_operand 3 "const_int_operand")) 0)))
15578 (clobber (reg:CC FLAGS_REG))]
15579 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
15580 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
15581 == GET_MODE_BITSIZE (<MODE>mode)-1
15582 && ix86_pre_reload_split ()"
15583 "#"
15584 "&& 1"
15585 [(parallel
15586 [(set (match_dup 0)
15587 (any_shiftrt:SWI48 (match_dup 1)
15588 (match_dup 2)))
15589 (clobber (reg:CC FLAGS_REG))])]
15590 {
15591 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
15592 operands[2] = gen_lowpart (QImode, operands[2]);
15593 }
15594 [(set_attr "isa" "*,bmi2")])
15595
;; Variant of the masked-count shift in which the count is already a
;; QImode (and operand2 operand3) of a QImode register.  Under the same
;; condition on operand 3 the AND is dropped; operand 2 is used as is, so
;; no preparation code is needed.
(define_insn_and_split "*<insn><mode>3_mask_1"
  [(set (match_operand:SWI48 0 "nonimmediate_operand")
        (any_shiftrt:SWI48
          (match_operand:SWI48 1 "nonimmediate_operand")
          (and:QI
            (match_operand:QI 2 "register_operand" "c,r")
            (match_operand:QI 3 "const_int_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
   && ((INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode) - 1))
       == GET_MODE_BITSIZE (<MODE>mode) - 1)
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
           (any_shiftrt:SWI48 (match_dup 1)
                              (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "isa" "*,bmi2")])
15617
;; Doubleword right shift whose count is the QImode lowpart of
;; (and operand2 operand3).  Accepted when operand 3 either clears the
;; <MODE_SIZE> * BITS_PER_UNIT bit, or keeps every bit below
;; 2 * <MODE_SIZE> * BITS_PER_UNIT.  In the second case the mask is simply
;; dropped and the generic doubleword shift is emitted; in the first case
;; the shift is expanded into a double-shift of one half followed by a
;; shift of the other half.
(define_insn_and_split "*<insn><dwi>3_doubleword_mask"
  [(set (match_operand:<DWI> 0 "register_operand")
        (any_shiftrt:<DWI>
          (match_operand:<DWI> 1 "register_operand")
          (subreg:QI
            (and
              (match_operand 2 "int248_register_operand" "c")
              (match_operand 3 "const_int_operand")) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0
    || ((INTVAL (operands[3]) & (2 * <MODE_SIZE> * BITS_PER_UNIT - 1))
        == (2 * <MODE_SIZE> * BITS_PER_UNIT - 1)))
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 4)
           (ior:DWIH (lshiftrt:DWIH (match_dup 4)
                       (and:QI (match_dup 2) (match_dup 8)))
                     (subreg:DWIH
                       (ashift:<DWI> (zero_extend:<DWI> (match_dup 7))
                         (minus:QI (match_dup 9)
                                   (and:QI (match_dup 2) (match_dup 8)))) 0)))
      (clobber (reg:CC FLAGS_REG))])
   (parallel
     [(set (match_dup 6)
           (any_shiftrt:DWIH (match_dup 7) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
{
  const HOST_WIDE_INT count_mask = INTVAL (operands[3]);
  const HOST_WIDE_INT half_mask = (<MODE_SIZE> * BITS_PER_UNIT) - 1;

  if ((count_mask & (<MODE_SIZE> * BITS_PER_UNIT)) != 0)
    {
      /* Given the insn condition, the mask keeps a full doubleword count
         here: emit the generic doubleword shift on the unmasked count.  */
      rtx count = force_reg (GET_MODE (operands[2]), operands[2]);
      count = gen_lowpart (QImode, count);
      emit_insn (gen_<insn><dwi>3_doubleword (operands[0], operands[1],
                                              count));
      DONE;
    }

  /* Fills operands[4..5] and operands[6..7] from operands[0..1];
     presumably the low and the high halves respectively.  */
  split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);

  operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
  operands[9] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);

  if ((count_mask & half_mask) != half_mask)
    {
      /* The mask also clears bits below the half-word width, so it must
         still be applied explicitly (restricted to those bits).  */
      rtx and_ops[3];
      and_ops[0] = gen_reg_rtx (GET_MODE (operands[2]));
      and_ops[1] = operands[2];
      and_ops[2] = GEN_INT (count_mask & half_mask);
      ix86_expand_binary_operator (AND, GET_MODE (operands[2]), and_ops);
      operands[2] = and_ops[0];
    }

  operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
  operands[2] = gen_lowpart (QImode, operands[2]);

  /* The first emitted insn updates operand 4 in place, so it has to
     start out holding operand 5.  */
  if (!rtx_equal_p (operands[4], operands[5]))
    emit_move_insn (operands[4], operands[5]);
})
15679
;; Variant of the masked doubleword right shift in which the count is
;; already a QImode (and operand2 operand3) of a QImode register.  The
;; acceptance condition on operand 3 and the two possible expansions are
;; the same; only the handling of the count differs (no lowpart needed).
(define_insn_and_split "*<insn><dwi>3_doubleword_mask_1"
  [(set (match_operand:<DWI> 0 "register_operand")
        (any_shiftrt:<DWI>
          (match_operand:<DWI> 1 "register_operand")
          (and:QI
            (match_operand:QI 2 "register_operand" "c")
            (match_operand:QI 3 "const_int_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) == 0
    || ((INTVAL (operands[3]) & (2 * <MODE_SIZE> * BITS_PER_UNIT - 1))
        == (2 * <MODE_SIZE> * BITS_PER_UNIT - 1)))
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 4)
           (ior:DWIH (lshiftrt:DWIH (match_dup 4)
                       (and:QI (match_dup 2) (match_dup 8)))
                     (subreg:DWIH
                       (ashift:<DWI> (zero_extend:<DWI> (match_dup 7))
                         (minus:QI (match_dup 9)
                                   (and:QI (match_dup 2) (match_dup 8)))) 0)))
      (clobber (reg:CC FLAGS_REG))])
   (parallel
     [(set (match_dup 6)
           (any_shiftrt:DWIH (match_dup 7) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
{
  const HOST_WIDE_INT half_mask = (<MODE_SIZE> * BITS_PER_UNIT) - 1;

  if ((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT)) != 0)
    {
      /* Given the insn condition, the mask keeps a full doubleword count
         here: emit the generic doubleword shift on the unmasked count.  */
      emit_insn (gen_<insn><dwi>3_doubleword (operands[0], operands[1],
                                              operands[2]));
      DONE;
    }

  /* Fills operands[4..5] and operands[6..7] from operands[0..1];
     presumably the low and the high halves respectively.  */
  split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);

  operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
  operands[9] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);

  if ((INTVAL (operands[3]) & half_mask) != half_mask)
    {
      /* The mask also clears bits below the half-word width: apply it
         explicitly into a fresh QImode register.  */
      rtx masked_count = gen_reg_rtx (QImode);
      emit_insn (gen_andqi3 (masked_count, operands[2], operands[3]));
      operands[2] = masked_count;
    }

  /* The first emitted insn updates operand 4 in place, so it has to
     start out holding operand 5.  */
  if (!rtx_equal_p (operands[4], operands[5]))
    emit_move_insn (operands[4], operands[5]);
})
15731
;; Generic doubleword right shift.  It has no assembly of its own (the
;; output template is "#") and is split once epilogue_completed is set.
;; With APX NDD and a destination different from the source the split is
;; done by ix86_split_rshift_ndd, otherwise by ix86_split_<insn>; no
;; scratch register is offered (NULL_RTX).
(define_insn_and_split "<insn><mode>3_doubleword"
  [(set (match_operand:DWI 0 "register_operand" "=&r,&r")
        (any_shiftrt:DWI (match_operand:DWI 1 "register_operand" "0,r")
                         (match_operand:QI 2 "nonmemory_operand" "<S>c,<S>c")))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "#"
  "epilogue_completed"
  [(const_int 0)]
{
  if (!TARGET_APX_NDD || rtx_equal_p (operands[0], operands[1]))
    ix86_split_<insn> (operands, NULL_RTX, <MODE>mode);
  else
    ix86_split_rshift_ndd (<CODE>, operands, NULL_RTX);
  DONE;
}
  [(set_attr "type" "multi")
   (set_attr "isa" "*,apx_ndd")])
15751
;; The doubleword shift pattern does not request a scratch register by
;; default, because registers are already at a premium when DWImode values
;; are manipulated.  If peephole2 finds a free one (operand 3), though, it
;; is handed to the splitting routine.

(define_peephole2
  [(match_scratch:DWIH 3 "r")
   (parallel [(set (match_operand:<DWI> 0 "register_operand")
                   (any_shiftrt:<DWI>
                     (match_operand:<DWI> 1 "register_operand")
                     (match_operand:QI 2 "nonmemory_operand")))
              (clobber (reg:CC FLAGS_REG))])
   (match_dup 3)]
  "TARGET_CMOVE"
  [(const_int 0)]
{
  /* Same dispatch as the doubleword insn split, but with the scratch.  */
  if (!TARGET_APX_NDD || rtx_equal_p (operands[0], operands[1]))
    ix86_split_<insn> (operands, operands[3], <DWI>mode);
  else
    ix86_split_rshift_ndd (<CODE>, operands, operands[3]);
  DONE;
})
15774
;; Split truncations of doubleword right shifts into x86_shrd_1.
;; Only the DWIH lowpart of a doubleword shifted right by a constant
;; smaller than the half-word width is wanted; after reload this becomes
;; a single double-shift of the destination, which is first loaded with
;; one half of operand 1 while the other half supplies the fill bits.
(define_insn_and_split "<insn><dwi>3_doubleword_lowpart"
  [(set (match_operand:DWIH 0 "register_operand" "=&r")
        (subreg:DWIH
          (any_shiftrt:<DWI> (match_operand:<DWI> 1 "register_operand" "r")
                             (match_operand:QI 2 "const_int_operand")) 0))
   (clobber (reg:CC FLAGS_REG))]
  "UINTVAL (operands[2]) < <MODE_SIZE> * BITS_PER_UNIT"
  "#"
  "&& reload_completed"
  [(parallel
     [(set (match_dup 0)
           (ior:DWIH (lshiftrt:DWIH (match_dup 0) (match_dup 2))
                     (subreg:DWIH
                       (ashift:<DWI> (zero_extend:<DWI> (match_dup 3))
                                     (match_dup 4)) 0)))
      (clobber (reg:CC FLAGS_REG))])]
{
  const HOST_WIDE_INT right_count = INTVAL (operands[2]);

  /* Replace operand 1 by one half of itself and put the other half in
     operand 3.  */
  split_double_mode (<DWI>mode, &operands[1], 1, &operands[1], &operands[3]);

  /* Complementary left-shift count used for the fill bits.  */
  operands[4] = GEN_INT ((<MODE_SIZE> * BITS_PER_UNIT) - right_count);

  /* The emitted insn shifts operand 0 in place.  */
  if (!rtx_equal_p (operands[0], operands[1]))
    emit_move_insn (operands[0], operands[1]);
})
15798
;; 64-bit shrd with a variable or immediate count (operand 2, masked with
;; 63).  Operand 0 is shifted right in place and OR-ed with the DImode
;; lowpart of operand 1 zero-extended to TImode and shifted left by
;; 64 - (count & 63), i.e. the vacated bits come from operand 1.
(define_insn "x86_64_shrd"
  [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
        (ior:DI (lshiftrt:DI (match_dup 0)
                  (and:QI (match_operand:QI 2 "nonmemory_operand" "Jc")
                          (const_int 63)))
                (subreg:DI
                  (ashift:TI
                    (zero_extend:TI
                      (match_operand:DI 1 "register_operand" "r"))
                    (minus:QI (const_int 64)
                              (and:QI (match_dup 2) (const_int 63)))) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "shrd{q}\t{%s2%1, %0|%0, %1, %2}"
  [(set_attr "type" "ishift")
   (set_attr "prefix_0f" "1")
   (set_attr "mode" "DI")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "vector")
   (set_attr "bdver1_decode" "vector")])
15819
;; APX NDD form of the 64-bit shrd: the value shifted right is a separate
;; source (operand 1, register or memory), the fill bits come from
;; operand 2 and the result goes to a distinct register (operand 0).
;; The count is operand 3, masked with 63.
(define_insn "x86_64_shrd_ndd"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
                  (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc")
                          (const_int 63)))
                (subreg:DI
                  (ashift:TI
                    (zero_extend:TI
                      (match_operand:DI 2 "register_operand" "r"))
                    (minus:QI (const_int 64)
                              (and:QI (match_dup 3) (const_int 63)))) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_APX_NDD"
  "shrd{q}\t{%s3%2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ishift")
   (set_attr "mode" "DI")])
15836
15837
;; 64-bit shrd with a constant count.  Operand 2 is the right-shift count
;; and operand 3 the matching left-shift count applied to the fill value;
;; the insn condition requires operand 3 == 64 - operand 2.
(define_insn "x86_64_shrd_1"
  [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
        (ior:DI (lshiftrt:DI (match_dup 0)
                             (match_operand:QI 2 "const_0_to_63_operand"))
                (subreg:DI
                  (ashift:TI
                    (zero_extend:TI
                      (match_operand:DI 1 "register_operand" "r"))
                    (match_operand:QI 3 "const_0_to_255_operand")) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT
   && INTVAL (operands[3]) == 64 - INTVAL (operands[2])"
  "shrd{q}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ishift")
   (set_attr "prefix_0f" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "DI")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "vector")
   (set_attr "bdver1_decode" "vector")])
15858
;; APX NDD form of the constant-count 64-bit shrd.  Operand 3 is the
;; right-shift count applied to operand 1, operand 4 the left-shift count
;; applied to the fill value in operand 2; the insn condition requires
;; operand 4 == 64 - operand 3.
(define_insn "x86_64_shrd_ndd_1"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
                             (match_operand:QI 3 "const_0_to_63_operand"))
                (subreg:DI
                  (ashift:TI
                    (zero_extend:TI
                      (match_operand:DI 2 "register_operand" "r"))
                    (match_operand:QI 4 "const_0_to_255_operand")) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_APX_NDD
   && INTVAL (operands[4]) == 64 - INTVAL (operands[3])"
  "shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ishift")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "DI")])
15875
15876
;; (op4 >> op2) | (op1 << op3) in DImode with constant counts that add up
;; to 64, written without the zero_extend/subreg wrapper used by the
;; shrd/shld insns.  The pattern is rewritten into whichever of shrd,
;; shld or their NDD forms fits the operands, going through a temporary
;; when the destination matches neither source.
(define_insn_and_split "*x86_64_shrd_shld_1_nozext"
  [(set (match_operand:DI 0 "nonimmediate_operand")
        (ior:DI (lshiftrt:DI (match_operand:DI 4 "nonimmediate_operand")
                             (match_operand:QI 2 "const_0_to_63_operand"))
                (ashift:DI
                  (match_operand:DI 1 "nonimmediate_operand")
                  (match_operand:QI 3 "const_0_to_63_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT
   && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  /* Destination is the right-shifted source: in-place shrd.  */
  if (rtx_equal_p (operands[4], operands[0]))
    {
      rtx fill = force_reg (DImode, operands[1]);
      emit_insn (gen_x86_64_shrd_1 (operands[0], fill,
                                    operands[2], operands[3]));
      DONE;
    }

  /* Destination is the left-shifted source: in-place shld.  */
  if (rtx_equal_p (operands[1], operands[0]))
    {
      rtx fill = force_reg (DImode, operands[4]);
      emit_insn (gen_x86_64_shld_1 (operands[0], fill,
                                    operands[3], operands[2]));
      DONE;
    }

  /* Otherwise compute into a fresh register and copy it back.  */
  rtx result;
  if (TARGET_APX_NDD)
    {
      result = gen_reg_rtx (DImode);
      if (MEM_P (operands[4]))
        {
          /* The NDD shrd takes its shifted source from memory; the fill
             operand has to be a register.  */
          rtx fill = force_reg (DImode, operands[1]);
          emit_insn (gen_x86_64_shrd_ndd_1 (result, operands[4], fill,
                                            operands[2], operands[3]));
        }
      else if (MEM_P (operands[1]))
        emit_insn (gen_x86_64_shld_ndd_1 (result, operands[1], operands[4],
                                          operands[3], operands[2]));
      else
        emit_insn (gen_x86_64_shrd_ndd_1 (result, operands[4], operands[1],
                                          operands[2], operands[3]));
    }
  else
    {
      rtx fill = force_reg (DImode, operands[1]);
      result = gen_reg_rtx (DImode);
      emit_move_insn (result, operands[4]);
      emit_insn (gen_x86_64_shrd_1 (result, fill,
                                    operands[2], operands[3]));
    }

  emit_move_insn (operands[0], result);
  DONE;
})
15929
;; (op0 >> op2) | (op1 << (64 - op2)) in DImode with a non-constant or
;; unmasked count and without the zero_extend/subreg wrapper.  It is
;; rewritten into the canonical shrd RTL, in which the count is masked
;; with 63 and the fill value is widened to TImode.
(define_insn_and_split "*x86_64_shrd_2"
  [(set (match_operand:DI 0 "nonimmediate_operand")
        (ior:DI (lshiftrt:DI (match_dup 0)
                             (match_operand:QI 2 "nonmemory_operand"))
                (ashift:DI (match_operand:DI 1 "register_operand")
                           (minus:QI (const_int 64) (match_dup 2)))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel [(set (match_dup 0)
                   (ior:DI (lshiftrt:DI (match_dup 0)
                             (and:QI (match_dup 2) (const_int 63)))
                           (subreg:DI
                             (ashift:TI
                               (zero_extend:TI (match_dup 1))
                               (minus:QI (const_int 64)
                                         (and:QI (match_dup 2)
                                                 (const_int 63)))) 0)))
              (clobber (reg:CC FLAGS_REG))])])
15950
;; NDD counterpart of the unmasked DImode shrd idiom:
;; (op1 >> op3) | (op2 << (64 - op3)), where the destination (operand 0)
;; need not match operand 1.  It is rewritten into the canonical NDD shrd
;; RTL computing into a fresh pseudo (operand 4), followed by a separate
;; copy of that pseudo into operand 0.
;;
;; The left-shift count must refer to the QImode count, operand 3;
;; operand 2 is the DImode register supplying the fill bits.
(define_insn_and_split "*x86_64_shrd_ndd_2"
  [(set (match_operand:DI 0 "nonimmediate_operand")
        (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand")
                             (match_operand:QI 3 "nonmemory_operand"))
                (ashift:DI (match_operand:DI 2 "register_operand")
                           (minus:QI (const_int 64) (match_dup 3)))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_APX_NDD
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel [(set (match_dup 4)
                   (ior:DI (lshiftrt:DI (match_dup 1)
                             (and:QI (match_dup 3) (const_int 63)))
                           (subreg:DI
                             (ashift:TI
                               (zero_extend:TI (match_dup 2))
                               (minus:QI (const_int 64)
                                         (and:QI (match_dup 3)
                                                 (const_int 63)))) 0)))
              (clobber (reg:CC FLAGS_REG))])
   ;; The copy is its own insn: inside the parallel above it would read
   ;; operand 4 before the shift has written it, and the combined form
   ;; would not match the NDD shrd insn.
   (set (match_dup 0) (match_dup 4))]
{
  /* Fresh destination for the NDD shrd.  It is fully written by the
     shift, so it needs no initial value.  */
  operands[4] = gen_reg_rtx (DImode);
})
15977
;; 32-bit shrd with a variable or immediate count (operand 2, masked with
;; 31).  Operand 0 is shifted right in place and OR-ed with the SImode
;; lowpart of operand 1 zero-extended to DImode and shifted left by
;; 32 - (count & 31), i.e. the vacated bits come from operand 1.
(define_insn "x86_shrd"
  [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
        (ior:SI (lshiftrt:SI (match_dup 0)
                  (and:QI (match_operand:QI 2 "nonmemory_operand" "Ic")
                          (const_int 31)))
                (subreg:SI
                  (ashift:DI
                    (zero_extend:DI
                      (match_operand:SI 1 "register_operand" "r"))
                    (minus:QI (const_int 32)
                              (and:QI (match_dup 2) (const_int 31)))) 0)))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "shrd{l}\t{%s2%1, %0|%0, %1, %2}"
  [(set_attr "type" "ishift")
   (set_attr "prefix_0f" "1")
   (set_attr "mode" "SI")
   (set_attr "pent_pair" "np")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "vector")
   (set_attr "bdver1_decode" "vector")])
15999
;; APX NDD form of the 32-bit shrd: the value shifted right is a separate
;; source (operand 1, register or memory), the fill bits come from
;; operand 2 and the result goes to a distinct register (operand 0).
;; The count is operand 3, masked with 31.
(define_insn "x86_shrd_ndd"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
                  (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic")
                          (const_int 31)))
                (subreg:SI
                  (ashift:DI
                    (zero_extend:DI
                      (match_operand:SI 2 "register_operand" "r"))
                    (minus:QI (const_int 32)
                              (and:QI (match_dup 3) (const_int 31)))) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_APX_NDD"
  "shrd{l}\t{%s3%2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ishift")
   (set_attr "mode" "SI")])
16016
;; 32-bit shrd with a constant count.  Operand 2 is the right-shift count
;; and operand 3 the matching left-shift count applied to the fill value;
;; the insn condition requires operand 3 == 32 - operand 2.
(define_insn "x86_shrd_1"
  [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
        (ior:SI (lshiftrt:SI (match_dup 0)
                             (match_operand:QI 2 "const_0_to_31_operand"))
                (subreg:SI
                  (ashift:DI
                    (zero_extend:DI
                      (match_operand:SI 1 "register_operand" "r"))
                    (match_operand:QI 3 "const_0_to_63_operand")) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "INTVAL (operands[3]) == 32 - INTVAL (operands[2])"
  "shrd{l}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ishift")
   (set_attr "prefix_0f" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "SI")
   (set_attr "pent_pair" "np")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "vector")
   (set_attr "bdver1_decode" "vector")])
16037
;; APX NDD form of the constant-count 32-bit shrd.  Operand 3 is the
;; right-shift count applied to operand 1, operand 4 the left-shift count
;; applied to the fill value in operand 2; the insn condition requires
;; operand 4 == 32 - operand 3.
(define_insn "x86_shrd_ndd_1"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
                             (match_operand:QI 3 "const_0_to_31_operand"))
                (subreg:SI
                  (ashift:DI
                    (zero_extend:DI
                      (match_operand:SI 2 "register_operand" "r"))
                    (match_operand:QI 4 "const_0_to_63_operand")) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_APX_NDD
   && (INTVAL (operands[4]) == 32 - INTVAL (operands[3]))"
  "shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ishift")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "SI")])
16054
16055
;; (op4 >> op2) | (op1 << op3) in SImode with constant counts that add up
;; to 32, written without the zero_extend/subreg wrapper used by the
;; shrd/shld insns.  The pattern is rewritten into whichever of shrd,
;; shld or their NDD forms fits the operands, going through a temporary
;; when the destination matches neither source.
(define_insn_and_split "*x86_shrd_shld_1_nozext"
  [(set (match_operand:SI 0 "nonimmediate_operand")
        (ior:SI (lshiftrt:SI (match_operand:SI 4 "nonimmediate_operand")
                             (match_operand:QI 2 "const_0_to_31_operand"))
                (ashift:SI
                  (match_operand:SI 1 "nonimmediate_operand")
                  (match_operand:QI 3 "const_0_to_31_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "INTVAL (operands[3]) == 32 - INTVAL (operands[2])
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  /* Destination is the right-shifted source: in-place shrd.  */
  if (rtx_equal_p (operands[4], operands[0]))
    {
      rtx fill = force_reg (SImode, operands[1]);
      emit_insn (gen_x86_shrd_1 (operands[0], fill,
                                 operands[2], operands[3]));
      DONE;
    }

  /* Destination is the left-shifted source: in-place shld.  */
  if (rtx_equal_p (operands[1], operands[0]))
    {
      rtx fill = force_reg (SImode, operands[4]);
      emit_insn (gen_x86_shld_1 (operands[0], fill,
                                 operands[3], operands[2]));
      DONE;
    }

  /* Otherwise compute into a fresh register and copy it back.  */
  rtx result;
  if (TARGET_APX_NDD)
    {
      result = gen_reg_rtx (SImode);
      if (MEM_P (operands[4]))
        {
          /* The NDD shrd takes its shifted source from memory; the fill
             operand has to be a register.  */
          rtx fill = force_reg (SImode, operands[1]);
          emit_insn (gen_x86_shrd_ndd_1 (result, operands[4], fill,
                                         operands[2], operands[3]));
        }
      else if (MEM_P (operands[1]))
        emit_insn (gen_x86_shld_ndd_1 (result, operands[1], operands[4],
                                       operands[3], operands[2]));
      else
        emit_insn (gen_x86_shrd_ndd_1 (result, operands[4], operands[1],
                                       operands[2], operands[3]));
    }
  else
    {
      rtx fill = force_reg (SImode, operands[1]);
      result = gen_reg_rtx (SImode);
      emit_move_insn (result, operands[4]);
      emit_insn (gen_x86_shrd_1 (result, fill,
                                 operands[2], operands[3]));
    }

  emit_move_insn (operands[0], result);
  DONE;
})
16107
;; (op0 >> op2) | (op1 << (32 - op2)) in SImode with a non-constant or
;; unmasked count and without the zero_extend/subreg wrapper.  It is
;; rewritten into the canonical shrd RTL, in which the count is masked
;; with 31 and the fill value is widened to DImode.
;; NOTE(review): the condition requires TARGET_64BIT although the pattern
;; is SImode -- confirm this is intended.
(define_insn_and_split "*x86_shrd_2"
  [(set (match_operand:SI 0 "nonimmediate_operand")
        (ior:SI (lshiftrt:SI (match_dup 0)
                             (match_operand:QI 2 "nonmemory_operand"))
                (ashift:SI (match_operand:SI 1 "register_operand")
                           (minus:QI (const_int 32) (match_dup 2)))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel [(set (match_dup 0)
                   (ior:SI (lshiftrt:SI (match_dup 0)
                             (and:QI (match_dup 2) (const_int 31)))
                           (subreg:SI
                             (ashift:DI
                               (zero_extend:DI (match_dup 1))
                               (minus:QI (const_int 32)
                                         (and:QI (match_dup 2)
                                                 (const_int 31)))) 0)))
              (clobber (reg:CC FLAGS_REG))])])
16128
;; NDD counterpart of the unmasked SImode shrd idiom:
;; (op1 >> op3) | (op2 << (32 - op3)), where the destination (operand 0)
;; need not match operand 1.  It is rewritten into the canonical NDD shrd
;; RTL computing into a fresh pseudo (operand 4), followed by a separate
;; copy of that pseudo into operand 0.
(define_insn_and_split "*x86_shrd_ndd_2"
  [(set (match_operand:SI 0 "nonimmediate_operand")
        (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand")
                             (match_operand:QI 3 "nonmemory_operand"))
                (ashift:SI (match_operand:SI 2 "register_operand")
                           (minus:QI (const_int 32) (match_dup 3)))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_APX_NDD
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel [(set (match_dup 4)
                   (ior:SI (lshiftrt:SI (match_dup 1)
                             (and:QI (match_dup 3) (const_int 31)))
                           (subreg:SI
                             (ashift:DI
                               (zero_extend:DI (match_dup 2))
                               (minus:QI (const_int 32)
                                         (and:QI (match_dup 3)
                                                 (const_int 31)))) 0)))
              (clobber (reg:CC FLAGS_REG))])
   ;; The copy is its own insn: inside the parallel above it would read
   ;; operand 4 before the shift has written it, and the combined form
   ;; would not match the NDD shrd insn.
   (set (match_dup 0) (match_dup 4))]
{
  /* Fresh destination for the NDD shrd.  It is fully written by the
     shift, so it needs no initial value.  */
  operands[4] = gen_reg_rtx (SImode);
})
16155
;; Per-mode mnemonic, in {AT&T|Intel} spelling, that ashr<mode>3_cvt
;; prints for its first alternative.
(define_mode_attr cvt_mnemonic
  [(SI "{cltd|cdq}")
   (DI "{cqto|cqo}")])
16159
;; Arithmetic right shift by exactly GET_MODE_BITSIZE - 1.  Enabled when
;; TARGET_USE_CLTD is set or the function is optimized for size.
;; Alternative 0 (input in "a", output in "d") prints <cvt_mnemonic>;
;; the other two alternatives print a plain sar, in two-operand and NDD
;; three-operand form respectively.
(define_insn "ashr<mode>3_cvt"
  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=*d,rm,r")
        (ashiftrt:SWI48
          (match_operand:SWI48 1 "nonimmediate_operand" "*a,0,rm")
          (match_operand:QI 2 "const_int_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "INTVAL (operands[2]) == GET_MODE_BITSIZE (<MODE>mode) - 1
   && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
   && ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands,
                               TARGET_APX_NDD)"
  "@
   <cvt_mnemonic>
   sar{<imodesuffix>}\t{%2, %0|%0, %2}
   sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "*,*,apx_ndd")
   (set_attr "type" "imovx,ishift,ishift")
   (set_attr "prefix_0f" "0,*,*")
   (set_attr "length_immediate" "0,*,*")
   (set_attr "modrm" "0,1,1")
   (set_attr "mode" "<MODE>")])
16180
;; SImode arithmetic right shift by 31 whose result is zero-extended to
;; DImode (64-bit targets only).  Same three alternatives as
;; ashr<mode>3_cvt: cltd/cdq, two-operand sar, NDD three-operand sar; the
;; destination is printed as a 32-bit register (%k0).
(define_insn "*ashrsi3_cvt_zext"
  [(set (match_operand:DI 0 "register_operand" "=*d,r,r")
        (zero_extend:DI
          (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "*a,0,rm")
                       (match_operand:QI 2 "const_int_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && INTVAL (operands[2]) == 31
   && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands,
                               TARGET_APX_NDD)"
  "@
   {cltd|cdq}
   sar{l}\t{%2, %k0|%k0, %2}
   sar{l}\t{%2, %1, %k0|%k0, %1, %2}"
  [(set_attr "isa" "*,*,apx_ndd")
   (set_attr "type" "imovx,ishift,ishift")
   (set_attr "prefix_0f" "0,*,*")
   (set_attr "length_immediate" "0,*,*")
   (set_attr "modrm" "0,1,1")
   (set_attr "mode" "SI")])
16201
;; Conditional fix-up sequence built from operand 0, operand 1 and the
;; QImode count in operand 2.  It emits a test of the count against
;; GET_MODE_BITSIZE (<MODE>mode) followed by a branch around two insns:
;; a copy of operand 1 into operand 0 and an arithmetic right shift of
;; operand 1 by GET_MODE_BITSIZE - 1.
(define_expand "@x86_shift<mode>_adj_3"
  [(use (match_operand:SWI48 0 "register_operand"))
   (use (match_operand:SWI48 1 "register_operand"))
   (use (match_operand:QI 2 "register_operand"))]
  ""
{
  rtx_code_label *skip = gen_label_rtx ();

  /* Presumably tests the count bit equal to the mode width and sets
     the Z flag accordingly.  */
  emit_insn (gen_testqi_ccz_1 (operands[2],
                               GEN_INT (GET_MODE_BITSIZE (<MODE>mode))));

  /* Jump to SKIP when the flags compare equal to zero.  */
  rtx flags = gen_rtx_REG (CCZmode, FLAGS_REG);
  rtx is_zero = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
  rtx target = gen_rtx_IF_THEN_ELSE (VOIDmode, is_zero,
                                     gen_rtx_LABEL_REF (VOIDmode, skip),
                                     pc_rtx);
  rtx jump = emit_jump_insn (gen_rtx_SET (pc_rtx, target));
  JUMP_LABEL (jump) = skip;

  /* Fall-through path: operand 0 takes the old operand 1, and operand 1
     is replaced by its value shifted right by width - 1.  */
  emit_move_insn (operands[0], operands[1]);
  emit_insn (gen_ashr<mode>3_cvt (operands[1], operands[1],
                                  GEN_INT (GET_MODE_BITSIZE (<MODE>mode)
                                           - 1)));
  emit_label (skip);
  LABEL_NUSES (skip) = 1;

  DONE;
})
16230
;; BMI2 right shift: three-operand form whose count is a register in the
;; same mode as the data.  The pattern has no FLAGS_REG clobber.
(define_insn "*bmi2_<insn><mode>3_1"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (any_shiftrt:SWI48
          (match_operand:SWI48 1 "nonimmediate_operand" "rm")
          (match_operand:SWI48 2 "register_operand" "r")))]
  "TARGET_BMI2"
  "<shift>x\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ishiftx")
   (set_attr "mode" "<MODE>")])
16239
;; Arithmetic right shift for SWI48 modes.  Alternatives: 0 is the
;; two-operand sar, 1 is the BMI2 shiftx form (output as "#"), 2 is the
;; APX NDD three-operand sar.
(define_insn "*ashr<mode>3_1"
  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
        (ashiftrt:SWI48
          (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,rm")
          (match_operand:QI 2 "nonmemory_operand" "c<S>,r,c<S>")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands,
                            TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;

  if (get_attr_type (insn) == TYPE_ISHIFTX)
    return "#";

  /* The NDD form always spells out count, source and destination.  */
  if (use_ndd)
    return "sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";

  /* Shift by one can use the form without an explicit count.  */
  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
    return "sar{<imodesuffix>}\t%0";

  return "sar{<imodesuffix>}\t{%2, %0|%0, %2}";
}
  [(set_attr "isa" "*,bmi2,apx_ndd")
   (set_attr "type" "ishift,ishiftx,ishift")
   (set (attr "length_immediate")
     (if_then_else
       (and (match_operand 2 "const1_operand")
            (ior (match_test "TARGET_SHIFT1")
                 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])
16275
;; Specialization of *lshr<mode>3_1 below: extract the SImode highpart of
;; a DImode value (a logical right shift by 32), with the destination
;; expressed as an SImode register viewed through a DImode subreg.  The
;; alternatives that tie operand 1 to operand 0 overwrite the input.
(define_insn_and_split "*highpartdisi2"
  [(set (subreg:DI (match_operand:SI 0 "register_operand" "=r,x,?k,r") 0)
        (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0,0,k,rm")
                     (const_int 32)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "#"
  "&& reload_completed"
  [(parallel
     [(set (match_dup 0) (lshiftrt:DI (match_dup 1) (const_int 32)))
      (clobber (reg:CC FLAGS_REG))])]
{
  if (SSE_REG_P (operands[0]))
    {
      /* SSE register: use a V4SI shufps on the register itself instead
         of a shift.  */
      rtx vreg = gen_rtx_REG (V4SImode, REGNO (operands[0]));
      emit_insn (gen_sse_shufps_v4si (vreg, vreg, vreg,
                                      const1_rtx, const1_rtx,
                                      GEN_INT (5), GEN_INT (5)));
      DONE;
    }

  /* Otherwise re-express the destination as a DImode hard register so
     that the replacement is an ordinary DImode shift.  */
  operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
}
  [(set_attr "isa" "*,*,*,apx_ndd")])
16301
16302
;; Logical right shift for SWI48 modes.  Alternatives: 0 is the
;; two-operand shr, 1 is the BMI2 shiftx form, 2 operates on mask (k)
;; registers, 3 is the APX NDD three-operand shr.  Alternatives 1 and 2
;; are output as "#".
(define_insn "*lshr<mode>3_1"
  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,?k,r")
        (lshiftrt:SWI48
          (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,k,rm")
          (match_operand:QI 2 "nonmemory_operand" "c<S>,r,<KS>,c<S>")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (LSHIFTRT, <MODE>mode, operands,
                            TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;

  switch (get_attr_type (insn))
    {
    case TYPE_ISHIFTX:
    case TYPE_MSKLOG:
      return "#";

    default:
      break;
    }

  /* The NDD form always spells out count, source and destination.  */
  if (use_ndd)
    return "shr{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";

  /* Shift by one can use the form without an explicit count.  */
  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
    return "shr{<imodesuffix>}\t%0";

  return "shr{<imodesuffix>}\t{%2, %0|%0, %2}";
}
  [(set_attr "isa" "*,bmi2,avx512bw,apx_ndd")
   (set_attr "type" "ishift,ishiftx,msklog,ishift")
   (set (attr "length_immediate")
     (if_then_else
       (and (and (match_operand 2 "const1_operand")
                 (eq_attr "alternative" "0"))
            (ior (match_test "TARGET_SHIFT1")
                 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])
16340
;; Convert shift to the shiftx pattern to avoid flags dependency: after
;; reload on BMI2 targets, a right shift by a register count is replaced
;; by the same shift without the FLAGS_REG clobber, with the count viewed
;; in the mode of the shifted value.
(define_split
  [(set (match_operand:SWI48 0 "register_operand")
        (any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
                           (match_operand:QI 2 "register_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI2 && reload_completed"
  [(set (match_dup 0)
        (any_shiftrt:SWI48 (match_dup 1) (match_dup 2)))]
{
  operands[2] = gen_lowpart (<MODE>mode, operands[2]);
})
16351
;; BMI2 SImode right shift whose result is zero-extended to DImode
;; (64-bit targets only).  The destination is printed as a 32-bit
;; register (%k0); the pattern has no FLAGS_REG clobber.
(define_insn "*bmi2_<insn>si3_1_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
                          (match_operand:SI 2 "register_operand" "r"))))]
  "TARGET_64BIT && TARGET_BMI2"
  "<shift>x\t{%2, %1, %k0|%k0, %1, %2}"
  [(set_attr "type" "ishiftx")
   (set_attr "mode" "SI")])
16361
;; SImode right shift whose result is zero-extended to DImode (64-bit
;; targets only).  Alternatives: 0 is the two-operand form, 1 is the BMI2
;; shiftx form (output as "#"), 2 is the APX NDD three-operand form.  The
;; destination is printed as a 32-bit register (%k0).
(define_insn "*<insn>si3_1_zext"
  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
        (zero_extend:DI
          (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm,rm")
                          (match_operand:QI 2 "nonmemory_operand" "cI,r,cI"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands,
                                            TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;

  if (get_attr_type (insn) == TYPE_ISHIFTX)
    return "#";

  /* The NDD form always spells out count, source and destination.  */
  if (use_ndd)
    return "<shift>{l}\t{%2, %1, %k0|%k0, %1, %2}";

  /* Shift by one can use the form without an explicit count.  */
  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
    return "<shift>{l}\t%k0";

  return "<shift>{l}\t{%2, %k0|%k0, %2}";
}
  [(set_attr "isa" "*,bmi2,apx_ndd")
   (set_attr "type" "ishift,ishiftx,ishift")
   (set (attr "length_immediate")
     (if_then_else
       (and (match_operand 2 "const1_operand")
            (ior (match_test "TARGET_SHIFT1")
                 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "SI")])
16397
;; Convert shift to the shiftx pattern to avoid flags dependency.  This is
;; the zero-extending SImode case: after reload on 64-bit BMI2 targets the
;; FLAGS_REG clobber is dropped and the register count is viewed in
;; SImode.
(define_split
  [(set (match_operand:DI 0 "register_operand")
        (zero_extend:DI
          (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand")
                          (match_operand:QI 2 "register_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && TARGET_BMI2 && reload_completed"
  [(set (match_dup 0)
        (zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
{
  operands[2] = gen_lowpart (SImode, operands[2]);
})
16409
;; Arithmetic right shift for the SWI12 modes.  Alternative 0 is the
;; two-operand sar, alternative 1 the APX NDD three-operand sar.
(define_insn "*ashr<mode>3_1"
  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m, r")
        (ashiftrt:SWI12
          (match_operand:SWI12 1 "nonimmediate_operand" "0, rm")
          (match_operand:QI 2 "nonmemory_operand" "c<S>, c<S>")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands,
                            TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;

  /* The NDD form always spells out count, source and destination.  */
  if (use_ndd)
    return "sar{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";

  /* Shift by one can use the form without an explicit count.  */
  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
    return "sar{<imodesuffix>}\t%0";

  return "sar{<imodesuffix>}\t{%2, %0|%0, %2}";
}
  [(set_attr "isa" "*, apx_ndd")
   (set_attr "type" "ishift")
   (set (attr "length_immediate")
     (if_then_else
       (and (match_operand 2 "const1_operand")
            (ior (match_test "TARGET_SHIFT1")
                 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])
16438
;; QImode logical right shift.  Alternatives: 0 is the two-operand shr,
;; 1 operates on mask (k) registers and is output as "#", 2 is the APX NDD
;; three-operand shr.
(define_insn "*lshrqi3_1"
  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,?k,r")
        (lshiftrt:QI
          (match_operand:QI 1 "nonimmediate_operand" "0, k, rm")
          (match_operand:QI 2 "nonmemory_operand" "cI,Wb,cI")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (LSHIFTRT, QImode, operands,
                            TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;

  switch (get_attr_type (insn))
    {
    case TYPE_MSKLOG:
      return "#";

    case TYPE_ISHIFT:
      break;

    default:
      gcc_unreachable ();
    }

  /* The NDD form always spells out count, source and destination.  */
  if (use_ndd)
    return "shr{b}\t{%2, %1, %0|%0, %1, %2}";

  /* Shift by one can use the form without an explicit count.  */
  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
    return "shr{b}\t%0";

  return "shr{b}\t{%2, %0|%0, %2}";
}
  [(set_attr "isa" "*,avx512dq,apx_ndd")
   (set_attr "type" "ishift,msklog,ishift")
   (set (attr "length_immediate")
     (if_then_else
       (and (and (match_operand 2 "const1_operand")
                 (eq_attr "alternative" "0"))
            (ior (match_test "TARGET_SHIFT1")
                 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "QI")])
16476
;; HImode logical right shift.  Alternatives: 0 is the two-operand shr,
;; 1 operates on mask (k) registers and is output as "#", 2 is the APX NDD
;; three-operand shr.
(define_insn "*lshrhi3_1"
  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm, ?k, r")
        (lshiftrt:HI
          (match_operand:HI 1 "nonimmediate_operand" "0, k, rm")
          (match_operand:QI 2 "nonmemory_operand" "cI, Ww, cI")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (LSHIFTRT, HImode, operands,
                            TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;

  switch (get_attr_type (insn))
    {
    case TYPE_MSKLOG:
      return "#";

    case TYPE_ISHIFT:
      break;

    default:
      gcc_unreachable ();
    }

  /* The NDD form always spells out count, source and destination.  */
  if (use_ndd)
    return "shr{w}\t{%2, %1, %0|%0, %1, %2}";

  /* Shift by one can use the form without an explicit count.  */
  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
    return "shr{w}\t%0";

  return "shr{w}\t{%2, %0|%0, %2}";
}
  [(set_attr "isa" "*, avx512f, apx_ndd")
   (set_attr "type" "ishift,msklog,ishift")
   (set (attr "length_immediate")
     (if_then_else
       (and (and (match_operand 2 "const1_operand")
                 (eq_attr "alternative" "0"))
            (ior (match_test "TARGET_SHIFT1")
                 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "HI")])
16514
;; Right shift that writes only the low part (strict_low_part) of a
;; QImode/HImode register.
;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; That alternative is output as "#"; after reload, when operands 0 and 1
;; differ, the insn is split into a strict_low_part move of operand 1
;; into operand 0 followed by the in-place shift.
(define_insn_and_split "*<insn><mode>3_1_slp"
  [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
        (any_shiftrt:SWI12 (match_operand:SWI12 1 "register_operand" "0,!<r>")
                           (match_operand:QI 2 "nonmemory_operand" "cI,cI")))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
{
  /* Only alternative 0 has an assembly form.  */
  if (which_alternative != 0)
    return "#";

  const bool implicit_one
    = (operands[2] == const1_rtx
       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)));

  if (implicit_one)
    return "<shift>{<imodesuffix>}\t%0";

  return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
}
  "&& reload_completed
   && !(rtx_equal_p (operands[0], operands[1]))"
  [(set (strict_low_part (match_dup 0)) (match_dup 1))
   (parallel
     [(set (strict_low_part (match_dup 0))
           (any_shiftrt:SWI12 (match_dup 0) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "type" "ishift")
   (set (attr "length_immediate")
     (if_then_else
       (and (match_operand 2 "const1_operand")
            (ior (match_test "TARGET_SHIFT1")
                 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])
16549
;; Right shift that also sets the flags from comparing the result with
;; zero.
;; This pattern can't accept a variable shift count, since shifts by
;; zero don't affect the flags.  We assume that shifts by constant
;; zero are optimized away.
;; Alternative 0 is the two-operand form, alternative 1 the three-operand
;; APX NDD form.
(define_insn "*<insn><mode>3_cmp"
  [(set (reg FLAGS_REG)
        (compare
          (any_shiftrt:SWI
            (match_operand:SWI 1 "nonimmediate_operand" "0,rm")
            (match_operand:QI 2 "<shift_immediate_operand>" "<S>,<S>"))
          (const_int 0)))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
        (any_shiftrt:SWI (match_dup 1) (match_dup 2)))]
  "(optimize_function_for_size_p (cfun)
    || !TARGET_PARTIAL_FLAG_REG_STALL
    || (operands[2] == const1_rtx
        && TARGET_SHIFT1))
   && ix86_match_ccmode (insn, CCGOCmode)
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands,
                               TARGET_APX_NDD)"
{
  /* The NDD alternative always names all three operands.  */
  if (get_attr_isa (insn) == ISA_APX_NDD)
    return "<shift>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";

  /* Shift by one printed without a count operand.  */
  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
    return "<shift>{<imodesuffix>}\t%0";

  return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
}
  [(set_attr "isa" "*,apx_ndd")
   (set_attr "type" "ishift")
   (set (attr "length_immediate")
     (if_then_else
       (and (match_operand 2 "const1_operand")
            (ior (match_test "TARGET_SHIFT1")
                 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])
16589
;; 64-bit only: SImode right shift by a constant in 1..31 that sets the
;; flags from the SImode result and stores that result zero-extended
;; into a DImode register.  The destination is printed with %k0.
;; Alternative 0 is the two-operand form, alternative 1 the three-operand
;; APX NDD form.
(define_insn "*<insn>si3_cmp_zext"
  [(set (reg FLAGS_REG)
        (compare
          (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
                          (match_operand:QI 2 "const_1_to_31_operand"))
          (const_int 0)))
   (set (match_operand:DI 0 "register_operand" "=r,r")
        (zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
  "TARGET_64BIT
   && (optimize_function_for_size_p (cfun)
       || !TARGET_PARTIAL_FLAG_REG_STALL
       || (operands[2] == const1_rtx
           && TARGET_SHIFT1))
   && ix86_match_ccmode (insn, CCGOCmode)
   && ix86_binary_operator_ok (<CODE>, SImode, operands,
                               TARGET_APX_NDD)"
{
  /* The NDD alternative always names all three operands.  */
  if (get_attr_isa (insn) == ISA_APX_NDD)
    return "<shift>{l}\t{%2, %1, %k0|%k0, %1, %2}";

  /* Shift by one printed without a count operand.  */
  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
    return "<shift>{l}\t%k0";

  return "<shift>{l}\t{%2, %k0|%k0, %2}";
}
  [(set_attr "isa" "*,apx_ndd")
   (set_attr "type" "ishift")
   (set (attr "length_immediate")
     (if_then_else
       (and (match_operand 2 "const1_operand")
            (ior (match_test "TARGET_SHIFT1")
                 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "SI")])
16626
;; Right shift by an immediate count executed only for its effect on the
;; flags: the shifted value goes to a scratch register (operand 0) that
;; is merely clobbered.  Alternative 0 ties operand 1 to the scratch,
;; alternative 1 is the three-operand APX NDD form.
(define_insn "*<insn><mode>3_cconly"
  [(set (reg FLAGS_REG)
        (compare
          (any_shiftrt:SWI
            (match_operand:SWI 1 "nonimmediate_operand" "0,rm")
            (match_operand:QI 2 "<shift_immediate_operand>" "<S>,<S>"))
          (const_int 0)))
   (clobber (match_scratch:SWI 0 "=<r>,r"))]
  "(optimize_function_for_size_p (cfun)
    || !TARGET_PARTIAL_FLAG_REG_STALL
    || (operands[2] == const1_rtx
        && TARGET_SHIFT1))
   && ix86_match_ccmode (insn, CCGOCmode)"
{
  /* The NDD alternative always names all three operands.  */
  if (get_attr_isa (insn) == ISA_APX_NDD)
    return "<shift>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";

  /* Shift by one printed without a count operand.  */
  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
    return "<shift>{<imodesuffix>}\t%0";

  return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
}
  [(set_attr "isa" "*,apx_ndd")
   (set_attr "type" "ishift")
   (set (attr "length_immediate")
     (if_then_else
       (and (match_operand 2 "const1_operand")
            (ior (match_test "TARGET_SHIFT1")
                 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])
16661
;; Right shift of the byte held in bits 8..15 of a register (the
;; zero_extract of width 8 at position 8), written back to the same
;; bit-field of operand 0.  The byte register is printed with %h0.
;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; That alternative is output as "#"; after reload, when operands 0 and 1
;; differ, the insn is split into a bit-field copy of operand 1 into
;; operand 0 followed by the in-place shift.
(define_insn_and_split "*<insn>qi_ext<mode>_1"
  [(set (zero_extract:SWI248
          (match_operand 0 "int248_register_operand" "+Q,&Q")
          (const_int 8)
          (const_int 8))
        (subreg:SWI248
          (any_shiftrt:QI
            (subreg:QI
              (match_operator:SWI248 3 "extract_operator"
                [(match_operand 1 "int248_register_operand" "0,!Q")
                 (const_int 8)
                 (const_int 8)]) 0)
            (match_operand:QI 2 "nonmemory_operand" "cI,cI")) 0))
   (clobber (reg:CC FLAGS_REG))]
  ""
{
  /* Only alternative 0 has an assembly form.  */
  if (which_alternative != 0)
    return "#";

  const bool implicit_one
    = (operands[2] == const1_rtx
       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)));

  if (implicit_one)
    return "<shift>{b}\t%h0";

  return "<shift>{b}\t{%2, %h0|%h0, %2}";
}
  "reload_completed
   && !(rtx_equal_p (operands[0], operands[1]))"
  [(set (zero_extract:SWI248
          (match_dup 0) (const_int 8) (const_int 8))
        (match_dup 1))
   (parallel
     [(set (zero_extract:SWI248
             (match_dup 0) (const_int 8) (const_int 8))
           (subreg:SWI248
             (any_shiftrt:QI
               (subreg:QI
                 (match_op_dup 3
                   [(match_dup 0) (const_int 8) (const_int 8)]) 0)
               (match_dup 2)) 0))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "type" "ishift")
   (set (attr "length_immediate")
     (if_then_else
       (and (match_operand 2 "const1_operand")
            (ior (match_test "TARGET_SHIFT1")
                 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "QI")])
16713
;; Double-word left shift followed by an arithmetic right shift by the
;; same constant count.  The insn condition requires operands 2 and 3 to
;; be equal and smaller than <MODE_SIZE> * BITS_PER_UNIT.
;; It is always output as "#" and, after reload, split into an ashift and
;; an ashiftrt of operand 4 in the half-width (DWIH) mode, each with a
;; flags clobber.  Operand 4 is filled in by split_double_mode from
;; operand 0 (NOTE(review): presumably the high half, as the pattern name
;; suggests -- confirm against split_double_mode).
16714 (define_insn_and_split "*extend<dwi>2_doubleword_highpart"
16715 [(set (match_operand:<DWI> 0 "register_operand" "=r")
16716 (ashiftrt:<DWI>
16717 (ashift:<DWI> (match_operand:<DWI> 1 "nonimmediate_operand" "0")
16718 (match_operand:QI 2 "const_int_operand"))
16719 (match_operand:QI 3 "const_int_operand")))
16720 (clobber (reg:CC FLAGS_REG))]
16721 "INTVAL (operands[2]) == INTVAL (operands[3])
16722 && UINTVAL (operands[2]) < <MODE_SIZE> * BITS_PER_UNIT"
16723 "#"
16724 "&& reload_completed"
16725 [(parallel [(set (match_dup 4)
16726 (ashift:DWIH (match_dup 4) (match_dup 2)))
16727 (clobber (reg:CC FLAGS_REG))])
16728 (parallel [(set (match_dup 4)
16729 (ashiftrt:DWIH (match_dup 4) (match_dup 2)))
16730 (clobber (reg:CC FLAGS_REG))])]
16731 "split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[4]);")
16732
;; V2DI counterpart of the shift-left-then-arithmetic-shift-right pair,
;; enabled only for !TARGET_64BIT with TARGET_STV and TARGET_AVX512VL, and
;; only when both counts are the same constant below 32.
;; Always output as "#"; after reload it is split into an ashift of
;; operand 1 into operand 0 followed by an in-place ashiftrt of operand 0.
;; Neither the pattern nor its split clobbers the flags register.
16733 (define_insn_and_split "*extendv2di2_highpart_stv"
16734 [(set (match_operand:V2DI 0 "register_operand" "=v")
16735 (ashiftrt:V2DI
16736 (ashift:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "vm")
16737 (match_operand:QI 2 "const_int_operand"))
16738 (match_operand:QI 3 "const_int_operand")))]
16739 "!TARGET_64BIT && TARGET_STV && TARGET_AVX512VL
16740 && INTVAL (operands[2]) == INTVAL (operands[3])
16741 && UINTVAL (operands[2]) < 32"
16742 "#"
16743 "&& reload_completed"
16744 [(set (match_dup 0)
16745 (ashift:V2DI (match_dup 1) (match_dup 2)))
16746 (set (match_dup 0)
16747 (ashiftrt:V2DI (match_dup 0) (match_dup 2)))])
16748 \f
16749 ;; Rotate instructions
16750
;; Expander for TImode rotates (64-bit targets only).  Three cases,
;; selected by the rotate count in operand 2:
;;   - a constant accepted by const_1_to_63_operand: use the
;;     ix86_<insn>ti3_doubleword pattern;
;;   - the constant 64: use <insn>64ti2_doubleword on a register copy of
;;     operand 1;
;;   - anything else: open-code the rotate with a pair of double-word
;;     shift insns on DImode halves, then emit x86_shiftdi_adj_1.
(define_expand "<insn>ti3"
  [(set (match_operand:TI 0 "register_operand")
        (any_rotate:TI (match_operand:TI 1 "register_operand")
                       (match_operand:QI 2 "nonmemory_operand")))]
  "TARGET_64BIT"
{
  if (const_1_to_63_operand (operands[2], VOIDmode))
    {
      emit_insn (gen_ix86_<insn>ti3_doubleword
                 (operands[0], operands[1], operands[2]));
      DONE;
    }

  if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 64)
    {
      operands[1] = force_reg (TImode, operands[1]);
      emit_insn (gen_<insn>64ti2_doubleword (operands[0], operands[1]));
      DONE;
    }

  /* General case.  A left rotate uses shld, a right rotate shrd.  */
  rtx (*gen_dshift) (rtx, rtx, rtx) = gen_x86_64_shrd;
  if (<CODE> == ROTATE)
    gen_dshift = gen_x86_64_shld;

  rtx count_reg = force_reg (QImode, operands[2]);
  rtx in_lo = gen_lowpart (DImode, operands[1]);
  rtx in_hi = gen_highpart (DImode, operands[1]);

  /* Work on copies so each half can still be read as the other half's
     fill-in source.  */
  rtx work_lo = gen_reg_rtx (DImode);
  rtx work_hi = gen_reg_rtx (DImode);
  emit_move_insn (work_lo, in_lo);
  emit_move_insn (work_hi, in_hi);
  emit_insn (gen_dshift (work_lo, in_hi, count_reg));
  emit_insn (gen_dshift (work_hi, in_lo, count_reg));

  rtx out_lo = gen_lowpart (DImode, operands[0]);
  rtx out_hi = gen_highpart (DImode, operands[0]);
  emit_move_insn (out_lo, work_lo);
  emit_move_insn (out_hi, work_hi);

  /* NOTE(review): presumably fixes up the halves for counts of 64 or
     more -- confirm against x86_shiftdi_adj_1.  */
  emit_insn (gen_x86_shiftdi_adj_1 (out_lo, out_hi, count_reg, work_lo));
  DONE;
})
16786
;; Expander for DImode rotates.
;;   - On 64-bit targets it defers to ix86_expand_binary_operator.
;;   - On 32-bit targets a constant accepted by const_1_to_31_operand uses
;;     ix86_<insn>di3_doubleword, the constant 32 uses
;;     <insn>32di2_doubleword on a register copy of operand 1, and any
;;     other count makes the expander FAIL.
(define_expand "<insn>di3"
  [(set (match_operand:DI 0 "shiftdi_operand")
        (any_rotate:DI (match_operand:DI 1 "shiftdi_operand")
                       (match_operand:QI 2 "nonmemory_operand")))]
  ""
{
  if (TARGET_64BIT)
    {
      ix86_expand_binary_operator (<CODE>, DImode, operands,
                                   TARGET_APX_NDD);
      DONE;
    }

  if (const_1_to_31_operand (operands[2], VOIDmode))
    {
      emit_insn (gen_ix86_<insn>di3_doubleword
                 (operands[0], operands[1], operands[2]));
      DONE;
    }

  /* Only a rotate by exactly 32 remains supported.  */
  if (!(CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 32))
    FAIL;

  operands[1] = force_reg (DImode, operands[1]);
  emit_insn (gen_<insn>32di2_doubleword (operands[0], operands[1]));
  DONE;
})
16809
;; Expander for rotates in the SWIM124 modes.  All work is delegated to
;; ix86_expand_binary_operator, which is told whether APX NDD is enabled;
;; the expander then finishes with DONE.
16810 (define_expand "<insn><mode>3"
16811 [(set (match_operand:SWIM124 0 "nonimmediate_operand")
16812 (any_rotate:SWIM124 (match_operand:SWIM124 1 "nonimmediate_operand")
16813 (match_operand:QI 2 "nonmemory_operand")))]
16814 ""
16815 "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands,
16816 TARGET_APX_NDD); DONE;")
16817
16818 ;; Avoid useless masking of count operand.
;; Matches a rotate whose count is the low QImode subreg of
;; (and operand2 operand3), where the constant operand 3 has every bit of
;; GET_MODE_BITSIZE (<MODE>mode) - 1 set.  Only valid before reload
;; (ix86_pre_reload_split); it is always split ("&& 1") into a plain
;; rotate whose count is the QImode low part of operand 2, i.e. the AND
;; is dropped.
16819 (define_insn_and_split "*<insn><mode>3_mask"
16820 [(set (match_operand:SWI 0 "nonimmediate_operand")
16821 (any_rotate:SWI
16822 (match_operand:SWI 1 "nonimmediate_operand")
16823 (subreg:QI
16824 (and
16825 (match_operand 2 "int248_register_operand" "c")
16826 (match_operand 3 "const_int_operand")) 0)))
16827 (clobber (reg:CC FLAGS_REG))]
16828 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
16829 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
16830 == GET_MODE_BITSIZE (<MODE>mode)-1
16831 && ix86_pre_reload_split ()"
16832 "#"
16833 "&& 1"
16834 [(parallel
16835 [(set (match_dup 0)
16836 (any_rotate:SWI (match_dup 1)
16837 (match_dup 2)))
16838 (clobber (reg:CC FLAGS_REG))])]
16839 {
16840 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
16841 operands[2] = gen_lowpart (QImode, operands[2]);
16842 })
16843
;; Rotate of a constant (operand 1) by a masked register count taken as a
;; QImode subreg.  When the mask (operand 3) has every bit of
;; GET_MODE_BITSIZE (<MODE>mode) - 1 set, the constant is first loaded
;; into a fresh pseudo (operand 4) and then rotated by the unmasked
;; (subreg:QI operand2 0).  The pattern has no flags clobber.
16844 (define_split
16845 [(set (match_operand:SWI 0 "register_operand")
16846 (any_rotate:SWI
16847 (match_operand:SWI 1 "const_int_operand")
16848 (subreg:QI
16849 (and
16850 (match_operand 2 "int248_register_operand")
16851 (match_operand 3 "const_int_operand")) 0)))]
16852 "(INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode) - 1))
16853 == GET_MODE_BITSIZE (<MODE>mode) - 1"
16854 [(set (match_dup 4) (match_dup 1))
16855 (set (match_dup 0)
16856 (any_rotate:SWI (match_dup 4)
16857 (subreg:QI (match_dup 2) 0)))]
16858 "operands[4] = gen_reg_rtx (<MODE>mode);")
16859
;; Same simplification as the subreg form, for a count that is directly a
;; QImode (and operand2 operand3).  When operand 3 has every bit of
;; GET_MODE_BITSIZE (<MODE>mode) - 1 set, the insn is split before reload
;; into a plain rotate by operand 2; no preparation code is needed
;; because operand 2 is already a QImode register.
16860 (define_insn_and_split "*<insn><mode>3_mask_1"
16861 [(set (match_operand:SWI 0 "nonimmediate_operand")
16862 (any_rotate:SWI
16863 (match_operand:SWI 1 "nonimmediate_operand")
16864 (and:QI
16865 (match_operand:QI 2 "register_operand" "c")
16866 (match_operand:QI 3 "const_int_operand"))))
16867 (clobber (reg:CC FLAGS_REG))]
16868 "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
16869 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
16870 == GET_MODE_BITSIZE (<MODE>mode)-1
16871 && ix86_pre_reload_split ()"
16872 "#"
16873 "&& 1"
16874 [(parallel
16875 [(set (match_dup 0)
16876 (any_rotate:SWI (match_dup 1)
16877 (match_dup 2)))
16878 (clobber (reg:CC FLAGS_REG))])])
16879
;; Rotate of a constant (operand 1) by a QImode (and operand2 operand3)
;; count.  When the mask has every bit of GET_MODE_BITSIZE (<MODE>mode) - 1
;; set, the constant is loaded into a fresh pseudo (operand 4) and rotated
;; by operand 2 with the AND removed.  The pattern has no flags clobber.
16880 (define_split
16881 [(set (match_operand:SWI 0 "register_operand")
16882 (any_rotate:SWI
16883 (match_operand:SWI 1 "const_int_operand")
16884 (and:QI
16885 (match_operand:QI 2 "register_operand")
16886 (match_operand:QI 3 "const_int_operand"))))]
16887 "(INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode) - 1))
16888 == GET_MODE_BITSIZE (<MODE>mode) - 1"
16889 [(set (match_dup 4) (match_dup 1))
16890 (set (match_dup 0)
16891 (any_rotate:SWI (match_dup 4) (match_dup 2)))]
16892 "operands[4] = gen_reg_rtx (<MODE>mode);")
16893
16894 ;; Implement rotation using two double-precision
16895 ;; shift instructions and a scratch register.
16896
;; Rotate-left of a double-word register by an immediate count.  Always
;; output as "#"; after reload it is split into three steps:
;;   1. copy operand 4 into the scratch register (operand 3);
;;   2. set operand 4 to (operand4 << n) IOR the low half of
;;      (zero_extend (operand5) >> (operands[7] - n));
;;   3. set operand 5 the same way, with the saved scratch as the value
;;      shifted in.
;; Here n is (operand2 AND operands[6]); the preparation code sets
;; operands[6] to the half-mode bit size minus one and operands[7] to the
;; half-mode bit size, and split_double_mode fills operands 4 and 5 with
;; the two halves of operand 0.
16897 (define_insn_and_split "ix86_rotl<dwi>3_doubleword"
16898 [(set (match_operand:<DWI> 0 "register_operand" "=r")
16899 (rotate:<DWI> (match_operand:<DWI> 1 "register_operand" "0")
16900 (match_operand:QI 2 "<shift_immediate_operand>" "<S>")))
16901 (clobber (reg:CC FLAGS_REG))
16902 (clobber (match_scratch:DWIH 3 "=&r"))]
16903 ""
16904 "#"
16905 "reload_completed"
16906 [(set (match_dup 3) (match_dup 4))
16907 (parallel
16908 [(set (match_dup 4)
16909 (ior:DWIH (ashift:DWIH (match_dup 4)
16910 (and:QI (match_dup 2) (match_dup 6)))
16911 (subreg:DWIH
16912 (lshiftrt:<DWI> (zero_extend:<DWI> (match_dup 5))
16913 (minus:QI (match_dup 7)
16914 (and:QI (match_dup 2)
16915 (match_dup 6)))) 0)))
16916 (clobber (reg:CC FLAGS_REG))])
16917 (parallel
16918 [(set (match_dup 5)
16919 (ior:DWIH (ashift:DWIH (match_dup 5)
16920 (and:QI (match_dup 2) (match_dup 6)))
16921 (subreg:DWIH
16922 (lshiftrt:<DWI> (zero_extend:<DWI> (match_dup 3))
16923 (minus:QI (match_dup 7)
16924 (and:QI (match_dup 2)
16925 (match_dup 6)))) 0)))
16926 (clobber (reg:CC FLAGS_REG))])]
16927 {
16928 operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - 1);
16929 operands[7] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
16930
16931 split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
16932 })
16933
;; Rotate-right of a double-word register by an immediate count; the
;; mirror image of the rotate-left pattern.  Always output as "#"; after
;; reload it is split into three steps:
;;   1. copy operand 4 into the scratch register (operand 3);
;;   2. set operand 4 to (operand4 >> n, logical) IOR the low half of
;;      (zero_extend (operand5) << (operands[7] - n));
;;   3. set operand 5 the same way, with the saved scratch as the value
;;      shifted in.
;; Here n is (operand2 AND operands[6]); the preparation code sets
;; operands[6] to the half-mode bit size minus one and operands[7] to the
;; half-mode bit size, and split_double_mode fills operands 4 and 5 with
;; the two halves of operand 0.
16934 (define_insn_and_split "ix86_rotr<dwi>3_doubleword"
16935 [(set (match_operand:<DWI> 0 "register_operand" "=r")
16936 (rotatert:<DWI> (match_operand:<DWI> 1 "register_operand" "0")
16937 (match_operand:QI 2 "<shift_immediate_operand>" "<S>")))
16938 (clobber (reg:CC FLAGS_REG))
16939 (clobber (match_scratch:DWIH 3 "=&r"))]
16940 ""
16941 "#"
16942 "reload_completed"
16943 [(set (match_dup 3) (match_dup 4))
16944 (parallel
16945 [(set (match_dup 4)
16946 (ior:DWIH (lshiftrt:DWIH (match_dup 4)
16947 (and:QI (match_dup 2) (match_dup 6)))
16948 (subreg:DWIH
16949 (ashift:<DWI> (zero_extend:<DWI> (match_dup 5))
16950 (minus:QI (match_dup 7)
16951 (and:QI (match_dup 2)
16952 (match_dup 6)))) 0)))
16953 (clobber (reg:CC FLAGS_REG))])
16954 (parallel
16955 [(set (match_dup 5)
16956 (ior:DWIH (lshiftrt:DWIH (match_dup 5)
16957 (and:QI (match_dup 2) (match_dup 6)))
16958 (subreg:DWIH
16959 (ashift:<DWI> (zero_extend:<DWI> (match_dup 3))
16960 (minus:QI (match_dup 7)
16961 (and:QI (match_dup 2)
16962 (match_dup 6)))) 0)))
16963 (clobber (reg:CC FLAGS_REG))])]
16964 {
16965 operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - 1);
16966 operands[7] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
16967
16968 split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
16969 })
16970
;; DImode rotate by exactly 32 on 32-bit targets, i.e. an exchange of the
;; two SImode halves.  Always output as "#" and split after reload.
;; split_double_mode splits operands 0 and 1 into SImode halves, stored in
;; operands[0..1] and operands[2..3].  The replacement template then emits
;; two cross-wise moves (operand 0 <- operand 3, operand 2 <- operand 1).
;; When source and destination are the same register (alternative "0"),
;; a single swapsi is emitted instead and the template is skipped via DONE.
16971 (define_insn_and_split "<insn>32di2_doubleword"
16972 [(set (match_operand:DI 0 "register_operand" "=r,r")
16973 (any_rotate:DI (match_operand:DI 1 "register_operand" "0,r")
16974 (const_int 32)))]
16975 "!TARGET_64BIT"
16976 "#"
16977 "&& reload_completed"
16978 [(set (match_dup 0) (match_dup 3))
16979 (set (match_dup 2) (match_dup 1))]
16980 {
16981 split_double_mode (DImode, &operands[0], 2, &operands[0], &operands[2]);
16982 if (rtx_equal_p (operands[0], operands[1]))
16983 {
16984 emit_insn (gen_swapsi (operands[0], operands[2]));
16985 DONE;
16986 }
16987 })
16988
;; TImode rotate by exactly 64 on 64-bit targets, i.e. an exchange of the
;; two DImode halves.  Always output as "#" and split after reload.
;; split_double_mode splits operands 0 and 1 into DImode halves, stored in
;; operands[0..1] and operands[2..3].  The replacement template then emits
;; two cross-wise moves (operand 0 <- operand 3, operand 2 <- operand 1).
;; When source and destination are the same register (alternative "0"),
;; a single swapdi is emitted instead and the template is skipped via DONE.
16989 (define_insn_and_split "<insn>64ti2_doubleword"
16990 [(set (match_operand:TI 0 "register_operand" "=r,r")
16991 (any_rotate:TI (match_operand:TI 1 "register_operand" "0,r")
16992 (const_int 64)))]
16993 "TARGET_64BIT"
16994 "#"
16995 "&& reload_completed"
16996 [(set (match_dup 0) (match_dup 3))
16997 (set (match_dup 2) (match_dup 1))]
16998 {
16999 split_double_mode (TImode, &operands[0], 2, &operands[0], &operands[2]);
17000 if (rtx_equal_p (operands[0], operands[1]))
17001 {
17002 emit_insn (gen_swapdi (operands[0], operands[2]));
17003 DONE;
17004 }
17005 })
17006
;; Predicate name for the rorx rotate count, selected by mode:
;; 0..31 for SImode and 0..63 for DImode.
17007 (define_mode_attr rorx_immediate_operand
17008 [(SI "const_0_to_31_operand")
17009 (DI "const_0_to_63_operand")])
17010
;; BMI2 rorx: rotate right by an immediate count, with a separate
;; register destination and a register-or-memory source.  The pattern has
;; no flags clobber.  Enabled only with TARGET_BMI2 and when not optimizing
;; the function for size.
17011 (define_insn "*bmi2_rorx<mode>3_1"
17012 [(set (match_operand:SWI48 0 "register_operand" "=r")
17013 (rotatert:SWI48
17014 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
17015 (match_operand:QI 2 "<rorx_immediate_operand>" "<S>")))]
17016 "TARGET_BMI2 && !optimize_function_for_size_p (cfun)"
17017 "rorx\t{%2, %1, %0|%0, %1, %2}"
17018 [(set_attr "type" "rotatex")
17019 (set_attr "mode" "<MODE>")])
17020
;; SImode/DImode rotate by a register or immediate count, clobbering the
;; flags.  Three alternatives:
;;   0 - two-operand form, operand 1 tied to operand 0; the only
;;       alternative marked preferred_for_size;
;;   1 - isa bmi2, type rotatex, immediate count only; output as "#",
;;       i.e. no assembly text is produced for it here;
;;   2 - three-operand form, isa apx_ndd.
(define_insn "*<insn><mode>3_1"
  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
        (any_rotate:SWI48
          (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,rm")
          (match_operand:QI 2 "nonmemory_operand" "c<S>,<S>,c<S>")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands,
                            TARGET_APX_NDD)"
{
  if (get_attr_type (insn) == TYPE_ROTATEX)
    return "#";

  /* The NDD alternative always names all three operands.  */
  if (get_attr_isa (insn) == ISA_APX_NDD)
    return "<rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";

  /* Rotate by one printed without a count operand.  */
  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
    return "<rotate>{<imodesuffix>}\t%0";

  return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
}
  [(set_attr "isa" "*,bmi2,apx_ndd")
   (set_attr "type" "rotate,rotatex,rotate")
   (set (attr "preferred_for_size")
     (cond [(eq_attr "alternative" "0")
              (symbol_ref "true")]
           (symbol_ref "false")))
   (set (attr "length_immediate")
     (if_then_else
       (and (eq_attr "type" "rotate")
            (and (match_operand 2 "const1_operand")
                 (ior (match_test "TARGET_SHIFT1")
                      (match_test "optimize_function_for_size_p (cfun)"))))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])
17061
17062 ;; Convert rotate to the rotatex pattern to avoid flags dependency.
;; After reload, with BMI2 and when not optimizing for size, a left
;; rotate by a constant n becomes a right rotate by
;; (bitsize - n) % bitsize, and the flags clobber is dropped.
17063 (define_split
17064 [(set (match_operand:SWI48 0 "register_operand")
17065 (rotate:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
17066 (match_operand:QI 2 "const_int_operand")))
17067 (clobber (reg:CC FLAGS_REG))]
17068 "TARGET_BMI2 && reload_completed && !optimize_function_for_size_p (cfun)"
17069 [(set (match_dup 0)
17070 (rotatert:SWI48 (match_dup 1) (match_dup 2)))]
17071 {
17072 int bitsize = GET_MODE_BITSIZE (<MODE>mode);
17073
17074 operands[2] = GEN_INT ((bitsize - INTVAL (operands[2])) % bitsize);
17075 })
17076
;; After reload, with BMI2 and when not optimizing for size, a right
;; rotate by a constant is rewritten to the same rotate without the flags
;; clobber; the count is left unchanged.
17077 (define_split
17078 [(set (match_operand:SWI48 0 "register_operand")
17079 (rotatert:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
17080 (match_operand:QI 2 "const_int_operand")))
17081 (clobber (reg:CC FLAGS_REG))]
17082 "TARGET_BMI2 && reload_completed && !optimize_function_for_size_p (cfun)"
17083 [(set (match_dup 0)
17084 (rotatert:SWI48 (match_dup 1) (match_dup 2)))])
17085
;; 64-bit only: BMI2 rorx on an SImode source with the result
;; zero-extended into a DImode register (printed as %k0).  The count is a
;; constant in 0..31 and the pattern has no flags clobber.  Disabled when
;; optimizing the function for size.
17086 (define_insn "*bmi2_rorxsi3_1_zext"
17087 [(set (match_operand:DI 0 "register_operand" "=r")
17088 (zero_extend:DI
17089 (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
17090 (match_operand:QI 2 "const_0_to_31_operand"))))]
17091 "TARGET_64BIT && TARGET_BMI2 && !optimize_function_for_size_p (cfun)"
17092 "rorx\t{%2, %1, %k0|%k0, %1, %2}"
17093 [(set_attr "type" "rotatex")
17094 (set_attr "mode" "SI")])
17095
;; 64-bit only: SImode rotate whose result is zero-extended into a DImode
;; register (printed as %k0), clobbering the flags.  Three alternatives:
;;   0 - two-operand form, operand 1 tied to operand 0; the only
;;       alternative marked preferred_for_size;
;;   1 - isa bmi2, type rotatex, immediate count only; output as "#",
;;       i.e. no assembly text is produced for it here;
;;   2 - three-operand form, isa apx_ndd.
;; The insn condition passes TARGET_APX_NDD to ix86_binary_operator_ok,
;; as the other patterns in this file with an apx_ndd alternative do, so
;; the operand check knows that operand 1 need not match operand 0 when
;; the three-operand form is available.
(define_insn "*<insn>si3_1_zext"
  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
        (zero_extend:DI
          (any_rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm,rm")
                         (match_operand:QI 2 "nonmemory_operand" "cI,I,cI"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT
   && ix86_binary_operator_ok (<CODE>, SImode, operands,
                               TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  switch (get_attr_type (insn))
    {
    case TYPE_ROTATEX:
      return "#";

    default:
      /* Rotate by one may be printed without a count operand, but only
         in the two-operand form.  */
      if (operands[2] == const1_rtx
          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
          && !use_ndd)
        return "<rotate>{l}\t%k0";
      else
        return use_ndd ? "<rotate>{l}\t{%2, %1, %k0|%k0, %1, %2}"
                       : "<rotate>{l}\t{%2, %k0|%k0, %2}";
    }
}
  [(set_attr "isa" "*,bmi2,apx_ndd")
   (set_attr "type" "rotate,rotatex,rotate")
   (set (attr "preferred_for_size")
     (cond [(eq_attr "alternative" "0")
              (symbol_ref "true")]
           (symbol_ref "false")))
   (set (attr "length_immediate")
     (if_then_else
       (and (eq_attr "type" "rotate")
            (and (match_operand 2 "const1_operand")
                 (ior (match_test "TARGET_SHIFT1")
                      (match_test "optimize_function_for_size_p (cfun)"))))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "SI")])
17135
17136 ;; Convert rotate to the rotatex pattern to avoid flags dependency.
;; Zero-extending SImode variant: after reload, on 64-bit targets with
;; BMI2 and when not optimizing for size, a left rotate by a constant n
;; becomes a right rotate by (32 - n) % 32, and the flags clobber is
;; dropped.
17137 (define_split
17138 [(set (match_operand:DI 0 "register_operand")
17139 (zero_extend:DI
17140 (rotate:SI (match_operand:SI 1 "nonimmediate_operand")
17141 (match_operand:QI 2 "const_int_operand"))))
17142 (clobber (reg:CC FLAGS_REG))]
17143 "TARGET_64BIT && TARGET_BMI2 && reload_completed
17144 && !optimize_function_for_size_p (cfun)"
17145 [(set (match_dup 0)
17146 (zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))]
17147 {
17148 int bitsize = GET_MODE_BITSIZE (SImode);
17149
17150 operands[2] = GEN_INT ((bitsize - INTVAL (operands[2])) % bitsize);
17151 })
17152
;; Zero-extending SImode variant: after reload, on 64-bit targets with
;; BMI2 and when not optimizing for size, a right rotate by a constant is
;; rewritten to the same rotate without the flags clobber; the count is
;; left unchanged.
17153 (define_split
17154 [(set (match_operand:DI 0 "register_operand")
17155 (zero_extend:DI
17156 (rotatert:SI (match_operand:SI 1 "nonimmediate_operand")
17157 (match_operand:QI 2 "const_int_operand"))))
17158 (clobber (reg:CC FLAGS_REG))]
17159 "TARGET_64BIT && TARGET_BMI2 && reload_completed
17160 && !optimize_function_for_size_p (cfun)"
17161 [(set (match_dup 0)
17162 (zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))])
17163
;; QImode/HImode rotate by a register or immediate count, clobbering the
;; flags.  Alternative 0 is the two-operand form (operand 1 tied to
;; operand 0), alternative 1 the three-operand APX NDD form.
(define_insn "*<insn><mode>3_1"
  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m,r")
        (any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0,rm")
                          (match_operand:QI 2 "nonmemory_operand" "c<S>,c<S>")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands,
                            TARGET_APX_NDD)"
{
  /* The NDD alternative always names all three operands.  */
  if (get_attr_isa (insn) == ISA_APX_NDD)
    return "<rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";

  /* Rotate by one printed without a count operand.  */
  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
    return "<rotate>{<imodesuffix>}\t%0";

  return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
}
  [(set_attr "isa" "*,apx_ndd")
   (set_attr "type" "rotate")
   (set (attr "length_immediate")
     (if_then_else
       (and (match_operand 2 "const1_operand")
            (ior (match_test "TARGET_SHIFT1")
                 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])
17192
;; Rotate that writes only the low part (strict_low_part) of a
;; QImode/HImode register.
;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; That alternative is output as "#"; after reload, when operands 0 and 1
;; differ, the insn is split into a strict_low_part move of operand 1
;; into operand 0 followed by the in-place rotate.
(define_insn_and_split "*<insn><mode>3_1_slp"
  [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
        (any_rotate:SWI12 (match_operand:SWI12 1 "register_operand" "0,!<r>")
                          (match_operand:QI 2 "nonmemory_operand" "cI,cI")))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
{
  /* Only alternative 0 has an assembly form.  */
  if (which_alternative != 0)
    return "#";

  const bool implicit_one
    = (operands[2] == const1_rtx
       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)));

  if (implicit_one)
    return "<rotate>{<imodesuffix>}\t%0";

  return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
}
  "&& reload_completed
   && !(rtx_equal_p (operands[0], operands[1]))"
  [(set (strict_low_part (match_dup 0)) (match_dup 1))
   (parallel
     [(set (strict_low_part (match_dup 0))
           (any_rotate:SWI12 (match_dup 0) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "type" "rotate")
   (set (attr "length_immediate")
     (if_then_else
       (and (match_operand 2 "const1_operand")
            (ior (match_test "TARGET_SHIFT1")
                 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])
17227
;; After reload, an in-place HImode rotate by 8 (either direction) of a
;; register accepted by QIreg_operand is rewritten as a strict_low_part
;; bswap:HI of that register.  Only done when TARGET_USE_XCHGB is set or
;; the function is optimized for size.
17228 (define_split
17229 [(set (match_operand:HI 0 "QIreg_operand")
17230 (any_rotate:HI (match_dup 0) (const_int 8)))
17231 (clobber (reg:CC FLAGS_REG))]
17232 "reload_completed
17233 && (TARGET_USE_XCHGB || optimize_function_for_size_p (cfun))"
17234 [(parallel [(set (strict_low_part (match_dup 0))
17235 (bswap:HI (match_dup 0)))
17236 (clobber (reg:CC FLAGS_REG))])])
17237
17238 ;; Rotations through carry flag
;; SImode rcr by one: the result is modelled as
;; (operand1 >> 1) + (carry << 31), where carry is (ltu (reg:CCC flags) 0).
;; Alternative 0 is the one-operand form, alternative 1 the APX NDD form
;; with a separate register-or-memory source.
;; NOTE(review): "memory" is forced to "none" here although alternative 1
;; accepts a memory operand 1, and rcrdi2 carries no such override --
;; confirm this is intended.
17239 (define_insn "rcrsi2"
17240 [(set (match_operand:SI 0 "register_operand" "=r,r")
17241 (plus:SI
17242 (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
17243 (const_int 1))
17244 (ashift:SI (ltu:SI (reg:CCC FLAGS_REG) (const_int 0))
17245 (const_int 31))))
17246 (clobber (reg:CC FLAGS_REG))]
17247 ""
17248 "@
17249 rcr{l}\t%0
17250 rcr{l}\t{%1, %0|%0, %1}"
17251 [(set_attr "isa" "*,apx_ndd")
17252 (set_attr "type" "ishift1")
17253 (set_attr "memory" "none")
17254 (set_attr "length_immediate" "0")
17255 (set_attr "mode" "SI")])
17256
;; DImode rcr by one (64-bit targets only): the result is modelled as
;; (operand1 >> 1) + (carry << 63), where carry is (ltu (reg:CCC flags) 0).
;; Alternative 0 is the one-operand form, alternative 1 the APX NDD form
;; with a separate register-or-memory source.
17257 (define_insn "rcrdi2"
17258 [(set (match_operand:DI 0 "register_operand" "=r,r")
17259 (plus:DI
17260 (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0,rm")
17261 (const_int 1))
17262 (ashift:DI (ltu:DI (reg:CCC FLAGS_REG) (const_int 0))
17263 (const_int 63))))
17264 (clobber (reg:CC FLAGS_REG))]
17265 "TARGET_64BIT"
17266 "@
17267 rcr{q}\t%0
17268 rcr{q}\t{%1, %0|%0, %1}"
17269 [(set_attr "isa" "*,apx_ndd")
17270 (set_attr "type" "ishift1")
17271 (set_attr "length_immediate" "0")
17272 (set_attr "mode" "DI")])
17273
;; Versions of sar and shr that set the carry flag.
;; The shift count is fixed at one.  The CCC flags value is described as
;; an UNSPEC_CC_NE of (operand1 AND 1) and zero, i.e. it is derived from
;; the bit that is shifted out.  Alternative 0 is the two-operand form,
;; alternative 1 the three-operand APX NDD form.
(define_insn "<insn><mode>3_carry"
  [(set (reg:CCC FLAGS_REG)
        (unspec:CCC [(and:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
                                (const_int 1))
                     (const_int 0)] UNSPEC_CC_NE))
   (set (match_operand:SWI48 0 "register_operand" "=r,r")
        (any_shiftrt:SWI48 (match_dup 1) (const_int 1)))]
  ""
{
  /* The NDD alternative always spells out the count of one.  */
  if (get_attr_isa (insn) == ISA_APX_NDD)
    return "<shift>{<imodesuffix>}\t{$1, %1, %0|%0, %1, 1}";

  /* Two-operand form: leave out the count when that is preferred.  */
  if (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    return "<shift>{<imodesuffix>}\t%0";

  return "<shift>{<imodesuffix>}\t{$1, %0|%0, 1}";
}
  [(set_attr "isa" "*, apx_ndd")
   (set_attr "type" "ishift1")
   (set (attr "length_immediate")
     (if_then_else
       (ior (match_test "TARGET_SHIFT1")
            (match_test "optimize_function_for_size_p (cfun)"))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])
17300 \f
17301 ;; Bit set / bit test instructions
17302
17303 ;; %%% bts, btr, btc
17304
17305 ;; These instructions are *slow* when applied to memory.
17306
;; Mnemonic selected by the RTL code: "bts" for ior, "btc" for xor.
17307 (define_code_attr btsc [(ior "bts") (xor "btc")])
17308
;; bts / btc on a register: operand 1 (tied to operand 0) is IORed or
;; XORed with (1 << operand2), where operand 2 is a QImode register
;; printed via %<k>2.  Clobbers the flags; enabled only with TARGET_USE_BT.
17309 (define_insn "*<btsc><mode>"
17310 [(set (match_operand:SWI48 0 "register_operand" "=r")
17311 (any_or:SWI48
17312 (ashift:SWI48 (const_int 1)
17313 (match_operand:QI 2 "register_operand" "r"))
17314 (match_operand:SWI48 1 "register_operand" "0")))
17315 (clobber (reg:CC FLAGS_REG))]
17316 "TARGET_USE_BT"
17317 "<btsc>{<imodesuffix>}\t{%<k>2, %0|%0, %<k>2}"
17318 [(set_attr "type" "alu1")
17319 (set_attr "prefix_0f" "1")
17320 (set_attr "znver1_decode" "double")
17321 (set_attr "mode" "<MODE>")])
17322
17323 ;; Avoid useless masking of count operand.
;; Matches the bts/btc form whose bit index is the low QImode subreg of
;; (and operand1 operand2), where the constant operand 2 has every bit of
;; GET_MODE_BITSIZE (<MODE>mode) - 1 set.  Only valid before reload
;; (ix86_pre_reload_split); it is always split ("&& 1") into the plain
;; form whose index is the QImode low part of operand 1, i.e. the AND is
;; dropped.
17324 (define_insn_and_split "*<btsc><mode>_mask"
17325 [(set (match_operand:SWI48 0 "register_operand")
17326 (any_or:SWI48
17327 (ashift:SWI48
17328 (const_int 1)
17329 (subreg:QI
17330 (and
17331 (match_operand 1 "int248_register_operand")
17332 (match_operand 2 "const_int_operand")) 0))
17333 (match_operand:SWI48 3 "register_operand")))
17334 (clobber (reg:CC FLAGS_REG))]
17335 "TARGET_USE_BT
17336 && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
17337 == GET_MODE_BITSIZE (<MODE>mode)-1
17338 && ix86_pre_reload_split ()"
17339 "#"
17340 "&& 1"
17341 [(parallel
17342 [(set (match_dup 0)
17343 (any_or:SWI48
17344 (ashift:SWI48 (const_int 1)
17345 (match_dup 1))
17346 (match_dup 3)))
17347 (clobber (reg:CC FLAGS_REG))])]
17348 {
17349 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
17350 operands[1] = gen_lowpart (QImode, operands[1]);
17351 })
17352
;; Same simplification as the subreg form, for a bit index that is
;; directly a QImode (and operand1 operand2).  When operand 2 has every
;; bit of GET_MODE_BITSIZE (<MODE>mode) - 1 set, the insn is split before
;; reload into the plain bts/btc form indexed by operand 1; no
;; preparation code is needed because operand 1 is already a QImode
;; register.
17353 (define_insn_and_split "*<btsc><mode>_mask_1"
17354 [(set (match_operand:SWI48 0 "register_operand")
17355 (any_or:SWI48
17356 (ashift:SWI48
17357 (const_int 1)
17358 (and:QI
17359 (match_operand:QI 1 "register_operand")
17360 (match_operand:QI 2 "const_int_operand")))
17361 (match_operand:SWI48 3 "register_operand")))
17362 (clobber (reg:CC FLAGS_REG))]
17363 "TARGET_USE_BT
17364 && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
17365 == GET_MODE_BITSIZE (<MODE>mode)-1
17366 && ix86_pre_reload_split ()"
17367 "#"
17368 "&& 1"
17369 [(parallel
17370 [(set (match_dup 0)
17371 (any_or:SWI48
17372 (ashift:SWI48 (const_int 1)
17373 (match_dup 1))
17374 (match_dup 3)))
17375 (clobber (reg:CC FLAGS_REG))])])
17376
;; Bit reset with a register bit index.  The RTL is operand 1 ANDed with
;; (const_int -2) rotated by the QImode index in operand 2; -2 has every bit
;; set except bit 0, so the rotated value has every bit set except the
;; indexed one.  Operand 1 is tied to the output ("0"); FLAGS_REG is
;; clobbered.  Emits "btr" with the mode's suffix.
17377 (define_insn "*btr<mode>"
17378 [(set (match_operand:SWI48 0 "register_operand" "=r")
17379 (and:SWI48
17380 (rotate:SWI48 (const_int -2)
17381 (match_operand:QI 2 "register_operand" "r"))
17382 (match_operand:SWI48 1 "register_operand" "0")))
17383 (clobber (reg:CC FLAGS_REG))]
17384 "TARGET_USE_BT"
17385 "btr{<imodesuffix>}\t{%<k>2, %0|%0, %<k>2}"
17386 [(set_attr "type" "alu1")
17387 (set_attr "prefix_0f" "1")
17388 (set_attr "znver1_decode" "double")
17389 (set_attr "mode" "<MODE>")])
17390
17391 ;; Avoid useless masking of count operand.
;; Matches the and/rotate bit-reset form where the rotate count is the QImode
;; subreg of (and X C).  Accepted only when C has every bit of
;; GET_MODE_BITSIZE (<MODE>mode)-1 set.  Always split ("#"): the split
;; re-emits the and/rotate form with the QImode low part of X as the count.
17392 (define_insn_and_split "*btr<mode>_mask"
17393 [(set (match_operand:SWI48 0 "register_operand")
17394 (and:SWI48
17395 (rotate:SWI48
17396 (const_int -2)
17397 (subreg:QI
17398 (and
17399 (match_operand 1 "int248_register_operand")
17400 (match_operand 2 "const_int_operand")) 0))
17401 (match_operand:SWI48 3 "register_operand")))
17402 (clobber (reg:CC FLAGS_REG))]
17403 "TARGET_USE_BT
17404 && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
17405 == GET_MODE_BITSIZE (<MODE>mode)-1
17406 && ix86_pre_reload_split ()"
17407 "#"
17408 "&& 1"
17409 [(parallel
17410 [(set (match_dup 0)
17411 (and:SWI48
17412 (rotate:SWI48 (const_int -2)
17413 (match_dup 1))
17414 (match_dup 3)))
17415 (clobber (reg:CC FLAGS_REG))])]
17416 {
/* Force X into a register in its own mode, then use its QImode low part
   as the rotate count.  */
17417 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
17418 operands[1] = gen_lowpart (QImode, operands[1]);
17419 })
17420
;; Variant of the masked bit-reset pattern where the rotate count is an
;; (and:QI ...) of a QImode register and a constant, with no subreg.  Same
;; mask test on operand 2; the split uses the QImode register (operand 1)
;; directly as the count.
17421 (define_insn_and_split "*btr<mode>_mask_1"
17422 [(set (match_operand:SWI48 0 "register_operand")
17423 (and:SWI48
17424 (rotate:SWI48
17425 (const_int -2)
17426 (and:QI
17427 (match_operand:QI 1 "register_operand")
17428 (match_operand:QI 2 "const_int_operand")))
17429 (match_operand:SWI48 3 "register_operand")))
17430 (clobber (reg:CC FLAGS_REG))]
17431 "TARGET_USE_BT
17432 && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
17433 == GET_MODE_BITSIZE (<MODE>mode)-1
17434 && ix86_pre_reload_split ()"
17435 "#"
17436 "&& 1"
17437 [(parallel
17438 [(set (match_dup 0)
17439 (and:SWI48
17440 (rotate:SWI48 (const_int -2)
17441 (match_dup 1))
17442 (match_dup 3)))
17443 (clobber (reg:CC FLAGS_REG))])])
17444
;; Bit reset on an SWI12-mode value where the mask was computed as an SImode
;; rotate of -2 and then narrowed with a low-part subreg.  Always split ("#")
;; into the SImode and/rotate form operating on SImode low-part subregs of
;; operands 0 and 1.
17445 (define_insn_and_split "*btr<mode>_1"
17446 [(set (match_operand:SWI12 0 "register_operand")
17447 (and:SWI12
17448 (subreg:SWI12
17449 (rotate:SI (const_int -2)
17450 (match_operand:QI 2 "register_operand")) 0)
17451 (match_operand:SWI12 1 "nonimmediate_operand")))
17452 (clobber (reg:CC FLAGS_REG))]
17453 "TARGET_USE_BT && ix86_pre_reload_split ()"
17454 "#"
17455 "&& 1"
17456 [(parallel
17457 [(set (match_dup 0)
17458 (and:SI (rotate:SI (const_int -2) (match_dup 2))
17459 (match_dup 1)))
17460 (clobber (reg:CC FLAGS_REG))])]
17461 {
/* Operand 1 may be memory; load it into a register before taking the
   SImode low-part subreg.  */
17462 operands[0] = lowpart_subreg (SImode, operands[0], <MODE>mode);
17463 operands[1] = force_reg (<MODE>mode, operands[1]);
17464 operands[1] = lowpart_subreg (SImode, operands[1], <MODE>mode);
17465 })
17466
;; Clears a single bit, written as a 1-bit zero_extract store of 0 into an
;; SWI12-mode operand at a QImode register position.  The split condition
;; restricts this split to a memory operand 0; the register form is matched
;; by the separate define_split that follows.  The split loads the value into
;; a fresh pseudo, applies the SImode and/rotate form into a second fresh
;; pseudo, and stores that back to operand 0.
17467 (define_insn_and_split "*btr<mode>_2"
17468 [(set (zero_extract:HI
17469 (match_operand:SWI12 0 "nonimmediate_operand")
17470 (const_int 1)
17471 (match_operand:QI 1 "register_operand"))
17472 (const_int 0))
17473 (clobber (reg:CC FLAGS_REG))]
17474 "TARGET_USE_BT && ix86_pre_reload_split ()"
17475 "#"
17476 "&& MEM_P (operands[0])"
17477 [(set (match_dup 2) (match_dup 0))
17478 (parallel
17479 [(set (match_dup 3)
17480 (and:SI (rotate:SI (const_int -2) (match_dup 1))
17481 (match_dup 4)))
17482 (clobber (reg:CC FLAGS_REG))])
17483 (set (match_dup 0) (match_dup 5))]
17484 {
/* operands[2]: pseudo holding the loaded value; operands[5]: pseudo holding
   the result.  operands[4] and operands[3] are their SImode low parts.  */
17485 operands[2] = gen_reg_rtx (<MODE>mode);
17486 operands[5] = gen_reg_rtx (<MODE>mode);
17487 operands[3] = lowpart_subreg (SImode, operands[5], <MODE>mode);
17488 operands[4] = lowpart_subreg (SImode, operands[2], <MODE>mode);
17489 })
17490
;; Register-operand form of the 1-bit zero_extract clear: rewrite it as the
;; SImode and/rotate form with the SImode low-part subreg of operand 0 as
;; both source and destination.
17491 (define_split
17492 [(set (zero_extract:HI
17493 (match_operand:SWI12 0 "register_operand")
17494 (const_int 1)
17495 (match_operand:QI 1 "register_operand"))
17496 (const_int 0))
17497 (clobber (reg:CC FLAGS_REG))]
17498 "TARGET_USE_BT && ix86_pre_reload_split ()"
17499 [(parallel
17500 [(set (match_dup 0)
17501 (and:SI (rotate:SI (const_int -2) (match_dup 1))
17502 (match_dup 2)))
17503 (clobber (reg:CC FLAGS_REG))])]
17504 {
/* operands[2] must be computed before operands[0] is overwritten.  */
17505 operands[2] = lowpart_subreg (SImode, operands[0], <MODE>mode);
17506 operands[0] = lowpart_subreg (SImode, operands[0], <MODE>mode);
17507 })
17508
17509 ;; These instructions are never faster than the corresponding
17510 ;; and/ior/xor operations when using immediate operand, so with
17511 ;; 32-bit there's no point. But in 64-bit, we can't hold the
17512 ;; relevant immediates within the instruction itself, so operating
17513 ;; on bits in the high 32-bits of a register becomes easier.
17514 ;;
17515 ;; These are slow on Nocona, but fast on Athlon64. We do require the use
17516 ;; of btrq and btcq for corner cases of post-reload expansion of absdf and
17517 ;; negdf respectively, so they can never be disabled entirely.
17518
;; Set one bit of a DImode register or memory operand at a constant position
;; 0..63, expressed as a 1-bit zero_extract store of 1.  Available on 64-bit
;; targets when TARGET_USE_BT holds or once reload has completed.
17519 (define_insn "*btsq_imm"
17520 [(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm")
17521 (const_int 1)
17522 (match_operand:QI 1 "const_0_to_63_operand"))
17523 (const_int 1))
17524 (clobber (reg:CC FLAGS_REG))]
17525 "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
17526 "bts{q}\t{%1, %0|%0, %1}"
17527 [(set_attr "type" "alu1")
17528 (set_attr "prefix_0f" "1")
17529 (set_attr "znver1_decode" "double")
17530 (set_attr "mode" "DI")])
17531
;; Clear one bit of a DImode register or memory operand at a constant
;; position 0..63, expressed as a 1-bit zero_extract store of 0.  Same
;; enabling condition as *btsq_imm.
17532 (define_insn "*btrq_imm"
17533 [(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm")
17534 (const_int 1)
17535 (match_operand:QI 1 "const_0_to_63_operand"))
17536 (const_int 0))
17537 (clobber (reg:CC FLAGS_REG))]
17538 "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
17539 "btr{q}\t{%1, %0|%0, %1}"
17540 [(set_attr "type" "alu1")
17541 (set_attr "prefix_0f" "1")
17542 (set_attr "znver1_decode" "double")
17543 (set_attr "mode" "DI")])
17544
;; Complement one bit of a DImode register or memory operand at a constant
;; position 0..63: the 1-bit zero_extract is set to the NOT of its own
;; current value.  Same enabling condition as *btsq_imm.
17545 (define_insn "*btcq_imm"
17546 [(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm")
17547 (const_int 1)
17548 (match_operand:QI 1 "const_0_to_63_operand"))
17549 (not:DI (zero_extract:DI (match_dup 0) (const_int 1) (match_dup 1))))
17550 (clobber (reg:CC FLAGS_REG))]
17551 "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
17552 "btc{q}\t{%1, %0|%0, %1}"
17553 [(set_attr "type" "alu1")
17554 (set_attr "prefix_0f" "1")
17555 (set_attr "znver1_decode" "double")
17556 (set_attr "mode" "DI")])
17557
17558 ;; Allow Nocona to avoid these instructions if a register is available.
17559
;; When !TARGET_USE_BT on a 64-bit target and a DImode scratch register is
;; available, replace the constant-position bit set with an IOR against the
;; single-bit mask (1 << position).
17560 (define_peephole2
17561 [(match_scratch:DI 2 "r")
17562 (parallel [(set (zero_extract:DI
17563 (match_operand:DI 0 "nonimmediate_operand")
17564 (const_int 1)
17565 (match_operand:QI 1 "const_0_to_63_operand"))
17566 (const_int 1))
17567 (clobber (reg:CC FLAGS_REG))])]
17568 "TARGET_64BIT && !TARGET_USE_BT"
17569 [(parallel [(set (match_dup 0)
17570 (ior:DI (match_dup 0) (match_dup 3)))
17571 (clobber (reg:CC FLAGS_REG))])]
17572 {
17573 int i = INTVAL (operands[1]);
17574
17575 operands[3] = gen_int_mode (HOST_WIDE_INT_1U << i, DImode);
17576
/* If the mask is not accepted by x86_64_immediate_operand, load it into
   the scratch register and use that register as the IOR operand.  */
17577 if (!x86_64_immediate_operand (operands[3], DImode))
17578 {
17579 emit_move_insn (operands[2], operands[3]);
17580 operands[3] = operands[2];
17581 }
17582 })
17583
;; When !TARGET_USE_BT on a 64-bit target and a DImode scratch register is
;; available, replace the constant-position bit clear with an AND against
;; the inverted single-bit mask ~(1 << position).
17584 (define_peephole2
17585 [(match_scratch:DI 2 "r")
17586 (parallel [(set (zero_extract:DI
17587 (match_operand:DI 0 "nonimmediate_operand")
17588 (const_int 1)
17589 (match_operand:QI 1 "const_0_to_63_operand"))
17590 (const_int 0))
17591 (clobber (reg:CC FLAGS_REG))])]
17592 "TARGET_64BIT && !TARGET_USE_BT"
17593 [(parallel [(set (match_dup 0)
17594 (and:DI (match_dup 0) (match_dup 3)))
17595 (clobber (reg:CC FLAGS_REG))])]
17596 {
17597 int i = INTVAL (operands[1]);
17598
17599 operands[3] = gen_int_mode (~(HOST_WIDE_INT_1U << i), DImode);
17600
/* If the mask is not accepted by x86_64_immediate_operand, load it into
   the scratch register and use that register as the AND operand.  */
17601 if (!x86_64_immediate_operand (operands[3], DImode))
17602 {
17603 emit_move_insn (operands[2], operands[3]);
17604 operands[3] = operands[2];
17605 }
17606 })
17607
;; When !TARGET_USE_BT on a 64-bit target and a DImode scratch register is
;; available, replace the constant-position bit complement with an XOR
;; against the single-bit mask (1 << position).
17608 (define_peephole2
17609 [(match_scratch:DI 2 "r")
17610 (parallel [(set (zero_extract:DI
17611 (match_operand:DI 0 "nonimmediate_operand")
17612 (const_int 1)
17613 (match_operand:QI 1 "const_0_to_63_operand"))
17614 (not:DI (zero_extract:DI
17615 (match_dup 0) (const_int 1) (match_dup 1))))
17616 (clobber (reg:CC FLAGS_REG))])]
17617 "TARGET_64BIT && !TARGET_USE_BT"
17618 [(parallel [(set (match_dup 0)
17619 (xor:DI (match_dup 0) (match_dup 3)))
17620 (clobber (reg:CC FLAGS_REG))])]
17621 {
17622 int i = INTVAL (operands[1]);
17623
17624 operands[3] = gen_int_mode (HOST_WIDE_INT_1U << i, DImode);
17625
/* If the mask is not accepted by x86_64_immediate_operand, load it into
   the scratch register and use that register as the XOR operand.  */
17626 if (!x86_64_immediate_operand (operands[3], DImode))
17627 {
17628 emit_move_insn (operands[2], operands[3]);
17629 operands[3] = operands[2];
17630 }
17631 })
17632
17633 ;; %%% bt
17634
;; Bit test: sets FLAGS_REG in CCCmode from comparing a 1-bit zero_extract of
;; operand 0 (register or memory) at position operand 1 against zero.  The
;; pattern has no enabling condition.  The "mode" attribute is SI when the
;; position is a constant below 32 and <MODE> otherwise, and the output code
;; selects the "bt{l}" or "bt{q}" template from that attribute.
17635 (define_insn "*bt<mode>"
17636 [(set (reg:CCC FLAGS_REG)
17637 (compare:CCC
17638 (zero_extract:SWI48
17639 (match_operand:SWI48 0 "nonimmediate_operand" "r,m")
17640 (const_int 1)
17641 (match_operand:QI 1 "nonmemory_operand" "q<S>,<S>"))
17642 (const_int 0)))]
17643 ""
17644 {
17645 switch (get_attr_mode (insn))
17646 {
17647 case MODE_SI:
17648 return "bt{l}\t{%k1, %k0|%k0, %k1}";
17649
17650 case MODE_DI:
17651 return "bt{q}\t{%q1, %0|%0, %q1}";
17652
17653 default:
17654 gcc_unreachable ();
17655 }
17656 }
17657 [(set_attr "type" "alu1")
17658 (set_attr "prefix_0f" "1")
17659 (set (attr "mode")
17660 (if_then_else
17661 (and (match_test "CONST_INT_P (operands[1])")
17662 (match_test "INTVAL (operands[1]) < 32"))
17663 (const_string "SI")
17664 (const_string "<MODE>")))])
17665
;; Bit test whose position is the QImode subreg of (and X C) with X in an
;; SWI248 mode.  Accepted only when C has every bit of
;; GET_MODE_BITSIZE (<SWI48:MODE>mode)-1 set.  Always split ("#") into the
;; plain CCC compare of the zero_extract, with the QImode low part of X as
;; the position.
17666 (define_insn_and_split "*bt<SWI48:mode>_mask"
17667 [(set (reg:CCC FLAGS_REG)
17668 (compare:CCC
17669 (zero_extract:SWI48
17670 (match_operand:SWI48 0 "nonimmediate_operand" "r,m")
17671 (const_int 1)
17672 (subreg:QI
17673 (and:SWI248
17674 (match_operand:SWI248 1 "register_operand")
17675 (match_operand 2 "const_int_operand")) 0))
17676 (const_int 0)))]
17677 "TARGET_USE_BT
17678 && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<SWI48:MODE>mode)-1))
17679 == GET_MODE_BITSIZE (<SWI48:MODE>mode)-1
17680 && ix86_pre_reload_split ()"
17681 "#"
17682 "&& 1"
17683 [(set (reg:CCC FLAGS_REG)
17684 (compare:CCC
17685 (zero_extract:SWI48 (match_dup 0) (const_int 1) (match_dup 1))
17686 (const_int 0)))]
17687 "operands[1] = gen_lowpart (QImode, operands[1]);")
17688
;; Conditional jump on a single bit of operand 1 being zero/nonzero.
;; Enabled when TARGET_USE_BT or when optimizing the function for size.
;; A constant position must be below the mode's bit size and at least 8
;; (optimizing for size) or 32 (otherwise); a non-constant position requires
;; operand 1 not to be a memory operand.  Always split ("#") into a CCC
;; bit-test compare followed by a jump on FLAGS_REG.
17689 (define_insn_and_split "*jcc_bt<mode>"
17690 [(set (pc)
17691 (if_then_else (match_operator 0 "bt_comparison_operator"
17692 [(zero_extract:SWI48
17693 (match_operand:SWI48 1 "nonimmediate_operand")
17694 (const_int 1)
17695 (match_operand:QI 2 "nonmemory_operand"))
17696 (const_int 0)])
17697 (label_ref (match_operand 3))
17698 (pc)))
17699 (clobber (reg:CC FLAGS_REG))]
17700 "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
17701 && (CONST_INT_P (operands[2])
17702 ? (INTVAL (operands[2]) < GET_MODE_BITSIZE (<MODE>mode)
17703 && INTVAL (operands[2])
17704 >= (optimize_function_for_size_p (cfun) ? 8 : 32))
17705 : !memory_operand (operands[1], <MODE>mode))
17706 && ix86_pre_reload_split ()"
17707 "#"
17708 "&& 1"
17709 [(set (reg:CCC FLAGS_REG)
17710 (compare:CCC
17711 (zero_extract:SWI48
17712 (match_dup 1)
17713 (const_int 1)
17714 (match_dup 2))
17715 (const_int 0)))
17716 (set (pc)
17717 (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
17718 (label_ref (match_dup 3))
17719 (pc)))]
17720 {
/* The flags-based jump uses the reversed comparison code; work on a copy
   so the matched operator rtx is not modified in place.  */
17721 operands[0] = shallow_copy_rtx (operands[0]);
17722 PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
17723 })
17724
17725 ;; Avoid useless masking of bit offset operand.
;; Bit-test jump whose position is (and:QI reg C).  Accepted only when C has
;; every bit of GET_MODE_BITSIZE (<MODE>mode)-1 set.  Always split ("#") into
;; a CCC bit-test compare using the unmasked QImode register, followed by a
;; jump on FLAGS_REG with the reversed comparison code.
17726 (define_insn_and_split "*jcc_bt<mode>_mask"
17727 [(set (pc)
17728 (if_then_else (match_operator 0 "bt_comparison_operator"
17729 [(zero_extract:SWI48
17730 (match_operand:SWI48 1 "register_operand")
17731 (const_int 1)
17732 (and:QI
17733 (match_operand:QI 2 "register_operand")
17734 (match_operand 3 "const_int_operand")))
17735 (const_int 0)])
17736 (label_ref (match_operand 4))
17737 (pc)))
17738 (clobber (reg:CC FLAGS_REG))]
17739 "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
17740 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
17741 == GET_MODE_BITSIZE (<MODE>mode)-1
17742 && ix86_pre_reload_split ()"
17743 "#"
17744 "&& 1"
17745 [(set (reg:CCC FLAGS_REG)
17746 (compare:CCC
17747 (zero_extract:SWI48
17748 (match_dup 1)
17749 (const_int 1)
17750 (match_dup 2))
17751 (const_int 0)))
17752 (set (pc)
17753 (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
17754 (label_ref (match_dup 4))
17755 (pc)))]
17756 {
17757 operands[0] = shallow_copy_rtx (operands[0]);
17758 PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
17759 })
17760
17761 ;; Avoid useless masking of bit offset operand.
;; Bit-test jump whose position is the QImode subreg of (and X C) with X in
;; an SWI248 mode.  Accepted only when C has every bit of
;; GET_MODE_BITSIZE (<SWI48:MODE>mode)-1 set.  Always split ("#") into a CCC
;; bit-test compare using the QImode low part of X, followed by a jump on
;; FLAGS_REG with the reversed comparison code.
17762 (define_insn_and_split "*jcc_bt<SWI48:mode>_mask_1"
17763 [(set (pc)
17764 (if_then_else (match_operator 0 "bt_comparison_operator"
17765 [(zero_extract:SWI48
17766 (match_operand:SWI48 1 "register_operand")
17767 (const_int 1)
17768 (subreg:QI
17769 (and:SWI248
17770 (match_operand:SWI248 2 "register_operand")
17771 (match_operand 3 "const_int_operand")) 0))
17772 (const_int 0)])
17773 (label_ref (match_operand 4))
17774 (pc)))
17775 (clobber (reg:CC FLAGS_REG))]
17776 "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
17777 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<SWI48:MODE>mode)-1))
17778 == GET_MODE_BITSIZE (<SWI48:MODE>mode)-1
17779 && ix86_pre_reload_split ()"
17780 "#"
17781 "&& 1"
17782 [(set (reg:CCC FLAGS_REG)
17783 (compare:CCC
17784 (zero_extract:SWI48
17785 (match_dup 1)
17786 (const_int 1)
17787 (match_dup 2))
17788 (const_int 0)))
17789 (set (pc)
17790 (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
17791 (label_ref (match_dup 4))
17792 (pc)))]
17793 {
17794 operands[0] = shallow_copy_rtx (operands[0]);
17795 PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
17796 operands[2] = gen_lowpart (QImode, operands[2]);
17797 })
17798
17799 ;; Help combine recognize bt followed by cmov
;; Rewrites a conditional select keyed on a single-bit test into a CCC
;; bit-test compare plus an if_then_else on FLAGS_REG.  Rejected when both
;; select arms are memory.  The emitted condition is always
;; (eq (reg:CCC FLAGS_REG) 0); when the original operator (operand 5) is EQ
;; the two arms are swapped instead.
17800 (define_split
17801 [(set (match_operand:SWI248 0 "register_operand")
17802 (if_then_else:SWI248
17803 (match_operator 5 "bt_comparison_operator"
17804 [(zero_extract:SWI48
17805 (match_operand:SWI48 1 "register_operand")
17806 (const_int 1)
17807 (match_operand:QI 2 "register_operand"))
17808 (const_int 0)])
17809 (match_operand:SWI248 3 "nonimmediate_operand")
17810 (match_operand:SWI248 4 "nonimmediate_operand")))]
17811 "TARGET_USE_BT && TARGET_CMOVE
17812 && !(MEM_P (operands[3]) && MEM_P (operands[4]))
17813 && ix86_pre_reload_split ()"
17814 [(set (reg:CCC FLAGS_REG)
17815 (compare:CCC
17816 (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
17817 (const_int 0)))
17818 (set (match_dup 0)
17819 (if_then_else:SWI248 (eq (reg:CCC FLAGS_REG) (const_int 0))
17820 (match_dup 3)
17821 (match_dup 4)))]
17822 {
17823 if (GET_CODE (operands[5]) == EQ)
17824 std::swap (operands[3], operands[4]);
17825 })
17826
17827 ;; Help combine recognize bt followed by setc
;; Matches a 1-bit zero_extract of operand 1 stored through an SWI48 subreg
;; of a QImode register.  Always split ("#") into a CCC bit-test compare
;; followed by a QImode store of (eq (reg:CCC FLAGS_REG) 0) into operand 0.
17828 (define_insn_and_split "*bt<mode>_setcqi"
17829 [(set (subreg:SWI48 (match_operand:QI 0 "register_operand") 0)
17830 (zero_extract:SWI48
17831 (match_operand:SWI48 1 "register_operand")
17832 (const_int 1)
17833 (match_operand:QI 2 "register_operand")))
17834 (clobber (reg:CC FLAGS_REG))]
17835 "TARGET_USE_BT && ix86_pre_reload_split ()"
17836 "#"
17837 "&& 1"
17838 [(set (reg:CCC FLAGS_REG)
17839 (compare:CCC
17840 (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
17841 (const_int 0)))
17842 (set (match_dup 0)
17843 (eq:QI (reg:CCC FLAGS_REG) (const_int 0)))])
17844
17845 ;; Help combine recognize bt followed by setnc
;; Matches the QImode value ((~(op1 >> op2)) & 1), i.e. the inverse of the
;; selected bit.  Always split ("#") into a CCC bit-test compare followed by
;; a QImode store of (ne (reg:CCC FLAGS_REG) 0) into operand 0.
17846 (define_insn_and_split "*bt<mode>_setncqi"
17847 [(set (match_operand:QI 0 "register_operand")
17848 (and:QI
17849 (not:QI
17850 (subreg:QI
17851 (lshiftrt:SWI48 (match_operand:SWI48 1 "register_operand")
17852 (match_operand:QI 2 "register_operand")) 0))
17853 (const_int 1)))
17854 (clobber (reg:CC FLAGS_REG))]
17855 "TARGET_USE_BT && ix86_pre_reload_split ()"
17856 "#"
17857 "&& 1"
17858 [(set (reg:CCC FLAGS_REG)
17859 (compare:CCC
17860 (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
17861 (const_int 0)))
17862 (set (match_dup 0)
17863 (ne:QI (reg:CCC FLAGS_REG) (const_int 0)))])
17864
;; Full-width form of the inverted-bit pattern: ((~(op1 >> op2)) & 1) in the
;; SWI48 mode itself.  Always split ("#") into a CCC bit-test compare, a
;; store of (ne (reg:CCC FLAGS_REG) 0) into a fresh QImode pseudo
;; (operand 3), and a zero_extend of that pseudo into operand 0.
17865 (define_insn_and_split "*bt<mode>_setnc<mode>"
17866 [(set (match_operand:SWI48 0 "register_operand")
17867 (and:SWI48
17868 (not:SWI48
17869 (lshiftrt:SWI48 (match_operand:SWI48 1 "register_operand")
17870 (match_operand:QI 2 "register_operand")))
17871 (const_int 1)))
17872 (clobber (reg:CC FLAGS_REG))]
17873 "TARGET_USE_BT && ix86_pre_reload_split ()"
17874 "#"
17875 "&& 1"
17876 [(set (reg:CCC FLAGS_REG)
17877 (compare:CCC
17878 (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
17879 (const_int 0)))
17880 (set (match_dup 3)
17881 (ne:QI (reg:CCC FLAGS_REG) (const_int 0)))
17882 (set (match_dup 0) (zero_extend:SWI48 (match_dup 3)))]
17883 "operands[3] = gen_reg_rtx (QImode);")
17884
17885 ;; Help combine recognize bt followed by setnc (PR target/110588)
;; Matches a QImode result of comparing a 1-bit zero_extract of operand 1
;; for equality with zero.  Always split ("#") into a CCC bit-test compare
;; followed by a QImode store of (ne (reg:CCC FLAGS_REG) 0) into operand 0.
17886 (define_insn_and_split "*bt<mode>_setncqi_2"
17887 [(set (match_operand:QI 0 "register_operand")
17888 (eq:QI
17889 (zero_extract:SWI48
17890 (match_operand:SWI48 1 "register_operand")
17891 (const_int 1)
17892 (match_operand:QI 2 "register_operand"))
17893 (const_int 0)))
17894 (clobber (reg:CC FLAGS_REG))]
17895 "TARGET_USE_BT && ix86_pre_reload_split ()"
17896 "#"
17897 "&& 1"
17898 [(set (reg:CCC FLAGS_REG)
17899 (compare:CCC
17900 (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
17901 (const_int 0)))
17902 (set (match_dup 0)
17903 (ne:QI (reg:CCC FLAGS_REG) (const_int 0)))])
17904
17905 ;; Help combine recognize bt followed by setc
;; Matches a 1-bit zero_extract of operand 1 whose position is the QImode
;; subreg of (and X C), with the result in the full SWI48 mode.  Accepted
;; only when C has every bit of GET_MODE_BITSIZE (<MODE>mode)-1 set.  Always
;; split ("#") into a CCC bit-test compare using the QImode low part of X, a
;; store of (eq (reg:CCC FLAGS_REG) 0) into a fresh QImode pseudo, and a
;; zero_extend of that pseudo into operand 0.
17906 (define_insn_and_split "*bt<mode>_setc<mode>_mask"
17907 [(set (match_operand:SWI48 0 "register_operand")
17908 (zero_extract:SWI48
17909 (match_operand:SWI48 1 "register_operand")
17910 (const_int 1)
17911 (subreg:QI
17912 (and:SWI48
17913 (match_operand:SWI48 2 "register_operand")
17914 (match_operand 3 "const_int_operand")) 0)))
17915 (clobber (reg:CC FLAGS_REG))]
17916 "TARGET_USE_BT
17917 && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
17918 == GET_MODE_BITSIZE (<MODE>mode)-1
17919 && ix86_pre_reload_split ()"
17920 "#"
17921 "&& 1"
17922 [(set (reg:CCC FLAGS_REG)
17923 (compare:CCC
17924 (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
17925 (const_int 0)))
17926 (set (match_dup 3)
17927 (eq:QI (reg:CCC FLAGS_REG) (const_int 0)))
17928 (set (match_dup 0) (zero_extend:SWI48 (match_dup 3)))]
17929 {
/* operands[3] held the matched mask constant; it is reused here for the
   fresh QImode temporary that receives the flag result.  */
17930 operands[2] = gen_lowpart (QImode, operands[2]);
17931 operands[3] = gen_reg_rtx (QImode);
17932 })
17933 \f
17934 ;; Store-flag instructions.
17935
;; Store-flag of an add_comparison_operator applied to (not A) and B.
;; Rewritten as a CCC compare of (A + B) against A, followed by the same
;; operator applied to FLAGS_REG.
17936 (define_split
17937 [(set (match_operand:QI 0 "nonimmediate_operand")
17938 (match_operator:QI 1 "add_comparison_operator"
17939 [(not:SWI (match_operand:SWI 2 "register_operand"))
17940 (match_operand:SWI 3 "nonimmediate_operand")]))]
17941 ""
17942 [(set (reg:CCC FLAGS_REG)
17943 (compare:CCC
17944 (plus:SWI (match_dup 2) (match_dup 3))
17945 (match_dup 2)))
17946 (set (match_dup 0)
17947 (match_op_dup 1 [(reg:CCC FLAGS_REG) (const_int 0)]))])
17948
;; Store-flag of a shr_comparison_operator comparing a DImode register with
;; a constant C where C + 1 is an exact power of two 2^k, 32 <= k <= 63.
;; Rewritten as a CCZ compare of (reg >> k) against zero; the preparation
;; code maps GTU to NE and LEU to EQ and treats any other code as
;; unreachable.
17949 (define_split
17950 [(set (match_operand:QI 0 "nonimmediate_operand")
17951 (match_operator:QI 1 "shr_comparison_operator"
17952 [(match_operand:DI 2 "register_operand")
17953 (match_operand 3 "const_int_operand")]))]
17954 "TARGET_64BIT
17955 && IN_RANGE (exact_log2 (UINTVAL (operands[3]) + 1), 32, 63)"
17956 [(set (reg:CCZ FLAGS_REG)
17957 (compare:CCZ
17958 (lshiftrt:DI (match_dup 2) (match_dup 4))
17959 (const_int 0)))
17960 (set (match_dup 0)
17961 (match_op_dup 1 [(reg:CCZ FLAGS_REG) (const_int 0)]))]
17962 {
17963 enum rtx_code new_code;
17964
/* Work on a copy so the matched operator rtx is not modified in place.  */
17965 operands[1] = shallow_copy_rtx (operands[1]);
17966 switch (GET_CODE (operands[1]))
17967 {
17968 case GTU: new_code = NE; break;
17969 case LEU: new_code = EQ; break;
17970 default: gcc_unreachable ();
17971 }
17972 PUT_CODE (operands[1], new_code);
17973
/* operands[4] is the shift count k.  */
17974 operands[4] = GEN_INT (exact_log2 (UINTVAL (operands[3]) + 1));
17975 })
17976
17977 ;; For all sCOND expanders, also expand the compare or test insn that
17978 ;; generates cc0. Generate an equality comparison if `seq' or `sne'.
17979
;; DImode store-flag from a comparison on FLAGS_REG, for 64-bit targets
;; without TARGET_PARTIAL_REG_STALL.  Split after reload into a QImode
;; store-flag into the low part of operand 0 followed by a zero_extend to
;; DImode.
17980 (define_insn_and_split "*setcc_di_1"
17981 [(set (match_operand:DI 0 "register_operand" "=q")
17982 (match_operator:DI 1 "ix86_comparison_operator"
17983 [(reg FLAGS_REG) (const_int 0)]))]
17984 "TARGET_64BIT && !TARGET_PARTIAL_REG_STALL"
17985 "#"
17986 "&& reload_completed"
17987 [(set (match_dup 2) (match_dup 1))
17988 (set (match_dup 0) (zero_extend:DI (match_dup 2)))]
17989 {
/* Retarget a copy of the comparison to QImode; operands[2] is the QImode
   low part of the destination.  */
17990 operands[1] = shallow_copy_rtx (operands[1]);
17991 PUT_MODE (operands[1], QImode);
17992 operands[2] = gen_lowpart (QImode, operands[0]);
17993 })
17994
;; SWI24-mode store-flag from a comparison on FLAGS_REG, used when
;; TARGET_ZERO_EXTEND_WITH_AND holds and the function is optimized for
;; speed.  Split after reload into a QImode store-flag into the low part of
;; operand 0 followed by a zero_extend that clobbers FLAGS_REG.
17995 (define_insn_and_split "*setcc_<mode>_1_and"
17996 [(set (match_operand:SWI24 0 "register_operand" "=q")
17997 (match_operator:SWI24 1 "ix86_comparison_operator"
17998 [(reg FLAGS_REG) (const_int 0)]))
17999 (clobber (reg:CC FLAGS_REG))]
18000 "!TARGET_PARTIAL_REG_STALL
18001 && TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
18002 "#"
18003 "&& reload_completed"
18004 [(set (match_dup 2) (match_dup 1))
18005 (parallel [(set (match_dup 0) (zero_extend:SWI24 (match_dup 2)))
18006 (clobber (reg:CC FLAGS_REG))])]
18007 {
18008 operands[1] = shallow_copy_rtx (operands[1]);
18009 PUT_MODE (operands[1], QImode);
18010 operands[2] = gen_lowpart (QImode, operands[0]);
18011 })
18012
;; SWI24-mode store-flag from a comparison on FLAGS_REG, used when
;; TARGET_ZERO_EXTEND_WITH_AND does not hold or the function is optimized
;; for size.  Split after reload into a QImode store-flag into the low part
;; of operand 0 followed by a zero_extend with no flags clobber.
18013 (define_insn_and_split "*setcc_<mode>_1_movzbl"
18014 [(set (match_operand:SWI24 0 "register_operand" "=q")
18015 (match_operator:SWI24 1 "ix86_comparison_operator"
18016 [(reg FLAGS_REG) (const_int 0)]))]
18017 "!TARGET_PARTIAL_REG_STALL
18018 && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))"
18019 "#"
18020 "&& reload_completed"
18021 [(set (match_dup 2) (match_dup 1))
18022 (set (match_dup 0) (zero_extend:SWI24 (match_dup 2)))]
18023 {
18024 operands[1] = shallow_copy_rtx (operands[1]);
18025 PUT_MODE (operands[1], QImode);
18026 operands[2] = gen_lowpart (QImode, operands[0]);
18027 })
18028
;; QImode store-flag: writes the result of a comparison on FLAGS_REG to a
;; byte register or memory operand, emitted as "set<cond>".
18029 (define_insn "*setcc_qi"
18030 [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
18031 (match_operator:QI 1 "ix86_comparison_operator"
18032 [(reg FLAGS_REG) (const_int 0)]))]
18033 ""
18034 "set%C1\t%0"
18035 [(set_attr "type" "setcc")
18036 (set_attr "mode" "QI")])
18037
;; strict_low_part form of the QImode store-flag: the destination is the
;; low byte of a register operand ("+q").  Emitted with the same
;; "set<cond>" template as *setcc_qi.
18038 (define_insn "*setcc_qi_slp"
18039 [(set (strict_low_part (match_operand:QI 0 "register_operand" "+q"))
18040 (match_operator:QI 1 "ix86_comparison_operator"
18041 [(reg FLAGS_REG) (const_int 0)]))]
18042 ""
18043 "set%C1\t%0"
18044 [(set_attr "type" "setcc")
18045 (set_attr "mode" "QI")])
18046
18047 ;; In general it is not safe to assume too much about CCmode registers,
18048 ;; so simplify-rtx stops when it sees a second one. Under certain
18049 ;; conditions this is safe on x86, so help combine not create
18050 ;;
18051 ;; seta %al
18052 ;; testb %al, %al
18053 ;; sete %al
18054
;; (ne (flags-comparison) 0) stored to a QImode destination: replace it with
;; the inner comparison itself, retargeted to QImode.
18055 (define_split
18056 [(set (match_operand:QI 0 "nonimmediate_operand")
18057 (ne:QI (match_operator 1 "ix86_comparison_operator"
18058 [(reg FLAGS_REG) (const_int 0)])
18059 (const_int 0)))]
18060 ""
18061 [(set (match_dup 0) (match_dup 1))]
18062 {
18063 operands[1] = shallow_copy_rtx (operands[1]);
18064 PUT_MODE (operands[1], QImode);
18065 })
18066
;; strict_low_part variant: (ne (flags-comparison) 0) stored to the low byte
;; of a register is replaced with the inner comparison retargeted to QImode.
18067 (define_split
18068 [(set (strict_low_part (match_operand:QI 0 "register_operand"))
18069 (ne:QI (match_operator 1 "ix86_comparison_operator"
18070 [(reg FLAGS_REG) (const_int 0)])
18071 (const_int 0)))]
18072 ""
18073 [(set (match_dup 0) (match_dup 1))]
18074 {
18075 operands[1] = shallow_copy_rtx (operands[1]);
18076 PUT_MODE (operands[1], QImode);
18077 })
18078
;; (eq (flags-comparison) 0) stored to a QImode destination: replace it with
;; the reversed inner comparison, retargeted to QImode.  The split FAILs if
;; the reversed comparison is not accepted by ix86_comparison_operator.
18079 (define_split
18080 [(set (match_operand:QI 0 "nonimmediate_operand")
18081 (eq:QI (match_operator 1 "ix86_comparison_operator"
18082 [(reg FLAGS_REG) (const_int 0)])
18083 (const_int 0)))]
18084 ""
18085 [(set (match_dup 0) (match_dup 1))]
18086 {
18087 operands[1] = shallow_copy_rtx (operands[1]);
18088 PUT_MODE (operands[1], QImode);
18089 PUT_CODE (operands[1],
18090 ix86_reverse_condition (GET_CODE (operands[1]),
18091 GET_MODE (XEXP (operands[1], 0))));
18092
18093 /* Make sure that (a) the CCmode we have for the flags is strong
18094 enough for the reversed compare or (b) we have a valid FP compare. */
18095 if (! ix86_comparison_operator (operands[1], VOIDmode))
18096 FAIL;
18097 })
18098
;; strict_low_part variant: (eq (flags-comparison) 0) stored to the low byte
;; of a register is replaced with the reversed inner comparison retargeted
;; to QImode.  The split FAILs if the reversed comparison is not accepted by
;; ix86_comparison_operator.
18099 (define_split
18100 [(set (strict_low_part (match_operand:QI 0 "register_operand"))
18101 (eq:QI (match_operator 1 "ix86_comparison_operator"
18102 [(reg FLAGS_REG) (const_int 0)])
18103 (const_int 0)))]
18104 ""
18105 [(set (match_dup 0) (match_dup 1))]
18106 {
18107 operands[1] = shallow_copy_rtx (operands[1]);
18108 PUT_MODE (operands[1], QImode);
18109 PUT_CODE (operands[1],
18110 ix86_reverse_condition (GET_CODE (operands[1]),
18111 GET_MODE (XEXP (operands[1], 0))));
18112
18113 /* Make sure that (a) the CCmode we have for the flags is strong
18114 enough for the reversed compare or (b) we have a valid FP compare. */
18115 if (! ix86_comparison_operator (operands[1], VOIDmode))
18116 FAIL;
18117 })
18118
18119 ;; Eliminate redundant compare between set{z,nz} and j{z,nz}:
18120 ;; setz %al; test %al,%al; jz <...> -> setz %al; jnz <...> and
18121 ;; setnz %al; test %al,%al; jz <...> -> setnz %al; jz <...>.
;; Three-insn window: a CCZ store-flag into operand 0, a CCZ compare of
;; operand 0 with zero, and a conditional jump on that compare.  When
;; FLAGS_REG is dead after the jump, the middle compare is dropped and the
;; jump tests the original flags directly.
18122 (define_peephole2
18123 [(set (match_operand:QI 0 "nonimmediate_operand")
18124 (match_operator:QI 1 "bt_comparison_operator"
18125 [(reg:CCZ FLAGS_REG) (const_int 0)]))
18126 (set (reg:CCZ FLAGS_REG)
18127 (compare:CCZ (match_dup 0) (const_int 0)))
18128 (set (pc)
18129 (if_then_else (match_operator 2 "bt_comparison_operator"
18130 [(reg:CCZ FLAGS_REG) (const_int 0)])
18131 (match_operand 3)
18132 (pc)))]
18133 "peep2_regno_dead_p (3, FLAGS_REG)"
18134 [(set (match_dup 0)
18135 (match_op_dup 1 [(reg:CCZ FLAGS_REG) (const_int 0)]))
18136 (set (pc)
18137 (if_then_else (match_dup 2)
18138 (match_dup 3)
18139 (pc)))]
18140 {
/* When the store-flag condition is EQ, the jump condition is reversed;
   otherwise it is kept as is.  */
18141 if (GET_CODE (operands[1]) == EQ)
18142 {
18143 operands[2] = shallow_copy_rtx (operands[2]);
18144 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
18145 }
18146 })
18147
18148 ;; The SSE store flag instructions save 0 or 0xffffffff to the result.
18149 ;; Subsequent logical operations are used to imitate conditional moves.
18150 ;; 0xffffffff is NaN, but not in normalized form, so we can't represent
18151 ;; it directly.
18152
;; Scalar floating-point compare producing its result in a MODEF register.
;; Alternative 0 (isa "noavx") is the two-operand "cmp" form with operand 1
;; tied to the output; alternative 1 (isa "avx") is the three-operand "vcmp"
;; form.  Enabled when SSE_FLOAT_MODE_P (<MODE>mode) holds.
18153 (define_insn "setcc_<mode>_sse"
18154 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
18155 (match_operator:MODEF 3 "sse_comparison_operator"
18156 [(match_operand:MODEF 1 "register_operand" "0,x")
18157 (match_operand:MODEF 2 "nonimmediate_operand" "xm,xjm")]))]
18158 "SSE_FLOAT_MODE_P (<MODE>mode)"
18159 "@
18160 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
18161 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18162 [(set_attr "isa" "noavx,avx")
18163 (set_attr "addr" "*,gpr16")
18164 (set_attr "type" "ssecmp")
18165 (set_attr "length_immediate" "1")
18166 (set_attr "prefix" "orig,vex")
18167 (set_attr "mode" "<MODE>")])
18168
;; HFmode compare writing a QImode result to a "k" constraint register,
;; expressed as UNSPEC_PCMP of the two HF operands and a predicate constant
;; in the range 0..31.  Requires TARGET_AVX512FP16; emitted as "vcmpsh".
18169 (define_insn "setcc_hf_mask"
18170 [(set (match_operand:QI 0 "register_operand" "=k")
18171 (unspec:QI
18172 [(match_operand:HF 1 "register_operand" "v")
18173 (match_operand:HF 2 "nonimmediate_operand" "vm")
18174 (match_operand:SI 3 "const_0_to_31_operand")]
18175 UNSPEC_PCMP))]
18176 "TARGET_AVX512FP16"
18177 "vcmpsh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18178 [(set_attr "type" "ssecmp")
18179 (set_attr "prefix" "evex")
18180 (set_attr "mode" "HF")])
18181
18182 \f
18183 ;; Basic conditional jump instructions.
18184
;; Conditional jump on an add_comparison_operator applied to (not A) and B.
;; Rewritten as a CCC compare of (A + B) against A, followed by a jump using
;; the same operator on FLAGS_REG.
;; NOTE(review): the replacement template names the label with
;; (match_operand 0) where sibling splits use (match_dup N); presumably
;; equivalent in an output template -- confirm.
18185 (define_split
18186 [(set (pc)
18187 (if_then_else
18188 (match_operator 1 "add_comparison_operator"
18189 [(not:SWI (match_operand:SWI 2 "register_operand"))
18190 (match_operand:SWI 3 "nonimmediate_operand")])
18191 (label_ref (match_operand 0))
18192 (pc)))]
18193 ""
18194 [(set (reg:CCC FLAGS_REG)
18195 (compare:CCC
18196 (plus:SWI (match_dup 2) (match_dup 3))
18197 (match_dup 2)))
18198 (set (pc)
18199 (if_then_else (match_op_dup 1 [(reg:CCC FLAGS_REG) (const_int 0)])
18200 (label_ref (match_operand 0))
18201 (pc)))])
18202
;; Conditional jump on a shr_comparison_operator comparing a DImode register
;; with a constant C where C + 1 is an exact power of two 2^k,
;; 32 <= k <= 63.  Rewritten as a CCZ compare of (reg >> k) against zero and
;; a jump on FLAGS_REG; the preparation code maps GTU to NE and LEU to EQ.
;; NOTE(review): the replacement template names the label with
;; (match_operand 0) where sibling splits use (match_dup N); presumably
;; equivalent in an output template -- confirm.
18203 (define_split
18204 [(set (pc)
18205 (if_then_else
18206 (match_operator 1 "shr_comparison_operator"
18207 [(match_operand:DI 2 "register_operand")
18208 (match_operand 3 "const_int_operand")])
18209 (label_ref (match_operand 0))
18210 (pc)))]
18211 "TARGET_64BIT
18212 && IN_RANGE (exact_log2 (UINTVAL (operands[3]) + 1), 32, 63)"
18213 [(set (reg:CCZ FLAGS_REG)
18214 (compare:CCZ
18215 (lshiftrt:DI (match_dup 2) (match_dup 4))
18216 (const_int 0)))
18217 (set (pc)
18218 (if_then_else (match_op_dup 1 [(reg:CCZ FLAGS_REG) (const_int 0)])
18219 (label_ref (match_operand 0))
18220 (pc)))]
18221 {
18222 enum rtx_code new_code;
18223
/* Work on a copy so the matched operator rtx is not modified in place.  */
18224 operands[1] = shallow_copy_rtx (operands[1]);
18225 switch (GET_CODE (operands[1]))
18226 {
18227 case GTU: new_code = NE; break;
18228 case LEU: new_code = EQ; break;
18229 default: gcc_unreachable ();
18230 }
18231 PUT_CODE (operands[1], new_code);
18232
/* operands[4] is the shift count k.  */
18233 operands[4] = GEN_INT (exact_log2 (UINTVAL (operands[3]) + 1));
18234 })
18235
18236 ;; We ignore the overflow flag for signed branch instructions.
18237
;; Conditional jump to label operand 0 on a comparison of FLAGS_REG.  The
;; "length" attribute is 2 when (target - pc) lies in [-126, 128) and 6
;; otherwise.
18238 (define_insn "*jcc"
18239 [(set (pc)
18240 (if_then_else (match_operator 1 "ix86_comparison_operator"
18241 [(reg FLAGS_REG) (const_int 0)])
18242 (label_ref (match_operand 0))
18243 (pc)))]
18244 ""
18245 "%!%+j%C1\t%l0"
18246 [(set_attr "type" "ibr")
18247 (set_attr "modrm" "0")
18248 (set (attr "length")
18249 (if_then_else
18250 (and (ge (minus (match_dup 0) (pc))
18251 (const_int -126))
18252 (lt (minus (match_dup 0) (pc))
18253 (const_int 128)))
18254 (const_int 2)
18255 (const_int 6)))])
18256
18257 ;; In general it is not safe to assume too much about CCmode registers,
18258 ;; so simplify-rtx stops when it sees a second one. Under certain
18259 ;; conditions this is safe on x86, so help combine not create
18260 ;;
18261 ;; seta %al
18262 ;; testb %al, %al
18263 ;; je Lfoo
18264
;; Jump on (ne (flags-comparison) 0): replace the condition with the inner
;; comparison itself, with its mode reset to VOIDmode.
18265 (define_split
18266 [(set (pc)
18267 (if_then_else (ne (match_operator 0 "ix86_comparison_operator"
18268 [(reg FLAGS_REG) (const_int 0)])
18269 (const_int 0))
18270 (label_ref (match_operand 1))
18271 (pc)))]
18272 ""
18273 [(set (pc)
18274 (if_then_else (match_dup 0)
18275 (label_ref (match_dup 1))
18276 (pc)))]
18277 {
18278 operands[0] = shallow_copy_rtx (operands[0]);
18279 PUT_MODE (operands[0], VOIDmode);
18280 })
18281
;; Jump on (eq (flags-comparison) 0): replace the condition with the
;; reversed inner comparison, with its mode reset to VOIDmode.  The split
;; FAILs if the reversed comparison is not accepted by
;; ix86_comparison_operator.
18282 (define_split
18283 [(set (pc)
18284 (if_then_else (eq (match_operator 0 "ix86_comparison_operator"
18285 [(reg FLAGS_REG) (const_int 0)])
18286 (const_int 0))
18287 (label_ref (match_operand 1))
18288 (pc)))]
18289 ""
18290 [(set (pc)
18291 (if_then_else (match_dup 0)
18292 (label_ref (match_dup 1))
18293 (pc)))]
18294 {
18295 operands[0] = shallow_copy_rtx (operands[0]);
18296 PUT_MODE (operands[0], VOIDmode);
18297 PUT_CODE (operands[0],
18298 ix86_reverse_condition (GET_CODE (operands[0]),
18299 GET_MODE (XEXP (operands[0], 0))));
18300
18301 /* Make sure that (a) the CCmode we have for the flags is strong
18302 enough for the reversed compare or (b) we have a valid FP compare. */
18303 if (! ix86_comparison_operator (operands[0], VOIDmode))
18304 FAIL;
18305 })
18306 \f
18307 ;; Unconditional and other jump instructions
18308
;; Unconditional direct jump to label operand 0.  The "length" attribute is
;; 2 when (target - pc) lies in [-126, 128) and 5 otherwise.
18309 (define_insn "jump"
18310 [(set (pc)
18311 (label_ref (match_operand 0)))]
18312 ""
18313 "%!jmp\t%l0"
18314 [(set_attr "type" "ibr")
18315 (set_attr "modrm" "0")
18316 (set (attr "length")
18317 (if_then_else
18318 (and (ge (minus (match_dup 0) (pc))
18319 (const_int -126))
18320 (lt (minus (match_dup 0) (pc))
18321 (const_int 128)))
18322 (const_int 2)
18323 (const_int 5)))])
18324
;; Expander for an indirect jump through operand 0.  Under TARGET_X32 or
;; TARGET_INDIRECT_BRANCH_REGISTER the target address is converted to
;; word_mode.  Always records in cfun->machine that the function has a
;; local indirect jump.
18325 (define_expand "indirect_jump"
18326 [(set (pc) (match_operand 0 "indirect_branch_operand"))]
18327 ""
18328 {
18329 if (TARGET_X32 || TARGET_INDIRECT_BRANCH_REGISTER)
18330 operands[0] = convert_memory_address (word_mode, operands[0]);
18331 cfun->machine->has_local_indirect_jump = true;
18332 })
18333
;; Insn for an indirect jump through operand 0 (mode iterator W,
;; constraint "rBw"); the assembly text comes from
;; ix86_output_indirect_jmp.  The "type" attribute is "multi" when the
;; function's indirect_branch_type differs from indirect_branch_keep and
;; "ibr" otherwise.
18334 (define_insn "*indirect_jump"
18335 [(set (pc) (match_operand:W 0 "indirect_branch_operand" "rBw"))]
18336 ""
18337 "* return ix86_output_indirect_jmp (operands[0]);"
18338 [(set (attr "type")
18339 (if_then_else (match_test "(cfun->machine->indirect_branch_type
18340 != indirect_branch_keep)")
18341 (const_string "multi")
18342 (const_string "ibr")))
18343 (set_attr "length_immediate" "0")])
18344
;; Expander for a jump-table dispatch: operand 0 is the jump target taken
;; from the table, operand 1 is the table label.  Under flag_pic the
;; relative table entry is turned into an absolute address with
;; expand_simple_binop (Pmode, ...):
;;   TARGET_64BIT || TARGET_VXWORKS_RTP      operand 0 + label_ref (operand 1)
;;   TARGET_MACHO || HAVE_AS_GOTOFF_IN_DATA  operand 0 + pic_offset_table_rtx
;;   otherwise                               pic_offset_table_rtx - operand 0
;; Afterwards, for TARGET_X32 or TARGET_INDIRECT_BRANCH_REGISTER, the
;; address is converted to word_mode, and the function is flagged via
;; cfun->machine->has_local_indirect_jump.
18345 (define_expand "tablejump"
18346 [(parallel [(set (pc) (match_operand 0 "indirect_branch_operand"))
18347 (use (label_ref (match_operand 1)))])]
18348 ""
18349 {
18350 /* In PIC mode, the table entries are stored GOT (32-bit) or PC (64-bit)
18351 relative. Convert the relative address to an absolute address. */
18352 if (flag_pic)
18353 {
18354 rtx op0, op1;
18355 enum rtx_code code;
18356
18357 /* We can't use @GOTOFF for text labels on VxWorks;
18358 see gotoff_operand. */
18359 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
18360 {
18361 code = PLUS;
18362 op0 = operands[0];
18363 op1 = gen_rtx_LABEL_REF (Pmode, operands[1]);
18364 }
18365 else if (TARGET_MACHO || HAVE_AS_GOTOFF_IN_DATA)
18366 {
18367 code = PLUS;
18368 op0 = operands[0];
18369 op1 = pic_offset_table_rtx;
18370 }
18371 else
18372 {
18373 code = MINUS;
18374 op0 = pic_offset_table_rtx;
18375 op1 = operands[0];
18376 }
18377
18378 operands[0] = expand_simple_binop (Pmode, code, op0, op1, NULL_RTX, 0,
18379 OPTAB_DIRECT);
18380 }
18381
18382 if (TARGET_X32 || TARGET_INDIRECT_BRANCH_REGISTER)
18383 operands[0] = convert_memory_address (word_mode, operands[0]);
18384 cfun->machine->has_local_indirect_jump = true;
18385 })
18386
;; Insn form of the table jump: an indirect jump through operand 0 that
;; also carries a (use (label_ref operand 1)) of the table label.  Output
;; and "type" attribute selection are the same as in "*indirect_jump":
;; ix86_output_indirect_jmp, with type "multi" unless indirect_branch_type
;; is indirect_branch_keep.
18387 (define_insn "*tablejump_1"
18388 [(set (pc) (match_operand:W 0 "indirect_branch_operand" "rBw"))
18389 (use (label_ref (match_operand 1)))]
18390 ""
18391 "* return ix86_output_indirect_jmp (operands[0]);"
18392 [(set (attr "type")
18393 (if_then_else (match_test "(cfun->machine->indirect_branch_type
18394 != indirect_branch_keep)")
18395 (const_string "multi")
18396 (const_string "ibr")))
18397 (set_attr "length_immediate" "0")])
18398 \f
18399 ;; Convert setcc + movzbl to xor + setcc if operands don't overlap.
18400
;; Three-insn window:
;;   1. a plain set of the flags register (operand 4) from operand 0,
;;   2. a QImode setcc of comparison operand 2 into register operand 1,
;;   3. a zero_extend of operand 1 into operand 3.
;; Conditions: operand 1 is dead at window position 3 or is the same as
;; operand 3; operand 3 does not overlap anything mentioned in operand 0;
;; FLAGS_REG is dead at window position 0 (presumably because the clear
;; emitted by ix86_expand_clear may clobber the flags -- confirm).
;; Replacement: the flags set is kept, and the comparison is stored into
;; the QImode low part of operand 3 through strict_low_part; the
;; preparation code clears operand 3 with ix86_expand_clear.
18401 (define_peephole2
18402 [(set (match_operand 4 "flags_reg_operand") (match_operand 0))
18403 (set (match_operand:QI 1 "register_operand")
18404 (match_operator:QI 2 "ix86_comparison_operator"
18405 [(reg FLAGS_REG) (const_int 0)]))
18406 (set (match_operand 3 "any_QIreg_operand")
18407 (zero_extend (match_dup 1)))]
18408 "(peep2_reg_dead_p (3, operands[1])
18409 || operands_match_p (operands[1], operands[3]))
18410 && ! reg_overlap_mentioned_p (operands[3], operands[0])
18411 && peep2_regno_dead_p (0, FLAGS_REG)"
18412 [(set (match_dup 4) (match_dup 0))
18413 (set (strict_low_part (match_dup 5))
18414 (match_dup 2))]
18415 {
18416 operands[5] = gen_lowpart (QImode, operands[3]);
18417 ix86_expand_clear (operands[3]);
18418 })
18419
;; Variant of the setcc + zero_extend peephole in which the flags setter
;; is a two-element parallel: the flags set (operand 5 from operand 0)
;; plus one further element matched as operand 4.  In addition to the
;; liveness/overlap checks on operand 0, operand 3 must neither be
;; mentioned in nor set by operand 4.  The replacement re-emits the
;; parallel unchanged and stores the comparison into the QImode low part
;; of operand 3; the preparation code clears operand 3 with
;; ix86_expand_clear.
18420 (define_peephole2
18421 [(parallel [(set (match_operand 5 "flags_reg_operand") (match_operand 0))
18422 (match_operand 4)])
18423 (set (match_operand:QI 1 "register_operand")
18424 (match_operator:QI 2 "ix86_comparison_operator"
18425 [(reg FLAGS_REG) (const_int 0)]))
18426 (set (match_operand 3 "any_QIreg_operand")
18427 (zero_extend (match_dup 1)))]
18428 "(peep2_reg_dead_p (3, operands[1])
18429 || operands_match_p (operands[1], operands[3]))
18430 && ! reg_overlap_mentioned_p (operands[3], operands[0])
18431 && ! reg_overlap_mentioned_p (operands[3], operands[4])
18432 && ! reg_set_p (operands[3], operands[4])
18433 && peep2_regno_dead_p (0, FLAGS_REG)"
18434 [(parallel [(set (match_dup 5) (match_dup 0))
18435 (match_dup 4)])
18436 (set (strict_low_part (match_dup 6))
18437 (match_dup 2))]
18438 {
18439 operands[6] = gen_lowpart (QImode, operands[3]);
18440 ix86_expand_clear (operands[3]);
18441 })
18442
;; Four-insn variant of the setcc + zero_extend peephole: a plain flags
;; set (operand 6 from operand 0) is followed by a parallel flags set
;; (operand 7 from operand 1, plus extra element operand 5), then the
;; setcc into operand 2 and the zero_extend into operand 4.  Operand 1
;; must itself refer to FLAGS_REG (refers_to_regno_p), i.e. the second
;; flags setter consumes the flags produced by the first.  Operand 4 must
;; not overlap operands 0, 1 or 5 nor be set by operand 5, and FLAGS_REG
;; must be dead at window position 0.  Both flags setters are re-emitted
;; unchanged; the comparison is stored into the QImode low part of
;; operand 4, which the preparation code clears with ix86_expand_clear.
18443 (define_peephole2
18444 [(set (match_operand 6 "flags_reg_operand") (match_operand 0))
18445 (parallel [(set (match_operand 7 "flags_reg_operand") (match_operand 1))
18446 (match_operand 5)])
18447 (set (match_operand:QI 2 "register_operand")
18448 (match_operator:QI 3 "ix86_comparison_operator"
18449 [(reg FLAGS_REG) (const_int 0)]))
18450 (set (match_operand 4 "any_QIreg_operand")
18451 (zero_extend (match_dup 2)))]
18452 "(peep2_reg_dead_p (4, operands[2])
18453 || operands_match_p (operands[2], operands[4]))
18454 && ! reg_overlap_mentioned_p (operands[4], operands[0])
18455 && ! reg_overlap_mentioned_p (operands[4], operands[1])
18456 && ! reg_overlap_mentioned_p (operands[4], operands[5])
18457 && ! reg_set_p (operands[4], operands[5])
18458 && refers_to_regno_p (FLAGS_REG, operands[1], (rtx *)NULL)
18459 && peep2_regno_dead_p (0, FLAGS_REG)"
18460 [(set (match_dup 6) (match_dup 0))
18461 (parallel [(set (match_dup 7) (match_dup 1))
18462 (match_dup 5)])
18463 (set (strict_low_part (match_dup 8))
18464 (match_dup 3))]
18465 {
18466 operands[8] = gen_lowpart (QImode, operands[4]);
18467 ix86_expand_clear (operands[4]);
18468 })
18469
18470 ;; Similar, but match zero extend with andsi3.
18471
;; Three-insn window: a plain flags set (operand 4 from operand 0), a
;; QImode setcc into operand 1, and an SImode "operand 3 &= 255" that
;; clobbers FLAGS_REG.  Operands 1 and 3 must be the same hard register
;; number, operand 3 must not overlap operand 0, and FLAGS_REG must be
;; dead at window position 0.  The replacement keeps the flags set and
;; stores the comparison into the QImode low part of operand 3; the
;; preparation code clears operand 3 with ix86_expand_clear.
18472 (define_peephole2
18473 [(set (match_operand 4 "flags_reg_operand") (match_operand 0))
18474 (set (match_operand:QI 1 "register_operand")
18475 (match_operator:QI 2 "ix86_comparison_operator"
18476 [(reg FLAGS_REG) (const_int 0)]))
18477 (parallel [(set (match_operand:SI 3 "any_QIreg_operand")
18478 (and:SI (match_dup 3) (const_int 255)))
18479 (clobber (reg:CC FLAGS_REG))])]
18480 "REGNO (operands[1]) == REGNO (operands[3])
18481 && ! reg_overlap_mentioned_p (operands[3], operands[0])
18482 && peep2_regno_dead_p (0, FLAGS_REG)"
18483 [(set (match_dup 4) (match_dup 0))
18484 (set (strict_low_part (match_dup 5))
18485 (match_dup 2))]
18486 {
18487 operands[5] = gen_lowpart (QImode, operands[3]);
18488 ix86_expand_clear (operands[3]);
18489 })
18490
;; Three-insn window in which the flags setter is a two-element parallel
;; (flags set of operand 5 from operand 0, plus operand 4) and the final
;; insn is a zero_extend of the setcc result (operand 1) into operand 3
;; inside a parallel that clobbers FLAGS_REG.  Note that this pattern
;; matches zero_extend, not the and-with-255 form.  Conditions: operand 1
;; dead at window position 3 or equal to operand 3; operand 3 not
;; overlapping operand 0 or operand 4 and not set by operand 4; FLAGS_REG
;; dead at window position 0.  The replacement re-emits the flags
;; parallel and stores the comparison into the QImode low part of
;; operand 3, which the preparation code clears with ix86_expand_clear.
18491 (define_peephole2
18492 [(parallel [(set (match_operand 5 "flags_reg_operand") (match_operand 0))
18493 (match_operand 4)])
18494 (set (match_operand:QI 1 "register_operand")
18495 (match_operator:QI 2 "ix86_comparison_operator"
18496 [(reg FLAGS_REG) (const_int 0)]))
18497 (parallel [(set (match_operand 3 "any_QIreg_operand")
18498 (zero_extend (match_dup 1)))
18499 (clobber (reg:CC FLAGS_REG))])]
18500 "(peep2_reg_dead_p (3, operands[1])
18501 || operands_match_p (operands[1], operands[3]))
18502 && ! reg_overlap_mentioned_p (operands[3], operands[0])
18503 && ! reg_overlap_mentioned_p (operands[3], operands[4])
18504 && ! reg_set_p (operands[3], operands[4])
18505 && peep2_regno_dead_p (0, FLAGS_REG)"
18506 [(parallel [(set (match_dup 5) (match_dup 0))
18507 (match_dup 4)])
18508 (set (strict_low_part (match_dup 6))
18509 (match_dup 2))]
18510 {
18511 operands[6] = gen_lowpart (QImode, operands[3]);
18512 ix86_expand_clear (operands[3]);
18513 })
18514
;; Four-insn window: a plain flags set (operand 6 from operand 0), a
;; parallel flags set (operand 7 from operand 1, plus operand 5), a QImode
;; setcc into operand 2, and a zero_extend of operand 2 into operand 4
;; inside a parallel that clobbers FLAGS_REG.  Operand 1 must refer to
;; FLAGS_REG; operand 4 must not overlap operands 0, 1 or 5 nor be set by
;; operand 5; operand 2 must be dead at window position 4 or equal to
;; operand 4; FLAGS_REG must be dead at window position 0.  Both flags
;; setters are re-emitted and the comparison is stored into the QImode
;; low part of operand 4, which the preparation code clears with
;; ix86_expand_clear.
18515 (define_peephole2
18516 [(set (match_operand 6 "flags_reg_operand") (match_operand 0))
18517 (parallel [(set (match_operand 7 "flags_reg_operand") (match_operand 1))
18518 (match_operand 5)])
18519 (set (match_operand:QI 2 "register_operand")
18520 (match_operator:QI 3 "ix86_comparison_operator"
18521 [(reg FLAGS_REG) (const_int 0)]))
18522 (parallel [(set (match_operand 4 "any_QIreg_operand")
18523 (zero_extend (match_dup 2)))
18524 (clobber (reg:CC FLAGS_REG))])]
18525 "(peep2_reg_dead_p (4, operands[2])
18526 || operands_match_p (operands[2], operands[4]))
18527 && ! reg_overlap_mentioned_p (operands[4], operands[0])
18528 && ! reg_overlap_mentioned_p (operands[4], operands[1])
18529 && ! reg_overlap_mentioned_p (operands[4], operands[5])
18530 && ! reg_set_p (operands[4], operands[5])
18531 && refers_to_regno_p (FLAGS_REG, operands[1], (rtx *)NULL)
18532 && peep2_regno_dead_p (0, FLAGS_REG)"
18533 [(set (match_dup 6) (match_dup 0))
18534 (parallel [(set (match_dup 7) (match_dup 1))
18535 (match_dup 5)])
18536 (set (strict_low_part (match_dup 8))
18537 (match_dup 3))]
18538 {
18539 operands[8] = gen_lowpart (QImode, operands[4]);
18540 ix86_expand_clear (operands[4]);
18541 })
18542 \f
18543 ;; Call instructions.
18544
18545 ;; The predicates normally associated with named expanders are not properly
18546 ;; checked for calls. This is a bug in the generic code, but it isn't that
18547 ;; easy to fix. Ignore it for now and be prepared to fix things up.
18548
18549 ;; P6 processors will jump to the address after the decrement when %esp
18550 ;; is used as a call operand, so they will execute the return address as code.
18551 ;; See Pentium Pro errata 70, Pentium 2 errata A33 and Pentium 3 errata E17.
18552
18553 ;; Register constraint for call instruction.
;; Mode attribute: SI maps to "l" and DI maps to "r".  It is referenced as
;; <c> in the operand constraints of "*call" and "*call_value".
18554 (define_mode_attr c [(SI "l") (DI "r")])
18555
18556 ;; Call subroutine returning no value.
18557
;; Expander for a call without a return value.  All work is delegated to
;; ix86_expand_call, invoked with NULL as its first argument (no value
;; destination), operands 0-2, NULL for the fifth argument and false as
;; the final flag (the "sibcall" expander passes true there), followed by
;; DONE.
18558 (define_expand "call"
18559 [(call (match_operand:QI 0)
18560 (match_operand 1))
18561 (use (match_operand 2))]
18562 ""
18563 {
18564 ix86_expand_call (NULL, operands[0], operands[1],
18565 operands[2], NULL, false);
18566 DONE;
18567 })
18568
;; Expander for a sibling call without a return value.  Identical to the
;; "call" expander except that the final argument to ix86_expand_call is
;; true.
18569 (define_expand "sibcall"
18570 [(call (match_operand:QI 0)
18571 (match_operand 1))
18572 (use (match_operand 2))]
18573 ""
18574 {
18575 ix86_expand_call (NULL, operands[0], operands[1],
18576 operands[2], NULL, true);
18577 DONE;
18578 })
18579
;; Non-sibling call insn.  Operand 0 (call_insn_operand, mode iterator W,
;; constraint "<c>BwBz") is the callee address inside (mem:QI ...).
;; Enabled only when !SIBLING_CALL_P (insn); the assembly is produced by
;; ix86_output_call_insn.
18580 (define_insn "*call"
18581 [(call (mem:QI (match_operand:W 0 "call_insn_operand" "<c>BwBz"))
18582 (match_operand 1))]
18583 "!SIBLING_CALL_P (insn)"
18584 "* return ix86_output_call_insn (insn, operands[0]);"
18585 [(set_attr "type" "call")])
18586
18587 ;; This covers both call and sibcall since only GOT slot is allowed.
;; TARGET_X32 only: call through an SImode GOT slot (GOT_memory_operand,
;; constraint "Bg") that appears zero-extended to DImode in the pattern.
;; The output code builds a DImode const MEM from the slot's address
;; (XEXP (operands[0], 0)) and passes it to ix86_output_call_insn.
18588 (define_insn "*call_got_x32"
18589 [(call (mem:QI (zero_extend:DI
18590 (match_operand:SI 0 "GOT_memory_operand" "Bg")))
18591 (match_operand 1))]
18592 "TARGET_X32"
18593 {
18594 rtx fnaddr = gen_const_mem (DImode, XEXP (operands[0], 0));
18595 return ix86_output_call_insn (insn, fnaddr);
18596 }
18597 [(set_attr "type" "call")])
18598
18599 ;; Since sibcall never returns, we can only use call-clobbered register
18600 ;; as GOT base.
;; Sibling call whose target is loaded from
;; (mem:SI (plus:SI <reg operand 0> <GOT32 symbol operand 1>)).
;; Enabled only for non-Mach-O, non-64-bit targets without
;; TARGET_INDIRECT_BRANCH_REGISTER, and only for sibling calls.  The
;; output code rebuilds the SImode const MEM of reg + symbol and passes
;; it to ix86_output_call_insn.
18601 (define_insn "*sibcall_GOT_32"
18602 [(call (mem:QI
18603 (mem:SI (plus:SI
18604 (match_operand:SI 0 "register_no_elim_operand" "U")
18605 (match_operand:SI 1 "GOT32_symbol_operand"))))
18606 (match_operand 2))]
18607 "!TARGET_MACHO
18608 && !TARGET_64BIT
18609 && !TARGET_INDIRECT_BRANCH_REGISTER
18610 && SIBLING_CALL_P (insn)"
18611 {
18612 rtx fnaddr = gen_rtx_PLUS (SImode, operands[0], operands[1]);
18613 fnaddr = gen_const_mem (SImode, fnaddr);
18614 return ix86_output_call_insn (insn, fnaddr);
18615 }
18616 [(set_attr "type" "call")])
18617
;; Sibling call insn.  Operand 0 (sibcall_insn_operand, mode iterator W,
;; constraint "UBsBz") is the callee address.  Enabled only when
;; SIBLING_CALL_P (insn); output by ix86_output_call_insn.
18618 (define_insn "*sibcall"
18619 [(call (mem:QI (match_operand:W 0 "sibcall_insn_operand" "UBsBz"))
18620 (match_operand 1))]
18621 "SIBLING_CALL_P (insn)"
18622 "* return ix86_output_call_insn (insn, operands[0]);"
18623 [(set_attr "type" "call")])
18624
;; Call through a memory operand, tagged with UNSPEC_PEEPSIB.  This is
;; the form generated by the peephole2 patterns that fold a register load
;; into a following sibling call.  Disabled for TARGET_X32 and
;; TARGET_INDIRECT_BRANCH_REGISTER.
18625 (define_insn "*sibcall_memory"
18626 [(call (mem:QI (match_operand:W 0 "memory_operand" "m"))
18627 (match_operand 1))
18628 (unspec [(const_int 0)] UNSPEC_PEEPSIB)]
18629 "!TARGET_X32 && !TARGET_INDIRECT_BRANCH_REGISTER"
18630 "* return ix86_output_call_insn (insn, operands[0]);"
18631 [(set_attr "type" "call")])
18632
;; Fold "reg = mem; call *reg" into a call through the memory operand,
;; emitted in the UNSPEC_PEEPSIB-tagged form.  Requires that the call
;; (window insn 1) is a sibling call, that the loaded register is not
;; mentioned in the call's CALL_INSN_FUNCTION_USAGE, and that neither
;; TARGET_X32 nor TARGET_INDIRECT_BRANCH_REGISTER is in effect.
18633 (define_peephole2
18634 [(set (match_operand:W 0 "register_operand")
18635 (match_operand:W 1 "memory_operand"))
18636 (call (mem:QI (match_dup 0))
18637 (match_operand 3))]
18638 "!TARGET_X32
18639 && !TARGET_INDIRECT_BRANCH_REGISTER
18640 && SIBLING_CALL_P (peep2_next_insn (1))
18641 && !reg_mentioned_p (operands[0],
18642 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))"
18643 [(parallel [(call (mem:QI (match_dup 1))
18644 (match_dup 3))
18645 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
18646
;; Same folding of "reg = mem; call *reg" into a memory sibling call, for
;; the case where an UNSPECV_BLOCKAGE insn sits between the load and the
;; call (so the call is window insn 2).  The blockage is re-emitted in
;; front of the UNSPEC_PEEPSIB-tagged call.
18647 (define_peephole2
18648 [(set (match_operand:W 0 "register_operand")
18649 (match_operand:W 1 "memory_operand"))
18650 (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
18651 (call (mem:QI (match_dup 0))
18652 (match_operand 3))]
18653 "!TARGET_X32
18654 && !TARGET_INDIRECT_BRANCH_REGISTER
18655 && SIBLING_CALL_P (peep2_next_insn (2))
18656 && !reg_mentioned_p (operands[0],
18657 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))"
18658 [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
18659 (parallel [(call (mem:QI (match_dup 1))
18660 (match_dup 3))
18661 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
18662
;; Expander for a call that also adds operand 3 to the stack pointer
;; (!TARGET_64BIT only).  Delegates to ix86_expand_call with NULL as the
;; value destination, operands 0-2, operand 3 as the fifth argument and
;; false as the final flag, then DONE.
;; NOTE(review): operands[2] is passed on although no operand 2 appears
;; in the RTL template -- presumably it is supplied by the caller of the
;; generated gen_call_pop; confirm.
18663 (define_expand "call_pop"
18664 [(parallel [(call (match_operand:QI 0)
18665 (match_operand:SI 1))
18666 (set (reg:SI SP_REG)
18667 (plus:SI (reg:SI SP_REG)
18668 (match_operand:SI 3)))])]
18669 "!TARGET_64BIT"
18670 {
18671 ix86_expand_call (NULL, operands[0], operands[1],
18672 operands[2], operands[3], false);
18673 DONE;
18674 })
18675
;; Non-sibling call insn that additionally sets SP to SP + immediate
;; operand 2.  32-bit only (!TARGET_64BIT) and only when
;; !SIBLING_CALL_P (insn); output by ix86_output_call_insn on operand 0.
18676 (define_insn "*call_pop"
18677 [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lBwBz"))
18678 (match_operand 1))
18679 (set (reg:SI SP_REG)
18680 (plus:SI (reg:SI SP_REG)
18681 (match_operand:SI 2 "immediate_operand" "i")))]
18682 "!TARGET_64BIT && !SIBLING_CALL_P (insn)"
18683 "* return ix86_output_call_insn (insn, operands[0]);"
18684 [(set_attr "type" "call")])
18685
;; Sibling-call counterpart of "*call_pop": operand 0 is a
;; sibcall_insn_operand (constraint "UBsBz") and the insn is enabled only
;; when !TARGET_64BIT && SIBLING_CALL_P (insn).
18686 (define_insn "*sibcall_pop"
18687 [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "UBsBz"))
18688 (match_operand 1))
18689 (set (reg:SI SP_REG)
18690 (plus:SI (reg:SI SP_REG)
18691 (match_operand:SI 2 "immediate_operand" "i")))]
18692 "!TARGET_64BIT && SIBLING_CALL_P (insn)"
18693 "* return ix86_output_call_insn (insn, operands[0]);"
18694 [(set_attr "type" "call")])
18695
;; Call through an SImode memory operand with an SP adjustment by
;; immediate operand 2, tagged with UNSPEC_PEEPSIB; this is the form the
;; load + sibcall_pop peephole2 patterns generate.  32-bit only.
;; NOTE(review): the memory operand uses constraint "Bs" here, whereas
;; "*sibcall_value_pop_memory" uses "m"; and, unlike "*sibcall_memory",
;; the condition does not test !TARGET_INDIRECT_BRANCH_REGISTER.  Confirm
;; that both differences are intentional.
18696 (define_insn "*sibcall_pop_memory"
18697 [(call (mem:QI (match_operand:SI 0 "memory_operand" "Bs"))
18698 (match_operand 1))
18699 (set (reg:SI SP_REG)
18700 (plus:SI (reg:SI SP_REG)
18701 (match_operand:SI 2 "immediate_operand" "i")))
18702 (unspec [(const_int 0)] UNSPEC_PEEPSIB)]
18703 "!TARGET_64BIT"
18704 "* return ix86_output_call_insn (insn, operands[0]);"
18705 [(set_attr "type" "call")])
18706
;; Fold "reg = mem; call *reg with SP += operand 4" into the
;; UNSPEC_PEEPSIB-tagged memory form.  32-bit only; the call (window
;; insn 1) must be a sibling call and the loaded register must not be
;; mentioned in its CALL_INSN_FUNCTION_USAGE.
;; NOTE(review): unlike the non-pop load + sibcall peepholes, this
;; condition does not test !TARGET_INDIRECT_BRANCH_REGISTER -- confirm.
18707 (define_peephole2
18708 [(set (match_operand:SI 0 "register_operand")
18709 (match_operand:SI 1 "memory_operand"))
18710 (parallel [(call (mem:QI (match_dup 0))
18711 (match_operand 3))
18712 (set (reg:SI SP_REG)
18713 (plus:SI (reg:SI SP_REG)
18714 (match_operand:SI 4 "immediate_operand")))])]
18715 "!TARGET_64BIT && SIBLING_CALL_P (peep2_next_insn (1))
18716 && !reg_mentioned_p (operands[0],
18717 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))"
18718 [(parallel [(call (mem:QI (match_dup 1))
18719 (match_dup 3))
18720 (set (reg:SI SP_REG)
18721 (plus:SI (reg:SI SP_REG)
18722 (match_dup 4)))
18723 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
18724
;; Same folding as the load + sibcall_pop peephole, for the case where an
;; UNSPECV_BLOCKAGE insn sits between the load and the call (so the call
;; is window insn 2).  The blockage is re-emitted in front of the
;; UNSPEC_PEEPSIB-tagged call.
18725 (define_peephole2
18726 [(set (match_operand:SI 0 "register_operand")
18727 (match_operand:SI 1 "memory_operand"))
18728 (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
18729 (parallel [(call (mem:QI (match_dup 0))
18730 (match_operand 3))
18731 (set (reg:SI SP_REG)
18732 (plus:SI (reg:SI SP_REG)
18733 (match_operand:SI 4 "immediate_operand")))])]
18734 "!TARGET_64BIT && SIBLING_CALL_P (peep2_next_insn (2))
18735 && !reg_mentioned_p (operands[0],
18736 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))"
18737 [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
18738 (parallel [(call (mem:QI (match_dup 1))
18739 (match_dup 3))
18740 (set (reg:SI SP_REG)
18741 (plus:SI (reg:SI SP_REG)
18742 (match_dup 4)))
18743 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
18744
18745 ;; Combining simple memory jump instruction
18746
;; Fold "reg = mem; jump to reg" into a single jump through the memory
;; operand.  Applies only when the register is dead at window position 2
;; and neither TARGET_X32 nor TARGET_INDIRECT_BRANCH_REGISTER is in
;; effect.
18747 (define_peephole2
18748 [(set (match_operand:W 0 "register_operand")
18749 (match_operand:W 1 "memory_operand"))
18750 (set (pc) (match_dup 0))]
18751 "!TARGET_X32
18752 && !TARGET_INDIRECT_BRANCH_REGISTER
18753 && peep2_reg_dead_p (2, operands[0])"
18754 [(set (pc) (match_dup 1))])
18755
18756 ;; Call subroutine, returning value in operand 0
18757
;; Expander for a call whose result goes to operand 0.  Delegates to
;; ix86_expand_call with operand 0 as the value destination, operands
;; 1-3, NULL as the fifth argument and false as the final flag (the
;; "sibcall_value" expander passes true there), then DONE.
18758 (define_expand "call_value"
18759 [(set (match_operand 0)
18760 (call (match_operand:QI 1)
18761 (match_operand 2)))
18762 (use (match_operand 3))]
18763 ""
18764 {
18765 ix86_expand_call (operands[0], operands[1], operands[2],
18766 operands[3], NULL, false);
18767 DONE;
18768 })
18769
;; Expander for a sibling call whose result goes to operand 0.  Identical
;; to "call_value" except that the final argument to ix86_expand_call is
;; true.
18770 (define_expand "sibcall_value"
18771 [(set (match_operand 0)
18772 (call (match_operand:QI 1)
18773 (match_operand 2)))
18774 (use (match_operand 3))]
18775 ""
18776 {
18777 ix86_expand_call (operands[0], operands[1], operands[2],
18778 operands[3], NULL, true);
18779 DONE;
18780 })
18781
;; Non-sibling call insn that sets operand 0 from the call.  Operand 1
;; (call_insn_operand, mode iterator W, constraint "<c>BwBz") is the
;; callee address.  Enabled only when !SIBLING_CALL_P (insn); output by
;; ix86_output_call_insn; attribute type is "callv".
18782 (define_insn "*call_value"
18783 [(set (match_operand 0)
18784 (call (mem:QI (match_operand:W 1 "call_insn_operand" "<c>BwBz"))
18785 (match_operand 2)))]
18786 "!SIBLING_CALL_P (insn)"
18787 "* return ix86_output_call_insn (insn, operands[1]);"
18788 [(set_attr "type" "callv")])
18789
18790 ;; This covers both call and sibcall since only GOT slot is allowed.
;; TARGET_X32 only: value-returning call through an SImode GOT slot
;; (GOT_memory_operand, constraint "Bg") that appears zero-extended to
;; DImode in the pattern.  The output code builds a DImode const MEM from
;; the slot's address (XEXP (operands[1], 0)) and passes it to
;; ix86_output_call_insn.
18791 (define_insn "*call_value_got_x32"
18792 [(set (match_operand 0)
18793 (call (mem:QI
18794 (zero_extend:DI
18795 (match_operand:SI 1 "GOT_memory_operand" "Bg")))
18796 (match_operand 2)))]
18797 "TARGET_X32"
18798 {
18799 rtx fnaddr = gen_const_mem (DImode, XEXP (operands[1], 0));
18800 return ix86_output_call_insn (insn, fnaddr);
18801 }
18802 [(set_attr "type" "callv")])
18803
18804 ;; Since sibcall never returns, we can only use call-clobbered register
18805 ;; as GOT base.
;; Value-returning sibling call whose target is loaded from
;; (mem:SI (plus:SI <reg operand 1> <GOT32 symbol operand 2>)).
;; Enabled only for non-Mach-O, non-64-bit targets without
;; TARGET_INDIRECT_BRANCH_REGISTER, and only for sibling calls.  The
;; output code rebuilds the SImode const MEM of reg + symbol and passes
;; it to ix86_output_call_insn.
18806 (define_insn "*sibcall_value_GOT_32"
18807 [(set (match_operand 0)
18808 (call (mem:QI
18809 (mem:SI (plus:SI
18810 (match_operand:SI 1 "register_no_elim_operand" "U")
18811 (match_operand:SI 2 "GOT32_symbol_operand"))))
18812 (match_operand 3)))]
18813 "!TARGET_MACHO
18814 && !TARGET_64BIT
18815 && !TARGET_INDIRECT_BRANCH_REGISTER
18816 && SIBLING_CALL_P (insn)"
18817 {
18818 rtx fnaddr = gen_rtx_PLUS (SImode, operands[1], operands[2]);
18819 fnaddr = gen_const_mem (SImode, fnaddr);
18820 return ix86_output_call_insn (insn, fnaddr);
18821 }
18822 [(set_attr "type" "callv")])
18823
;; Value-returning sibling call insn.  Operand 1 (sibcall_insn_operand,
;; mode iterator W, constraint "UBsBz") is the callee address.  Enabled
;; only when SIBLING_CALL_P (insn); output by ix86_output_call_insn.
18824 (define_insn "*sibcall_value"
18825 [(set (match_operand 0)
18826 (call (mem:QI (match_operand:W 1 "sibcall_insn_operand" "UBsBz"))
18827 (match_operand 2)))]
18828 "SIBLING_CALL_P (insn)"
18829 "* return ix86_output_call_insn (insn, operands[1]);"
18830 [(set_attr "type" "callv")])
18831
;; Value-returning call through a memory operand, tagged with
;; UNSPEC_PEEPSIB.  This is the form generated by the peephole2 patterns
;; that fold a register load into a following value-returning sibling
;; call.  Disabled for TARGET_X32 and TARGET_INDIRECT_BRANCH_REGISTER.
18832 (define_insn "*sibcall_value_memory"
18833 [(set (match_operand 0)
18834 (call (mem:QI (match_operand:W 1 "memory_operand" "m"))
18835 (match_operand 2)))
18836 (unspec [(const_int 0)] UNSPEC_PEEPSIB)]
18837 "!TARGET_X32 && !TARGET_INDIRECT_BRANCH_REGISTER"
18838 "* return ix86_output_call_insn (insn, operands[1]);"
18839 [(set_attr "type" "callv")])
18840
;; Fold "reg = mem; operand 2 = call *reg" into a value-returning call
;; through the memory operand, emitted in the UNSPEC_PEEPSIB-tagged form.
;; Requires that the call (window insn 1) is a sibling call, that the
;; loaded register is not mentioned in its CALL_INSN_FUNCTION_USAGE, and
;; that neither TARGET_X32 nor TARGET_INDIRECT_BRANCH_REGISTER is in
;; effect.
18841 (define_peephole2
18842 [(set (match_operand:W 0 "register_operand")
18843 (match_operand:W 1 "memory_operand"))
18844 (set (match_operand 2)
18845 (call (mem:QI (match_dup 0))
18846 (match_operand 3)))]
18847 "!TARGET_X32
18848 && !TARGET_INDIRECT_BRANCH_REGISTER
18849 && SIBLING_CALL_P (peep2_next_insn (1))
18850 && !reg_mentioned_p (operands[0],
18851 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))"
18852 [(parallel [(set (match_dup 2)
18853 (call (mem:QI (match_dup 1))
18854 (match_dup 3)))
18855 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
18856
;; Same folding of "reg = mem; operand 2 = call *reg" into a memory
;; sibling call, for the case where an UNSPECV_BLOCKAGE insn sits between
;; the load and the call (so the call is window insn 2).  The blockage is
;; re-emitted in front of the UNSPEC_PEEPSIB-tagged call.
18857 (define_peephole2
18858 [(set (match_operand:W 0 "register_operand")
18859 (match_operand:W 1 "memory_operand"))
18860 (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
18861 (set (match_operand 2)
18862 (call (mem:QI (match_dup 0))
18863 (match_operand 3)))]
18864 "!TARGET_X32
18865 && !TARGET_INDIRECT_BRANCH_REGISTER
18866 && SIBLING_CALL_P (peep2_next_insn (2))
18867 && !reg_mentioned_p (operands[0],
18868 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))"
18869 [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
18870 (parallel [(set (match_dup 2)
18871 (call (mem:QI (match_dup 1))
18872 (match_dup 3)))
18873 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
18874
;; Expander for a value-returning call that also adds operand 4 to the
;; stack pointer (!TARGET_64BIT only).  Delegates to ix86_expand_call
;; with operand 0 as the value destination, operands 1-3, operand 4 as
;; the fifth argument and false as the final flag, then DONE.
;; NOTE(review): operands[3] is passed on although no operand 3 appears
;; in the RTL template -- presumably it is supplied by the caller of the
;; generated gen_call_value_pop; confirm.
18875 (define_expand "call_value_pop"
18876 [(parallel [(set (match_operand 0)
18877 (call (match_operand:QI 1)
18878 (match_operand:SI 2)))
18879 (set (reg:SI SP_REG)
18880 (plus:SI (reg:SI SP_REG)
18881 (match_operand:SI 4)))])]
18882 "!TARGET_64BIT"
18883 {
18884 ix86_expand_call (operands[0], operands[1], operands[2],
18885 operands[3], operands[4], false);
18886 DONE;
18887 })
18888
;; Non-sibling value-returning call insn that additionally sets SP to
;; SP + immediate operand 3.  32-bit only and only when
;; !SIBLING_CALL_P (insn); output by ix86_output_call_insn on operand 1.
18889 (define_insn "*call_value_pop"
18890 [(set (match_operand 0)
18891 (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lBwBz"))
18892 (match_operand 2)))
18893 (set (reg:SI SP_REG)
18894 (plus:SI (reg:SI SP_REG)
18895 (match_operand:SI 3 "immediate_operand" "i")))]
18896 "!TARGET_64BIT && !SIBLING_CALL_P (insn)"
18897 "* return ix86_output_call_insn (insn, operands[1]);"
18898 [(set_attr "type" "callv")])
18899
;; Sibling-call counterpart of "*call_value_pop": operand 1 is a
;; sibcall_insn_operand (constraint "UBsBz") and the insn is enabled only
;; when !TARGET_64BIT && SIBLING_CALL_P (insn).
18900 (define_insn "*sibcall_value_pop"
18901 [(set (match_operand 0)
18902 (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "UBsBz"))
18903 (match_operand 2)))
18904 (set (reg:SI SP_REG)
18905 (plus:SI (reg:SI SP_REG)
18906 (match_operand:SI 3 "immediate_operand" "i")))]
18907 "!TARGET_64BIT && SIBLING_CALL_P (insn)"
18908 "* return ix86_output_call_insn (insn, operands[1]);"
18909 [(set_attr "type" "callv")])
18910
;; Value-returning call through an SImode memory operand (constraint "m")
;; with an SP adjustment by immediate operand 3, tagged with
;; UNSPEC_PEEPSIB; this is the form the load + sibcall_value_pop
;; peephole2 patterns generate.  32-bit only.
;; NOTE(review): unlike "*sibcall_value_memory", the condition does not
;; test !TARGET_INDIRECT_BRANCH_REGISTER -- confirm this is intentional.
18911 (define_insn "*sibcall_value_pop_memory"
18912 [(set (match_operand 0)
18913 (call (mem:QI (match_operand:SI 1 "memory_operand" "m"))
18914 (match_operand 2)))
18915 (set (reg:SI SP_REG)
18916 (plus:SI (reg:SI SP_REG)
18917 (match_operand:SI 3 "immediate_operand" "i")))
18918 (unspec [(const_int 0)] UNSPEC_PEEPSIB)]
18919 "!TARGET_64BIT"
18920 "* return ix86_output_call_insn (insn, operands[1]);"
18921 [(set_attr "type" "callv")])
18922
;; Fold "reg = mem; operand 2 = call *reg with SP += operand 4" into the
;; UNSPEC_PEEPSIB-tagged memory form.  32-bit only; the call (window
;; insn 1) must be a sibling call and the loaded register must not be
;; mentioned in its CALL_INSN_FUNCTION_USAGE.
18923 (define_peephole2
18924 [(set (match_operand:SI 0 "register_operand")
18925 (match_operand:SI 1 "memory_operand"))
18926 (parallel [(set (match_operand 2)
18927 (call (mem:QI (match_dup 0))
18928 (match_operand 3)))
18929 (set (reg:SI SP_REG)
18930 (plus:SI (reg:SI SP_REG)
18931 (match_operand:SI 4 "immediate_operand")))])]
18932 "!TARGET_64BIT && SIBLING_CALL_P (peep2_next_insn (1))
18933 && !reg_mentioned_p (operands[0],
18934 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))"
18935 [(parallel [(set (match_dup 2)
18936 (call (mem:QI (match_dup 1))
18937 (match_dup 3)))
18938 (set (reg:SI SP_REG)
18939 (plus:SI (reg:SI SP_REG)
18940 (match_dup 4)))
18941 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
18942
;; Same folding as the load + sibcall_value_pop peephole, for the case
;; where an UNSPECV_BLOCKAGE insn sits between the load and the call (so
;; the call is window insn 2).  The blockage is re-emitted in front of
;; the UNSPEC_PEEPSIB-tagged call.
18943 (define_peephole2
18944 [(set (match_operand:SI 0 "register_operand")
18945 (match_operand:SI 1 "memory_operand"))
18946 (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
18947 (parallel [(set (match_operand 2)
18948 (call (mem:QI (match_dup 0))
18949 (match_operand 3)))
18950 (set (reg:SI SP_REG)
18951 (plus:SI (reg:SI SP_REG)
18952 (match_operand:SI 4 "immediate_operand")))])]
18953 "!TARGET_64BIT && SIBLING_CALL_P (peep2_next_insn (2))
18954 && !reg_mentioned_p (operands[0],
18955 CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))"
18956 [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
18957 (parallel [(set (match_dup 2)
18958 (call (mem:QI (match_dup 1))
18959 (match_dup 3)))
18960 (set (reg:SI SP_REG)
18961 (plus:SI (reg:SI SP_REG)
18962 (match_dup 4)))
18963 (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])
18964
18965 ;; Call subroutine returning any type.
18966
;; Expander for a call whose return type is not known.  Operand 0 is the
;; callee; operand 2 is a vector of SETs.  (Operand 1 is matched but not
;; used by the preparation code.)  Steps:
;;   1. ix86_expand_call is invoked with an XCmode register at
;;      FIRST_FLOAT_REG as the value destination when
;;      TARGET_FLOAT_RETURNS_IN_80387 (NULL otherwise), const0_rtx, and a
;;      constant equal to the applicable *_SSE_REGPARM_MAX minus one
;;      (chosen by TARGET_64BIT and ix86_abi).
;;   2. For every SET in operand 2, a move from its source to its
;;      destination is emitted.
;;   3. A blockage insn is emitted, then DONE.
18967 (define_expand "untyped_call"
18968 [(parallel [(call (match_operand 0)
18969 (const_int 0))
18970 (match_operand 1)
18971 (match_operand 2)])]
18972 ""
18973 {
18974 int i;
18975
18976 /* In order to give reg-stack an easier job in validating two
18977 coprocessor registers as containing a possible return value,
18978 simply pretend the untyped call returns a complex long double
18979 value.
18980
18981 We can't use SSE_REGPARM_MAX here since callee is unprototyped
18982 and should have the default ABI. */
18983
18984 ix86_expand_call ((TARGET_FLOAT_RETURNS_IN_80387
18985 ? gen_rtx_REG (XCmode, FIRST_FLOAT_REG) : NULL),
18986 operands[0], const0_rtx,
18987 GEN_INT ((TARGET_64BIT
18988 ? (ix86_abi == SYSV_ABI
18989 ? X86_64_SSE_REGPARM_MAX
18990 : X86_64_MS_SSE_REGPARM_MAX)
18991 : X86_32_SSE_REGPARM_MAX)
18992 - 1),
18993 NULL, false);
18994
18995 for (i = 0; i < XVECLEN (operands[2], 0); i++)
18996 {
18997 rtx set = XVECEXP (operands[2], 0, i);
18998 emit_move_insn (SET_DEST (set), SET_SRC (set));
18999 }
19000
19001 /* The optimizer does not know that the call sets the function value
19002 registers we stored in the result block. We avoid problems by
19003 claiming that all hard registers are used and clobbered at this
19004 point. */
19005 emit_insn (gen_blockage ());
19006
19007 DONE;
19008 })
19009 \f
19010 ;; Prologue and epilogue instructions
19011
19012 ;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and
19013 ;; all of memory. This blocks insns from being moved across this point.
19014
;; Scheduling/optimization barrier: an unspec_volatile tagged
;; UNSPECV_BLOCKAGE with an empty output template and "length" 0, so it
;; produces no assembly.
19015 (define_insn "blockage"
19016 [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
19017 ""
19018 ""
19019 [(set_attr "length" "0")])
19020
19021 ;; Do not schedule instructions accessing memory across this point.
19022
;; Expander for a memory-only barrier.  The preparation code creates
;; operand 0 as a volatile BLKmode MEM whose address is a Pmode SCRATCH;
;; the emitted insn sets that MEM from an UNSPEC_MEMORY_BLOCKAGE of
;; itself and is matched by "*memory_blockage".
19023 (define_expand "memory_blockage"
19024 [(set (match_dup 0)
19025 (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BLOCKAGE))]
19026 ""
19027 {
19028 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
19029 MEM_VOLATILE_P (operands[0]) = 1;
19030 })
19031
;; Insn matched by the RTL that "memory_blockage" emits: a BLKmode
;; operand set from an UNSPEC_MEMORY_BLOCKAGE of itself.  Empty output
;; template and "length" 0, so it produces no assembly.
19032 (define_insn "*memory_blockage"
19033 [(set (match_operand:BLK 0)
19034 (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BLOCKAGE))]
19035 ""
19036 ""
19037 [(set_attr "length" "0")])
19038
19039 ;; As USE insns aren't meaningful after reload, this is used instead
19040 ;; to prevent deleting instructions setting registers for PIC code
;; Zero-length insn that wraps operand 0 in an unspec_volatile tagged
;; UNSPECV_PROLOGUE_USE; it emits no assembly and exists only to keep a
;; reference to the operand.
19041 (define_insn "prologue_use"
19042 [(unspec_volatile [(match_operand 0)] UNSPECV_PROLOGUE_USE)]
19043 ""
19044 ""
19045 [(set_attr "length" "0")])
19046
19047 ;; Insn emitted into the body of a function to return from a function.
19048 ;; This is only done if the function's epilogue is known to be simple.
19049 ;; See comments for ix86_can_use_return_insn_p in i386.cc.
19050
;; Expander for "return", available only when
;; ix86_can_use_return_insn_p () holds.  If the function pops its own
;; arguments (crtl->args.pops_args is non-zero), a
;; simple_return_pop_internal jump insn carrying that byte count is
;; emitted and expansion is DONE; otherwise the (simple_return) template
;; is used.
19051 (define_expand "return"
19052 [(simple_return)]
19053 "ix86_can_use_return_insn_p ()"
19054 {
19055 if (crtl->args.pops_args)
19056 {
19057 rtx popc = GEN_INT (crtl->args.pops_args);
19058 emit_jump_insn (gen_simple_return_pop_internal (popc));
19059 DONE;
19060 }
19061 })
19062
19063 ;; We need to disable this for TARGET_SEH, as otherwise
19064 ;; shrink-wrapped prologue gets enabled too. This might exceed
19065 ;; the maximum size of prologue in unwind information.
19066 ;; Also disallow shrink-wrapping if using stack slot to pass the
19067 ;; static chain pointer - the first instruction has to be pushl %esi
19068 ;; and it can't be moved around, as we use alternate entry points
19069 ;; in that case.
19070 ;; Also disallow for ms_hook_prologue functions which have frame
19071 ;; pointer set up in function label which is correctly handled in
19072 ;; ix86_expand_{prologue|epilogue}() only.
19073
;; Expander for "simple_return".  Disabled for TARGET_SEH, when the
;; static chain is passed on the stack, and for ms_hook_prologue
;; functions.  As in the "return" expander, a function that pops its own
;; arguments gets a simple_return_pop_internal insn with the pop count;
;; otherwise the (simple_return) template is used.
19074 (define_expand "simple_return"
19075 [(simple_return)]
19076 "!TARGET_SEH && !ix86_static_chain_on_stack && !ix86_function_ms_hook_prologue (cfun->decl)"
19077 {
19078 if (crtl->args.pops_args)
19079 {
19080 rtx popc = GEN_INT (crtl->args.pops_args);
19081 emit_jump_insn (gen_simple_return_pop_internal (popc));
19082 DONE;
19083 }
19084 })
19085
;; Plain function return, valid only after reload.  The assembly comes
;; from ix86_output_function_return (false); the insn is described as
;; 1 byte long with no immediate and no modrm byte.
19086 (define_insn "simple_return_internal"
19087 [(simple_return)]
19088 "reload_completed"
19089 "* return ix86_output_function_return (false);"
19090 [(set_attr "length" "1")
19091 (set_attr "atom_unit" "jeu")
19092 (set_attr "length_immediate" "0")
19093 (set_attr "modrm" "0")])
19094
;; Return tagged with UNSPEC_INTERRUPT_RETURN, valid only after reload.
;; Emits "uiret" for TARGET_64BIT with TARGET_UINTR, "iretq" for
;; TARGET_64BIT without it, and "iret" otherwise.
19095 (define_insn "interrupt_return"
19096 [(simple_return)
19097 (unspec [(const_int 0)] UNSPEC_INTERRUPT_RETURN)]
19098 "reload_completed"
19099 {
19100 return TARGET_64BIT ? (TARGET_UINTR ? "uiret" : "iretq") : "iret";
19101 })
19102
19103 ;; Used by x86_machine_dependent_reorg to avoid the penalty that Athlon and
19104 ;; K8 have on a single-byte RET instruction.
19105
;; Two-byte form of the function return, tagged with UNSPEC_REP and valid
;; only after reload.  The assembly comes from
;; ix86_output_function_return (true); attributes declare length 2 and
;; prefix_rep 1.
19106 (define_insn "simple_return_internal_long"
19107 [(simple_return)
19108 (unspec [(const_int 0)] UNSPEC_REP)]
19109 "reload_completed"
19110 "* return ix86_output_function_return (true);"
19111 [(set_attr "length" "2")
19112 (set_attr "atom_unit" "jeu")
19113 (set_attr "length_immediate" "0")
19114 (set_attr "prefix_rep" "1")
19115 (set_attr "modrm" "0")])
19116
;; Return that pops operand 0 bytes (a const_int), output as "ret %0" and
;; valid only after reload.  When the function's function_return_type is
;; not indirect_branch_keep, the insn is instead split by calling
;; ix86_split_simple_return_pop_internal (operands[0]).
19117 (define_insn_and_split "simple_return_pop_internal"
19118 [(simple_return)
19119 (use (match_operand:SI 0 "const_int_operand"))]
19120 "reload_completed"
19121 "ret\t%0"
19122 "&& cfun->machine->function_return_type != indirect_branch_keep"
19123 [(const_int 0)]
19124 "ix86_split_simple_return_pop_internal (operands[0]); DONE;"
19125 [(set_attr "length" "3")
19126 (set_attr "atom_unit" "jeu")
19127 (set_attr "length_immediate" "2")
19128 (set_attr "modrm" "0")])
19129
;; Expander with no condition and no preparation code: it generates a
;; parallel of (simple_return) and a use of register operand 0, the shape
;; matched by "*simple_return_indirect_internal<mode>".
19130 (define_expand "simple_return_indirect_internal"
19131 [(parallel
19132 [(simple_return)
19133 (use (match_operand 0 "register_operand"))])])
19134
;; Return performed through register operand 0 (mode iterator W), valid
;; only after reload.  The assembly comes from
;; ix86_output_indirect_function_return.  The "type" attribute is "multi"
;; when the function's indirect_branch_type differs from
;; indirect_branch_keep and "ibr" otherwise.
19135 (define_insn "*simple_return_indirect_internal<mode>"
19136 [(simple_return)
19137 (use (match_operand:W 0 "register_operand" "r"))]
19138 "reload_completed"
19139 "* return ix86_output_indirect_function_return (operands[0]);"
19140 [(set (attr "type")
19141 (if_then_else (match_test "(cfun->machine->indirect_branch_type
19142 != indirect_branch_keep)")
19143 (const_string "multi")
19144 (const_string "ibr")))
19145 (set_attr "length_immediate" "0")])
19146
;; Single "nop" instruction, declared as 1 byte long with no immediate
;; and no modrm byte.
19147 (define_insn "nop"
19148 [(const_int 0)]
19149 ""
19150 "nop"
19151 [(set_attr "length" "1")
19152 (set_attr "length_immediate" "0")
19153 (set_attr "modrm" "0")])
19154
19155 ;; Generate nops. Operand 0 is the number of nops, up to 8.
;; Valid only after reload.  The output code asserts that the count in
;; operand 0 is within [1, 8], writes that many "nop" lines directly to
;; asm_out_file and returns an empty template.  The "length" attribute is
;; computed as INTVAL (operands[0]).
19156 (define_insn "nops"
19157 [(unspec_volatile [(match_operand 0 "const_int_operand")]
19158 UNSPECV_NOPS)]
19159 "reload_completed"
19160 {
19161 int num = INTVAL (operands[0]);
19162
19163 gcc_assert (IN_RANGE (num, 1, 8));
19164
19165 while (num--)
19166 fputs ("\tnop\n", asm_out_file);
19167
19168 return "";
19169 }
19170 [(set (attr "length") (symbol_ref "INTVAL (operands[0])"))
19171 (set_attr "length_immediate" "0")
19172 (set_attr "modrm" "0")])
19173
19174 ;; Pad to 16-byte boundary, max skip in op0. Used to avoid
19175 ;; branch prediction penalty for the third jump in a 16-byte
19176 ;; block on K8.
19177
;; When ASM_OUTPUT_MAX_SKIP_ALIGN is defined, the output code calls it
;; with alignment argument 4 and INTVAL (operands[0]) as the maximum
;; skip; otherwise nothing is emitted.  In both cases the returned
;; template is empty, and the "length" attribute is a fixed 16.
19178 (define_insn "pad"
19179 [(unspec_volatile [(match_operand 0)] UNSPECV_ALIGN)]
19180 ""
19181 {
19182 #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
19183 ASM_OUTPUT_MAX_SKIP_ALIGN (asm_out_file, 4, (int)INTVAL (operands[0]));
19184 #else
19185 /* It is tempting to use ASM_OUTPUT_ALIGN here, but we don't want to do that.
19186 The align insn is used to avoid 3 jump instructions in a row to improve
19187 branch prediction and the benefits hardly outweigh the cost of extra 8
19188 nops on the average inserted by full alignment pseudo operation. */
19189 #endif
19190 return "";
19191 }
19192 [(set_attr "length" "16")])
19193
;; Expander for the function prologue: all RTL is generated by
;; ix86_expand_prologue (), after which expansion is DONE.
19194 (define_expand "prologue"
19195 [(const_int 0)]
19196 ""
19197 "ix86_expand_prologue (); DONE;")
19198
;; 32-bit-only expander that sets SImode register operand 0 from an
;; UNSPEC_SET_GOT, with a FLAGS_REG clobber.  When compiling PIC code and
;; not for TARGET_VXWORKS_RTP, the preparation code also sets
;; ix86_pc_thunk_call_expanded to true.
19199 (define_expand "set_got"
19200 [(parallel
19201 [(set (match_operand:SI 0 "register_operand")
19202 (unspec:SI [(const_int 0)] UNSPEC_SET_GOT))
19203 (clobber (reg:CC FLAGS_REG))])]
19204 "!TARGET_64BIT"
19205 {
19206 if (flag_pic && !TARGET_VXWORKS_RTP)
19207 ix86_pc_thunk_call_expanded = true;
19208 })
19209
;; 32-bit-only insn matched by the RTL from "set_got".  The assembly is
;; produced by output_set_got (operands[0], NULL_RTX); the insn is typed
;; "multi" with a declared length of 12.
19210 (define_insn "*set_got"
19211 [(set (match_operand:SI 0 "register_operand" "=r")
19212 (unspec:SI [(const_int 0)] UNSPEC_SET_GOT))
19213 (clobber (reg:CC FLAGS_REG))]
19214 "!TARGET_64BIT"
19215 "* return output_set_got (operands[0], NULL_RTX);"
19216 [(set_attr "type" "multi")
19217 (set_attr "length" "12")])
19218
;; Variant of "set_got" whose UNSPEC_SET_GOT carries a label_ref to
;; operand 1.  32-bit only; as in "set_got", PIC code that is not for
;; TARGET_VXWORKS_RTP sets ix86_pc_thunk_call_expanded to true.
19219 (define_expand "set_got_labelled"
19220 [(parallel
19221 [(set (match_operand:SI 0 "register_operand")
19222 (unspec:SI [(label_ref (match_operand 1))]
19223 UNSPEC_SET_GOT))
19224 (clobber (reg:CC FLAGS_REG))])]
19225 "!TARGET_64BIT"
19226 {
19227 if (flag_pic && !TARGET_VXWORKS_RTP)
19228 ix86_pc_thunk_call_expanded = true;
19229 })
19230
;; 32-bit-only insn matched by the RTL from "set_got_labelled".  The
;; assembly is produced by output_set_got (operands[0], operands[1]),
;; i.e. with the label passed through; type "multi", declared length 12.
19231 (define_insn "*set_got_labelled"
19232 [(set (match_operand:SI 0 "register_operand" "=r")
19233 (unspec:SI [(label_ref (match_operand 1))]
19234 UNSPEC_SET_GOT))
19235 (clobber (reg:CC FLAGS_REG))]
19236 "!TARGET_64BIT"
19237 "* return output_set_got (operands[0], operands[1]);"
19238 [(set_attr "type" "multi")
19239 (set_attr "length" "12")])
19240
;; 64-bit: load the address of _GLOBAL_OFFSET_TABLE_ into DImode operand 0
;; with a RIP-relative lea.
(define_insn "set_got_rex64"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (unspec:DI [(const_int 0)] UNSPEC_SET_GOT))]
  "TARGET_64BIT"
  "lea{q}\t{_GLOBAL_OFFSET_TABLE_(%%rip), %0|%0, _GLOBAL_OFFSET_TABLE_[rip]}"
  [(set_attr "type" "lea")
   (set_attr "length_address" "4")
   (set_attr "mode" "DI")])
19249
;; 64-bit: load the address of the label in operand 1 into DImode
;; operand 0 with a RIP-relative lea.
(define_insn "set_rip_rex64"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (unspec:DI [(label_ref (match_operand 1))] UNSPEC_SET_RIP))]
  "TARGET_64BIT"
  "lea{q}\t{%l1(%%rip), %0|%0, %l1[rip]}"
  [(set_attr "type" "lea")
   (set_attr "length_address" "4")
   (set_attr "mode" "DI")])
19258
;; LP64 only: load the 64-bit constant `_GLOBAL_OFFSET_TABLE_ - label'
;; (label is operand 1) into DImode operand 0 with movabs.
(define_insn "set_got_offset_rex64"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (unspec:DI
          [(label_ref (match_operand 1))]
          UNSPEC_SET_GOT_OFFSET))]
  "TARGET_LP64"
  "movabs{q}\t{$_GLOBAL_OFFSET_TABLE_-%l1, %0|%0, OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-%l1}"
  [(set_attr "type" "imov")
   (set_attr "length_immediate" "0")
   (set_attr "length_address" "8")
   (set_attr "mode" "DI")])
19270
;; Normal function epilogue.  The template is a placeholder; the sequence
;; is emitted by ix86_expand_epilogue called with style argument 1.
(define_expand "epilogue"
  [(const_int 0)]
  ""
{
  ix86_expand_epilogue (1);
  DONE;
})
19275
;; Epilogue used before a sibling call.  Same as "epilogue" except that
;; ix86_expand_epilogue is called with style argument 0.
(define_expand "sibcall_epilogue"
  [(const_int 0)]
  ""
{
  ix86_expand_epilogue (0);
  DONE;
})
19280
;; Exception-handling return.  Operand 0 holds the handler address.  It is
;; stored into the stack slot one word below (arg pointer + stack
;; adjustment), then the eh_return_internal jump and a barrier are emitted.
(define_expand "eh_return"
  [(use (match_operand 0 "register_operand"))]
  ""
{
  rtx stack_adj = EH_RETURN_STACKADJ_RTX;
  rtx handler = operands[0];
  rtx slot;

  /* Tricky bit: we write the address of the handler to which we will
     be returning into someone else's stack frame, one word below the
     stack address we wish to restore.  */
  slot = gen_rtx_PLUS (Pmode, arg_pointer_rtx, stack_adj);
  slot = plus_constant (Pmode, slot, -UNITS_PER_WORD);

  /* The return address is always stored in word_mode, so widen or narrow
     the handler value if it arrived in a different mode.  */
  slot = gen_rtx_MEM (word_mode, slot);
  if (GET_MODE (handler) != word_mode)
    handler = convert_to_mode (word_mode, handler, 1);
  emit_move_insn (slot, handler);

  emit_jump_insn (gen_eh_return_internal ());
  emit_barrier ();
  DONE;
})
19302
;; Placeholder jump for eh_return.  It is always split ("#") once
;; epilogue_completed is set, and the replacement is the sequence emitted
;; by ix86_expand_epilogue called with style argument 2.
(define_insn_and_split "eh_return_internal"
  [(eh_return)]
  ""
  "#"
  "epilogue_completed"
  [(const_int 0)]
{
  ix86_expand_epilogue (2);
  DONE;
})
19310
;; Generator for the "leave" RTL in word mode W: SP = BP + <MODE_SIZE>,
;; BP = mem[BP], plus a blockage-style memory clobber.  Operand 0 is not
;; supplied by the caller; it is filled in here with the mode size.
(define_expand "@leave_<mode>"
  [(parallel
     [(set (reg:W SP_REG) (plus:W (reg:W BP_REG) (match_dup 0)))
      (set (reg:W BP_REG) (mem:W (reg:W BP_REG)))
      (clobber (mem:BLK (scratch)))])]
  ""
{
  operands[0] = GEN_INT (<MODE_SIZE>);
})
19318
;; 32-bit "leave": SP = BP + 4, BP = mem[BP]; all memory is clobbered.
(define_insn "*leave"
  [(set (reg:SI SP_REG) (plus:SI (reg:SI BP_REG) (const_int 4)))
   (set (reg:SI BP_REG) (mem:SI (reg:SI BP_REG)))
   (clobber (mem:BLK (scratch)))]
  "!TARGET_64BIT"
  "leave"
  [(set_attr "type" "leave")])
19326
;; 64-bit "leave": SP = BP + 8, BP = mem[BP]; all memory is clobbered.
(define_insn "*leave_rex64"
  [(set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8)))
   (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG)))
   (clobber (mem:BLK (scratch)))]
  "TARGET_64BIT"
  "leave"
  [(set_attr "type" "leave")])
19334 \f
19335 ;; Handle -fsplit-stack.
19336
;; Split-stack prologue.  The template is a placeholder; the sequence is
;; emitted by ix86_expand_split_stack_prologue.
(define_expand "split_stack_prologue"
  [(const_int 0)]
  ""
  "ix86_expand_split_stack_prologue (); DONE;")
19344
;; In order to support the call/return predictor, we use a return
;; instruction which the middle-end doesn't see.  Operand 0 is an integer
;; constant: zero selects a plain "ret", any other value is printed as the
;; immediate of "ret".  The length attributes follow the same split.
(define_insn "split_stack_return"
  [(unspec_volatile [(match_operand:SI 0 "const_int_operand")]
                    UNSPECV_SPLIT_STACK_RETURN)]
  ""
{
  return operands[0] == const0_rtx ? "ret" : "ret\t%0";
}
  [(set_attr "atom_unit" "jeu")
   (set_attr "modrm" "0")
   (set (attr "length")
        (if_then_else (match_operand:SI 0 "const0_operand")
                      (const_int 1)
                      (const_int 3)))
   (set (attr "length_immediate")
        (if_then_else (match_operand:SI 0 "const0_operand")
                      (const_int 0)
                      (const_int 2)))])
19367
;; If there are operand 0 bytes available on the stack, jump to
;; operand 1.
;;
;; The RTL template below is never emitted as written: the preparation
;; code generates the subtraction and the branch itself and ends with DONE.

(define_expand "split_stack_space_check"
  [(set (pc) (if_then_else
              (ltu (minus (reg SP_REG)
                          (match_operand 0 "register_operand"))
                   (match_dup 2))
              (label_ref (match_operand 1))
              (pc)))]
  ""
{
  /* remaining = stack pointer - requested size.  */
  rtx remaining = gen_reg_rtx (Pmode);
  emit_insn (gen_sub3_insn (remaining, stack_pointer_rtx, operands[0]));

  /* Branch to the label when the result is still at or above the guard
     value (unsigned comparison).  */
  rtx guard = ix86_split_stack_guard ();
  operands[2] = guard;
  ix86_expand_branch (GEU, remaining, guard, operands[1]);

  DONE;
})
19389 \f
19390 ;; Bit manipulation instructions.
19391
;; ffs (find first set).  The emitted sequence is:
;;   tmp = -1
;;   flags = compare (src, 0); dst = ctz (src)
;;   dst = (flags say "equal") ? tmp : dst
;;   dst = dst + 1
;; SImode without cmov is delegated to ffssi2_no_cmove instead.
(define_expand "ffs<mode>2"
  [(set (match_dup 2) (const_int -1))
   (parallel [(set (match_dup 3) (match_dup 4))
              (set (match_operand:SWI48 0 "register_operand")
                   (ctz:SWI48
                     (match_operand:SWI48 1 "nonimmediate_operand")))])
   (set (match_dup 0) (if_then_else:SWI48
                        (eq (match_dup 3) (const_int 0))
                        (match_dup 2)
                        (match_dup 0)))
   (parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (const_int 1)))
              (clobber (reg:CC FLAGS_REG))])]
  ""
{
  /* The flags mode depends on which ctz insn will match: CCCmode with
     BMI, CCZmode otherwise.  */
  const machine_mode flags_mode = TARGET_BMI ? CCCmode : CCZmode;

  if (!TARGET_CMOVE && <MODE>mode == SImode)
    {
      emit_insn (gen_ffssi2_no_cmove (operands[0], operands[1]));
      DONE;
    }

  operands[2] = gen_reg_rtx (<MODE>mode);
  operands[3] = gen_rtx_REG (flags_mode, FLAGS_REG);
  operands[4] = gen_rtx_COMPARE (flags_mode, operands[1], const0_rtx);
})
19420
;; SImode ffs for targets without cmov.  Split after reload into:
;;   scratch = 0                      (emitted by ix86_expand_clear)
;;   flags = compare (src, 0); dst = ctz (src)
;;   low byte of scratch = (flags == 0)
;;   scratch = -scratch
;;   dst |= scratch
;;   dst += 1
;; The scratch uses the "q" constraint because its low byte is written.
(define_insn_and_split "ffssi2_no_cmove"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
   (clobber (match_scratch:SI 2 "=&q"))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_CMOVE"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 4) (match_dup 5))
              (set (match_dup 0) (ctz:SI (match_dup 1)))])
   (set (strict_low_part (match_dup 3))
        (eq:QI (match_dup 4) (const_int 0)))
   (parallel [(set (match_dup 2) (neg:SI (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 0) (ior:SI (match_dup 0) (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
              (clobber (reg:CC FLAGS_REG))])]
{
  const machine_mode flags_mode = TARGET_BMI ? CCCmode : CCZmode;

  /* Emitted ahead of the template insns: clear the scratch register.  */
  ix86_expand_clear (operands[2]);

  operands[3] = gen_lowpart (QImode, operands[2]);
  operands[4] = gen_rtx_REG (flags_mode, FLAGS_REG);
  operands[5] = gen_rtx_COMPARE (flags_mode, operands[1], const0_rtx);
})
19448
;; tzcnt that also sets the flags (CCCmode) from comparing the source with
;; zero.  After the epilogue is complete, and when the tuning asks for it,
;; the insn is rewritten into the *_falsedep form below.  Before that,
;; ix86_expand_clear is called on the destination (presumably to break the
;; output dependency -- see the falsedep comment).  The rewrite is skipped
;; when the destination is mentioned in the source.
(define_insn_and_split "*tzcnt<mode>_1"
  [(set (reg:CCC FLAGS_REG)
        (compare:CCC (match_operand:SWI48 1 "nonimmediate_operand" "rm")
                     (const_int 0)))
   (set (match_operand:SWI48 0 "register_operand" "=r")
        (ctz:SWI48 (match_dup 1)))]
  "TARGET_BMI"
  "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
   && optimize_function_for_speed_p (cfun)
   && !reg_mentioned_p (operands[0], operands[1])"
  [(parallel
     [(set (reg:CCC FLAGS_REG)
           (compare:CCC (match_dup 1) (const_int 0)))
      (set (match_dup 0)
           (ctz:SWI48 (match_dup 1)))
      (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
{
  ix86_expand_clear (operands[0]);
}
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "prefix_rep" "1")
   (set_attr "btver2_decode" "double")
   (set_attr "mode" "<MODE>")])
19472
; A false dependency arises when the destination is only updated by tzcnt,
; lzcnt or popcnt.  There is no false dependency when the destination is
; also used in the source.
; Operand 2 is tied to operand 0 by its "0" constraint, so this form
; records the destination as an input as well.
(define_insn "*tzcnt<mode>_1_falsedep"
  [(set (reg:CCC FLAGS_REG)
        (compare:CCC (match_operand:SWI48 1 "nonimmediate_operand" "rm")
                     (const_int 0)))
   (set (match_operand:SWI48 0 "register_operand" "=r")
        (ctz:SWI48 (match_dup 1)))
   (unspec [(match_operand:SWI48 2 "register_operand" "0")]
           UNSPEC_INSN_FALSE_DEP)]
  "TARGET_BMI"
  "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "prefix_rep" "1")
   (set_attr "btver2_decode" "double")
   (set_attr "mode" "<MODE>")])
19491
;; bsf that also sets the flags (CCZmode) from comparing the source with
;; zero; the destination receives ctz of the source.  No target condition.
(define_insn "*bsf<mode>_1"
  [(set (reg:CCZ FLAGS_REG)
        (compare:CCZ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
                     (const_int 0)))
   (set (match_operand:SWI48 0 "register_operand" "=r")
        (ctz:SWI48 (match_dup 1)))]
  ""
  "bsf{<imodesuffix>}\t{%1, %0|%0, %1}"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "btver2_decode" "double")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "<MODE>")])
19505
;; Count trailing zeros, flags clobbered.  Mnemonic selection:
;;   - BMI available:                           tzcnt
;;   - no BMI, not optimizing for size, generic tuning: "rep bsf"
;;   - otherwise:                               bsf
;; The "prefix_rep" attribute below encodes the same decision.  For the
;; first two cases the insn may be rewritten after the epilogue into
;; *ctz<mode>2_falsedep, with ix86_expand_clear called on the destination
;; first.
(define_insn_and_split "ctz<mode>2"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (ctz:SWI48
          (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
   (clobber (reg:CC FLAGS_REG))]
  ""
{
  if (TARGET_BMI)
    return "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}";

  /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI.  */
  if (!optimize_function_for_size_p (cfun) && TARGET_CPU_P (GENERIC))
    return "rep%; bsf{<imodesuffix>}\t{%1, %0|%0, %1}";

  return "bsf{<imodesuffix>}\t{%1, %0|%0, %1}";
}
  "(TARGET_BMI || TARGET_CPU_P (GENERIC))
   && TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
   && optimize_function_for_speed_p (cfun)
   && !reg_mentioned_p (operands[0], operands[1])"
  [(parallel
     [(set (match_dup 0)
           (ctz:SWI48 (match_dup 1)))
      (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
      (clobber (reg:CC FLAGS_REG))])]
{
  ix86_expand_clear (operands[0]);
}
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set (attr "prefix_rep")
     (if_then_else
       (ior (match_test "TARGET_BMI")
            (and (not (match_test "optimize_function_for_size_p (cfun)"))
                 (match_test "TARGET_CPU_P (GENERIC)")))
       (const_string "1")
       (const_string "0")))
   (set_attr "mode" "<MODE>")])
19543
; A false dependency arises when the destination is only updated by tzcnt,
; lzcnt or popcnt.  There is no false dependency when the destination is
; also used in the source.
; This form is only expected with BMI or generic tuning (the conditions
; under which ctz<mode>2 splits); any other configuration is treated as
; unreachable.
(define_insn "*ctz<mode>2_falsedep"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (ctz:SWI48
          (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
   (unspec [(match_operand:SWI48 2 "register_operand" "0")]
           UNSPEC_INSN_FALSE_DEP)
   (clobber (reg:CC FLAGS_REG))]
  ""
{
  if (TARGET_BMI)
    return "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}";

  if (!TARGET_CPU_P (GENERIC))
    gcc_unreachable ();

  /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI.  */
  return "rep%; bsf{<imodesuffix>}\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "<MODE>")])
19568
;; 64-bit BMI: SImode tzcnt whose result is viewed as DImode and masked
;; with 63, implemented as a single 32-bit tzcnt writing %k0.  May be
;; rewritten after the epilogue into *ctzsi2_zext_falsedep, with
;; ix86_expand_clear called on the destination first.
(define_insn_and_split "*ctzsi2_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (and:DI
          (subreg:DI
            (ctz:SI
              (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
          (const_int 63)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI && TARGET_64BIT"
  "tzcnt{l}\t{%1, %k0|%k0, %1}"
  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
   && optimize_function_for_speed_p (cfun)
   && !reg_mentioned_p (operands[0], operands[1])"
  [(parallel
     [(set (match_dup 0)
           (and:DI (subreg:DI (ctz:SI (match_dup 1)) 0) (const_int 63)))
      (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
      (clobber (reg:CC FLAGS_REG))])]
{
  ix86_expand_clear (operands[0]);
}
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "SI")])
19592
; A false dependency arises when the destination is only updated by tzcnt,
; lzcnt or popcnt.  There is no false dependency when the destination is
; also used in the source.
; Operand 2 is tied to operand 0 by its "0" constraint.
(define_insn "*ctzsi2_zext_falsedep"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (and:DI
          (subreg:DI
            (ctz:SI
              (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
          (const_int 63)))
   (unspec [(match_operand:DI 2 "register_operand" "0")]
           UNSPEC_INSN_FALSE_DEP)
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI && TARGET_64BIT"
  "tzcnt{l}\t{%1, %k0|%k0, %1}"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "SI")])
19612
;; 64-bit: sign- or zero-extension to DImode of an SImode ctz, done with a
;; single 32-bit instruction writing %k0.  Mnemonic selection mirrors
;; ctz<mode>2: tzcnt with BMI, "rep bsf" for generic tuning when not
;; optimizing for size, plain bsf otherwise.
(define_insn_and_split "*ctzsidi2_<s>ext"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (any_extend:DI
          (ctz:SI
            (match_operand:SI 1 "nonimmediate_operand" "rm"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
{
  if (TARGET_BMI)
    return "tzcnt{l}\t{%1, %k0|%k0, %1}";

  /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI.  */
  const bool use_rep_bsf = (TARGET_CPU_P (GENERIC)
                            && !optimize_function_for_size_p (cfun));

  return (use_rep_bsf
          ? "rep%; bsf{l}\t{%1, %k0|%k0, %1}"
          : "bsf{l}\t{%1, %k0|%k0, %1}");
}
  "(TARGET_BMI || TARGET_CPU_P (GENERIC))
   && TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
   && optimize_function_for_speed_p (cfun)
   && !reg_mentioned_p (operands[0], operands[1])"
  [(parallel
     [(set (match_dup 0)
           (any_extend:DI (ctz:SI (match_dup 1))))
      (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
      (clobber (reg:CC FLAGS_REG))])]
{
  ix86_expand_clear (operands[0]);
}
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set (attr "prefix_rep")
     (if_then_else
       (ior (match_test "TARGET_BMI")
            (and (not (match_test "optimize_function_for_size_p (cfun)"))
                 (match_test "TARGET_CPU_P (GENERIC)")))
       (const_string "1")
       (const_string "0")))
   (set_attr "mode" "SI")])
19649
; False-dependency form of *ctzsidi2_<s>ext: operand 2 is tied to the
; destination by its "0" constraint.  Only BMI or generic tuning is
; expected here; anything else is treated as unreachable.
(define_insn "*ctzsidi2_<s>ext_falsedep"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (any_extend:DI
          (ctz:SI
            (match_operand:SI 1 "nonimmediate_operand" "rm"))))
   (unspec [(match_operand:DI 2 "register_operand" "0")]
           UNSPEC_INSN_FALSE_DEP)
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
{
  if (TARGET_BMI)
    return "tzcnt{l}\t{%1, %k0|%k0, %1}";

  if (!TARGET_CPU_P (GENERIC))
    gcc_unreachable ();

  /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI.  */
  return "rep%; bsf{l}\t{%1, %k0|%k0, %1}";
}
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "SI")])
19672
;; 64-bit bsr that also sets the flags (CCZmode) from comparing the source
;; with zero.  The destination is expressed as 63 - clz (source).
(define_insn "bsr_rex64"
  [(set (reg:CCZ FLAGS_REG)
        (compare:CCZ (match_operand:DI 1 "nonimmediate_operand" "rm")
                     (const_int 0)))
   (set (match_operand:DI 0 "register_operand" "=r")
        (minus:DI (const_int 63)
                  (clz:DI (match_dup 1))))]
  "TARGET_64BIT"
  "bsr{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "DI")])
19686
;; 64-bit bsr with the flags merely clobbered: dest = 63 - clz (source).
;; Only enabled without LZCNT.
(define_insn "bsr_rex64_1"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (minus:DI (const_int 63)
                  (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_LZCNT && TARGET_64BIT"
  "bsr{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "DI")])
19698
;; As bsr_rex64_1, but matching the form where the subtraction is done in
;; SImode on the low part of the DImode clz and then zero-extended.  Still
;; a single 64-bit bsr.
(define_insn "bsr_rex64_1_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (minus:SI (const_int 63)
                    (subreg:SI
                      (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))
                      0))))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_LZCNT && TARGET_64BIT"
  "bsr{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "DI")])
19713
;; 32-bit bsr that also sets the flags (CCZmode) from comparing the source
;; with zero.  The destination is expressed as 31 - clz (source).
(define_insn "bsr"
  [(set (reg:CCZ FLAGS_REG)
        (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm")
                     (const_int 0)))
   (set (match_operand:SI 0 "register_operand" "=r")
        (minus:SI (const_int 31)
                  (clz:SI (match_dup 1))))]
  ""
  "bsr{l}\t{%1, %0|%0, %1}"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "SI")])
19727
;; 32-bit bsr with the flags merely clobbered: dest = 31 - clz (source).
;; Only enabled without LZCNT.
(define_insn "bsr_1"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (minus:SI (const_int 31)
                  (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_LZCNT"
  "bsr{l}\t{%1, %0|%0, %1}"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "SI")])
19739
;; 64-bit target: zero-extension to DImode of the SImode bsr result
;; (31 - clz), done with one 32-bit bsr writing %k0.
(define_insn "bsr_zext_1"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (minus:SI
            (const_int 31)
            (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_LZCNT && TARGET_64BIT"
  "bsr{l}\t{%1, %k0|%k0, %1}"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "SI")])
19753
; As bsr is undefined behavior on zero and for other input
; values it is in range 0 to 63, we can optimize away sign-extends.
; Pre-reload only: the combined form is split into a flag-setting 64-bit
; bsr into a fresh pseudo, followed by a zero-extended SImode xor with 63
; of that pseudo's low part.
(define_insn_and_split "*bsr_rex64_2"
  [(set (match_operand:DI 0 "register_operand")
        (xor:DI
          (sign_extend:DI
            (minus:SI
              (const_int 63)
              (subreg:SI (clz:DI (match_operand:DI 1 "nonimmediate_operand"))
                         0)))
          (const_int 63)))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_LZCNT && TARGET_64BIT && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel [(set (reg:CCZ FLAGS_REG)
                   (compare:CCZ (match_dup 1) (const_int 0)))
              (set (match_dup 2)
                   (minus:DI (const_int 63) (clz:DI (match_dup 1))))])
   (parallel [(set (match_dup 0)
                   (zero_extend:DI (xor:SI (match_dup 3) (const_int 63))))
              (clobber (reg:CC FLAGS_REG))])]
{
  rtx bsr_result = gen_reg_rtx (DImode);

  operands[2] = bsr_result;
  operands[3] = lowpart_subreg (SImode, bsr_result, DImode);
})
19780
;; SImode counterpart of *bsr_rex64_2: the sign-extended
;; (31 - clz) ^ 31 is split before reload into a flag-setting 32-bit bsr
;; into a fresh pseudo followed by a zero-extended xor with 31.
(define_insn_and_split "*bsr_2"
  [(set (match_operand:DI 0 "register_operand")
        (sign_extend:DI
          (xor:SI
            (minus:SI
              (const_int 31)
              (clz:SI (match_operand:SI 1 "nonimmediate_operand")))
            (const_int 31))))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_LZCNT && TARGET_64BIT && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel [(set (reg:CCZ FLAGS_REG)
                   (compare:CCZ (match_dup 1) (const_int 0)))
              (set (match_dup 2)
                   (minus:SI (const_int 31) (clz:SI (match_dup 1))))])
   (parallel [(set (match_dup 0)
                   (zero_extend:DI (xor:SI (match_dup 2) (const_int 31))))
              (clobber (reg:CC FLAGS_REG))])]
{
  operands[2] = gen_reg_rtx (SImode);
})
19801
; Splitters to optimize 64 - __builtin_clzl (x) or 32 - __builtin_clz (x).
; Again, as for !TARGET_LZCNT CLZ is UB at zero, CLZ is guaranteed to be
; in [0, 63] or [0, 31] range.
;
; SImode result, DImode clz:  C - ((63 - clz) ^ 63) is rewritten as
; (63 - clz) + (C - 63).  For C == 63 the bsr result is used directly.
(define_split
  [(set (match_operand:SI 0 "register_operand")
        (minus:SI
          (match_operand:SI 2 "const_int_operand")
          (xor:SI
            (minus:SI (const_int 63)
                      (subreg:SI
                        (clz:DI (match_operand:DI 1 "nonimmediate_operand"))
                        0))
            (const_int 63))))]
  "!TARGET_LZCNT && TARGET_64BIT && ix86_pre_reload_split ()"
  [(set (match_dup 3)
        (minus:DI (const_int 63) (clz:DI (match_dup 1))))
   (set (match_dup 0)
        (plus:SI (match_dup 5) (match_dup 4)))]
{
  rtx bsr_di = gen_reg_rtx (DImode);
  rtx bsr_si = lowpart_subreg (SImode, bsr_di, DImode);

  operands[3] = bsr_di;
  operands[5] = bsr_si;

  if (INTVAL (operands[2]) == 63)
    {
      /* Nothing to add: copy the low part of the bsr result.  */
      emit_insn (gen_bsr_rex64_1_zext (bsr_di, operands[1]));
      emit_move_insn (operands[0], bsr_si);
      DONE;
    }

  operands[4] = gen_int_mode (UINTVAL (operands[2]) - 63, SImode);
})
19831
;; SImode result, SImode clz:  C - ((31 - clz) ^ 31) is rewritten as
;; (31 - clz) + (C - 31).  For C == 31 a single bsr_1 is emitted.
(define_split
  [(set (match_operand:SI 0 "register_operand")
        (minus:SI
          (match_operand:SI 2 "const_int_operand")
          (xor:SI
            (minus:SI (const_int 31)
                      (clz:SI (match_operand:SI 1 "nonimmediate_operand")))
            (const_int 31))))]
  "!TARGET_LZCNT && ix86_pre_reload_split ()"
  [(set (match_dup 3)
        (minus:SI (const_int 31) (clz:SI (match_dup 1))))
   (set (match_dup 0)
        (plus:SI (match_dup 3) (match_dup 4)))]
{
  const unsigned HOST_WIDE_INT addend = UINTVAL (operands[2]) - 31;

  if (INTVAL (operands[2]) == 31)
    {
      emit_insn (gen_bsr_1 (operands[0], operands[1]));
      DONE;
    }

  /* The temporary is only allocated on the general path.  */
  operands[3] = gen_reg_rtx (SImode);
  operands[4] = gen_int_mode (addend, SImode);
})
19854
;; DImode result, DImode clz (with a sign-extended SImode subtraction
;; inside):  C - (sext (63 - clz) ^ 63) is rewritten as
;; (63 - clz) + (C - 63).  Only applied when C - 63 survives truncation to
;; SImode unchanged.  For C == 63 a single bsr_rex64_1 is emitted.
(define_split
  [(set (match_operand:DI 0 "register_operand")
        (minus:DI
          (match_operand:DI 2 "const_int_operand")
          (xor:DI
            (sign_extend:DI
              (minus:SI (const_int 63)
                        (subreg:SI
                          (clz:DI (match_operand:DI 1 "nonimmediate_operand"))
                          0)))
            (const_int 63))))]
  "!TARGET_LZCNT
   && TARGET_64BIT
   && ix86_pre_reload_split ()
   && ((unsigned HOST_WIDE_INT)
       trunc_int_for_mode (UINTVAL (operands[2]) - 63, SImode)
       == UINTVAL (operands[2]) - 63)"
  [(set (match_dup 3)
        (minus:DI (const_int 63) (clz:DI (match_dup 1))))
   (set (match_dup 0)
        (plus:DI (match_dup 3) (match_dup 4)))]
{
  const unsigned HOST_WIDE_INT addend = UINTVAL (operands[2]) - 63;

  if (INTVAL (operands[2]) == 63)
    {
      emit_insn (gen_bsr_rex64_1 (operands[0], operands[1]));
      DONE;
    }

  /* The temporary is only allocated on the general path.  */
  operands[3] = gen_reg_rtx (DImode);
  operands[4] = GEN_INT (addend);
})
19885
;; DImode result, SImode clz:  C - sext ((31 - clz) ^ 31) is rewritten as
;; zext (31 - clz) + (C - 31).  Only applied when C - 31 survives
;; truncation to SImode unchanged.  For C == 31 a single bsr_zext_1 is
;; emitted.
(define_split
  [(set (match_operand:DI 0 "register_operand")
        (minus:DI
          (match_operand:DI 2 "const_int_operand")
          (sign_extend:DI
            (xor:SI
              (minus:SI (const_int 31)
                        (clz:SI (match_operand:SI 1 "nonimmediate_operand")))
              (const_int 31)))))]
  "!TARGET_LZCNT
   && TARGET_64BIT
   && ix86_pre_reload_split ()
   && ((unsigned HOST_WIDE_INT)
       trunc_int_for_mode (UINTVAL (operands[2]) - 31, SImode)
       == UINTVAL (operands[2]) - 31)"
  [(set (match_dup 3)
        (zero_extend:DI (minus:SI (const_int 31) (clz:SI (match_dup 1)))))
   (set (match_dup 0)
        (plus:DI (match_dup 3) (match_dup 4)))]
{
  const unsigned HOST_WIDE_INT addend = UINTVAL (operands[2]) - 31;

  if (INTVAL (operands[2]) == 31)
    {
      emit_insn (gen_bsr_zext_1 (operands[0], operands[1]));
      DONE;
    }

  /* The temporary is only allocated on the general path.  */
  operands[3] = gen_reg_rtx (DImode);
  operands[4] = GEN_INT (addend);
})
19914
;; Count leading zeros.  With LZCNT the work is delegated to
;; clz<mode>2_lzcnt.  Otherwise the template is used: a flag-setting bsr
;; computing (bits-1) - clz into a temporary, followed by an xor of that
;; temporary with (bits-1).
(define_expand "clz<mode>2"
  [(parallel
     [(set (reg:CCZ FLAGS_REG)
           (compare:CCZ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
                        (const_int 0)))
      (set (match_dup 3) (minus:SWI48
                           (match_dup 2)
                           (clz:SWI48 (match_dup 1))))])
   (parallel
     [(set (match_operand:SWI48 0 "register_operand")
           (xor:SWI48 (match_dup 3) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
  ""
{
  if (TARGET_LZCNT)
    {
      emit_insn (gen_clz<mode>2_lzcnt (operands[0], operands[1]));
      DONE;
    }

  /* Operand 3 is the bsr temporary, operand 2 the constant bits-1.  */
  operands[3] = gen_reg_rtx (<MODE>mode);
  operands[2] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - 1);
})
19937
;; clz implemented with lzcnt.  May be rewritten after the epilogue into
;; *clz<mode>2_lzcnt_falsedep, with ix86_expand_clear called on the
;; destination first.
(define_insn_and_split "clz<mode>2_lzcnt"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (clz:SWI48
          (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_LZCNT"
  "lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
   && optimize_function_for_speed_p (cfun)
   && !reg_mentioned_p (operands[0], operands[1])"
  [(parallel
     [(set (match_dup 0)
           (clz:SWI48 (match_dup 1)))
      (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
      (clobber (reg:CC FLAGS_REG))])]
{
  ix86_expand_clear (operands[0]);
}
  [(set_attr "prefix_rep" "1")
   (set_attr "type" "bitmanip")
   (set_attr "mode" "<MODE>")])
19957
; A false dependency arises when the destination is only updated by tzcnt,
; lzcnt or popcnt.  There is no false dependency when the destination is
; also used in the source.
; Operand 2 is tied to operand 0 by its "0" constraint.
(define_insn "*clz<mode>2_lzcnt_falsedep"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (clz:SWI48
          (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
   (unspec [(match_operand:SWI48 2 "register_operand" "0")]
           UNSPEC_INSN_FALSE_DEP)
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_LZCNT"
  "lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
  [(set_attr "prefix_rep" "1")
   (set_attr "type" "bitmanip")
   (set_attr "mode" "<MODE>")])
19973
;; 64-bit LZCNT: SImode lzcnt whose result is viewed as DImode and masked
;; with 63, implemented as a single 32-bit lzcnt writing %k0.  May be
;; rewritten after the epilogue into *clzsi2_lzcnt_zext_falsedep, with
;; ix86_expand_clear called on the destination first.
(define_insn_and_split "*clzsi2_lzcnt_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (and:DI
          (subreg:DI
            (clz:SI
              (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
          (const_int 63)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_LZCNT && TARGET_64BIT"
  "lzcnt{l}\t{%1, %k0|%k0, %1}"
  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
   && optimize_function_for_speed_p (cfun)
   && !reg_mentioned_p (operands[0], operands[1])"
  [(parallel
     [(set (match_dup 0)
           (and:DI (subreg:DI (clz:SI (match_dup 1)) 0) (const_int 63)))
      (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
      (clobber (reg:CC FLAGS_REG))])]
{
  ix86_expand_clear (operands[0]);
}
  [(set_attr "prefix_rep" "1")
   (set_attr "type" "bitmanip")
   (set_attr "mode" "SI")])
19996
; A false dependency arises when the destination is only updated by tzcnt,
; lzcnt or popcnt.  There is no false dependency when the destination is
; also used in the source.
; Operand 2 is tied to operand 0 by its "0" constraint.
;
; Operand 1 must be SImode: it sits inside clz:SI and is printed with the
; 32-bit "lzcnt{l}" mnemonic.  The pattern is only produced by splitting
; *clzsi2_lzcnt_zext, so it carries the same TARGET_64BIT requirement
; (and mirrors *ctzsi2_zext_falsedep).
(define_insn "*clzsi2_lzcnt_zext_falsedep"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (and:DI
          (subreg:DI
            (clz:SI
              (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
          (const_int 63)))
   (unspec [(match_operand:DI 2 "register_operand" "0")]
           UNSPEC_INSN_FALSE_DEP)
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_LZCNT && TARGET_64BIT"
  "lzcnt{l}\t{%1, %k0|%k0, %1}"
  [(set_attr "prefix_rep" "1")
   (set_attr "type" "bitmanip")
   (set_attr "mode" "SI")])
20015
;; 64-bit LZCNT: zero-extension to DImode of an SImode clz, implemented as
;; a single 32-bit lzcnt writing %k0.  May be rewritten after the epilogue
;; into *clzsi2_lzcnt_zext_2_falsedep, with ix86_expand_clear called on
;; the destination first.
(define_insn_and_split "*clzsi2_lzcnt_zext_2"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_LZCNT && TARGET_64BIT"
  "lzcnt{l}\t{%1, %k0|%k0, %1}"
  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
   && optimize_function_for_speed_p (cfun)
   && !reg_mentioned_p (operands[0], operands[1])"
  [(parallel
     [(set (match_dup 0)
           (zero_extend:DI (clz:SI (match_dup 1))))
      (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
      (clobber (reg:CC FLAGS_REG))])]
{
  ix86_expand_clear (operands[0]);
}
  [(set_attr "prefix_rep" "1")
   (set_attr "type" "bitmanip")
   (set_attr "mode" "SI")])
20035
; A false dependency arises when the destination is only updated by tzcnt,
; lzcnt or popcnt.  There is no false dependency when the destination is
; also used in the source.
; Operand 2 is tied to operand 0 by its "0" constraint.
;
; Operand 1 must be SImode: it sits inside clz:SI and is printed with the
; 32-bit "lzcnt{l}" mnemonic.  The pattern is only produced by splitting
; *clzsi2_lzcnt_zext_2, so it carries the same TARGET_64BIT requirement.
(define_insn "*clzsi2_lzcnt_zext_2_falsedep"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
   (unspec [(match_operand:DI 2 "register_operand" "0")]
           UNSPEC_INSN_FALSE_DEP)
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_LZCNT && TARGET_64BIT"
  "lzcnt{l}\t{%1, %k0|%k0, %1}"
  [(set_attr "prefix_rep" "1")
   (set_attr "type" "bitmanip")
   (set_attr "mode" "SI")])
20051
;; Int iterator over the trailing/leading zero-count unspecs.  Each entry
;; carries the target condition attached to patterns instantiated for it.
(define_int_iterator LT_ZCNT
  [(UNSPEC_TZCNT "TARGET_BMI")
   (UNSPEC_LZCNT "TARGET_LZCNT")])
20055
;; Mnemonic (and pattern-name fragment) for each LT_ZCNT unspec.
(define_int_attr lt_zcnt
  [(UNSPEC_TZCNT "tzcnt")
   (UNSPEC_LZCNT "lzcnt")])
20059
;; Value of the "type" insn attribute for each LT_ZCNT unspec.
(define_int_attr lt_zcnt_type
  [(UNSPEC_TZCNT "alu1")
   (UNSPEC_LZCNT "bitmanip")])
20063
;; Version of lzcnt/tzcnt that is expanded from intrinsics.  This version
;; provides operand size as output when source operand is zero.
;; The enabling condition comes from the LT_ZCNT iterator.  May be
;; rewritten after the epilogue into the *_falsedep form, with
;; ix86_expand_clear called on the destination first.

(define_insn_and_split "<lt_zcnt>_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (unspec:SWI48
          [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "<lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}"
  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
   && optimize_function_for_speed_p (cfun)
   && !reg_mentioned_p (operands[0], operands[1])"
  [(parallel
     [(set (match_dup 0)
           (unspec:SWI48 [(match_dup 1)] LT_ZCNT))
      (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
      (clobber (reg:CC FLAGS_REG))])]
{
  ix86_expand_clear (operands[0]);
}
  [(set_attr "type" "<lt_zcnt_type>")
   (set_attr "prefix_0f" "1")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "<MODE>")])
20087
; A false dependency arises when the destination is only updated by tzcnt,
; lzcnt or popcnt.  There is no false dependency when the destination is
; also used in the source.
; Operand 2 is tied to operand 0 by its "0" constraint.
(define_insn "*<lt_zcnt>_<mode>_falsedep"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (unspec:SWI48
          [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))
   (unspec [(match_operand:SWI48 2 "register_operand" "0")]
           UNSPEC_INSN_FALSE_DEP)
   (clobber (reg:CC FLAGS_REG))]
  ""
  "<lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}"
  [(set_attr "type" "<lt_zcnt_type>")
   (set_attr "prefix_0f" "1")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "<MODE>")])
20104
;; HImode (16-bit) form of the intrinsic lzcnt/tzcnt patterns.  Unlike the
;; SWI48 version it has no false-dependency split.
(define_insn "<lt_zcnt>_hi"
  [(set (match_operand:HI 0 "register_operand" "=r")
        (unspec:HI
          [(match_operand:HI 1 "nonimmediate_operand" "rm")] LT_ZCNT))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "<lt_zcnt>{w}\t{%1, %0|%0, %1}"
  [(set_attr "type" "<lt_zcnt_type>")
   (set_attr "prefix_0f" "1")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "HI")])
20116
20117 ;; BMI instructions.
20118
;; BMI bextr, modelled as an unspec of the source (operand 1, register or
;; memory depending on the alternative) and a register operand 2.  The
;; flags are clobbered.  btver2_decode differs per alternative.
(define_insn "bmi_bextr_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
        (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "r,m")
                       (match_operand:SWI48 2 "register_operand" "r,r")]
                      UNSPEC_BEXTR))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI"
  "bextr\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "bitmanip")
   (set_attr "btver2_decode" "direct, double")
   (set_attr "mode" "<MODE>")])
20130
;; bextr used only for its effect on the flags (CCZmode comparison of the
;; extracted value with zero); the register result is a scratch.
(define_insn "*bmi_bextr_<mode>_ccz"
  [(set (reg:CCZ FLAGS_REG)
        (compare:CCZ
          (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "r,m")
                         (match_operand:SWI48 2 "register_operand" "r,r")]
                        UNSPEC_BEXTR)
          (const_int 0)))
   (clobber (match_scratch:SWI48 0 "=r,r"))]
  "TARGET_BMI"
  "bextr\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "bitmanip")
   (set_attr "btver2_decode" "direct, double")
   (set_attr "mode" "<MODE>")])
20144
;; BMI blsi: dest = (-src) & src.  The flags are clobbered.
(define_insn "*bmi_blsi_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (and:SWI48
          (neg:SWI48
            (match_operand:SWI48 1 "nonimmediate_operand" "rm"))
          (match_dup 1)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI"
  "blsi\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "btver2_decode" "double")
   (set_attr "mode" "<MODE>")])
20157
;; BLSI that produces both results: the flags are set from comparing
;; (-x & x) with zero, and the same value is stored in operand 0.
;; The flags mode is left unspecified in the template; the insn condition
;; instead requires ix86_match_ccmode (insn, CCNOmode) in addition to
;; TARGET_BMI.
20158 (define_insn "*bmi_blsi_<mode>_cmp"
20159 [(set (reg FLAGS_REG)
20160 (compare
20161 (and:SWI48
20162 (neg:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))
20163 (match_dup 1))
20164 (const_int 0)))
20165 (set (match_operand:SWI48 0 "register_operand" "=r")
20166 (and:SWI48 (neg:SWI48 (match_dup 1)) (match_dup 1)))]
20167 "TARGET_BMI && ix86_match_ccmode (insn, CCNOmode)"
20168 "blsi\t{%1, %0|%0, %1}"
20169 [(set_attr "type" "bitmanip")
20170 (set_attr "btver2_decode" "double")
20171 (set_attr "mode" "<MODE>")])
20172
;; Flags-only BLSI: the flags are set from comparing (-x & x) with zero and
;; the computed value is discarded into a scratch register (operand 0).
;; Requires TARGET_BMI and ix86_match_ccmode (insn, CCNOmode).
20173 (define_insn "*bmi_blsi_<mode>_ccno"
20174 [(set (reg FLAGS_REG)
20175 (compare
20176 (and:SWI48
20177 (neg:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))
20178 (match_dup 1))
20179 (const_int 0)))
20180 (clobber (match_scratch:SWI48 0 "=r"))]
20181 "TARGET_BMI && ix86_match_ccmode (insn, CCNOmode)"
20182 "blsi\t{%1, %0|%0, %1}"
20183 [(set_attr "type" "bitmanip")
20184 (set_attr "btver2_decode" "double")
20185 (set_attr "mode" "<MODE>")])
20186
;; BLSMSK: recognizes the RTL for ((x - 1) ^ x), where x is operand 1
;; (register or memory), and emits a single blsmsk.  Requires TARGET_BMI;
;; the flags register is clobbered.
20187 (define_insn "*bmi_blsmsk_<mode>"
20188 [(set (match_operand:SWI48 0 "register_operand" "=r")
20189 (xor:SWI48
20190 (plus:SWI48
20191 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20192 (const_int -1))
20193 (match_dup 1)))
20194 (clobber (reg:CC FLAGS_REG))]
20195 "TARGET_BMI"
20196 "blsmsk\t{%1, %0|%0, %1}"
20197 [(set_attr "type" "bitmanip")
20198 (set_attr "btver2_decode" "double")
20199 (set_attr "mode" "<MODE>")])
20200
;; BLSR: recognizes the RTL for ((x - 1) & x), where x is operand 1
;; (register or memory), and emits a single blsr.  Requires TARGET_BMI;
;; the flags register is clobbered.
20201 (define_insn "*bmi_blsr_<mode>"
20202 [(set (match_operand:SWI48 0 "register_operand" "=r")
20203 (and:SWI48
20204 (plus:SWI48
20205 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20206 (const_int -1))
20207 (match_dup 1)))
20208 (clobber (reg:CC FLAGS_REG))]
20209 "TARGET_BMI"
20210 "blsr\t{%1, %0|%0, %1}"
20211 [(set_attr "type" "bitmanip")
20212 (set_attr "btver2_decode" "double")
20213 (set_attr "mode" "<MODE>")])
20214
;; BLSR that produces both results: the flags (in CCZmode) are set from
;; comparing ((x - 1) & x) with zero, and the same value is stored in
;; operand 0.  Requires TARGET_BMI.
20215 (define_insn "*bmi_blsr_<mode>_cmp"
20216 [(set (reg:CCZ FLAGS_REG)
20217 (compare:CCZ
20218 (and:SWI48
20219 (plus:SWI48
20220 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20221 (const_int -1))
20222 (match_dup 1))
20223 (const_int 0)))
20224 (set (match_operand:SWI48 0 "register_operand" "=r")
20225 (and:SWI48
20226 (plus:SWI48
20227 (match_dup 1)
20228 (const_int -1))
20229 (match_dup 1)))]
20230 "TARGET_BMI"
20231 "blsr\t{%1, %0|%0, %1}"
20232 [(set_attr "type" "bitmanip")
20233 (set_attr "btver2_decode" "double")
20234 (set_attr "mode" "<MODE>")])
20235
;; Flags-only BLSR: the CCZmode flags are set from comparing ((x - 1) & x)
;; with zero and the computed value is discarded into a scratch register
;; (operand 0).  Requires TARGET_BMI.
20236 (define_insn "*bmi_blsr_<mode>_ccz"
20237 [(set (reg:CCZ FLAGS_REG)
20238 (compare:CCZ
20239 (and:SWI48
20240 (plus:SWI48
20241 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20242 (const_int -1))
20243 (match_dup 1))
20244 (const_int 0)))
20245 (clobber (match_scratch:SWI48 0 "=r"))]
20246 "TARGET_BMI"
20247 "blsr\t{%1, %0|%0, %1}"
20248 [(set_attr "type" "bitmanip")
20249 (set_attr "btver2_decode" "double")
20250 (set_attr "mode" "<MODE>")])
20251
20252 ;; BMI2 instructions.
;; Expander for BZHI (requires TARGET_BMI2).  The generated RTL is:
;;   operand 0 = (operand 2 != 0)
;;               ? zero_extract (operand 1, umin (operand 2, operand 3), 0)
;;               : 0
;; with the flags register clobbered.  The preparation code replaces
;; operand 2 by its QImode low part and creates operand 3 as the constant
;; <MODE_SIZE> * BITS_PER_UNIT, i.e. the width of the mode in bits.
20253 (define_expand "bmi2_bzhi_<mode>3"
20254 [(parallel
20255 [(set (match_operand:SWI48 0 "register_operand")
20256 (if_then_else:SWI48
20257 (ne:QI (match_operand:QI 2 "register_operand")
20258 (const_int 0))
20259 (zero_extract:SWI48
20260 (match_operand:SWI48 1 "nonimmediate_operand")
20261 (umin:QI (match_dup 2) (match_dup 3))
20262 (const_int 0))
20263 (const_int 0)))
20264 (clobber (reg:CC FLAGS_REG))])]
20265 "TARGET_BMI2"
20266 {
20267 operands[2] = gen_lowpart (QImode, operands[2]);
20268 operands[3] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
20269 })
20270
;; BZHI insn in the canonical if_then_else / zero_extract / umin form.
;; Operand 3 must be a constant equal to the mode width in bits
;; (checked in the insn condition), so only that umin bound is accepted.
;; Operand 2 is the QImode index register (constraint "q"); it is printed
;; through the %<k>2 modifier, whose mode attribute is defined outside this
;; part of the file.  The flags register is clobbered.
20271 (define_insn "*bmi2_bzhi_<mode>3"
20272 [(set (match_operand:SWI48 0 "register_operand" "=r")
20273 (if_then_else:SWI48
20274 (ne:QI (match_operand:QI 2 "register_operand" "q")
20275 (const_int 0))
20276 (zero_extract:SWI48
20277 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20278 (umin:QI (match_dup 2)
20279 (match_operand:QI 3 "const_int_operand"))
20280 (const_int 0))
20281 (const_int 0)))
20282 (clobber (reg:CC FLAGS_REG))]
20283 "TARGET_BMI2 && INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
20284 "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}"
20285 [(set_attr "type" "bitmanip")
20286 (set_attr "prefix" "vex")
20287 (set_attr "mode" "<MODE>")])
20288
;; Flags-only BZHI: matches a CCZmode comparison against zero of the same
;; if_then_else / zero_extract / umin expression used by the value-producing
;; BZHI pattern.  The result value goes to a scratch register (operand 0).
;; As there, operand 3 must equal the mode width in bits.
20289 (define_insn "*bmi2_bzhi_<mode>3_1_ccz"
20290 [(set (reg:CCZ FLAGS_REG)
20291 (compare:CCZ
20292 (if_then_else:SWI48
20293 (ne:QI (match_operand:QI 2 "register_operand" "r") (const_int 0))
20294 (zero_extract:SWI48
20295 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20296 (umin:QI (match_dup 2)
20297 (match_operand:QI 3 "const_int_operand"))
20298 (const_int 0))
20299 (const_int 0))
20300 (const_int 0)))
20301 (clobber (match_scratch:SWI48 0 "=r"))]
20302 "TARGET_BMI2 && INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
20303 "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}"
20304 [(set_attr "type" "bitmanip")
20305 (set_attr "prefix" "vex")
20306 (set_attr "mode" "<MODE>")])
20307
;; BZHI matched from the mask idiom (((1 << n) - 1) & x), where n is
;; operand 2 (a QImode register) and x is operand 1 (register or memory).
;; Requires TARGET_BMI2; the flags register is clobbered.
20308 (define_insn "*bmi2_bzhi_<mode>3_2"
20309 [(set (match_operand:SWI48 0 "register_operand" "=r")
20310 (and:SWI48
20311 (plus:SWI48
20312 (ashift:SWI48 (const_int 1)
20313 (match_operand:QI 2 "register_operand" "r"))
20314 (const_int -1))
20315 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
20316 (clobber (reg:CC FLAGS_REG))]
20317 "TARGET_BMI2"
20318 "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}"
20319 [(set_attr "type" "bitmanip")
20320 (set_attr "prefix" "vex")
20321 (set_attr "mode" "<MODE>")])
20322
;; BZHI matched from the alternative mask idiom (~(-1 << n) & x), where n
;; is operand 2 (a QImode register) and x is operand 1 (register or
;; memory).  Requires TARGET_BMI2; the flags register is clobbered.
20323 (define_insn "*bmi2_bzhi_<mode>3_3"
20324 [(set (match_operand:SWI48 0 "register_operand" "=r")
20325 (and:SWI48
20326 (not:SWI48
20327 (ashift:SWI48 (const_int -1)
20328 (match_operand:QI 2 "register_operand" "r")))
20329 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
20330 (clobber (reg:CC FLAGS_REG))]
20331 "TARGET_BMI2"
20332 "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}"
20333 [(set_attr "type" "bitmanip")
20334 (set_attr "prefix" "vex")
20335 (set_attr "mode" "<MODE>")])
20336
;; BZHI for a zero-extended SImode mask operation: matches
;; zero_extend:DI (((1 << n) - 1) & x) with the inner arithmetic in SImode,
;; and emits one bzhi with every operand printed through the %q modifier.
;; The "mode" attribute is DI.  Requires TARGET_64BIT && TARGET_BMI2; the
;; flags register is clobbered.
20337 (define_insn "*bmi2_bzhi_zero_extendsidi_4"
20338 [(set (match_operand:DI 0 "register_operand" "=r")
20339 (zero_extend:DI
20340 (and:SI
20341 (plus:SI
20342 (ashift:SI (const_int 1)
20343 (match_operand:QI 2 "register_operand" "r"))
20344 (const_int -1))
20345 (match_operand:SI 1 "nonimmediate_operand" "rm"))))
20346 (clobber (reg:CC FLAGS_REG))]
20347 "TARGET_64BIT && TARGET_BMI2"
20348 "bzhi\t{%q2, %q1, %q0|%q0, %q1, %q2}"
20349 [(set_attr "type" "bitmanip")
20350 (set_attr "prefix" "vex")
20351 (set_attr "mode" "DI")])
20352
;; BZHI for a DImode value masked by a zero-extended SImode mask: matches
;; (zero_extend:DI ((1 << n) - 1) & x), where the mask is computed in SImode
;; and x (operand 1) is a DImode register or memory operand.  All operands
;; are printed through the %q modifier.  Requires TARGET_64BIT &&
;; TARGET_BMI2; the flags register is clobbered.
20353 (define_insn "*bmi2_bzhi_zero_extendsidi_5"
20354 [(set (match_operand:DI 0 "register_operand" "=r")
20355 (and:DI
20356 (zero_extend:DI
20357 (plus:SI
20358 (ashift:SI (const_int 1)
20359 (match_operand:QI 2 "register_operand" "r"))
20360 (const_int -1)))
20361 (match_operand:DI 1 "nonimmediate_operand" "rm")))
20362 (clobber (reg:CC FLAGS_REG))]
20363 "TARGET_64BIT && TARGET_BMI2"
20364 "bzhi\t{%q2, %q1, %q0|%q0, %q1, %q2}"
20365 [(set_attr "type" "bitmanip")
20366 (set_attr "prefix" "vex")
20367 (set_attr "mode" "DI")])
20368
;; Named PDEP pattern (requires TARGET_BMI2).  The operation is kept opaque
;; as UNSPEC_PDEP of operand 1 (a register) and operand 2 (register or
;; memory).  Unlike the neighbouring bit-manipulation patterns, this
;; template contains no clobber of the flags register.
20369 (define_insn "bmi2_pdep_<mode>3"
20370 [(set (match_operand:SWI48 0 "register_operand" "=r")
20371 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")
20372 (match_operand:SWI48 2 "nonimmediate_operand" "rm")]
20373 UNSPEC_PDEP))]
20374 "TARGET_BMI2"
20375 "pdep\t{%2, %1, %0|%0, %1, %2}"
20376 [(set_attr "type" "bitmanip")
20377 (set_attr "prefix" "vex")
20378 (set_attr "mode" "<MODE>")])
20379
;; Named PEXT pattern (requires TARGET_BMI2).  The operation is kept opaque
;; as UNSPEC_PEXT of operand 1 (a register) and operand 2 (register or
;; memory).  This template contains no clobber of the flags register.
20380 (define_insn "bmi2_pext_<mode>3"
20381 [(set (match_operand:SWI48 0 "register_operand" "=r")
20382 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")
20383 (match_operand:SWI48 2 "nonimmediate_operand" "rm")]
20384 UNSPEC_PEXT))]
20385 "TARGET_BMI2"
20386 "pext\t{%2, %1, %0|%0, %1, %2}"
20387 [(set_attr "type" "bitmanip")
20388 (set_attr "prefix" "vex")
20389 (set_attr "mode" "<MODE>")])
20390
20391 ;; TBM instructions.
;; TBM BEXTR with an immediate control word (requires TARGET_TBM).
;; The RTL is a zero_extract of operand 1 whose second and third operands
;; (operands 2 and 3) are both constants in the range 0..255.  At output
;; time the two constants are packed into one immediate,
;; (operand 2 << 8) | operand 3, which overwrites operands[2] and is
;; printed as the first source operand of bextr.  The flags register is
;; clobbered.
20392 (define_insn "@tbm_bextri_<mode>"
20393 [(set (match_operand:SWI48 0 "register_operand" "=r")
20394 (zero_extract:SWI48
20395 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20396 (match_operand:QI 2 "const_0_to_255_operand")
20397 (match_operand:QI 3 "const_0_to_255_operand")))
20398 (clobber (reg:CC FLAGS_REG))]
20399 "TARGET_TBM"
20400 {
20401 operands[2] = GEN_INT (INTVAL (operands[2]) << 8 | INTVAL (operands[3]));
20402 return "bextr\t{%2, %1, %0|%0, %1, %2}";
20403 }
20404 [(set_attr "type" "bitmanip")
20405 (set_attr "mode" "<MODE>")])
20406
;; TBM BLCFILL: recognizes the RTL for ((x + 1) & x), where x is operand 1
;; (register or memory), and emits blcfill.  Requires TARGET_TBM; the flags
;; register is clobbered.
20407 (define_insn "*tbm_blcfill_<mode>"
20408 [(set (match_operand:SWI48 0 "register_operand" "=r")
20409 (and:SWI48
20410 (plus:SWI48
20411 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20412 (const_int 1))
20413 (match_dup 1)))
20414 (clobber (reg:CC FLAGS_REG))]
20415 "TARGET_TBM"
20416 "blcfill\t{%1, %0|%0, %1}"
20417 [(set_attr "type" "bitmanip")
20418 (set_attr "mode" "<MODE>")])
20419
;; TBM BLCI: recognizes the RTL for (~(x + 1) | x), where x is operand 1
;; (register or memory), and emits blci.  Requires TARGET_TBM; the flags
;; register is clobbered.
20420 (define_insn "*tbm_blci_<mode>"
20421 [(set (match_operand:SWI48 0 "register_operand" "=r")
20422 (ior:SWI48
20423 (not:SWI48
20424 (plus:SWI48
20425 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20426 (const_int 1)))
20427 (match_dup 1)))
20428 (clobber (reg:CC FLAGS_REG))]
20429 "TARGET_TBM"
20430 "blci\t{%1, %0|%0, %1}"
20431 [(set_attr "type" "bitmanip")
20432 (set_attr "mode" "<MODE>")])
20433
;; TBM BLCIC: recognizes the RTL for ((x + 1) & ~x), where x is operand 1
;; (register or memory), and emits blcic.  Requires TARGET_TBM; the flags
;; register is clobbered.
20434 (define_insn "*tbm_blcic_<mode>"
20435 [(set (match_operand:SWI48 0 "register_operand" "=r")
20436 (and:SWI48
20437 (plus:SWI48
20438 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20439 (const_int 1))
20440 (not:SWI48
20441 (match_dup 1))))
20442 (clobber (reg:CC FLAGS_REG))]
20443 "TARGET_TBM"
20444 "blcic\t{%1, %0|%0, %1}"
20445 [(set_attr "type" "bitmanip")
20446 (set_attr "mode" "<MODE>")])
20447
;; TBM BLCMSK: recognizes the RTL for ((x + 1) ^ x), where x is operand 1
;; (register or memory), and emits blcmsk.  Requires TARGET_TBM; the flags
;; register is clobbered.
20448 (define_insn "*tbm_blcmsk_<mode>"
20449 [(set (match_operand:SWI48 0 "register_operand" "=r")
20450 (xor:SWI48
20451 (plus:SWI48
20452 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20453 (const_int 1))
20454 (match_dup 1)))
20455 (clobber (reg:CC FLAGS_REG))]
20456 "TARGET_TBM"
20457 "blcmsk\t{%1, %0|%0, %1}"
20458 [(set_attr "type" "bitmanip")
20459 (set_attr "mode" "<MODE>")])
20460
;; TBM BLCS: recognizes the RTL for ((x + 1) | x), where x is operand 1
;; (register or memory), and emits blcs.  Requires TARGET_TBM; the flags
;; register is clobbered.
20461 (define_insn "*tbm_blcs_<mode>"
20462 [(set (match_operand:SWI48 0 "register_operand" "=r")
20463 (ior:SWI48
20464 (plus:SWI48
20465 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20466 (const_int 1))
20467 (match_dup 1)))
20468 (clobber (reg:CC FLAGS_REG))]
20469 "TARGET_TBM"
20470 "blcs\t{%1, %0|%0, %1}"
20471 [(set_attr "type" "bitmanip")
20472 (set_attr "mode" "<MODE>")])
20473
;; TBM BLSFILL: recognizes the RTL for ((x - 1) | x), where x is operand 1
;; (register or memory), and emits blsfill.  Requires TARGET_TBM; the flags
;; register is clobbered.
20474 (define_insn "*tbm_blsfill_<mode>"
20475 [(set (match_operand:SWI48 0 "register_operand" "=r")
20476 (ior:SWI48
20477 (plus:SWI48
20478 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20479 (const_int -1))
20480 (match_dup 1)))
20481 (clobber (reg:CC FLAGS_REG))]
20482 "TARGET_TBM"
20483 "blsfill\t{%1, %0|%0, %1}"
20484 [(set_attr "type" "bitmanip")
20485 (set_attr "mode" "<MODE>")])
20486
;; TBM BLSIC: recognizes the RTL for ((x - 1) | ~x), where x is operand 1
;; (register or memory), and emits blsic.  Requires TARGET_TBM; the flags
;; register is clobbered.
20487 (define_insn "*tbm_blsic_<mode>"
20488 [(set (match_operand:SWI48 0 "register_operand" "=r")
20489 (ior:SWI48
20490 (plus:SWI48
20491 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20492 (const_int -1))
20493 (not:SWI48
20494 (match_dup 1))))
20495 (clobber (reg:CC FLAGS_REG))]
20496 "TARGET_TBM"
20497 "blsic\t{%1, %0|%0, %1}"
20498 [(set_attr "type" "bitmanip")
20499 (set_attr "mode" "<MODE>")])
20500
;; TBM T1MSKC: recognizes the RTL for ((x + 1) | ~x), where x is operand 1
;; (register or memory), and emits t1mskc.  Requires TARGET_TBM; the flags
;; register is clobbered.
20501 (define_insn "*tbm_t1mskc_<mode>"
20502 [(set (match_operand:SWI48 0 "register_operand" "=r")
20503 (ior:SWI48
20504 (plus:SWI48
20505 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20506 (const_int 1))
20507 (not:SWI48
20508 (match_dup 1))))
20509 (clobber (reg:CC FLAGS_REG))]
20510 "TARGET_TBM"
20511 "t1mskc\t{%1, %0|%0, %1}"
20512 [(set_attr "type" "bitmanip")
20513 (set_attr "mode" "<MODE>")])
20514
;; TBM TZMSK: recognizes the RTL for ((x - 1) & ~x), where x is operand 1
;; (register or memory), and emits tzmsk.  Requires TARGET_TBM; the flags
;; register is clobbered.
20515 (define_insn "*tbm_tzmsk_<mode>"
20516 [(set (match_operand:SWI48 0 "register_operand" "=r")
20517 (and:SWI48
20518 (plus:SWI48
20519 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
20520 (const_int -1))
20521 (not:SWI48
20522 (match_dup 1))))
20523 (clobber (reg:CC FLAGS_REG))]
20524 "TARGET_TBM"
20525 "tzmsk\t{%1, %0|%0, %1}"
20526 [(set_attr "type" "bitmanip")
20527 (set_attr "mode" "<MODE>")])
20528
;; Standard-named popcount pattern for SWI48 modes (requires TARGET_POPCNT).
;; As an insn it emits popcnt; the mnemonic carries no size suffix when
;; TARGET_MACHO is set and <imodesuffix> otherwise.  The flags register is
;; clobbered.
;;
;; The split applies only when all of the following hold:
;; TARGET_AVOID_FALSE_DEP_FOR_BMI, epilogue_completed, the function is
;; optimized for speed, and the destination is not mentioned in the source.
;; The replacement is the same popcount with an additional
;; UNSPEC_INSN_FALSE_DEP use of the destination; before it is emitted the
;; preparation statement calls ix86_expand_clear on operand 0
;; (NOTE(review): presumably this emits an instruction that zeroes the
;; destination first -- confirm against ix86_expand_clear).
20529 (define_insn_and_split "popcount<mode>2"
20530 [(set (match_operand:SWI48 0 "register_operand" "=r")
20531 (popcount:SWI48
20532 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
20533 (clobber (reg:CC FLAGS_REG))]
20534 "TARGET_POPCNT"
20535 {
20536 #if TARGET_MACHO
20537 return "popcnt\t{%1, %0|%0, %1}";
20538 #else
20539 return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
20540 #endif
20541 }
20542 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
20543 && optimize_function_for_speed_p (cfun)
20544 && !reg_mentioned_p (operands[0], operands[1])"
20545 [(parallel
20546 [(set (match_dup 0)
20547 (popcount:SWI48 (match_dup 1)))
20548 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
20549 (clobber (reg:CC FLAGS_REG))])]
20550 "ix86_expand_clear (operands[0]);"
20551 [(set_attr "prefix_rep" "1")
20552 (set_attr "type" "bitmanip")
20553 (set_attr "mode" "<MODE>")])
20554
20555 ; False dependency happens when destination is only updated by tzcnt,
20556 ; lzcnt or popcnt. There is no false dependency when destination is
20557 ; also used in source.
;; popcnt carrying an explicit UNSPEC_INSN_FALSE_DEP use of its destination.
;; Operand 2 is tied to operand 0 through the "0" constraint, so the
;; instruction both reads and writes the same register in the RTL.  The
;; emitted assembly is the same popcnt as for the plain SWI48 popcount
;; pattern (no suffix under TARGET_MACHO, <imodesuffix> otherwise).
20558 (define_insn "*popcount<mode>2_falsedep"
20559 [(set (match_operand:SWI48 0 "register_operand" "=r")
20560 (popcount:SWI48
20561 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
20562 (unspec [(match_operand:SWI48 2 "register_operand" "0")]
20563 UNSPEC_INSN_FALSE_DEP)
20564 (clobber (reg:CC FLAGS_REG))]
20565 "TARGET_POPCNT"
20566 {
20567 #if TARGET_MACHO
20568 return "popcnt\t{%1, %0|%0, %1}";
20569 #else
20570 return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
20571 #endif
20572 }
20573 [(set_attr "prefix_rep" "1")
20574 (set_attr "type" "bitmanip")
20575 (set_attr "mode" "<MODE>")])
20576
;; SImode popcount whose result is used as a DImode value in the form
;; (and:DI (subreg:DI (popcount:SI x) 0) 63).  It is emitted as a 32-bit
;; popcnt writing the %k0 form of the DImode destination (no suffix under
;; TARGET_MACHO, "l" otherwise).  Requires TARGET_POPCNT && TARGET_64BIT.
;;
;; The split applies under TARGET_AVOID_FALSE_DEP_FOR_BMI after
;; epilogue_completed, when optimizing for speed and the destination is not
;; mentioned in the source.  It produces the same expression with an extra
;; UNSPEC_INSN_FALSE_DEP use of the destination, after calling
;; ix86_expand_clear on operand 0.
20577 (define_insn_and_split "*popcountsi2_zext"
20578 [(set (match_operand:DI 0 "register_operand" "=r")
20579 (and:DI
20580 (subreg:DI
20581 (popcount:SI
20582 (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
20583 (const_int 63)))
20584 (clobber (reg:CC FLAGS_REG))]
20585 "TARGET_POPCNT && TARGET_64BIT"
20586 {
20587 #if TARGET_MACHO
20588 return "popcnt\t{%1, %k0|%k0, %1}";
20589 #else
20590 return "popcnt{l}\t{%1, %k0|%k0, %1}";
20591 #endif
20592 }
20593 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
20594 && optimize_function_for_speed_p (cfun)
20595 && !reg_mentioned_p (operands[0], operands[1])"
20596 [(parallel
20597 [(set (match_dup 0)
20598 (and:DI (subreg:DI (popcount:SI (match_dup 1)) 0) (const_int 63)))
20599 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
20600 (clobber (reg:CC FLAGS_REG))])]
20601 "ix86_expand_clear (operands[0]);"
20602 [(set_attr "prefix_rep" "1")
20603 (set_attr "type" "bitmanip")
20604 (set_attr "mode" "SI")])
20605
20606 ; False dependency happens when destination is only updated by tzcnt,
20607 ; lzcnt or popcnt. There is no false dependency when destination is
20608 ; also used in source.
;; (and:DI (subreg:DI (popcount:SI x) 0) 63) with an explicit
;; UNSPEC_INSN_FALSE_DEP use of the destination.  Operand 2 is tied to
;; operand 0 through the "0" constraint.  Emitted as a 32-bit popcnt
;; writing %k0.  Requires TARGET_POPCNT && TARGET_64BIT.
20609 (define_insn "*popcountsi2_zext_falsedep"
20610 [(set (match_operand:DI 0 "register_operand" "=r")
20611 (and:DI
20612 (subreg:DI
20613 (popcount:SI
20614 (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
20615 (const_int 63)))
20616 (unspec [(match_operand:DI 2 "register_operand" "0")]
20617 UNSPEC_INSN_FALSE_DEP)
20618 (clobber (reg:CC FLAGS_REG))]
20619 "TARGET_POPCNT && TARGET_64BIT"
20620 {
20621 #if TARGET_MACHO
20622 return "popcnt\t{%1, %k0|%k0, %1}";
20623 #else
20624 return "popcnt{l}\t{%1, %k0|%k0, %1}";
20625 #endif
20626 }
20627 [(set_attr "prefix_rep" "1")
20628 (set_attr "type" "bitmanip")
20629 (set_attr "mode" "SI")])
20630
;; SImode popcount zero-extended to DImode, written with an explicit
;; zero_extend:DI.  It is emitted as a 32-bit popcnt writing the %k0 form
;; of the DImode destination.  Requires TARGET_POPCNT && TARGET_64BIT.
;;
;; The split applies under TARGET_AVOID_FALSE_DEP_FOR_BMI after
;; epilogue_completed, when optimizing for speed and the destination is not
;; mentioned in the source.  It produces the same expression with an extra
;; UNSPEC_INSN_FALSE_DEP use of the destination, after calling
;; ix86_expand_clear on operand 0.
20631 (define_insn_and_split "*popcountsi2_zext_2"
20632 [(set (match_operand:DI 0 "register_operand" "=r")
20633 (zero_extend:DI
20634 (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
20635 (clobber (reg:CC FLAGS_REG))]
20636 "TARGET_POPCNT && TARGET_64BIT"
20637 {
20638 #if TARGET_MACHO
20639 return "popcnt\t{%1, %k0|%k0, %1}";
20640 #else
20641 return "popcnt{l}\t{%1, %k0|%k0, %1}";
20642 #endif
20643 }
20644 "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
20645 && optimize_function_for_speed_p (cfun)
20646 && !reg_mentioned_p (operands[0], operands[1])"
20647 [(parallel
20648 [(set (match_dup 0)
20649 (zero_extend:DI (popcount:SI (match_dup 1))))
20650 (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
20651 (clobber (reg:CC FLAGS_REG))])]
20652 "ix86_expand_clear (operands[0]);"
20653 [(set_attr "prefix_rep" "1")
20654 (set_attr "type" "bitmanip")
20655 (set_attr "mode" "SI")])
20656
20657 ; False dependency happens when destination is only updated by tzcnt,
20658 ; lzcnt or popcnt. There is no false dependency when destination is
20659 ; also used in source.
;; zero_extend:DI (popcount:SI x) with an explicit UNSPEC_INSN_FALSE_DEP use
;; of the destination.  Operand 2 is tied to operand 0 through the "0"
;; constraint.  Emitted as a 32-bit popcnt writing %k0.  Requires
;; TARGET_POPCNT && TARGET_64BIT.
20660 (define_insn "*popcountsi2_zext_2_falsedep"
20661 [(set (match_operand:DI 0 "register_operand" "=r")
20662 (zero_extend:DI
20663 (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
20664 (unspec [(match_operand:DI 2 "register_operand" "0")]
20665 UNSPEC_INSN_FALSE_DEP)
20666 (clobber (reg:CC FLAGS_REG))]
20667 "TARGET_POPCNT && TARGET_64BIT"
20668 {
20669 #if TARGET_MACHO
20670 return "popcnt\t{%1, %k0|%k0, %1}";
20671 #else
20672 return "popcnt{l}\t{%1, %k0|%k0, %1}";
20673 #endif
20674 }
20675 [(set_attr "prefix_rep" "1")
20676 (set_attr "type" "bitmanip")
20677 (set_attr "mode" "SI")])
20678
;; Matches an SImode popcount of a zero-extended HImode operand, i.e.
;; popcount:SI (zero_extend:SI x).  The pattern has no constraints and its
;; output template is "#"; it is valid only while ix86_pre_reload_split ()
;; holds and is always split ("&& 1").  The split allocates a fresh HImode
;; pseudo, emits popcounthi2 into it, and then zero-extends that result
;; into the SImode destination.
20679 (define_insn_and_split "*popcounthi2_1"
20680 [(set (match_operand:SI 0 "register_operand")
20681 (popcount:SI
20682 (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand"))))
20683 (clobber (reg:CC FLAGS_REG))]
20684 "TARGET_POPCNT
20685 && ix86_pre_reload_split ()"
20686 "#"
20687 "&& 1"
20688 [(const_int 0)]
20689 {
20690 rtx tmp = gen_reg_rtx (HImode);
20691
20692 emit_insn (gen_popcounthi2 (tmp, operands[1]));
20693 emit_insn (gen_zero_extendhisi2 (operands[0], tmp));
20694 DONE;
20695 })
20696
;; Matches a zero-extended HImode popcount, i.e.
;; zero_extend:SI (popcount:HI x).  The pattern has no constraints and its
;; output template is "#"; it is valid only while ix86_pre_reload_split ()
;; holds and is always split ("&& 1").  The split allocates a fresh HImode
;; pseudo, emits popcounthi2 into it, and then zero-extends that result
;; into the SImode destination.
20697 (define_insn_and_split "*popcounthi2_2"
20698 [(set (match_operand:SI 0 "register_operand")
20699 (zero_extend:SI
20700 (popcount:HI (match_operand:HI 1 "nonimmediate_operand"))))
20701 (clobber (reg:CC FLAGS_REG))]
20702 "TARGET_POPCNT
20703 && ix86_pre_reload_split ()"
20704 "#"
20705 "&& 1"
20706 [(const_int 0)]
20707 {
20708 rtx tmp = gen_reg_rtx (HImode);
20709
20710 emit_insn (gen_popcounthi2 (tmp, operands[1]));
20711 emit_insn (gen_zero_extendhisi2 (operands[0], tmp));
20712 DONE;
20713 })
20714
;; Standard-named HImode popcount (requires TARGET_POPCNT).  Emits popcnt
;; on a register or memory source; the mnemonic carries no size suffix when
;; TARGET_MACHO is set and "w" otherwise.  The flags register is clobbered.
;; Unlike the SWI48 popcount pattern, this one is a plain define_insn with
;; no split.
20715 (define_insn "popcounthi2"
20716 [(set (match_operand:HI 0 "register_operand" "=r")
20717 (popcount:HI
20718 (match_operand:HI 1 "nonimmediate_operand" "rm")))
20719 (clobber (reg:CC FLAGS_REG))]
20720 "TARGET_POPCNT"
20721 {
20722 #if TARGET_MACHO
20723 return "popcnt\t{%1, %0|%0, %1}";
20724 #else
20725 return "popcnt{w}\t{%1, %0|%0, %1}";
20726 #endif
20727 }
20728 [(set_attr "prefix_rep" "1")
20729 (set_attr "type" "bitmanip")
20730 (set_attr "mode" "HI")])
20731
;; Expander for DImode byte swap; available only for TARGET_64BIT.
;; Without TARGET_MOVBE the source is forced into a register before the
;; bswap:DI set is emitted; with TARGET_MOVBE the source is left as given
;; (it may be a memory operand).
20732 (define_expand "bswapdi2"
20733 [(set (match_operand:DI 0 "register_operand")
20734 (bswap:DI (match_operand:DI 1 "nonimmediate_operand")))]
20735 "TARGET_64BIT"
20736 {
20737 if (!TARGET_MOVBE)
20738 operands[1] = force_reg (DImode, operands[1]);
20739 })
20740
;; Expander for SImode byte swap; always available (empty condition).
;; Three cases:
;;  - TARGET_MOVBE: emit the bswap:SI set with the operands unchanged.
;;  - otherwise TARGET_BSWAP: force the source into a register, then emit
;;    the bswap:SI set.
;;  - neither: synthesize the swap in the destination register by copying
;;    the source into it, swapping the two bytes of its low half
;;    (bswaphi_lowpart), rotating left by 16, and swapping the two bytes of
;;    the low half again; DONE suppresses the default template.
20741 (define_expand "bswapsi2"
20742 [(set (match_operand:SI 0 "register_operand")
20743 (bswap:SI (match_operand:SI 1 "nonimmediate_operand")))]
20744 ""
20745 {
20746 if (TARGET_MOVBE)
20747 ;
20748 else if (TARGET_BSWAP)
20749 operands[1] = force_reg (SImode, operands[1]);
20750 else
20751 {
20752 rtx x = operands[0];
20753
20754 emit_move_insn (x, operands[1]);
20755 emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x)));
20756 emit_insn (gen_rotlsi3 (x, x, GEN_INT (16)));
20757 emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x)));
20758 DONE;
20759 }
20760 })
20761
;; Byte swap for SWI48 modes when TARGET_MOVBE is available.  The insn
;; condition rejects the memory-to-memory combination.  Alternatives:
;;   0: register swapped in place (source tied to destination) -> bswap
;;   1: memory source, register destination                    -> movbe
;;   2: register source, memory destination                    -> movbe
;; The per-alternative attribute lists follow the same order.
20762 (define_insn "*bswap<mode>2_movbe"
20763 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,r,m")
20764 (bswap:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,m,r")))]
20765 "TARGET_MOVBE
20766 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
20767 "@
20768 bswap\t%0
20769 movbe{<imodesuffix>}\t{%1, %0|%0, %1}
20770 movbe{<imodesuffix>}\t{%1, %0|%0, %1}"
20771 [(set_attr "type" "bitmanip,imov,imov")
20772 (set_attr "modrm" "0,1,1")
20773 (set_attr "prefix_0f" "*,1,1")
20774 (set_attr "prefix_extra" "*,1,1")
20775 (set_attr "mode" "<MODE>")])
20776
;; In-place register byte swap for SWI48 modes (requires TARGET_BSWAP).
;; The source is tied to the destination through the "0" constraint, so
;; only one register is printed in the bswap instruction.
20777 (define_insn "*bswap<mode>2"
20778 [(set (match_operand:SWI48 0 "register_operand" "=r")
20779 (bswap:SWI48 (match_operand:SWI48 1 "register_operand" "0")))]
20780 "TARGET_BSWAP"
20781 "bswap\t%0"
20782 [(set_attr "type" "bitmanip")
20783 (set_attr "modrm" "0")
20784 (set_attr "mode" "<MODE>")])
20785
;; Expander for HImode byte swap.  It is provided only when TARGET_MOVBE is
;; set and has no preparation statements: the bswap:HI set is emitted with
;; the operands as given.
20786 (define_expand "bswaphi2"
20787 [(set (match_operand:HI 0 "register_operand")
20788 (bswap:HI (match_operand:HI 1 "nonimmediate_operand")))]
20789 "TARGET_MOVBE")
20790
;; HImode byte swap when TARGET_MOVBE is available.  The insn condition
;; rejects the memory-to-memory combination.  Alternatives:
;;   0: "Q"-class register swapped in place; the %h0 and %b0 byte halves
;;      of the register are exchanged with xchg{b}
;;   1: memory source, register destination -> movbe{w}
;;   2: register source, memory destination -> movbe{w}
;; The decode/pairing attributes give a value only for alternative 0, and
;; the "mode" attribute is QI for alternative 0 and HI for the movbe forms.
20791 (define_insn "*bswaphi2_movbe"
20792 [(set (match_operand:HI 0 "nonimmediate_operand" "=Q,r,m")
20793 (bswap:HI (match_operand:HI 1 "nonimmediate_operand" "0,m,r")))]
20794 "TARGET_MOVBE
20795 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
20796 "@
20797 xchg{b}\t{%h0, %b0|%b0, %h0}
20798 movbe{w}\t{%1, %0|%0, %1}
20799 movbe{w}\t{%1, %0|%0, %1}"
20800 [(set_attr "type" "imov")
20801 (set_attr "modrm" "*,1,1")
20802 (set_attr "prefix_0f" "*,1,1")
20803 (set_attr "prefix_extra" "*,1,1")
20804 (set_attr "pent_pair" "np,*,*")
20805 (set_attr "athlon_decode" "vector,*,*")
20806 (set_attr "amdfam10_decode" "double,*,*")
20807 (set_attr "bdver1_decode" "double,*,*")
20808 (set_attr "mode" "QI,HI,HI")])
20809
;; Rewrite an in-place HImode bswap of a general register as a rotate by 8
;; with a flags clobber.  Applied only with TARGET_MOVBE, when neither
;; TARGET_USE_XCHGB nor optimizing for size is in effect, and when
;; peep2_regno_dead_p (0, FLAGS_REG) reports the flags register as dead at
;; that point (the replacement clobbers the flags, the original does not).
20810 (define_peephole2
20811 [(set (match_operand:HI 0 "general_reg_operand")
20812 (bswap:HI (match_dup 0)))]
20813 "TARGET_MOVBE
20814 && !(TARGET_USE_XCHGB || optimize_function_for_size_p (cfun))
20815 && peep2_regno_dead_p (0, FLAGS_REG)"
20816 [(parallel [(set (match_dup 0) (rotate:HI (match_dup 0) (const_int 8)))
20817 (clobber (reg:CC FLAGS_REG))])])
20818
;; Swap the two bytes of the low HImode part of a register, written as a
;; strict_low_part set.  Operand 0 is both input and output ("+").
;; The flags register is clobbered.  Alternatives:
;;   0: "Q"-class register, xchg{b} between its %h0 and %b0 byte halves
;;      (length 2)
;;   1: any general register, rol{w} by 8 (length 4)
;; Alternative 0 is the one preferred for size.  For speed, alternative 0
;; is preferred when TARGET_USE_XCHGB is set and alternative 1 otherwise.
20819 (define_insn "bswaphi_lowpart"
20820 [(set (strict_low_part (match_operand:HI 0 "register_operand" "+Q,r"))
20821 (bswap:HI (match_dup 0)))
20822 (clobber (reg:CC FLAGS_REG))]
20823 ""
20824 "@
20825 xchg{b}\t{%h0, %b0|%b0, %h0}
20826 rol{w}\t{$8, %0|%0, 8}"
20827 [(set (attr "preferred_for_size")
20828 (cond [(eq_attr "alternative" "0")
20829 (symbol_ref "true")]
20830 (symbol_ref "false")))
20831 (set (attr "preferred_for_speed")
20832 (cond [(eq_attr "alternative" "0")
20833 (symbol_ref "TARGET_USE_XCHGB")]
20834 (symbol_ref "!TARGET_USE_XCHGB")))
20835 (set_attr "length" "2,4")
20836 (set_attr "mode" "QI,HI")])
20837
;; Expander for DImode parity, used only when POPCNT is not available.
;; The 64-bit input is folded down by XOR:
;;   1. Obtain the high 32 bits (a logical right shift by 32 on 64-bit
;;      targets, gen_highpart otherwise) and XOR them with the low 32 bits.
;;   2. XOR the high and low 16-bit halves of that SImode result.
;;   3. Pass the HImode value to parityhi2_cmp, which sets the flags.
;;   4. Materialize the flag as a QImode 0/1 value with ix86_expand_setcc
;;      using the ORDERED code (NOTE(review): presumably this tests the
;;      parity flag -- confirm against the ix86 condition-code handling).
;;   5. Zero-extend the QImode value to DImode, directly on 64-bit targets
;;      or via an SImode temporary otherwise.
20838 (define_expand "paritydi2"
20839 [(set (match_operand:DI 0 "register_operand")
20840 (parity:DI (match_operand:DI 1 "register_operand")))]
20841 "! TARGET_POPCNT"
20842 {
20843 rtx scratch = gen_reg_rtx (QImode);
20844 rtx hipart1 = gen_reg_rtx (SImode);
20845 rtx lopart1 = gen_reg_rtx (SImode);
20846 rtx xor1 = gen_reg_rtx (SImode);
20847 rtx shift2 = gen_reg_rtx (SImode);
20848 rtx hipart2 = gen_reg_rtx (HImode);
20849 rtx lopart2 = gen_reg_rtx (HImode);
20850 rtx xor2 = gen_reg_rtx (HImode);
20851
20852 if (TARGET_64BIT)
20853 {
20854 rtx shift1 = gen_reg_rtx (DImode);
20855 emit_insn (gen_lshrdi3 (shift1, operands[1], GEN_INT (32)));
20856 emit_move_insn (hipart1, gen_lowpart (SImode, shift1));
20857 }
20858 else
20859 emit_move_insn (hipart1, gen_highpart (SImode, operands[1]));
20860
20861 emit_move_insn (lopart1, gen_lowpart (SImode, operands[1]));
20862 emit_insn (gen_xorsi3 (xor1, hipart1, lopart1));
20863
20864 emit_insn (gen_lshrsi3 (shift2, xor1, GEN_INT (16)));
20865 emit_move_insn (hipart2, gen_lowpart (HImode, shift2));
20866 emit_move_insn (lopart2, gen_lowpart (HImode, xor1));
20867 emit_insn (gen_xorhi3 (xor2, hipart2, lopart2));
20868
20869 emit_insn (gen_parityhi2_cmp (xor2));
20870
20871 ix86_expand_setcc (scratch, ORDERED,
20872 gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
20873
20874 if (TARGET_64BIT)
20875 emit_insn (gen_zero_extendqidi2 (operands[0], scratch));
20876 else
20877 {
20878 rtx tmp = gen_reg_rtx (SImode);
20879
20880 emit_insn (gen_zero_extendqisi2 (tmp, scratch));
20881 emit_insn (gen_zero_extendsidi2 (operands[0], tmp));
20882 }
20883 DONE;
20884 })
20885
;; Expander for SImode parity, used only when POPCNT is not available.
;; The high 16 bits (obtained with a logical right shift by 16) are XORed
;; with the low 16 bits, the HImode result is passed to parityhi2_cmp to
;; set the flags, the flag is materialized as a QImode value with
;; ix86_expand_setcc using the ORDERED code, and that value is
;; zero-extended into the SImode destination.
20886 (define_expand "paritysi2"
20887 [(set (match_operand:SI 0 "register_operand")
20888 (parity:SI (match_operand:SI 1 "register_operand")))]
20889 "! TARGET_POPCNT"
20890 {
20891 rtx scratch = gen_reg_rtx (QImode);
20892 rtx shift = gen_reg_rtx (SImode);
20893 rtx hipart = gen_reg_rtx (HImode);
20894 rtx lopart = gen_reg_rtx (HImode);
20895 rtx tmp = gen_reg_rtx (HImode);
20896
20897 emit_insn (gen_lshrsi3 (shift, operands[1], GEN_INT (16)));
20898 emit_move_insn (hipart, gen_lowpart (HImode, shift));
20899 emit_move_insn (lopart, gen_lowpart (HImode, operands[1]));
20900 emit_insn (gen_xorhi3 (tmp, hipart, lopart));
20901
20902 emit_insn (gen_parityhi2_cmp (tmp));
20903
20904 ix86_expand_setcc (scratch, ORDERED,
20905 gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
20906
20907 emit_insn (gen_zero_extendqisi2 (operands[0], scratch));
20908 DONE;
20909 })
20910
;; Expander for HImode parity, used only when POPCNT is not available.
;; The input is first copied to a fresh HImode pseudo because
;; parityhi2_cmp clobbers its operand.  The flag is then materialized as a
;; QImode value with ix86_expand_setcc using the ORDERED code and
;; zero-extended into the HImode destination.
20911 (define_expand "parityhi2"
20912 [(set (match_operand:HI 0 "register_operand")
20913 (parity:HI (match_operand:HI 1 "register_operand")))]
20914 "! TARGET_POPCNT"
20915 {
20916 rtx scratch = gen_reg_rtx (QImode);
20917 rtx tmp = gen_reg_rtx (HImode);
20918
20919 emit_move_insn (tmp, operands[1]);
20920 emit_insn (gen_parityhi2_cmp (tmp));
20921
20922 ix86_expand_setcc (scratch, ORDERED,
20923 gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
20924
20925 emit_insn (gen_zero_extendqihi2 (operands[0], scratch));
20926 DONE;
20927 })
20928
;; Expander for QImode parity, used only when POPCNT is not available.
;; parityqi2_cmp sets the flags from the input directly (it does not
;; clobber its operand, so no copy is made), and ix86_expand_setcc with the
;; ORDERED code writes the QImode result straight into operand 0.
20929 (define_expand "parityqi2"
20930 [(set (match_operand:QI 0 "register_operand")
20931 (parity:QI (match_operand:QI 1 "register_operand")))]
20932 "! TARGET_POPCNT"
20933 {
20934 emit_insn (gen_parityqi2_cmp (operands[1]));
20935
20936 ix86_expand_setcc (operands[0], ORDERED,
20937 gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
20938 DONE;
20939 })
20940
;; Set the flags to the UNSPEC_PARITY of an HImode value.  The operand must
;; be in a "Q"-class register; the instruction XORs its %h0 byte into its
;; %b0 byte, so the operand is both read and overwritten ("+" constraint
;; plus an explicit clobber).  Callers that still need the value must pass
;; a copy.
20941 (define_insn "parityhi2_cmp"
20942 [(set (reg:CC FLAGS_REG)
20943 (unspec:CC [(match_operand:HI 0 "register_operand" "+Q")]
20944 UNSPEC_PARITY))
20945 (clobber (match_dup 0))]
20946 ""
20947 "xor{b}\t{%h0, %b0|%b0, %h0}"
20948 [(set_attr "length" "2")
20949 (set_attr "mode" "QI")])
20950
;; Set the flags to the UNSPEC_PARITY of a QImode value by testing the
;; register against itself.  The operand ("q" constraint) is only read;
;; nothing other than the flags register is modified.
20951 (define_insn "parityqi2_cmp"
20952 [(set (reg:CC FLAGS_REG)
20953 (unspec:CC [(match_operand:QI 0 "register_operand" "q")]
20954 UNSPEC_PARITY))]
20955 ""
20956 "test{b}\t%0, %0"
20957 [(set_attr "mode" "QI")])
20958
20959 ;; Replace zero_extend:HI followed by parityhi2_cmp with parityqi2_cmp
;; Input: a QImode general register zero-extended into an HImode register,
;; immediately followed by the HImode parity-flag insn on that HImode
;; register (which it also clobbers).  Output: the QImode parity-flag set
;; applied directly to the original QImode register; the zero extension is
;; dropped.  No extra condition is imposed.
20960 (define_peephole2
20961 [(set (match_operand:HI 0 "register_operand")
20962 (zero_extend:HI (match_operand:QI 1 "general_reg_operand")))
20963 (parallel [(set (reg:CC FLAGS_REG)
20964 (unspec:CC [(match_dup 0)] UNSPEC_PARITY))
20965 (clobber (match_dup 0))])]
20966 ""
20967 [(set (reg:CC FLAGS_REG)
20968 (unspec:CC [(match_dup 1)] UNSPEC_PARITY))])
20969
20970 ;; Eliminate QImode popcount&1 using parity flag
;; Input sequence (four insns):
;;   1. operand 0 = zero_extend:SI of QImode general register operand 1
;;   2. operand 2 = popcount:SI (operand 0), clobbering the flags
;;   3. flags     = CCZ compare of (operand 3 & 1) with 0
;;   4. conditional jump on those flags (operator 4) to label operand 5
;; Conditions: operands 2 and 3 are the same hard register, operands 0 and
;; 2 pass peep2_reg_dead_p at position 3, and the flags register passes
;; peep2_regno_dead_p at position 4.
;; Output: a QImode UNSPEC_PARITY flags set on operand 1 followed by the
;; jump.  The preparation code copies the comparison operator and rewrites
;; its code: EQ becomes UNORDERED, anything else becomes ORDERED.
20971 (define_peephole2
20972 [(set (match_operand:SI 0 "register_operand")
20973 (zero_extend:SI (match_operand:QI 1 "general_reg_operand")))
20974 (parallel [(set (match_operand:SI 2 "register_operand")
20975 (popcount:SI (match_dup 0)))
20976 (clobber (reg:CC FLAGS_REG))])
20977 (set (reg:CCZ FLAGS_REG)
20978 (compare:CCZ (and:QI (match_operand:QI 3 "register_operand")
20979 (const_int 1))
20980 (const_int 0)))
20981 (set (pc) (if_then_else (match_operator 4 "bt_comparison_operator"
20982 [(reg:CCZ FLAGS_REG)
20983 (const_int 0)])
20984 (label_ref (match_operand 5))
20985 (pc)))]
20986 "REGNO (operands[2]) == REGNO (operands[3])
20987 && peep2_reg_dead_p (3, operands[0])
20988 && peep2_reg_dead_p (3, operands[2])
20989 && peep2_regno_dead_p (4, FLAGS_REG)"
20990 [(set (reg:CC FLAGS_REG)
20991 (unspec:CC [(match_dup 1)] UNSPEC_PARITY))
20992 (set (pc) (if_then_else (match_op_dup 4 [(reg:CC FLAGS_REG)
20993 (const_int 0)])
20994 (label_ref (match_dup 5))
20995 (pc)))]
20996 {
20997 operands[4] = shallow_copy_rtx (operands[4]);
20998 PUT_CODE (operands[4], GET_CODE (operands[4]) == EQ ? UNORDERED : ORDERED);
20999 })
21000
21001 ;; Eliminate HImode popcount&1 using parity flag
;; Input sequence (four insns; operand 0 is an HImode "Q"-class scratch
;; register requested from the peephole pass):
;;   1. operand 1 = popcount:HI (operand 2), clobbering the flags
;;   2. operand 3 = zero_extend (operand 1)
;;   3. flags     = CCZ compare of (operand 4 & 1) with 0
;;   4. conditional jump on those flags (operator 5) to label operand 6
;; Conditions: operands 3 and 4 are the same hard register, operands 1 and
;; 3 pass peep2_reg_dead_p at position 3, and the flags register passes
;; peep2_regno_dead_p at position 4.
;; Output: copy operand 2 into the scratch, run the HImode parity-flag insn
;; on the scratch (which it clobbers), then the jump.  The preparation code
;; copies the comparison operator and rewrites its code: EQ becomes
;; UNORDERED, anything else becomes ORDERED.
21002 (define_peephole2
21003 [(match_scratch:HI 0 "Q")
21004 (parallel [(set (match_operand:HI 1 "register_operand")
21005 (popcount:HI
21006 (match_operand:HI 2 "nonimmediate_operand")))
21007 (clobber (reg:CC FLAGS_REG))])
21008 (set (match_operand 3 "register_operand")
21009 (zero_extend (match_dup 1)))
21010 (set (reg:CCZ FLAGS_REG)
21011 (compare:CCZ (and:QI (match_operand:QI 4 "register_operand")
21012 (const_int 1))
21013 (const_int 0)))
21014 (set (pc) (if_then_else (match_operator 5 "bt_comparison_operator"
21015 [(reg:CCZ FLAGS_REG)
21016 (const_int 0)])
21017 (label_ref (match_operand 6))
21018 (pc)))]
21019 "REGNO (operands[3]) == REGNO (operands[4])
21020 && peep2_reg_dead_p (3, operands[1])
21021 && peep2_reg_dead_p (3, operands[3])
21022 && peep2_regno_dead_p (4, FLAGS_REG)"
21023 [(set (match_dup 0) (match_dup 2))
21024 (parallel [(set (reg:CC FLAGS_REG)
21025 (unspec:CC [(match_dup 0)] UNSPEC_PARITY))
21026 (clobber (match_dup 0))])
21027 (set (pc) (if_then_else (match_op_dup 5 [(reg:CC FLAGS_REG)
21028 (const_int 0)])
21029 (label_ref (match_dup 6))
21030 (pc)))]
21031 {
21032 operands[5] = shallow_copy_rtx (operands[5]);
21033 PUT_CODE (operands[5], GET_CODE (operands[5]) == EQ ? UNORDERED : ORDERED);
21034 })
21035
21036 ;; Eliminate HImode popcount&1 using parity flag (variant 2)
;; Same transformation for the sequence without an intermediate zero
;; extension (three insns; operand 0 is an HImode "Q"-class scratch):
;;   1. operand 1 = popcount:HI (operand 2), clobbering the flags
;;   2. flags     = CCZ compare of (operand 3 & 1) with 0
;;   3. conditional jump on those flags (operator 4) to label operand 5
;; Conditions: operands 1 and 3 are the same hard register, both pass
;; peep2_reg_dead_p at position 2, and the flags register passes
;; peep2_regno_dead_p at position 3.
;; Output: copy operand 2 into the scratch, run the HImode parity-flag insn
;; on the scratch, then the jump with its comparison code rewritten
;; (EQ becomes UNORDERED, anything else becomes ORDERED).
21037 (define_peephole2
21038 [(match_scratch:HI 0 "Q")
21039 (parallel [(set (match_operand:HI 1 "register_operand")
21040 (popcount:HI
21041 (match_operand:HI 2 "nonimmediate_operand")))
21042 (clobber (reg:CC FLAGS_REG))])
21043 (set (reg:CCZ FLAGS_REG)
21044 (compare:CCZ (and:QI (match_operand:QI 3 "register_operand")
21045 (const_int 1))
21046 (const_int 0)))
21047 (set (pc) (if_then_else (match_operator 4 "bt_comparison_operator"
21048 [(reg:CCZ FLAGS_REG)
21049 (const_int 0)])
21050 (label_ref (match_operand 5))
21051 (pc)))]
21052 "REGNO (operands[1]) == REGNO (operands[3])
21053 && peep2_reg_dead_p (2, operands[1])
21054 && peep2_reg_dead_p (2, operands[3])
21055 && peep2_regno_dead_p (3, FLAGS_REG)"
21056 [(set (match_dup 0) (match_dup 2))
21057 (parallel [(set (reg:CC FLAGS_REG)
21058 (unspec:CC [(match_dup 0)] UNSPEC_PARITY))
21059 (clobber (match_dup 0))])
21060 (set (pc) (if_then_else (match_op_dup 4 [(reg:CC FLAGS_REG)
21061 (const_int 0)])
21062 (label_ref (match_dup 5))
21063 (pc)))]
21064 {
21065 operands[4] = shallow_copy_rtx (operands[4]);
21066 PUT_CODE (operands[4], GET_CODE (operands[4]) == EQ ? UNORDERED : ORDERED);
21067 })
21068
21069 \f
21070 ;; Thread-local storage patterns for ELF.
21071 ;;
21072 ;; Note that these code sequences must appear exactly as shown
21073 ;; in order to allow linker relaxation.
21074
;; 32-bit global-dynamic TLS access in the GNU dialect
;; (!TARGET_64BIT && TARGET_GNU_TLS).  Operands:
;;   0: result, constraint "a"
;;   1: register operand with constraint "Yb", used as the base/index
;;      register of the @tlsgd address and of the @GOT call
;;   2: the TLS symbol
;;   3: the address of the function to call (constraint "Bz")
;; The registers selected by constraints "d" and "c", and the flags, are
;; clobbered.
;;
;; Two instructions are emitted, a lea of %E2@tlsgd followed by a call:
;;  - The lea uses the (,%1,1) addressing form when TARGET_SUN_TLS or
;;    flag_plt is set or HAVE_AS_IX86_TLS_GET_ADDR_GOT is unavailable,
;;    and the (%1) form otherwise.
;;  - With TARGET_SUN_TLS the call is "%a2@tlsgdplt" if
;;    HAVE_AS_IX86_TLSGDPLT is defined, else "%p3@plt".
;;  - Otherwise the call is "%P3" when flag_plt is set or
;;    HAVE_AS_IX86_TLS_GET_ADDR_GOT is unavailable, else an indirect call
;;    through %p3@GOT(%1).
;; The "length" attribute is fixed at 12.
21075 (define_insn "*tls_global_dynamic_32_gnu"
21076 [(set (match_operand:SI 0 "register_operand" "=a")
21077 (unspec:SI
21078 [(match_operand:SI 1 "register_operand" "Yb")
21079 (match_operand 2 "tls_symbolic_operand")
21080 (match_operand 3 "constant_call_address_operand" "Bz")
21081 (reg:SI SP_REG)]
21082 UNSPEC_TLS_GD))
21083 (clobber (match_scratch:SI 4 "=d"))
21084 (clobber (match_scratch:SI 5 "=c"))
21085 (clobber (reg:CC FLAGS_REG))]
21086 "!TARGET_64BIT && TARGET_GNU_TLS"
21087 {
21088 if (TARGET_SUN_TLS || flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
21089 output_asm_insn
21090 ("lea{l}\t{%E2@tlsgd(,%1,1), %0|%0, %E2@tlsgd[%1*1]}", operands);
21091 else
21092 output_asm_insn
21093 ("lea{l}\t{%E2@tlsgd(%1), %0|%0, %E2@tlsgd[%1]}", operands);
21094 if (TARGET_SUN_TLS)
21095 #ifdef HAVE_AS_IX86_TLSGDPLT
21096 return "call\t%a2@tlsgdplt";
21097 #else
21098 return "call\t%p3@plt";
21099 #endif
21100 if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
21101 return "call\t%P3";
21102 return "call\t{*%p3@GOT(%1)|[DWORD PTR %p3@GOT[%1]]}";
21103 }
21104 [(set_attr "type" "multi")
21105 (set_attr "length" "12")])
21106
;; Expander producing the 32-bit UNSPEC_TLS_GD parallel with two scratch
;; clobbers and a flags clobber.  Operand 1 is the TLS symbol and operand 2
;; the register here, the reverse of their numbering in
;; *tls_global_dynamic_32_gnu.  The preparation statement records that a TLS
;; call was expanded by setting ix86_tls_descriptor_calls_expanded_in_cfun.
21107 (define_expand "tls_global_dynamic_32"
21108 [(parallel
21109 [(set (match_operand:SI 0 "register_operand")
21110 (unspec:SI [(match_operand:SI 2 "register_operand")
21111 (match_operand 1 "tls_symbolic_operand")
21112 (match_operand 3 "constant_call_address_operand")
21113 (reg:SI SP_REG)]
21114 UNSPEC_TLS_GD))
21115 (clobber (scratch:SI))
21116 (clobber (scratch:SI))
21117 (clobber (reg:CC FLAGS_REG))])]
21118 ""
21119 "ix86_tls_descriptor_calls_expanded_in_cfun = true;")
21120
;; 64-bit global-dynamic TLS access, modelled as a call to operand 2 paired
;; with an UNSPEC_TLS_GD on the symbol (operand 1) and the stack pointer.
;; Output: an optional data16 prefix (omitted for TARGET_X32), an lea of the
;; symbol's @tlsgd(%rip) reference into %rdi, prefix padding (0x6666 or 0x66,
;; then rex64) and finally the call, whose form depends on TARGET_SUN_TLS,
;; flag_plt and HAVE_AS_IX86_TLS_GET_ADDR_GOT.  Length is 15 for X32, else 16.
21121 (define_insn "*tls_global_dynamic_64_<mode>"
21122 [(set (match_operand:P 0 "register_operand" "=a")
21123 (call:P
21124 (mem:QI (match_operand 2 "constant_call_address_operand" "Bz"))
21125 (match_operand 3)))
21126 (unspec:P [(match_operand 1 "tls_symbolic_operand")
21127 (reg:P SP_REG)]
21128 UNSPEC_TLS_GD)]
21129 "TARGET_64BIT"
21130 {
21131 if (!TARGET_X32)
21132 /* The .loc directive has effect for 'the immediately following assembly
21133 instruction'. So for a sequence:
21134 .loc f l
21135 .byte x
21136 insn1
21137 the 'immediately following assembly instruction' is insn1.
21138 We want to emit an insn prefix here, but if we use .byte (as shown in
21139 'ELF Handling For Thread-Local Storage'), a preceding .loc will point
21140 inside the insn sequence, rather than to the start. After relaxation
21141 of the sequence by the linker, the .loc might point inside an insn.
21142 Use data16 prefix instead, which doesn't have this problem. */
21143 fputs ("\tdata16", asm_out_file);
21144 output_asm_insn
21145 ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands);
21146 if (TARGET_SUN_TLS || flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
21147 fputs (ASM_SHORT "0x6666\n", asm_out_file);
21148 else
21149 fputs (ASM_BYTE "0x66\n", asm_out_file);
21150 fputs ("\trex64\n", asm_out_file);
21151 if (TARGET_SUN_TLS)
21152 return "call\t%p2@plt";
21153 if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
21154 return "call\t%P2";
21155 return "call\t{*%p2@GOTPCREL(%%rip)|[QWORD PTR %p2@GOTPCREL[rip]]}";
21156 }
21157 [(set_attr "type" "multi")
21158 (set (attr "length")
21159 (symbol_ref "TARGET_X32 ? 15 : 16"))])
21160
;; Large-PIC variant of the 64-bit global-dynamic access (CM_LARGE_PIC,
;; non-PECOFF).  The call address is register operand 2 plus operand 3, which
;; the condition requires to be a CONST wrapping an UNSPEC_PLTOFF.  Output:
;; lea of the symbol's @tlsgd(%rip) into %rdi, movabs of operand 3 into %rax,
;; add of operand 2 to %rax, then an indirect call through %rax.
21161 (define_insn "*tls_global_dynamic_64_largepic"
21162 [(set (match_operand:DI 0 "register_operand" "=a")
21163 (call:DI
21164 (mem:QI (plus:DI (match_operand:DI 2 "register_operand" "b")
21165 (match_operand:DI 3 "immediate_operand" "i")))
21166 (match_operand 4)))
21167 (unspec:DI [(match_operand 1 "tls_symbolic_operand")
21168 (reg:DI SP_REG)]
21169 UNSPEC_TLS_GD)]
21170 "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF
21171 && GET_CODE (operands[3]) == CONST
21172 && GET_CODE (XEXP (operands[3], 0)) == UNSPEC
21173 && XINT (XEXP (operands[3], 0), 1) == UNSPEC_PLTOFF"
21174 {
21175 output_asm_insn
21176 ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands);
21177 output_asm_insn ("movabs{q}\t{%3, %%rax|rax, %3}", operands);
21178 output_asm_insn ("add{q}\t{%2, %%rax|rax, %2}", operands);
21179 return "call\t{*%%rax|rax}";
21180 }
21181 [(set_attr "type" "multi")
21182 (set_attr "length" "22")])
21183
;; Expander for the 64-bit global-dynamic call/UNSPEC_TLS_GD parallel.
;; Operand 2 (the call address) has no predicate here and the call's second
;; argument is (const_int 0).  The preparation statement sets
;; ix86_tls_descriptor_calls_expanded_in_cfun.
21184 (define_expand "@tls_global_dynamic_64_<mode>"
21185 [(parallel
21186 [(set (match_operand:P 0 "register_operand")
21187 (call:P
21188 (mem:QI (match_operand 2))
21189 (const_int 0)))
21190 (unspec:P [(match_operand 1 "tls_symbolic_operand")
21191 (reg:P SP_REG)]
21192 UNSPEC_TLS_GD)])]
21193 "TARGET_64BIT"
21194 "ix86_tls_descriptor_calls_expanded_in_cfun = true;")
21195
;; 32-bit local-dynamic base computation (!TARGET_64BIT && TARGET_GNU_TLS).
;; Emits an lea of %&@tlsldm off operand 1 into operand 0, then a call whose
;; form depends on TARGET_SUN_TLS (with HAVE_AS_IX86_TLSLDMPLT), flag_plt and
;; HAVE_AS_IX86_TLS_GET_ADDR_GOT.  Clobbers two SImode scratches
;; (constraints "d" and "c") and the flags.
21196 (define_insn "*tls_local_dynamic_base_32_gnu"
21197 [(set (match_operand:SI 0 "register_operand" "=a")
21198 (unspec:SI
21199 [(match_operand:SI 1 "register_operand" "Yb")
21200 (match_operand 2 "constant_call_address_operand" "Bz")
21201 (reg:SI SP_REG)]
21202 UNSPEC_TLS_LD_BASE))
21203 (clobber (match_scratch:SI 3 "=d"))
21204 (clobber (match_scratch:SI 4 "=c"))
21205 (clobber (reg:CC FLAGS_REG))]
21206 "!TARGET_64BIT && TARGET_GNU_TLS"
21207 {
21208 output_asm_insn
21209 ("lea{l}\t{%&@tlsldm(%1), %0|%0, %&@tlsldm[%1]}", operands);
21210 if (TARGET_SUN_TLS)
21211 {
21212 if (HAVE_AS_IX86_TLSLDMPLT)
21213 return "call\t%&@tlsldmplt";
21214 else
21215 return "call\t%p2@plt";
21216 }
21217 if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
21218 return "call\t%P2";
21219 return "call\t{*%p2@GOT(%1)|[DWORD PTR %p2@GOT[%1]]}";
21220 }
21221 [(set_attr "type" "multi")
21222 (set_attr "length" "11")])
21223
;; Expander producing the 32-bit UNSPEC_TLS_LD_BASE parallel with two scratch
;; clobbers and a flags clobber.  The preparation statement sets
;; ix86_tls_descriptor_calls_expanded_in_cfun.
21224 (define_expand "tls_local_dynamic_base_32"
21225 [(parallel
21226 [(set (match_operand:SI 0 "register_operand")
21227 (unspec:SI
21228 [(match_operand:SI 1 "register_operand")
21229 (match_operand 2 "constant_call_address_operand")
21230 (reg:SI SP_REG)]
21231 UNSPEC_TLS_LD_BASE))
21232 (clobber (scratch:SI))
21233 (clobber (scratch:SI))
21234 (clobber (reg:CC FLAGS_REG))])]
21235 ""
21236 "ix86_tls_descriptor_calls_expanded_in_cfun = true;")
21237
;; 64-bit local-dynamic base computation, modelled as a call to operand 1
;; paired with an UNSPEC_TLS_LD_BASE on the stack pointer.  Emits an lea of
;; %&@tlsld(%rip) into %rdi, then the call; the call form depends on
;; TARGET_SUN_TLS, flag_plt and HAVE_AS_IX86_TLS_GET_ADDR_GOT.
21238 (define_insn "*tls_local_dynamic_base_64_<mode>"
21239 [(set (match_operand:P 0 "register_operand" "=a")
21240 (call:P
21241 (mem:QI (match_operand 1 "constant_call_address_operand" "Bz"))
21242 (match_operand 2)))
21243 (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)]
21244 "TARGET_64BIT"
21245 {
21246 output_asm_insn
21247 ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands);
21248 if (TARGET_SUN_TLS)
21249 return "call\t%p1@plt";
21250 if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
21251 return "call\t%P1";
21252 return "call\t{*%p1@GOTPCREL(%%rip)|[QWORD PTR %p1@GOTPCREL[rip]]}";
21253 }
21254 [(set_attr "type" "multi")
21255 (set_attr "length" "12")])
21256
;; Large-PIC variant of the 64-bit local-dynamic base computation
;; (CM_LARGE_PIC, non-PECOFF).  The call address is register operand 1 plus
;; operand 2, which the condition requires to be a CONST wrapping an
;; UNSPEC_PLTOFF.  Output: lea of %&@tlsld(%rip) into %rdi, movabs of
;; operand 2 into %rax, add of operand 1, then an indirect call through %rax.
21257 (define_insn "*tls_local_dynamic_base_64_largepic"
21258 [(set (match_operand:DI 0 "register_operand" "=a")
21259 (call:DI
21260 (mem:QI (plus:DI (match_operand:DI 1 "register_operand" "b")
21261 (match_operand:DI 2 "immediate_operand" "i")))
21262 (match_operand 3)))
21263 (unspec:DI [(reg:DI SP_REG)] UNSPEC_TLS_LD_BASE)]
21264 "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF
21265 && GET_CODE (operands[2]) == CONST
21266 && GET_CODE (XEXP (operands[2], 0)) == UNSPEC
21267 && XINT (XEXP (operands[2], 0), 1) == UNSPEC_PLTOFF"
21268 {
21269 output_asm_insn
21270 ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands);
21271 output_asm_insn ("movabs{q}\t{%2, %%rax|rax, %2}", operands);
21272 output_asm_insn ("add{q}\t{%1, %%rax|rax, %1}", operands);
21273 return "call\t{*%%rax|rax}";
21274 }
21275 [(set_attr "type" "multi")
21276 (set_attr "length" "22")])
21277
;; Expander for the 64-bit local-dynamic base call/UNSPEC_TLS_LD_BASE
;; parallel.  Operand 1 (the call address) has no predicate here.  The
;; preparation statement sets ix86_tls_descriptor_calls_expanded_in_cfun.
21278 (define_expand "@tls_local_dynamic_base_64_<mode>"
21279 [(parallel
21280 [(set (match_operand:P 0 "register_operand")
21281 (call:P
21282 (mem:QI (match_operand 1))
21283 (const_int 0)))
21284 (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)])]
21285 "TARGET_64BIT"
21286 "ix86_tls_descriptor_calls_expanded_in_cfun = true;")
21287
21288 ;; Local dynamic of a single variable is a loss. Show combine how
21289 ;; to convert that back to global dynamic.
21290
;; Matches a 32-bit UNSPEC_TLS_LD_BASE plus the UNSPEC_DTPOFF constant of a
;; TLS symbol (operand 3).  It has no assembler output of its own ("#") and
;; is split into a single UNSPEC_TLS_GD parallel on the same symbol, reusing
;; the same scratch operands and the flags clobber.
21291 (define_insn_and_split "*tls_local_dynamic_32_once"
21292 [(set (match_operand:SI 0 "register_operand" "=a")
21293 (plus:SI
21294 (unspec:SI [(match_operand:SI 1 "register_operand" "b")
21295 (match_operand 2 "constant_call_address_operand" "Bz")
21296 (reg:SI SP_REG)]
21297 UNSPEC_TLS_LD_BASE)
21298 (const:SI (unspec:SI
21299 [(match_operand 3 "tls_symbolic_operand")]
21300 UNSPEC_DTPOFF))))
21301 (clobber (match_scratch:SI 4 "=d"))
21302 (clobber (match_scratch:SI 5 "=c"))
21303 (clobber (reg:CC FLAGS_REG))]
21304 ""
21305 "#"
21306 ""
21307 [(parallel
21308 [(set (match_dup 0)
21309 (unspec:SI [(match_dup 1) (match_dup 3) (match_dup 2)
21310 (reg:SI SP_REG)]
21311 UNSPEC_TLS_GD))
21312 (clobber (match_dup 4))
21313 (clobber (match_dup 5))
21314 (clobber (reg:CC FLAGS_REG))])])
21315
21316 ;; Load and add the thread base pointer from %<tp_seg>:0.
;; Expander that sets operand 0 to the UNSPEC_TP value.  The expander
;; condition is empty; TLS support is instead checked in the preparation
;; code, which reports an error naming __builtin_thread_pointer when
;; targetm.have_tls is false.
21317 (define_expand "get_thread_pointer<mode>"
21318 [(set (match_operand:PTR 0 "register_operand")
21319 (unspec:PTR [(const_int 0)] UNSPEC_TP))]
21320 ""
21321 {
21322 /* targetm is not visible in the scope of the condition. */
21323 if (!targetm.have_tls)
21324 error ("%<__builtin_thread_pointer%> is not supported on this target");
21325 })
21326
;; Loads the thread pointer (UNSPEC_TP) into a register.  It has no assembler
;; output of its own ("#"); the split replaces the unspec with a constant
;; memory reference to address 0 in the DEFAULT_TLS_SEG_REG address space.
21327 (define_insn_and_split "*load_tp_<mode>"
21328 [(set (match_operand:PTR 0 "register_operand" "=r")
21329 (unspec:PTR [(const_int 0)] UNSPEC_TP))]
21330 ""
21331 "#"
21332 ""
21333 [(set (match_dup 0)
21334 (match_dup 1))]
21335 {
21336 addr_space_t as = DEFAULT_TLS_SEG_REG;
21337
21338 operands[1] = gen_const_mem (<MODE>mode, const0_rtx);
21339 set_mem_addr_space (operands[1], as);
21340 })
21341
;; TARGET_X32 form of the thread-pointer load: the SImode UNSPEC_TP is
;; zero-extended into a DImode register.  The split replaces the unspec with
;; an SImode constant memory reference to address 0 in the
;; DEFAULT_TLS_SEG_REG address space, keeping the zero_extend.
21342 (define_insn_and_split "*load_tp_x32_zext"
21343 [(set (match_operand:DI 0 "register_operand" "=r")
21344 (zero_extend:DI
21345 (unspec:SI [(const_int 0)] UNSPEC_TP)))]
21346 "TARGET_X32"
21347 "#"
21348 "&& 1"
21349 [(set (match_dup 0)
21350 (zero_extend:DI (match_dup 1)))]
21351 {
21352 addr_space_t as = DEFAULT_TLS_SEG_REG;
21353
21354 operands[1] = gen_const_mem (SImode, const0_rtx);
21355 set_mem_addr_space (operands[1], as);
21356 })
21357
;; Adds the thread pointer (UNSPEC_TP) to operand 1, which is tied to the
;; destination by constraint "0".  The split rewrites this as an add of
;; operand 1 and a constant memory reference to address 0 in the
;; DEFAULT_TLS_SEG_REG address space, with the flags clobber kept.
21358 (define_insn_and_split "*add_tp_<mode>"
21359 [(set (match_operand:PTR 0 "register_operand" "=r")
21360 (plus:PTR
21361 (unspec:PTR [(const_int 0)] UNSPEC_TP)
21362 (match_operand:PTR 1 "register_operand" "0")))
21363 (clobber (reg:CC FLAGS_REG))]
21364 ""
21365 "#"
21366 ""
21367 [(parallel
21368 [(set (match_dup 0)
21369 (plus:PTR (match_dup 1) (match_dup 2)))
21370 (clobber (reg:CC FLAGS_REG))])]
21371 {
21372 addr_space_t as = DEFAULT_TLS_SEG_REG;
21373
21374 operands[2] = gen_const_mem (<MODE>mode, const0_rtx);
21375 set_mem_addr_space (operands[2], as);
21376 })
21377
;; TARGET_X32 form of the thread-pointer add: the SImode sum of UNSPEC_TP and
;; operand 1 is zero-extended into a DImode register.  The split replaces
;; the unspec with an SImode constant memory reference to address 0 in the
;; DEFAULT_TLS_SEG_REG address space.
21378 (define_insn_and_split "*add_tp_x32_zext"
21379 [(set (match_operand:DI 0 "register_operand" "=r")
21380 (zero_extend:DI
21381 (plus:SI (unspec:SI [(const_int 0)] UNSPEC_TP)
21382 (match_operand:SI 1 "register_operand" "0"))))
21383 (clobber (reg:CC FLAGS_REG))]
21384 "TARGET_X32"
21385 "#"
21386 "&& 1"
21387 [(parallel
21388 [(set (match_dup 0)
21389 (zero_extend:DI
21390 (plus:SI (match_dup 1) (match_dup 2))))
21391 (clobber (reg:CC FLAGS_REG))])]
21392 {
21393 addr_space_t as = DEFAULT_TLS_SEG_REG;
21394
21395 operands[2] = gen_const_mem (SImode, const0_rtx);
21396 set_mem_addr_space (operands[2], as);
21397 })
21398
21399 ;; The Sun linker took the AMD64 TLS spec literally and can only handle
21400 ;; %rax as destination of the initial executable code sequence.
;; Initial-exec TLS sequence for TARGET_64BIT && TARGET_SUN_TLS.  Emits a mov
;; of %fs:0 into operand 0 (constraint "a"), then an add of the symbol's
;; @gottpoff(%rip) reference to operand 0.  The flags are clobbered.
21401 (define_insn "tls_initial_exec_64_sun"
21402 [(set (match_operand:DI 0 "register_operand" "=a")
21403 (unspec:DI
21404 [(match_operand 1 "tls_symbolic_operand")]
21405 UNSPEC_TLS_IE_SUN))
21406 (clobber (reg:CC FLAGS_REG))]
21407 "TARGET_64BIT && TARGET_SUN_TLS"
21408 {
21409 output_asm_insn
21410 ("mov{q}\t{%%fs:0, %0|%0, QWORD PTR fs:0}", operands);
21411 return "add{q}\t{%a1@gottpoff(%%rip), %0|%0, %a1@gottpoff[rip]}";
21412 }
21413 [(set_attr "type" "multi")])
21414
21415 ;; GNU2 TLS patterns can be split.
21416
;; 32-bit GNU2 (TLS descriptor) expander.  Emits two insns: first operand 3 is
;; set to operand 2 plus the symbol's UNSPEC_TLSDESC constant, then operand 0
;; is set from an UNSPEC_TLSDESC taking the symbol, operand 3, operand 2 and
;; the stack pointer.  Operand 3 is a fresh pseudo when one can be created,
;; otherwise operand 0 is reused.
21417 (define_expand "tls_dynamic_gnu2_32"
21418 [(set (match_dup 3)
21419 (plus:SI (match_operand:SI 2 "register_operand")
21420 (const:SI
21421 (unspec:SI [(match_operand 1 "tls_symbolic_operand")]
21422 UNSPEC_TLSDESC))))
21423 (parallel
21424 [(set (match_operand:SI 0 "register_operand")
21425 (unspec:SI [(match_dup 1) (match_dup 3)
21426 (match_dup 2) (reg:SI SP_REG)]
21427 UNSPEC_TLSDESC))
21428 (clobber (reg:CC FLAGS_REG))])]
21429 "!TARGET_64BIT && TARGET_GNU2_TLS"
21430 {
21431 operands[3] = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : operands[0];
21432 ix86_tls_descriptor_calls_expanded_in_cfun = true;
21433 })
21434
;; First half of the 32-bit GNU2 sequence: an lea of the symbol's @TLSDESC
;; reference, addressed off operand 1 (constraint "b"), into operand 0.
21435 (define_insn "*tls_dynamic_gnu2_lea_32"
21436 [(set (match_operand:SI 0 "register_operand" "=r")
21437 (plus:SI (match_operand:SI 1 "register_operand" "b")
21438 (const:SI
21439 (unspec:SI [(match_operand 2 "tls_symbolic_operand")]
21440 UNSPEC_TLSDESC))))]
21441 "!TARGET_64BIT && TARGET_GNU2_TLS"
21442 "lea{l}\t{%E2@TLSDESC(%1), %0|%0, %E2@TLSDESC[%1]}"
21443 [(set_attr "type" "lea")
21444 (set_attr "mode" "SI")
21445 (set_attr "length" "6")
21446 (set_attr "length_address" "4")])
21447
;; Second half of the 32-bit GNU2 sequence: an indirect call through
;; operand 2 annotated with the symbol's @TLSCALL.  Operand 2 is tied to the
;; result register (constraints "=a" / "0"); operand 3 (constraint "b") is an
;; input that is not referenced in the output template.  Flags are clobbered.
21448 (define_insn "*tls_dynamic_gnu2_call_32"
21449 [(set (match_operand:SI 0 "register_operand" "=a")
21450 (unspec:SI [(match_operand 1 "tls_symbolic_operand")
21451 (match_operand:SI 2 "register_operand" "0")
21452 ;; we have to make sure %ebx still points to the GOT
21453 (match_operand:SI 3 "register_operand" "b")
21454 (reg:SI SP_REG)]
21455 UNSPEC_TLSDESC))
21456 (clobber (reg:CC FLAGS_REG))]
21457 "!TARGET_64BIT && TARGET_GNU2_TLS"
21458 "call\t{*%a1@TLSCALL(%2)|[DWORD PTR [%2+%a1@TLSCALL]]}"
21459 [(set_attr "type" "call")
21460 (set_attr "length" "2")
21461 (set_attr "length_address" "0")])
21462
;; Matches a 32-bit UNSPEC_TLSDESC on a tls_modbase_operand plus the
;; UNSPEC_DTPOFF constant of a TLS symbol (operand 1).  The split emits the
;; tls_dynamic_gnu2_32 sequence for operand 1 using operand 2, writing into
;; operand 5 (a fresh pseudo when possible, else operand 0), and then copies
;; operand 5 into operand 0.
21463 (define_insn_and_split "*tls_dynamic_gnu2_combine_32"
21464 [(set (match_operand:SI 0 "register_operand" "=&a")
21465 (plus:SI
21466 (unspec:SI [(match_operand 3 "tls_modbase_operand")
21467 (match_operand:SI 4)
21468 (match_operand:SI 2 "register_operand" "b")
21469 (reg:SI SP_REG)]
21470 UNSPEC_TLSDESC)
21471 (const:SI (unspec:SI
21472 [(match_operand 1 "tls_symbolic_operand")]
21473 UNSPEC_DTPOFF))))
21474 (clobber (reg:CC FLAGS_REG))]
21475 "!TARGET_64BIT && TARGET_GNU2_TLS"
21476 "#"
21477 "&& 1"
21478 [(set (match_dup 0) (match_dup 5))]
21479 {
21480 operands[5] = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : operands[0];
21481 emit_insn (gen_tls_dynamic_gnu2_32 (operands[5], operands[1], operands[2]));
21482 })
21483
;; 64-bit GNU2 (TLS descriptor) expander.  Emits two insns: first operand 2 is
;; set to the symbol's UNSPEC_TLSDESC, then operand 0 is set from an
;; UNSPEC_TLSDESC taking the symbol, operand 2 and the stack pointer.
;; Operand 2 is a fresh ptr_mode pseudo when one can be created, otherwise
;; operand 0 is reused.
21484 (define_expand "@tls_dynamic_gnu2_64_<mode>"
21485 [(set (match_dup 2)
21486 (unspec:PTR [(match_operand 1 "tls_symbolic_operand")]
21487 UNSPEC_TLSDESC))
21488 (parallel
21489 [(set (match_operand:PTR 0 "register_operand")
21490 (unspec:PTR [(match_dup 1) (match_dup 2) (reg:PTR SP_REG)]
21491 UNSPEC_TLSDESC))
21492 (clobber (reg:CC FLAGS_REG))])]
21493 "TARGET_64BIT && TARGET_GNU2_TLS"
21494 {
21495 operands[2] = can_create_pseudo_p () ? gen_reg_rtx (ptr_mode) : operands[0];
21496 ix86_tls_descriptor_calls_expanded_in_cfun = true;
21497 })
21498
;; First half of the 64-bit GNU2 sequence: a %rip-relative lea of the
;; symbol's @TLSDESC reference into operand 0; the opcode suffix is taken
;; from operand 0's size via %z0.
21499 (define_insn "*tls_dynamic_gnu2_lea_64_<mode>"
21500 [(set (match_operand:PTR 0 "register_operand" "=r")
21501 (unspec:PTR [(match_operand 1 "tls_symbolic_operand")]
21502 UNSPEC_TLSDESC))]
21503 "TARGET_64BIT && TARGET_GNU2_TLS"
21504 "lea%z0\t{%E1@TLSDESC(%%rip), %0|%0, %E1@TLSDESC[rip]}"
21505 [(set_attr "type" "lea")
21506 (set_attr "mode" "<MODE>")
21507 (set_attr "length" "7")
21508 (set_attr "length_address" "4")])
21509
;; Second half of the 64-bit GNU2 sequence: an indirect call through
;; operand 2 annotated with the symbol's @TLSCALL.  Operand 2 is tied to the
;; result register (constraints "=a" / "0").  Flags are clobbered.
21510 (define_insn "*tls_dynamic_gnu2_call_64_<mode>"
21511 [(set (match_operand:PTR 0 "register_operand" "=a")
21512 (unspec:PTR [(match_operand 1 "tls_symbolic_operand")
21513 (match_operand:PTR 2 "register_operand" "0")
21514 (reg:PTR SP_REG)]
21515 UNSPEC_TLSDESC))
21516 (clobber (reg:CC FLAGS_REG))]
21517 "TARGET_64BIT && TARGET_GNU2_TLS"
21518 "call\t{*%a1@TLSCALL(%2)|[QWORD PTR [%2+%a1@TLSCALL]]}"
21519 [(set_attr "type" "call")
21520 (set_attr "length" "2")
21521 (set_attr "length_address" "0")])
21522
;; Matches a 64-bit UNSPEC_TLSDESC on a tls_modbase_operand plus the
;; UNSPEC_DTPOFF constant of a TLS symbol (operand 1).  The split emits the
;; tls_dynamic_gnu2_64 sequence in ptr_mode for operand 1 into operand 4
;; (a fresh pseudo when possible, else operand 0), and then copies operand 4
;; into operand 0.
21523 (define_insn_and_split "*tls_dynamic_gnu2_combine_64_<mode>"
21524 [(set (match_operand:PTR 0 "register_operand" "=&a")
21525 (plus:PTR
21526 (unspec:PTR [(match_operand 2 "tls_modbase_operand")
21527 (match_operand:PTR 3)
21528 (reg:PTR SP_REG)]
21529 UNSPEC_TLSDESC)
21530 (const:PTR (unspec:PTR
21531 [(match_operand 1 "tls_symbolic_operand")]
21532 UNSPEC_DTPOFF))))
21533 (clobber (reg:CC FLAGS_REG))]
21534 "TARGET_64BIT && TARGET_GNU2_TLS"
21535 "#"
21536 "&& 1"
21537 [(set (match_dup 0) (match_dup 4))]
21538 {
21539 operands[4] = can_create_pseudo_p () ? gen_reg_rtx (ptr_mode) : operands[0];
21540 emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, operands[4], operands[1]));
21541 })
21542
;; When TARGET_TLS_DIRECT_SEG_REFS is set, any pattern accepted by
;; tls_address_pattern is replaced by the result of
;; ix86_rewrite_tls_address applied to it.
21543 (define_split
21544 [(match_operand 0 "tls_address_pattern")]
21545 "TARGET_TLS_DIRECT_SEG_REFS"
21546 [(match_dup 0)]
21547 "operands[0] = ix86_rewrite_tls_address (operands[0]);")
21548
21549 \f
21550 ;; These patterns match the binary 387 instructions for addM3, subM3,
21551 ;; mulM3 and divM3. There are three patterns for each of DFmode and
21552 ;; SFmode. The first is the normal insn, the second the same insn but
21553 ;; with one operand a conversion, and the third the same insn but with
21554 ;; the other operand a conversion. The conversion may be SFmode or
21555 ;; SImode if the target mode is DFmode, but only SImode if the target mode
21556 ;; is SFmode.
21557
21558 ;; GCC is slightly smarter about handling normal two-address instructions,
21559 ;; so use special patterns for add and mul.
21560
;; x87 XFmode binary operation restricted to commutative operators
;; (COMMUTATIVE_ARITH_P on operand 3).  Operand 1 is tied to the destination
;; and marked commutative ("%0").  The assembly comes from
;; output_387_binary_op; the type is fmul for mult_operator, otherwise fop.
21561 (define_insn "*fop_xf_comm_i387"
21562 [(set (match_operand:XF 0 "register_operand" "=f")
21563 (match_operator:XF 3 "binary_fp_operator"
21564 [(match_operand:XF 1 "register_operand" "%0")
21565 (match_operand:XF 2 "register_operand" "f")]))]
21566 "TARGET_80387
21567 && COMMUTATIVE_ARITH_P (operands[3])"
21568 "* return output_387_binary_op (insn, operands);"
21569 [(set (attr "type")
21570 (if_then_else (match_operand:XF 3 "mult_operator")
21571 (const_string "fmul")
21572 (const_string "fop")))
21573 (set_attr "mode" "XF")])
21574
;; SF/DF (MODEF) commutative binary operation with three alternatives:
;; 0 = x87 ("f"), 1 = SSE without AVX ("x"), 2 = AVX ("v").  Two memory
;; operands are rejected by the condition.  The "enabled" attribute works as
;; follows: when SSE math is active for the mode, alternative 0 is enabled
;; only if TARGET_MIX_SSE_I387 && X87_ENABLE_ARITH and the others keep their
;; default; otherwise only alternative 0 is enabled.
21575 (define_insn "*fop_<mode>_comm"
21576 [(set (match_operand:MODEF 0 "register_operand" "=f,x,v")
21577 (match_operator:MODEF 3 "binary_fp_operator"
21578 [(match_operand:MODEF 1 "nonimmediate_operand" "%0,0,v")
21579 (match_operand:MODEF 2 "nonimmediate_operand" "fm,xm,vm")]))]
21580 "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
21581 || (TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode)))
21582 && COMMUTATIVE_ARITH_P (operands[3])
21583 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
21584 "* return output_387_binary_op (insn, operands);"
21585 [(set (attr "type")
21586 (if_then_else (eq_attr "alternative" "1,2")
21587 (if_then_else (match_operand:MODEF 3 "mult_operator")
21588 (const_string "ssemul")
21589 (const_string "sseadd"))
21590 (if_then_else (match_operand:MODEF 3 "mult_operator")
21591 (const_string "fmul")
21592 (const_string "fop"))))
21593 (set_attr "isa" "*,noavx,avx")
21594 (set_attr "prefix" "orig,orig,vex")
21595 (set_attr "mode" "<MODE>")
21596 (set (attr "enabled")
21597 (if_then_else
21598 (match_test ("SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"))
21599 (if_then_else
21600 (eq_attr "alternative" "0")
21601 (symbol_ref "TARGET_MIX_SSE_I387
21602 && X87_ENABLE_ARITH (<MODE>mode)")
21603 (const_string "*"))
21604 (if_then_else
21605 (eq_attr "alternative" "0")
21606 (symbol_ref "true")
21607 (symbol_ref "false"))))])
21608
;; HFmode scalar add/sub/mul/div (plusminusmultdiv iterator) for
;; TARGET_AVX512FP16, emitted as the three-operand v<insn>sh.  Two memory
;; operands are rejected by the condition.
21609 (define_insn "*<insn>hf"
21610 [(set (match_operand:HF 0 "register_operand" "=v")
21611 (plusminusmultdiv:HF
21612 (match_operand:HF 1 "nonimmediate_operand" "<comm>v")
21613 (match_operand:HF 2 "nonimmediate_operand" "vm")))]
21614 "TARGET_AVX512FP16
21615 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
21616 "v<insn>sh\t{%2, %1, %0|%0, %1, %2}"
21617 [(set_attr "prefix" "evex")
21618 (set_attr "mode" "HF")])
21619
;; Scalar SFmode reciprocal (UNSPEC_RCP) for TARGET_SSE && TARGET_SSE_MATH.
;; Four alternatives by source operand: tied register, other register,
;; memory (non-AVX) and "ja" memory (AVX).  Under TARGET_AVX every
;; alternative is preferred for speed; otherwise alternatives 1-3 are
;; preferred only when !TARGET_SSE_PARTIAL_REG_DEPENDENCY.
21620 (define_insn "*rcpsf2_sse"
21621 [(set (match_operand:SF 0 "register_operand" "=x,x,x,x")
21622 (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "0,x,m,ja")]
21623 UNSPEC_RCP))]
21624 "TARGET_SSE && TARGET_SSE_MATH"
21625 "@
21626 %vrcpss\t{%d1, %0|%0, %d1}
21627 %vrcpss\t{%d1, %0|%0, %d1}
21628 rcpss\t{%1, %d0|%d0, %1}
21629 vrcpss\t{%1, %d0|%d0, %1}"
21630 [(set_attr "isa" "*,*,noavx,avx")
21631 (set_attr "addr" "*,*,*,gpr16")
21632 (set_attr "type" "sse")
21633 (set_attr "atom_sse_attr" "rcp")
21634 (set_attr "btver2_sse_attr" "rcp")
21635 (set_attr "prefix" "maybe_vex")
21636 (set_attr "mode" "SF")
21637 (set_attr "avx_partial_xmm_update" "false,false,true,true")
21638 (set (attr "preferred_for_speed")
21639 (cond [(match_test "TARGET_AVX")
21640 (symbol_ref "true")
21641 (eq_attr "alternative" "1,2,3")
21642 (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
21643 ]
21644 (symbol_ref "true")))])
21645
;; Scalar HFmode reciprocal (UNSPEC_RCP) for TARGET_AVX512FP16, emitted as
;; vrcpsh.  Alternative 0 takes a register source, alternative 1 a memory
;; source; only the memory form is marked avx_partial_xmm_update.
21646 (define_insn "rcphf2"
21647 [(set (match_operand:HF 0 "register_operand" "=v,v")
21648 (unspec:HF [(match_operand:HF 1 "nonimmediate_operand" "v,m")]
21649 UNSPEC_RCP))]
21650 "TARGET_AVX512FP16"
21651 "@
21652 vrcpsh\t{%d1, %0|%0, %d1}
21653 vrcpsh\t{%1, %d0|%d0, %1}"
21654 [(set_attr "type" "sse")
21655 (set_attr "prefix" "evex")
21656 (set_attr "mode" "HF")
21657 (set_attr "avx_partial_xmm_update" "false,true")])
21658
;; x87 XFmode binary operation restricted to non-commutative operators
;; (!COMMUTATIVE_ARITH_P).  The two alternatives tie either operand 1 or
;; operand 2 to the destination.  The type is fdiv for div_operator,
;; otherwise fop.
21659 (define_insn "*fop_xf_1_i387"
21660 [(set (match_operand:XF 0 "register_operand" "=f,f")
21661 (match_operator:XF 3 "binary_fp_operator"
21662 [(match_operand:XF 1 "register_operand" "0,f")
21663 (match_operand:XF 2 "register_operand" "f,0")]))]
21664 "TARGET_80387
21665 && !COMMUTATIVE_ARITH_P (operands[3])"
21666 "* return output_387_binary_op (insn, operands);"
21667 [(set (attr "type")
21668 (if_then_else (match_operand:XF 3 "div_operator")
21669 (const_string "fdiv")
21670 (const_string "fop")))
21671 (set_attr "mode" "XF")])
21672
;; SF/DF (MODEF) non-commutative binary operation with four alternatives:
;; 0 and 1 = x87 ("f"), 2 = SSE without AVX ("x"), 3 = AVX ("v").  Two memory
;; operands are rejected by the condition.  The "enabled" attribute works as
;; follows: when SSE math is active for the mode, alternatives 0 and 1 are
;; enabled only if TARGET_MIX_SSE_I387 && X87_ENABLE_ARITH and the others
;; keep their default; otherwise only alternatives 0 and 1 are enabled.
21673 (define_insn "*fop_<mode>_1"
21674 [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,v")
21675 (match_operator:MODEF 3 "binary_fp_operator"
21676 [(match_operand:MODEF 1
21677 "x87nonimm_ssenomem_operand" "0,fm,0,v")
21678 (match_operand:MODEF 2
21679 "nonimmediate_operand" "fm,0,xm,vm")]))]
21680 "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
21681 || (TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode)))
21682 && !COMMUTATIVE_ARITH_P (operands[3])
21683 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
21684 "* return output_387_binary_op (insn, operands);"
21685 [(set (attr "type")
21686 (if_then_else (eq_attr "alternative" "2,3")
21687 (if_then_else (match_operand:MODEF 3 "div_operator")
21688 (const_string "ssediv")
21689 (const_string "sseadd"))
21690 (if_then_else (match_operand:MODEF 3 "div_operator")
21691 (const_string "fdiv")
21692 (const_string "fop"))))
21693 (set_attr "isa" "*,*,noavx,avx")
21694 (set_attr "prefix" "orig,orig,orig,vex")
21695 (set_attr "mode" "<MODE>")
21696 (set (attr "enabled")
21697 (if_then_else
21698 (match_test ("SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"))
21699 (if_then_else
21700 (eq_attr "alternative" "0,1")
21701 (symbol_ref "TARGET_MIX_SSE_I387
21702 && X87_ENABLE_ARITH (<MODE>mode)")
21703 (const_string "*"))
21704 (if_then_else
21705 (eq_attr "alternative" "0,1")
21706 (symbol_ref "true")
21707 (symbol_ref "false"))))])
21708
;; x87 binary operation whose first source is an integer (SWI24) memory
;; operand converted with "float"; the second source is tied to the
;; destination.  Only used when SSE math does not handle the mode, and when
;; the matching TARGET_USE_*MODE_FIOP flag is set or the function is
;; optimized for size.  The "mode" attribute is the integer mode.
21709 (define_insn "*fop_<X87MODEF:mode>_2_i387"
21710 [(set (match_operand:X87MODEF 0 "register_operand" "=f")
21711 (match_operator:X87MODEF 3 "binary_fp_operator"
21712 [(float:X87MODEF
21713 (match_operand:SWI24 1 "nonimmediate_operand" "m"))
21714 (match_operand:X87MODEF 2 "register_operand" "0")]))]
21715 "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, <SWI24:MODE>mode)
21716 && !(SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)
21717 && (TARGET_USE_<SWI24:MODE>MODE_FIOP
21718 || optimize_function_for_size_p (cfun))"
21719 "* return output_387_binary_op (insn, operands);"
21720 [(set (attr "type")
21721 (cond [(match_operand:X87MODEF 3 "mult_operator")
21722 (const_string "fmul")
21723 (match_operand:X87MODEF 3 "div_operator")
21724 (const_string "fdiv")
21725 ]
21726 (const_string "fop")))
21727 (set_attr "fp_int_src" "true")
21728 (set_attr "mode" "<SWI24:MODE>")])
21729
;; Mirror of the _2_i387 pattern: the first source is tied to the
;; destination and the second source is an integer (SWI24) memory operand
;; converted with "float".  Same enabling condition and attributes.
21730 (define_insn "*fop_<X87MODEF:mode>_3_i387"
21731 [(set (match_operand:X87MODEF 0 "register_operand" "=f")
21732 (match_operator:X87MODEF 3 "binary_fp_operator"
21733 [(match_operand:X87MODEF 1 "register_operand" "0")
21734 (float:X87MODEF
21735 (match_operand:SWI24 2 "nonimmediate_operand" "m"))]))]
21736 "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, <SWI24:MODE>mode)
21737 && !(SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)
21738 && (TARGET_USE_<SWI24:MODE>MODE_FIOP
21739 || optimize_function_for_size_p (cfun))"
21740 "* return output_387_binary_op (insn, operands);"
21741 [(set (attr "type")
21742 (cond [(match_operand:X87MODEF 3 "mult_operator")
21743 (const_string "fmul")
21744 (match_operand:X87MODEF 3 "div_operator")
21745 (const_string "fdiv")
21746 ]
21747 (const_string "fop")))
21748 (set_attr "fp_int_src" "true")
21749 (set_attr "mode" "<SWI24:MODE>")])
21750
;; x87 XFmode binary operation whose first source is an SF/DF (MODEF)
;; operand widened with float_extend.  The "mode" attribute is the mode of
;; the extended operand.
21751 (define_insn "*fop_xf_4_i387"
21752 [(set (match_operand:XF 0 "register_operand" "=f,f")
21753 (match_operator:XF 3 "binary_fp_operator"
21754 [(float_extend:XF
21755 (match_operand:MODEF 1 "nonimmediate_operand" "fm,0"))
21756 (match_operand:XF 2 "register_operand" "0,f")]))]
21757 "TARGET_80387"
21758 "* return output_387_binary_op (insn, operands);"
21759 [(set (attr "type")
21760 (cond [(match_operand:XF 3 "mult_operator")
21761 (const_string "fmul")
21762 (match_operand:XF 3 "div_operator")
21763 (const_string "fdiv")
21764 ]
21765 (const_string "fop")))
21766 (set_attr "mode" "<MODE>")])
21767
;; x87 DFmode binary operation whose first source is an SFmode operand
;; widened with float_extend.  Only used when x87 DFmode arithmetic is
;; enabled and SSE math does not handle DFmode.  The "mode" attribute is SF.
21768 (define_insn "*fop_df_4_i387"
21769 [(set (match_operand:DF 0 "register_operand" "=f,f")
21770 (match_operator:DF 3 "binary_fp_operator"
21771 [(float_extend:DF
21772 (match_operand:SF 1 "nonimmediate_operand" "fm,0"))
21773 (match_operand:DF 2 "register_operand" "0,f")]))]
21774 "TARGET_80387 && X87_ENABLE_ARITH (DFmode)
21775 && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
21776 "* return output_387_binary_op (insn, operands);"
21777 [(set (attr "type")
21778 (cond [(match_operand:DF 3 "mult_operator")
21779 (const_string "fmul")
21780 (match_operand:DF 3 "div_operator")
21781 (const_string "fdiv")
21782 ]
21783 (const_string "fop")))
21784 (set_attr "mode" "SF")])
21785
;; x87 XFmode binary operation whose second source is an SF/DF (MODEF)
;; operand widened with float_extend.  The "mode" attribute is the mode of
;; the extended operand.
21786 (define_insn "*fop_xf_5_i387"
21787 [(set (match_operand:XF 0 "register_operand" "=f,f")
21788 (match_operator:XF 3 "binary_fp_operator"
21789 [(match_operand:XF 1 "register_operand" "0,f")
21790 (float_extend:XF
21791 (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))]
21792 "TARGET_80387"
21793 "* return output_387_binary_op (insn, operands);"
21794 [(set (attr "type")
21795 (cond [(match_operand:XF 3 "mult_operator")
21796 (const_string "fmul")
21797 (match_operand:XF 3 "div_operator")
21798 (const_string "fdiv")
21799 ]
21800 (const_string "fop")))
21801 (set_attr "mode" "<MODE>")])
21802
;; x87 DFmode binary operation whose second source is an SFmode operand
;; widened with float_extend.  Only used when x87 DFmode arithmetic is
;; enabled and SSE math does not handle DFmode.  The "mode" attribute is SF.
21803 (define_insn "*fop_df_5_i387"
21804 [(set (match_operand:DF 0 "register_operand" "=f,f")
21805 (match_operator:DF 3 "binary_fp_operator"
21806 [(match_operand:DF 1 "register_operand" "0,f")
21807 (float_extend:DF
21808 (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))]
21809 "TARGET_80387 && X87_ENABLE_ARITH (DFmode)
21810 && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
21811 "* return output_387_binary_op (insn, operands);"
21812 [(set (attr "type")
21813 (cond [(match_operand:DF 3 "mult_operator")
21814 (const_string "fmul")
21815 (match_operand:DF 3 "div_operator")
21816 (const_string "fdiv")
21817 ]
21818 (const_string "fop")))
21819 (set_attr "mode" "SF")])
21820
;; x87 XFmode binary operation where both sources are SF/DF (MODEF) operands
;; widened with float_extend.  The "mode" attribute is the mode of the
;; extended operands.
21821 (define_insn "*fop_xf_6_i387"
21822 [(set (match_operand:XF 0 "register_operand" "=f,f")
21823 (match_operator:XF 3 "binary_fp_operator"
21824 [(float_extend:XF
21825 (match_operand:MODEF 1 "register_operand" "0,f"))
21826 (float_extend:XF
21827 (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))]
21828 "TARGET_80387"
21829 "* return output_387_binary_op (insn, operands);"
21830 [(set (attr "type")
21831 (cond [(match_operand:XF 3 "mult_operator")
21832 (const_string "fmul")
21833 (match_operand:XF 3 "div_operator")
21834 (const_string "fdiv")
21835 ]
21836 (const_string "fop")))
21837 (set_attr "mode" "<MODE>")])
21838
;; x87 DFmode binary operation where both sources are SFmode operands
;; widened with float_extend.  Only used when x87 DFmode arithmetic is
;; enabled and SSE math does not handle DFmode.  The "mode" attribute is SF.
21839 (define_insn "*fop_df_6_i387"
21840 [(set (match_operand:DF 0 "register_operand" "=f,f")
21841 (match_operator:DF 3 "binary_fp_operator"
21842 [(float_extend:DF
21843 (match_operand:SF 1 "register_operand" "0,f"))
21844 (float_extend:DF
21845 (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))]
21846 "TARGET_80387 && X87_ENABLE_ARITH (DFmode)
21847 && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
21848 "* return output_387_binary_op (insn, operands);"
21849 [(set (attr "type")
21850 (cond [(match_operand:DF 3 "mult_operator")
21851 (const_string "fmul")
21852 (match_operand:DF 3 "div_operator")
21853 (const_string "fdiv")
21854 ]
21855 (const_string "fop")))
21856 (set_attr "mode" "SF")])
21857 \f
21858 ;; FPU special functions.
21859
21860 ;; This pattern implements a no-op XFmode truncation for
21861 ;; all fancy i386 XFmode math functions.
21862
;; Moves an XFmode x87 register into an SF/DF (MODEF) register or memory
;; destination, represented as UNSPEC_TRUNC_NOOP rather than float_truncate.
;; The assembly comes from output_387_reg_move.
21863 (define_insn "truncxf<mode>2_i387_noop_unspec"
21864 [(set (match_operand:MODEF 0 "nonimmediate_operand" "=mf")
21865 (unspec:MODEF [(match_operand:XF 1 "register_operand" "f")]
21866 UNSPEC_TRUNC_NOOP))]
21867 "TARGET_USE_FANCY_MATH_387"
21868 "* return output_387_reg_move (insn, operands);"
21869 [(set_attr "type" "fmov")
21870 (set_attr "mode" "<MODE>")])
21871
;; XFmode square root on the x87 unit: emits fsqrt with the source tied to
;; the destination register (constraint "0").
21872 (define_insn "sqrtxf2"
21873 [(set (match_operand:XF 0 "register_operand" "=f")
21874 (sqrt:XF (match_operand:XF 1 "register_operand" "0")))]
21875 "TARGET_USE_FANCY_MATH_387"
21876 "fsqrt"
21877 [(set_attr "type" "fpspc")
21878 (set_attr "mode" "XF")
21879 (set_attr "athlon_decode" "direct")
21880 (set_attr "amdfam10_decode" "direct")
21881 (set_attr "bdver1_decode" "direct")])
21882
;; Scalar SFmode reciprocal square root (UNSPEC_RSQRT) for
;; TARGET_SSE && TARGET_SSE_MATH.  Four alternatives by source operand: tied
;; register, other register, memory (non-AVX) and "ja" memory (AVX).  Under
;; TARGET_AVX every alternative is preferred for speed; otherwise
;; alternatives 1-3 are preferred only when
;; !TARGET_SSE_PARTIAL_REG_DEPENDENCY.
21883 (define_insn "*rsqrtsf2_sse"
21884 [(set (match_operand:SF 0 "register_operand" "=x,x,x,x")
21885 (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "0,x,m,ja")]
21886 UNSPEC_RSQRT))]
21887 "TARGET_SSE && TARGET_SSE_MATH"
21888 "@
21889 %vrsqrtss\t{%d1, %0|%0, %d1}
21890 %vrsqrtss\t{%d1, %0|%0, %d1}
21891 rsqrtss\t{%1, %d0|%d0, %1}
21892 vrsqrtss\t{%1, %d0|%d0, %1}"
21893 [(set_attr "isa" "*,*,noavx,avx")
21894 (set_attr "addr" "*,*,*,gpr16")
21895 (set_attr "type" "sse")
21896 (set_attr "atom_sse_attr" "rcp")
21897 (set_attr "btver2_sse_attr" "rcp")
21898 (set_attr "prefix" "maybe_vex")
21899 (set_attr "mode" "SF")
21900 (set_attr "avx_partial_xmm_update" "false,false,true,true")
21901 (set (attr "preferred_for_speed")
21902 (cond [(match_test "TARGET_AVX")
21903 (symbol_ref "true")
21904 (eq_attr "alternative" "1,2,3")
21905 (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
21906 ]
21907 (symbol_ref "true")))])
21908
;; Expander for the SFmode reciprocal square root.  The RTL template is never
;; emitted: the preparation code calls ix86_emit_swsqrtsf with a final
;; argument of 1 and ends with DONE.
21909 (define_expand "rsqrtsf2"
21910 [(set (match_operand:SF 0 "register_operand")
21911 (unspec:SF [(match_operand:SF 1 "nonimmediate_operand")]
21912 UNSPEC_RSQRT))]
21913 "TARGET_SSE && TARGET_SSE_MATH"
21914 {
21915 ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 1);
21916 DONE;
21917 })
21918
;; Scalar HFmode reciprocal square root (UNSPEC_RSQRT) for
;; TARGET_AVX512FP16, emitted as vrsqrtsh.  Alternative 0 takes a register
;; source, alternative 1 a memory source; only the memory form is marked
;; avx_partial_xmm_update.
21919 (define_insn "rsqrthf2"
21920 [(set (match_operand:HF 0 "register_operand" "=v,v")
21921 (unspec:HF [(match_operand:HF 1 "nonimmediate_operand" "v,m")]
21922 UNSPEC_RSQRT))]
21923 "TARGET_AVX512FP16"
21924 "@
21925 vrsqrtsh\t{%d1, %0|%0, %d1}
21926 vrsqrtsh\t{%1, %d0|%d0, %1}"
21927 [(set_attr "type" "sse")
21928 (set_attr "prefix" "evex")
21929 (set_attr "avx_partial_xmm_update" "false,true")
21930 (set_attr "mode" "HF")])
21931
;; Scalar HFmode square root for TARGET_AVX512FP16, emitted as vsqrtsh.
;; Alternative 0 takes a register source, alternative 1 a memory source;
;; only the memory form is marked avx_partial_xmm_update.
21932 (define_insn "sqrthf2"
21933 [(set (match_operand:HF 0 "register_operand" "=v,v")
21934 (sqrt:HF
21935 (match_operand:HF 1 "nonimmediate_operand" "v,m")))]
21936 "TARGET_AVX512FP16"
21937 "@
21938 vsqrtsh\t{%d1, %0|%0, %d1}
21939 vsqrtsh\t{%1, %d0|%d0, %1}"
21940 [(set_attr "type" "sse")
21941 (set_attr "prefix" "evex")
21942 (set_attr "avx_partial_xmm_update" "false,true")
21943 (set_attr "mode" "HF")])
21944
;; Scalar SF/DF (MODEF) square root on the SSE unit.  Three alternatives by
;; source operand: tied register, other register, memory.  Under TARGET_AVX
;; every alternative is preferred for speed; otherwise alternatives 1 and 2
;; are preferred only when !TARGET_SSE_PARTIAL_REG_DEPENDENCY.
21945 (define_insn "*sqrt<mode>2_sse"
21946 [(set (match_operand:MODEF 0 "register_operand" "=v,v,v")
21947 (sqrt:MODEF
21948 (match_operand:MODEF 1 "nonimmediate_operand" "0,v,m")))]
21949 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
21950 "@
21951 %vsqrt<ssemodesuffix>\t{%d1, %0|%0, %d1}
21952 %vsqrt<ssemodesuffix>\t{%d1, %0|%0, %d1}
21953 %vsqrt<ssemodesuffix>\t{%1, %d0|%d0, %1}"
21954 [(set_attr "type" "sse")
21955 (set_attr "atom_sse_attr" "sqrt")
21956 (set_attr "btver2_sse_attr" "sqrt")
21957 (set_attr "prefix" "maybe_vex")
21958 (set_attr "avx_partial_xmm_update" "false,false,true")
21959 (set_attr "mode" "<MODE>")
21960 (set (attr "preferred_for_speed")
21961 (cond [(match_test "TARGET_AVX")
21962 (symbol_ref "true")
21963 (eq_attr "alternative" "1,2")
21964 (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
21965 ]
21966 (symbol_ref "true")))])
21967
;; Expander for SF/DF (MODEF) square root, with three outcomes:
;;  - SFmode with SSE math, TARGET_RECIP_SQRT, not optimizing for size, and
;;    finite-only, non-trapping, unsafe math flags: expands through
;;    ix86_emit_swsqrtsf with a final argument of 0.
;;  - SSE math not handling the mode: extends the input to XFmode, emits
;;    sqrtxf2, and moves the result back with the no-op truncation pattern.
;;  - Otherwise: falls through so the sqrt RTL template itself is emitted.
21968 (define_expand "sqrt<mode>2"
21969 [(set (match_operand:MODEF 0 "register_operand")
21970 (sqrt:MODEF
21971 (match_operand:MODEF 1 "nonimmediate_operand")))]
21972 "(TARGET_USE_FANCY_MATH_387 && X87_ENABLE_ARITH (<MODE>mode))
21973 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
21974 {
21975 if (<MODE>mode == SFmode
21976 && TARGET_SSE && TARGET_SSE_MATH
21977 && TARGET_RECIP_SQRT
21978 && !optimize_function_for_size_p (cfun)
21979 && flag_finite_math_only && !flag_trapping_math
21980 && flag_unsafe_math_optimizations)
21981 {
21982 ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 0);
21983 DONE;
21984 }
21985
21986 if (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
21987 {
21988 rtx op0 = gen_reg_rtx (XFmode);
21989 rtx op1 = gen_reg_rtx (XFmode);
21990
21991 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
21992 emit_insn (gen_sqrtxf2 (op0, op1));
21993 emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
21994 DONE;
21995 }
21996 })
21997
;; Expand hypot for MODEF modes on the x87: extend both inputs to XFmode,
;; square each, add the squares, take sqrtxf2 of the sum and truncate back
;; to <MODE>mode.  Requires x87 fancy math, finite-math-only and unsafe
;; math optimizations, and a mode that is not handled purely by SSE math
;; (unless TARGET_MIX_SSE_I387).
(define_expand "hypot<mode>3"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))
   (use (match_operand:MODEF 2 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_finite_math_only
   && flag_unsafe_math_optimizations"
{
  rtx xf_acc = gen_reg_rtx (XFmode);
  rtx xf_a = gen_reg_rtx (XFmode);
  rtx xf_b = gen_reg_rtx (XFmode);

  /* Widen the inputs (operand 2 first, as before).  */
  emit_insn (gen_extend<mode>xf2 (xf_b, operands[2]));
  emit_insn (gen_extend<mode>xf2 (xf_a, operands[1]));

  /* Square each input in place, then sum into the accumulator.  */
  emit_insn (gen_mulxf3 (xf_a, xf_a, xf_a));
  emit_insn (gen_mulxf3 (xf_b, xf_b, xf_b));
  emit_insn (gen_addxf3 (xf_acc, xf_b, xf_a));
  emit_insn (gen_sqrtxf2 (xf_acc, xf_acc));

  emit_insn (gen_truncxf<mode>2 (operands[0], xf_acc));
  DONE;
})
22023
;; Copy the x87 status (FPSR_REG, wrapped in UNSPEC_FNSTSW) into a HImode
;; register constrained to "a"; emitted as fnstsw.  Requires TARGET_80387.
22024 (define_insn "x86_fnstsw_1"
22025 [(set (match_operand:HI 0 "register_operand" "=a")
22026 (unspec:HI [(reg:CCFP FPSR_REG)] UNSPEC_FNSTSW))]
22027 "TARGET_80387"
22028 "fnstsw\t%0"
22029 [(set_attr "length" "2")
22030 (set_attr "mode" "SI")
22031 (set_attr "unit" "i387")])
22032
;; The x87 fprem instruction.  Inputs are operands 2 and 3, which the
;; matching constraints "0" and "1" tie to output operands 0 and 1.  The
;; pattern sets operand 0 (UNSPEC_FPREM_F), operand 1 (UNSPEC_FPREM_U) and
;; FPSR_REG (UNSPEC_C2_FLAG), all as functions of the two inputs.
22033 (define_insn "fpremxf4_i387"
22034 [(set (match_operand:XF 0 "register_operand" "=f")
22035 (unspec:XF [(match_operand:XF 2 "register_operand" "0")
22036 (match_operand:XF 3 "register_operand" "1")]
22037 UNSPEC_FPREM_F))
22038 (set (match_operand:XF 1 "register_operand" "=f")
22039 (unspec:XF [(match_dup 2) (match_dup 3)]
22040 UNSPEC_FPREM_U))
22041 (set (reg:CCFP FPSR_REG)
22042 (unspec:CCFP [(match_dup 2) (match_dup 3)]
22043 UNSPEC_C2_FLAG))]
22044 "TARGET_USE_FANCY_MATH_387"
22045 "fprem"
22046 [(set_attr "type" "fpspc")
22047 (set_attr "znver1_decode" "vector")
22048 (set_attr "mode" "XF")])
22049
;; Expand fmod in XFmode: copy both inputs into fresh pseudos, then emit a
;; labelled fpremxf4_i387 followed by ix86_emit_fp_unordered_jump back to
;; the label, and finally move the first fprem output to operand 0.
(define_expand "fmodxf3"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "general_operand"))
   (use (match_operand:XF 2 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387"
{
  rtx_code_label *retry = gen_label_rtx ();

  rtx xf_x = gen_reg_rtx (XFmode);
  rtx xf_y = gen_reg_rtx (XFmode);

  emit_move_insn (xf_y, operands[2]);
  emit_move_insn (xf_x, operands[1]);

  /* fprem updates both registers in place; the jump presumably repeats
     the insn while the C2 flag reports an incomplete reduction.  */
  emit_label (retry);
  emit_insn (gen_fpremxf4_i387 (xf_x, xf_y, xf_x, xf_y));
  ix86_emit_fp_unordered_jump (retry);
  LABEL_NUSES (retry) = 1;

  emit_move_insn (operands[0], xf_x);
  DONE;
})
22072
;; Expand fmod for MODEF modes: extend both inputs to XFmode, run the
;; labelled fprem loop there, then convert the result back to <MODE>mode.
;; The final conversion is a real truncxf<mode>2 for strict SSE math and
;; truncxf<mode>2_i387_noop_unspec otherwise.
(define_expand "fmod<mode>3"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))
   (use (match_operand:MODEF 2 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387"
{
  rtx_code_label *retry = gen_label_rtx ();

  rtx xf_x = gen_reg_rtx (XFmode);
  rtx xf_y = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_y, operands[2]));
  emit_insn (gen_extend<mode>xf2 (xf_x, operands[1]));

  emit_label (retry);
  emit_insn (gen_fpremxf4_i387 (xf_x, xf_y, xf_x, xf_y));
  ix86_emit_fp_unordered_jump (retry);
  LABEL_NUSES (retry) = 1;

  /* Truncate the result properly for strict SSE math.  */
  const bool strict_sse_math
    = (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
       && !TARGET_MIX_SSE_I387);
  rtx (*gen_narrow) (rtx, rtx)
    = (strict_sse_math
       ? gen_truncxf<mode>2
       : gen_truncxf<mode>2_i387_noop_unspec);

  emit_insn (gen_narrow (operands[0], xf_x));
  DONE;
})
22104
;; The x87 fprem1 instruction.  Same operand layout as fpremxf4_i387:
;; inputs are operands 2 and 3, tied by the matching constraints "0" and
;; "1" to output operands 0 and 1.  The pattern sets operand 0
;; (UNSPEC_FPREM1_F), operand 1 (UNSPEC_FPREM1_U) and FPSR_REG
;; (UNSPEC_C2_FLAG).
22105 (define_insn "fprem1xf4_i387"
22106 [(set (match_operand:XF 0 "register_operand" "=f")
22107 (unspec:XF [(match_operand:XF 2 "register_operand" "0")
22108 (match_operand:XF 3 "register_operand" "1")]
22109 UNSPEC_FPREM1_F))
22110 (set (match_operand:XF 1 "register_operand" "=f")
22111 (unspec:XF [(match_dup 2) (match_dup 3)]
22112 UNSPEC_FPREM1_U))
22113 (set (reg:CCFP FPSR_REG)
22114 (unspec:CCFP [(match_dup 2) (match_dup 3)]
22115 UNSPEC_C2_FLAG))]
22116 "TARGET_USE_FANCY_MATH_387"
22117 "fprem1"
22118 [(set_attr "type" "fpspc")
22119 (set_attr "znver1_decode" "vector")
22120 (set_attr "mode" "XF")])
22121
;; Expand remainder in XFmode.  Same shape as fmodxf3, but the loop body
;; is fprem1xf4_i387 instead of fpremxf4_i387.
(define_expand "remainderxf3"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "general_operand"))
   (use (match_operand:XF 2 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387"
{
  rtx_code_label *retry = gen_label_rtx ();

  rtx xf_x = gen_reg_rtx (XFmode);
  rtx xf_y = gen_reg_rtx (XFmode);

  emit_move_insn (xf_y, operands[2]);
  emit_move_insn (xf_x, operands[1]);

  /* fprem1 updates both registers in place; the jump presumably repeats
     the insn while the C2 flag reports an incomplete reduction.  */
  emit_label (retry);
  emit_insn (gen_fprem1xf4_i387 (xf_x, xf_y, xf_x, xf_y));
  ix86_emit_fp_unordered_jump (retry);
  LABEL_NUSES (retry) = 1;

  emit_move_insn (operands[0], xf_x);
  DONE;
})
22144
;; Expand remainder for MODEF modes: extend both inputs to XFmode, run the
;; labelled fprem1 loop there, then convert the result back to <MODE>mode.
;; The final conversion is a real truncxf<mode>2 for strict SSE math and
;; truncxf<mode>2_i387_noop_unspec otherwise.
(define_expand "remainder<mode>3"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))
   (use (match_operand:MODEF 2 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387"
{
  rtx_code_label *retry = gen_label_rtx ();

  rtx xf_x = gen_reg_rtx (XFmode);
  rtx xf_y = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_y, operands[2]));
  emit_insn (gen_extend<mode>xf2 (xf_x, operands[1]));

  emit_label (retry);
  emit_insn (gen_fprem1xf4_i387 (xf_x, xf_y, xf_x, xf_y));
  ix86_emit_fp_unordered_jump (retry);
  LABEL_NUSES (retry) = 1;

  /* Truncate the result properly for strict SSE math.  */
  const bool strict_sse_math
    = (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
       && !TARGET_MIX_SSE_I387);
  rtx (*gen_narrow) (rtx, rtx)
    = (strict_sse_math
       ? gen_truncxf<mode>2
       : gen_truncxf<mode>2_i387_noop_unspec);

  emit_insn (gen_narrow (operands[0], xf_x));
  DONE;
})
22177
;; Int iterator over the two unspecs UNSPEC_SIN and UNSPEC_COS, so that a
;; single pattern using SINCOS is instantiated once for each.
22178 (define_int_iterator SINCOS
22179 [UNSPEC_SIN
22180 UNSPEC_COS])
22181
;; Maps each SINCOS unspec to the text "sin" or "cos"; substituted for
;; <sincos> in pattern names and output templates.
22182 (define_int_attr sincos
22183 [(UNSPEC_SIN "sin")
22184 (UNSPEC_COS "cos")])
22185
;; XFmode sin and cos insns (one per SINCOS value), emitted as fsin / fcos.
;; The input is tied to the output register by the matching constraint "0".
;; Requires x87 fancy math and flag_unsafe_math_optimizations.
22186 (define_insn "<sincos>xf2"
22187 [(set (match_operand:XF 0 "register_operand" "=f")
22188 (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
22189 SINCOS))]
22190 "TARGET_USE_FANCY_MATH_387
22191 && flag_unsafe_math_optimizations"
22192 "f<sincos>"
22193 [(set_attr "type" "fpspc")
22194 (set_attr "znver1_decode" "vector")
22195 (set_attr "mode" "XF")])
22196
;; Expand sin / cos for MODEF modes through the XFmode <sincos>xf2 insn:
;; extend the argument, apply the insn, truncate the result.  Requires x87
;; fancy math and unsafe math optimizations, and a mode that is not
;; handled purely by SSE math (unless TARGET_MIX_SSE_I387).
(define_expand "<sincos><mode>2"
  [(set (match_operand:MODEF 0 "register_operand")
        (unspec:MODEF [(match_operand:MODEF 1 "general_operand")]
                      SINCOS))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_<sincos>xf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
22214
;; The x87 fsincos instruction.  Operand 2 is the input, tied to operand 0
;; by the matching constraint "0".  Operand 0 receives UNSPEC_SINCOS_COS
;; and operand 1 receives UNSPEC_SINCOS_SIN of that input.
;; Requires x87 fancy math and flag_unsafe_math_optimizations.
22215 (define_insn "sincosxf3"
22216 [(set (match_operand:XF 0 "register_operand" "=f")
22217 (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
22218 UNSPEC_SINCOS_COS))
22219 (set (match_operand:XF 1 "register_operand" "=f")
22220 (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
22221 "TARGET_USE_FANCY_MATH_387
22222 && flag_unsafe_math_optimizations"
22223 "fsincos"
22224 [(set_attr "type" "fpspc")
22225 (set_attr "znver1_decode" "vector")
22226 (set_attr "mode" "XF")])
22227
;; Expand sincos for MODEF modes through sincosxf3: extend operand 2 to
;; XFmode, compute both results there, and truncate sincosxf3's operand 0
;; (the UNSPEC_SINCOS_COS value) into operand 0 and its operand 1 (the
;; UNSPEC_SINCOS_SIN value) into operand 1.
(define_expand "sincos<mode>3"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "register_operand"))
   (use (match_operand:MODEF 2 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_cos = gen_reg_rtx (XFmode);
  rtx xf_sin = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[2]));
  emit_insn (gen_sincosxf3 (xf_cos, xf_sin, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_cos));
  emit_insn (gen_truncxf<mode>2 (operands[1], xf_sin));
  DONE;
})
22247
;; The x87 fptan instruction, modelled with two outputs: SFmode operand 0
;; is set to operand 3 (which must satisfy const1_operand), and XFmode
;; operand 1 is set to UNSPEC_TAN of operand 2.  Operand 2 is tied to
;; operand 0's register by the matching constraint "0".
;; Requires x87 fancy math and flag_unsafe_math_optimizations.
22248 (define_insn "fptanxf4_i387"
22249 [(set (match_operand:SF 0 "register_operand" "=f")
22250 (match_operand:SF 3 "const1_operand"))
22251 (set (match_operand:XF 1 "register_operand" "=f")
22252 (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
22253 UNSPEC_TAN))]
22254 "TARGET_USE_FANCY_MATH_387
22255 && flag_unsafe_math_optimizations"
22256 "fptan"
22257 [(set_attr "type" "fpspc")
22258 (set_attr "znver1_decode" "vector")
22259 (set_attr "mode" "XF")])
22260
;; Expand tan in XFmode via fptanxf4_i387.  That insn also sets an SFmode
;; output to the constant 1; a fresh pseudo is supplied for it and then
;; ignored.
(define_expand "tanxf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  rtx unused_one = gen_reg_rtx (SFmode);
  rtx const_one = CONST1_RTX (SFmode);

  emit_insn (gen_fptanxf4_i387 (unused_one, operands[0], operands[1],
                                const_one));
  DONE;
})
22272
;; Expand tan for MODEF modes through tanxf2: extend the argument to
;; XFmode, apply tanxf2, truncate the result.  Requires x87 fancy math and
;; unsafe math optimizations, and a mode that is not handled purely by SSE
;; math (unless TARGET_MIX_SSE_I387).
(define_expand "tan<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_tanxf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
22289
;; The x87 fpatan instruction (UNSPEC_FPATAN of operands 2 and 1).
;; Operand 2 is tied to the output by the matching constraint "0";
;; operand 1 is any "f" register, and the scratch (operand 3, constraint
;; "=1") marks operand 1's register as clobbered.
;; Requires x87 fancy math and flag_unsafe_math_optimizations.
22290 (define_insn "atan2xf3"
22291 [(set (match_operand:XF 0 "register_operand" "=f")
22292 (unspec:XF [(match_operand:XF 2 "register_operand" "0")
22293 (match_operand:XF 1 "register_operand" "f")]
22294 UNSPEC_FPATAN))
22295 (clobber (match_scratch:XF 3 "=1"))]
22296 "TARGET_USE_FANCY_MATH_387
22297 && flag_unsafe_math_optimizations"
22298 "fpatan"
22299 [(set_attr "type" "fpspc")
22300 (set_attr "znver1_decode" "vector")
22301 (set_attr "mode" "XF")])
22302
;; Expand atan2 for MODEF modes through atan2xf3: extend both inputs to
;; XFmode, apply atan2xf3, truncate the result.  Requires x87 fancy math
;; and unsafe math optimizations, and a mode that is not handled purely by
;; SSE math (unless TARGET_MIX_SSE_I387).
(define_expand "atan2<mode>3"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))
   (use (match_operand:MODEF 2 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg1 = gen_reg_rtx (XFmode);
  rtx xf_arg2 = gen_reg_rtx (XFmode);

  /* Operand 2 is widened first, as before.  */
  emit_insn (gen_extend<mode>xf2 (xf_arg2, operands[2]));
  emit_insn (gen_extend<mode>xf2 (xf_arg1, operands[1]));

  emit_insn (gen_atan2xf3 (xf_result, xf_arg1, xf_arg2));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
22323
;; Expand atan in XFmode as UNSPEC_FPATAN of (1.0, operand 1), with an
;; XFmode scratch clobber.  The preparation code supplies the constant 1
;; in a register as operand 2.
(define_expand "atanxf2"
  [(parallel [(set (match_operand:XF 0 "register_operand")
                   (unspec:XF [(match_dup 2)
                               (match_operand:XF 1 "register_operand")]
                              UNSPEC_FPATAN))
              (clobber (scratch:XF))])]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  rtx const_one = CONST1_RTX (XFmode);

  operands[2] = force_reg (XFmode, const_one);
})
22333
;; Expand atan for MODEF modes through atanxf2: extend the argument to
;; XFmode, apply atanxf2, truncate the result.  Requires x87 fancy math
;; and unsafe math optimizations, and a mode that is not handled purely by
;; SSE math (unless TARGET_MIX_SSE_I387).
(define_expand "atan<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_atanxf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
22350
;; Expand asin in XFmode as a four-step sequence on temporaries:
;;   op2 = x * x
;;   op4 = op3 - op2        (op3 is loaded with the constant 1)
;;   op5 = sqrt (op4)
;;   op0 = UNSPEC_FPATAN (op5, x), with an XFmode scratch clobber.
;; The preparation code creates the temporaries (operands 2..5).
(define_expand "asinxf2"
  [(set (match_dup 2)
        (mult:XF (match_operand:XF 1 "register_operand")
                 (match_dup 1)))
   (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2)))
   (set (match_dup 5) (sqrt:XF (match_dup 4)))
   (parallel [(set (match_operand:XF 0 "register_operand")
                   (unspec:XF [(match_dup 5) (match_dup 1)]
                              UNSPEC_FPATAN))
              (clobber (scratch:XF))])]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  /* Fresh XFmode pseudos for operands 2 through 5.  */
  for (int opno = 2; opno <= 5; ++opno)
    operands[opno] = gen_reg_rtx (XFmode);

  emit_move_insn (operands[3], CONST1_RTX (XFmode));
})
22371
;; Expand asin for MODEF modes through asinxf2: extend the argument to
;; XFmode, apply asinxf2, truncate the result.  Requires x87 fancy math
;; and unsafe math optimizations, and a mode that is not handled purely by
;; SSE math (unless TARGET_MIX_SSE_I387).
(define_expand "asin<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_asinxf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
22388
;; Expand acos in XFmode.  Same sequence as asinxf2 except that the two
;; UNSPEC_FPATAN inputs are swapped:
;;   op2 = x * x
;;   op4 = op3 - op2        (op3 is loaded with the constant 1)
;;   op5 = sqrt (op4)
;;   op0 = UNSPEC_FPATAN (x, op5), with an XFmode scratch clobber.
(define_expand "acosxf2"
  [(set (match_dup 2)
        (mult:XF (match_operand:XF 1 "register_operand")
                 (match_dup 1)))
   (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2)))
   (set (match_dup 5) (sqrt:XF (match_dup 4)))
   (parallel [(set (match_operand:XF 0 "register_operand")
                   (unspec:XF [(match_dup 1) (match_dup 5)]
                              UNSPEC_FPATAN))
              (clobber (scratch:XF))])]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  /* Fresh XFmode pseudos for operands 2 through 5.  */
  for (int opno = 2; opno <= 5; ++opno)
    operands[opno] = gen_reg_rtx (XFmode);

  emit_move_insn (operands[3], CONST1_RTX (XFmode));
})
22409
;; Expand acos for MODEF modes through acosxf2: extend the argument to
;; XFmode, apply acosxf2, truncate the result.  Requires x87 fancy math
;; and unsafe math optimizations, and a mode that is not handled purely by
;; SSE math (unless TARGET_MIX_SSE_I387).
(define_expand "acos<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_acosxf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
22426
;; Expand sinh in XFmode by delegating to ix86_emit_i387_sinh.  Requires
;; x87 fancy math, flag_finite_math_only and
;; flag_unsafe_math_optimizations.
(define_expand "sinhxf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_finite_math_only
   && flag_unsafe_math_optimizations"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  ix86_emit_i387_sinh (dest, src);
  DONE;
})
22437
;; Expand sinh for MODEF modes through sinhxf2: extend the argument to
;; XFmode, apply sinhxf2, truncate the result.  Requires x87 fancy math,
;; finite-math-only and unsafe math optimizations, and a mode that is not
;; handled purely by SSE math (unless TARGET_MIX_SSE_I387).
(define_expand "sinh<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_finite_math_only
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_sinhxf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
22455
;; Expand cosh in XFmode by delegating to ix86_emit_i387_cosh.  Requires
;; x87 fancy math and flag_unsafe_math_optimizations.
(define_expand "coshxf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  ix86_emit_i387_cosh (dest, src);
  DONE;
})
22465
;; Expand cosh for MODEF modes through coshxf2: extend the argument to
;; XFmode, apply coshxf2, truncate the result.  Requires x87 fancy math
;; and unsafe math optimizations, and a mode that is not handled purely by
;; SSE math (unless TARGET_MIX_SSE_I387).
(define_expand "cosh<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_coshxf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
22482
;; Expand tanh in XFmode by delegating to ix86_emit_i387_tanh.  Requires
;; x87 fancy math and flag_unsafe_math_optimizations.
(define_expand "tanhxf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  ix86_emit_i387_tanh (dest, src);
  DONE;
})
22492
;; Expand tanh for MODEF modes through tanhxf2: extend the argument to
;; XFmode, apply tanhxf2, truncate the result.  Requires x87 fancy math
;; and unsafe math optimizations, and a mode that is not handled purely by
;; SSE math (unless TARGET_MIX_SSE_I387).
(define_expand "tanh<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_tanhxf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
22509
;; Expand asinh in XFmode by delegating to ix86_emit_i387_asinh.  Requires
;; x87 fancy math, flag_finite_math_only and
;; flag_unsafe_math_optimizations.
(define_expand "asinhxf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_finite_math_only
   && flag_unsafe_math_optimizations"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  ix86_emit_i387_asinh (dest, src);
  DONE;
})
22520
;; Expand asinh for MODEF modes through asinhxf2: extend the argument to
;; XFmode, apply asinhxf2, truncate the result.  Requires x87 fancy math,
;; finite-math-only and unsafe math optimizations, and a mode that is not
;; handled purely by SSE math (unless TARGET_MIX_SSE_I387).
(define_expand "asinh<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_finite_math_only
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_asinhxf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
22538
;; Expand acosh in XFmode by delegating to ix86_emit_i387_acosh.  Requires
;; x87 fancy math and flag_unsafe_math_optimizations.
(define_expand "acoshxf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  ix86_emit_i387_acosh (dest, src);
  DONE;
})
22548
;; Expand acosh for MODEF modes through acoshxf2: extend the argument to
;; XFmode, apply acoshxf2, truncate the result.  Requires x87 fancy math
;; and unsafe math optimizations, and a mode that is not handled purely by
;; SSE math (unless TARGET_MIX_SSE_I387).
(define_expand "acosh<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_acoshxf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
22565
;; Expand atanh in XFmode by delegating to ix86_emit_i387_atanh.  Requires
;; x87 fancy math and flag_unsafe_math_optimizations.
(define_expand "atanhxf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  ix86_emit_i387_atanh (dest, src);
  DONE;
})
22575
;; Expand atanh for MODEF modes through atanhxf2: extend the argument to
;; XFmode, apply atanhxf2, truncate the result.  Requires x87 fancy math
;; and unsafe math optimizations, and a mode that is not handled purely by
;; SSE math (unless TARGET_MIX_SSE_I387).
(define_expand "atanh<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_atanhxf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
22592
;; The x87 fyl2x instruction (UNSPEC_FYL2X of operands 1 and 2).
;; Operand 1 is tied to the output by the matching constraint "0";
;; operand 2 is any "f" register, and the scratch (operand 3, constraint
;; "=2") marks operand 2's register as clobbered.
;; Requires x87 fancy math and flag_unsafe_math_optimizations.
22593 (define_insn "fyl2xxf3_i387"
22594 [(set (match_operand:XF 0 "register_operand" "=f")
22595 (unspec:XF [(match_operand:XF 1 "register_operand" "0")
22596 (match_operand:XF 2 "register_operand" "f")]
22597 UNSPEC_FYL2X))
22598 (clobber (match_scratch:XF 3 "=2"))]
22599 "TARGET_USE_FANCY_MATH_387
22600 && flag_unsafe_math_optimizations"
22601 "fyl2x"
22602 [(set_attr "type" "fpspc")
22603 (set_attr "znver1_decode" "vector")
22604 (set_attr "mode" "XF")])
22605
;; Expand log in XFmode as UNSPEC_FYL2X of (operand 1, constant), with an
;; XFmode scratch clobber.  The constant is x87 standard constant 4, the
;; one loaded by fldln2.
(define_expand "logxf2"
  [(parallel [(set (match_operand:XF 0 "register_operand")
                   (unspec:XF [(match_operand:XF 1 "register_operand")
                               (match_dup 2)] UNSPEC_FYL2X))
              (clobber (scratch:XF))])]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  rtx fldln2_const = standard_80387_constant_rtx (4); /* fldln2 */

  operands[2] = force_reg (XFmode, fldln2_const);
})
22617
;; Expand log for MODEF modes through logxf2: extend the argument to
;; XFmode, apply logxf2, truncate the result.  Requires x87 fancy math and
;; unsafe math optimizations, and a mode that is not handled purely by SSE
;; math (unless TARGET_MIX_SSE_I387).
(define_expand "log<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_logxf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
22634
;; Expand log10 in XFmode as UNSPEC_FYL2X of (operand 1, constant), with
;; an XFmode scratch clobber.  The constant is x87 standard constant 3,
;; the one loaded by fldlg2.
(define_expand "log10xf2"
  [(parallel [(set (match_operand:XF 0 "register_operand")
                   (unspec:XF [(match_operand:XF 1 "register_operand")
                               (match_dup 2)] UNSPEC_FYL2X))
              (clobber (scratch:XF))])]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  rtx fldlg2_const = standard_80387_constant_rtx (3); /* fldlg2 */

  operands[2] = force_reg (XFmode, fldlg2_const);
})
22646
;; Expand log10 for MODEF modes through log10xf2: extend the argument to
;; XFmode, apply log10xf2, truncate the result.  Requires x87 fancy math
;; and unsafe math optimizations, and a mode that is not handled purely by
;; SSE math (unless TARGET_MIX_SSE_I387).
(define_expand "log10<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_log10xf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
22663
;; Expand log2 in XFmode as UNSPEC_FYL2X of (operand 1, 1.0), with an
;; XFmode scratch clobber.  The preparation code supplies the constant 1
;; in a register as operand 2.
(define_expand "log2xf2"
  [(parallel [(set (match_operand:XF 0 "register_operand")
                   (unspec:XF [(match_operand:XF 1 "register_operand")
                               (match_dup 2)] UNSPEC_FYL2X))
              (clobber (scratch:XF))])]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  rtx const_one = CONST1_RTX (XFmode);

  operands[2] = force_reg (XFmode, const_one);
})
22672
;; Expand log2 for MODEF modes through log2xf2: extend the argument to
;; XFmode, apply log2xf2, truncate the result.  Requires x87 fancy math
;; and unsafe math optimizations, and a mode that is not handled purely by
;; SSE math (unless TARGET_MIX_SSE_I387).
(define_expand "log2<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_log2xf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
22689
;; The x87 fyl2xp1 instruction (UNSPEC_FYL2XP1 of operands 1 and 2).
;; Same operand layout as fyl2xxf3_i387: operand 1 is tied to the output
;; by the matching constraint "0", operand 2 is any "f" register, and the
;; scratch (operand 3, constraint "=2") marks operand 2's register as
;; clobbered.  Requires x87 fancy math and flag_unsafe_math_optimizations.
22690 (define_insn "fyl2xp1xf3_i387"
22691 [(set (match_operand:XF 0 "register_operand" "=f")
22692 (unspec:XF [(match_operand:XF 1 "register_operand" "0")
22693 (match_operand:XF 2 "register_operand" "f")]
22694 UNSPEC_FYL2XP1))
22695 (clobber (match_scratch:XF 3 "=2"))]
22696 "TARGET_USE_FANCY_MATH_387
22697 && flag_unsafe_math_optimizations"
22698 "fyl2xp1"
22699 [(set_attr "type" "fpspc")
22700 (set_attr "znver1_decode" "vector")
22701 (set_attr "mode" "XF")])
22702
;; Expand log1p in XFmode by delegating to ix86_emit_i387_log1p.  Requires
;; x87 fancy math and flag_unsafe_math_optimizations.
(define_expand "log1pxf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  ix86_emit_i387_log1p (dest, src);
  DONE;
})
22712
;; Expand log1p for MODEF modes through log1pxf2: extend the argument to
;; XFmode, apply log1pxf2, truncate the result.  Requires x87 fancy math
;; and unsafe math optimizations, and a mode that is not handled purely by
;; SSE math (unless TARGET_MIX_SSE_I387).
(define_expand "log1p<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_log1pxf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
22729
;; The x87 fxtract instruction.  Operand 2 is the input, tied to operand 0
;; by the matching constraint "0".  Operand 0 receives UNSPEC_XTRACT_FRACT
;; and operand 1 receives UNSPEC_XTRACT_EXP of that input.
;; Requires x87 fancy math and flag_unsafe_math_optimizations.
22730 (define_insn "fxtractxf3_i387"
22731 [(set (match_operand:XF 0 "register_operand" "=f")
22732 (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
22733 UNSPEC_XTRACT_FRACT))
22734 (set (match_operand:XF 1 "register_operand" "=f")
22735 (unspec:XF [(match_dup 2)] UNSPEC_XTRACT_EXP))]
22736 "TARGET_USE_FANCY_MATH_387
22737 && flag_unsafe_math_optimizations"
22738 "fxtract"
22739 [(set_attr "type" "fpspc")
22740 (set_attr "znver1_decode" "vector")
22741 (set_attr "mode" "XF")])
22742
;; Expand logb in XFmode as the two-output fxtract pattern: operand 0
;; receives UNSPEC_XTRACT_EXP of operand 1, while the
;; UNSPEC_XTRACT_FRACT output goes to a fresh pseudo (operand 2) created
;; by the preparation code.
(define_expand "logbxf2"
  [(parallel [(set (match_dup 2)
                   (unspec:XF [(match_operand:XF 1 "register_operand")]
                              UNSPEC_XTRACT_FRACT))
              (set (match_operand:XF 0 "register_operand")
                   (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  rtx fract_out = gen_reg_rtx (XFmode);

  operands[2] = fract_out;
})
22752
;; Expand logb for MODEF modes through logbxf2: extend the argument to
;; XFmode, apply logbxf2, and truncate the exponent result back to
;; <MODE>mode.  Requires x87 fancy math and unsafe math optimizations, and
;; a mode that is not handled purely by SSE math (unless
;; TARGET_MIX_SSE_I387).
(define_expand "logb<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx op0 = gen_reg_rtx (XFmode);
  rtx op1 = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
  emit_insn (gen_logbxf2 (op0, op1));
  /* logbxf2 stores the UNSPEC_XTRACT_EXP value in its operand 0, i.e.
     op0.  op1 still holds the widened input, so truncating op1 would
     just hand the argument back to the caller.  */
  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
  DONE;
})
22769
;; Expand ilogb for an XFmode argument: run fxtractxf3_i387 and convert
;; its exponent output (operand 1 of that insn) to SImode with
;; fix_truncxfsi2.  The expansion FAILs when optimizing the insn for size.
(define_expand "ilogbxf2"
  [(use (match_operand:SI 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  if (optimize_insn_for_size_p ())
    FAIL;

  rtx xf_fract = gen_reg_rtx (XFmode);
  rtx xf_exp = gen_reg_rtx (XFmode);

  emit_insn (gen_fxtractxf3_i387 (xf_fract, xf_exp, operands[1]));
  emit_insn (gen_fix_truncxfsi2 (operands[0], xf_exp));
  DONE;
})
22788
;; Expand ilogb for MODEF arguments: extend the argument to XFmode, run
;; fxtractxf3_i387 and convert its exponent output (operand 1 of that
;; insn) to SImode with fix_truncxfsi2.  The expansion FAILs when
;; optimizing the insn for size.
(define_expand "ilogb<mode>2"
  [(use (match_operand:SI 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  if (optimize_insn_for_size_p ())
    FAIL;

  rtx xf_fract = gen_reg_rtx (XFmode);
  rtx xf_exp = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_fxtractxf3_i387 (xf_fract, xf_exp, xf_arg));
  emit_insn (gen_fix_truncxfsi2 (operands[0], xf_exp));
  DONE;
})
22811
;; The x87 f2xm1 instruction (UNSPEC_F2XM1 of operand 1).  The input is
;; tied to the output register by the matching constraint "0".
;; Requires x87 fancy math and flag_unsafe_math_optimizations.
22812 (define_insn "*f2xm1xf2_i387"
22813 [(set (match_operand:XF 0 "register_operand" "=f")
22814 (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
22815 UNSPEC_F2XM1))]
22816 "TARGET_USE_FANCY_MATH_387
22817 && flag_unsafe_math_optimizations"
22818 "f2xm1"
22819 [(set_attr "type" "fpspc")
22820 (set_attr "znver1_decode" "vector")
22821 (set_attr "mode" "XF")])
22822
;; The x87 fscale instruction.  Inputs are operands 2 and 3, tied by the
;; matching constraints "0" and "1" to output operands 0 and 1.  Operand 0
;; receives UNSPEC_FSCALE_FRACT and operand 1 receives UNSPEC_FSCALE_EXP
;; of the two inputs.
;; Requires x87 fancy math and flag_unsafe_math_optimizations.
22823 (define_insn "fscalexf4_i387"
22824 [(set (match_operand:XF 0 "register_operand" "=f")
22825 (unspec:XF [(match_operand:XF 2 "register_operand" "0")
22826 (match_operand:XF 3 "register_operand" "1")]
22827 UNSPEC_FSCALE_FRACT))
22828 (set (match_operand:XF 1 "register_operand" "=f")
22829 (unspec:XF [(match_dup 2) (match_dup 3)]
22830 UNSPEC_FSCALE_EXP))]
22831 "TARGET_USE_FANCY_MATH_387
22832 && flag_unsafe_math_optimizations"
22833 "fscale"
22834 [(set_attr "type" "fpspc")
22835 (set_attr "znver1_decode" "vector")
22836 (set_attr "mode" "XF")])
22837
;; Shared core of the exp / exp10 / exp2 expanders.  Operand 1 is the
;; argument and operand 2 a multiplier supplied by the caller.  Sequence:
;;   op3 = op1 * op2
;;   op4 = UNSPEC_FRNDINT (op3)
;;   op5 = op3 - op4
;;   op6 = UNSPEC_F2XM1 (op5)
;;   op8 = op6 + op7        (op7 is loaded with the constant 1)
;;   op0 = UNSPEC_FSCALE_FRACT (op8, op4); the UNSPEC_FSCALE_EXP output
;;         goes to op9 and is not used further here.
;; The preparation code creates the temporaries (operands 3..9).
(define_expand "expNcorexf3"
  [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand")
                               (match_operand:XF 2 "register_operand")))
   (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT))
   (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4)))
   (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1))
   (set (match_dup 8) (plus:XF (match_dup 6) (match_dup 7)))
   (parallel [(set (match_operand:XF 0 "register_operand")
                   (unspec:XF [(match_dup 8) (match_dup 4)]
                              UNSPEC_FSCALE_FRACT))
              (set (match_dup 9)
                   (unspec:XF [(match_dup 8) (match_dup 4)]
                              UNSPEC_FSCALE_EXP))])]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  /* Fresh XFmode pseudos for operands 3 through 9.  */
  for (int opno = 3; opno <= 9; ++opno)
    operands[opno] = gen_reg_rtx (XFmode);

  emit_move_insn (operands[7], CONST1_RTX (XFmode));
})
22861
;; Expand exp in XFmode via expNcorexf3, passing x87 standard constant 5
;; (the one loaded by fldl2e) as the multiplier.
(define_expand "expxf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  rtx fldl2e_const = standard_80387_constant_rtx (5); /* fldl2e */
  rtx multiplier = force_reg (XFmode, fldl2e_const);

  emit_insn (gen_expNcorexf3 (operands[0], operands[1], multiplier));
  DONE;
})
22873
;; Expand exp for MODEF modes through expxf2: extend the argument to
;; XFmode, apply expxf2, truncate the result.  Requires x87 fancy math and
;; unsafe math optimizations, and a mode that is not handled purely by SSE
;; math (unless TARGET_MIX_SSE_I387).
(define_expand "exp<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_expxf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
22890
;; Expand exp10 in XFmode via expNcorexf3, passing x87 standard constant 6
;; (the one loaded by fldl2t) as the multiplier.
(define_expand "exp10xf2"
  [(use (match_operand:XF 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && flag_unsafe_math_optimizations"
{
  rtx fldl2t_const = standard_80387_constant_rtx (6); /* fldl2t */
  rtx multiplier = force_reg (XFmode, fldl2t_const);

  emit_insn (gen_expNcorexf3 (operands[0], operands[1], multiplier));
  DONE;
})
22902
;; Expand exp10 for MODEF modes through exp10xf2: extend the argument to
;; XFmode, apply exp10xf2, truncate the result.  Requires x87 fancy math
;; and unsafe math optimizations, and a mode that is not handled purely by
;; SSE math (unless TARGET_MIX_SSE_I387).
(define_expand "exp10<mode>2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387
   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
       || TARGET_MIX_SSE_I387)
   && flag_unsafe_math_optimizations"
{
  rtx xf_result = gen_reg_rtx (XFmode);
  rtx xf_arg = gen_reg_rtx (XFmode);

  emit_insn (gen_extend<mode>xf2 (xf_arg, operands[1]));
  emit_insn (gen_exp10xf2 (xf_result, xf_arg));
  emit_insn (gen_truncxf<mode>2 (operands[0], xf_result));
  DONE;
})
22919
;; Expander "exp2xf2": same shape as expxf2 and exp10xf2, but the
;; multiplier handed to gen_expNcorexf3 is the XFmode constant 1
;; (CONST1_RTX), forced into a register.  XFmode operand 0 is the
;; result, XFmode operand 1 the input.  Requires
;; TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
22920 (define_expand "exp2xf2"
22921 [(use (match_operand:XF 0 "register_operand"))
22922 (use (match_operand:XF 1 "register_operand"))]
22923 "TARGET_USE_FANCY_MATH_387
22924 && flag_unsafe_math_optimizations"
22925 {
22926 rtx op2 = force_reg (XFmode, CONST1_RTX (XFmode));
22927
22928 emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2));
22929 DONE;
22930 })
22931
;; Expander "exp2<mode>2" for each mode of the MODEF iterator: extends
;; operand 1 to XFmode, runs gen_exp2xf2 on fresh XFmode pseudos, and
;; truncates the result into register operand 0.
;; Enabled when 387 fancy math is available, <MODE>mode math is not done
;; in SSE (unless TARGET_MIX_SSE_I387), and
;; flag_unsafe_math_optimizations is set.
22932 (define_expand "exp2<mode>2"
22933 [(use (match_operand:MODEF 0 "register_operand"))
22934 (use (match_operand:MODEF 1 "general_operand"))]
22935 "TARGET_USE_FANCY_MATH_387
22936 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
22937 || TARGET_MIX_SSE_I387)
22938 && flag_unsafe_math_optimizations"
22939 {
22940 rtx op0 = gen_reg_rtx (XFmode);
22941 rtx op1 = gen_reg_rtx (XFmode);
22942
22943 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
22944 emit_insn (gen_exp2xf2 (op0, op1));
22945 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
22946 DONE;
22947 })
22948
;; Expander "expm1xf2": XFmode operand 0 is the result, XFmode operand 1
;; the input.  Unlike the exp* expanders above, the whole sequence is
;; spelled out in the RTL template; the preparation code only creates
;; fresh XFmode pseudos for operands 2..12 and loads two constants:
;;   operand 2 <- x87 standard constant 5 (annotated "fldl2e")
;;   operand 9 <- 1.0 (CONST1_RTX)
;; Emitted steps, in template order:
;;   op3  = op1 * op2
;;   op4  = UNSPEC_FRNDINT (op3)
;;   op5  = op3 - op4
;;   op6  = UNSPEC_F2XM1 (op5)
;;   op7, op8   = UNSPEC_FSCALE_FRACT / _EXP of (op6, op4)
;;   op10, op11 = UNSPEC_FSCALE_FRACT / _EXP of (op9, op8)
;;   op12 = op10 - op9
;;   op0  = op12 + op7
;; NOTE(review): assuming the unspecs model the x87 instructions of the
;; same names, this evaluates (2**f - 1) * 2**n + (2**n - 1) with
;; n = rint (x * log2 (e)) and f the remainder -- confirm against the
;; insn patterns that implement these unspecs.
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
22949 (define_expand "expm1xf2"
22950 [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand")
22951 (match_dup 2)))
22952 (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT))
22953 (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4)))
22954 (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1))
22955 (parallel [(set (match_dup 7)
22956 (unspec:XF [(match_dup 6) (match_dup 4)]
22957 UNSPEC_FSCALE_FRACT))
22958 (set (match_dup 8)
22959 (unspec:XF [(match_dup 6) (match_dup 4)]
22960 UNSPEC_FSCALE_EXP))])
22961 (parallel [(set (match_dup 10)
22962 (unspec:XF [(match_dup 9) (match_dup 8)]
22963 UNSPEC_FSCALE_FRACT))
22964 (set (match_dup 11)
22965 (unspec:XF [(match_dup 9) (match_dup 8)]
22966 UNSPEC_FSCALE_EXP))])
22967 (set (match_dup 12) (minus:XF (match_dup 10) (match_dup 9)))
22968 (set (match_operand:XF 0 "register_operand")
22969 (plus:XF (match_dup 12) (match_dup 7)))]
22970 "TARGET_USE_FANCY_MATH_387
22971 && flag_unsafe_math_optimizations"
22972 {
22973 int i;
22974
22975 for (i = 2; i < 13; i++)
22976 operands[i] = gen_reg_rtx (XFmode);
22977
22978 emit_move_insn (operands[2], standard_80387_constant_rtx (5)); /* fldl2e */
22979 emit_move_insn (operands[9], CONST1_RTX (XFmode));
22980 })
22981
;; Expander "expm1<mode>2" for each mode of the MODEF iterator: extends
;; operand 1 to XFmode, runs gen_expm1xf2 on fresh XFmode pseudos, and
;; truncates the result into register operand 0.
;; Enabled when 387 fancy math is available, <MODE>mode math is not done
;; in SSE (unless TARGET_MIX_SSE_I387), and
;; flag_unsafe_math_optimizations is set.
22982 (define_expand "expm1<mode>2"
22983 [(use (match_operand:MODEF 0 "register_operand"))
22984 (use (match_operand:MODEF 1 "general_operand"))]
22985 "TARGET_USE_FANCY_MATH_387
22986 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
22987 || TARGET_MIX_SSE_I387)
22988 && flag_unsafe_math_optimizations"
22989 {
22990 rtx op0 = gen_reg_rtx (XFmode);
22991 rtx op1 = gen_reg_rtx (XFmode);
22992
22993 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
22994 emit_insn (gen_expm1xf2 (op0, op1));
22995 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
22996 DONE;
22997 })
22998
;; Insn "avx512f_scalef<mode>2" for each mode of the MODEF iterator:
;; operand 0 = UNSPEC_SCALEF (operand 1, operand 2), emitted as
;; vscalef<ssemodesuffix>.  Operands 0 and 1 are registers (constraint
;; "v"); operand 2 may be a register or memory ("vm").
;; Only available with TARGET_AVX512F; encoded with the EVEX prefix.
;; Used by the ldexp<mode>3 expander below.
22999 (define_insn "avx512f_scalef<mode>2"
23000 [(set (match_operand:MODEF 0 "register_operand" "=v")
23001 (unspec:MODEF
23002 [(match_operand:MODEF 1 "register_operand" "v")
23003 (match_operand:MODEF 2 "nonimmediate_operand" "vm")]
23004 UNSPEC_SCALEF))]
23005 "TARGET_AVX512F"
23006 "vscalef<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
23007 [(set_attr "prefix" "evex")
23008 (set_attr "mode" "<MODE>")])
23009
;; Expander "ldexpxf3": XFmode operand 0 is the result, XFmode operand 1
;; the value to scale, SImode operand 2 the integer exponent.
;; The exponent is first converted to XFmode (gen_floatsixf2) into tmp1,
;; then gen_fscalexf4_i387 is emitted with operands[0] and tmp2 as its
;; two outputs and operands[1], tmp1 as its inputs.  tmp2 is a fresh
;; pseudo that is not read again in this expander.
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
23010 (define_expand "ldexpxf3"
23011 [(match_operand:XF 0 "register_operand")
23012 (match_operand:XF 1 "register_operand")
23013 (match_operand:SI 2 "register_operand")]
23014 "TARGET_USE_FANCY_MATH_387
23015 && flag_unsafe_math_optimizations"
23016 {
23017 rtx tmp1 = gen_reg_rtx (XFmode);
23018 rtx tmp2 = gen_reg_rtx (XFmode);
23019
23020 emit_insn (gen_floatsixf2 (tmp1, operands[2]));
23021 emit_insn (gen_fscalexf4_i387 (operands[0], tmp2,
23022 operands[1], tmp1));
23023 DONE;
23024 })
23025
;; Expander "ldexp<mode>3" for each mode of the MODEF iterator.
;; Operand 0 is the result register, operand 1 the value to scale,
;; SImode operand 2 the integer exponent.
;; The condition is (x87 path usable OR (TARGET_AVX512F && TARGET_SSE_MATH))
;; AND flag_unsafe_math_optimizations.
;; Two code paths:
;;  - TARGET_AVX512F && TARGET_SSE_MATH (preferred): operand 1 is forced
;;    into a register, the exponent is converted to <MODE>mode with
;;    gen_floatsi<mode>2, and avx512f_scalef<mode>2 produces the result.
;;  - otherwise: operand 1 is extended to XFmode, gen_ldexpxf3 does the
;;    work, and the result is truncated back into operand 0.
23026 (define_expand "ldexp<mode>3"
23027 [(use (match_operand:MODEF 0 "register_operand"))
23028 (use (match_operand:MODEF 1 "general_operand"))
23029 (use (match_operand:SI 2 "register_operand"))]
23030 "((TARGET_USE_FANCY_MATH_387
23031 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
23032 || TARGET_MIX_SSE_I387))
23033 || (TARGET_AVX512F && TARGET_SSE_MATH))
23034 && flag_unsafe_math_optimizations"
23035 {
23036 /* Prefer avx512f version. */
23037 if (TARGET_AVX512F && TARGET_SSE_MATH)
23038 {
23039 rtx op2 = gen_reg_rtx (<MODE>mode);
23040 operands[1] = force_reg (<MODE>mode, operands[1]);
23041
23042 emit_insn (gen_floatsi<mode>2 (op2, operands[2]));
23043 emit_insn (gen_avx512f_scalef<mode>2 (operands[0], operands[1], op2));
23044 }
23045 else
23046 {
23047 rtx op0 = gen_reg_rtx (XFmode);
23048 rtx op1 = gen_reg_rtx (XFmode);
23049
23050 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
23051 emit_insn (gen_ldexpxf3 (op0, op1, operands[2]));
23052 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
23053 }
23054 DONE;
23055 })
23056
;; Expander "scalbxf3": emits one parallel that sets XFmode operand 0 to
;; UNSPEC_FSCALE_FRACT (operand 1, operand 2) and a second output,
;; operand 3, to UNSPEC_FSCALE_EXP of the same inputs.
;; Operand 3 is not supplied by the caller; the preparation statement
;; creates a fresh XFmode pseudo for it.
;; All three caller-visible operands use the "register_operand"
;; predicate.
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
23057 (define_expand "scalbxf3"
23058 [(parallel [(set (match_operand:XF 0 "register_operand")
23059 (unspec:XF [(match_operand:XF 1 "register_operand")
23060 (match_operand:XF 2 "register_operand")]
23061 UNSPEC_FSCALE_FRACT))
23062 (set (match_dup 3)
23063 (unspec:XF [(match_dup 1) (match_dup 2)]
23064 UNSPEC_FSCALE_EXP))])]
23065 "TARGET_USE_FANCY_MATH_387
23066 && flag_unsafe_math_optimizations"
23067 "operands[3] = gen_reg_rtx (XFmode);")
23068
;; Expander "scalb<mode>3" for each mode of the MODEF iterator.
;; Both inputs (operands 1 and 2, general_operand) are extended to
;; XFmode, gen_scalbxf3 computes into a fresh XFmode pseudo, and the
;; result is truncated into register operand 0.
;; Enabled when 387 fancy math is available, <MODE>mode math is not done
;; in SSE (unless TARGET_MIX_SSE_I387), and
;; flag_unsafe_math_optimizations is set.
23069 (define_expand "scalb<mode>3"
23070 [(use (match_operand:MODEF 0 "register_operand"))
23071 (use (match_operand:MODEF 1 "general_operand"))
23072 (use (match_operand:MODEF 2 "general_operand"))]
23073 "TARGET_USE_FANCY_MATH_387
23074 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
23075 || TARGET_MIX_SSE_I387)
23076 && flag_unsafe_math_optimizations"
23077 {
23078 rtx op0 = gen_reg_rtx (XFmode);
23079 rtx op1 = gen_reg_rtx (XFmode);
23080 rtx op2 = gen_reg_rtx (XFmode);
23081
23082 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
23083 emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
23084 emit_insn (gen_scalbxf3 (op0, op1, op2));
23085 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
23086 DONE;
23087 })
23088
;; Expander "significandxf2": emits one parallel that sets XFmode
;; operand 0 to UNSPEC_XTRACT_FRACT (operand 1) and a second output,
;; operand 2, to UNSPEC_XTRACT_EXP (operand 1).
;; Operand 2 is not supplied by the caller; the preparation statement
;; creates a fresh XFmode pseudo for it.
;; Requires TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations.
23089 (define_expand "significandxf2"
23090 [(parallel [(set (match_operand:XF 0 "register_operand")
23091 (unspec:XF [(match_operand:XF 1 "register_operand")]
23092 UNSPEC_XTRACT_FRACT))
23093 (set (match_dup 2)
23094 (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])]
23095 "TARGET_USE_FANCY_MATH_387
23096 && flag_unsafe_math_optimizations"
23097 "operands[2] = gen_reg_rtx (XFmode);")
23098
;; Expander "significand<mode>2" for each mode of the MODEF iterator:
;; extends operand 1 to XFmode, runs gen_significandxf2 on fresh XFmode
;; pseudos, and truncates the result into register operand 0.
;; Enabled when 387 fancy math is available, <MODE>mode math is not done
;; in SSE (unless TARGET_MIX_SSE_I387), and
;; flag_unsafe_math_optimizations is set.
23099 (define_expand "significand<mode>2"
23100 [(use (match_operand:MODEF 0 "register_operand"))
23101 (use (match_operand:MODEF 1 "general_operand"))]
23102 "TARGET_USE_FANCY_MATH_387
23103 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
23104 || TARGET_MIX_SSE_I387)
23105 && flag_unsafe_math_optimizations"
23106 {
23107 rtx op0 = gen_reg_rtx (XFmode);
23108 rtx op1 = gen_reg_rtx (XFmode);
23109
23110 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
23111 emit_insn (gen_significandxf2 (op0, op1));
23112 emit_insn (gen_truncxf<mode>2 (operands[0], op0));
23113 DONE;
23114 })
23115 \f
23116
;; Insn "sse4_1_round<mode>2" for each mode of the MODEFH iterator:
;; operand 0 = UNSPEC_ROUND (operand 1, operand 2), where operand 2 is
;; an immediate accepted by const_0_to_15_operand.
;; Requires TARGET_SSE4_1.  There are five alternatives:
;;   0-2  emit %vround<ssemodesuffix>  (isa "noavx512f")
;;   3-4  emit vrndscale<ssemodesuffix> (isa "avx512f", EVEX prefix)
;; Alternative 0 ties the input to the output ("0"); alternatives 2 and
;; 4 take the input from memory ("jm" / "m") and are the ones flagged
;; "avx_partial_xmm_update" = true.  Alternative 2 carries the "gpr16"
;; addr attribute.
;; "preferred_for_speed" is true whenever TARGET_AVX; otherwise
;; alternatives 1 and 2 are preferred only when
;; !TARGET_SSE_PARTIAL_REG_DEPENDENCY, and all others are preferred.
23117 (define_insn "sse4_1_round<mode>2"
23118 [(set (match_operand:MODEFH 0 "register_operand" "=x,x,x,v,v")
23119 (unspec:MODEFH
23120 [(match_operand:MODEFH 1 "nonimmediate_operand" "0,x,jm,v,m")
23121 (match_operand:SI 2 "const_0_to_15_operand")]
23122 UNSPEC_ROUND))]
23123 "TARGET_SSE4_1"
23124 "@
23125 %vround<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2}
23126 %vround<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2}
23127 %vround<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}
23128 vrndscale<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2}
23129 vrndscale<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}"
23130 [(set_attr "type" "ssecvt")
23131 (set_attr "prefix_extra" "1,1,1,*,*")
23132 (set_attr "length_immediate" "1")
23133 (set_attr "addr" "*,*,gpr16,*,*")
23134 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,evex,evex")
23135 (set_attr "isa" "noavx512f,noavx512f,noavx512f,avx512f,avx512f")
23136 (set_attr "avx_partial_xmm_update" "false,false,true,false,true")
23137 (set_attr "mode" "<MODE>")
23138 (set (attr "preferred_for_speed")
23139 (cond [(match_test "TARGET_AVX")
23140 (symbol_ref "true")
23141 (eq_attr "alternative" "1,2")
23142 (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
23143 ]
23144 (symbol_ref "true")))])
23145
;; Insn "rintxf2": XFmode operand 0 = UNSPEC_FRNDINT (operand 1),
;; emitted as a single "frndint".  The input is tied to the output
;; register (constraint "0" against "=f"), so the operation happens in
;; place.  Requires TARGET_USE_FANCY_MATH_387.
23146 (define_insn "rintxf2"
23147 [(set (match_operand:XF 0 "register_operand" "=f")
23148 (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
23149 UNSPEC_FRNDINT))]
23150 "TARGET_USE_FANCY_MATH_387"
23151 "frndint"
23152 [(set_attr "type" "fpspc")
23153 (set_attr "znver1_decode" "vector")
23154 (set_attr "mode" "XF")])
23155
;; Expander "rinthf2": HFmode rint, implemented by emitting
;; sse4_1_roundhf2 with the immediate ROUND_MXCSR.  Operand 0 is a
;; register, operand 1 a register or memory.  Requires
;; TARGET_AVX512FP16.
23156 (define_expand "rinthf2"
23157 [(match_operand:HF 0 "register_operand")
23158 (match_operand:HF 1 "nonimmediate_operand")]
23159 "TARGET_AVX512FP16"
23160 {
23161 emit_insn (gen_sse4_1_roundhf2 (operands[0],
23162 operands[1],
23163 GEN_INT (ROUND_MXCSR)));
23164 DONE;
23165 })
23166
;; Expander "rint<mode>2" for each mode of the MODEF iterator.
;; Available when either the 387 can be used or <MODE>mode math is done
;; in SSE.  Code paths:
;;  - SSE math: with TARGET_SSE4_1 emit sse4_1_round<mode>2 with the
;;    immediate ROUND_MXCSR; without it call ix86_expand_rint.
;;  - otherwise (x87): extend operand 1 to XFmode, emit rintxf2, and
;;    convert back with gen_truncxf<mode>2_i387_noop_unspec.
23167 (define_expand "rint<mode>2"
23168 [(use (match_operand:MODEF 0 "register_operand"))
23169 (use (match_operand:MODEF 1 "nonimmediate_operand"))]
23170 "TARGET_USE_FANCY_MATH_387
23171 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
23172 {
23173 if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
23174 {
23175 if (TARGET_SSE4_1)
23176 emit_insn (gen_sse4_1_round<mode>2
23177 (operands[0], operands[1], GEN_INT (ROUND_MXCSR)));
23178 else
23179 ix86_expand_rint (operands[0], operands[1]);
23180 }
23181 else
23182 {
23183 rtx op0 = gen_reg_rtx (XFmode);
23184 rtx op1 = gen_reg_rtx (XFmode);
23185
23186 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
23187 emit_insn (gen_rintxf2 (op0, op1));
23188 emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
23189 }
23190 DONE;
23191 })
23192
;; Expander "nearbyintxf2": template-only expander that emits
;; operand 0 = UNSPEC_FRNDINT (operand 1) in XFmode, i.e. the same RTL
;; that the rintxf2 insn matches.  Only enabled when
;; TARGET_USE_FANCY_MATH_387 holds and flag_trapping_math is off.
23193 (define_expand "nearbyintxf2"
23194 [(set (match_operand:XF 0 "register_operand")
23195 (unspec:XF [(match_operand:XF 1 "register_operand")]
23196 UNSPEC_FRNDINT))]
23197 "TARGET_USE_FANCY_MATH_387
23198 && !flag_trapping_math")
23199
;; Expander "nearbyinthf2": HFmode nearbyint, implemented by emitting
;; sse4_1_roundhf2 with the immediate ROUND_MXCSR | ROUND_NO_EXC (the
;; rinthf2 expander uses ROUND_MXCSR alone).  Requires
;; TARGET_AVX512FP16.
23200 (define_expand "nearbyinthf2"
23201 [(match_operand:HF 0 "register_operand")
23202 (match_operand:HF 1 "nonimmediate_operand")]
23203 "TARGET_AVX512FP16"
23204 {
23205 emit_insn (gen_sse4_1_roundhf2 (operands[0],
23206 operands[1],
23207 GEN_INT (ROUND_MXCSR | ROUND_NO_EXC)));
23208 DONE;
23209 })
23210
;; Expander "nearbyint<mode>2" for each mode of the MODEF iterator.
;; Enabled when either
;;  - the x87 path is usable (387 fancy math, <MODE>mode math not in SSE
;;    unless TARGET_MIX_SSE_I387) and flag_trapping_math is off, or
;;  - TARGET_SSE4_1 && TARGET_SSE_MATH.
;; With SSE4.1 and SSE math it emits sse4_1_round<mode>2 with the
;; immediate ROUND_MXCSR | ROUND_NO_EXC; otherwise it extends operand 1
;; to XFmode, emits nearbyintxf2, and converts back with
;; gen_truncxf<mode>2_i387_noop_unspec.
23211 (define_expand "nearbyint<mode>2"
23212 [(use (match_operand:MODEF 0 "register_operand"))
23213 (use (match_operand:MODEF 1 "nonimmediate_operand"))]
23214 "(TARGET_USE_FANCY_MATH_387
23215 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
23216 || TARGET_MIX_SSE_I387)
23217 && !flag_trapping_math)
23218 || (TARGET_SSE4_1 && TARGET_SSE_MATH)"
23219 {
23220 if (TARGET_SSE4_1 && TARGET_SSE_MATH)
23221 emit_insn (gen_sse4_1_round<mode>2
23222 (operands[0], operands[1], GEN_INT (ROUND_MXCSR
23223 | ROUND_NO_EXC)));
23224 else
23225 {
23226 rtx op0 = gen_reg_rtx (XFmode);
23227 rtx op1 = gen_reg_rtx (XFmode);
23228
23229 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
23230 emit_insn (gen_nearbyintxf2 (op0, op1));
23231 emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
23232 }
23233 DONE;
23234 })
23235
;; Expander "roundhf2": HFmode round, delegated entirely to
;; ix86_expand_round_sse4.  Both operands are registers.
;; Enabled for TARGET_AVX512FP16 only when both flag_trapping_math and
;; flag_rounding_math are off.
23236 (define_expand "roundhf2"
23237 [(match_operand:HF 0 "register_operand")
23238 (match_operand:HF 1 "register_operand")]
23239 "TARGET_AVX512FP16 && !flag_trapping_math && !flag_rounding_math"
23240 {
23241 ix86_expand_round_sse4 (operands[0], operands[1]);
23242 DONE;
23243 })
23244
;; Expander "round<mode>2" for each mode of the X87MODEF iterator.
;; Enabled when either
;;  - the x87 path is usable (387 fancy math, <MODE>mode math not in SSE
;;    unless TARGET_MIX_SSE_I387), flag_unsafe_math_optimizations is on,
;;    and (flag_fp_int_builtin_inexact || !flag_trapping_math); or
;;  - <MODE>mode math is done in SSE and both flag_trapping_math and
;;    flag_rounding_math are off.
;; Code paths (the SSE test is repeated in the body to pick the path):
;;  - SSE, TARGET_SSE4_1: force operand 1 into a register and call
;;    ix86_expand_round_sse4.
;;  - SSE, no SSE4.1, 64-bit target or mode other than DFmode:
;;    ix86_expand_round.
;;  - SSE, no SSE4.1, 32-bit target with DFmode: ix86_expand_rounddf_32.
;;  - otherwise: force operand 1 into a register and call
;;    ix86_emit_i387_round.
23245 (define_expand "round<mode>2"
23246 [(match_operand:X87MODEF 0 "register_operand")
23247 (match_operand:X87MODEF 1 "nonimmediate_operand")]
23248 "(TARGET_USE_FANCY_MATH_387
23249 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
23250 || TARGET_MIX_SSE_I387)
23251 && flag_unsafe_math_optimizations
23252 && (flag_fp_int_builtin_inexact || !flag_trapping_math))
23253 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
23254 && !flag_trapping_math && !flag_rounding_math)"
23255 {
23256 if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
23257 && !flag_trapping_math && !flag_rounding_math)
23258 {
23259 if (TARGET_SSE4_1)
23260 {
23261 operands[1] = force_reg (<MODE>mode, operands[1]);
23262 ix86_expand_round_sse4 (operands[0], operands[1]);
23263 }
23264 else if (TARGET_64BIT || (<MODE>mode != DFmode))
23265 ix86_expand_round (operands[0], operands[1]);
23266 else
23267 ix86_expand_rounddf_32 (operands[0], operands[1]);
23268 }
23269 else
23270 {
23271 operands[1] = force_reg (<MODE>mode, operands[1]);
23272 ix86_emit_i387_round (operands[0], operands[1]);
23273 }
23274 DONE;
23275 })
23276
;; Insn "lrintxfdi2": stores UNSPEC_FIST of XFmode register operand 1
;; into DImode operand 0, which must be memory ("=m").  The pattern
;; additionally clobbers an XFmode scratch register (operand 2,
;; early-clobber "=&f"); the SWI24 variant below has no such scratch.
;; The assembly is produced by output_fix_trunc (insn, operands, false).
;; Requires TARGET_USE_FANCY_MATH_387.
23277 (define_insn "lrintxfdi2"
23278 [(set (match_operand:DI 0 "nonimmediate_operand" "=m")
23279 (unspec:DI [(match_operand:XF 1 "register_operand" "f")]
23280 UNSPEC_FIST))
23281 (clobber (match_scratch:XF 2 "=&f"))]
23282 "TARGET_USE_FANCY_MATH_387"
23283 "* return output_fix_trunc (insn, operands, false);"
23284 [(set_attr "type" "fpspc")
23285 (set_attr "mode" "DI")])
23286
;; Insn "lrintxf<mode>2" for each mode of the SWI24 iterator: stores
;; UNSPEC_FIST of XFmode register operand 1 into memory operand 0
;; ("=m").  The assembly is produced by
;; output_fix_trunc (insn, operands, false).
;; Requires TARGET_USE_FANCY_MATH_387.
23287 (define_insn "lrintxf<mode>2"
23288 [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m")
23289 (unspec:SWI24 [(match_operand:XF 1 "register_operand" "f")]
23290 UNSPEC_FIST))]
23291 "TARGET_USE_FANCY_MATH_387"
23292 "* return output_fix_trunc (insn, operands, false);"
23293 [(set_attr "type" "fpspc")
23294 (set_attr "mode" "<MODE>")])
23295
;; Expander "lroundhf<mode>2" for each mode of the SWI248 iterator:
;; HFmode to integer lround.  The RTL template is not emitted, because
;; the body calls ix86_expand_lround and finishes with DONE.
;; Enabled for TARGET_AVX512FP16 only when both flag_trapping_math and
;; flag_rounding_math are off.
23296 (define_expand "lroundhf<mode>2"
23297 [(set (match_operand:SWI248 0 "register_operand")
23298 (unspec:SWI248 [(match_operand:HF 1 "nonimmediate_operand")]
23299 UNSPEC_FIX_NOTRUNC))]
23300 "TARGET_AVX512FP16 && !flag_trapping_math && !flag_rounding_math"
23301 {
23302 ix86_expand_lround (operands[0], operands[1]);
23303 DONE;
23304 })
23305
;; Expander "lrinthf<mode>2" for each mode of the SWI48 iterator:
;; template-only expander that emits
;; operand 0 = UNSPEC_FIX_NOTRUNC (HFmode operand 1).
;; Requires TARGET_AVX512FP16.
;; NOTE(review): the insn that matches this RTL is defined outside this
;; excerpt.
23306 (define_expand "lrinthf<mode>2"
23307 [(set (match_operand:SWI48 0 "register_operand")
23308 (unspec:SWI48 [(match_operand:HF 1 "nonimmediate_operand")]
23309 UNSPEC_FIX_NOTRUNC))]
23310 "TARGET_AVX512FP16")
23311
;; Expander "lrint<MODEF:mode><SWI48:mode>2" for every combination of
;; the MODEF and SWI48 iterators: template-only expander that emits
;; operand 0 = UNSPEC_FIX_NOTRUNC (operand 1).
;; Enabled only when <MODEF:MODE>mode math is done in SSE.
;; NOTE(review): the insn that matches this RTL is defined outside this
;; excerpt.
23312 (define_expand "lrint<MODEF:mode><SWI48:mode>2"
23313 [(set (match_operand:SWI48 0 "register_operand")
23314 (unspec:SWI48 [(match_operand:MODEF 1 "nonimmediate_operand")]
23315 UNSPEC_FIX_NOTRUNC))]
23316 "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH")
23317
;; Expander "lround<X87MODEF:mode><SWI248x:mode>2" for every combination
;; of the X87MODEF (source) and SWI248x (destination) iterators.
;; Enabled when either
;;  - the x87 path is usable (387 fancy math, source-mode math not in
;;    SSE unless TARGET_MIX_SSE_I387) and
;;    flag_unsafe_math_optimizations is on; or
;;  - source-mode math is done in SSE, the destination is not HImode,
;;    the destination is not DImode unless TARGET_64BIT, and both
;;    flag_trapping_math and flag_rounding_math are off.
;; The body re-tests the second (SSE) clause: when it holds it calls
;; ix86_expand_lround, otherwise ix86_emit_i387_round.
23318 (define_expand "lround<X87MODEF:mode><SWI248x:mode>2"
23319 [(match_operand:SWI248x 0 "nonimmediate_operand")
23320 (match_operand:X87MODEF 1 "register_operand")]
23321 "(TARGET_USE_FANCY_MATH_387
23322 && (!(SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)
23323 || TARGET_MIX_SSE_I387)
23324 && flag_unsafe_math_optimizations)
23325 || (SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH
23326 && <SWI248x:MODE>mode != HImode
23327 && ((<SWI248x:MODE>mode != DImode) || TARGET_64BIT)
23328 && !flag_trapping_math && !flag_rounding_math)"
23329 {
23330 if (SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH
23331 && <SWI248x:MODE>mode != HImode
23332 && ((<SWI248x:MODE>mode != DImode) || TARGET_64BIT)
23333 && !flag_trapping_math && !flag_rounding_math)
23334 ix86_expand_lround (operands[0], operands[1]);
23335 else
23336 ix86_emit_i387_round (operands[0], operands[1]);
23337 DONE;
23338 })
23339
;; Int iterator over the four UNSPEC_FRNDINT_* codes (roundeven, floor,
;; ceil, trunc).  Patterns below that mention FRNDINT_ROUNDING are
;; instantiated once per code.
23340 (define_int_iterator FRNDINT_ROUNDING
23341 [UNSPEC_FRNDINT_ROUNDEVEN
23342 UNSPEC_FRNDINT_FLOOR
23343 UNSPEC_FRNDINT_CEIL
23344 UNSPEC_FRNDINT_TRUNC])
23345
;; Int iterator over the two UNSPEC_FIST_* codes (floor and ceil).
;; Patterns below that mention FIST_ROUNDING are instantiated once per
;; code.
23346 (define_int_iterator FIST_ROUNDING
23347 [UNSPEC_FIST_FLOOR
23348 UNSPEC_FIST_CEIL])
23349
;; Int attribute "rounding_insn": maps each FRNDINT_ROUNDING and
;; FIST_ROUNDING code to the name fragment used in the expander names
;; below (<rounding_insn>xf2, l<rounding_insn>xf<mode>2, ...).
;; It differs from the "rounding" attribute only for
;; UNSPEC_FRNDINT_TRUNC, which maps to "btrunc" here.
23350 ;; Base name for define_insn
23351 (define_int_attr rounding_insn
23352 [(UNSPEC_FRNDINT_ROUNDEVEN "roundeven")
23353 (UNSPEC_FRNDINT_FLOOR "floor")
23354 (UNSPEC_FRNDINT_CEIL "ceil")
23355 (UNSPEC_FRNDINT_TRUNC "btrunc")
23356 (UNSPEC_FIST_FLOOR "floor")
23357 (UNSPEC_FIST_CEIL "ceil")])
23358
;; Int attribute "rounding": lower-case name of the rounding kind for
;; each FRNDINT_ROUNDING and FIST_ROUNDING code.  In this excerpt it is
;; used in insn names (frndintxf2_<rounding>, fist<mode>2_<rounding>)
;; and as the value of the "i387_cw" attribute.
23359 (define_int_attr rounding
23360 [(UNSPEC_FRNDINT_ROUNDEVEN "roundeven")
23361 (UNSPEC_FRNDINT_FLOOR "floor")
23362 (UNSPEC_FRNDINT_CEIL "ceil")
23363 (UNSPEC_FRNDINT_TRUNC "trunc")
23364 (UNSPEC_FIST_FLOOR "floor")
23365 (UNSPEC_FIST_CEIL "ceil")])
23366
;; Int attribute "ROUNDING": upper-case name of the rounding kind for
;; each FRNDINT_ROUNDING and FIST_ROUNDING code.  In this excerpt it is
;; pasted into C identifiers: I387_<ROUNDING>, SLOT_CW_<ROUNDING> and
;; ROUND_<ROUNDING>.
23367 (define_int_attr ROUNDING
23368 [(UNSPEC_FRNDINT_ROUNDEVEN "ROUNDEVEN")
23369 (UNSPEC_FRNDINT_FLOOR "FLOOR")
23370 (UNSPEC_FRNDINT_CEIL "CEIL")
23371 (UNSPEC_FRNDINT_TRUNC "TRUNC")
23372 (UNSPEC_FIST_FLOOR "FLOOR")
23373 (UNSPEC_FIST_CEIL "CEIL")])
23374
;; Insn-and-split "frndintxf2_<rounding>", one per FRNDINT_ROUNDING
;; code: XFmode operand 0 = unspec (operand 1), with a FLAGS_REG
;; clobber.  It only matches before reload (ix86_pre_reload_split ()),
;; outputs "#", and is always split ("&& 1").
;; The split code
;;  - sets ix86_optimize_mode_switching[I387_<ROUNDING>] to 1,
;;  - obtains two HImode stack slots, SLOT_CW_STORED (operand 2) and
;;    SLOT_CW_<ROUNDING> (operand 3), via assign_386_stack_local,
;;  - emits frndintxf2_<rounding>_i387 with those four operands.
;; Condition: TARGET_USE_FANCY_MATH_387 and
;; (flag_fp_int_builtin_inexact || !flag_trapping_math).
23375 ;; Rounding mode control word calculation could clobber FLAGS_REG.
23376 (define_insn_and_split "frndintxf2_<rounding>"
23377 [(set (match_operand:XF 0 "register_operand")
23378 (unspec:XF [(match_operand:XF 1 "register_operand")]
23379 FRNDINT_ROUNDING))
23380 (clobber (reg:CC FLAGS_REG))]
23381 "TARGET_USE_FANCY_MATH_387
23382 && (flag_fp_int_builtin_inexact || !flag_trapping_math)
23383 && ix86_pre_reload_split ()"
23384 "#"
23385 "&& 1"
23386 [(const_int 0)]
23387 {
23388 ix86_optimize_mode_switching[I387_<ROUNDING>] = 1;
23389
23390 operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
23391 operands[3] = assign_386_stack_local (HImode, SLOT_CW_<ROUNDING>);
23392
23393 emit_insn (gen_frndintxf2_<rounding>_i387 (operands[0], operands[1],
23394 operands[2], operands[3]));
23395 DONE;
23396 }
23397 [(set_attr "type" "frndint")
23398 (set_attr "i387_cw" "<rounding>")
23399 (set_attr "mode" "XF")])
23400
;; Insn "frndintxf2_<rounding>_i387", one per FRNDINT_ROUNDING code:
;; the form produced by splitting frndintxf2_<rounding>.
;; Operand 1 is tied to output operand 0 ("0" against "=f"); operands 2
;; and 3 are HImode memory slots that are only used.
;; The emitted assembly is "fldcw %3", "frndint", "fldcw %2": load the
;; control word from operand 3, round, then load the control word from
;; operand 2.
;; Condition: TARGET_USE_FANCY_MATH_387 and
;; (flag_fp_int_builtin_inexact || !flag_trapping_math).
23401 (define_insn "frndintxf2_<rounding>_i387"
23402 [(set (match_operand:XF 0 "register_operand" "=f")
23403 (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
23404 FRNDINT_ROUNDING))
23405 (use (match_operand:HI 2 "memory_operand" "m"))
23406 (use (match_operand:HI 3 "memory_operand" "m"))]
23407 "TARGET_USE_FANCY_MATH_387
23408 && (flag_fp_int_builtin_inexact || !flag_trapping_math)"
23409 "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
23410 [(set_attr "type" "frndint")
23411 (set_attr "i387_cw" "<rounding>")
23412 (set_attr "mode" "XF")])
23413
;; Expander "<rounding_insn>xf2" (roundevenxf2, floorxf2, ceilxf2,
;; btruncxf2): template-only expander that emits the parallel
;; {operand 0 = unspec (operand 1); clobber FLAGS_REG} in XFmode, i.e.
;; the RTL matched by frndintxf2_<rounding> above.
;; Condition: TARGET_USE_FANCY_MATH_387 and
;; (flag_fp_int_builtin_inexact || !flag_trapping_math).
23414 (define_expand "<rounding_insn>xf2"
23415 [(parallel [(set (match_operand:XF 0 "register_operand")
23416 (unspec:XF [(match_operand:XF 1 "register_operand")]
23417 FRNDINT_ROUNDING))
23418 (clobber (reg:CC FLAGS_REG))])]
23419 "TARGET_USE_FANCY_MATH_387
23420 && (flag_fp_int_builtin_inexact || !flag_trapping_math)")
23421
;; Expander "<rounding_insn>hf2" (roundevenhf2, floorhf2, ceilhf2,
;; btrunchf2): HFmode variant.  The RTL template is not emitted; the
;; body emits sse4_1_roundhf2 with the immediate
;; ROUND_<ROUNDING> | ROUND_NO_EXC and finishes with DONE.
;; Requires TARGET_AVX512FP16.
23422 (define_expand "<rounding_insn>hf2"
23423 [(parallel [(set (match_operand:HF 0 "register_operand")
23424 (unspec:HF [(match_operand:HF 1 "register_operand")]
23425 FRNDINT_ROUNDING))
23426 (clobber (reg:CC FLAGS_REG))])]
23427 "TARGET_AVX512FP16"
23428 {
23429 emit_insn (gen_sse4_1_roundhf2 (operands[0], operands[1],
23430 GEN_INT (ROUND_<ROUNDING> | ROUND_NO_EXC)));
23431 DONE;
23432 })
23433
;; Expander "<rounding_insn><mode>2" for each FRNDINT_ROUNDING code and
;; each mode of the MODEF iterator.  The RTL template is never emitted;
;; every path ends in DONE.
;; Enabled when either
;;  - the x87 path is usable (387 fancy math, <MODE>mode math not in SSE
;;    unless TARGET_MIX_SSE_I387) and
;;    (flag_fp_int_builtin_inexact || !flag_trapping_math); or
;;  - <MODE>mode math is done in SSE and either TARGET_SSE4_1, or the
;;    rounding kind is not ROUNDEVEN and
;;    (flag_fp_int_builtin_inexact || !flag_trapping_math).
;; Code paths (the body re-tests the SSE clause):
;;  - SSE with TARGET_SSE4_1: sse4_1_round<mode>2 with the immediate
;;    ROUND_<ROUNDING> | ROUND_NO_EXC.
;;  - SSE without SSE4.1, 64-bit target or mode other than DFmode:
;;    ix86_expand_floorceil (third argument true for floor, false for
;;    ceil) or ix86_expand_trunc.
;;  - SSE without SSE4.1, 32-bit target with DFmode:
;;    ix86_expand_floorceildf_32 / ix86_expand_truncdf_32.
;;    In both non-SSE4.1 cases ROUNDEVEN hits gcc_unreachable (), which
;;    is consistent with the enabling condition excluding it.
;;  - otherwise (x87): extend operand 1 to XFmode, emit
;;    frndintxf2_<rounding>, and convert back with
;;    gen_truncxf<mode>2_i387_noop_unspec.
23434 (define_expand "<rounding_insn><mode>2"
23435 [(parallel [(set (match_operand:MODEF 0 "register_operand")
23436 (unspec:MODEF [(match_operand:MODEF 1 "register_operand")]
23437 FRNDINT_ROUNDING))
23438 (clobber (reg:CC FLAGS_REG))])]
23439 "(TARGET_USE_FANCY_MATH_387
23440 && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
23441 || TARGET_MIX_SSE_I387)
23442 && (flag_fp_int_builtin_inexact || !flag_trapping_math))
23443 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
23444 && (TARGET_SSE4_1
23445 || (ROUND_<ROUNDING> != ROUND_ROUNDEVEN
23446 && (flag_fp_int_builtin_inexact || !flag_trapping_math))))"
23447 {
23448 if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
23449 && (TARGET_SSE4_1
23450 || (ROUND_<ROUNDING> != ROUND_ROUNDEVEN
23451 && (flag_fp_int_builtin_inexact || !flag_trapping_math))))
23452 {
23453 if (TARGET_SSE4_1)
23454 emit_insn (gen_sse4_1_round<mode>2
23455 (operands[0], operands[1],
23456 GEN_INT (ROUND_<ROUNDING> | ROUND_NO_EXC)));
23457 else if (TARGET_64BIT || (<MODE>mode != DFmode))
23458 {
23459 if (ROUND_<ROUNDING> == ROUND_FLOOR)
23460 ix86_expand_floorceil (operands[0], operands[1], true);
23461 else if (ROUND_<ROUNDING> == ROUND_CEIL)
23462 ix86_expand_floorceil (operands[0], operands[1], false);
23463 else if (ROUND_<ROUNDING> == ROUND_TRUNC)
23464 ix86_expand_trunc (operands[0], operands[1]);
23465 else
23466 gcc_unreachable ();
23467 }
23468 else
23469 {
23470 if (ROUND_<ROUNDING> == ROUND_FLOOR)
23471 ix86_expand_floorceildf_32 (operands[0], operands[1], true);
23472 else if (ROUND_<ROUNDING> == ROUND_CEIL)
23473 ix86_expand_floorceildf_32 (operands[0], operands[1], false);
23474 else if (ROUND_<ROUNDING> == ROUND_TRUNC)
23475 ix86_expand_truncdf_32 (operands[0], operands[1]);
23476 else
23477 gcc_unreachable ();
23478 }
23479 }
23480 else
23481 {
23482 rtx op0 = gen_reg_rtx (XFmode);
23483 rtx op1 = gen_reg_rtx (XFmode);
23484
23485 emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
23486 emit_insn (gen_frndintxf2_<rounding> (op0, op1));
23487 emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
23488 }
23489 DONE;
23490 })
23491
;; Insn-and-split "*fist<mode>2_<rounding>_1", one per FIST_ROUNDING
;; code and SWI248x mode: operand 0 = unspec (XFmode operand 1), with a
;; FLAGS_REG clobber.  It only matches before reload
;; (ix86_pre_reload_split ()), outputs "#", and is always split.
;; The split code mirrors frndintxf2_<rounding>: it sets
;; ix86_optimize_mode_switching[I387_<ROUNDING>] to 1, obtains the
;; HImode stack slots SLOT_CW_STORED (operand 2) and SLOT_CW_<ROUNDING>
;; (operand 3), and emits fist<mode>2_<rounding> with those operands.
;; Condition: TARGET_USE_FANCY_MATH_387 and
;; flag_unsafe_math_optimizations.
23492 ;; Rounding mode control word calculation could clobber FLAGS_REG.
23493 (define_insn_and_split "*fist<mode>2_<rounding>_1"
23494 [(set (match_operand:SWI248x 0 "nonimmediate_operand")
23495 (unspec:SWI248x [(match_operand:XF 1 "register_operand")]
23496 FIST_ROUNDING))
23497 (clobber (reg:CC FLAGS_REG))]
23498 "TARGET_USE_FANCY_MATH_387
23499 && flag_unsafe_math_optimizations
23500 && ix86_pre_reload_split ()"
23501 "#"
23502 "&& 1"
23503 [(const_int 0)]
23504 {
23505 ix86_optimize_mode_switching[I387_<ROUNDING>] = 1;
23506
23507 operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
23508 operands[3] = assign_386_stack_local (HImode, SLOT_CW_<ROUNDING>);
23509
23510 emit_insn (gen_fist<mode>2_<rounding> (operands[0], operands[1],
23511 operands[2], operands[3]));
23512 DONE;
23513 }
23514 [(set_attr "type" "fistp")
23515 (set_attr "i387_cw" "<rounding>")
23516 (set_attr "mode" "<MODE>")])
23517
;; Insn "fistdi2_<rounding>", one per FIST_ROUNDING code: DImode form of
;; the post-split fist pattern.  Stores unspec (XFmode register
;; operand 1) into memory operand 0 ("=m"), uses the two HImode memory
;; slots in operands 2 and 3, and clobbers an XFmode scratch register
;; (operand 4, early-clobber "=&f").
;; The assembly is produced by output_fix_trunc (insn, operands, false).
;; Condition: TARGET_USE_FANCY_MATH_387 and
;; flag_unsafe_math_optimizations.
23518 (define_insn "fistdi2_<rounding>"
23519 [(set (match_operand:DI 0 "nonimmediate_operand" "=m")
23520 (unspec:DI [(match_operand:XF 1 "register_operand" "f")]
23521 FIST_ROUNDING))
23522 (use (match_operand:HI 2 "memory_operand" "m"))
23523 (use (match_operand:HI 3 "memory_operand" "m"))
23524 (clobber (match_scratch:XF 4 "=&f"))]
23525 "TARGET_USE_FANCY_MATH_387
23526 && flag_unsafe_math_optimizations"
23527 "* return output_fix_trunc (insn, operands, false);"
23528 [(set_attr "type" "fistp")
23529 (set_attr "i387_cw" "<rounding>")
23530 (set_attr "mode" "DI")])
23531
;; Insn "fist<mode>2_<rounding>", one per FIST_ROUNDING code and SWI24
;; mode: post-split fist pattern for the narrower integer modes.
;; Stores unspec (XFmode register operand 1) into memory operand 0
;; ("=m") and uses the two HImode memory slots in operands 2 and 3.
;; Unlike fistdi2_<rounding> it has no scratch register.
;; The assembly is produced by output_fix_trunc (insn, operands, false).
;; Condition: TARGET_USE_FANCY_MATH_387 and
;; flag_unsafe_math_optimizations.
23532 (define_insn "fist<mode>2_<rounding>"
23533 [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m")
23534 (unspec:SWI24 [(match_operand:XF 1 "register_operand" "f")]
23535 FIST_ROUNDING))
23536 (use (match_operand:HI 2 "memory_operand" "m"))
23537 (use (match_operand:HI 3 "memory_operand" "m"))]
23538 "TARGET_USE_FANCY_MATH_387
23539 && flag_unsafe_math_optimizations"
23540 "* return output_fix_trunc (insn, operands, false);"
23541 [(set_attr "type" "fistp")
23542 (set_attr "i387_cw" "<rounding>")
23543 (set_attr "mode" "<MODE>")])
23544
;; Expander "l<rounding_insn>xf<mode>2" (lfloorxf<mode>2,
;; lceilxf<mode>2) for each SWI248x mode: template-only expander that
;; emits the parallel {operand 0 = unspec (XFmode operand 1);
;; clobber FLAGS_REG}, i.e. the RTL matched by
;; *fist<mode>2_<rounding>_1 above.
;; Condition: TARGET_USE_FANCY_MATH_387,
;; (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387), and
;; flag_unsafe_math_optimizations.
23545 (define_expand "l<rounding_insn>xf<mode>2"
23546 [(parallel [(set (match_operand:SWI248x 0 "nonimmediate_operand")
23547 (unspec:SWI248x [(match_operand:XF 1 "register_operand")]
23548 FIST_ROUNDING))
23549 (clobber (reg:CC FLAGS_REG))])]
23550 "TARGET_USE_FANCY_MATH_387
23551 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
23552 && flag_unsafe_math_optimizations")
23553
;; Expander "l<rounding_insn>hf<mode>2" (lfloorhf<mode>2,
;; lceilhf<mode>2) for each SWI48 mode.  The RTL template is not
;; emitted.  The body works in two steps:
;;  1. round operand 1 into a fresh HFmode pseudo with sse4_1_roundhf2
;;     and the immediate ROUND_<ROUNDING> | ROUND_NO_EXC;
;;  2. convert that pseudo to the integer destination with
;;     gen_fix_trunchf<mode>2.
;; Requires TARGET_AVX512FP16.
23554 (define_expand "l<rounding_insn>hf<mode>2"
23555 [(set (match_operand:SWI48 0 "nonimmediate_operand")
23556 (unspec:SWI48 [(match_operand:HF 1 "register_operand")]
23557 FIST_ROUNDING))]
23558 "TARGET_AVX512FP16"
23559 {
23560 rtx tmp = gen_reg_rtx (HFmode);
23561 emit_insn (gen_sse4_1_roundhf2 (tmp, operands[1],
23562 GEN_INT (ROUND_<ROUNDING> | ROUND_NO_EXC)));
23563 emit_insn (gen_fix_trunchf<mode>2 (operands[0], tmp));
23564 DONE;
23565 })
23566
;; Expander "l<rounding_insn><MODEF:mode><SWI48:mode>2" (lfloor / lceil)
;; for every combination of the MODEF and SWI48 iterators.  The RTL
;; template is never emitted; every path ends in DONE.
;; Enabled when <MODEF:MODE>mode math is done in SSE and either
;; TARGET_SSE4_1 or flag_trapping_math is off.
;; Code paths:
;;  - TARGET_SSE4_1: round operand 1 into a fresh pseudo with
;;    sse4_1_round<MODEF:mode>2 (immediate ROUND_<ROUNDING> |
;;    ROUND_NO_EXC), then convert with
;;    gen_fix_trunc<MODEF:mode><SWI48:mode>2.
;;  - otherwise: ix86_expand_lfloorceil, third argument true for floor
;;    and false for ceil.  Any other rounding kind is
;;    gcc_unreachable (); FIST_ROUNDING only contains floor and ceil.
23567 (define_expand "l<rounding_insn><MODEF:mode><SWI48:mode>2"
23568 [(parallel [(set (match_operand:SWI48 0 "nonimmediate_operand")
23569 (unspec:SWI48 [(match_operand:MODEF 1 "register_operand")]
23570 FIST_ROUNDING))
23571 (clobber (reg:CC FLAGS_REG))])]
23572 "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
23573 && (TARGET_SSE4_1 || !flag_trapping_math)"
23574 {
23575 if (TARGET_SSE4_1)
23576 {
23577 rtx tmp = gen_reg_rtx (<MODEF:MODE>mode);
23578
23579 emit_insn (gen_sse4_1_round<MODEF:mode>2
23580 (tmp, operands[1], GEN_INT (ROUND_<ROUNDING>
23581 | ROUND_NO_EXC)));
23582 emit_insn (gen_fix_trunc<MODEF:mode><SWI48:mode>2
23583 (operands[0], tmp));
23584 }
23585 else if (ROUND_<ROUNDING> == ROUND_FLOOR)
23586 ix86_expand_lfloorceil (operands[0], operands[1], true);
23587 else if (ROUND_<ROUNDING> == ROUND_CEIL)
23588 ix86_expand_lfloorceil (operands[0], operands[1], false);
23589 else
23590 gcc_unreachable ();
23591
23592 DONE;
23593 })
23594
;; Insn "fxam<mode>2_i387" for each mode of the X87MODEF iterator:
;; HImode operand 0 = UNSPEC_FXAM (x87 register operand 1).
;; Emits "fxam" followed by "fnstsw %0"; the result register is
;; constrained to "a".  The "length" attribute is fixed at 4.
;; Requires TARGET_USE_FANCY_MATH_387.  Used by the signbit expanders
;; below.
23595 (define_insn "fxam<mode>2_i387"
23596 [(set (match_operand:HI 0 "register_operand" "=a")
23597 (unspec:HI
23598 [(match_operand:X87MODEF 1 "register_operand" "f")]
23599 UNSPEC_FXAM))]
23600 "TARGET_USE_FANCY_MATH_387"
23601 "fxam\n\tfnstsw\t%0"
23602 [(set_attr "type" "multi")
23603 (set_attr "length" "4")
23604 (set_attr "unit" "i387")
23605 (set_attr "mode" "<MODE>")])
23606
;; Expander "signbittf2": SImode operand 0 receives a value derived from
;; the sign of TFmode operand 1.  Requires TARGET_SSE.
;; Code paths:
;;  - TARGET_SSE4_1: build a mask with
;;    ix86_build_signbit_mask (TFmode, 0, 0), emit ptesttf2 of operand 1
;;    against it, materialise the NE condition on the CCZmode flags into
;;    a QImode scratch with ix86_expand_setcc, and zero-extend that into
;;    operand 0.
;;  - otherwise: emit sse_movmskps on operand 1 viewed as V4SFmode and
;;    AND the result with 0x8.
;;    NOTE(review): 0x8 presumably selects the mask bit of the highest
;;    32-bit element, which holds the TFmode sign -- confirm.
;; The two paths therefore yield different nonzero values (a setcc
;; result versus 0x8) for a set sign bit.
23607 (define_expand "signbittf2"
23608 [(use (match_operand:SI 0 "register_operand"))
23609 (use (match_operand:TF 1 "register_operand"))]
23610 "TARGET_SSE"
23611 {
23612 if (TARGET_SSE4_1)
23613 {
23614 rtx mask = ix86_build_signbit_mask (TFmode, 0, 0);
23615 rtx scratch = gen_reg_rtx (QImode);
23616
23617 emit_insn (gen_ptesttf2 (operands[1], mask));
23618 ix86_expand_setcc (scratch, NE,
23619 gen_rtx_REG (CCZmode, FLAGS_REG), const0_rtx);
23620
23621 emit_insn (gen_zero_extendqisi2 (operands[0], scratch));
23622 }
23623 else
23624 {
23625 emit_insn (gen_sse_movmskps (operands[0],
23626 gen_lowpart (V4SFmode, operands[1])));
23627 emit_insn (gen_andsi3 (operands[0], operands[0], GEN_INT (0x8)));
23628 }
23629 DONE;
23630 })
23631
;; Expander "signbitxf2": SImode operand 0 receives a value derived from
;; the sign of XFmode operand 1.  Requires TARGET_USE_FANCY_MATH_387.
;; The body emits fxamxf2_i387 into an HImode scratch and then ANDs the
;; SImode low part of that scratch with 0x200 into operand 0.
;; NOTE(review): 0x200 presumably selects the x87 status-word condition
;; bit that fxam sets from the operand's sign -- confirm against the
;; x87 documentation.
23632 (define_expand "signbitxf2"
23633 [(use (match_operand:SI 0 "register_operand"))
23634 (use (match_operand:XF 1 "register_operand"))]
23635 "TARGET_USE_FANCY_MATH_387"
23636 {
23637 rtx scratch = gen_reg_rtx (HImode);
23638
23639 emit_insn (gen_fxamxf2_i387 (scratch, operands[1]));
23640 emit_insn (gen_andsi3 (operands[0],
23641 gen_lowpart (SImode, scratch), GEN_INT (0x200)));
23642 DONE;
23643 })
23644
;; Insn "movmsk_df": SImode operand 0 = UNSPEC_MOVMSK (DFmode register
;; operand 1), emitted as %vmovmskpd.  Two alternatives differ only in
;; the destination constraint ("r" for isa "noavx", "jr" for isa "avx").
;; Enabled when DFmode math is done in SSE.  Used by signbitdf2 below.
23645 (define_insn "movmsk_df"
23646 [(set (match_operand:SI 0 "register_operand" "=r,jr")
23647 (unspec:SI
23648 [(match_operand:DF 1 "register_operand" "x,x")]
23649 UNSPEC_MOVMSK))]
23650 "SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH"
23651 "%vmovmskpd\t{%1, %0|%0, %1}"
23652 [(set_attr "isa" "noavx,avx")
23653 (set_attr "type" "ssemov")
23654 (set_attr "prefix" "maybe_evex")
23655 (set_attr "mode" "DF")])
23656
;; Expander "signbitdf2": SImode operand 0 receives a value derived from
;; the sign of DFmode operand 1.  Available when either the 387 can be
;; used or DFmode math is done in SSE.
;;  - SSE math: emit movmsk_df into operand 0, then AND with 1.
;;  - otherwise: emit fxamdf2_i387 into an HImode scratch and AND its
;;    SImode low part with 0x200, as in signbitxf2.
23657 ;; Use movmskpd in SSE mode to avoid store forwarding stall
23658 ;; for 32bit targets and movq+shrq sequence for 64bit targets.
23659 (define_expand "signbitdf2"
23660 [(use (match_operand:SI 0 "register_operand"))
23661 (use (match_operand:DF 1 "register_operand"))]
23662 "TARGET_USE_FANCY_MATH_387
23663 || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
23664 {
23665 if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)
23666 {
23667 emit_insn (gen_movmsk_df (operands[0], operands[1]));
23668 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
23669 }
23670 else
23671 {
23672 rtx scratch = gen_reg_rtx (HImode);
23673
23674 emit_insn (gen_fxamdf2_i387 (scratch, operands[1]));
23675 emit_insn (gen_andsi3 (operands[0],
23676 gen_lowpart (SImode, scratch), GEN_INT (0x200)));
23677 }
23678 DONE;
23679 })
23680
;; Expander "signbitsf2": SImode operand 0 receives a value derived from
;; the sign of SFmode operand 1, using the x87 only.  It is enabled when
;; TARGET_USE_FANCY_MATH_387 holds and SFmode math is NOT done in SSE;
;; this expander has no SSE path.
;; The body emits fxamsf2_i387 into an HImode scratch and ANDs its
;; SImode low part with 0x200, as in signbitxf2.
23681 (define_expand "signbitsf2"
23682 [(use (match_operand:SI 0 "register_operand"))
23683 (use (match_operand:SF 1 "register_operand"))]
23684 "TARGET_USE_FANCY_MATH_387
23685 && !(SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH)"
23686 {
23687 rtx scratch = gen_reg_rtx (HImode);
23688
23689 emit_insn (gen_fxamsf2_i387 (scratch, operands[1]));
23690 emit_insn (gen_andsi3 (operands[0],
23691 gen_lowpart (SImode, scratch), GEN_INT (0x200)));
23692 DONE;
23693 })
23694 \f
23695 ;; Block operation instructions
23696
;; Insn "cld": an unspec_volatile (UNSPECV_CLD) with no operands that
;; emits the one-byte "cld" instruction.  Always available (empty
;; condition).  The attributes record length 1, no immediate and no
;; modrm byte.
23697 (define_insn "cld"
23698 [(unspec_volatile [(const_int 0)] UNSPECV_CLD)]
23699 ""
23700 "cld"
23701 [(set_attr "length" "1")
23702 (set_attr "length_immediate" "0")
23703 (set_attr "modrm" "0")])
23704
;; Expander "cpymem<mode>" for each mode of the SWI48 iterator: block
;; copy from BLKmode memory operand 1 to BLKmode memory operand 0.
;; Operand 2 (nonmemory_operand, <MODE>mode) and operands 3-8 are
;; passed straight through to ix86_expand_set_or_cpymem, with NULL as
;; the fourth argument and false as the last one.
;; NOTE(review): per the usual cpymem contract operand 2 is the byte
;; count and operand 3 the alignment; the meaning of operands 4-8 is
;; defined by ix86_expand_set_or_cpymem, outside this excerpt.
;; The expander succeeds (DONE) when the helper returns true and FAILs
;; otherwise.
23705 (define_expand "cpymem<mode>"
23706 [(use (match_operand:BLK 0 "memory_operand"))
23707 (use (match_operand:BLK 1 "memory_operand"))
23708 (use (match_operand:SWI48 2 "nonmemory_operand"))
23709 (use (match_operand:SWI48 3 "const_int_operand"))
23710 (use (match_operand:SI 4 "const_int_operand"))
23711 (use (match_operand:SI 5 "const_int_operand"))
23712 (use (match_operand:SI 6 ""))
23713 (use (match_operand:SI 7 ""))
23714 (use (match_operand:SI 8 ""))]
23715 ""
23716 {
23717 if (ix86_expand_set_or_cpymem (operands[0], operands[1],
23718 operands[2], NULL, operands[3],
23719 operands[4], operands[5],
23720 operands[6], operands[7],
23721 operands[8], false))
23722 DONE;
23723 else
23724 FAIL;
23725 })
23726
;; Expander "strmov": copy one piece from memory operand 3 to memory
;; operand 1 and advance both address registers.
;;   operand 0 / operand 2: the updated destination / source address
;;     registers (named by analogy with the "=D" / "=S" constraints on
;;     the *strmov*_1 insns below).
;;   operand 4: temporary register in the mode of operand 1.
;;   operands 5, 6: operand 0 / operand 2 plus the piece size, computed
;;     with plus_constant from GET_MODE_SIZE of operand 1's mode.
;; Behaviour:
;;  - FAIL if operand 3 is in a non-generic address space.
;;  - If (TARGET_SINGLE_STRINGOP or optimizing for size) and neither
;;    SI_REG nor DI_REG is fixed, emit strmov_singleop and stop.
;;  - Otherwise emit the template: load operand 3 into operand 4, store
;;    it to operand 1, then the two address increments (each with a
;;    FLAGS_REG clobber).
23727 ;; Most CPUs don't like single string operations
23728 ;; Handle this case here to simplify previous expander.
23729
23730 (define_expand "strmov"
23731 [(set (match_dup 4) (match_operand 3 "memory_operand"))
23732 (set (match_operand 1 "memory_operand") (match_dup 4))
23733 (parallel [(set (match_operand 0 "register_operand") (match_dup 5))
23734 (clobber (reg:CC FLAGS_REG))])
23735 (parallel [(set (match_operand 2 "register_operand") (match_dup 6))
23736 (clobber (reg:CC FLAGS_REG))])]
23737 ""
23738 {
23739 /* Can't use this for non-default address spaces. */
23740 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[3])))
23741 FAIL;
23742
23743 int piece_size = GET_MODE_SIZE (GET_MODE (operands[1]));
23744
23745 /* If .md ever supports :P for Pmode, these can be directly
23746 in the pattern above. */
23747 operands[5] = plus_constant (Pmode, operands[0], piece_size);
23748 operands[6] = plus_constant (Pmode, operands[2], piece_size);
23749
23750 /* Can't use this if the user has appropriated esi or edi. */
23751 if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ())
23752 && !(fixed_regs[SI_REG] || fixed_regs[DI_REG]))
23753 {
23754 emit_insn (gen_strmov_singleop (operands[0], operands[1],
23755 operands[2], operands[3],
23756 operands[5], operands[6]));
23757 DONE;
23758 }
23759
23760 operands[4] = gen_reg_rtx (GET_MODE (operands[1]));
23761 })
23762
;; Expander "strmov_singleop": emits a single parallel containing the
;; memory-to-memory move (operand 1 <- operand 3) and the two address
;; register updates (operand 0 <- operand 4, operand 2 <- operand 5),
;; which is the shape matched by the *strmov*_1 insns below.
;; Before emitting, if TARGET_CLD is set it records
;; ix86_optimize_mode_switching[X86_DIRFLAG] = 1.
23763 (define_expand "strmov_singleop"
23764 [(parallel [(set (match_operand 1 "memory_operand")
23765 (match_operand 3 "memory_operand"))
23766 (set (match_operand 0 "register_operand")
23767 (match_operand 4))
23768 (set (match_operand 2 "register_operand")
23769 (match_operand 5))])]
23770 ""
23771 {
23772 if (TARGET_CLD)
23773 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
23774 })
23775
;; Insn "*strmovdi_rex_1": DImode string move, emitted as "%^movsq".
;; Copies the DImode word addressed by operand 3 to the address in
;; operand 2 and sets operand 0 = operand 2 + 8 and
;; operand 1 = operand 3 + 8.  Operand 0 is constrained to "D" and
;; operand 1 to "S"; the inputs are tied to them ("0" and "1").
;; Requires TARGET_64BIT, that neither SI_REG nor DI_REG is fixed, and
;; ix86_check_no_addr_space (insn).
23776 (define_insn "*strmovdi_rex_1"
23777 [(set (mem:DI (match_operand:P 2 "register_operand" "0"))
23778 (mem:DI (match_operand:P 3 "register_operand" "1")))
23779 (set (match_operand:P 0 "register_operand" "=D")
23780 (plus:P (match_dup 2)
23781 (const_int 8)))
23782 (set (match_operand:P 1 "register_operand" "=S")
23783 (plus:P (match_dup 3)
23784 (const_int 8)))]
23785 "TARGET_64BIT
23786 && !(fixed_regs[SI_REG] || fixed_regs[DI_REG])
23787 && ix86_check_no_addr_space (insn)"
23788 "%^movsq"
23789 [(set_attr "type" "str")
23790 (set_attr "memory" "both")
23791 (set_attr "mode" "DI")])
23792
;; Insn "*strmovsi_1": SImode string move, emitted as "%^movs{l|d}".
;; Same shape as *strmovdi_rex_1 with a 4-byte element: operand 0
;; ("=D") = operand 2 + 4 and operand 1 ("=S") = operand 3 + 4.
;; Requires that neither SI_REG nor DI_REG is fixed and
;; ix86_check_no_addr_space (insn); there is no TARGET_64BIT
;; requirement.
23793 (define_insn "*strmovsi_1"
23794 [(set (mem:SI (match_operand:P 2 "register_operand" "0"))
23795 (mem:SI (match_operand:P 3 "register_operand" "1")))
23796 (set (match_operand:P 0 "register_operand" "=D")
23797 (plus:P (match_dup 2)
23798 (const_int 4)))
23799 (set (match_operand:P 1 "register_operand" "=S")
23800 (plus:P (match_dup 3)
23801 (const_int 4)))]
23802 "!(fixed_regs[SI_REG] || fixed_regs[DI_REG])
23803 && ix86_check_no_addr_space (insn)"
23804 "%^movs{l|d}"
23805 [(set_attr "type" "str")
23806 (set_attr "memory" "both")
23807 (set_attr "mode" "SI")])
23808
;; Insn "*strmovhi_1": HImode string move, emitted as "%^movsw".
;; Same shape as *strmovsi_1 with a 2-byte element: operand 0 ("=D") =
;; operand 2 + 2 and operand 1 ("=S") = operand 3 + 2.
;; Requires that neither SI_REG nor DI_REG is fixed and
;; ix86_check_no_addr_space (insn).
23809 (define_insn "*strmovhi_1"
23810 [(set (mem:HI (match_operand:P 2 "register_operand" "0"))
23811 (mem:HI (match_operand:P 3 "register_operand" "1")))
23812 (set (match_operand:P 0 "register_operand" "=D")
23813 (plus:P (match_dup 2)
23814 (const_int 2)))
23815 (set (match_operand:P 1 "register_operand" "=S")
23816 (plus:P (match_dup 3)
23817 (const_int 2)))]
23818 "!(fixed_regs[SI_REG] || fixed_regs[DI_REG])
23819 && ix86_check_no_addr_space (insn)"
23820 "%^movsw"
23821 [(set_attr "type" "str")
23822 (set_attr "memory" "both")
23823 (set_attr "mode" "HI")])
23824
;; Single QImode string move.  Copies the byte at the address in operand 3
;; to the address in operand 2, and sets operands 0 ("=D") and 1 ("=S") to
;; those addresses plus 1.  Rejected when SI_REG or DI_REG is a fixed
;; register or when ix86_check_no_addr_space refuses the insn.
;; Output template: movsb.  The prefix_rex attribute is forced to "0" when
;; the pointer mode P is DImode and left at its default ("*") otherwise.
23825 (define_insn "*strmovqi_1"
23826 [(set (mem:QI (match_operand:P 2 "register_operand" "0"))
23827 (mem:QI (match_operand:P 3 "register_operand" "1")))
23828 (set (match_operand:P 0 "register_operand" "=D")
23829 (plus:P (match_dup 2)
23830 (const_int 1)))
23831 (set (match_operand:P 1 "register_operand" "=S")
23832 (plus:P (match_dup 3)
23833 (const_int 1)))]
23834 "!(fixed_regs[SI_REG] || fixed_regs[DI_REG])
23835 && ix86_check_no_addr_space (insn)"
23836 "%^movsb"
23837 [(set_attr "type" "str")
23838 (set_attr "memory" "both")
23839 (set (attr "prefix_rex")
23840 (if_then_else
23841 (match_test "<P:MODE>mode == DImode")
23842 (const_string "0")
23843 (const_string "*")))
23844 (set_attr "mode" "QI")])
23845
;; Expander that builds one parallel for a repeated string move:
;;   operand 4 (register) is set to zero and also appears in a trailing use,
;;   operand 0 (register) is set to operand 5,
;;   operand 2 (register) is set to operand 6,
;;   memory operand 1 is set from memory operand 3.
;; The operands carry no modes here; the caller supplies complete RTL.
;; When TARGET_CLD is set, the preparation code records that X86_DIRFLAG
;; mode switching is needed.
23846 (define_expand "rep_mov"
23847 [(parallel [(set (match_operand 4 "register_operand") (const_int 0))
23848 (set (match_operand 0 "register_operand")
23849 (match_operand 5))
23850 (set (match_operand 2 "register_operand")
23851 (match_operand 6))
23852 (set (match_operand 1 "memory_operand")
23853 (match_operand 3 "memory_operand"))
23854 (use (match_dup 4))])]
23855 ""
23856 {
23857 if (TARGET_CLD)
23858 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
23859 })
23860
;; Repeated DImode string move.  Operand 5 is the element count, tied to
;; operand 2 ("=c"), which the pattern sets to zero.  Operand 0 ("=D") is
;; set to operand 3 plus (operand 5 << 3) and operand 1 ("=S") to operand 4
;; plus (operand 5 << 3); a BLKmode memory at address operand 3 is set from
;; a BLKmode memory at address operand 4.  Only for TARGET_64BIT, with
;; CX_REG, SI_REG and DI_REG all non-fixed and ix86_check_no_addr_space
;; accepting the insn.  Output template: rep movsq.
23861 (define_insn "*rep_movdi_rex64"
23862 [(set (match_operand:P 2 "register_operand" "=c") (const_int 0))
23863 (set (match_operand:P 0 "register_operand" "=D")
23864 (plus:P (ashift:P (match_operand:P 5 "register_operand" "2")
23865 (const_int 3))
23866 (match_operand:P 3 "register_operand" "0")))
23867 (set (match_operand:P 1 "register_operand" "=S")
23868 (plus:P (ashift:P (match_dup 5) (const_int 3))
23869 (match_operand:P 4 "register_operand" "1")))
23870 (set (mem:BLK (match_dup 3))
23871 (mem:BLK (match_dup 4)))
23872 (use (match_dup 5))]
23873 "TARGET_64BIT
23874 && !(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
23875 && ix86_check_no_addr_space (insn)"
23876 "%^rep{%;} movsq"
23877 [(set_attr "type" "str")
23878 (set_attr "prefix_rep" "1")
23879 (set_attr "memory" "both")
23880 (set_attr "mode" "DI")])
23881
;; Repeated SImode string move.  Operand 5 is the element count, tied to
;; operand 2 ("=c"), which the pattern sets to zero.  Operand 0 ("=D") is
;; set to operand 3 plus (operand 5 << 2) and operand 1 ("=S") to operand 4
;; plus (operand 5 << 2); a BLKmode memory at address operand 3 is set from
;; a BLKmode memory at address operand 4.  Requires CX_REG, SI_REG and
;; DI_REG to be non-fixed and ix86_check_no_addr_space to accept the insn.
;; Output template: rep movs{l|d}.
23882 (define_insn "*rep_movsi"
23883 [(set (match_operand:P 2 "register_operand" "=c") (const_int 0))
23884 (set (match_operand:P 0 "register_operand" "=D")
23885 (plus:P (ashift:P (match_operand:P 5 "register_operand" "2")
23886 (const_int 2))
23887 (match_operand:P 3 "register_operand" "0")))
23888 (set (match_operand:P 1 "register_operand" "=S")
23889 (plus:P (ashift:P (match_dup 5) (const_int 2))
23890 (match_operand:P 4 "register_operand" "1")))
23891 (set (mem:BLK (match_dup 3))
23892 (mem:BLK (match_dup 4)))
23893 (use (match_dup 5))]
23894 "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
23895 && ix86_check_no_addr_space (insn)"
23896 "%^rep{%;} movs{l|d}"
23897 [(set_attr "type" "str")
23898 (set_attr "prefix_rep" "1")
23899 (set_attr "memory" "both")
23900 (set_attr "mode" "SI")])
23901
;; Repeated QImode string move.  Operand 5 is the byte count, tied to
;; operand 2 ("=c"), which the pattern sets to zero.  Operand 0 ("=D") is
;; set to operand 3 plus operand 5 and operand 1 ("=S") to operand 4 plus
;; operand 5 (no shift, unlike the wider variants); a BLKmode memory at
;; address operand 3 is set from a BLKmode memory at address operand 4.
;; Requires CX_REG, SI_REG and DI_REG to be non-fixed and
;; ix86_check_no_addr_space to accept the insn.
;; Output template: rep movsb.
23902 (define_insn "*rep_movqi"
23903 [(set (match_operand:P 2 "register_operand" "=c") (const_int 0))
23904 (set (match_operand:P 0 "register_operand" "=D")
23905 (plus:P (match_operand:P 3 "register_operand" "0")
23906 (match_operand:P 5 "register_operand" "2")))
23907 (set (match_operand:P 1 "register_operand" "=S")
23908 (plus:P (match_operand:P 4 "register_operand" "1") (match_dup 5)))
23909 (set (mem:BLK (match_dup 3))
23910 (mem:BLK (match_dup 4)))
23911 (use (match_dup 5))]
23912 "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
23913 && ix86_check_no_addr_space (insn)"
23914 "%^rep{%;} movsb"
23915 [(set_attr "type" "str")
23916 (set_attr "prefix_rep" "1")
23917 (set_attr "memory" "both")
23918 (set_attr "mode" "QI")])
23919
;; setmem expander, instantiated for each SWI48 mode of operand 1.
;; Operand 0 is the BLKmode destination, operand 1 a nonmemory operand in
;; the iterated mode, operand 2 a QImode nonmemory operand, operands 3-5
;; integer constants and operands 6-8 unconstrained SImode operands.
;; All nine operands are handed to ix86_expand_set_or_cpymem, with NULL as
;; its second argument and true as its last; the expansion is DONE when
;; that call returns nonzero and FAILs otherwise.
23920 (define_expand "setmem<mode>"
23921 [(use (match_operand:BLK 0 "memory_operand"))
23922 (use (match_operand:SWI48 1 "nonmemory_operand"))
23923 (use (match_operand:QI 2 "nonmemory_operand"))
23924 (use (match_operand 3 "const_int_operand"))
23925 (use (match_operand:SI 4 "const_int_operand"))
23926 (use (match_operand:SI 5 "const_int_operand"))
23927 (use (match_operand:SI 6 ""))
23928 (use (match_operand:SI 7 ""))
23929 (use (match_operand:SI 8 ""))]
23930 ""
23931 {
23932 if (ix86_expand_set_or_cpymem (operands[0], NULL,
23933 operands[1], operands[2],
23934 operands[3], operands[4],
23935 operands[5], operands[6],
23936 operands[7], operands[8], true))
23937 DONE;
23938 else
23939 FAIL;
23940 })
23941
23942 ;; Most CPUs don't like single string operations
23943 ;; Handle this case here to simplify previous expander.
23944
;; Store register operand 2 into memory operand 1 and advance pointer
;; operand 0 by the size of operand 2's mode.
;; Preparation steps, in order:
;;   - FAIL if operand 1 is not in the generic address space;
;;   - give operand 1 the mode of operand 2 when the two modes differ;
;;   - compute operand 3 as operand 0 plus the size of operand 2's mode;
;;   - if TARGET_SINGLE_STRINGOP or optimizing for size, and neither AX_REG
;;     nor DI_REG is fixed, emit strset_singleop instead and finish.
;; Otherwise the default two-insn pattern is emitted: the plain store,
;; then the pointer update with a FLAGS_REG clobber.
23945 (define_expand "strset"
23946 [(set (match_operand 1 "memory_operand")
23947 (match_operand 2 "register_operand"))
23948 (parallel [(set (match_operand 0 "register_operand")
23949 (match_dup 3))
23950 (clobber (reg:CC FLAGS_REG))])]
23951 ""
23952 {
23953 /* Can't use this for non-default address spaces. */
23954 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[1])))
23955 FAIL;
23956
23957 if (GET_MODE (operands[1]) != GET_MODE (operands[2]))
23958 operands[1] = adjust_address_nv (operands[1], GET_MODE (operands[2]), 0);
23959
23960 /* If .md ever supports :P for Pmode, this can be directly
23961 in the pattern above. */
23962 operands[3] = plus_constant (Pmode, operands[0],
23963 GET_MODE_SIZE (GET_MODE (operands[2])));
23964
23965 /* Can't use this if the user has appropriated eax or edi. */
23966 if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ())
23967 && !(fixed_regs[AX_REG] || fixed_regs[DI_REG]))
23968 {
23969 emit_insn (gen_strset_singleop (operands[0], operands[1], operands[2],
23970 operands[3]));
23971 DONE;
23972 }
23973 })
23974
;; Expander that builds one parallel for a single string store: memory
;; operand 1 is set from register operand 2, register operand 0 is set to
;; operand 3, and an UNSPEC_STOS marker is included in the parallel.
;; When TARGET_CLD is set, the preparation code records that X86_DIRFLAG
;; mode switching is needed.
23975 (define_expand "strset_singleop"
23976 [(parallel [(set (match_operand 1 "memory_operand")
23977 (match_operand 2 "register_operand"))
23978 (set (match_operand 0 "register_operand")
23979 (match_operand 3))
23980 (unspec [(const_int 0)] UNSPEC_STOS)])]
23981 ""
23982 {
23983 if (TARGET_CLD)
23984 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
23985 })
23986
;; Single DImode string store.  Stores operand 2 (constraint "a") at the
;; address in operand 1, and sets operand 0 ("=D", tied to operand 1) to
;; that address plus 8.  The UNSPEC_STOS element must be present in the
;; parallel.  Only for TARGET_64BIT, with AX_REG and DI_REG non-fixed and
;; ix86_check_no_addr_space accepting the insn.  Output template: stosq.
23987 (define_insn "*strsetdi_rex_1"
23988 [(set (mem:DI (match_operand:P 1 "register_operand" "0"))
23989 (match_operand:DI 2 "register_operand" "a"))
23990 (set (match_operand:P 0 "register_operand" "=D")
23991 (plus:P (match_dup 1)
23992 (const_int 8)))
23993 (unspec [(const_int 0)] UNSPEC_STOS)]
23994 "TARGET_64BIT
23995 && !(fixed_regs[AX_REG] || fixed_regs[DI_REG])
23996 && ix86_check_no_addr_space (insn)"
23997 "%^stosq"
23998 [(set_attr "type" "str")
23999 (set_attr "memory" "store")
24000 (set_attr "mode" "DI")])
24001
;; Single SImode string store.  Stores operand 2 (constraint "a") at the
;; address in operand 1, and sets operand 0 ("=D", tied to operand 1) to
;; that address plus 4.  Requires AX_REG and DI_REG to be non-fixed and
;; ix86_check_no_addr_space to accept the insn.
;; Output template: stos{l|d}.
24002 (define_insn "*strsetsi_1"
24003 [(set (mem:SI (match_operand:P 1 "register_operand" "0"))
24004 (match_operand:SI 2 "register_operand" "a"))
24005 (set (match_operand:P 0 "register_operand" "=D")
24006 (plus:P (match_dup 1)
24007 (const_int 4)))
24008 (unspec [(const_int 0)] UNSPEC_STOS)]
24009 "!(fixed_regs[AX_REG] || fixed_regs[DI_REG])
24010 && ix86_check_no_addr_space (insn)"
24011 "%^stos{l|d}"
24012 [(set_attr "type" "str")
24013 (set_attr "memory" "store")
24014 (set_attr "mode" "SI")])
24015
;; Single HImode string store.  Stores operand 2 (constraint "a") at the
;; address in operand 1, and sets operand 0 ("=D", tied to operand 1) to
;; that address plus 2.  Requires AX_REG and DI_REG to be non-fixed and
;; ix86_check_no_addr_space to accept the insn.  Output template: stosw.
24016 (define_insn "*strsethi_1"
24017 [(set (mem:HI (match_operand:P 1 "register_operand" "0"))
24018 (match_operand:HI 2 "register_operand" "a"))
24019 (set (match_operand:P 0 "register_operand" "=D")
24020 (plus:P (match_dup 1)
24021 (const_int 2)))
24022 (unspec [(const_int 0)] UNSPEC_STOS)]
24023 "!(fixed_regs[AX_REG] || fixed_regs[DI_REG])
24024 && ix86_check_no_addr_space (insn)"
24025 "%^stosw"
24026 [(set_attr "type" "str")
24027 (set_attr "memory" "store")
24028 (set_attr "mode" "HI")])
24029
;; Single QImode string store.  Stores operand 2 (constraint "a") at the
;; address in operand 1, and sets operand 0 ("=D", tied to operand 1) to
;; that address plus 1.  Requires AX_REG and DI_REG to be non-fixed and
;; ix86_check_no_addr_space to accept the insn.  Output template: stosb.
;; The prefix_rex attribute is forced to "0" when the pointer mode P is
;; DImode and left at its default ("*") otherwise.
24030 (define_insn "*strsetqi_1"
24031 [(set (mem:QI (match_operand:P 1 "register_operand" "0"))
24032 (match_operand:QI 2 "register_operand" "a"))
24033 (set (match_operand:P 0 "register_operand" "=D")
24034 (plus:P (match_dup 1)
24035 (const_int 1)))
24036 (unspec [(const_int 0)] UNSPEC_STOS)]
24037 "!(fixed_regs[AX_REG] || fixed_regs[DI_REG])
24038 && ix86_check_no_addr_space (insn)"
24039 "%^stosb"
24040 [(set_attr "type" "str")
24041 (set_attr "memory" "store")
24042 (set (attr "prefix_rex")
24043 (if_then_else
24044 (match_test "<P:MODE>mode == DImode")
24045 (const_string "0")
24046 (const_string "*")))
24047 (set_attr "mode" "QI")])
24048
;; Expander that builds one parallel for a repeated string store:
;;   operand 1 (register) is set to zero and also appears in a trailing use,
;;   operand 0 (register) is set to operand 4,
;;   memory operand 2 is set to (const_int 0),
;;   register operand 3 is used.
;; The operands carry no modes here; the caller supplies complete RTL.
;; When TARGET_CLD is set, the preparation code records that X86_DIRFLAG
;; mode switching is needed.
24049 (define_expand "rep_stos"
24050 [(parallel [(set (match_operand 1 "register_operand") (const_int 0))
24051 (set (match_operand 0 "register_operand")
24052 (match_operand 4))
24053 (set (match_operand 2 "memory_operand") (const_int 0))
24054 (use (match_operand 3 "register_operand"))
24055 (use (match_dup 1))])]
24056 ""
24057 {
24058 if (TARGET_CLD)
24059 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
24060 })
24061
;; Repeated DImode string store.  Operand 4 is the element count, tied to
;; operand 1 ("=c"), which the pattern sets to zero.  Operand 0 ("=D") is
;; set to operand 3 plus (operand 4 << 3).  The BLKmode memory at address
;; operand 3 is written as (const_int 0) in the RTL, while the DImode
;; operand 2 (constraint "a") is recorded with a use.  Only for
;; TARGET_64BIT, with AX_REG, CX_REG and DI_REG non-fixed and
;; ix86_check_no_addr_space accepting the insn.
;; Output template: rep stosq.
24062 (define_insn "*rep_stosdi_rex64"
24063 [(set (match_operand:P 1 "register_operand" "=c") (const_int 0))
24064 (set (match_operand:P 0 "register_operand" "=D")
24065 (plus:P (ashift:P (match_operand:P 4 "register_operand" "1")
24066 (const_int 3))
24067 (match_operand:P 3 "register_operand" "0")))
24068 (set (mem:BLK (match_dup 3))
24069 (const_int 0))
24070 (use (match_operand:DI 2 "register_operand" "a"))
24071 (use (match_dup 4))]
24072 "TARGET_64BIT
24073 && !(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24074 && ix86_check_no_addr_space (insn)"
24075 "%^rep{%;} stosq"
24076 [(set_attr "type" "str")
24077 (set_attr "prefix_rep" "1")
24078 (set_attr "memory" "store")
24079 (set_attr "mode" "DI")])
24080
;; Repeated SImode string store.  Operand 4 is the element count, tied to
;; operand 1 ("=c"), which the pattern sets to zero.  Operand 0 ("=D") is
;; set to operand 3 plus (operand 4 << 2).  The BLKmode memory at address
;; operand 3 is written as (const_int 0) in the RTL, while the SImode
;; operand 2 (constraint "a") is recorded with a use.  Requires AX_REG,
;; CX_REG and DI_REG to be non-fixed and ix86_check_no_addr_space to accept
;; the insn.  Output template: rep stos{l|d}.
24081 (define_insn "*rep_stossi"
24082 [(set (match_operand:P 1 "register_operand" "=c") (const_int 0))
24083 (set (match_operand:P 0 "register_operand" "=D")
24084 (plus:P (ashift:P (match_operand:P 4 "register_operand" "1")
24085 (const_int 2))
24086 (match_operand:P 3 "register_operand" "0")))
24087 (set (mem:BLK (match_dup 3))
24088 (const_int 0))
24089 (use (match_operand:SI 2 "register_operand" "a"))
24090 (use (match_dup 4))]
24091 "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24092 && ix86_check_no_addr_space (insn)"
24093 "%^rep{%;} stos{l|d}"
24094 [(set_attr "type" "str")
24095 (set_attr "prefix_rep" "1")
24096 (set_attr "memory" "store")
24097 (set_attr "mode" "SI")])
24098
;; Repeated QImode string store.  Operand 4 is the byte count, tied to
;; operand 1 ("=c"), which the pattern sets to zero.  Operand 0 ("=D") is
;; set to operand 3 plus operand 4 (no shift).  The BLKmode memory at
;; address operand 3 is written as (const_int 0) in the RTL, while the
;; QImode operand 2 (constraint "a") is recorded with a use.  Requires
;; AX_REG, CX_REG and DI_REG to be non-fixed and ix86_check_no_addr_space
;; to accept the insn.  Output template: rep stosb.  The prefix_rex
;; attribute is forced to "0" when the pointer mode P is DImode.
24099 (define_insn "*rep_stosqi"
24100 [(set (match_operand:P 1 "register_operand" "=c") (const_int 0))
24101 (set (match_operand:P 0 "register_operand" "=D")
24102 (plus:P (match_operand:P 3 "register_operand" "0")
24103 (match_operand:P 4 "register_operand" "1")))
24104 (set (mem:BLK (match_dup 3))
24105 (const_int 0))
24106 (use (match_operand:QI 2 "register_operand" "a"))
24107 (use (match_dup 4))]
24108 "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24109 && ix86_check_no_addr_space (insn)"
24110 "%^rep{%;} stosb"
24111 [(set_attr "type" "str")
24112 (set_attr "prefix_rep" "1")
24113 (set_attr "memory" "store")
24114 (set (attr "prefix_rex")
24115 (if_then_else
24116 (match_test "<P:MODE>mode == DImode")
24117 (const_string "0")
24118 (const_string "*")))
24119 (set_attr "mode" "QI")])
24120
;; cmpmemsi expander.  Operand 0 is the SImode result register, operands 1
;; and 2 the BLKmode memory blocks, operand 3 a general operand and
;; operand 4 an immediate.  Everything is delegated to
;; ix86_expand_cmpstrn_or_cmpmem with its last argument false; the
;; expansion is DONE when that call returns nonzero and FAILs otherwise.
24121 (define_expand "cmpmemsi"
24122 [(set (match_operand:SI 0 "register_operand" "")
24123 (compare:SI (match_operand:BLK 1 "memory_operand" "")
24124 (match_operand:BLK 2 "memory_operand" "") ) )
24125 (use (match_operand 3 "general_operand"))
24126 (use (match_operand 4 "immediate_operand"))]
24127 ""
24128 {
24129 if (ix86_expand_cmpstrn_or_cmpmem (operands[0], operands[1],
24130 operands[2], operands[3],
24131 operands[4], false))
24132 DONE;
24133 else
24134 FAIL;
24135 })
24136
;; cmpstrnsi expander.  Operand 0 is the SImode result register, operands 1
;; and 2 the BLKmode blocks (general operands here), operand 3 a general
;; operand and operand 4 an immediate.  Everything is delegated to
;; ix86_expand_cmpstrn_or_cmpmem with its last argument true; the
;; expansion is DONE when that call returns nonzero and FAILs otherwise.
24137 (define_expand "cmpstrnsi"
24138 [(set (match_operand:SI 0 "register_operand")
24139 (compare:SI (match_operand:BLK 1 "general_operand")
24140 (match_operand:BLK 2 "general_operand")))
24141 (use (match_operand 3 "general_operand"))
24142 (use (match_operand 4 "immediate_operand"))]
24143 ""
24144 {
24145 if (ix86_expand_cmpstrn_or_cmpmem (operands[0], operands[1],
24146 operands[2], operands[3],
24147 operands[4], true))
24148 DONE;
24149 else
24150 FAIL;
24151 })
24152
24153 ;; Produce a tri-state integer (-1, 0, 1) from condition codes.
24154
;; Emits three insns: operand 1 = (gtu flags 0), operand 2 = (ltu flags 0),
;; then operand 0 = operand 1 - operand 2 with a FLAGS_REG clobber.
;; Operands 1 and 2 are not supplied by the caller; the preparation code
;; allocates a fresh QImode pseudo for each.
24155 (define_expand "cmpintqi"
24156 [(set (match_dup 1)
24157 (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
24158 (set (match_dup 2)
24159 (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
24160 (parallel [(set (match_operand:QI 0 "register_operand")
24161 (minus:QI (match_dup 1)
24162 (match_dup 2)))
24163 (clobber (reg:CC FLAGS_REG))])]
24164 ""
24165 {
24166 operands[1] = gen_reg_rtx (QImode);
24167 operands[2] = gen_reg_rtx (QImode);
24168 })
24169
24170 ;; memcmp recognizers. The `cmpsb' opcode does nothing if the count is
24171 ;; zero. Emit extra code to make sure that a zero-length compare is EQ.
24172
;; Expander that builds one parallel for a string compare: FLAGS_REG is set
;; from comparing memory operands 4 and 5; register operand 2 and the
;; SImode immediate operand 3 are used; register operands 0, 1 and 2 are
;; clobbered.  When TARGET_CLD is set, the preparation code records that
;; X86_DIRFLAG mode switching is needed.
24173 (define_expand "cmpstrnqi_nz_1"
24174 [(parallel [(set (reg:CC FLAGS_REG)
24175 (compare:CC (match_operand 4 "memory_operand")
24176 (match_operand 5 "memory_operand")))
24177 (use (match_operand 2 "register_operand"))
24178 (use (match_operand:SI 3 "immediate_operand"))
24179 (clobber (match_operand 0 "register_operand"))
24180 (clobber (match_operand 1 "register_operand"))
24181 (clobber (match_dup 2))])]
24182 ""
24183 {
24184 if (TARGET_CLD)
24185 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
24186 })
24187
;; Block compare insn.  FLAGS_REG is set from comparing the BLKmode memory
;; at address operand 4 with the one at address operand 5.  Operand 4 is
;; tied to clobbered operand 0 ("=S"), operand 5 to clobbered operand 1
;; ("=D"), and the used operand 6 to clobbered operand 2 ("=c").
;; Operand 3 is an SImode immediate that is only used.  Requires CX_REG,
;; SI_REG and DI_REG to be non-fixed and ix86_check_no_addr_space to accept
;; the insn.  Output template: repz cmpsb.  The prefix_rex attribute is
;; forced to "0" when the pointer mode P is DImode.
24188 (define_insn "*cmpstrnqi_nz_1"
24189 [(set (reg:CC FLAGS_REG)
24190 (compare:CC (mem:BLK (match_operand:P 4 "register_operand" "0"))
24191 (mem:BLK (match_operand:P 5 "register_operand" "1"))))
24192 (use (match_operand:P 6 "register_operand" "2"))
24193 (use (match_operand:SI 3 "immediate_operand" "i"))
24194 (clobber (match_operand:P 0 "register_operand" "=S"))
24195 (clobber (match_operand:P 1 "register_operand" "=D"))
24196 (clobber (match_operand:P 2 "register_operand" "=c"))]
24197 "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
24198 && ix86_check_no_addr_space (insn)"
24199 "%^repz{%;} cmpsb"
24200 [(set_attr "type" "str")
24201 (set_attr "mode" "QI")
24202 (set (attr "prefix_rex")
24203 (if_then_else
24204 (match_test "<P:MODE>mode == DImode")
24205 (const_string "0")
24206 (const_string "*")))
24207 (set_attr "prefix_rep" "1")])
24208
24209 ;; The same, but the count is not known to not be zero.
24210
;; Expander that builds one parallel for a string compare whose count may
;; be zero: FLAGS_REG becomes the comparison of memory operands 4 and 5
;; when register operand 2 is nonzero, and keeps its previous value
;; otherwise.  The SImode immediate operand 3 is used; register operands
;; 0, 1 and 2 are clobbered.  When TARGET_CLD is set, the preparation code
;; records that X86_DIRFLAG mode switching is needed.
24211 (define_expand "cmpstrnqi_1"
24212 [(parallel [(set (reg:CC FLAGS_REG)
24213 (if_then_else:CC (ne (match_operand 2 "register_operand")
24214 (const_int 0))
24215 (compare:CC (match_operand 4 "memory_operand")
24216 (match_operand 5 "memory_operand"))
24217 (reg:CC FLAGS_REG)))
24218 (use (match_operand:SI 3 "immediate_operand"))
24219 (clobber (match_operand 0 "register_operand"))
24220 (clobber (match_operand 1 "register_operand"))
24221 (clobber (match_dup 2))])]
24222 ""
24223 {
24224 if (TARGET_CLD)
24225 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
24226 })
24227
;; Block compare insn for a possibly-zero count.  FLAGS_REG becomes the
;; comparison of the BLKmode memories at addresses operand 4 and operand 5
;; when operand 6 is nonzero, and keeps its previous value otherwise.
;; Operand 4 is tied to clobbered operand 0 ("=S"), operand 5 to clobbered
;; operand 1 ("=D"), and operand 6 to clobbered operand 2 ("=c").
;; Operand 3 is an SImode immediate that is only used.  Requires CX_REG,
;; SI_REG and DI_REG to be non-fixed and ix86_check_no_addr_space to accept
;; the insn.  Output template: repz cmpsb.  The prefix_rex attribute is
;; forced to "0" when the pointer mode P is DImode.
24228 (define_insn "*cmpstrnqi_1"
24229 [(set (reg:CC FLAGS_REG)
24230 (if_then_else:CC (ne (match_operand:P 6 "register_operand" "2")
24231 (const_int 0))
24232 (compare:CC (mem:BLK (match_operand:P 4 "register_operand" "0"))
24233 (mem:BLK (match_operand:P 5 "register_operand" "1")))
24234 (reg:CC FLAGS_REG)))
24235 (use (match_operand:SI 3 "immediate_operand" "i"))
24236 (clobber (match_operand:P 0 "register_operand" "=S"))
24237 (clobber (match_operand:P 1 "register_operand" "=D"))
24238 (clobber (match_operand:P 2 "register_operand" "=c"))]
24239 "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
24240 && ix86_check_no_addr_space (insn)"
24241 "%^repz{%;} cmpsb"
24242 [(set_attr "type" "str")
24243 (set_attr "mode" "QI")
24244 (set (attr "prefix_rex")
24245 (if_then_else
24246 (match_test "<P:MODE>mode == DImode")
24247 (const_string "0")
24248 (const_string "*")))
24249 (set_attr "prefix_rep" "1")])
24250
;; strlen expander, instantiated for each pointer mode P.  Operand 0 is the
;; result register, operand 1 the BLKmode string, operand 2 a QImode
;; immediate and operand 3 an immediate.  Everything is delegated to
;; ix86_expand_strlen; the expansion is DONE when that call returns
;; nonzero and FAILs otherwise.
24251 (define_expand "strlen<mode>"
24252 [(set (match_operand:P 0 "register_operand")
24253 (unspec:P [(match_operand:BLK 1 "general_operand")
24254 (match_operand:QI 2 "immediate_operand")
24255 (match_operand 3 "immediate_operand")]
24256 UNSPEC_SCAS))]
24257 ""
24258 {
24259 if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
24260 DONE;
24261 else
24262 FAIL;
24263 })
24264
;; Expander that builds one parallel: register operand 0 is set to
;; operand 2 (arbitrary RTL supplied by the caller), register operand 1
;; is clobbered, and FLAGS_REG is clobbered.  When TARGET_CLD is set, the
;; preparation code records that X86_DIRFLAG mode switching is needed.
24265 (define_expand "strlenqi_1"
24266 [(parallel [(set (match_operand 0 "register_operand")
24267 (match_operand 2))
24268 (clobber (match_operand 1 "register_operand"))
24269 (clobber (reg:CC FLAGS_REG))])]
24270 ""
24271 {
24272 if (TARGET_CLD)
24273 ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
24274 })
24275
;; String scan insn.  Operand 0 (earlyclobber "=&c") is set to an
;; UNSPEC_SCAS of: the BLKmode memory at address operand 5 (tied to
;; clobbered operand 1, "=D"), the QImode operand 2 (constraint "a"), the
;; immediate operand 3, and operand 4 (tied to operand 0).  FLAGS_REG is
;; clobbered.  Requires AX_REG, CX_REG and DI_REG to be non-fixed and
;; ix86_check_no_addr_space to accept the insn.
;; Output template: repnz scasb.  The prefix_rex attribute is forced to
;; "0" when the pointer mode P is DImode.
24276 (define_insn "*strlenqi_1"
24277 [(set (match_operand:P 0 "register_operand" "=&c")
24278 (unspec:P [(mem:BLK (match_operand:P 5 "register_operand" "1"))
24279 (match_operand:QI 2 "register_operand" "a")
24280 (match_operand:P 3 "immediate_operand" "i")
24281 (match_operand:P 4 "register_operand" "0")] UNSPEC_SCAS))
24282 (clobber (match_operand:P 1 "register_operand" "=D"))
24283 (clobber (reg:CC FLAGS_REG))]
24284 "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24285 && ix86_check_no_addr_space (insn)"
24286 "%^repnz{%;} scasb"
24287 [(set_attr "type" "str")
24288 (set_attr "mode" "QI")
24289 (set (attr "prefix_rex")
24290 (if_then_else
24291 (match_test "<P:MODE>mode == DImode")
24292 (const_string "0")
24293 (const_string "*")))
24294 (set_attr "prefix_rep" "1")])
24295
24296 ;; Peephole optimizations to clean up after cmpstrn*. This should be
24297 ;; handled in combine, but it is not currently up to the task.
24298 ;; When used for their truth value, the cmpstrn* expanders generate
24299 ;; code like this:
24300 ;;
24301 ;; repz cmpsb
24302 ;; seta %al
24303 ;; setb %dl
24304 ;; cmpb %al, %dl
24305 ;; jcc label
24306 ;;
24307 ;; The intermediate three instructions are unnecessary.
24308
24309 ;; This one handles cmpstrn*_nz_1...
;; Matches four consecutive insns: the block-compare parallel, a set of
;; QImode operand 7 from (gtu flags 0), a set of QImode operand 8 from
;; (ltu flags 0), and a flags-setting compare of operand 7 with operand 8.
;; If operands 7 and 8 are both dead after the fourth insn, the sequence
;; is replaced by the block-compare parallel alone, rebuilt from the
;; matched operands.
24310 (define_peephole2
24311 [(parallel[
24312 (set (reg:CC FLAGS_REG)
24313 (compare:CC (mem:BLK (match_operand 4 "register_operand"))
24314 (mem:BLK (match_operand 5 "register_operand"))))
24315 (use (match_operand 6 "register_operand"))
24316 (use (match_operand:SI 3 "immediate_operand"))
24317 (clobber (match_operand 0 "register_operand"))
24318 (clobber (match_operand 1 "register_operand"))
24319 (clobber (match_operand 2 "register_operand"))])
24320 (set (match_operand:QI 7 "register_operand")
24321 (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
24322 (set (match_operand:QI 8 "register_operand")
24323 (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
24324 (set (reg FLAGS_REG)
24325 (compare (match_dup 7) (match_dup 8)))
24326 ]
24327 "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])"
24328 [(parallel[
24329 (set (reg:CC FLAGS_REG)
24330 (compare:CC (mem:BLK (match_dup 4))
24331 (mem:BLK (match_dup 5))))
24332 (use (match_dup 6))
24333 (use (match_dup 3))
24334 (clobber (match_dup 0))
24335 (clobber (match_dup 1))
24336 (clobber (match_dup 2))])])
24337
24338 ;; ...and this one handles cmpstrn*_1.
;; Matches four consecutive insns: the conditional block-compare parallel
;; (flags set only when operand 6 is nonzero), a set of QImode operand 7
;; from (gtu flags 0), a set of QImode operand 8 from (ltu flags 0), and a
;; flags-setting compare of operand 7 with operand 8.  If operands 7 and 8
;; are both dead after the fourth insn, the sequence is replaced by the
;; block-compare parallel alone, rebuilt from the matched operands.
24339 (define_peephole2
24340 [(parallel[
24341 (set (reg:CC FLAGS_REG)
24342 (if_then_else:CC (ne (match_operand 6 "register_operand")
24343 (const_int 0))
24344 (compare:CC (mem:BLK (match_operand 4 "register_operand"))
24345 (mem:BLK (match_operand 5 "register_operand")))
24346 (reg:CC FLAGS_REG)))
24347 (use (match_operand:SI 3 "immediate_operand"))
24348 (clobber (match_operand 0 "register_operand"))
24349 (clobber (match_operand 1 "register_operand"))
24350 (clobber (match_operand 2 "register_operand"))])
24351 (set (match_operand:QI 7 "register_operand")
24352 (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
24353 (set (match_operand:QI 8 "register_operand")
24354 (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
24355 (set (reg FLAGS_REG)
24356 (compare (match_dup 7) (match_dup 8)))
24357 ]
24358 "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])"
24359 [(parallel[
24360 (set (reg:CC FLAGS_REG)
24361 (if_then_else:CC (ne (match_dup 6)
24362 (const_int 0))
24363 (compare:CC (mem:BLK (match_dup 4))
24364 (mem:BLK (match_dup 5)))
24365 (reg:CC FLAGS_REG)))
24366 (use (match_dup 3))
24367 (clobber (match_dup 0))
24368 (clobber (match_dup 1))
24369 (clobber (match_dup 2))])])
24370 \f
24371 ;; Conditional move instructions.
24372
;; Integer conditional-move expander, instantiated for each SWIM mode.
;; Operand 1 is the comparison; operands 2 and 3 are the two candidate
;; values.  All work is done by ix86_expand_int_movcc on the operand
;; array; the expansion is DONE when it returns nonzero and FAILs
;; otherwise.
24373 (define_expand "mov<mode>cc"
24374 [(set (match_operand:SWIM 0 "register_operand")
24375 (if_then_else:SWIM (match_operand 1 "comparison_operator")
24376 (match_operand:SWIM 2 "<general_operand>")
24377 (match_operand:SWIM 3 "<general_operand>")))]
24378 ""
24379 "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
24380
24381 ;; Data flow gets confused by our desire for `sbbl reg,reg', and clearing
24382 ;; the register first winds up with `sbbl $0,reg', which is also weird.
24383 ;; So just document what we're doing explicitly.
24384
;; Expander (SWI48 modes) producing a parallel that sets operand 0 to -1
;; when the carry-flag comparison operand 2, applied to flags operand 1
;; and zero, holds, and to 0 otherwise; FLAGS_REG is clobbered.  There is
;; no condition and no preparation code.
24385 (define_expand "x86_mov<mode>cc_0_m1"
24386 [(parallel
24387 [(set (match_operand:SWI48 0 "register_operand")
24388 (if_then_else:SWI48
24389 (match_operator:SWI48 2 "ix86_carry_flag_operator"
24390 [(match_operand 1 "flags_reg_operand")
24391 (const_int 0)])
24392 (const_int -1)
24393 (const_int 0)))
24394 (clobber (reg:CC FLAGS_REG))])])
24395
;; Insn (SWI48 modes) that sets register operand 0 to -1 when the
;; carry-flag comparison operand 1 on FLAGS_REG holds and to 0 otherwise,
;; clobbering FLAGS_REG.  Output template: sbb of operand 0 with itself.
24396 (define_insn "*x86_mov<mode>cc_0_m1"
24397 [(set (match_operand:SWI48 0 "register_operand" "=r")
24398 (if_then_else:SWI48 (match_operator 1 "ix86_carry_flag_operator"
24399 [(reg FLAGS_REG) (const_int 0)])
24400 (const_int -1)
24401 (const_int 0)))
24402 (clobber (reg:CC FLAGS_REG))]
24403 ""
24404 "sbb{<imodesuffix>}\t%0, %0"
24405 [(set_attr "type" "alu1")
24406 (set_attr "use_carry" "1")
24407 (set_attr "pent_pair" "pu")
24408 (set_attr "mode" "<MODE>")
24409 (set_attr "length_immediate" "0")])
24410
;; Insn (SWI48 modes) matching the same carry-flag comparison written as a
;; 1-bit sign_extract at position 0 of the comparison result.  FLAGS_REG is
;; clobbered.  Output template: sbb of operand 0 with itself.
24411 (define_insn "*x86_mov<mode>cc_0_m1_se"
24412 [(set (match_operand:SWI48 0 "register_operand" "=r")
24413 (sign_extract:SWI48 (match_operator 1 "ix86_carry_flag_operator"
24414 [(reg FLAGS_REG) (const_int 0)])
24415 (const_int 1)
24416 (const_int 0)))
24417 (clobber (reg:CC FLAGS_REG))]
24418 ""
24419 "sbb{<imodesuffix>}\t%0, %0"
24420 [(set_attr "type" "alu1")
24421 (set_attr "use_carry" "1")
24422 (set_attr "pent_pair" "pu")
24423 (set_attr "mode" "<MODE>")
24424 (set_attr "length_immediate" "0")])
24425
;; Insn (all SWI modes) matching the negation of a carry-flag comparison on
;; FLAGS_REG, with FLAGS_REG clobbered.  The register class of operand 0
;; comes from the <r> mode attribute.  Output template: sbb of operand 0
;; with itself.
24426 (define_insn "*x86_mov<mode>cc_0_m1_neg"
24427 [(set (match_operand:SWI 0 "register_operand" "=<r>")
24428 (neg:SWI (match_operator 1 "ix86_carry_flag_operator"
24429 [(reg FLAGS_REG) (const_int 0)])))
24430 (clobber (reg:CC FLAGS_REG))]
24431 ""
24432 "sbb{<imodesuffix>}\t%0, %0"
24433 [(set_attr "type" "alu1")
24434 (set_attr "use_carry" "1")
24435 (set_attr "pent_pair" "pu")
24436 (set_attr "mode" "<MODE>")
24437 (set_attr "length_immediate" "0")])
24438
;; Expander (SWI48 modes) producing a parallel that sets operand 0 to the
;; negation of (ltu (reg:CCC FLAGS_REG) 0) and clobbers FLAGS_REG.
24439 (define_expand "x86_mov<mode>cc_0_m1_neg"
24440 [(parallel
24441 [(set (match_operand:SWI48 0 "register_operand")
24442 (neg:SWI48 (ltu:SWI48 (reg:CCC FLAGS_REG) (const_int 0))))
24443 (clobber (reg:CC FLAGS_REG))])])
24444
;; Splits operand 0 = -(operand 1 <=u constant operand 2) into a CCmode
;; compare of operand 1 against the constant plus one, followed by
;; operand 0 = -(ltu flags 0).  The preparation statement replaces
;; operand 2 with INTVAL (operand 2) + 1.
;; NOTE(review): the condition rejects constants -1 and 2147483647,
;; presumably so the incremented constant neither wraps nor leaves the
;; range accepted by x86_64_immediate_operand -- confirm.
24445 (define_split
24446 [(set (match_operand:SWI48 0 "register_operand")
24447 (neg:SWI48
24448 (leu:SWI48
24449 (match_operand 1 "int_nonimmediate_operand")
24450 (match_operand 2 "const_int_operand"))))]
24451 "x86_64_immediate_operand (operands[2], VOIDmode)
24452 && INTVAL (operands[2]) != -1
24453 && INTVAL (operands[2]) != 2147483647"
24454 [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2)))
24455 (set (match_dup 0)
24456 (neg:SWI48 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0))))]
24457 "operands[2] = GEN_INT (INTVAL (operands[2]) + 1);")
24458
;; Splits operand 0 = -(operand 1 == 0) into a CCmode compare of operand 1
;; against the constant 1, followed by operand 0 = -(ltu flags 0).
;; Unconditional; applies to every SWI mode.
24459 (define_split
24460 [(set (match_operand:SWI 0 "register_operand")
24461 (neg:SWI
24462 (eq:SWI
24463 (match_operand 1 "int_nonimmediate_operand")
24464 (const_int 0))))]
24465 ""
24466 [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (const_int 1)))
24467 (set (match_dup 0)
24468 (neg:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))))])
24469
;; Splits operand 0 = -(operand 1 != 0) into a CCCmode flags set expressed
;; as UNSPEC_CC_NE of operand 1 and zero, followed by
;; operand 0 = -(ltu flags 0).  Unconditional; applies to every SWI mode.
24470 (define_split
24471 [(set (match_operand:SWI 0 "register_operand")
24472 (neg:SWI
24473 (ne:SWI
24474 (match_operand 1 "int_nonimmediate_operand")
24475 (const_int 0))))]
24476 ""
24477 [(set (reg:CCC FLAGS_REG)
24478 (unspec:CCC [(match_dup 1) (const_int 0)] UNSPEC_CC_NE))
24479 (set (match_dup 0)
24480 (neg:SWI (ltu:SWI (reg:CCC FLAGS_REG) (const_int 0))))])
24481
;; Conditional move (SWI248 modes) on a flags comparison (operand 1).
;; Requires TARGET_CMOVE and that operands 2 and 3 are not both memory.
;; Four alternatives:
;;   0: operand 3 tied to the destination; cmov of operand 2 on %C1.
;;   1: operand 2 tied to the destination; cmov of operand 3 on the
;;      reversed condition %c1.
;;   2, 3: three-operand forms with an untied destination, enabled only
;;      for isa "apx_ndd".
24482 (define_insn "*mov<mode>cc_noc"
24483 [(set (match_operand:SWI248 0 "register_operand" "=r,r,r,r")
24484 (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
24485 [(reg FLAGS_REG) (const_int 0)])
24486 (match_operand:SWI248 2 "nonimmediate_operand" "rm,0,rm,r")
24487 (match_operand:SWI248 3 "nonimmediate_operand" "0,rm,r,rm")))]
24488 "TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
24489 "@
24490 cmov%O2%C1\t{%2, %0|%0, %2}
24491 cmov%O2%c1\t{%3, %0|%0, %3}
24492 cmov%O2%C1\t{%2, %3, %0|%0, %3, %2}
24493 cmov%O2%c1\t{%3, %2, %0|%0, %2, %3}"
24494 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
24495 (set_attr "type" "icmov")
24496 (set_attr "mode" "<MODE>")])
24497
;; Conditional move whose DImode result selects between the zero
;; extensions of SImode operands 2 and 3 (the zero_extend is on each arm).
;; Requires TARGET_64BIT and TARGET_CMOVE, and that operands 2 and 3 are
;; not both memory.  The templates print the destination with %k0.
;; Alternatives 2 and 3 are three-operand forms enabled only for isa
;; "apx_ndd".
24498 (define_insn "*movsicc_noc_zext"
24499 [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
24500 (if_then_else:DI (match_operator 1 "ix86_comparison_operator"
24501 [(reg FLAGS_REG) (const_int 0)])
24502 (zero_extend:DI
24503 (match_operand:SI 2 "nonimmediate_operand" "rm,0,rm,r"))
24504 (zero_extend:DI
24505 (match_operand:SI 3 "nonimmediate_operand" "0,rm,r,rm"))))]
24506 "TARGET_64BIT
24507 && TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
24508 "@
24509 cmov%O2%C1\t{%2, %k0|%k0, %2}
24510 cmov%O2%c1\t{%3, %k0|%k0, %3}
24511 cmov%O2%C1\t{%2, %3, %k0|%k0, %3, %2}
24512 cmov%O2%c1\t{%3, %2, %k0|%k0, %2, %3}"
24513 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
24514 (set_attr "type" "icmov")
24515 (set_attr "mode" "SI")])
24516
;; Conditional move whose DImode result is the zero extension of an SImode
;; if_then_else (the zero_extend wraps the whole selection).  Requires
;; TARGET_64BIT and TARGET_CMOVE, and that operands 2 and 3 are not both
;; memory.  The templates print the destination with %k0.  Alternatives 2
;; and 3 are three-operand forms enabled only for isa "apx_ndd".
;; NOTE(review): operand 0 uses the nonimmediate_operand predicate although
;; every alternative constrains it to "r" -- confirm this is intentional.
24517 (define_insn "*movsicc_noc_zext_1"
24518 [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,r")
24519 (zero_extend:DI
24520 (if_then_else:SI (match_operator 1 "ix86_comparison_operator"
24521 [(reg FLAGS_REG) (const_int 0)])
24522 (match_operand:SI 2 "nonimmediate_operand" "rm,0,rm,r")
24523 (match_operand:SI 3 "nonimmediate_operand" "0,rm,r,rm"))))]
24524 "TARGET_64BIT
24525 && TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
24526 "@
24527 cmov%O2%C1\t{%2, %k0|%k0, %2}
24528 cmov%O2%c1\t{%3, %k0|%k0, %3}
24529 cmov%O2%C1\t{%2, %3, %k0|%k0, %3, %2}
24530 cmov%O2%c1\t{%3, %2, %k0|%k0, %2, %3}"
24531 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
24532 (set_attr "type" "icmov")
24533 (set_attr "mode" "SI")])
24534
24535
24536 ;; Don't do conditional moves with memory inputs. This splitter helps
24537 ;; register starved x86_32 by forcing inputs into registers before reload.
;; Re-emits the same conditional move after forcing both operand 2 and
;; operand 3 into registers with force_reg.  Applies only when all of the
;; following hold: not TARGET_64BIT, TARGET_CMOVE,
;; TARGET_AVOID_MEM_OPND_FOR_CMOVE, at least one of operands 2/3 is
;; memory, new pseudos can still be created, and the insn is being
;; optimized for speed.
24538 (define_split
24539 [(set (match_operand:SWI248 0 "register_operand")
24540 (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
24541 [(reg FLAGS_REG) (const_int 0)])
24542 (match_operand:SWI248 2 "nonimmediate_operand")
24543 (match_operand:SWI248 3 "nonimmediate_operand")))]
24544 "!TARGET_64BIT && TARGET_CMOVE
24545 && TARGET_AVOID_MEM_OPND_FOR_CMOVE
24546 && (MEM_P (operands[2]) || MEM_P (operands[3]))
24547 && can_create_pseudo_p ()
24548 && optimize_insn_for_speed_p ()"
24549 [(set (match_dup 0)
24550 (if_then_else:SWI248 (match_dup 1) (match_dup 2) (match_dup 3)))]
24551 {
24552 operands[2] = force_reg (<MODE>mode, operands[2]);
24553 operands[3] = force_reg (<MODE>mode, operands[3]);
24554 })
24555
;; QImode conditional move between two registers on a flags comparison.
;; Requires TARGET_CMOVE and !TARGET_PARTIAL_REG_STALL.  The output
;; template is "#", so no assembly is printed for this pattern directly;
;; it has to be split before final output.  The third alternative (no
;; tied operand) is enabled only for isa "apx_ndd".
24556 (define_insn "*movqicc_noc"
24557 [(set (match_operand:QI 0 "register_operand" "=r,r,r")
24558 (if_then_else:QI (match_operator 1 "ix86_comparison_operator"
24559 [(reg FLAGS_REG) (const_int 0)])
24560 (match_operand:QI 2 "register_operand" "r,0,r")
24561 (match_operand:QI 3 "register_operand" "0,r,r")))]
24562 "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL"
24563 "#"
24564 [(set_attr "isa" "*,*,apx_ndd")
24565 (set_attr "type" "icmov")
24566 (set_attr "mode" "QI")])
24567
;; After reload, rewrites a register-only conditional move in an SWI12
;; mode as an SImode conditional move: operands 0, 2 and 3 are replaced by
;; their SImode low parts via gen_lowpart.  Requires TARGET_CMOVE and
;; !TARGET_PARTIAL_REG_STALL.
24568 (define_split
24569 [(set (match_operand:SWI12 0 "register_operand")
24570 (if_then_else:SWI12 (match_operator 1 "ix86_comparison_operator"
24571 [(reg FLAGS_REG) (const_int 0)])
24572 (match_operand:SWI12 2 "register_operand")
24573 (match_operand:SWI12 3 "register_operand")))]
24574 "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL
24575 && reload_completed"
24576 [(set (match_dup 0)
24577 (if_then_else:SI (match_dup 1) (match_dup 2) (match_dup 3)))]
24578 {
24579 operands[0] = gen_lowpart (SImode, operands[0]);
24580 operands[2] = gen_lowpart (SImode, operands[2]);
24581 operands[3] = gen_lowpart (SImode, operands[3]);
24582 })
24583
24584 ;; Don't do conditional moves with memory inputs
;; Using scratch register operand 4, rewrites a conditional move with a
;; memory input as a load into the scratch followed by a conditional move
;; that reads the scratch instead.  Operand 2 is checked first, so it is
;; the one replaced when it is memory; otherwise operand 3 is replaced.
;; Operand 5 holds the memory operand that gets loaded.  Applies when
;; TARGET_CMOVE and TARGET_AVOID_MEM_OPND_FOR_CMOVE are set, at least one
;; of operands 2/3 is memory, and the insn is optimized for speed.
24585 (define_peephole2
24586 [(match_scratch:SWI248 4 "r")
24587 (set (match_operand:SWI248 0 "register_operand")
24588 (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
24589 [(reg FLAGS_REG) (const_int 0)])
24590 (match_operand:SWI248 2 "nonimmediate_operand")
24591 (match_operand:SWI248 3 "nonimmediate_operand")))]
24592 "TARGET_CMOVE && TARGET_AVOID_MEM_OPND_FOR_CMOVE
24593 && (MEM_P (operands[2]) || MEM_P (operands[3]))
24594 && optimize_insn_for_speed_p ()"
24595 [(set (match_dup 4) (match_dup 5))
24596 (set (match_dup 0)
24597 (if_then_else:SWI248 (match_dup 1) (match_dup 2) (match_dup 3)))]
24598 {
24599 if (MEM_P (operands[2]))
24600 {
24601 operands[5] = operands[2];
24602 operands[2] = operands[4];
24603 }
24604 else if (MEM_P (operands[3]))
24605 {
24606 operands[5] = operands[3];
24607 operands[3] = operands[4];
24608 }
24609 else
24610 gcc_unreachable ();
24611 })
24612
;; Zero-extending variant of the memory-input conditional-move rewrite.
;; Using SImode scratch operand 4, the SImode memory input is loaded into
;; the scratch, then the DImode conditional move selects between the zero
;; extensions of operands 2 and 3 with the scratch substituted.  Operand 2
;; is checked first; operand 5 holds the memory operand that gets loaded.
;; Applies when TARGET_64BIT, TARGET_CMOVE and
;; TARGET_AVOID_MEM_OPND_FOR_CMOVE are set, at least one of operands 2/3
;; is memory, and the insn is optimized for speed.
24613 (define_peephole2
24614 [(match_scratch:SI 4 "r")
24615 (set (match_operand:DI 0 "register_operand")
24616 (if_then_else:DI (match_operator 1 "ix86_comparison_operator"
24617 [(reg FLAGS_REG) (const_int 0)])
24618 (zero_extend:DI
24619 (match_operand:SI 2 "nonimmediate_operand"))
24620 (zero_extend:DI
24621 (match_operand:SI 3 "nonimmediate_operand"))))]
24622 "TARGET_64BIT
24623 && TARGET_CMOVE && TARGET_AVOID_MEM_OPND_FOR_CMOVE
24624 && (MEM_P (operands[2]) || MEM_P (operands[3]))
24625 && optimize_insn_for_speed_p ()"
24626 [(set (match_dup 4) (match_dup 5))
24627 (set (match_dup 0)
24628 (if_then_else:DI (match_dup 1)
24629 (zero_extend:DI (match_dup 2))
24630 (zero_extend:DI (match_dup 3))))]
24631 {
24632 if (MEM_P (operands[2]))
24633 {
24634 operands[5] = operands[2];
24635 operands[2] = operands[4];
24636 }
24637 else if (MEM_P (operands[3]))
24638 {
24639 operands[5] = operands[3];
24640 operands[3] = operands[4];
24641 }
24642 else
24643 gcc_unreachable ();
24644 })
24645
24646 ;; Eliminate a reg-reg mov by inverting the condition of a cmov (#1).
24647 ;; mov r0,r1; dec r0; mov r2,r3; cmov r0,r2 -> dec r1; mov r0,r3; cmov r0, r1
;; Matched sequence (four insns):
;;   1. operand 0 = operand 1                      (reg-reg move)
;;   2. parallel: flags = operand 5; operand 0 = operand 6
;;   3. operand 2 = operand 3
;;   4. operand 0 = cond (operand 4) ? operand 0 : operand 2
;; Replacement (three insns):
;;   parallel: operand 7 = operand 8; operand 1 = operand 9
;;   operand 0 = operand 3
;;   operand 0 = cond (operand 4) ? operand 1 : operand 0
;; Operand 7 is the destination of the first SET in the matched parallel
;; (insn index 1).  Operands 8 and 9 are built from operands 5 and 6 by
;; ix86_replace_reg_with_reg (..., operands[0], operands[1]) -- presumably
;; substituting operand 1 for operand 0; confirm against that helper.
;; Conditions: TARGET_CMOVE; operand 2 is a different register from
;; operands 0 and 1; operand 1 is dead after insn 1 and operand 2 after
;; insn 4; operand 3 does not overlap operand 0.
24648 (define_peephole2
24649 [(set (match_operand:SWI248 0 "general_reg_operand")
24650 (match_operand:SWI248 1 "general_reg_operand"))
24651 (parallel [(set (reg FLAGS_REG) (match_operand 5))
24652 (set (match_dup 0) (match_operand:SWI248 6))])
24653 (set (match_operand:SWI248 2 "general_reg_operand")
24654 (match_operand:SWI248 3 "general_gr_operand"))
24655 (set (match_dup 0)
24656 (if_then_else:SWI248 (match_operator 4 "ix86_comparison_operator"
24657 [(reg FLAGS_REG) (const_int 0)])
24658 (match_dup 0)
24659 (match_dup 2)))]
24660 "TARGET_CMOVE
24661 && REGNO (operands[2]) != REGNO (operands[0])
24662 && REGNO (operands[2]) != REGNO (operands[1])
24663 && peep2_reg_dead_p (1, operands[1])
24664 && peep2_reg_dead_p (4, operands[2])
24665 && !reg_overlap_mentioned_p (operands[0], operands[3])"
24666 [(parallel [(set (match_dup 7) (match_dup 8))
24667 (set (match_dup 1) (match_dup 9))])
24668 (set (match_dup 0) (match_dup 3))
24669 (set (match_dup 0) (if_then_else:SWI248 (match_dup 4)
24670 (match_dup 1)
24671 (match_dup 0)))]
24672 {
24673 operands[7] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (1)), 0, 0));
24674 operands[8]
24675 = ix86_replace_reg_with_reg (operands[5], operands[0], operands[1]);
24676 operands[9]
24677 = ix86_replace_reg_with_reg (operands[6], operands[0], operands[1]);
24678 })
24679
24680 ;; Eliminate a reg-reg mov by inverting the condition of a cmov (#2).
24681 ;; mov r2,r3; mov r0,r1; dec r0; cmov r0,r2 -> dec r1; mov r0,r3; cmov r0, r1
;; Matched sequence (four insns):
;;   1. operand 2 = operand 3
;;   2. operand 0 = operand 1                      (reg-reg move)
;;   3. parallel: flags = operand 5; operand 0 = operand 6
;;   4. operand 0 = cond (operand 4) ? operand 0 : operand 2
;; Replacement (three insns):
;;   parallel: operand 7 = operand 8; operand 1 = operand 9
;;   operand 0 = operand 3
;;   operand 0 = cond (operand 4) ? operand 1 : operand 0
;; Operand 7 is the destination of the first SET in the matched parallel
;; (insn index 2).  Operands 8 and 9 are built from operands 5 and 6 by
;; ix86_replace_reg_with_reg (..., operands[0], operands[1]) -- presumably
;; substituting operand 1 for operand 0; confirm against that helper.
;; Conditions: TARGET_CMOVE; operand 2 is a different register from
;; operands 0 and 1; operand 1 is dead after insn 2 and operand 2 after
;; insn 4; operand 3 does not overlap operand 0; operand 6 does not
;; mention operand 2.
24682 (define_peephole2
24683 [(set (match_operand:SWI248 2 "general_reg_operand")
24684 (match_operand:SWI248 3 "general_gr_operand"))
24685 (set (match_operand:SWI248 0 "general_reg_operand")
24686 (match_operand:SWI248 1 "general_reg_operand"))
24687 (parallel [(set (reg FLAGS_REG) (match_operand 5))
24688 (set (match_dup 0) (match_operand:SWI248 6))])
24689 (set (match_dup 0)
24690 (if_then_else:SWI248 (match_operator 4 "ix86_comparison_operator"
24691 [(reg FLAGS_REG) (const_int 0)])
24692 (match_dup 0)
24693 (match_dup 2)))]
24694 "TARGET_CMOVE
24695 && REGNO (operands[2]) != REGNO (operands[0])
24696 && REGNO (operands[2]) != REGNO (operands[1])
24697 && peep2_reg_dead_p (2, operands[1])
24698 && peep2_reg_dead_p (4, operands[2])
24699 && !reg_overlap_mentioned_p (operands[0], operands[3])
24700 && !reg_mentioned_p (operands[2], operands[6])"
24701 [(parallel [(set (match_dup 7) (match_dup 8))
24702 (set (match_dup 1) (match_dup 9))])
24703 (set (match_dup 0) (match_dup 3))
24704 (set (match_dup 0) (if_then_else:SWI248 (match_dup 4)
24705 (match_dup 1)
24706 (match_dup 0)))]
24707 {
24708 operands[7] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (2)), 0, 0));
24709 operands[8]
24710 = ix86_replace_reg_with_reg (operands[5], operands[0], operands[1]);
24711 operands[9]
24712 = ix86_replace_reg_with_reg (operands[6], operands[0], operands[1]);
24713 })
24714
;; HFmode move wrapped in UNSPEC_MOVCC_MASK, enabled by TARGET_AVX512FP16.
;; Operand 1 is the source, operand 2 ("nonimm_or_0_operand", constraint
;; "0C") is either tied to the destination or matches "C", and operand 3 is
;; a QImode "Yk" register that the template prints inside %{...%}.  All
;; three alternatives (load, store, register-register) emit vmovsh; the
;; register-register alternative prints operand 1 with the %d modifier.
24715 (define_insn "movhf_mask"
24716 [(set (match_operand:HF 0 "nonimmediate_operand" "=v,m,v")
24717 (unspec:HF
24718 [(match_operand:HF 1 "nonimmediate_operand" "m,v,v")
24719 (match_operand:HF 2 "nonimm_or_0_operand" "0C,0C,0C")
24720 (match_operand:QI 3 "register_operand" "Yk,Yk,Yk")]
24721 UNSPEC_MOVCC_MASK))]
24722 "TARGET_AVX512FP16"
24723 "@
24724 vmovsh\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}
24725 vmovsh\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}
24726 vmovsh\t{%d1, %0%{%3%}%N2|%0%{%3%}%N2, %d1}"
24727 [(set_attr "type" "ssemov")
24728 (set_attr "prefix" "evex")
24729 (set_attr "mode" "HF")])
24730
;; HFmode conditional-move expander, enabled by TARGET_AVX512FP16.
;; All work is delegated to ix86_expand_fp_movcc; the expansion FAILs when
;; that helper returns false.
24731 (define_expand "movhfcc"
24732 [(set (match_operand:HF 0 "register_operand")
24733 (if_then_else:HF
24734 (match_operand 1 "comparison_operator")
24735 (match_operand:HF 2 "register_operand")
24736 (match_operand:HF 3 "register_operand")))]
24737 "TARGET_AVX512FP16"
24738 "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;")
24739
;; Conditional-move expander for the X87MODEF modes.  Enabled either when
;; both TARGET_80387 and TARGET_CMOVE hold, or when the mode is an SSE float
;; mode and TARGET_SSE_MATH is set.  Delegates to ix86_expand_fp_movcc and
;; FAILs when that helper returns false.
24740 (define_expand "mov<mode>cc"
24741 [(set (match_operand:X87MODEF 0 "register_operand")
24742 (if_then_else:X87MODEF
24743 (match_operand 1 "comparison_operator")
24744 (match_operand:X87MODEF 2 "register_operand")
24745 (match_operand:X87MODEF 3 "register_operand")))]
24746 "(TARGET_80387 && TARGET_CMOVE)
24747 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
24748 "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;")
24749
;; XFmode conditional move on FLAGS_REG using fcmov.
;; Each alternative ties one arm to the destination ("0") and moves the
;; other arm: alternative 0 moves operand 2 using %F1, alternative 1 moves
;; operand 3 using %f1 (the lower-case modifier prints the reversed
;; condition, per the modifier list at the top of this file).
24750 (define_insn "*movxfcc_1"
24751 [(set (match_operand:XF 0 "register_operand" "=f,f")
24752 (if_then_else:XF (match_operator 1 "fcmov_comparison_operator"
24753 [(reg FLAGS_REG) (const_int 0)])
24754 (match_operand:XF 2 "register_operand" "f,0")
24755 (match_operand:XF 3 "register_operand" "0,f")))]
24756 "TARGET_80387 && TARGET_CMOVE"
24757 "@
24758 fcmov%F1\t{%2, %0|%0, %2}
24759 fcmov%f1\t{%3, %0|%0, %3}"
24760 [(set_attr "type" "fcmov")
24761 (set_attr "mode" "XF")])
24762
;; DFmode conditional move on FLAGS_REG.  Six alternatives:
;;   0-1  fcmov on "f" registers (one arm tied to the destination);
;;   2-3  general registers, isa "nox64": output is "#" with type "multi",
;;        and the destination is earlyclobber;
;;   4-5  general registers, isa "x64": a single integer cmov.
;; The insn condition rejects the case where both arms are memory.
;; NOTE(review): alternatives 2-3 are presumably the ones handled by the
;; DFmode define_split that follows this pattern -- confirm.
24763 (define_insn "*movdfcc_1"
24764 [(set (match_operand:DF 0 "register_operand" "=f,f,&r,&r,r ,r")
24765 (if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
24766 [(reg FLAGS_REG) (const_int 0)])
24767 (match_operand:DF 2 "nonimmediate_operand"
24768 "f ,0,rm,0 ,rm,0")
24769 (match_operand:DF 3 "nonimmediate_operand"
24770 "0 ,f,0 ,rm,0, rm")))]
24771 "TARGET_80387 && TARGET_CMOVE
24772 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
24773 "@
24774 fcmov%F1\t{%2, %0|%0, %2}
24775 fcmov%f1\t{%3, %0|%0, %3}
24776 #
24777 #
24778 cmov%O2%C1\t{%2, %0|%0, %2}
24779 cmov%O2%c1\t{%3, %0|%0, %3}"
24780 [(set_attr "isa" "*,*,nox64,nox64,x64,x64")
24781 (set_attr "type" "fcmov,fcmov,multi,multi,icmov,icmov")
24782 (set_attr "mode" "DF,DF,DI,DI,DI,DI")])
24783
;; After reload on !TARGET_64BIT, split a DFmode conditional move whose
;; destination is a general register into two SImode conditional moves
;; that share the comparison in operand 1.
24784 (define_split
24785 [(set (match_operand:DF 0 "general_reg_operand")
24786 (if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
24787 [(reg FLAGS_REG) (const_int 0)])
24788 (match_operand:DF 2 "nonimmediate_operand")
24789 (match_operand:DF 3 "nonimmediate_operand")))]
24790 "!TARGET_64BIT && reload_completed"
24791 [(set (match_dup 2)
24792 (if_then_else:SI (match_dup 1) (match_dup 4) (match_dup 5)))
24793 (set (match_dup 3)
24794 (if_then_else:SI (match_dup 1) (match_dup 6) (match_dup 7)))]
24795 {
/* Order matters: the first call reads the two source arms from
   operands[2..3] and stores their halves in operands[4..7]; only then
   does the second call overwrite operands[2] and operands[3] with the
   two halves of the destination.  NOTE(review): the halves are treated
   as DImode pieces of the DFmode values -- confirm against
   split_double_mode.  */
24796 split_double_mode (DImode, &operands[2], 2, &operands[4], &operands[6]);
24797 split_double_mode (DImode, &operands[0], 1, &operands[2], &operands[3]);
24798 })
24799
;; SFmode conditional move on FLAGS_REG.  Alternatives 0-1 use fcmov on
;; "f" registers; alternatives 2-3 use an integer cmov on general registers
;; (attribute mode SI).  In every alternative one arm is tied to the
;; destination, and the insn condition rejects two memory arms.
24800 (define_insn "*movsfcc_1_387"
24801 [(set (match_operand:SF 0 "register_operand" "=f,f,r,r")
24802 (if_then_else:SF (match_operator 1 "fcmov_comparison_operator"
24803 [(reg FLAGS_REG) (const_int 0)])
24804 (match_operand:SF 2 "nonimmediate_operand" "f,0,rm,0")
24805 (match_operand:SF 3 "nonimmediate_operand" "0,f,0,rm")))]
24806 "TARGET_80387 && TARGET_CMOVE
24807 && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
24808 "@
24809 fcmov%F1\t{%2, %0|%0, %2}
24810 fcmov%f1\t{%3, %0|%0, %3}
24811 cmov%O2%C1\t{%2, %0|%0, %2}
24812 cmov%O2%c1\t{%3, %0|%0, %3}"
24813 [(set_attr "type" "fcmov,fcmov,icmov,icmov")
24814 (set_attr "mode" "SF,SF,SI,SI")])
24815
24816 ;; Don't do conditional moves with memory inputs. This splitter helps
24817 ;; register starved x86_32 by forcing inputs into registers before reload.
;; It applies only while new pseudos can still be created, when the insn is
;; optimized for speed, and when at least one arm is a memory operand; both
;; arms are then passed through force_reg and the same if_then_else is
;; re-emitted.
24818 (define_split
24819 [(set (match_operand:MODEF 0 "register_operand")
24820 (if_then_else:MODEF (match_operator 1 "ix86_comparison_operator"
24821 [(reg FLAGS_REG) (const_int 0)])
24822 (match_operand:MODEF 2 "nonimmediate_operand")
24823 (match_operand:MODEF 3 "nonimmediate_operand")))]
24824 "!TARGET_64BIT && TARGET_80387 && TARGET_CMOVE
24825 && TARGET_AVOID_MEM_OPND_FOR_CMOVE
24826 && (MEM_P (operands[2]) || MEM_P (operands[3]))
24827 && can_create_pseudo_p ()
24828 && optimize_insn_for_speed_p ()"
24829 [(set (match_dup 0)
24830 (if_then_else:MODEF (match_dup 1) (match_dup 2) (match_dup 3)))]
24831 {
24832 operands[2] = force_reg (<MODE>mode, operands[2]);
24833 operands[3] = force_reg (<MODE>mode, operands[3]);
24834 })
24835
24836 ;; Don't do conditional moves with memory inputs
;; Peephole variant: with a free general register (scratch operand 4),
;; load the memory arm into the scratch first and use the scratch in the
;; cmov.  DFmode is only handled when TARGET_64BIT.  Operand 5 is the
;; memory operand being loaded.
24837 (define_peephole2
24838 [(match_scratch:MODEF 4 "r")
24839 (set (match_operand:MODEF 0 "general_reg_operand")
24840 (if_then_else:MODEF (match_operator 1 "fcmov_comparison_operator"
24841 [(reg FLAGS_REG) (const_int 0)])
24842 (match_operand:MODEF 2 "nonimmediate_operand")
24843 (match_operand:MODEF 3 "nonimmediate_operand")))]
24844 "(<MODE>mode != DFmode || TARGET_64BIT)
24845 && TARGET_80387 && TARGET_CMOVE
24846 && TARGET_AVOID_MEM_OPND_FOR_CMOVE
24847 && (MEM_P (operands[2]) || MEM_P (operands[3]))
24848 && optimize_insn_for_speed_p ()"
24849 [(set (match_dup 4) (match_dup 5))
24850 (set (match_dup 0)
24851 (if_then_else:MODEF (match_dup 1) (match_dup 2) (match_dup 3)))]
24852 {
/* Only one arm is redirected through the scratch; operand 2 is checked
   first, so it wins if both arms are memory.  */
24853 if (MEM_P (operands[2]))
24854 {
24855 operands[5] = operands[2];
24856 operands[2] = operands[4];
24857 }
24858 else if (MEM_P (operands[3]))
24859 {
24860 operands[5] = operands[3];
24861 operands[3] = operands[4];
24862 }
24863 else
/* The insn condition guarantees that at least one arm is memory.  */
24864 gcc_unreachable ();
24865 })
24866
24867 ;; All moves in XOP pcmov instructions are 128 bits and hence we restrict
24868 ;; the scalar versions to have only XMM registers as operands.
24869
24870 ;; XOP conditional move
;; Operand 1 is the selector of the if_then_else and is itself a MODEF
;; register; operands 2 and 3 are the two arms.  All four operands use the
;; "x" constraint and the insn is emitted as vpcmov with attribute mode TI.
24871 (define_insn "*xop_pcmov_<mode>"
24872 [(set (match_operand:MODEF 0 "register_operand" "=x")
24873 (if_then_else:MODEF
24874 (match_operand:MODEF 1 "register_operand" "x")
24875 (match_operand:MODEF 2 "register_operand" "x")
24876 (match_operand:MODEF 3 "register_operand" "x")))]
24877 "TARGET_XOP"
24878 "vpcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}"
24879 [(set_attr "type" "sse4arg")
24880 (set_attr "mode" "TI")])
24881
24882 ;; These versions of the min/max patterns are intentionally ignorant of
24883 ;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
24884 ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
24885 ;; are undefined in this condition, we're certain this is correct.
;;
;; Alternative 0 (isa noavx) is the two-operand form with operand 1 tied
;; to the destination; alternative 1 (isa avx) is the three-operand form
;; whose mnemonic gets a "v" prefix.
24886
24887 (define_insn "<code><mode>3"
24888 [(set (match_operand:MODEF 0 "register_operand" "=x,v")
24889 (smaxmin:MODEF
24890 (match_operand:MODEF 1 "nonimmediate_operand" "%0,v")
24891 (match_operand:MODEF 2 "nonimmediate_operand" "xm,vm")))]
24892 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
24893 "@
24894 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
24895 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
24896 [(set_attr "isa" "noavx,avx")
24897 (set_attr "prefix" "orig,vex")
24898 (set_attr "type" "sseadd")
24899 (set_attr "mode" "<MODE>")])
24900
;; HFmode smax/smin, enabled by TARGET_AVX512FP16.  There is a single
;; three-operand alternative; operand 1 carries the "%" commutative mark,
;; like the MODEF smax/smin pattern.
24901 (define_insn "<code>hf3"
24902 [(set (match_operand:HF 0 "register_operand" "=v")
24903 (smaxmin:HF
24904 (match_operand:HF 1 "nonimmediate_operand" "%v")
24905 (match_operand:HF 2 "nonimmediate_operand" "vm")))]
24906 "TARGET_AVX512FP16"
24907 "v<maxmin_float>sh\t{%2, %1, %0|%0, %1, %2}"
24908 [(set_attr "prefix" "evex")
24909 (set_attr "type" "sseadd")
24910 (set_attr "mode" "HF")])
24911
24912 ;; These versions of the min/max patterns implement exactly the operations
24913 ;; min = (op1 < op2 ? op1 : op2)
24914 ;; max = (!(op1 < op2) ? op1 : op2)
24915 ;; Their operands are not commutative, and thus they may be used in the
24916 ;; presence of -0.0 and NaN.
;;
;; HFmode variant (TARGET_AVX512FP16): the operation is an unspec from the
;; IEEE_MAXMIN iterator, operand 1 must be a register and has no "%" mark.
24917
24918 (define_insn "*ieee_s<ieee_maxmin>hf3"
24919 [(set (match_operand:HF 0 "register_operand" "=v")
24920 (unspec:HF
24921 [(match_operand:HF 1 "register_operand" "v")
24922 (match_operand:HF 2 "nonimmediate_operand" "vm")]
24923 IEEE_MAXMIN))]
24924 "TARGET_AVX512FP16"
24925 "v<ieee_maxmin>sh\t{%2, %1, %0|%0, %1, %2}"
24926 [(set_attr "prefix" "evex")
24927 (set_attr "type" "sseadd")
24928 (set_attr "mode" "HF")])
24929
;; Non-commutative (unspec IEEE_MAXMIN) min/max for the MODEF modes under
;; SSE math.  Alternative 0 (isa noavx) ties operand 1 to the destination;
;; alternative 1 (isa avx) is the three-operand "v"-prefixed form.
24930 (define_insn "*ieee_s<ieee_maxmin><mode>3"
24931 [(set (match_operand:MODEF 0 "register_operand" "=x,v")
24932 (unspec:MODEF
24933 [(match_operand:MODEF 1 "register_operand" "0,v")
24934 (match_operand:MODEF 2 "nonimmediate_operand" "xm,vm")]
24935 IEEE_MAXMIN))]
24936 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
24937 "@
24938 <ieee_maxmin><ssemodesuffix>\t{%2, %0|%0, %2}
24939 v<ieee_maxmin><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
24940 [(set_attr "isa" "noavx,avx")
24941 (set_attr "prefix" "orig,maybe_evex")
24942 (set_attr "type" "sseadd")
24943 (set_attr "mode" "<MODE>")])
24944
24945 ;; Operand order in the min/max instructions matters for signed zeros and NaNs.
;; Recognize an UNSPEC_BLENDV whose selector is (lt op3 op4) with
;; op1 == op3 and op2 == op4 (checked with rtx_equal_p), and split it
;; before reload into UNSPEC_IEEE_MAX with the operands in the order
;; (op2, op1).
24946 (define_insn_and_split "*ieee_max<mode>3_1"
24947 [(set (match_operand:MODEF 0 "register_operand")
24948 (unspec:MODEF
24949 [(match_operand:MODEF 1 "register_operand")
24950 (match_operand:MODEF 2 "register_operand")
24951 (lt:MODEF
24952 (match_operand:MODEF 3 "register_operand")
24953 (match_operand:MODEF 4 "register_operand"))]
24954 UNSPEC_BLENDV))]
24955 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
24956 && (rtx_equal_p (operands[1], operands[3])
24957 && rtx_equal_p (operands[2], operands[4]))
24958 && ix86_pre_reload_split ()"
24959 "#"
24960 "&& 1"
24961 [(set (match_dup 0)
24962 (unspec:MODEF
24963 [(match_dup 2)
24964 (match_dup 1)]
24965 UNSPEC_IEEE_MAX))])
24966
;; Counterpart of the ieee_max splitter with the comparison operands
;; crossed: the UNSPEC_BLENDV selector is (lt op3 op4) with op1 == op4 and
;; op2 == op3, and the pre-reload split produces UNSPEC_IEEE_MIN with the
;; operands in the order (op2, op1).
24967 (define_insn_and_split "*ieee_min<mode>3_1"
24968 [(set (match_operand:MODEF 0 "register_operand")
24969 (unspec:MODEF
24970 [(match_operand:MODEF 1 "register_operand")
24971 (match_operand:MODEF 2 "register_operand")
24972 (lt:MODEF
24973 (match_operand:MODEF 3 "register_operand")
24974 (match_operand:MODEF 4 "register_operand"))]
24975 UNSPEC_BLENDV))]
24976 "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
24977 && (rtx_equal_p (operands[1], operands[4])
24978 && rtx_equal_p (operands[2], operands[3]))
24979 && ix86_pre_reload_split ()"
24980 "#"
24981 "&& 1"
24982 [(set (match_dup 0)
24983 (unspec:MODEF
24984 [(match_dup 2)
24985 (match_dup 1)]
24986 UNSPEC_IEEE_MIN))])
24987
24988 ;; Make two stack loads independent:
24989 ;; fld aa fld aa
24990 ;; fld %st(0) -> fld bb
24991 ;; fmul bb fmul %st(1), %st
24992 ;;
24993 ;; Actually we only match the last two instructions for simplicity.
;;
;; This variant matches the memory operand (3) as the second input of the
;; binary operator.  The replacement loads operand 3 into operand 0 and
;; applies the operator to the two registers.
24994
24995 (define_peephole2
24996 [(set (match_operand 0 "fp_register_operand")
24997 (match_operand 1 "fp_register_operand"))
24998 (set (match_dup 0)
24999 (match_operator 2 "binary_fp_operator"
25000 [(match_dup 0)
25001 (match_operand 3 "memory_operand")]))]
25002 "REGNO (operands[0]) != REGNO (operands[1])"
25003 [(set (match_dup 0) (match_dup 3))
25004 (set (match_dup 0)
25005 (match_op_dup 2
25006 [(match_dup 5) (match_dup 4)]))]
25007 {
/* operands[4] is the destination, which after the new load holds the
   former memory input; operands[5] is the register that was copied.  */
25008 operands[4] = operands[0];
25009 operands[5] = operands[1];
25010
25011 /* The % modifier is not operational anymore in peephole2's, so we have to
25012 swap the operands manually in the case of addition and multiplication. */
25013 if (COMMUTATIVE_ARITH_P (operands[2]))
25014 std::swap (operands[4], operands[5]);
25015 })
25016
;; Companion of the preceding peephole for the case where the memory
;; operand (3) is the first input of the binary operator and the copied
;; register (operand 0) is the second.  The replacement loads operand 3
;; into operand 0 and applies the operator to (operand 4, operand 5).
25017 (define_peephole2
25018 [(set (match_operand 0 "fp_register_operand")
25019 (match_operand 1 "fp_register_operand"))
25020 (set (match_dup 0)
25021 (match_operator 2 "binary_fp_operator"
25022 [(match_operand 3 "memory_operand")
25023 (match_dup 0)]))]
25024 "REGNO (operands[0]) != REGNO (operands[1])"
25025 [(set (match_dup 0) (match_dup 3))
25026 (set (match_dup 0)
25027 (match_op_dup 2
25028 [(match_dup 4) (match_dup 5)]))]
25029 {
/* operands[4] is the destination, which after the new load holds the
   former memory input; operands[5] is the register that was copied.  */
25030 operands[4] = operands[0];
25031 operands[5] = operands[1];
25032
25033 /* The % modifier is not operational anymore in peephole2's, so we have to
25034 swap the operands manually in the case of addition and multiplication. */
25035 if (COMMUTATIVE_ARITH_P (operands[2]))
25036 std::swap (operands[4], operands[5]);
25037 })
25038
25039 ;; Conditional addition patterns
;; Operand 0 is the destination, operand 1 an ordered comparison, operand 2
;; a register and operand 3 a const_int.  The expander is unconditional and
;; delegates everything to ix86_expand_int_addcc, FAILing when that helper
;; returns false.
25040 (define_expand "add<mode>cc"
25041 [(match_operand:SWI 0 "register_operand")
25042 (match_operand 1 "ordered_comparison_operator")
25043 (match_operand:SWI 2 "register_operand")
25044 (match_operand:SWI 3 "const_int_operand")]
25045 ""
25046 "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;")
25047
25048 ;; min/max patterns
25049
;; Maps each max/min code to the name of the comparison code under which
;; the first operand is selected.  The value is pasted as a bare C
;; identifier by the splitters that use it
;; (`enum rtx_code code = <maxmin_rel>;').
25050 (define_code_attr maxmin_rel
25051 [(smax "GE") (smin "LE") (umax "GEU") (umin "LEU")])
25052
;; Integer min/max expander for the SDWIM modes.  It has no preparation
;; statements, so the pattern (including the FLAGS_REG clobber) is emitted
;; as written.  Requires TARGET_CMOVE and is disabled for QImode when
;; TARGET_PARTIAL_REG_STALL is set.
25053 (define_expand "<code><mode>3"
25054 [(parallel
25055 [(set (match_operand:SDWIM 0 "register_operand")
25056 (maxmin:SDWIM
25057 (match_operand:SDWIM 1 "register_operand")
25058 (match_operand:SDWIM 2 "general_operand")))
25059 (clobber (reg:CC FLAGS_REG))])]
25060 "TARGET_CMOVE
25061 && (<MODE>mode != QImode || !TARGET_PARTIAL_REG_STALL)")
25062
;; Double-word integer min/max, split before reload into:
;;   - a compare of one pair of halves (gen_cmp_1),
;;   - a sub3_carry insn on the other pair whose result goes to a SCRATCH,
;;   - two word-sized conditional moves that share the comparison built in
;;     operand 6.
25063 (define_insn_and_split "*<code><dwi>3_doubleword"
25064 [(set (match_operand:<DWI> 0 "register_operand")
25065 (maxmin:<DWI>
25066 (match_operand:<DWI> 1 "register_operand")
25067 (match_operand:<DWI> 2 "general_operand")))
25068 (clobber (reg:CC FLAGS_REG))]
25069 "TARGET_CMOVE
25070 && ix86_pre_reload_split ()"
25071 "#"
25072 "&& 1"
25073 [(set (match_dup 0)
25074 (if_then_else:DWIH (match_dup 6)
25075 (match_dup 1)
25076 (match_dup 2)))
25077 (set (match_dup 3)
25078 (if_then_else:DWIH (match_dup 6)
25079 (match_dup 4)
25080 (match_dup 5)))]
25081 {
25082 operands[2] = force_reg (<DWI>mode, operands[2]);
25083
/* Split operands 0..2 in place: the first output array overwrites
   operands[0..2] and the second fills operands[3..5].  NOTE(review):
   presumably these are the low and high halves respectively, matching
   the cmplo/cmphi names below -- confirm against split_double_mode.  */
25084 split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);
25085
25086 rtx cmplo[2] = { operands[1], operands[2] };
25087 rtx cmphi[2] = { operands[4], operands[5] };
25088
25089 enum rtx_code code = <maxmin_rel>;
25090
25091 switch (code)
25092 {
25093 case LE: case LEU:
/* Reduce the min cases to the max cases: swap both comparison pairs
   and the condition code, then share the code below.  The cmov arms
   in the split template are not swapped.  */
25094 std::swap (cmplo[0], cmplo[1]);
25095 std::swap (cmphi[0], cmphi[1]);
25096 code = swap_condition (code);
25097 /* FALLTHRU */
25098
25099 case GE: case GEU:
25100 {
/* Unsigned comparisons use the CCC variant of sub3_carry and CCCmode
   flags; signed ones use the CCGZ variant and CCGZmode.  */
25101 bool uns = (code == GEU);
25102 rtx (*sbb_insn) (machine_mode, rtx, rtx, rtx)
25103 = uns ? gen_sub3_carry_ccc : gen_sub3_carry_ccgz;
25104
25105 emit_insn (gen_cmp_1 (<MODE>mode, cmplo[0], cmplo[1]));
25106
/* The arithmetic result is discarded into a SCRATCH; only the flags
   produced by the insn are consumed, via operands[6].  */
25107 rtx tmp = gen_rtx_SCRATCH (<MODE>mode);
25108 emit_insn (sbb_insn (<MODE>mode, tmp, cmphi[0], cmphi[1]));
25109
25110 rtx flags = gen_rtx_REG (uns ? CCCmode : CCGZmode, FLAGS_REG);
25111 operands[6] = gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
25112
25113 break;
25114 }
25115
25116 default:
25117 gcc_unreachable ();
25118 }
25119 })
25120
;; Single-word integer min/max, split before reload into an explicit
;; compare that sets FLAGS_REG followed by a conditional move selecting
;; operand 1 or operand 2.  Operand 2 is always forced into a register for
;; the cmov; the value used in the compare (cmp_op) may instead be the
;; constant zero, see the special cases in the preparation code.
25121 (define_insn_and_split "*<code><mode>3_1"
25122 [(set (match_operand:SWI 0 "register_operand")
25123 (maxmin:SWI
25124 (match_operand:SWI 1 "register_operand")
25125 (match_operand:SWI 2 "general_operand")))
25126 (clobber (reg:CC FLAGS_REG))]
25127 "TARGET_CMOVE
25128 && (<MODE>mode != QImode || !TARGET_PARTIAL_REG_STALL)
25129 && ix86_pre_reload_split ()"
25130 "#"
25131 "&& 1"
25132 [(set (match_dup 0)
25133 (if_then_else:SWI (match_dup 3)
25134 (match_dup 1)
25135 (match_dup 2)))]
25136 {
25137 machine_mode mode = <MODE>mode;
/* Keep the original second operand: the constant checks below must see
   it before it is replaced by a register.  */
25138 rtx cmp_op = operands[2];
25139
25140 operands[2] = force_reg (mode, cmp_op);
25141
25142 enum rtx_code code = <maxmin_rel>;
25143
25144 if (cmp_op == const1_rtx)
25145 {
25146 /* Convert smax (x, 1) into (x > 0 ? x : 1).
25147 Convert umax (x, 1) into (x != 0 ? x : 1).
25148 Convert ?min (x, 1) into (x <= 0 ? x : 1). */
25149 cmp_op = const0_rtx;
25150 if (code == GE)
25151 code = GT;
25152 else if (code == GEU)
25153 code = NE;
25154 }
25155 /* Convert smin (x, -1) into (x < 0 ? x : -1). */
25156 else if (cmp_op == constm1_rtx && code == LE)
25157 {
25158 cmp_op = const0_rtx;
25159 code = LT;
25160 }
25161 /* Convert smax (x, -1) into (x >= 0 ? x : -1). */
25162 else if (cmp_op == constm1_rtx && code == GE)
25163 cmp_op = const0_rtx;
/* Any other operand is compared through the register forced above;
   only a literal zero is left as the compare operand.  */
25164 else if (cmp_op != const0_rtx)
25165 cmp_op = operands[2];
25166
25167 machine_mode cmpmode = SELECT_CC_MODE (code, operands[1], cmp_op);
25168 rtx flags = gen_rtx_REG (cmpmode, FLAGS_REG);
25169
25170 rtx tmp = gen_rtx_COMPARE (cmpmode, operands[1], cmp_op);
25171 emit_insn (gen_rtx_SET (flags, tmp));
25172
25173 operands[3] = gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
25174 })
25175
25176 ;; Avoid clearing a register between a flags setting comparison and its use,
25177 ;; i.e. prefer "xorl %eax,%eax; test/cmp" over "test/cmp; movl $0, %eax".
;; The preparation code emits the clear of operand 1 through
;; ix86_expand_clear, and the replacement template then re-emits the flags
;; set (operand 2 is FLAGS_REG in the mode of the matched source).  The
;; condition requires FLAGS_REG to pass peep2_regno_dead_p at offset 0 and
;; operand 1 not to be mentioned in the flags source.
;; NOTE(review): the liveness check is presumably needed because the clear
;; emitted by ix86_expand_clear may clobber the flags -- confirm.
25178 (define_peephole2
25179 [(set (reg FLAGS_REG) (match_operand 0))
25180 (set (match_operand:SWI 1 "general_reg_operand") (const_int 0))]
25181 "peep2_regno_dead_p (0, FLAGS_REG)
25182 && !reg_overlap_mentioned_p (operands[1], operands[0])"
25183 [(set (match_dup 2) (match_dup 0))]
25184 {
25185 operands[2] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG);
25186 ix86_expand_clear (operands[1]);
25187 })
25188
25189 ;; When optimizing for size, zeroing memory should use a register.
;; Four consecutive stores of constant zero: clear a scratch register once
;; and store it four times.  The UID of the last emitted store is recorded
;; in ix86_last_zero_store_uid, which the continuation peepholes (those
;; matching a register store whose UID equals it) test in their condition.
25190 (define_peephole2
25191 [(match_scratch:SWI48 0 "r")
25192 (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
25193 (set (match_operand:SWI48 2 "memory_operand") (const_int 0))
25194 (set (match_operand:SWI48 3 "memory_operand") (const_int 0))
25195 (set (match_operand:SWI48 4 "memory_operand") (const_int 0))]
25196 "optimize_insn_for_size_p () && peep2_regno_dead_p (0, FLAGS_REG)"
25197 [(const_int 0)]
25198 {
25199 ix86_expand_clear (operands[0]);
25200 emit_move_insn (operands[1], operands[0]);
25201 emit_move_insn (operands[2], operands[0]);
25202 emit_move_insn (operands[3], operands[0]);
25203 ix86_last_zero_store_uid
25204 = INSN_UID (emit_move_insn (operands[4], operands[0]));
25205 DONE;
25206 })
25207
;; Two consecutive stores of constant zero when optimizing for size: clear
;; a scratch register once, store it twice, and record the UID of the last
;; store in ix86_last_zero_store_uid.
25208 (define_peephole2
25209 [(match_scratch:SWI48 0 "r")
25210 (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
25211 (set (match_operand:SWI48 2 "memory_operand") (const_int 0))]
25212 "optimize_insn_for_size_p () && peep2_regno_dead_p (0, FLAGS_REG)"
25213 [(const_int 0)]
25214 {
25215 ix86_expand_clear (operands[0]);
25216 emit_move_insn (operands[1], operands[0]);
25217 ix86_last_zero_store_uid
25218 = INSN_UID (emit_move_insn (operands[2], operands[0]));
25219 DONE;
25220 })
25221
;; A single store of constant zero when optimizing for size: clear a
;; scratch register, store it, and record the store's UID in
;; ix86_last_zero_store_uid.
25222 (define_peephole2
25223 [(match_scratch:SWI48 0 "r")
25224 (set (match_operand:SWI48 1 "memory_operand") (const_int 0))]
25225 "optimize_insn_for_size_p () && peep2_regno_dead_p (0, FLAGS_REG)"
25226 [(const_int 0)]
25227 {
25228 ix86_expand_clear (operands[0]);
25229 ix86_last_zero_store_uid
25230 = INSN_UID (emit_move_insn (operands[1], operands[0]));
25231 DONE;
25232 })
25233
;; Continuation: the first insn of the window is a register store whose
;; UID equals ix86_last_zero_store_uid, and it is followed by four stores
;; of constant zero.  The leading store is re-emitted and the four zero
;; stores are rewritten to store the same register (operand 0); the
;; recorded UID is advanced to the last store so the chain can continue.
25234 (define_peephole2
25235 [(set (match_operand:SWI48 5 "memory_operand")
25236 (match_operand:SWI48 0 "general_reg_operand"))
25237 (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
25238 (set (match_operand:SWI48 2 "memory_operand") (const_int 0))
25239 (set (match_operand:SWI48 3 "memory_operand") (const_int 0))
25240 (set (match_operand:SWI48 4 "memory_operand") (const_int 0))]
25241 "optimize_insn_for_size_p ()
25242 && INSN_UID (peep2_next_insn (0)) == ix86_last_zero_store_uid"
25243 [(const_int 0)]
25244 {
25245 emit_move_insn (operands[5], operands[0]);
25246 emit_move_insn (operands[1], operands[0]);
25247 emit_move_insn (operands[2], operands[0]);
25248 emit_move_insn (operands[3], operands[0]);
25249 ix86_last_zero_store_uid
25250 = INSN_UID (emit_move_insn (operands[4], operands[0]));
25251 DONE;
25252 })
25253
;; Continuation with two following zero stores: the leading register store
;; (whose UID equals ix86_last_zero_store_uid) is re-emitted, both zero
;; stores are rewritten to store operand 0, and the recorded UID is
;; advanced to the last one.
25254 (define_peephole2
25255 [(set (match_operand:SWI48 3 "memory_operand")
25256 (match_operand:SWI48 0 "general_reg_operand"))
25257 (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
25258 (set (match_operand:SWI48 2 "memory_operand") (const_int 0))]
25259 "optimize_insn_for_size_p ()
25260 && INSN_UID (peep2_next_insn (0)) == ix86_last_zero_store_uid"
25261 [(const_int 0)]
25262 {
25263 emit_move_insn (operands[3], operands[0]);
25264 emit_move_insn (operands[1], operands[0]);
25265 ix86_last_zero_store_uid
25266 = INSN_UID (emit_move_insn (operands[2], operands[0]));
25267 DONE;
25268 })
25269
;; Continuation with one following zero store: the leading register store
;; (whose UID equals ix86_last_zero_store_uid) is re-emitted, the zero
;; store is rewritten to store operand 0, and the recorded UID is advanced
;; to it.
25270 (define_peephole2
25271 [(set (match_operand:SWI48 2 "memory_operand")
25272 (match_operand:SWI48 0 "general_reg_operand"))
25273 (set (match_operand:SWI48 1 "memory_operand") (const_int 0))]
25274 "optimize_insn_for_size_p ()
25275 && INSN_UID (peep2_next_insn (0)) == ix86_last_zero_store_uid"
25276 [(const_int 0)]
25277 {
25278 emit_move_insn (operands[2], operands[0]);
25279 ix86_last_zero_store_uid
25280 = INSN_UID (emit_move_insn (operands[1], operands[0]));
25281 DONE;
25282 })
25283
25284 ;; Reload dislikes loading constants directly into class_likely_spilled
25285 ;; hard registers. Try to tidy things up here.
;; Collapses "op0 = op1; op2 = op0" into "op2 = op1" when
;; peep2_reg_dead_p (2, operands[0]) holds, i.e. the intermediate register
;; is not needed past the two matched insns.
25286 (define_peephole2
25287 [(set (match_operand:SWI 0 "general_reg_operand")
25288 (match_operand:SWI 1 "x86_64_general_operand"))
25289 (set (match_operand:SWI 2 "general_reg_operand")
25290 (match_dup 0))]
25291 "peep2_reg_dead_p (2, operands[0])"
25292 [(set (match_dup 2) (match_dup 1))])
25293 \f
25294 ;; Misc patterns (?)
25295
25296 ;; This pattern exists to put a dependency on all ebp-based memory accesses.
25297 ;; Otherwise there will be nothing to keep
25298 ;;
25299 ;; [(set (reg ebp) (reg esp))]
25300 ;; [(set (reg esp) (plus (reg esp) (const_int -160000)))
25301 ;; (clobber (eflags))]
25302 ;; [(set (mem (plus (reg ebp) (const_int -160000))) (const_int 0))]
25303 ;;
25304 ;; in proper program order.
;;
;; The dependency is expressed by the (clobber (mem:BLK (scratch))) in the
;; pattern.  The "type" attribute decides how the addition is output:
;;   alu   alternative 0 when TARGET_OPT_AGU is not set: add or sub in
;;         place (operand 0 must equal operand 1);
;;   imov  otherwise, when operand 2 matches const0_operand: a plain mov;
;;   lea   everything else.
25305
25306 (define_insn "@pro_epilogue_adjust_stack_add_<mode>"
25307 [(set (match_operand:P 0 "register_operand" "=r,r")
25308 (plus:P (match_operand:P 1 "register_operand" "0,r")
25309 (match_operand:P 2 "<nonmemory_operand>" "r<i>,l<i>")))
25310 (clobber (reg:CC FLAGS_REG))
25311 (clobber (mem:BLK (scratch)))]
25312 ""
25313 {
25314 switch (get_attr_type (insn))
25315 {
25316 case TYPE_IMOV:
25317 return "mov{<imodesuffix>}\t{%1, %0|%0, %1}";
25318
25319 case TYPE_ALU:
25320 gcc_assert (rtx_equal_p (operands[0], operands[1]));
/* x86_maybe_negate_const_int may rewrite operands[2]; when it returns
   true the insn is output as a subtraction instead.  */
25321 if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
25322 return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
25323
25324 return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
25325
25326 default:
/* For lea, print the whole source of the first set in the pattern
   (the plus expression) as an address via the %E modifier.  */
25327 operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
25328 return "lea{<imodesuffix>}\t{%E2, %0|%0, %E2}";
25329 }
25330 }
25331 [(set (attr "type")
25332 (cond [(and (eq_attr "alternative" "0")
25333 (not (match_test "TARGET_OPT_AGU")))
25334 (const_string "alu")
25335 (match_operand:<MODE> 2 "const0_operand")
25336 (const_string "imov")
25337 ]
25338 (const_string "lea")))
25339 (set (attr "length_immediate")
25340 (cond [(eq_attr "type" "imov")
25341 (const_string "0")
25342 (and (eq_attr "type" "alu")
25343 (match_operand 2 "const128_operand"))
25344 (const_string "1")
25345 ]
25346 (const_string "*")))
25347 (set_attr "mode" "<MODE>")])
25348
;; Register-register subtraction counterpart of the stack adjust "add"
;; pattern: operand 1 is tied to the destination and operand 2 is a
;; register.  It carries the same FLAGS_REG and (mem:BLK (scratch))
;; clobbers and is always output as a sub instruction.
25349 (define_insn "@pro_epilogue_adjust_stack_sub_<mode>"
25350 [(set (match_operand:P 0 "register_operand" "=r")
25351 (minus:P (match_operand:P 1 "register_operand" "0")
25352 (match_operand:P 2 "register_operand" "r")))
25353 (clobber (reg:CC FLAGS_REG))
25354 (clobber (mem:BLK (scratch)))]
25355 ""
25356 "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
25357 [(set_attr "type" "alu")
25358 (set_attr "mode" "<MODE>")])
25359
;; Stack probe worker: an unspec_volatile (UNSPECV_STACK_PROBE) output as
;; a call to ___chkstk_ms.  The operand uses the "a" constraint and is tied
;; between input and output; FLAGS_REG is clobbered.  Only available when
;; ix86_target_stack_probe () is true.
25360 (define_insn "@allocate_stack_worker_probe_<mode>"
25361 [(set (match_operand:P 0 "register_operand" "=a")
25362 (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")]
25363 UNSPECV_STACK_PROBE))
25364 (clobber (reg:CC FLAGS_REG))]
25365 "ix86_target_stack_probe ()"
25366 "call\t___chkstk_ms"
25367 [(set_attr "type" "multi")
25368 (set_attr "length" "5")])
25369
;; Dynamic stack allocation for targets where ix86_target_stack_probe ()
;; is true.  Operand 1 is the size and operand 0 receives
;; virtual_stack_dynamic_rtx after the stack pointer has been lowered.
25370 (define_expand "allocate_stack"
25371 [(match_operand 0 "register_operand")
25372 (match_operand 1 "general_operand")]
25373 "ix86_target_stack_probe ()"
25374 {
25375 rtx x;
25376
25377 #ifndef CHECK_STACK_LIMIT
25378 #define CHECK_STACK_LIMIT 0
25379 #endif
25380
/* A constant size below CHECK_STACK_LIMIT is subtracted directly,
   without a probe.  With the fallback definition of 0 above this branch
   is never taken.  */
25381 if (CHECK_STACK_LIMIT && CONST_INT_P (operands[1])
25382 && INTVAL (operands[1]) < CHECK_STACK_LIMIT)
25383 x = operands[1];
25384 else
25385 {
25386 x = copy_to_mode_reg (Pmode, operands[1]);
25387
/* The probe worker takes the size register as both input and output.  */
25388 emit_insn (gen_allocate_stack_worker_probe (Pmode, x, x));
25389 }
25390
25391 x = expand_simple_binop (Pmode, MINUS, stack_pointer_rtx, x,
25392 stack_pointer_rtx, 0, OPTAB_DIRECT);
25393
/* expand_simple_binop may have placed the result somewhere other than
   the requested target.  */
25394 if (x != stack_pointer_rtx)
25395 emit_move_insn (stack_pointer_rtx, x);
25396
25397 emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
25398 DONE;
25399 })
25400
;; Probe the memory location in operand 0 by emitting the probe_stack_1
;; insn in word_mode with const0_rtx as its second operand.
25401 (define_expand "probe_stack"
25402 [(match_operand 0 "memory_operand")]
25403 ""
25404 {
25405 emit_insn (gen_probe_stack_1
25406 (word_mode, operands[0], const0_rtx));
25407 DONE;
25408 })
25409
25410 ;; Use OR for stack probes, this is shorter.
;; Operand 1 must match const0_operand, so the emitted "or" writes to the
;; memory in operand 0 with a zero immediate.  The insn is wrapped in
;; UNSPEC_PROBE_STACK, clobbers FLAGS_REG and has length_immediate 1.
25411 (define_insn "@probe_stack_1_<mode>"
25412 [(set (match_operand:W 0 "memory_operand" "=m")
25413 (unspec:W [(match_operand:W 1 "const0_operand")]
25414 UNSPEC_PROBE_STACK))
25415 (clobber (reg:CC FLAGS_REG))]
25416 ""
25417 "or{<imodesuffix>}\t{%1, %0|%0, %1}"
25418 [(set_attr "type" "alu1")
25419 (set_attr "mode" "<MODE>")
25420 (set_attr "length_immediate" "1")])
25421
;; Combined stack adjustment and probing.  The pattern describes an
;; UNSPECV_PROBE_STACK_RANGE on a register tied between input and output
;; (operands 0/1) and a decrement of SP_REG by the const_int in operand 2,
;; with FLAGS_REG and (mem:BLK (scratch)) clobbered.  The assembly is
;; produced by output_adjust_stack_and_probe, which receives operand 0 only.
25422 (define_insn "@adjust_stack_and_probe_<mode>"
25423 [(set (match_operand:P 0 "register_operand" "=r")
25424 (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")]
25425 UNSPECV_PROBE_STACK_RANGE))
25426 (set (reg:P SP_REG)
25427 (minus:P (reg:P SP_REG) (match_operand:P 2 "const_int_operand")))
25428 (clobber (reg:CC FLAGS_REG))
25429 (clobber (mem:BLK (scratch)))]
25430 ""
25431 "* return output_adjust_stack_and_probe (operands[0]);"
25432 [(set_attr "type" "multi")])
25433
;; Probe a range of the stack without adjusting the stack pointer in the
;; pattern.  Operands 0/1 are a register tied between input and output and
;; operand 2 is a const_int; the assembly is produced by
;; output_probe_stack_range from operands 0 and 2.
25434 (define_insn "@probe_stack_range_<mode>"
25435 [(set (match_operand:P 0 "register_operand" "=r")
25436 (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")
25437 (match_operand:P 2 "const_int_operand")]
25438 UNSPECV_PROBE_STACK_RANGE))
25439 (clobber (reg:CC FLAGS_REG))]
25440 ""
25441 "* return output_probe_stack_range (operands[0], operands[2]);"
25442 [(set_attr "type" "multi")])
25443
;; Reload pic_offset_table_rtx at a __builtin_setjmp receiver.  Only
;; enabled for 32-bit PIC code (!TARGET_64BIT && flag_pic).
25444 (define_expand "builtin_setjmp_receiver"
25445 [(label_ref (match_operand 0))]
25446 "!TARGET_64BIT && flag_pic"
25447 {
25448 #if TARGET_MACHO
25449 if (TARGET_MACHO)
25450 {
/* Mach-O: set the PIC register relative to a fresh label, then
   subtract the machopic offset of that label from it.  */
25451 rtx xops[3];
25452 rtx_code_label *label_rtx = gen_label_rtx ();
25453 emit_insn (gen_set_got_labelled (pic_offset_table_rtx, label_rtx));
25454 xops[0] = xops[1] = pic_offset_table_rtx;
25455 xops[2] = machopic_gen_offset (gen_rtx_LABEL_REF (SImode, label_rtx));
25456 ix86_expand_binary_operator (MINUS, SImode, xops);
25457 }
25458 else
25459 #endif
25460 emit_insn (gen_set_got (pic_offset_table_rtx));
25461 DONE;
25462 })
25463
;; Save the stack pointer (operand 1) into the save area (operand 0).
;; When flag_cf_protection has CF_RETURN set, the save area holds two
;; words: the shadow stack pointer read with rdssp at offset 0 and the
;; stack pointer at offset UNITS_PER_WORD.  Otherwise only the stack
;; pointer is stored, at offset 0.
25464 (define_expand "save_stack_nonlocal"
25465 [(set (match_operand 0 "memory_operand")
25466 (match_operand 1 "register_operand"))]
25467 ""
25468 {
25469 rtx stack_slot;
25470
25471 if (flag_cf_protection & CF_RETURN)
25472 {
25473 /* Copy shadow stack pointer to the first slot
25474 and stack pointer to the second slot. */
25475 rtx ssp_slot = adjust_address (operands[0], word_mode, 0);
25476 stack_slot = adjust_address (operands[0], Pmode, UNITS_PER_WORD);
25477
/* The register is zero-initialized before rdssp, which takes it as
   both input and output.  */
25478 rtx reg_ssp = force_reg (word_mode, const0_rtx);
25479 emit_insn (gen_rdssp (word_mode, reg_ssp, reg_ssp));
25480 emit_move_insn (ssp_slot, reg_ssp);
25481 }
25482 else
25483 stack_slot = adjust_address (operands[0], Pmode, 0);
25484 emit_move_insn (stack_slot, operands[1]);
25485 DONE;
25486 })
25487
;; Restore the stack pointer (operand 0) from the save area (operand 1),
;; using the layout written by save_stack_nonlocal.  When flag_cf_protection
;; has CF_RETURN set, the shadow stack pointer is adjusted first: the
;; difference between the current and the saved value is converted into a
;; count, a loop issues incssp with 255 while the count is above 255, and a
;; final incssp follows; the whole adjustment is skipped when the
;; difference is zero.
25488 (define_expand "restore_stack_nonlocal"
25489 [(set (match_operand 0 "register_operand" "")
25490 (match_operand 1 "memory_operand" ""))]
25491 ""
25492 {
25493 rtx stack_slot;
25494
25495 if (flag_cf_protection & CF_RETURN)
25496 {
25497 /* Restore shadow stack pointer from the first slot
25498 and stack pointer from the second slot. */
25499 rtx ssp_slot = adjust_address (operands[1], word_mode, 0);
25500 stack_slot = adjust_address (operands[1], Pmode, UNITS_PER_WORD);
25501
25502 /* Get the current shadow stack pointer. The code below will check if
25503 SHSTK feature is enabled. If it is not enabled the RDSSP instruction
25504 is a NOP. */
25505 rtx reg_ssp = force_reg (word_mode, const0_rtx);
25506 emit_insn (gen_rdssp (word_mode, reg_ssp, reg_ssp));
25507
25508 /* Compare through subtraction the saved and the current ssp
25509 to decide if ssp has to be adjusted. */
25510 reg_ssp = expand_simple_binop (word_mode, MINUS,
25511 reg_ssp, ssp_slot,
25512 reg_ssp, 1, OPTAB_DIRECT);
25513
25514 /* Compare and jump over adjustment code. */
25515 rtx noadj_label = gen_label_rtx ();
25516 emit_cmp_and_jump_insns (reg_ssp, const0_rtx, EQ, NULL_RTX,
25517 word_mode, 1, noadj_label);
25518
25519 /* Compute the number of frames to adjust. */
/* The difference (current - saved) is negated and shifted right by
   log2 (UNITS_PER_WORD) in ptr_mode.  */
25520 rtx reg_adj = gen_lowpart (ptr_mode, reg_ssp);
25521 rtx reg_adj_neg = expand_simple_unop (ptr_mode, NEG, reg_adj,
25522 NULL_RTX, 1);
25523
25524 reg_adj = expand_simple_binop (ptr_mode, LSHIFTRT, reg_adj_neg,
25525 GEN_INT (exact_log2 (UNITS_PER_WORD)),
25526 reg_adj, 1, OPTAB_DIRECT);
25527
25528 /* Check if number of frames <= 255 so no loop is needed. */
25529 rtx inc_label = gen_label_rtx ();
25530 emit_cmp_and_jump_insns (reg_adj, GEN_INT (255), LEU, NULL_RTX,
25531 ptr_mode, 1, inc_label);
25532
25533 /* Adjust the ssp in a loop. */
25534 rtx loop_label = gen_label_rtx ();
25535 emit_label (loop_label);
25536 LABEL_NUSES (loop_label) = 1;
25537
25538 rtx reg_255 = force_reg (word_mode, GEN_INT (255));
25539 emit_insn (gen_incssp (word_mode, reg_255));
25540
25541 reg_adj = expand_simple_binop (ptr_mode, MINUS,
25542 reg_adj, GEN_INT (255),
25543 reg_adj, 1, OPTAB_DIRECT);
25544
25545 /* Compare and jump to the loop label. */
25546 emit_cmp_and_jump_insns (reg_adj, GEN_INT (255), GTU, NULL_RTX,
25547 ptr_mode, 1, loop_label);
25548
25549 emit_label (inc_label);
25550 LABEL_NUSES (inc_label) = 1;
25551
/* NOTE(review): the final incssp is given reg_ssp rather than reg_adj;
   this relies on the ptr_mode computations above having been done in
   place on the lowpart of reg_ssp -- confirm.  */
25552 emit_insn (gen_incssp (word_mode, reg_ssp));
25553
25554 emit_label (noadj_label);
25555 LABEL_NUSES (noadj_label) = 1;
25556 }
25557 else
25558 stack_slot = adjust_address (operands[1], Pmode, 0);
25559 emit_move_insn (operands[0], stack_slot);
25560 DONE;
25561 })
25562
;; Copy the stack protector value from memory operand 1 to memory operand 0.
;; Allocates a fresh word_mode pseudo as the scratch register and emits
;; the stack_protect_set_1 insn for ptr_mode/word_mode.
25563 (define_expand "stack_protect_set"
25564 [(match_operand 0 "memory_operand")
25565 (match_operand 1 "memory_operand")]
25566 ""
25567 {
25568 rtx scratch = gen_reg_rtx (word_mode);
25569
25570 emit_insn (gen_stack_protect_set_1
25571 (ptr_mode, word_mode, operands[0], operands[1], scratch));
25572 DONE;
25573 })
25574
;; Memory-to-memory copy (UNSPEC_SP_SET) through the earlyclobber scratch
;; register in operand 2.  The pattern also states that operand 2 is set
;; to zero, and the output code implements that as its third instruction:
;; load, store, then clear of the scratch register.
25575 (define_insn "@stack_protect_set_1_<PTR:mode>_<W:mode>"
25576 [(set (match_operand:PTR 0 "memory_operand" "=m")
25577 (unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")]
25578 UNSPEC_SP_SET))
25579 (set (match_operand:W 2 "register_operand" "=&r") (const_int 0))
25580 (clobber (reg:CC FLAGS_REG))]
25581 ""
25582 {
25583 output_asm_insn ("mov{<PTR:imodesuffix>}\t{%1, %<PTR:k>2|%<PTR:k>2, %1}",
25584 operands);
25585 output_asm_insn ("mov{<PTR:imodesuffix>}\t{%<PTR:k>2, %0|%0, %<PTR:k>2}",
25586 operands);
/* Clear the scratch register with xor unless the target prefers
   "mov $0" (TARGET_USE_MOV0) and the insn is not optimized for size.  */
25587 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
25588 return "xor{l}\t%k2, %k2";
25589 else
25590 return "mov{l}\t{$0, %k2|%k2, 0}";
25591 }
25592 [(set_attr "type" "multi")])
25593
25594 ;; Patterns and peephole2s to optimize stack_protect_set_1_<mode>
25595 ;; immediately followed by *mov{s,d}i_internal, where we can avoid
25596 ;; the xor{l} above. We don't split this, so that scheduling or
25597 ;; anything else doesn't separate the *stack_protect_set* pattern from
25598 ;; the set of the register that overwrites the register with a new value.
25599
;; Matches the UNSPEC_SP_SET parallel (scratch operand 2 set to zero,
;; flags clobbered) followed by a set of general register operand 3 from
;; a const0_operand.  The condition requires operand 2 to be word_mode,
;; operand 3 to be no wider than UNITS_PER_WORD, and the two
;; peep2_reg_dead_p checks (operand 3 at position 0, operand 2 at
;; position 1).  The replacement is a single parallel of the same shape
;; that zeroes operand 3, rewritten to word_mode with gen_lowpart, in
;; place of operand 2.
25600 (define_peephole2
25601 [(parallel [(set (match_operand:PTR 0 "memory_operand")
25602 (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
25603 UNSPEC_SP_SET))
25604 (set (match_operand 2 "general_reg_operand") (const_int 0))
25605 (clobber (reg:CC FLAGS_REG))])
25606 (set (match_operand 3 "general_reg_operand")
25607 (match_operand 4 "const0_operand"))]
25608 "GET_MODE (operands[2]) == word_mode
25609 && GET_MODE_SIZE (GET_MODE (operands[3])) <= UNITS_PER_WORD
25610 && peep2_reg_dead_p (0, operands[3])
25611 && peep2_reg_dead_p (1, operands[2])"
25612 [(parallel [(set (match_dup 0)
25613 (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
25614 (set (match_dup 3) (const_int 0))
25615 (clobber (reg:CC FLAGS_REG))])]
25616 "operands[3] = gen_lowpart (word_mode, operands[3]);")
25617
;; Combined form, valid only after reload: memory operand 3 is copied to
;; memory operand 0 through register operand 1, and that register is then
;; loaded with SImode operand 2.  The last instruction is printed as lea
;; when operand 2 is a pic_32bit_operand or ix86_use_lea_for_mov returns
;; true, and as mov otherwise.  The pattern lists no FLAGS_REG clobber.
25618 (define_insn "*stack_protect_set_2_<mode>_si"
25619 [(set (match_operand:PTR 0 "memory_operand" "=m")
25620 (unspec:PTR [(match_operand:PTR 3 "memory_operand" "m")]
25621 UNSPEC_SP_SET))
25622 (set (match_operand:SI 1 "register_operand" "=&r")
25623 (match_operand:SI 2 "general_operand" "g"))]
25624 "reload_completed"
25625 {
25626 output_asm_insn ("mov{<imodesuffix>}\t{%3, %<k>1|%<k>1, %3}", operands);
25627 output_asm_insn ("mov{<imodesuffix>}\t{%<k>1, %0|%0, %<k>1}", operands);
25628 if (pic_32bit_operand (operands[2], SImode)
25629 || ix86_use_lea_for_mov (insn, operands + 1))
25630 return "lea{l}\t{%E2, %1|%1, %E2}";
25631 else
25632 return "mov{l}\t{%2, %1|%1, %2}";
25633 }
25634 [(set_attr "type" "multi")
25635 (set_attr "length" "24")])
25636
;; DImode variant of the combined form, for TARGET_64BIT after reload.
;; Source operand 2 has three alternatives (constraints "Z", "rem", "i").
;; After the two moves that copy operand 3 to operand 0 through register
;; operand 1, the final instruction is chosen in this order:
;;   lea{q}     if operand 2 is a pic_32bit_operand,
;;   mov{l}     for alternative 0, using the %k forms of both operands,
;;   movabs{q}  for alternative 2,
;;   lea{q}     if ix86_use_lea_for_mov returns true,
;;   mov{q}     otherwise.
25637 (define_insn "*stack_protect_set_2_<mode>_di"
25638 [(set (match_operand:PTR 0 "memory_operand" "=m,m,m")
25639 (unspec:PTR [(match_operand:PTR 3 "memory_operand" "m,m,m")]
25640 UNSPEC_SP_SET))
25641 (set (match_operand:DI 1 "register_operand" "=&r,&r,&r")
25642 (match_operand:DI 2 "general_operand" "Z,rem,i"))]
25643 "TARGET_64BIT && reload_completed"
25644 {
25645 output_asm_insn ("mov{<imodesuffix>}\t{%3, %<k>1|%<k>1, %3}", operands);
25646 output_asm_insn ("mov{<imodesuffix>}\t{%<k>1, %0|%0, %<k>1}", operands);
25647 if (pic_32bit_operand (operands[2], DImode))
25648 return "lea{q}\t{%E2, %1|%1, %E2}";
25649 else if (which_alternative == 0)
25650 return "mov{l}\t{%k2, %k1|%k1, %k2}";
25651 else if (which_alternative == 2)
25652 return "movabs{q}\t{%2, %1|%1, %2}";
25653 else if (ix86_use_lea_for_mov (insn, operands + 1))
25654 return "lea{q}\t{%E2, %1|%1, %E2}";
25655 else
25656 return "mov{q}\t{%2, %1|%1, %2}";
25657 }
25658 [(set_attr "type" "multi")
25659 (set_attr "length" "24")])
25660
;; Matches the UNSPEC_SP_SET parallel followed by a SWI48 set of general
;; register operand 3 from a general_gr_operand (operand 4).  When
;; operand 2 is word_mode and both peep2_reg_dead_p checks hold, the two
;; insns become one parallel that performs the UNSPEC_SP_SET copy and
;; sets operand 3 from operand 4.  The replacement carries no FLAGS_REG
;; clobber and no longer mentions operand 2.
25661 (define_peephole2
25662 [(parallel [(set (match_operand:PTR 0 "memory_operand")
25663 (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
25664 UNSPEC_SP_SET))
25665 (set (match_operand 2 "general_reg_operand") (const_int 0))
25666 (clobber (reg:CC FLAGS_REG))])
25667 (set (match_operand:SWI48 3 "general_reg_operand")
25668 (match_operand:SWI48 4 "general_gr_operand"))]
25669 "GET_MODE (operands[2]) == word_mode
25670 && peep2_reg_dead_p (0, operands[3])
25671 && peep2_reg_dead_p (1, operands[2])"
25672 [(parallel [(set (match_dup 0)
25673 (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
25674 (set (match_dup 3) (match_dup 4))])])
25675
;; Same merge with the two insns in the opposite order: the SWI48
;; register load comes first and the UNSPEC_SP_SET parallel second.
;; Operand 2 is checked with peep2_reg_dead_p at position 2 here, and the
;; condition also rejects the match when operand 3 is mentioned in
;; either memory operand.
;; NOTE(review): presumably the reg_mentioned_p checks are needed because
;; the merged insn performs the load after the memory copy -- confirm.
25676 (define_peephole2
25677 [(set (match_operand:SWI48 3 "general_reg_operand")
25678 (match_operand:SWI48 4 "general_gr_operand"))
25679 (parallel [(set (match_operand:PTR 0 "memory_operand")
25680 (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
25681 UNSPEC_SP_SET))
25682 (set (match_operand 2 "general_reg_operand") (const_int 0))
25683 (clobber (reg:CC FLAGS_REG))])]
25684 "GET_MODE (operands[2]) == word_mode
25685 && peep2_reg_dead_p (0, operands[3])
25686 && peep2_reg_dead_p (2, operands[2])
25687 && !reg_mentioned_p (operands[3], operands[0])
25688 && !reg_mentioned_p (operands[3], operands[1])"
25689 [(parallel [(set (match_dup 0)
25690 (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
25691 (set (match_dup 3) (match_dup 4))])])
25692
;; Combined form whose second set takes an address: operand 2 is an
;; address_no_seg_operand with constraint "Ts".  After the two moves that
;; copy operand 3 to operand 0 through register operand 1, the insn
;; prints lea{l} into the %k form of operand 1 when operand 2 is an
;; SImode_address_operand (asserting TARGET_64BIT in that case), and lea
;; with the SWI48 mode suffix otherwise.
25693 (define_insn "*stack_protect_set_3_<PTR:mode>_<SWI48:mode>"
25694 [(set (match_operand:PTR 0 "memory_operand" "=m")
25695 (unspec:PTR [(match_operand:PTR 3 "memory_operand" "m")]
25696 UNSPEC_SP_SET))
25697 (set (match_operand:SWI48 1 "register_operand" "=&r")
25698 (match_operand:SWI48 2 "address_no_seg_operand" "Ts"))]
25699 ""
25700 {
25701 output_asm_insn ("mov{<PTR:imodesuffix>}\t{%3, %<PTR:k>1|%<PTR:k>1, %3}",
25702 operands);
25703 output_asm_insn ("mov{<PTR:imodesuffix>}\t{%<PTR:k>1, %0|%0, %<PTR:k>1}",
25704 operands);
25705 if (SImode_address_operand (operands[2], VOIDmode))
25706 {
25707 gcc_assert (TARGET_64BIT);
25708 return "lea{l}\t{%E2, %k1|%k1, %E2}";
25709 }
25710 else
25711 return "lea{<SWI48:imodesuffix>}\t{%E2, %1|%1, %E2}";
25712 }
25713 [(set_attr "type" "multi")
25714 (set_attr "length" "24")])
25715
;; Matches the UNSPEC_SP_SET parallel followed by a SWI48 set of general
;; register operand 3 from an address_no_seg_operand (operand 4).  When
;; operand 2 is word_mode and both peep2_reg_dead_p checks hold, the two
;; insns become one parallel holding the UNSPEC_SP_SET copy and the set
;; of operand 3, with no FLAGS_REG clobber.
25716 (define_peephole2
25717 [(parallel [(set (match_operand:PTR 0 "memory_operand")
25718 (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
25719 UNSPEC_SP_SET))
25720 (set (match_operand 2 "general_reg_operand") (const_int 0))
25721 (clobber (reg:CC FLAGS_REG))])
25722 (set (match_operand:SWI48 3 "general_reg_operand")
25723 (match_operand:SWI48 4 "address_no_seg_operand"))]
25724 "GET_MODE (operands[2]) == word_mode
25725 && peep2_reg_dead_p (0, operands[3])
25726 && peep2_reg_dead_p (1, operands[2])"
25727 [(parallel [(set (match_dup 0)
25728 (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
25729 (set (match_dup 3) (match_dup 4))])])
25730
;; Combined form for TARGET_64BIT after reload whose second set is a
;; zero extension of SImode operand 2 into DImode register operand 1.
;; After the two moves that copy operand 3 to operand 0 through that
;; register, the insn prints a 32-bit lea (if ix86_use_lea_for_mov
;; returns true) or a 32-bit mov into the %k form of operand 1.
25731 (define_insn "*stack_protect_set_4z_<mode>_di"
25732 [(set (match_operand:PTR 0 "memory_operand" "=m")
25733 (unspec:PTR [(match_operand:PTR 3 "memory_operand" "m")]
25734 UNSPEC_SP_SET))
25735 (set (match_operand:DI 1 "register_operand" "=&r")
25736 (zero_extend:DI (match_operand:SI 2 "nonimmediate_operand" "rm")))]
25737 "TARGET_64BIT && reload_completed"
25738 {
25739 output_asm_insn ("mov{<imodesuffix>}\t{%3, %<k>1|%<k>1, %3}", operands);
25740 output_asm_insn ("mov{<imodesuffix>}\t{%<k>1, %0|%0, %<k>1}", operands);
25741 if (ix86_use_lea_for_mov (insn, operands + 1))
25742 return "lea{l}\t{%E2, %k1|%k1, %E2}";
25743 else
25744 return "mov{l}\t{%2, %k1|%k1, %2}";
25745 }
25746 [(set_attr "type" "multi")
25747 (set_attr "length" "24")])
25748
;; Combined form for TARGET_64BIT after reload whose second set is a
;; sign extension of SImode operand 2 into DImode register operand 1.
;; After the two moves that copy operand 3 to operand 0 through that
;; register, the insn prints movs{lq|x} from operand 2 into operand 1.
25749 (define_insn "*stack_protect_set_4s_<mode>_di"
25750 [(set (match_operand:PTR 0 "memory_operand" "=m")
25751 (unspec:PTR [(match_operand:PTR 3 "memory_operand" "m")]
25752 UNSPEC_SP_SET))
25753 (set (match_operand:DI 1 "register_operand" "=&r")
25754 (sign_extend:DI (match_operand:SI 2 "nonimmediate_operand" "rm")))]
25755 "TARGET_64BIT && reload_completed"
25756 {
25757 output_asm_insn ("mov{<imodesuffix>}\t{%3, %<k>1|%<k>1, %3}", operands);
25758 output_asm_insn ("mov{<imodesuffix>}\t{%<k>1, %0|%0, %<k>1}", operands);
25759 return "movs{lq|x}\t{%2, %1|%1, %2}";
25760 }
25761 [(set_attr "type" "multi")
25762 (set_attr "length" "24")])
25763
;; Matches the UNSPEC_SP_SET parallel followed by a DImode set of general
;; register operand 3 from an any_extend of SImode operand 4.  On
;; TARGET_64BIT, when operand 2 is word_mode and both peep2_reg_dead_p
;; checks hold, the two insns become one parallel holding the
;; UNSPEC_SP_SET copy and the extension, with no FLAGS_REG clobber.
25764 (define_peephole2
25765 [(parallel [(set (match_operand:PTR 0 "memory_operand")
25766 (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
25767 UNSPEC_SP_SET))
25768 (set (match_operand 2 "general_reg_operand") (const_int 0))
25769 (clobber (reg:CC FLAGS_REG))])
25770 (set (match_operand:DI 3 "general_reg_operand")
25771 (any_extend:DI
25772 (match_operand:SI 4 "nonimmediate_gr_operand")))]
25773 "TARGET_64BIT
25774 && GET_MODE (operands[2]) == word_mode
25775 && peep2_reg_dead_p (0, operands[3])
25776 && peep2_reg_dead_p (1, operands[2])"
25777 [(parallel [(set (match_dup 0)
25778 (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
25779 (set (match_dup 3)
25780 (any_extend:DI (match_dup 4)))])])
25781
;; Expander for the "stack_protect_test" pattern.  Operands 0 and 1 are
;; memory operands; operand 2 has no predicate and is passed on to the
;; branch.  It builds FLAGS_REG in CCZmode, emits stack_protect_test_1
;; (selected by ptr_mode) with that register as destination, and then
;; emits a cbranchcc4 jump on (eq flags 0) with operand 2.
25782 (define_expand "stack_protect_test"
25783 [(match_operand 0 "memory_operand")
25784 (match_operand 1 "memory_operand")
25785 (match_operand 2)]
25786 ""
25787 {
25788 rtx flags = gen_rtx_REG (CCZmode, FLAGS_REG);
25789
25790 emit_insn (gen_stack_protect_test_1
25791 (ptr_mode, flags, operands[0], operands[1]));
25792
25793 emit_jump_insn (gen_cbranchcc4 (gen_rtx_EQ (VOIDmode, flags, const0_rtx),
25794 flags, const0_rtx, operands[2]));
25795 DONE;
25796 })
25797
;; Sets the CCZ flags operand 0 from an UNSPEC_SP_TEST of memory
;; operands 1 and 2.  It prints a mov of operand 1 into the scratch
;; register operand 3 ("=&r") followed by a sub of operand 2 from that
;; scratch.
25798 (define_insn "@stack_protect_test_1_<mode>"
25799 [(set (match_operand:CCZ 0 "flags_reg_operand")
25800 (unspec:CCZ [(match_operand:PTR 1 "memory_operand" "m")
25801 (match_operand:PTR 2 "memory_operand" "m")]
25802 UNSPEC_SP_TEST))
25803 (clobber (match_scratch:PTR 3 "=&r"))]
25804 ""
25805 {
25806 output_asm_insn ("mov{<imodesuffix>}\t{%1, %3|%3, %1}", operands);
25807 return "sub{<imodesuffix>}\t{%2, %3|%3, %2}";
25808 }
25809 [(set_attr "type" "multi")])
25810
25811 ;; Avoid redundant prefixes by splitting HImode arithmetic to SImode.
25812 ;; Do not split instructions with mask registers.
;; After reload, with !TARGET_PARTIAL_REG_STALL, re-express a
;; promotable_binary_operator on general registers in SImode.
;; HImode qualifies when the function is optimized for speed and
;; !TARGET_FAST_PREFIX, or operand 2 is not a CONST_INT, or operand 2
;; satisfies constraint K.  QImode qualifies when TARGET_PROMOTE_QImode
;; is set or the function is optimized for size.
;; Operands 0 and 1 become SImode lowparts; operand 2 does too unless the
;; operator is ASHIFT.  The operator rtx is shallow-copied before its
;; mode is changed to SImode.
25813 (define_split
25814 [(set (match_operand 0 "general_reg_operand")
25815 (match_operator 3 "promotable_binary_operator"
25816 [(match_operand 1 "general_reg_operand")
25817 (match_operand 2 "aligned_operand")]))
25818 (clobber (reg:CC FLAGS_REG))]
25819 "! TARGET_PARTIAL_REG_STALL && reload_completed
25820 && ((GET_MODE (operands[0]) == HImode
25821 && ((optimize_function_for_speed_p (cfun) && !TARGET_FAST_PREFIX)
25822 /* ??? next two lines just !satisfies_constraint_K (...) */
25823 || !CONST_INT_P (operands[2])
25824 || satisfies_constraint_K (operands[2])))
25825 || (GET_MODE (operands[0]) == QImode
25826 && (TARGET_PROMOTE_QImode || optimize_function_for_size_p (cfun))))"
25827 [(parallel [(set (match_dup 0)
25828 (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
25829 (clobber (reg:CC FLAGS_REG))])]
25830 {
25831 operands[0] = gen_lowpart (SImode, operands[0]);
25832 operands[1] = gen_lowpart (SImode, operands[1]);
25833 if (GET_CODE (operands[3]) != ASHIFT)
25834 operands[2] = gen_lowpart (SImode, operands[2]);
25835 operands[3] = shallow_copy_rtx (operands[3]);
25836 PUT_MODE (operands[3], SImode);
25837 })
25838
25839 ; Promote the QImode tests, as i386 has encoding of the AND
25840 ; instruction with 32-bit sign-extended immediate and thus the
25841 ; instruction size is unchanged, except in the %eax case for
25842 ; which it is increased by one byte, hence the ! optimize_size.
;; After reload, with !TARGET_PARTIAL_REG_STALL and when optimizing the
;; insn for speed: an AND with a constant that both sets the flags and
;; writes register operand 1 is redone in SImode.  It applies when
;; operand 1 is HImode and !TARGET_FAST_PREFIX, or QImode and
;; TARGET_PROMOTE_QImode.  ix86_match_ccmode is asked for CCNOmode when
;; the constant is non-negative and CCZmode otherwise.
;; The constant is first masked with the GET_MODE_MASK of operand 1's
;; original mode; only then are operands 1 and 3 replaced by their
;; SImode lowparts.
25843 (define_split
25844 [(set (match_operand 0 "flags_reg_operand")
25845 (match_operator 2 "compare_operator"
25846 [(and (match_operand 3 "aligned_operand")
25847 (match_operand 4 "const_int_operand"))
25848 (const_int 0)]))
25849 (set (match_operand 1 "register_operand")
25850 (and (match_dup 3) (match_dup 4)))]
25851 "! TARGET_PARTIAL_REG_STALL && reload_completed
25852 && optimize_insn_for_speed_p ()
25853 && ((GET_MODE (operands[1]) == HImode && ! TARGET_FAST_PREFIX)
25854 || (GET_MODE (operands[1]) == QImode && TARGET_PROMOTE_QImode))
25855 /* Ensure that the operand will remain sign-extended immediate. */
25856 && ix86_match_ccmode (insn, INTVAL (operands[4]) >= 0 ? CCNOmode : CCZmode)"
25857 [(parallel [(set (match_dup 0)
25858 (match_op_dup 2 [(and:SI (match_dup 3) (match_dup 4))
25859 (const_int 0)]))
25860 (set (match_dup 1)
25861 (and:SI (match_dup 3) (match_dup 4)))])]
25862 {
25863 operands[4]
25864 = gen_int_mode (INTVAL (operands[4])
25865 & GET_MODE_MASK (GET_MODE (operands[1])), SImode);
25866 operands[1] = gen_lowpart (SImode, operands[1]);
25867 operands[3] = gen_lowpart (SImode, operands[3]);
25868 })
25869
25870 ; Don't promote the QImode tests, as i386 doesn't have encoding of
25871 ; the TEST instruction with 32-bit sign-extended immediate and thus
25872 ; the instruction size would at least double, which is not what we
25873 ; want even with ! optimize_size.
;; After reload, with !TARGET_PARTIAL_REG_STALL, !TARGET_FAST_PREFIX and
;; when optimizing the insn for speed: a flags-only compare of
;; (and:HI operand2 constant) against zero is redone as an SImode AND.
;; ix86_match_ccmode is asked for CCNOmode when the constant is
;; non-negative and CCZmode otherwise.  The constant is masked with the
;; GET_MODE_MASK of operand 2's original mode before operand 2 is
;; replaced by its SImode lowpart.
25874 (define_split
25875 [(set (match_operand 0 "flags_reg_operand")
25876 (match_operator 1 "compare_operator"
25877 [(and (match_operand:HI 2 "aligned_operand")
25878 (match_operand:HI 3 "const_int_operand"))
25879 (const_int 0)]))]
25880 "! TARGET_PARTIAL_REG_STALL && reload_completed
25881 && ! TARGET_FAST_PREFIX
25882 && optimize_insn_for_speed_p ()
25883 /* Ensure that the operand will remain sign-extended immediate. */
25884 && ix86_match_ccmode (insn, INTVAL (operands[3]) >= 0 ? CCNOmode : CCZmode)"
25885 [(set (match_dup 0)
25886 (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3))
25887 (const_int 0)]))]
25888 {
25889 operands[3]
25890 = gen_int_mode (INTVAL (operands[3])
25891 & GET_MODE_MASK (GET_MODE (operands[2])), SImode);
25892 operands[2] = gen_lowpart (SImode, operands[2]);
25893 })
25894
;; After reload, with !TARGET_PARTIAL_REG_STALL, rewrite a register
;; negation (with FLAGS_REG clobber) as an SImode negation of the SImode
;; lowparts.  It applies to HImode always, and to QImode when
;; TARGET_PROMOTE_QImode is set or the insn is optimized for size.
25895 (define_split
25896 [(set (match_operand 0 "register_operand")
25897 (neg (match_operand 1 "register_operand")))
25898 (clobber (reg:CC FLAGS_REG))]
25899 "! TARGET_PARTIAL_REG_STALL && reload_completed
25900 && (GET_MODE (operands[0]) == HImode
25901 || (GET_MODE (operands[0]) == QImode
25902 && (TARGET_PROMOTE_QImode
25903 || optimize_insn_for_size_p ())))"
25904 [(parallel [(set (match_dup 0)
25905 (neg:SI (match_dup 1)))
25906 (clobber (reg:CC FLAGS_REG))])]
25907 {
25908 operands[0] = gen_lowpart (SImode, operands[0]);
25909 operands[1] = gen_lowpart (SImode, operands[1]);
25910 })
25911
25912 ;; Do not split instructions with mask regs.
;; After reload, with !TARGET_PARTIAL_REG_STALL, rewrite a one's
;; complement between general registers as an SImode NOT of the SImode
;; lowparts.  It applies to HImode always, and to QImode when
;; TARGET_PROMOTE_QImode is set or the insn is optimized for size.
;; Both operands use general_reg_operand; neither form has a flags
;; clobber.
25913 (define_split
25914 [(set (match_operand 0 "general_reg_operand")
25915 (not (match_operand 1 "general_reg_operand")))]
25916 "! TARGET_PARTIAL_REG_STALL && reload_completed
25917 && (GET_MODE (operands[0]) == HImode
25918 || (GET_MODE (operands[0]) == QImode
25919 && (TARGET_PROMOTE_QImode
25920 || optimize_insn_for_size_p ())))"
25921 [(set (match_dup 0)
25922 (not:SI (match_dup 1)))]
25923 {
25924 operands[0] = gen_lowpart (SImode, operands[0]);
25925 operands[1] = gen_lowpart (SImode, operands[1]);
25926 })
25927 \f
25928 ;; RTL Peephole optimizations, run before sched2. These primarily look to
25929 ;; transform a complex memory operation into two memory to register operations.
25930
25931 ;; Don't push memory operands
;; A push of a SWI memory operand, with a scratch register available, is
;; replaced by a load of the memory into the scratch followed by a push
;; of the scratch.  It is disabled when TARGET_PUSH_MEMORY is set, when
;; optimizing the insn for size, or when the matched insn is
;; RTX_FRAME_RELATED_P.
25932 (define_peephole2
25933 [(set (match_operand:SWI 0 "push_operand")
25934 (match_operand:SWI 1 "memory_operand"))
25935 (match_scratch:SWI 2 "<r>")]
25936 "!(TARGET_PUSH_MEMORY || optimize_insn_for_size_p ())
25937 && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
25938 [(set (match_dup 2) (match_dup 1))
25939 (set (match_dup 0) (match_dup 2))])
25940
25941 ;; We need to handle SFmode only, because DFmode and XFmode are split to
25942 ;; SImode pushes.
;; SFmode version of the push-from-memory peephole: the value is loaded
;; into an SFmode scratch with constraint "r" and the scratch is pushed.
;; The enabling condition is the same: not TARGET_PUSH_MEMORY, not
;; optimizing for size, and the matched insn not RTX_FRAME_RELATED_P.
25943 (define_peephole2
25944 [(set (match_operand:SF 0 "push_operand")
25945 (match_operand:SF 1 "memory_operand"))
25946 (match_scratch:SF 2 "r")]
25947 "!(TARGET_PUSH_MEMORY || optimize_insn_for_size_p ())
25948 && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
25949 [(set (match_dup 2) (match_dup 1))
25950 (set (match_dup 0) (match_dup 2))])
25951
25952 ;; Don't move an immediate directly to memory when the instruction
25953 ;; gets too big, or if LCP stalls are a problem for 16-bit moves.
;; When optimizing for speed, a store of constant zero to SWI124 memory
;; is routed through a scratch register.  It fires for HImode with
;; TARGET_LCP_STALL, or when !TARGET_USE_MOV0 and TARGET_SPLIT_LONG_MOVES
;; hold and get_attr_length (insn) is at least
;; ix86_cur_cost ()->large_insn.  The replacement zeroes the SImode
;; lowpart of the scratch (operand 2) in a parallel that clobbers
;; FLAGS_REG, so the condition also requires
;; peep2_regno_dead_p (0, FLAGS_REG).  The scratch is then stored to
;; memory.
25954 (define_peephole2
25955 [(match_scratch:SWI124 1 "<r>")
25956 (set (match_operand:SWI124 0 "memory_operand")
25957 (const_int 0))]
25958 "optimize_insn_for_speed_p ()
25959 && ((<MODE>mode == HImode
25960 && TARGET_LCP_STALL)
25961 || (!TARGET_USE_MOV0
25962 && TARGET_SPLIT_LONG_MOVES
25963 && get_attr_length (insn) >= ix86_cur_cost ()->large_insn))
25964 && peep2_regno_dead_p (0, FLAGS_REG)"
25965 [(parallel [(set (match_dup 2) (const_int 0))
25966 (clobber (reg:CC FLAGS_REG))])
25967 (set (match_dup 0) (match_dup 1))]
25968 "operands[2] = gen_lowpart (SImode, operands[1]);")
25969
;; When optimizing for speed, a store of an immediate to SWI124 memory is
;; split into a load of the immediate into a scratch register and a
;; store of the scratch.  It fires for HImode with TARGET_LCP_STALL, or
;; when TARGET_SPLIT_LONG_MOVES holds and get_attr_length (insn) is at
;; least ix86_cur_cost ()->large_insn.
25970 (define_peephole2
25971 [(match_scratch:SWI124 2 "<r>")
25972 (set (match_operand:SWI124 0 "memory_operand")
25973 (match_operand:SWI124 1 "immediate_operand"))]
25974 "optimize_insn_for_speed_p ()
25975 && ((<MODE>mode == HImode
25976 && TARGET_LCP_STALL)
25977 || (TARGET_SPLIT_LONG_MOVES
25978 && get_attr_length (insn) >= ix86_cur_cost ()->large_insn))"
25979 [(set (match_dup 2) (match_dup 1))
25980 (set (match_dup 0) (match_dup 2))])
25981
25982 ;; Don't compare memory with zero, load and use a test instead.
;; When optimizing for speed and the flags user accepts CCNOmode, a
;; compare of an SImode memory operand against zero is replaced by a load
;; into an SImode scratch followed by the same compare on the scratch.
25983 (define_peephole2
25984 [(set (match_operand 0 "flags_reg_operand")
25985 (match_operator 1 "compare_operator"
25986 [(match_operand:SI 2 "memory_operand")
25987 (const_int 0)]))
25988 (match_scratch:SI 3 "r")]
25989 "optimize_insn_for_speed_p () && ix86_match_ccmode (insn, CCNOmode)"
25990 [(set (match_dup 3) (match_dup 2))
25991 (set (match_dup 0) (match_op_dup 1 [(match_dup 3) (const_int 0)]))])
25992
25993 ;; NOT is not pairable on Pentium, while XOR is, but one byte longer.
25994 ;; Don't split NOTs with a displacement operand, because resulting XOR
25995 ;; will not be pairable anyway.
25996 ;;
25997 ;; On AMD K6, NOT is vector decoded with memory operand that cannot be
25998 ;; represented using a modRM byte. The XOR replacement is long decoded,
25999 ;; so this split helps here as well.
26000 ;;
26001 ;; Note: Can't do this as a regular split because we can't get proper
26002 ;; lifetime information then.
26003
;; When optimizing for speed, a SWI124 NOT is rewritten as an XOR with -1
;; in a parallel that clobbers FLAGS_REG.  It fires when
;; TARGET_NOT_UNPAIRABLE is set and operand 0 is either not a MEM or not
;; a memory_displacement_operand, or when TARGET_NOT_VECTORMODE is set
;; and operand 0 is a long_memory_operand.  FLAGS_REG must pass
;; peep2_regno_dead_p at position 0 because the XOR form clobbers it.
26004 (define_peephole2
26005 [(set (match_operand:SWI124 0 "nonimmediate_gr_operand")
26006 (not:SWI124 (match_operand:SWI124 1 "nonimmediate_gr_operand")))]
26007 "optimize_insn_for_speed_p ()
26008 && ((TARGET_NOT_UNPAIRABLE
26009 && (!MEM_P (operands[0])
26010 || !memory_displacement_operand (operands[0], <MODE>mode)))
26011 || (TARGET_NOT_VECTORMODE
26012 && long_memory_operand (operands[0], <MODE>mode)))
26013 && peep2_regno_dead_p (0, FLAGS_REG)"
26014 [(parallel [(set (match_dup 0)
26015 (xor:SWI124 (match_dup 1) (const_int -1)))
26016 (clobber (reg:CC FLAGS_REG))])])
26017
26018 ;; Non pairable "test imm, reg" instructions can be translated to
26019 ;; "and imm, reg" if reg dies. The "and" form is also shorter (one
26020 ;; byte opcode instead of two, has a short form for byte operands),
26021 ;; so do it for other CPUs as well. Given that the value was dead,
26022 ;; this should not create any new dependencies. Pass on the sub-word
26023 ;; versions if we're concerned about partial register stalls.
26024
;; SImode case: a flags-only compare of (and reg imm) against zero
;; becomes a parallel that sets the flags the same way and also writes
;; the AND result back into the register.  It requires
;; ix86_match_ccmode (insn, CCNOmode), peep2_reg_dead_p (1, operands[2]),
;; and either that the register is not AX_REG or that the immediate
;; satisfies constraint K.
26025 (define_peephole2
26026 [(set (match_operand 0 "flags_reg_operand")
26027 (match_operator 1 "compare_operator"
26028 [(and:SI (match_operand:SI 2 "register_operand")
26029 (match_operand:SI 3 "immediate_operand"))
26030 (const_int 0)]))]
26031 "ix86_match_ccmode (insn, CCNOmode)
26032 && (REGNO (operands[2]) != AX_REG
26033 || satisfies_constraint_K (operands[3]))
26034 && peep2_reg_dead_p (1, operands[2])"
26035 [(parallel
26036 [(set (match_dup 0)
26037 (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3))
26038 (const_int 0)]))
26039 (set (match_dup 2)
26040 (and:SI (match_dup 2) (match_dup 3)))])])
26041
26042 ;; We don't need to handle HImode case, because it will be promoted to SImode
26043 ;; on ! TARGET_PARTIAL_REG_STALL
26044
;; QImode case of the test-to-and rewrite: a flags-only compare of
;; (and:QI reg imm) against zero becomes a parallel that sets the flags
;; and writes the AND result back into the register.  It requires
;; !TARGET_PARTIAL_REG_STALL, ix86_match_ccmode (insn, CCNOmode), a
;; register other than AX_REG, and peep2_reg_dead_p (1, operands[2]).
26045 (define_peephole2
26046 [(set (match_operand 0 "flags_reg_operand")
26047 (match_operator 1 "compare_operator"
26048 [(and:QI (match_operand:QI 2 "register_operand")
26049 (match_operand:QI 3 "immediate_operand"))
26050 (const_int 0)]))]
26051 "! TARGET_PARTIAL_REG_STALL
26052 && ix86_match_ccmode (insn, CCNOmode)
26053 && REGNO (operands[2]) != AX_REG
26054 && peep2_reg_dead_p (1, operands[2])"
26055 [(parallel
26056 [(set (match_dup 0)
26057 (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3))
26058 (const_int 0)]))
26059 (set (match_dup 2)
26060 (and:QI (match_dup 2) (match_dup 3)))])])
26061
;; Test-to-and rewrite for a QImode test of a field taken out of operand
;; 2 by an extract_operator with both constants equal to 8, viewed
;; through a QImode subreg and ANDed with a const_int.  The replacement
;; keeps the same flags computation and adds a store of the AND result,
;; wrapped in a SWI248 subreg, into the zero_extract (8, 8) of operand 2.
;; It requires !TARGET_PARTIAL_REG_STALL,
;; ix86_match_ccmode (insn, CCNOmode), a register other than AX_REG, and
;; peep2_reg_dead_p (1, operands[2]).
26062 (define_peephole2
26063 [(set (match_operand 0 "flags_reg_operand")
26064 (match_operator 1 "compare_operator"
26065 [(and:QI
26066 (subreg:QI
26067 (match_operator:SWI248 4 "extract_operator"
26068 [(match_operand 2 "int248_register_operand")
26069 (const_int 8)
26070 (const_int 8)]) 0)
26071 (match_operand 3 "const_int_operand"))
26072 (const_int 0)]))]
26073 "! TARGET_PARTIAL_REG_STALL
26074 && ix86_match_ccmode (insn, CCNOmode)
26075 && REGNO (operands[2]) != AX_REG
26076 && peep2_reg_dead_p (1, operands[2])"
26077 [(parallel
26078 [(set (match_dup 0)
26079 (match_op_dup 1
26080 [(and:QI
26081 (subreg:QI
26082 (match_op_dup 4 [(match_dup 2)
26083 (const_int 8)
26084 (const_int 8)]) 0)
26085 (match_dup 3))
26086 (const_int 0)]))
26087 (set (zero_extract:SWI248 (match_dup 2)
26088 (const_int 8)
26089 (const_int 8))
26090 (subreg:SWI248
26091 (and:QI
26092 (subreg:QI
26093 (match_op_dup 4 [(match_dup 2)
26094 (const_int 8)
26095 (const_int 8)]) 0)
26096 (match_dup 3)) 0))])])
26097
26098 ;; Don't do logical operations with memory inputs.
;; reg = reg OP mem, with the memory as the second source: the memory is
;; first loaded into a scratch register and the operation is then done
;; as reg = reg OP scratch, keeping the FLAGS_REG clobber.  It is
;; disabled when TARGET_READ_MODIFY is set or when optimizing for size.
26099 (define_peephole2
26100 [(match_scratch:SWI 2 "<r>")
26101 (parallel [(set (match_operand:SWI 0 "register_operand")
26102 (match_operator:SWI 3 "arith_or_logical_operator"
26103 [(match_dup 0)
26104 (match_operand:SWI 1 "memory_operand")]))
26105 (clobber (reg:CC FLAGS_REG))])]
26106 "!(TARGET_READ_MODIFY || optimize_insn_for_size_p ())"
26107 [(set (match_dup 2) (match_dup 1))
26108 (parallel [(set (match_dup 0)
26109 (match_op_dup 3 [(match_dup 0) (match_dup 2)]))
26110 (clobber (reg:CC FLAGS_REG))])])
26111
;; reg = mem OP reg, with the memory as the first source: the memory is
;; first loaded into a scratch register and the operation is then done
;; as reg = scratch OP reg, so operand order is preserved.  It is
;; disabled when TARGET_READ_MODIFY is set or when optimizing for size.
26112 (define_peephole2
26113 [(match_scratch:SWI 2 "<r>")
26114 (parallel [(set (match_operand:SWI 0 "register_operand")
26115 (match_operator:SWI 3 "arith_or_logical_operator"
26116 [(match_operand:SWI 1 "memory_operand")
26117 (match_dup 0)]))
26118 (clobber (reg:CC FLAGS_REG))])]
26119 "!(TARGET_READ_MODIFY || optimize_insn_for_size_p ())"
26120 [(set (match_dup 2) (match_dup 1))
26121 (parallel [(set (match_dup 0)
26122 (match_op_dup 3 [(match_dup 2) (match_dup 0)]))
26123 (clobber (reg:CC FLAGS_REG))])])
26124
26125 ;; Prefer Load+RegOp to Mov+MemOp. Watch out for cases when
26126 ;; the memory address refers to the destination of the load!
26127
;; Matches "op0 = op1" followed by "op0 = op0 OP mem" for a
;; commutative_operator on general registers, where op0 and op1 are
;; different registers.  The replacement is "op0 = operand 4" followed by
;; "op0 = op0 OP op1", where operand 4 is the result of
;; ix86_replace_reg_with_reg (operands[2], operands[0], operands[1]).
;; For QImode, operand 1 must additionally be an any_QIreg_operand.
;; NOTE(review): presumably the helper returns the memory reference with
;; operand 0 replaced by operand 1 in its address -- confirm against its
;; definition.
26128 (define_peephole2
26129 [(set (match_operand:SWI 0 "general_reg_operand")
26130 (match_operand:SWI 1 "general_reg_operand"))
26131 (parallel [(set (match_dup 0)
26132 (match_operator:SWI 3 "commutative_operator"
26133 [(match_dup 0)
26134 (match_operand:SWI 2 "memory_operand")]))
26135 (clobber (reg:CC FLAGS_REG))])]
26136 "REGNO (operands[0]) != REGNO (operands[1])
26137 && (<MODE>mode != QImode
26138 || any_QIreg_operand (operands[1], QImode))"
26139 [(set (match_dup 0) (match_dup 4))
26140 (parallel [(set (match_dup 0)
26141 (match_op_dup 3 [(match_dup 0) (match_dup 1)]))
26142 (clobber (reg:CC FLAGS_REG))])]
26143 {
26144 operands[4]
26145 = ix86_replace_reg_with_reg (operands[2], operands[0], operands[1]);
26146 })
26147
;; MMX-register version: "op0 = op1" followed by "op0 = op0 OP mem" for a
;; commutative_operator becomes "op0 = mem" followed by
;; "op0 = op0 OP op1", provided op0 and op1 are different registers.
;; The memory operand is reused unchanged and there is no flags clobber.
26148 (define_peephole2
26149 [(set (match_operand 0 "mmx_reg_operand")
26150 (match_operand 1 "mmx_reg_operand"))
26151 (set (match_dup 0)
26152 (match_operator 3 "commutative_operator"
26153 [(match_dup 0)
26154 (match_operand 2 "memory_operand")]))]
26155 "REGNO (operands[0]) != REGNO (operands[1])"
26156 [(set (match_dup 0) (match_dup 2))
26157 (set (match_dup 0)
26158 (match_op_dup 3 [(match_dup 0) (match_dup 1)]))])
26159
;; SSE-register version: "op0 = op1" followed by "op0 = op0 OP mem" for a
;; commutative_operator becomes "op0 = mem" followed by
;; "op0 = op0 OP op1", provided op0 and op1 are different registers.
;; When operand 1 is an EXT_REX_SSE register, the rewrite is allowed only
;; with TARGET_AVX512BW, or when the inner element size of operand 0 is
;; larger than 2 bytes, or when the operator is a logic_operator.
26160 (define_peephole2
26161 [(set (match_operand 0 "sse_reg_operand")
26162 (match_operand 1 "sse_reg_operand"))
26163 (set (match_dup 0)
26164 (match_operator 3 "commutative_operator"
26165 [(match_dup 0)
26166 (match_operand 2 "memory_operand")]))]
26167 "REGNO (operands[0]) != REGNO (operands[1])
26168 /* Punt if operands[1] is %[xy]mm16+ and AVX512BW is not enabled,
26169 as EVEX encoded vpadd[bw], vpmullw, vpmin[su][bw] and vpmax[su][bw]
26170 instructions require AVX512BW and AVX512VL, but with the original
26171 instructions it might require just AVX512VL.
26172 AVX512VL is implied from TARGET_HARD_REGNO_MODE_OK. */
26173 && (!EXT_REX_SSE_REGNO_P (REGNO (operands[1]))
26174 || TARGET_AVX512BW
26175 || GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (operands[0]))) > 2
26176 || logic_operator (operands[3], VOIDmode))"
26177 [(set (match_dup 0) (match_dup 2))
26178 (set (match_dup 0)
26179 (match_op_dup 3 [(match_dup 0) (match_dup 1)]))])
26180
26181 ; Don't do logical operations with memory outputs
26182 ;
26183 ; These two don't make sense for PPro/PII -- we're expanding a 4-uop
26184 ; instruction into two 1-uop insns plus a 2-uop insn. That last has
26185 ; the same decoder scheduling characteristics as the original.
26186
;; mem = mem OP x, with the memory as the first source: expanded into a
;; load of the memory into a scratch register, scratch = scratch OP x
;; (keeping the FLAGS_REG clobber), and a store of the scratch back to
;; the memory.  It is disabled when TARGET_READ_MODIFY_WRITE is set or
;; when optimizing for size.
26187 (define_peephole2
26188 [(match_scratch:SWI 2 "<r>")
26189 (parallel [(set (match_operand:SWI 0 "memory_operand")
26190 (match_operator:SWI 3 "arith_or_logical_operator"
26191 [(match_dup 0)
26192 (match_operand:SWI 1 "<nonmemory_operand>")]))
26193 (clobber (reg:CC FLAGS_REG))])]
26194 "!(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())"
26195 [(set (match_dup 2) (match_dup 0))
26196 (parallel [(set (match_dup 2)
26197 (match_op_dup 3 [(match_dup 2) (match_dup 1)]))
26198 (clobber (reg:CC FLAGS_REG))])
26199 (set (match_dup 0) (match_dup 2))])
26200
;; mem = x OP mem, with the memory as the second source: expanded into a
;; load of the memory into a scratch register, scratch = x OP scratch
;; (keeping the FLAGS_REG clobber and the operand order), and a store of
;; the scratch back to the memory.  It is disabled when
;; TARGET_READ_MODIFY_WRITE is set or when optimizing for size.
26201 (define_peephole2
26202 [(match_scratch:SWI 2 "<r>")
26203 (parallel [(set (match_operand:SWI 0 "memory_operand")
26204 (match_operator:SWI 3 "arith_or_logical_operator"
26205 [(match_operand:SWI 1 "<nonmemory_operand>")
26206 (match_dup 0)]))
26207 (clobber (reg:CC FLAGS_REG))])]
26208 "!(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())"
26209 [(set (match_dup 2) (match_dup 0))
26210 (parallel [(set (match_dup 2)
26211 (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
26212 (clobber (reg:CC FLAGS_REG))])
26213 (set (match_dup 0) (match_dup 2))])
26214
26215 ;; Attempt to use arith or logical operations with memory outputs with
26216 ;; setting of flags.
;; Four-insn sequence: load mem into reg; reg = reg OP x with a flags
;; clobber; store reg back to the same mem; compare reg with zero.
;; It is replaced by one parallel that sets the flags destination of the
;; fourth insn (operand 4) from a compare of (mem OP x) with zero and
;; sets mem to (mem OP x).
;; It is enabled when TARGET_READ_MODIFY_WRITE is set or when optimizing
;; for size.  The register must pass peep2_reg_dead_p at position 4 and
;; must not overlap the memory operand or operand 2.  For QImode,
;; operand 2 must be an immediate or an any_QIreg_operand.
;; The fourth insn must match CCGOCmode for PLUS and MINUS, CCNOmode for
;; the other operators.
26217 (define_peephole2
26218 [(set (match_operand:SWI 0 "register_operand")
26219 (match_operand:SWI 1 "memory_operand"))
26220 (parallel [(set (match_dup 0)
26221 (match_operator:SWI 3 "plusminuslogic_operator"
26222 [(match_dup 0)
26223 (match_operand:SWI 2 "<nonmemory_operand>")]))
26224 (clobber (reg:CC FLAGS_REG))])
26225 (set (match_dup 1) (match_dup 0))
26226 (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))]
26227 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
26228 && peep2_reg_dead_p (4, operands[0])
26229 && !reg_overlap_mentioned_p (operands[0], operands[1])
26230 && !reg_overlap_mentioned_p (operands[0], operands[2])
26231 && (<MODE>mode != QImode
26232 || immediate_operand (operands[2], QImode)
26233 || any_QIreg_operand (operands[2], QImode))
26234 && ix86_match_ccmode (peep2_next_insn (3),
26235 (GET_CODE (operands[3]) == PLUS
26236 || GET_CODE (operands[3]) == MINUS)
26237 ? CCGOCmode : CCNOmode)"
26238 [(parallel [(set (match_dup 4) (match_dup 6))
26239 (set (match_dup 1) (match_dup 5))])]
26240 {
26241 operands[4] = SET_DEST (PATTERN (peep2_next_insn (3)));
26242 operands[5]
26243 = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
26244 copy_rtx (operands[1]),
26245 operands[2]);
26246 operands[6]
26247 = gen_rtx_COMPARE (GET_MODE (operands[4]),
26248 copy_rtx (operands[5]),
26249 const0_rtx);
26250 })
26251
26252 ;; Likewise for cmpelim optimized pattern.
;; Three-insn form in which the operation and the compare are already one
;; parallel: load mem into reg; parallel of a flags set from a compare
;; of (reg OP x) with zero plus reg = (reg OP x); store reg to the same
;; mem.  It is replaced by one parallel that sets the flags destination
;; (operand 4, taken from element 0 of the second insn's parallel) from a
;; compare of (mem OP x) with zero and sets mem to (mem OP x).
;; It is enabled when TARGET_READ_MODIFY_WRITE is set or when optimizing
;; for size.  The register must pass peep2_reg_dead_p at position 3 and
;; must not overlap the memory operand or operand 2.  The second insn
;; must match CCGOCmode for PLUS and MINUS, CCNOmode otherwise.
26253 (define_peephole2
26254 [(set (match_operand:SWI 0 "register_operand")
26255 (match_operand:SWI 1 "memory_operand"))
26256 (parallel [(set (reg FLAGS_REG)
26257 (compare (match_operator:SWI 3 "plusminuslogic_operator"
26258 [(match_dup 0)
26259 (match_operand:SWI 2 "<nonmemory_operand>")])
26260 (const_int 0)))
26261 (set (match_dup 0) (match_dup 3))])
26262 (set (match_dup 1) (match_dup 0))]
26263 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
26264 && peep2_reg_dead_p (3, operands[0])
26265 && !reg_overlap_mentioned_p (operands[0], operands[1])
26266 && !reg_overlap_mentioned_p (operands[0], operands[2])
26267 && ix86_match_ccmode (peep2_next_insn (1),
26268 (GET_CODE (operands[3]) == PLUS
26269 || GET_CODE (operands[3]) == MINUS)
26270 ? CCGOCmode : CCNOmode)"
26271 [(parallel [(set (match_dup 4) (match_dup 6))
26272 (set (match_dup 1) (match_dup 5))])]
26273 {
26274 operands[4] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (1)), 0, 0));
26275 operands[5]
26276 = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
26277 copy_rtx (operands[1]), operands[2]);
26278 operands[6]
26279 = gen_rtx_COMPARE (GET_MODE (operands[4]), copy_rtx (operands[5]),
26280 const0_rtx);
26281 })
26282
26283 ;; Likewise for instances where we have a lea pattern.
;; Four-insn sequence where the addition is a flag-less LEAMODE plus:
;; load mem into operand 0; operand 3 = operand 4 + operand 2; store
;; operand 5 to the same mem; compare operand 5 with zero.  Operand 4
;; must be the same hard register as operand 0, and operand 5 the same
;; as operand 3.
;; It is replaced by one parallel that sets the flags destination of the
;; fourth insn (operand 6) from a compare of (mem + operand 2) with zero
;; and sets mem to (mem + operand 2); the sum is built in the SWI mode
;; from the lowpart of operand 2.
;; It is enabled when TARGET_READ_MODIFY_WRITE is set or when optimizing
;; for size.  Operand 3 must pass peep2_reg_dead_p at position 4.
;; Operand 0 must either be the same register as operand 3 or pass
;; peep2_reg_dead_p at position 2.  Operands 0 and 3 must not overlap the
;; memory operand, and operand 0 must not overlap operand 2.  For QImode,
;; operand 2 must be an immediate or an any_QIreg_operand.  The fourth
;; insn must match CCGOCmode.
26284 (define_peephole2
26285 [(set (match_operand:SWI 0 "register_operand")
26286 (match_operand:SWI 1 "memory_operand"))
26287 (set (match_operand:<LEAMODE> 3 "register_operand")
26288 (plus:<LEAMODE> (match_operand:<LEAMODE> 4 "register_operand")
26289 (match_operand:<LEAMODE> 2 "<nonmemory_operand>")))
26290 (set (match_dup 1) (match_operand:SWI 5 "register_operand"))
26291 (set (reg FLAGS_REG) (compare (match_dup 5) (const_int 0)))]
26292 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
26293 && REGNO (operands[4]) == REGNO (operands[0])
26294 && REGNO (operands[5]) == REGNO (operands[3])
26295 && peep2_reg_dead_p (4, operands[3])
26296 && ((REGNO (operands[0]) == REGNO (operands[3]))
26297 || peep2_reg_dead_p (2, operands[0]))
26298 && !reg_overlap_mentioned_p (operands[0], operands[1])
26299 && !reg_overlap_mentioned_p (operands[3], operands[1])
26300 && !reg_overlap_mentioned_p (operands[0], operands[2])
26301 && (<MODE>mode != QImode
26302 || immediate_operand (operands[2], QImode)
26303 || any_QIreg_operand (operands[2], QImode))
26304 && ix86_match_ccmode (peep2_next_insn (3), CCGOCmode)"
26305 [(parallel [(set (match_dup 6) (match_dup 8))
26306 (set (match_dup 1) (match_dup 7))])]
26307 {
26308 operands[6] = SET_DEST (PATTERN (peep2_next_insn (3)));
26309 operands[7]
26310 = gen_rtx_PLUS (<MODE>mode,
26311 copy_rtx (operands[1]),
26312 gen_lowpart (<MODE>mode, operands[2]));
26313 operands[8]
26314 = gen_rtx_COMPARE (GET_MODE (operands[6]),
26315 copy_rtx (operands[7]),
26316 const0_rtx);
26317 })
26318
;; Three-insn sequence without a separate load: reg = reg OP mem with a
;; flags clobber; store reg to the same mem; compare reg with zero.
;; It is accepted only when the operator is COMMUTATIVE_ARITH_P, and is
;; replaced by one parallel that sets the flags destination of the third
;; insn (operand 3) from a compare of (mem OP reg) with zero and sets mem
;; to (mem OP reg).
;; It is enabled when TARGET_READ_MODIFY_WRITE is set or when optimizing
;; for size.  The register must pass peep2_reg_dead_p at position 3 and
;; must not overlap the memory operand.  The third insn must match
;; CCGOCmode for PLUS, CCNOmode otherwise.
26319 (define_peephole2
26320 [(parallel [(set (match_operand:SWI 0 "register_operand")
26321 (match_operator:SWI 2 "plusminuslogic_operator"
26322 [(match_dup 0)
26323 (match_operand:SWI 1 "memory_operand")]))
26324 (clobber (reg:CC FLAGS_REG))])
26325 (set (match_dup 1) (match_dup 0))
26326 (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))]
26327 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
26328 && COMMUTATIVE_ARITH_P (operands[2])
26329 && peep2_reg_dead_p (3, operands[0])
26330 && !reg_overlap_mentioned_p (operands[0], operands[1])
26331 && ix86_match_ccmode (peep2_next_insn (2),
26332 GET_CODE (operands[2]) == PLUS
26333 ? CCGOCmode : CCNOmode)"
26334 [(parallel [(set (match_dup 3) (match_dup 5))
26335 (set (match_dup 1) (match_dup 4))])]
26336 {
26337 operands[3] = SET_DEST (PATTERN (peep2_next_insn (2)));
26338 operands[4]
26339 = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
26340 copy_rtx (operands[1]),
26341 operands[0]);
26342 operands[5]
26343 = gen_rtx_COMPARE (GET_MODE (operands[3]),
26344 copy_rtx (operands[4]),
26345 const0_rtx);
26346 })
26347
26348 ;; Likewise for cmpelim optimized pattern.
;; Two-insn form in which the operation and the compare are already one
;; parallel: a flags set from a compare of (reg OP mem) with zero plus
;; reg = (reg OP mem), followed by a store of reg to the same mem.
;; It is accepted only when the operator is COMMUTATIVE_ARITH_P, and is
;; replaced by one parallel that sets the flags destination (operand 3,
;; taken from element 0 of the first insn's parallel) from a compare of
;; (mem OP reg) with zero and sets mem to (mem OP reg).
;; It is enabled when TARGET_READ_MODIFY_WRITE is set or when optimizing
;; for size.  The register must pass peep2_reg_dead_p at position 2 and
;; must not overlap the memory operand.  The first insn must match
;; CCGOCmode for PLUS, CCNOmode otherwise.
26349 (define_peephole2
26350 [(parallel [(set (reg FLAGS_REG)
26351 (compare (match_operator:SWI 2 "plusminuslogic_operator"
26352 [(match_operand:SWI 0 "register_operand")
26353 (match_operand:SWI 1 "memory_operand")])
26354 (const_int 0)))
26355 (set (match_dup 0) (match_dup 2))])
26356 (set (match_dup 1) (match_dup 0))]
26357 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
26358 && COMMUTATIVE_ARITH_P (operands[2])
26359 && peep2_reg_dead_p (2, operands[0])
26360 && !reg_overlap_mentioned_p (operands[0], operands[1])
26361 && ix86_match_ccmode (peep2_next_insn (0),
26362 GET_CODE (operands[2]) == PLUS
26363 ? CCGOCmode : CCNOmode)"
26364 [(parallel [(set (match_dup 3) (match_dup 5))
26365 (set (match_dup 1) (match_dup 4))])]
26366 {
26367 operands[3] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (0)), 0, 0));
26368 operands[4]
26369 = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
26370 copy_rtx (operands[1]), operands[0]);
26371 operands[5]
26372 = gen_rtx_COMPARE (GET_MODE (operands[3]), copy_rtx (operands[4]),
26373 const0_rtx);
26374 })
26375
;; QImode/HImode load-op-store-compare where the operation itself was
;; carried out in SImode:
;;   op0 = mem;  op4 = op4 OP op2 (SImode);  mem = op0;
;;   flags = compare (op0, 0)
;; op0 and op4 must be the same hard register (REGNO check).  The
;; replacement applies OP directly to mem in <MODE>mode, using the low part
;; of op2, and sets the flags from that result.
;; In QImode op2 must be an immediate or satisfy any_QIreg_operand.
;; The compare insn must pass ix86_match_ccmode with CCGOCmode for
;; PLUS/MINUS and CCNOmode otherwise.
26376 (define_peephole2
26377 [(set (match_operand:SWI12 0 "register_operand")
26378 (match_operand:SWI12 1 "memory_operand"))
26379 (parallel [(set (match_operand:SI 4 "register_operand")
26380 (match_operator:SI 3 "plusminuslogic_operator"
26381 [(match_dup 4)
26382 (match_operand:SI 2 "nonmemory_operand")]))
26383 (clobber (reg:CC FLAGS_REG))])
26384 (set (match_dup 1) (match_dup 0))
26385 (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))]
26386 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
26387 && REGNO (operands[0]) == REGNO (operands[4])
26388 && peep2_reg_dead_p (4, operands[0])
26389 && (<MODE>mode != QImode
26390 || immediate_operand (operands[2], SImode)
26391 || any_QIreg_operand (operands[2], SImode))
26392 && !reg_overlap_mentioned_p (operands[0], operands[1])
26393 && !reg_overlap_mentioned_p (operands[0], operands[2])
26394 && ix86_match_ccmode (peep2_next_insn (3),
26395 (GET_CODE (operands[3]) == PLUS
26396 || GET_CODE (operands[3]) == MINUS)
26397 ? CCGOCmode : CCNOmode)"
26398 [(parallel [(set (match_dup 5) (match_dup 7))
26399 (set (match_dup 1) (match_dup 6))])]
26400 {
26401 operands[5] = SET_DEST (PATTERN (peep2_next_insn (3)));
26402 operands[6]
26403 = gen_rtx_fmt_ee (GET_CODE (operands[3]), <MODE>mode,
26404 copy_rtx (operands[1]),
26405 gen_lowpart (<MODE>mode, operands[2]));
26406 operands[7]
26407 = gen_rtx_COMPARE (GET_MODE (operands[5]),
26408 copy_rtx (operands[6]),
26409 const0_rtx);
26410 })
26411
26412 ;; peephole2 comes before regcprop, so deal also with a case that
26413 ;; would be cleaned up by regcprop.
;; Matched sequence (five insns):
;;   op0 = mem;  op0 = op0 OP op2;  op4 = op0;  mem = op4;
;;   flags = compare (op4, 0)
;; It is replaced by one parallel that sets the flags from (mem OP op2)
;; compared with zero and stores (mem OP op2) to mem.
;; op0 must satisfy peep2_reg_dead_p (3, ...) and op4
;; peep2_reg_dead_p (5, ...); neither may overlap the memory operand, and
;; op0 may not overlap op2.  In QImode op2 must be an immediate or satisfy
;; any_QIreg_operand.
26414 (define_peephole2
26415 [(set (match_operand:SWI 0 "register_operand")
26416 (match_operand:SWI 1 "memory_operand"))
26417 (parallel [(set (match_dup 0)
26418 (match_operator:SWI 3 "plusminuslogic_operator"
26419 [(match_dup 0)
26420 (match_operand:SWI 2 "<nonmemory_operand>")]))
26421 (clobber (reg:CC FLAGS_REG))])
26422 (set (match_operand:SWI 4 "register_operand") (match_dup 0))
26423 (set (match_dup 1) (match_dup 4))
26424 (set (reg FLAGS_REG) (compare (match_dup 4) (const_int 0)))]
26425 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
26426 && peep2_reg_dead_p (3, operands[0])
26427 && peep2_reg_dead_p (5, operands[4])
26428 && !reg_overlap_mentioned_p (operands[0], operands[1])
26429 && !reg_overlap_mentioned_p (operands[0], operands[2])
26430 && !reg_overlap_mentioned_p (operands[4], operands[1])
26431 && (<MODE>mode != QImode
26432 || immediate_operand (operands[2], QImode)
26433 || any_QIreg_operand (operands[2], QImode))
26434 && ix86_match_ccmode (peep2_next_insn (4),
26435 (GET_CODE (operands[3]) == PLUS
26436 || GET_CODE (operands[3]) == MINUS)
26437 ? CCGOCmode : CCNOmode)"
26438 [(parallel [(set (match_dup 5) (match_dup 7))
26439 (set (match_dup 1) (match_dup 6))])]
26440 {
26441 operands[5] = SET_DEST (PATTERN (peep2_next_insn (4)));
26442 operands[6]
26443 = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
26444 copy_rtx (operands[1]),
26445 operands[2]);
26446 operands[7]
26447 = gen_rtx_COMPARE (GET_MODE (operands[5]),
26448 copy_rtx (operands[6]),
26449 const0_rtx);
26450 })
26451
;; QImode/HImode form of the load / op / register copy / store / compare
;; sequence, with the operation carried out in SImode:
;;   op0 = mem;  op4 = op4 OP op2 (SImode);  op5 = op0;  mem = op5;
;;   flags = compare (op5, 0)
;; op0 and op4 must be the same hard register (REGNO check).  The
;; replacement applies OP to mem in <MODE>mode with the low part of op2 and
;; sets the flags from that result.
;; In QImode op2 must be an immediate or satisfy any_QIreg_operand.
26452 (define_peephole2
26453 [(set (match_operand:SWI12 0 "register_operand")
26454 (match_operand:SWI12 1 "memory_operand"))
26455 (parallel [(set (match_operand:SI 4 "register_operand")
26456 (match_operator:SI 3 "plusminuslogic_operator"
26457 [(match_dup 4)
26458 (match_operand:SI 2 "nonmemory_operand")]))
26459 (clobber (reg:CC FLAGS_REG))])
26460 (set (match_operand:SWI12 5 "register_operand") (match_dup 0))
26461 (set (match_dup 1) (match_dup 5))
26462 (set (reg FLAGS_REG) (compare (match_dup 5) (const_int 0)))]
26463 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
26464 && REGNO (operands[0]) == REGNO (operands[4])
26465 && peep2_reg_dead_p (3, operands[0])
26466 && peep2_reg_dead_p (5, operands[5])
26467 && (<MODE>mode != QImode
26468 || immediate_operand (operands[2], SImode)
26469 || any_QIreg_operand (operands[2], SImode))
26470 && !reg_overlap_mentioned_p (operands[0], operands[1])
26471 && !reg_overlap_mentioned_p (operands[0], operands[2])
26472 && !reg_overlap_mentioned_p (operands[5], operands[1])
26473 && ix86_match_ccmode (peep2_next_insn (4),
26474 (GET_CODE (operands[3]) == PLUS
26475 || GET_CODE (operands[3]) == MINUS)
26476 ? CCGOCmode : CCNOmode)"
26477 [(parallel [(set (match_dup 6) (match_dup 8))
26478 (set (match_dup 1) (match_dup 7))])]
26479 {
26480 operands[6] = SET_DEST (PATTERN (peep2_next_insn (4)));
26481 operands[7]
26482 = gen_rtx_fmt_ee (GET_CODE (operands[3]), <MODE>mode,
26483 copy_rtx (operands[1]),
26484 gen_lowpart (<MODE>mode, operands[2]));
26485 operands[8]
26486 = gen_rtx_COMPARE (GET_MODE (operands[6]),
26487 copy_rtx (operands[7]),
26488 const0_rtx);
26489 })
26490
26491 ;; Likewise for cmpelim optimized pattern.
;; Matched sequence:
;;   op0 = mem;  [flags = compare (op0 OP op2, 0); op0 = op0 OP op2];
;;   op4 = op0;  mem = op4
;; It is replaced by one parallel that sets the flags from (mem OP op2)
;; and stores that value to mem.  The flags destination is read from
;; element 0 of the matched parallel (insn 1 of the sequence).
26492 (define_peephole2
26493 [(set (match_operand:SWI 0 "register_operand")
26494 (match_operand:SWI 1 "memory_operand"))
26495 (parallel [(set (reg FLAGS_REG)
26496 (compare (match_operator:SWI 3 "plusminuslogic_operator"
26497 [(match_dup 0)
26498 (match_operand:SWI 2 "<nonmemory_operand>")])
26499 (const_int 0)))
26500 (set (match_dup 0) (match_dup 3))])
26501 (set (match_operand:SWI 4 "register_operand") (match_dup 0))
26502 (set (match_dup 1) (match_dup 4))]
26503 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
26504 && peep2_reg_dead_p (3, operands[0])
26505 && peep2_reg_dead_p (4, operands[4])
26506 && !reg_overlap_mentioned_p (operands[0], operands[1])
26507 && !reg_overlap_mentioned_p (operands[0], operands[2])
26508 && !reg_overlap_mentioned_p (operands[4], operands[1])
26509 && ix86_match_ccmode (peep2_next_insn (1),
26510 (GET_CODE (operands[3]) == PLUS
26511 || GET_CODE (operands[3]) == MINUS)
26512 ? CCGOCmode : CCNOmode)"
26513 [(parallel [(set (match_dup 5) (match_dup 7))
26514 (set (match_dup 1) (match_dup 6))])]
26515 {
26516 operands[5] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (1)), 0, 0));
26517 operands[6]
26518 = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
26519 copy_rtx (operands[1]), operands[2]);
26520 operands[7]
26521 = gen_rtx_COMPARE (GET_MODE (operands[5]), copy_rtx (operands[6]),
26522 const0_rtx);
26523 })
26524
26525 ;; Special cases for xor, where (x ^= y) != 0 is (misoptimized)
26526 ;; into x = z; x ^= y; x != z
;; Matched sequence:
;;   op0 = mem;  op3 = op0;  op4 ^= op2;  mem = op4;
;;   flags:CCZ = compare (op5, op6)
;; op4 must be the same register as op0 or op3.  The compare must be
;; between the other, unmodified copy and op2, in either operand order.
;; The replacement is one parallel that xors op2 into mem and sets the
;; flags by comparing the xor result with zero.
;; In QImode op2 must be an immediate or satisfy any_QIreg_operand.
26527 (define_peephole2
26528 [(set (match_operand:SWI 0 "register_operand")
26529 (match_operand:SWI 1 "memory_operand"))
26530 (set (match_operand:SWI 3 "register_operand") (match_dup 0))
26531 (parallel [(set (match_operand:SWI 4 "register_operand")
26532 (xor:SWI (match_dup 4)
26533 (match_operand:SWI 2 "<nonmemory_operand>")))
26534 (clobber (reg:CC FLAGS_REG))])
26535 (set (match_dup 1) (match_dup 4))
26536 (set (reg:CCZ FLAGS_REG)
26537 (compare:CCZ (match_operand:SWI 5 "register_operand")
26538 (match_operand:SWI 6 "<nonmemory_operand>")))]
26539 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
26540 && (REGNO (operands[4]) == REGNO (operands[0])
26541 || REGNO (operands[4]) == REGNO (operands[3]))
26542 && (rtx_equal_p (operands[REGNO (operands[4]) == REGNO (operands[0])
26543 ? 3 : 0], operands[5])
26544 ? rtx_equal_p (operands[2], operands[6])
26545 : rtx_equal_p (operands[2], operands[5])
26546 && rtx_equal_p (operands[REGNO (operands[4]) == REGNO (operands[0])
26547 ? 3 : 0], operands[6]))
26548 && peep2_reg_dead_p (4, operands[4])
26549 && peep2_reg_dead_p (5, operands[REGNO (operands[4]) == REGNO (operands[0])
26550 ? 3 : 0])
26551 && !reg_overlap_mentioned_p (operands[0], operands[1])
26552 && !reg_overlap_mentioned_p (operands[0], operands[2])
26553 && !reg_overlap_mentioned_p (operands[3], operands[0])
26554 && !reg_overlap_mentioned_p (operands[3], operands[1])
26555 && !reg_overlap_mentioned_p (operands[3], operands[2])
26556 && (<MODE>mode != QImode
26557 || immediate_operand (operands[2], QImode)
26558 || any_QIreg_operand (operands[2], QImode))"
26559 [(parallel [(set (match_dup 7) (match_dup 9))
26560 (set (match_dup 1) (match_dup 8))])]
26561 {
26562 operands[7] = SET_DEST (PATTERN (peep2_next_insn (4)));
26563 operands[8] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]),
26564 operands[2]);
26565 operands[9]
26566 = gen_rtx_COMPARE (GET_MODE (operands[7]),
26567 copy_rtx (operands[8]),
26568 const0_rtx);
26569 })
26570
;; QImode/HImode form of the xor special case, with the xor carried out in
;; SImode:
;;   op0 = mem;  op3 = op0;  op4 ^= op2 (SImode);  mem = op5;
;;   flags:CCZ = compare (op6, op7)
;; op5 must be the same hard register as op4 and as either op0 or op3.
;; The compare must be between the other, unmodified copy and op2; when op2
;; is a register it is matched by REGNO because it is in SImode while the
;; compare operands are in <MODE>mode.
;; The replacement xors the low part of op2 into mem in <MODE>mode and sets
;; the flags by comparing that result with zero.
26571 (define_peephole2
26572 [(set (match_operand:SWI12 0 "register_operand")
26573 (match_operand:SWI12 1 "memory_operand"))
26574 (set (match_operand:SWI12 3 "register_operand") (match_dup 0))
26575 (parallel [(set (match_operand:SI 4 "register_operand")
26576 (xor:SI (match_dup 4)
26577 (match_operand:SI 2 "<nonmemory_operand>")))
26578 (clobber (reg:CC FLAGS_REG))])
26579 (set (match_dup 1) (match_operand:SWI12 5 "register_operand"))
26580 (set (reg:CCZ FLAGS_REG)
26581 (compare:CCZ (match_operand:SWI12 6 "register_operand")
26582 (match_operand:SWI12 7 "<nonmemory_operand>")))]
26583 "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
26584 && (REGNO (operands[5]) == REGNO (operands[0])
26585 || REGNO (operands[5]) == REGNO (operands[3]))
26586 && REGNO (operands[5]) == REGNO (operands[4])
26587 && (rtx_equal_p (operands[REGNO (operands[5]) == REGNO (operands[0])
26588 ? 3 : 0], operands[6])
26589 ? (REG_P (operands[2])
26590 ? REG_P (operands[7]) && REGNO (operands[2]) == REGNO (operands[7])
26591 : rtx_equal_p (operands[2], operands[7]))
26592 : (rtx_equal_p (operands[REGNO (operands[5]) == REGNO (operands[0])
26593 ? 3 : 0], operands[7])
26594 && REG_P (operands[2])
26595 && REGNO (operands[2]) == REGNO (operands[6])))
26596 && peep2_reg_dead_p (4, operands[5])
26597 && peep2_reg_dead_p (5, operands[REGNO (operands[5]) == REGNO (operands[0])
26598 ? 3 : 0])
26599 && !reg_overlap_mentioned_p (operands[0], operands[1])
26600 && !reg_overlap_mentioned_p (operands[0], operands[2])
26601 && !reg_overlap_mentioned_p (operands[3], operands[0])
26602 && !reg_overlap_mentioned_p (operands[3], operands[1])
26603 && !reg_overlap_mentioned_p (operands[3], operands[2])
26604 && (<MODE>mode != QImode
26605 || immediate_operand (operands[2], SImode)
26606 || any_QIreg_operand (operands[2], SImode))"
26607 [(parallel [(set (match_dup 8) (match_dup 10))
26608 (set (match_dup 1) (match_dup 9))])]
26609 {
26610 operands[8] = SET_DEST (PATTERN (peep2_next_insn (4)));
26611 operands[9] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]),
26612 gen_lowpart (<MODE>mode, operands[2]));
26613 operands[10]
26614 = gen_rtx_COMPARE (GET_MODE (operands[8]),
26615 copy_rtx (operands[9]),
26616 const0_rtx);
26617 })
26618
26619 ;; Attempt to optimize away memory stores of values the memory already
26620 ;; has. See PR79593.
;; Matches "op0 = mem1; mem2 = op0" where mem1 and mem2 are rtx_equal_p and
;; neither is volatile, and keeps only the load.  op0 must not be mentioned
;; in the memory operand.
26621 (define_peephole2
26622 [(set (match_operand 0 "register_operand")
26623 (match_operand 1 "memory_operand"))
26624 (set (match_operand 2 "memory_operand") (match_dup 0))]
26625 "!MEM_VOLATILE_P (operands[1])
26626 && !MEM_VOLATILE_P (operands[2])
26627 && rtx_equal_p (operands[1], operands[2])
26628 && !reg_overlap_mentioned_p (operands[0], operands[2])"
26629 [(set (match_dup 0) (match_dup 1))])
26630
26631 ;; Attempt to always use XOR for zeroing registers (including FP modes).
;; Matches a plain "general register = 0" of at most UNITS_PER_WORD bytes
;; and re-emits it as a word_mode set of zero inside a parallel that
;; clobbers FLAGS_REG.  Applied only when FLAGS_REG is dead at this insn
;; (peep2_regno_dead_p) and either TARGET_USE_MOV0 is off or the insn is
;; optimized for size.
26632 (define_peephole2
26633 [(set (match_operand 0 "general_reg_operand")
26634 (match_operand 1 "const0_operand"))]
26635 "GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD
26636 && (! TARGET_USE_MOV0 || optimize_insn_for_size_p ())
26637 && peep2_regno_dead_p (0, FLAGS_REG)"
26638 [(parallel [(set (match_dup 0) (const_int 0))
26639 (clobber (reg:CC FLAGS_REG))])]
26640 "operands[0] = gen_lowpart (word_mode, operands[0]);")
26641
;; Zeroing only the low QImode/HImode part of a general register
;; (strict_low_part): re-emit the same set inside a parallel that clobbers
;; FLAGS_REG, under the same TARGET_USE_MOV0 / size and dead-flags
;; conditions used for whole-register zeroing.
26642 (define_peephole2
26643 [(set (strict_low_part (match_operand:SWI12 0 "general_reg_operand"))
26644 (const_int 0))]
26645 "(! TARGET_USE_MOV0 || optimize_insn_for_size_p ())
26646 && peep2_regno_dead_p (0, FLAGS_REG)"
26647 [(parallel [(set (strict_low_part (match_dup 0)) (const_int 0))
26648 (clobber (reg:CC FLAGS_REG))])])
26649
26650 ;; For HI, SI and DI modes, or $-1,reg is smaller than mov $-1,reg.
;; Matches a plain "general register = -1" and re-emits it inside a
;; parallel that clobbers FLAGS_REG, when TARGET_MOVE_M1_VIA_OR is set or
;; the insn is optimized for size, and FLAGS_REG is dead.  Operands narrower
;; than SImode are rewritten to their SImode low part first.
26651 (define_peephole2
26652 [(set (match_operand:SWI248 0 "general_reg_operand")
26653 (const_int -1))]
26654 "(TARGET_MOVE_M1_VIA_OR || optimize_insn_for_size_p ())
26655 && peep2_regno_dead_p (0, FLAGS_REG)"
26656 [(parallel [(set (match_dup 0) (const_int -1))
26657 (clobber (reg:CC FLAGS_REG))])]
26658 {
26659 if (<MODE_SIZE> < GET_MODE_SIZE (SImode))
26660 operands[0] = gen_lowpart (SImode, operands[0]);
26661 })
26662
26663 ;; Attempt to convert simple lea to add/shift.
26664 ;; These can be created by move expanders.
26665 ;; Disable PLUS peepholes on TARGET_OPT_AGU, since all
26666 ;; relevant lea instructions were already split.
26667
;; op0 = op0 + op1 written as a bare set (no FLAGS_REG clobber) is re-emitted
;; as the same addition inside a parallel that clobbers FLAGS_REG.
;; Requires !TARGET_OPT_AGU and FLAGS_REG dead at this insn.
26668 (define_peephole2
26669 [(set (match_operand:SWI48 0 "register_operand")
26670 (plus:SWI48 (match_dup 0)
26671 (match_operand:SWI48 1 "<nonmemory_operand>")))]
26672 "!TARGET_OPT_AGU
26673 && peep2_regno_dead_p (0, FLAGS_REG)"
26674 [(parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 1)))
26675 (clobber (reg:CC FLAGS_REG))])])
26676
;; Same conversion for the swapped operand order, op0 = op1 + op0.  The
;; replacement puts op0 first: op0 = op0 + op1, with a FLAGS_REG clobber.
26677 (define_peephole2
26678 [(set (match_operand:SWI48 0 "register_operand")
26679 (plus:SWI48 (match_operand:SWI48 1 "<nonmemory_operand>")
26680 (match_dup 0)))]
26681 "!TARGET_OPT_AGU
26682 && peep2_regno_dead_p (0, FLAGS_REG)"
26683 [(parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 1)))
26684 (clobber (reg:CC FLAGS_REG))])])
26685
;; 64-bit only: a bare DImode set of zero_extend (op1 + op2) in SImode,
;; where op0 and op1 are the same hard register, is re-emitted unchanged
;; inside a parallel that clobbers FLAGS_REG.
;; Requires !TARGET_OPT_AGU and FLAGS_REG dead at this insn.
26686 (define_peephole2
26687 [(set (match_operand:DI 0 "register_operand")
26688 (zero_extend:DI
26689 (plus:SI (match_operand:SI 1 "register_operand")
26690 (match_operand:SI 2 "nonmemory_operand"))))]
26691 "TARGET_64BIT && !TARGET_OPT_AGU
26692 && REGNO (operands[0]) == REGNO (operands[1])
26693 && peep2_regno_dead_p (0, FLAGS_REG)"
26694 [(parallel [(set (match_dup 0)
26695 (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))
26696 (clobber (reg:CC FLAGS_REG))])])
26697
;; 64-bit only: like the zero-extended addition whose first addend is the
;; destination register, but here the register matching op0 is the second
;; addend (op2).  The replacement swaps the addends so that op2 comes first
;; and adds a FLAGS_REG clobber.
26698 (define_peephole2
26699 [(set (match_operand:DI 0 "register_operand")
26700 (zero_extend:DI
26701 (plus:SI (match_operand:SI 1 "nonmemory_operand")
26702 (match_operand:SI 2 "register_operand"))))]
26703 "TARGET_64BIT && !TARGET_OPT_AGU
26704 && REGNO (operands[0]) == REGNO (operands[2])
26705 && peep2_regno_dead_p (0, FLAGS_REG)"
26706 [(parallel [(set (match_dup 0)
26707 (zero_extend:DI (plus:SI (match_dup 2) (match_dup 1))))
26708 (clobber (reg:CC FLAGS_REG))])])
26709
;; op0 = op0 * C written as a bare set, with C a power of two, becomes a
;; left shift of op0 by exact_log2 (C) inside a parallel that clobbers
;; FLAGS_REG.  Requires FLAGS_REG to be dead at this insn.
26710 (define_peephole2
26711 [(set (match_operand:SWI48 0 "register_operand")
26712 (mult:SWI48 (match_dup 0)
26713 (match_operand:SWI48 1 "const_int_operand")))]
26714 "pow2p_hwi (INTVAL (operands[1]))
26715 && peep2_regno_dead_p (0, FLAGS_REG)"
26716 [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))
26717 (clobber (reg:CC FLAGS_REG))])]
26718 "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
26719
;; 64-bit only: a bare DImode set of zero_extend (op1 * C) in SImode, with
;; C a power of two and op0/op1 the same hard register, becomes
;; zero_extend (op1 << exact_log2 (C)) inside a parallel that clobbers
;; FLAGS_REG.  Requires FLAGS_REG to be dead at this insn.
26720 (define_peephole2
26721 [(set (match_operand:DI 0 "register_operand")
26722 (zero_extend:DI
26723 (mult:SI (match_operand:SI 1 "register_operand")
26724 (match_operand:SI 2 "const_int_operand"))))]
26725 "TARGET_64BIT
26726 && pow2p_hwi (INTVAL (operands[2]))
26727 && REGNO (operands[0]) == REGNO (operands[1])
26728 && peep2_regno_dead_p (0, FLAGS_REG)"
26729 [(parallel [(set (match_dup 0)
26730 (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))
26731 (clobber (reg:CC FLAGS_REG))])]
26732 "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));")
26733
26734 ;; The ESP adjustments can be done by the push and pop instructions. Resulting
26735 ;; code is shorter, since push is only 1 byte, while add imm, %esp is 3 bytes.
26736 ;; On many CPUs it is also faster, since special hardware to avoid esp
26737 ;; dependencies is present.
26738
26739 ;; While some of these conversions may be done using splitters, we use
26740 ;; peepholes in order to allow combine_stack_adjustments pass to see
26741 ;; nonobfuscated RTL.
26742
26743 ;; Convert prologue esp subtractions to push.
26744 ;; We need register to push. In order to keep verify_flow_info happy we have
26745 ;; two choices
26746 ;; - use scratch and clobber it in order to avoid dependencies
26747 ;; - use already live register
26748 ;; We can't use the second way right now, since there is no reliable way to
26749 ;; verify that a given register is live. The first choice will also most likely
26750 ;; result in fewer dependencies. At the point of esp adjustments it is very likely that
26751 ;; call clobbered registers are dead. We may want to use base pointer as an
26752 ;; alternative when no register is available later.
26753
;; Prologue form (the adjustment carries a (mem:BLK (scratch)) clobber):
;; a stack-pointer adjustment of exactly -GET_MODE_SIZE (word_mode) becomes
;; a clobber of a scratch register followed by one push of that register;
;; the memory clobber is kept on the push.
;; Enabled by TARGET_SINGLE_PUSH or when optimizing the insn for size, and
;; never when ix86_red_zone_used.
26754 (define_peephole2
26755 [(match_scratch:W 1 "r")
26756 (parallel [(set (reg:P SP_REG)
26757 (plus:P (reg:P SP_REG)
26758 (match_operand:P 0 "const_int_operand")))
26759 (clobber (reg:CC FLAGS_REG))
26760 (clobber (mem:BLK (scratch)))])]
26761 "(TARGET_SINGLE_PUSH || optimize_insn_for_size_p ())
26762 && INTVAL (operands[0]) == -GET_MODE_SIZE (word_mode)
26763 && !ix86_red_zone_used"
26764 [(clobber (match_dup 1))
26765 (parallel [(set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
26766 (clobber (mem:BLK (scratch)))])])
26767
;; Prologue form for an adjustment of exactly -2 * GET_MODE_SIZE (word_mode):
;; emit a clobber of the scratch register and two pushes of it; the
;; (mem:BLK (scratch)) clobber is kept on the second push.
;; Enabled by TARGET_DOUBLE_PUSH or when optimizing the insn for size, and
;; never when ix86_red_zone_used.
26768 (define_peephole2
26769 [(match_scratch:W 1 "r")
26770 (parallel [(set (reg:P SP_REG)
26771 (plus:P (reg:P SP_REG)
26772 (match_operand:P 0 "const_int_operand")))
26773 (clobber (reg:CC FLAGS_REG))
26774 (clobber (mem:BLK (scratch)))])]
26775 "(TARGET_DOUBLE_PUSH || optimize_insn_for_size_p ())
26776 && INTVAL (operands[0]) == -2*GET_MODE_SIZE (word_mode)
26777 && !ix86_red_zone_used"
26778 [(clobber (match_dup 1))
26779 (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
26780 (parallel [(set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
26781 (clobber (mem:BLK (scratch)))])])
26782
26783 ;; Convert esp subtractions to push.
;; Adjustment without a memory clobber: a stack-pointer adjustment of
;; exactly -GET_MODE_SIZE (word_mode) becomes a clobber of a scratch
;; register followed by one push of it.
;; Enabled by TARGET_SINGLE_PUSH or when optimizing the insn for size, and
;; never when ix86_red_zone_used.
26784 (define_peephole2
26785 [(match_scratch:W 1 "r")
26786 (parallel [(set (reg:P SP_REG)
26787 (plus:P (reg:P SP_REG)
26788 (match_operand:P 0 "const_int_operand")))
26789 (clobber (reg:CC FLAGS_REG))])]
26790 "(TARGET_SINGLE_PUSH || optimize_insn_for_size_p ())
26791 && INTVAL (operands[0]) == -GET_MODE_SIZE (word_mode)
26792 && !ix86_red_zone_used"
26793 [(clobber (match_dup 1))
26794 (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))])
26795
;; Adjustment without a memory clobber, of exactly
;; -2 * GET_MODE_SIZE (word_mode): emit a clobber of the scratch register
;; and two pushes of it.
;; Enabled by TARGET_DOUBLE_PUSH or when optimizing the insn for size, and
;; never when ix86_red_zone_used.
26796 (define_peephole2
26797 [(match_scratch:W 1 "r")
26798 (parallel [(set (reg:P SP_REG)
26799 (plus:P (reg:P SP_REG)
26800 (match_operand:P 0 "const_int_operand")))
26801 (clobber (reg:CC FLAGS_REG))])]
26802 "(TARGET_DOUBLE_PUSH || optimize_insn_for_size_p ())
26803 && INTVAL (operands[0]) == -2*GET_MODE_SIZE (word_mode)
26804 && !ix86_red_zone_used"
26805 [(clobber (match_dup 1))
26806 (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
26807 (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))])
26808
26809 ;; Convert epilogue deallocator to pop.
;; Epilogue form (the adjustment carries a (mem:BLK (scratch)) clobber):
;; a stack-pointer adjustment of exactly +GET_MODE_SIZE (word_mode) becomes
;; one pop into a scratch register, keeping the memory clobber.
;; Enabled by TARGET_SINGLE_POP or when optimizing the insn for size.
26810 (define_peephole2
26811 [(match_scratch:W 1 "r")
26812 (parallel [(set (reg:P SP_REG)
26813 (plus:P (reg:P SP_REG)
26814 (match_operand:P 0 "const_int_operand")))
26815 (clobber (reg:CC FLAGS_REG))
26816 (clobber (mem:BLK (scratch)))])]
26817 "(TARGET_SINGLE_POP || optimize_insn_for_size_p ())
26818 && INTVAL (operands[0]) == GET_MODE_SIZE (word_mode)"
26819 [(parallel [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
26820 (clobber (mem:BLK (scratch)))])])
26821
26822 ;; Two pops case is tricky, since pop causes dependency
26823 ;; on destination register. We use two registers if available.
;; Epilogue form for an adjustment of exactly
;; +2 * GET_MODE_SIZE (word_mode), using two distinct scratch registers:
;; pop into the first (keeping the (mem:BLK (scratch)) clobber), then pop
;; into the second.
;; Enabled by TARGET_DOUBLE_POP or when optimizing the insn for size.
26824 (define_peephole2
26825 [(match_scratch:W 1 "r")
26826 (match_scratch:W 2 "r")
26827 (parallel [(set (reg:P SP_REG)
26828 (plus:P (reg:P SP_REG)
26829 (match_operand:P 0 "const_int_operand")))
26830 (clobber (reg:CC FLAGS_REG))
26831 (clobber (mem:BLK (scratch)))])]
26832 "(TARGET_DOUBLE_POP || optimize_insn_for_size_p ())
26833 && INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
26834 [(parallel [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
26835 (clobber (mem:BLK (scratch)))])
26836 (set (match_dup 2) (mem:W (post_inc:P (reg:P SP_REG))))])
26837
;; Epilogue form for an adjustment of exactly
;; +2 * GET_MODE_SIZE (word_mode) that needs only one scratch register:
;; both pops target the same register.  Used only when optimizing the insn
;; for size.
26838 (define_peephole2
26839 [(match_scratch:W 1 "r")
26840 (parallel [(set (reg:P SP_REG)
26841 (plus:P (reg:P SP_REG)
26842 (match_operand:P 0 "const_int_operand")))
26843 (clobber (reg:CC FLAGS_REG))
26844 (clobber (mem:BLK (scratch)))])]
26845 "optimize_insn_for_size_p ()
26846 && INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
26847 [(parallel [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
26848 (clobber (mem:BLK (scratch)))])
26849 (set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))])
26850
26851 ;; Convert esp additions to pop.
;; Adjustment without a memory clobber: a stack-pointer adjustment of
;; exactly +GET_MODE_SIZE (word_mode) becomes one pop into a scratch
;; register.  The only condition is the adjustment size; no tuning flag or
;; size-optimization check is involved.
26852 (define_peephole2
26853 [(match_scratch:W 1 "r")
26854 (parallel [(set (reg:P SP_REG)
26855 (plus:P (reg:P SP_REG)
26856 (match_operand:P 0 "const_int_operand")))
26857 (clobber (reg:CC FLAGS_REG))])]
26858 "INTVAL (operands[0]) == GET_MODE_SIZE (word_mode)"
26859 [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))])
26860
26861 ;; Two pops case is tricky, since pop causes dependency
26862 ;; on destination register. We use two registers if available.
;; Adjustment without a memory clobber, of exactly
;; +2 * GET_MODE_SIZE (word_mode): two pops into two distinct scratch
;; registers.  The only condition is the adjustment size.
26863 (define_peephole2
26864 [(match_scratch:W 1 "r")
26865 (match_scratch:W 2 "r")
26866 (parallel [(set (reg:P SP_REG)
26867 (plus:P (reg:P SP_REG)
26868 (match_operand:P 0 "const_int_operand")))
26869 (clobber (reg:CC FLAGS_REG))])]
26870 "INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
26871 [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
26872 (set (match_dup 2) (mem:W (post_inc:P (reg:P SP_REG))))])
26873
;; Adjustment without a memory clobber, of exactly
;; +2 * GET_MODE_SIZE (word_mode), needing only one scratch register: both
;; pops target the same register.  Used only when optimizing the insn for
;; size.
26874 (define_peephole2
26875 [(match_scratch:W 1 "r")
26876 (parallel [(set (reg:P SP_REG)
26877 (plus:P (reg:P SP_REG)
26878 (match_operand:P 0 "const_int_operand")))
26879 (clobber (reg:CC FLAGS_REG))])]
26880 "optimize_insn_for_size_p ()
26881 && INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
26882 [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
26883 (set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))])
26884 \f
26885 ;; Convert compares with 1 to shorter inc/dec operations when CF is not
26886 ;; required and register dies. Similarly for 128 to -128.
;; Matches a flags-setting compare of a register against a constant that is
;; either an incdec_operand (when TARGET_FUSE_CMP_AND_BRANCH is off or the
;; insn is optimized for size) or 128 (when TARGET_FUSE_CMP_AND_BRANCH is
;; off).  The insn must pass ix86_match_ccmode with CCGCmode and the
;; register must satisfy peep2_reg_dead_p (1, ...).
;; The replacement keeps the same comparison but wraps it in a parallel
;; that additionally clobbers the compared register.
26887 (define_peephole2
26888 [(set (match_operand 0 "flags_reg_operand")
26889 (match_operator 1 "compare_operator"
26890 [(match_operand 2 "register_operand")
26891 (match_operand 3 "const_int_operand")]))]
26892 "(((!TARGET_FUSE_CMP_AND_BRANCH || optimize_insn_for_size_p ())
26893 && incdec_operand (operands[3], GET_MODE (operands[3])))
26894 || (!TARGET_FUSE_CMP_AND_BRANCH
26895 && INTVAL (operands[3]) == 128))
26896 && ix86_match_ccmode (insn, CCGCmode)
26897 && peep2_reg_dead_p (1, operands[2])"
26898 [(parallel [(set (match_dup 0)
26899 (match_op_dup 1 [(match_dup 2) (match_dup 3)]))
26900 (clobber (match_dup 2))])])
26901 \f
26902 ;; Convert imul by three, five and nine into lea
;; Register source: op0 = op1 * N with N accepted by const359_operand is
;; rewritten as op0 = op1 * (N - 1) + op1, emitted as a bare set with no
;; FLAGS_REG clobber.
;; Applies when TARGET_PARTIAL_REG_STALL is off, or the mode is SImode, or
;; the function is optimized for size.
26903 (define_peephole2
26904 [(parallel
26905 [(set (match_operand:SWI48 0 "register_operand")
26906 (mult:SWI48 (match_operand:SWI48 1 "register_operand")
26907 (match_operand:SWI48 2 "const359_operand")))
26908 (clobber (reg:CC FLAGS_REG))])]
26909 "!TARGET_PARTIAL_REG_STALL
26910 || <MODE>mode == SImode
26911 || optimize_function_for_size_p (cfun)"
26912 [(set (match_dup 0)
26913 (plus:SWI48 (mult:SWI48 (match_dup 1) (match_dup 2))
26914 (match_dup 1)))]
26915 "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);")
26916
;; Register-or-memory source: op0 = op1 * N with N accepted by
;; const359_operand is split into a copy op0 = op1 followed by
;; op0 = op0 * (N - 1) + op0, both bare sets with no FLAGS_REG clobber.
;; Applies only when the insn is optimized for speed and either
;; TARGET_PARTIAL_REG_STALL is off or the mode is SImode.
26917 (define_peephole2
26918 [(parallel
26919 [(set (match_operand:SWI48 0 "register_operand")
26920 (mult:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
26921 (match_operand:SWI48 2 "const359_operand")))
26922 (clobber (reg:CC FLAGS_REG))])]
26923 "optimize_insn_for_speed_p ()
26924 && (!TARGET_PARTIAL_REG_STALL || <MODE>mode == SImode)"
26925 [(set (match_dup 0) (match_dup 1))
26926 (set (match_dup 0)
26927 (plus:SWI48 (mult:SWI48 (match_dup 0) (match_dup 2))
26928 (match_dup 0)))]
26929 "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);")
26930
26931 ;; imul $32bit_imm, mem, reg is vector decoded, while
26932 ;; imul $32bit_imm, reg, reg is direct decoded.
;; op0 = mem * imm, where imm does not satisfy constraint K, is split into
;; a load of mem into a scratch register followed by op0 = scratch * imm.
;; Applies when TARGET_SLOW_IMUL_IMM32_MEM is set and the insn is optimized
;; for speed.
26933 (define_peephole2
26934 [(match_scratch:SWI48 3 "r")
26935 (parallel [(set (match_operand:SWI48 0 "register_operand")
26936 (mult:SWI48 (match_operand:SWI48 1 "memory_operand")
26937 (match_operand:SWI48 2 "immediate_operand")))
26938 (clobber (reg:CC FLAGS_REG))])]
26939 "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
26940 && !satisfies_constraint_K (operands[2])"
26941 [(set (match_dup 3) (match_dup 1))
26942 (parallel [(set (match_dup 0) (mult:SWI48 (match_dup 3) (match_dup 2)))
26943 (clobber (reg:CC FLAGS_REG))])])
26944
;; 64-bit only: op0 = zero_extend (mem * imm) in SImode, where imm does not
;; satisfy constraint K, is split into a load of mem into an SImode scratch
;; register followed by the zero-extended multiply of that scratch.
;; Applies when TARGET_SLOW_IMUL_IMM32_MEM is set and the insn is optimized
;; for speed.
26945 (define_peephole2
26946 [(match_scratch:SI 3 "r")
26947 (parallel [(set (match_operand:DI 0 "register_operand")
26948 (zero_extend:DI
26949 (mult:SI (match_operand:SI 1 "memory_operand")
26950 (match_operand:SI 2 "immediate_operand"))))
26951 (clobber (reg:CC FLAGS_REG))])]
26952 "TARGET_64BIT
26953 && TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
26954 && !satisfies_constraint_K (operands[2])"
26955 [(set (match_dup 3) (match_dup 1))
26956 (parallel [(set (match_dup 0)
26957 (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2))))
26958 (clobber (reg:CC FLAGS_REG))])])
26959
26960 ;; imul $8/16bit_imm, regmem, reg is vector decoded.
26961 ;; Convert it into imul reg, reg
26962 ;; It would be better to force assembler to encode instruction using long
26963 ;; immediate, but there is apparently no way to do so.
;; op0 = op1 * imm, where imm satisfies constraint K, is rewritten to load
;; imm into a scratch register and compute op0 = op0 * scratch.  When op0
;; and op1 differ, the preparation code first copies op1 into op0 with
;; emit_move_insn.
;; Applies when TARGET_SLOW_IMUL_IMM8 is set and the insn is optimized for
;; speed.  The scratch is listed after the matched insn in the pattern.
26964 (define_peephole2
26965 [(parallel [(set (match_operand:SWI248 0 "register_operand")
26966 (mult:SWI248
26967 (match_operand:SWI248 1 "nonimmediate_operand")
26968 (match_operand:SWI248 2 "const_int_operand")))
26969 (clobber (reg:CC FLAGS_REG))])
26970 (match_scratch:SWI248 3 "r")]
26971 "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p ()
26972 && satisfies_constraint_K (operands[2])"
26973 [(set (match_dup 3) (match_dup 2))
26974 (parallel [(set (match_dup 0) (mult:SWI248 (match_dup 0) (match_dup 3)))
26975 (clobber (reg:CC FLAGS_REG))])]
26976 {
26977 if (!rtx_equal_p (operands[0], operands[1]))
26978 emit_move_insn (operands[0], operands[1]);
26979 })
26980
26981 ;; After splitting up read-modify operations, array accesses with memory
26982 ;; operands might end up in form:
26983 ;; sall $2, %eax
26984 ;; movl 4(%esp), %edx
26985 ;; addl %edx, %eax
26986 ;; instead of pre-splitting:
26987 ;; sall $2, %eax
26988 ;; addl 4(%esp), %eax
26989 ;; Turn it into:
26990 ;; movl 4(%esp), %edx
26991 ;; leal (%edx,%eax,4), %eax
26992
;; Matched sequence: op0 = op1 << op2 (shift count 1..3), then
;; op3 = op0 + op4.  op0 must either be op3 itself or satisfy
;; peep2_reg_dead_p (2, ...), and must not be mentioned in op4 because the
;; load of op4 is moved ahead of the shift.
;; Replacement: load op4 into the scratch register op5, then set the
;; destination to base + index * scale, where
;;   scale = 1 << op2, index = low part of op1, base = low part of op5,
;; all built in word_mode.  The destination is the low part of op3 in
;; DImode when op1 is DImode and in SImode otherwise; when that mode is not
;; word_mode the address expression is wrapped in a SUBREG.
;; QImode/HImode operands are accepted only when TARGET_PARTIAL_REG_STALL
;; is off; DImode only when TARGET_64BIT.
26993 (define_peephole2
26994 [(match_scratch:W 5 "r")
26995 (parallel [(set (match_operand 0 "register_operand")
26996 (ashift (match_operand 1 "register_operand")
26997 (match_operand 2 "const_int_operand")))
26998 (clobber (reg:CC FLAGS_REG))])
26999 (parallel [(set (match_operand 3 "register_operand")
27000 (plus (match_dup 0)
27001 (match_operand 4 "x86_64_general_operand")))
27002 (clobber (reg:CC FLAGS_REG))])]
27003 "IN_RANGE (INTVAL (operands[2]), 1, 3)
27004 /* Validate MODE for lea. */
27005 && ((!TARGET_PARTIAL_REG_STALL
27006 && (GET_MODE (operands[0]) == QImode
27007 || GET_MODE (operands[0]) == HImode))
27008 || GET_MODE (operands[0]) == SImode
27009 || (TARGET_64BIT && GET_MODE (operands[0]) == DImode))
27010 && (rtx_equal_p (operands[0], operands[3])
27011 || peep2_reg_dead_p (2, operands[0]))
27012 /* We reorder load and the shift. */
27013 && !reg_overlap_mentioned_p (operands[0], operands[4])"
27014 [(set (match_dup 5) (match_dup 4))
27015 (set (match_dup 0) (match_dup 1))]
27016 {
27017 machine_mode op1mode = GET_MODE (operands[1]);
27018 machine_mode mode = op1mode == DImode ? DImode : SImode;
27019 int scale = 1 << INTVAL (operands[2]);
27020 rtx index = gen_lowpart (word_mode, operands[1]);
27021 rtx base = gen_lowpart (word_mode, operands[5]);
27022 rtx dest = gen_lowpart (mode, operands[3]);
27023
27024 operands[1] = gen_rtx_PLUS (word_mode, base,
27025 gen_rtx_MULT (word_mode, index, GEN_INT (scale)));
27026 if (mode != word_mode)
27027 operands[1] = gen_rtx_SUBREG (mode, operands[1], 0);
27028
27029 operands[5] = base;
27030 if (op1mode != word_mode)
27031 operands[5] = gen_lowpart (op1mode, operands[5]);
27032
27033 operands[0] = dest;
27034 })
27035 \f
27036 ;; We used to use "int $5", in honor of #BR which maps to interrupt vector 5.
27037 ;; That, however, is usually mapped by the OS to SIGSEGV, which is often
27038 ;; caught for use by garbage collectors and the like. Using an insn that
27039 ;; maps to SIGILL makes it more likely the program will rightfully die.
27040 ;; Keeping with tradition, "6" is in honor of #UD.
;; Unconditional trap, (trap_if (const_int 1) (const_int 6)).  The output is
;; the "ud2" mnemonic when the assembler provides it (HAVE_AS_IX86_UD2) and
;; otherwise a raw 16-bit value emitted through ASM_SHORT.  The insn length
;; is fixed at 2 bytes.
(define_insn "trap"
  [(trap_if (const_int 1) (const_int 6))]
  ""
{
#ifndef HAVE_AS_IX86_UD2
  /* No mnemonic available: emit the opcode bytes directly.  */
  return ASM_SHORT "0x0b0f";
#else
  return "ud2";
#endif
}
  [(set_attr "length" "2")])
27052
;; Explicit UD2 as an unspec_volatile (UNSPECV_UD2).  The output is the
;; "ud2" mnemonic when the assembler provides it (HAVE_AS_IX86_UD2) and
;; otherwise a raw 16-bit value emitted through ASM_SHORT.  The insn length
;; is fixed at 2 bytes.
(define_insn "ud2"
  [(unspec_volatile [(const_int 0)] UNSPECV_UD2)]
  ""
{
#ifndef HAVE_AS_IX86_UD2
  /* No mnemonic available: emit the opcode bytes directly.  */
  return ASM_SHORT "0x0b0f";
#else
  return "ud2";
#endif
}
  [(set_attr "length" "2")])
27064
;; Expander for the generic prefetch pattern.
;;   operand 0: address to prefetch
;;   operand 1: const_int, nonzero for a write prefetch
;;   operand 2: const_int locality hint, asserted to lie in 0..3
;; The preparation code rewrites operands 1 and 2 so that the result matches
;; one of the prefetch insn patterns available on the selected target.
(define_expand "prefetch"
  [(prefetch (match_operand 0 "address_operand")
	     (match_operand:SI 1 "const_int_operand")
	     (match_operand:SI 2 "const_int_operand"))]
  "TARGET_3DNOW || TARGET_PREFETCH_SSE || TARGET_PRFCHW || TARGET_PREFETCHWT1"
{
  const bool is_write = (operands[1] != const0_rtx);
  const int hint = INTVAL (operands[2]);

  gcc_assert (IN_RANGE (hint, 0, 3));

  /* Use 3dNOW prefetch when a write prefetch is requested that the SSE
     counterpart does not support (non-SSE2 athlon machines), or when SSE
     prefetch is not available at all (K6 machines).  Otherwise use SSE
     prefetch, since it allows the locality to be specified.  */

  if (is_write && TARGET_PREFETCHWT1)
    /* Write prefetch with PREFETCHWT1: raise the locality to at least 2.  */
    operands[2] = GEN_INT (MAX (hint, 2));
  else if (is_write
	   && (TARGET_PRFCHW || (TARGET_3DNOW && !TARGET_SSE2)))
    /* Write prefetch through the locality-3 form.  */
    operands[2] = GEN_INT (3);
  else if (TARGET_PREFETCH_SSE)
    {
      /* A read request is left untouched; a write request is turned
	 into a read request.  */
      if (is_write)
	operands[1] = const0_rtx;
    }
  else
    {
      /* Only 3dNOW is left; it always uses locality 3.  */
      gcc_assert (TARGET_3DNOW);
      operands[2] = GEN_INT (3);
    }
})
27108
;; SSE read prefetch (the write flag must be const_int 0).  Operand 1 is the
;; locality hint, asserted to lie in 0..3, and selects the mnemonic:
;;   0 -> prefetchnta, 1 -> prefetcht2, 2 -> prefetcht1, 3 -> prefetcht0.
(define_insn "*prefetch_sse"
  [(prefetch (match_operand 0 "address_operand" "p")
	     (const_int 0)
	     (match_operand:SI 1 "const_int_operand"))]
  "TARGET_PREFETCH_SSE"
{
  const int hint = INTVAL (operands[1]);

  gcc_assert (IN_RANGE (hint, 0, 3));

  switch (hint)
    {
    case 0:
      return "prefetchnta\t%a0";
    case 1:
      return "prefetcht2\t%a0";
    case 2:
      return "prefetcht1\t%a0";
    default:
      return "prefetcht0\t%a0";
    }
}
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "prefetch")
   (set (attr "length_address")
	(symbol_ref "memory_address_length (operands[0], false)"))
   (set_attr "memory" "none")])
27129
;; Prefetch with locality fixed at 3.  Operand 1 is the write flag:
;; zero emits "prefetch", anything else emits "prefetchw".
(define_insn "*prefetch_3dnow"
  [(prefetch (match_operand 0 "address_operand" "p")
	     (match_operand:SI 1 "const_int_operand")
	     (const_int 3))]
  "TARGET_3DNOW || TARGET_PRFCHW || TARGET_PREFETCHWT1"
{
  return (operands[1] == const0_rtx
	  ? "prefetch\t%a0"
	  : "prefetchw\t%a0");
}
  [(set_attr "type" "mmx")
   (set (attr "length_address")
	(symbol_ref "memory_address_length (operands[0], false)"))
   (set_attr "memory" "none")])
27145
;; Write prefetch (write flag 1) with locality 2, emitted as prefetchwt1.
;; Only available with TARGET_PREFETCHWT1.
(define_insn "*prefetch_prefetchwt1"
  [(prefetch (match_operand 0 "address_operand" "p")
	     (const_int 1)
	     (const_int 2))]
  "TARGET_PREFETCHWT1"
  "prefetchwt1\t%a0"
  [(set_attr "type" "sse")
   (set (attr "length_address")
	(symbol_ref "memory_address_length (operands[0], false)"))
   (set_attr "memory" "none")])
27156
;; PREFETCHI pattern, enabled for TARGET_PREFETCHI && TARGET_64BIT.
;; Operand 0 must satisfy local_func_symbolic_operand.  Operand 1 is
;; asserted to be 2 or 3 and selects prefetchit1 (2) or prefetchit0 (3).
27157 (define_insn "prefetchi"
27158 [(unspec_volatile [(match_operand 0 "local_func_symbolic_operand" "p")
27159 (match_operand:SI 1 "const_int_operand")]
27160 UNSPECV_PREFETCHI)]
27161 "TARGET_PREFETCHI && TARGET_64BIT"
27162 {
27163 static const char * const patterns[2] = {
27164 "prefetchit1\t%0", "prefetchit0\t%0"
27165 };
27166
27167 int locality = INTVAL (operands[1]);
27168 gcc_assert (IN_RANGE (locality, 2, 3));
27169
27170 return patterns[locality - 2];
27171 }
27172 [(set_attr "type" "sse")
27173 (set (attr "length_address")
27174 (symbol_ref "memory_address_length (operands[0], false)"))
27175 (set_attr "memory" "none")])
27176
;; CRC32 with an SImode accumulator.  Operand 1 is tied to the output
;; (constraint "0"); operand 2 iterates over SWI124 (iterator defined
;; outside this chunk).  prefix_data16 is "1" when operand 2 matches in
;; HImode, and prefix_rex is "1" when operand 2 is a QImode
;; ext_QIreg_operand.
27177 (define_insn "sse4_2_crc32<mode>"
27178 [(set (match_operand:SI 0 "register_operand" "=r")
27179 (unspec:SI
27180 [(match_operand:SI 1 "register_operand" "0")
27181 (match_operand:SWI124 2 "nonimmediate_operand" "<r>m")]
27182 UNSPEC_CRC32))]
27183 "TARGET_CRC32"
27184 "crc32{<imodesuffix>}\t{%2, %0|%0, %2}"
27185 [(set_attr "type" "sselog1")
27186 (set_attr "prefix_rep" "1")
27187 (set_attr "prefix_extra" "1")
27188 (set (attr "prefix_data16")
27189 (if_then_else (match_operand:HI 2)
27190 (const_string "1")
27191 (const_string "*")))
27192 (set (attr "prefix_rex")
27193 (if_then_else (match_operand:QI 2 "ext_QIreg_operand")
27194 (const_string "1")
27195 (const_string "*")))
27196 (set_attr "mode" "SI")])
27197
;; 64-bit-only CRC32 with a DImode source (operand 2).  The SImode unspec
;; result is wrapped in zero_extend to form the DImode destination;
;; operand 1 is the SImode accumulator tied to the output register.
27198 (define_insn "sse4_2_crc32di"
27199 [(set (match_operand:DI 0 "register_operand" "=r")
27200 (zero_extend:DI
27201 (unspec:SI
27202 [(match_operand:SI 1 "register_operand" "0")
27203 (match_operand:DI 2 "nonimmediate_operand" "rm")]
27204 UNSPEC_CRC32)))]
27205 "TARGET_64BIT && TARGET_CRC32"
27206 "crc32{q}\t{%2, %0|%0, %2}"
27207 [(set_attr "type" "sselog1")
27208 (set_attr "prefix_rep" "1")
27209 (set_attr "prefix_extra" "1")
27210 (set_attr "mode" "DI")])
27211
;; RDPMC for !TARGET_64BIT: operand 1 (constraint "c") is the input and
;; the DImode result is a single operand with constraint "A".
27212 (define_insn "rdpmc"
27213 [(set (match_operand:DI 0 "register_operand" "=A")
27214 (unspec_volatile:DI [(match_operand:SI 1 "register_operand" "c")]
27215 UNSPECV_RDPMC))]
27216 "!TARGET_64BIT"
27217 "rdpmc"
27218 [(set_attr "type" "other")
27219 (set_attr "length" "2")])
27220
;; RDPMC for TARGET_64BIT: the result is modelled as two DImode sets,
;; operand 0 ("=a") and operand 1 ("=d"), both from the same unspec of
;; input operand 2 ("c").
27221 (define_insn "rdpmc_rex64"
27222 [(set (match_operand:DI 0 "register_operand" "=a")
27223 (unspec_volatile:DI [(match_operand:SI 2 "register_operand" "c")]
27224 UNSPECV_RDPMC))
27225 (set (match_operand:DI 1 "register_operand" "=d")
27226 (unspec_volatile:DI [(match_dup 2)] UNSPECV_RDPMC))]
27227 "TARGET_64BIT"
27228 "rdpmc"
27229 [(set_attr "type" "other")
27230 (set_attr "length" "2")])
27231
;; RDTSC for !TARGET_64BIT: no inputs; the DImode result is one operand
;; with constraint "A".
27232 (define_insn "rdtsc"
27233 [(set (match_operand:DI 0 "register_operand" "=A")
27234 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC))]
27235 "!TARGET_64BIT"
27236 "rdtsc"
27237 [(set_attr "type" "other")
27238 (set_attr "length" "2")])
27239
;; RDTSC for TARGET_64BIT: the result is modelled as two DImode sets,
;; operand 0 ("=a") and operand 1 ("=d").
27240 (define_insn "rdtsc_rex64"
27241 [(set (match_operand:DI 0 "register_operand" "=a")
27242 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC))
27243 (set (match_operand:DI 1 "register_operand" "=d")
27244 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC))]
27245 "TARGET_64BIT"
27246 "rdtsc"
27247 [(set_attr "type" "other")
27248 (set_attr "length" "2")])
27249
;; RDTSCP for !TARGET_64BIT: sets a DImode operand 0 ("=A") and an
;; additional SImode operand 1 ("=c").
27250 (define_insn "rdtscp"
27251 [(set (match_operand:DI 0 "register_operand" "=A")
27252 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSCP))
27253 (set (match_operand:SI 1 "register_operand" "=c")
27254 (unspec_volatile:SI [(const_int 0)] UNSPECV_RDTSCP))]
27255 "!TARGET_64BIT"
27256 "rdtscp"
27257 [(set_attr "type" "other")
27258 (set_attr "length" "3")])
27259
;; RDTSCP for TARGET_64BIT: three outputs, DImode operand 0 ("=a"),
;; DImode operand 1 ("=d") and SImode operand 2 ("=c").
27260 (define_insn "rdtscp_rex64"
27261 [(set (match_operand:DI 0 "register_operand" "=a")
27262 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSCP))
27263 (set (match_operand:DI 1 "register_operand" "=d")
27264 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSCP))
27265 (set (match_operand:SI 2 "register_operand" "=c")
27266 (unspec_volatile:SI [(const_int 0)] UNSPECV_RDTSCP))]
27267 "TARGET_64BIT"
27268 "rdtscp"
27269 [(set_attr "type" "other")
27270 (set_attr "length" "3")])
27271
27272 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27273 ;;
27274 ;; FXSR, XSAVE and XSAVEOPT instructions
27275 ;;
27276 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27277
;; FXSAVE: stores to the BLKmode memory operand 0 under TARGET_FXSR.
;; The length attribute is the default address length plus 3.
27278 (define_insn "fxsave"
27279 [(set (match_operand:BLK 0 "memory_operand" "=m")
27280 (unspec_volatile:BLK [(const_int 0)] UNSPECV_FXSAVE))]
27281 "TARGET_FXSR"
27282 "fxsave\t%0"
27283 [(set_attr "type" "other")
27284 (set_attr "memory" "store")
27285 (set (attr "length")
27286 (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
27287
;; FXSAVE64: 64-bit-only variant of fxsave.  The memory operand uses
;; constraint "jm" and the insn carries the "addr" attribute "gpr16";
;; the length is the default address length plus 4.
27288 (define_insn "fxsave64"
27289 [(set (match_operand:BLK 0 "memory_operand" "=jm")
27290 (unspec_volatile:BLK [(const_int 0)] UNSPECV_FXSAVE64))]
27291 "TARGET_64BIT && TARGET_FXSR"
27292 "fxsave64\t%0"
27293 [(set_attr "type" "other")
27294 (set_attr "addr" "gpr16")
27295 (set_attr "memory" "store")
27296 (set (attr "length")
27297 (symbol_ref "ix86_attr_length_address_default (insn) + 4"))])
27298
;; FXRSTOR: reads the BLKmode memory operand 0 under TARGET_FXSR.
;; The length attribute is the default address length plus 3.
27299 (define_insn "fxrstor"
27300 [(unspec_volatile [(match_operand:BLK 0 "memory_operand" "m")]
27301 UNSPECV_FXRSTOR)]
27302 "TARGET_FXSR"
27303 "fxrstor\t%0"
27304 [(set_attr "type" "other")
27305 (set_attr "memory" "load")
27306 (set (attr "length")
27307 (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
27308
;; FXRSTOR64: 64-bit-only variant of fxrstor.  The memory operand uses
;; constraint "jm" with "addr" attribute "gpr16"; the length is the
;; default address length plus 4.
27309 (define_insn "fxrstor64"
27310 [(unspec_volatile [(match_operand:BLK 0 "memory_operand" "jm")]
27311 UNSPECV_FXRSTOR64)]
27312 "TARGET_64BIT && TARGET_FXSR"
27313 "fxrstor64\t%0"
27314 [(set_attr "type" "other")
27315 (set_attr "addr" "gpr16")
27316 (set_attr "memory" "load")
27317 (set (attr "length")
27318 (symbol_ref "ix86_attr_length_address_default (insn) + 4"))])
27319
;; Unspecs of the XSAVE family.  UNSPECV_XSAVE is unconditional within the
;; iterator; the other entries are enabled by the listed target flags.
27320 (define_int_iterator ANY_XSAVE
27321 [UNSPECV_XSAVE
27322 (UNSPECV_XSAVEOPT "TARGET_XSAVEOPT")
27323 (UNSPECV_XSAVEC "TARGET_XSAVEC")
27324 (UNSPECV_XSAVES "TARGET_XSAVES")])
27325
;; The corresponding *64 unspecs, gated by the same target flags.
27326 (define_int_iterator ANY_XSAVE64
27327 [UNSPECV_XSAVE64
27328 (UNSPECV_XSAVEOPT64 "TARGET_XSAVEOPT")
27329 (UNSPECV_XSAVEC64 "TARGET_XSAVEC")
27330 (UNSPECV_XSAVES64 "TARGET_XSAVES")])
27331
;; Maps each unspec of both iterators to its mnemonic; the *64 entries
;; already include the "64" suffix.
27332 (define_int_attr xsave
27333 [(UNSPECV_XSAVE "xsave")
27334 (UNSPECV_XSAVE64 "xsave64")
27335 (UNSPECV_XSAVEOPT "xsaveopt")
27336 (UNSPECV_XSAVEOPT64 "xsaveopt64")
27337 (UNSPECV_XSAVEC "xsavec")
27338 (UNSPECV_XSAVEC64 "xsavec64")
27339 (UNSPECV_XSAVES "xsaves")
27340 (UNSPECV_XSAVES64 "xsaves64")])
27341
;; Unspecs of the XRSTOR family; the XRSTORS entry needs TARGET_XSAVES.
27342 (define_int_iterator ANY_XRSTOR
27343 [UNSPECV_XRSTOR
27344 (UNSPECV_XRSTORS "TARGET_XSAVES")])
27345
;; The corresponding *64 unspecs.
27346 (define_int_iterator ANY_XRSTOR64
27347 [UNSPECV_XRSTOR64
27348 (UNSPECV_XRSTORS64 "TARGET_XSAVES")])
27349
;; Maps each unspec to its base mnemonic.  Unlike the xsave attribute, the
;; *64 entries here do NOT carry a "64" suffix; the "<xrstor>64" pattern
;; appends it explicitly.
27350 (define_int_attr xrstor
27351 [(UNSPECV_XRSTOR "xrstor")
27352 (UNSPECV_XRSTOR64 "xrstor")
27353 (UNSPECV_XRSTORS "xrstors")
27354 (UNSPECV_XRSTORS64 "xrstors")])
27355
;; XSAVE family for !TARGET_64BIT, iterated over ANY_XSAVE.  Operand 0 is
;; the BLKmode save area; operand 1 is a single DImode input with
;; constraint "A".  Length is the default address length plus 3.
27356 (define_insn "<xsave>"
27357 [(set (match_operand:BLK 0 "memory_operand" "=m")
27358 (unspec_volatile:BLK
27359 [(match_operand:DI 1 "register_operand" "A")]
27360 ANY_XSAVE))]
27361 "!TARGET_64BIT && TARGET_XSAVE"
27362 "<xsave>\t%0"
27363 [(set_attr "type" "other")
27364 (set_attr "memory" "store")
27365 (set (attr "length")
27366 (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
27367
;; XSAVE family (non-64 mnemonics, ANY_XSAVE) for TARGET_64BIT.  The input
;; is split into two SImode operands with constraints "a" and "d"; the
;; memory operand uses "jm" with "addr" attribute "gpr16".
27368 (define_insn "<xsave>_rex64"
27369 [(set (match_operand:BLK 0 "memory_operand" "=jm")
27370 (unspec_volatile:BLK
27371 [(match_operand:SI 1 "register_operand" "a")
27372 (match_operand:SI 2 "register_operand" "d")]
27373 ANY_XSAVE))]
27374 "TARGET_64BIT && TARGET_XSAVE"
27375 "<xsave>\t%0"
27376 [(set_attr "type" "other")
27377 (set_attr "memory" "store")
27378 (set_attr "addr" "gpr16")
27379 (set (attr "length")
27380 (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
27381
;; XSAVE family, *64 mnemonics: iterates over ANY_XSAVE64, so <xsave>
;; expands to the "...64" names from the xsave attribute.  Same operand
;; layout as "<xsave>_rex64"; length is the default address length plus 4.
27382 (define_insn "<xsave>"
27383 [(set (match_operand:BLK 0 "memory_operand" "=jm")
27384 (unspec_volatile:BLK
27385 [(match_operand:SI 1 "register_operand" "a")
27386 (match_operand:SI 2 "register_operand" "d")]
27387 ANY_XSAVE64))]
27388 "TARGET_64BIT && TARGET_XSAVE"
27389 "<xsave>\t%0"
27390 [(set_attr "type" "other")
27391 (set_attr "memory" "store")
27392 (set_attr "addr" "gpr16")
27393 (set (attr "length")
27394 (symbol_ref "ix86_attr_length_address_default (insn) + 4"))])
27395
;; XRSTOR family for !TARGET_64BIT, iterated over ANY_XRSTOR.  Operand 0
;; is the BLKmode area to load from; operand 1 is a DImode input with
;; constraint "A".  Length is the default address length plus 3.
27396 (define_insn "<xrstor>"
27397 [(unspec_volatile:BLK
27398 [(match_operand:BLK 0 "memory_operand" "m")
27399 (match_operand:DI 1 "register_operand" "A")]
27400 ANY_XRSTOR)]
27401 "!TARGET_64BIT && TARGET_XSAVE"
27402 "<xrstor>\t%0"
27403 [(set_attr "type" "other")
27404 (set_attr "memory" "load")
27405 (set (attr "length")
27406 (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
27407
;; XRSTOR family (ANY_XRSTOR) for TARGET_64BIT.  The input is split into
;; two SImode operands ("a" and "d"); the memory operand uses "jm" with
;; "addr" attribute "gpr16".
27408 (define_insn "<xrstor>_rex64"
27409 [(unspec_volatile:BLK
27410 [(match_operand:BLK 0 "memory_operand" "jm")
27411 (match_operand:SI 1 "register_operand" "a")
27412 (match_operand:SI 2 "register_operand" "d")]
27413 ANY_XRSTOR)]
27414 "TARGET_64BIT && TARGET_XSAVE"
27415 "<xrstor>\t%0"
27416 [(set_attr "type" "other")
27417 (set_attr "memory" "load")
27418 (set_attr "addr" "gpr16")
27419 (set (attr "length")
27420 (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
27421
;; XRSTOR family, *64 mnemonics (ANY_XRSTOR64).  The "64" suffix is
;; appended in both the pattern name and the output template because the
;; xrstor attribute yields only the base mnemonic.  Length is the default
;; address length plus 4.
27422 (define_insn "<xrstor>64"
27423 [(unspec_volatile:BLK
27424 [(match_operand:BLK 0 "memory_operand" "jm")
27425 (match_operand:SI 1 "register_operand" "a")
27426 (match_operand:SI 2 "register_operand" "d")]
27427 ANY_XRSTOR64)]
27428 "TARGET_64BIT && TARGET_XSAVE"
27429 "<xrstor>64\t%0"
27430 [(set_attr "type" "other")
27431 (set_attr "memory" "load")
27432 (set_attr "addr" "gpr16")
27433 (set (attr "length")
27434 (symbol_ref "ix86_attr_length_address_default (insn) + 4"))])
27435
;; XSETBV for !TARGET_64BIT: operand 0 ("c") and a DImode operand 1 ("A")
;; are inputs; there is no output operand.
27436 (define_insn "xsetbv"
27437 [(unspec_volatile:SI
27438 [(match_operand:SI 0 "register_operand" "c")
27439 (match_operand:DI 1 "register_operand" "A")]
27440 UNSPECV_XSETBV)]
27441 "!TARGET_64BIT && TARGET_XSAVE"
27442 "xsetbv"
27443 [(set_attr "type" "other")])
27444
;; XSETBV for TARGET_64BIT: three SImode inputs with constraints "c", "a"
;; and "d"; there is no output operand.
27445 (define_insn "xsetbv_rex64"
27446 [(unspec_volatile:SI
27447 [(match_operand:SI 0 "register_operand" "c")
27448 (match_operand:SI 1 "register_operand" "a")
27449 (match_operand:SI 2 "register_operand" "d")]
27450 UNSPECV_XSETBV)]
27451 "TARGET_64BIT && TARGET_XSAVE"
27452 "xsetbv"
27453 [(set_attr "type" "other")])
27454
;; XGETBV for !TARGET_64BIT: SImode input operand 1 ("c"), DImode result
;; in operand 0 ("=A").
27455 (define_insn "xgetbv"
27456 [(set (match_operand:DI 0 "register_operand" "=A")
27457 (unspec_volatile:DI [(match_operand:SI 1 "register_operand" "c")]
27458 UNSPECV_XGETBV))]
27459 "!TARGET_64BIT && TARGET_XSAVE"
27460 "xgetbv"
27461 [(set_attr "type" "other")])
27462
;; XGETBV for TARGET_64BIT: two DImode sets, operand 0 ("=a") and
;; operand 1 ("=d"), both from the same unspec of input operand 2 ("c").
27463 (define_insn "xgetbv_rex64"
27464 [(set (match_operand:DI 0 "register_operand" "=a")
27465 (unspec_volatile:DI [(match_operand:SI 2 "register_operand" "c")]
27466 UNSPECV_XGETBV))
27467 (set (match_operand:DI 1 "register_operand" "=d")
27468 (unspec_volatile:DI [(match_dup 2)] UNSPECV_XGETBV))]
27469 "TARGET_64BIT && TARGET_XSAVE"
27470 "xgetbv"
27471 [(set_attr "type" "other")])
27472
27473 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27474 ;;
27475 ;; Floating-point instructions for atomic compound assignments
27476 ;;
27477 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27478
27479 ; Clobber all floating-point registers on environment save and restore
27480 ; to ensure that the TOS value saved at fnstenv is valid after fldenv.
;; FNSTENV: stores to BLKmode memory operand 0 and clobbers ST0..ST7 in
;; XFmode.  Enabled for TARGET_80387; length is the default address
;; length plus 2.
27481 (define_insn "fnstenv"
27482 [(set (match_operand:BLK 0 "memory_operand" "=m")
27483 (unspec_volatile:BLK [(const_int 0)] UNSPECV_FNSTENV))
27484 (clobber (reg:XF ST0_REG))
27485 (clobber (reg:XF ST1_REG))
27486 (clobber (reg:XF ST2_REG))
27487 (clobber (reg:XF ST3_REG))
27488 (clobber (reg:XF ST4_REG))
27489 (clobber (reg:XF ST5_REG))
27490 (clobber (reg:XF ST6_REG))
27491 (clobber (reg:XF ST7_REG))]
27492 "TARGET_80387"
27493 "fnstenv\t%0"
27494 [(set_attr "type" "other")
27495 (set_attr "memory" "store")
27496 (set (attr "length")
27497 (symbol_ref "ix86_attr_length_address_default (insn) + 2"))])
27498
;; FLDENV: reads BLKmode memory operand 0 and clobbers ST0..ST7 in
;; XFmode.  Enabled for TARGET_80387; length is the default address
;; length plus 2.
27499 (define_insn "fldenv"
27500 [(unspec_volatile [(match_operand:BLK 0 "memory_operand" "m")]
27501 UNSPECV_FLDENV)
27502 (clobber (reg:XF ST0_REG))
27503 (clobber (reg:XF ST1_REG))
27504 (clobber (reg:XF ST2_REG))
27505 (clobber (reg:XF ST3_REG))
27506 (clobber (reg:XF ST4_REG))
27507 (clobber (reg:XF ST5_REG))
27508 (clobber (reg:XF ST6_REG))
27509 (clobber (reg:XF ST7_REG))]
27510 "TARGET_80387"
27511 "fldenv\t%0"
27512 [(set_attr "type" "other")
27513 (set_attr "memory" "load")
27514 (set (attr "length")
27515 (symbol_ref "ix86_attr_length_address_default (insn) + 2"))])
27516
;; FNSTSW: HImode destination with two alternatives, a register ("=a",
;; memory attribute "none") or memory ("m", memory attribute "store").
27517 (define_insn "fnstsw"
27518 [(set (match_operand:HI 0 "nonimmediate_operand" "=a,m")
27519 (unspec_volatile:HI [(const_int 0)] UNSPECV_FNSTSW))]
27520 "TARGET_80387"
27521 "fnstsw\t%0"
27522 [(set_attr "type" "other,other")
27523 (set_attr "memory" "none,store")
27524 (set (attr "length")
27525 (symbol_ref "ix86_attr_length_address_default (insn) + 2"))])
27526
;; FNCLEX: no operands; fixed length of 2, enabled for TARGET_80387.
27527 (define_insn "fnclex"
27528 [(unspec_volatile [(const_int 0)] UNSPECV_FNCLEX)]
27529 "TARGET_80387"
27530 "fnclex"
27531 [(set_attr "type" "other")
27532 (set_attr "memory" "none")
27533 (set_attr "length" "2")])
27534
27535 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27536 ;;
27537 ;; LWP instructions
27538 ;;
27539 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27540
;; LLWPCB: takes one pointer-mode (P iterator) register input, operand 0.
;; Enabled for TARGET_LWP; fixed length of 5.
27541 (define_insn "@lwp_llwpcb<mode>"
27542 [(unspec_volatile [(match_operand:P 0 "register_operand" "r")]
27543 UNSPECV_LLWP_INTRINSIC)]
27544 "TARGET_LWP"
27545 "llwpcb\t%0"
27546 [(set_attr "type" "lwp")
27547 (set_attr "mode" "<MODE>")
27548 (set_attr "length" "5")])
27549
;; SLWPCB: writes a pointer-mode (P iterator) register, operand 0.
;; Enabled for TARGET_LWP; fixed length of 5.
27550 (define_insn "@lwp_slwpcb<mode>"
27551 [(set (match_operand:P 0 "register_operand" "=r")
27552 (unspec_volatile:P [(const_int 0)] UNSPECV_SLWP_INTRINSIC))]
27553 "TARGET_LWP"
27554 "slwpcb\t%0"
27555 [(set_attr "type" "lwp")
27556 (set_attr "mode" "<MODE>")
27557 (set_attr "length" "5")])
27558
;; LWPVAL: operand 0 is an SWI48 register, operand 1 an SImode register or
;; memory, operand 2 a constant integer.  No result is set.  Length is
;; the default address length plus 9.
27559 (define_insn "@lwp_lwpval<mode>"
27560 [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")
27561 (match_operand:SI 1 "nonimmediate_operand" "rm")
27562 (match_operand:SI 2 "const_int_operand")]
27563 UNSPECV_LWPVAL_INTRINSIC)]
27564 "TARGET_LWP"
27565 "lwpval\t{%2, %1, %0|%0, %1, %2}"
27566 [(set_attr "type" "lwp")
27567 (set_attr "mode" "<MODE>")
27568 (set (attr "length")
27569 (symbol_ref "ix86_attr_length_address_default (insn) + 9"))])
27570
;; LWPINS: same operands as lwpval, but the pattern sets FLAGS_REG in
;; CCCmode from the unspec.  Length is the default address length plus 9.
27571 (define_insn "@lwp_lwpins<mode>"
27572 [(set (reg:CCC FLAGS_REG)
27573 (unspec_volatile:CCC [(match_operand:SWI48 0 "register_operand" "r")
27574 (match_operand:SI 1 "nonimmediate_operand" "rm")
27575 (match_operand:SI 2 "const_int_operand")]
27576 UNSPECV_LWPINS_INTRINSIC))]
27577 "TARGET_LWP"
27578 "lwpins\t{%2, %1, %0|%0, %1, %2}"
27579 [(set_attr "type" "lwp")
27580 (set_attr "mode" "<MODE>")
27581 (set (attr "length")
27582 (symbol_ref "ix86_attr_length_address_default (insn) + 9"))])
27583
;; Unspecs for reading the FS/GS base.
27584 (define_int_iterator RDFSGSBASE
27585 [UNSPECV_RDFSBASE
27586 UNSPECV_RDGSBASE])
27587
;; Unspecs for writing the FS/GS base.
27588 (define_int_iterator WRFSGSBASE
27589 [UNSPECV_WRFSBASE
27590 UNSPECV_WRGSBASE])
27591
;; Maps each of the four unspecs above to the segment name used in the
;; mnemonic and pattern name ("fs" or "gs").
27592 (define_int_attr fsgs
27593 [(UNSPECV_RDFSBASE "fs")
27594 (UNSPECV_RDGSBASE "gs")
27595 (UNSPECV_WRFSBASE "fs")
27596 (UNSPECV_WRGSBASE "gs")])
27597
;; rdfsbase/rdgsbase: writes an SWI48 register, iterated over RDFSGSBASE.
;; Enabled for TARGET_64BIT && TARGET_FSGSBASE.
27598 (define_insn "rd<fsgs>base<mode>"
27599 [(set (match_operand:SWI48 0 "register_operand" "=r")
27600 (unspec_volatile:SWI48 [(const_int 0)] RDFSGSBASE))]
27601 "TARGET_64BIT && TARGET_FSGSBASE"
27602 "rd<fsgs>base\t%0"
27603 [(set_attr "type" "other")
27604 (set_attr "prefix_0f" "1")
27605 (set_attr "prefix_rep" "1")])
27606
;; wrfsbase/wrgsbase: takes an SWI48 register input, iterated over
;; WRFSGSBASE.  Enabled for TARGET_64BIT && TARGET_FSGSBASE.
27607 (define_insn "wr<fsgs>base<mode>"
27608 [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")]
27609 WRFSGSBASE)]
27610 "TARGET_64BIT && TARGET_FSGSBASE"
27611 "wr<fsgs>base\t%0"
27612 [(set_attr "type" "other")
27613 (set_attr "prefix_0f" "1")
27614 (set_attr "prefix_rep" "1")])
27615
;; PTWRITE: takes one SWI48 register-or-memory input; no result is set.
27616 (define_insn "ptwrite<mode>"
27617 [(unspec_volatile [(match_operand:SWI48 0 "nonimmediate_operand" "rm")]
27618 UNSPECV_PTWRITE)]
27619 "TARGET_PTWRITE"
27620 "ptwrite\t%0"
27621 [(set_attr "type" "other")
27622 (set_attr "prefix_0f" "1")
27623 (set_attr "prefix_rep" "1")])
27624
;; RDRAND: sets an SWI248 register (operand 0) and, in the same pattern,
;; FLAGS_REG in CCCmode.  Enabled for TARGET_RDRND.
27625 (define_insn "@rdrand<mode>"
27626 [(set (match_operand:SWI248 0 "register_operand" "=r")
27627 (unspec_volatile:SWI248 [(const_int 0)] UNSPECV_RDRAND))
27628 (set (reg:CCC FLAGS_REG)
27629 (unspec_volatile:CCC [(const_int 0)] UNSPECV_RDRAND))]
27630 "TARGET_RDRND"
27631 "rdrand\t%0"
27632 [(set_attr "type" "other")
27633 (set_attr "prefix_0f" "1")])
27634
;; RDSEED: sets an SWI248 register (operand 0) and, in the same pattern,
;; FLAGS_REG in CCCmode.  Enabled for TARGET_RDSEED.
27635 (define_insn "@rdseed<mode>"
27636 [(set (match_operand:SWI248 0 "register_operand" "=r")
27637 (unspec_volatile:SWI248 [(const_int 0)] UNSPECV_RDSEED))
27638 (set (reg:CCC FLAGS_REG)
27639 (unspec_volatile:CCC [(const_int 0)] UNSPECV_RDSEED))]
27640 "TARGET_RDSEED"
27641 "rdseed\t%0"
27642 [(set_attr "type" "other")
27643 (set_attr "prefix_0f" "1")])
27644
;; Expander for "pause"; always available (empty condition).  It takes no
;; caller operands: operand 0 is built here as a volatile BLKmode MEM
;; whose address is a Pmode SCRATCH, and the pattern sets that MEM from an
;; UNSPEC_PAUSE of itself.
27645 (define_expand "pause"
27646 [(set (match_dup 0)
27647 (unspec:BLK [(match_dup 0)] UNSPEC_PAUSE))]
27648 ""
27649 {
27650 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
27651 MEM_VOLATILE_P (operands[0]) = 1;
27652 })
27653
27654 ;; Use "rep; nop", instead of "pause", to support older assemblers.
27655 ;; They have the same encoding.
;; Matches the RTL produced by the "pause" expander and emits "rep; nop"
;; (via the %; template escape).  Fixed length 2, memory attribute
;; "unknown".
27656 (define_insn "*pause"
27657 [(set (match_operand:BLK 0)
27658 (unspec:BLK [(match_dup 0)] UNSPEC_PAUSE))]
27659 ""
27660 "rep%; nop"
27661 [(set_attr "length" "2")
27662 (set_attr "memory" "unknown")])
27663
27664 ;; CET instructions
;; RDSSP: input operand 1 is tied to the output register (constraint "0").
;; Available with TARGET_SHSTK or when flag_cf_protection has CF_RETURN.
27665 (define_insn "@rdssp<mode>"
27666 [(set (match_operand:SWI48 0 "register_operand" "=r")
27667 (unspec_volatile:SWI48 [(match_operand:SWI48 1 "register_operand" "0")]
27668 UNSPECV_NOP_RDSSP))]
27669 "TARGET_SHSTK || (flag_cf_protection & CF_RETURN)"
27670 "rdssp<mskmodesuffix>\t%0"
27671 [(set_attr "length" "6")
27672 (set_attr "type" "other")])
27673
;; INCSSP: takes one SWI48 register input.  Available with TARGET_SHSTK
;; or when flag_cf_protection has CF_RETURN.
27674 (define_insn "@incssp<mode>"
27675 [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")]
27676 UNSPECV_INCSSP)]
27677 "TARGET_SHSTK || (flag_cf_protection & CF_RETURN)"
27678 "incssp<mskmodesuffix>\t%0"
27679 [(set_attr "length" "4")
27680 (set_attr "type" "other")])
27681
;; SAVEPREVSSP: no operands; TARGET_SHSTK only, fixed length 5.
27682 (define_insn "saveprevssp"
27683 [(unspec_volatile [(const_int 0)] UNSPECV_SAVEPREVSSP)]
27684 "TARGET_SHSTK"
27685 "saveprevssp"
27686 [(set_attr "length" "5")
27687 (set_attr "type" "other")])
27688
;; RSTORSSP: takes one DImode memory operand; TARGET_SHSTK only.
27689 (define_insn "rstorssp"
27690 [(unspec_volatile [(match_operand:DI 0 "memory_operand" "m")]
27691 UNSPECV_RSTORSSP)]
27692 "TARGET_SHSTK"
27693 "rstorssp\t%0"
27694 [(set_attr "length" "5")
27695 (set_attr "type" "other")])
27696
;; WRSS: operand 0 is an SWI48 register, operand 1 an SWI48 memory
;; operand; both sit inside the unspec_volatile.  TARGET_SHSTK only.
27697 (define_insn "@wrss<mode>"
27698 [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")
27699 (match_operand:SWI48 1 "memory_operand" "m")]
27700 UNSPECV_WRSS)]
27701 "TARGET_SHSTK"
27702 "wrss<mskmodesuffix>\t%0, %1"
27703 [(set_attr "length" "3")
27704 (set_attr "type" "other")])
27705
;; WRUSS: same operand layout as wrss, with a fixed length of 4.
;; TARGET_SHSTK only.
27706 (define_insn "@wruss<mode>"
27707 [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")
27708 (match_operand:SWI48 1 "memory_operand" "m")]
27709 UNSPECV_WRUSS)]
27710 "TARGET_SHSTK"
27711 "wruss<mskmodesuffix>\t%0, %1"
27712 [(set_attr "length" "4")
27713 (set_attr "type" "other")])
27714
;; SETSSBSY: no operands; TARGET_SHSTK only, fixed length 4.
27715 (define_insn "setssbsy"
27716 [(unspec_volatile [(const_int 0)] UNSPECV_SETSSBSY)]
27717 "TARGET_SHSTK"
27718 "setssbsy"
27719 [(set_attr "length" "4")
27720 (set_attr "type" "other")])
27721
;; CLRSSBSY: takes one DImode memory operand; TARGET_SHSTK only.
27722 (define_insn "clrssbsy"
27723 [(unspec_volatile [(match_operand:DI 0 "memory_operand" "m")]
27724 UNSPECV_CLRSSBSY)]
27725 "TARGET_SHSTK"
27726 "clrssbsy\t%0"
27727 [(set_attr "length" "4")
27728 (set_attr "type" "other")])
27729
;; ENDBR marker, available when flag_cf_protection has CF_BRANCH.  Emits
;; "endbr64" for TARGET_64BIT and "endbr32" otherwise; fixed length 4.
27730 (define_insn "nop_endbr"
27731 [(unspec_volatile [(const_int 0)] UNSPECV_NOP_ENDBR)]
27732 "(flag_cf_protection & CF_BRANCH)"
27733 {
27734 return TARGET_64BIT ? "endbr64" : "endbr32";
27735 }
27736 [(set_attr "length" "4")
27737 (set_attr "length_immediate" "0")
27738 (set_attr "modrm" "0")])
27739
27740 ;; For RTM support
;; Expander for xbegin (TARGET_RTM).  It loads -1 into the AX hard
;; register, emits the xbegin_1 jump insn targeting a fresh label, places
;; that label immediately after the jump, and finally copies AX into
;; operand 0.
27741 (define_expand "xbegin"
27742 [(set (match_operand:SI 0 "register_operand")
27743 (unspec_volatile:SI [(const_int 0)] UNSPECV_XBEGIN))]
27744 "TARGET_RTM"
27745 {
27746 rtx_code_label *label = gen_label_rtx ();
27747
27748 /* xbegin is emitted as jump_insn, so reload won't be able
27749 to reload its operand. Force the value into AX hard register. */
27750 rtx ax_reg = gen_rtx_REG (SImode, AX_REG);
27751 emit_move_insn (ax_reg, constm1_rtx);
27752
27753 emit_jump_insn (gen_xbegin_1 (ax_reg, label));
27754
27755 emit_label (label);
27756 LABEL_NUSES (label) = 1;
27757
27758 emit_move_insn (operands[0], ax_reg);
27759
27760 DONE;
27761 })
27762
;; The xbegin jump insn.  It is modelled as a conditional jump to label
;; operand 1 on an UNSPEC_XBEGIN_ABORT condition, together with an
;; in/out update of operand 0 (constraint "+a").  Fixed length 6.
27763 (define_insn "xbegin_1"
27764 [(set (pc)
27765 (if_then_else (ne (unspec [(const_int 0)] UNSPEC_XBEGIN_ABORT)
27766 (const_int 0))
27767 (label_ref (match_operand 1))
27768 (pc)))
27769 (set (match_operand:SI 0 "register_operand" "+a")
27770 (unspec_volatile:SI [(match_dup 0)] UNSPECV_XBEGIN))]
27771 "TARGET_RTM"
27772 "xbegin\t%l1"
27773 [(set_attr "type" "other")
27774 (set_attr "length" "6")])
27775
;; XEND: no operands; TARGET_RTM only, fixed length 3.
27776 (define_insn "xend"
27777 [(unspec_volatile [(const_int 0)] UNSPECV_XEND)]
27778 "TARGET_RTM"
27779 "xend"
27780 [(set_attr "type" "other")
27781 (set_attr "length" "3")])
27782
;; XABORT: operand 0 must satisfy const_0_to_255_operand and is printed
;; as the instruction's operand.  TARGET_RTM only, fixed length 3.
27783 (define_insn "xabort"
27784 [(unspec_volatile [(match_operand:SI 0 "const_0_to_255_operand")]
27785 UNSPECV_XABORT)]
27786 "TARGET_RTM"
27787 "xabort\t%0"
27788 [(set_attr "type" "other")
27789 (set_attr "length" "3")])
27790
;; Expander for xtest (TARGET_RTM).  It emits xtest_1, which sets the
;; flags in CCZmode, and then uses ix86_expand_setcc with NE to
;; materialize the result into QImode operand 0.
27791 (define_expand "xtest"
27792 [(set (match_operand:QI 0 "register_operand")
27793 (unspec_volatile:QI [(const_int 0)] UNSPECV_XTEST))]
27794 "TARGET_RTM"
27795 {
27796 emit_insn (gen_xtest_1 ());
27797
27798 ix86_expand_setcc (operands[0], NE,
27799 gen_rtx_REG (CCZmode, FLAGS_REG), const0_rtx);
27800 DONE;
27801 })
27802
;; The XTEST instruction itself: sets FLAGS_REG in CCZmode.  Fixed
;; length 3.
27803 (define_insn "xtest_1"
27804 [(set (reg:CCZ FLAGS_REG)
27805 (unspec_volatile:CCZ [(const_int 0)] UNSPECV_XTEST))]
27806 "TARGET_RTM"
27807 "xtest"
27808 [(set_attr "type" "other")
27809 (set_attr "length" "3")])
27810
;; CLWB: operand 0 is an address (printed with %a0).  The memory attribute
;; is "unknown" and atom_sse_attr is "fence".
27811 (define_insn "clwb"
27812 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
27813 UNSPECV_CLWB)]
27814 "TARGET_CLWB"
27815 "clwb\t%a0"
27816 [(set_attr "type" "sse")
27817 (set_attr "atom_sse_attr" "fence")
27818 (set_attr "memory" "unknown")])
27819
;; CLFLUSHOPT: operand 0 is an address (printed with %a0).  The memory
;; attribute is "unknown" and atom_sse_attr is "fence".
27820 (define_insn "clflushopt"
27821 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
27822 UNSPECV_CLFLUSHOPT)]
27823 "TARGET_CLFLUSHOPT"
27824 "clflushopt\t%a0"
27825 [(set_attr "type" "sse")
27826 (set_attr "atom_sse_attr" "fence")
27827 (set_attr "memory" "unknown")])
27828
27829 ;; MONITORX and MWAITX
;; MWAITX: three SImode register inputs with constraints "c", "a" and
;; "b".  The mnemonic is emitted without explicit operands.
27830 (define_insn "mwaitx"
27831 [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
27832 (match_operand:SI 1 "register_operand" "a")
27833 (match_operand:SI 2 "register_operand" "b")]
27834 UNSPECV_MWAITX)]
27835 "TARGET_MWAITX"
27836 ;; 64bit version is "mwaitx %rax,%rcx,%rbx". But only lower 32bits are used.
27837 ;; Since 32bit register operands are implicitly zero extended to 64bit,
27838 ;; we only need to set up 32bit registers.
27839 "mwaitx"
27840 [(set_attr "length" "3")])
27841
;; MONITORX: operand 0 is a pointer-mode (P iterator) register with
;; constraint "a"; operands 1 and 2 are SImode registers ("c", "d").
;; The length is 3, plus 1 when Pmode differs from word_mode.
27842 (define_insn "@monitorx_<mode>"
27843 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
27844 (match_operand:SI 1 "register_operand" "c")
27845 (match_operand:SI 2 "register_operand" "d")]
27846 UNSPECV_MONITORX)]
27847 "TARGET_MWAITX"
27848 ;; 64bit version is "monitorx %rax,%rcx,%rdx". But only lower 32bits in
27849 ;; RCX and RDX are used. Since 32bit register operands are implicitly
27850 ;; zero extended to 64bit, we only need to set up 32bit registers.
27851 "%^monitorx"
27852 [(set (attr "length")
27853 (symbol_ref ("(Pmode != word_mode) + 3")))])
27854
27855 ;; CLZERO
;; CLZERO: takes one pointer-mode (P iterator) register with constraint
;; "a".  The memory attribute is "unknown"; fixed length 3.
27856 (define_insn "@clzero_<mode>"
27857 [(unspec_volatile [(match_operand: P 0 "register_operand" "a")]
27858 UNSPECV_CLZERO)]
27859 "TARGET_CLZERO"
27860 "clzero"
27861 [(set_attr "length" "3")
27862 (set_attr "memory" "unknown")])
27863
27864 ;; RDPKRU and WRPKRU
27865
;; Expander for rdpkru (TARGET_PKU).  The caller supplies only the result
;; (operand 0).  Operand 1 is created here as a register holding zero, and
;; operand 2 as a fresh SImode pseudo that the pattern sets to
;; (const_int 0).
27866 (define_expand "rdpkru"
27867 [(parallel
27868 [(set (match_operand:SI 0 "register_operand")
27869 (unspec_volatile:SI [(match_dup 1)] UNSPECV_PKU))
27870 (set (match_dup 2) (const_int 0))])]
27871 "TARGET_PKU"
27872 {
27873 operands[1] = force_reg (SImode, const0_rtx);
27874 operands[2] = gen_reg_rtx (SImode);
27875 })
27876
;; RDPKRU insn: result in operand 0 ("=a"), input operand 2 ("c"), and a
;; second output, operand 1 ("=d"), which the pattern sets to
;; (const_int 0).
27877 (define_insn "*rdpkru"
27878 [(set (match_operand:SI 0 "register_operand" "=a")
27879 (unspec_volatile:SI [(match_operand:SI 2 "register_operand" "c")]
27880 UNSPECV_PKU))
27881 (set (match_operand:SI 1 "register_operand" "=d")
27882 (const_int 0))]
27883 "TARGET_PKU"
27884 "rdpkru"
27885 [(set_attr "type" "other")])
27886
;; Expander for wrpkru (TARGET_PKU).  The caller supplies operand 0;
;; operands 1 and 2 are both created here as registers holding zero.
27887 (define_expand "wrpkru"
27888 [(unspec_volatile:SI
27889 [(match_operand:SI 0 "register_operand")
27890 (match_dup 1) (match_dup 2)] UNSPECV_PKU)]
27891 "TARGET_PKU"
27892 {
27893 operands[1] = force_reg (SImode, const0_rtx);
27894 operands[2] = force_reg (SImode, const0_rtx);
27895 })
27896
;; WRPKRU insn: three SImode register inputs with constraints "a", "d"
;; and "c"; no output operand.
27897 (define_insn "*wrpkru"
27898 [(unspec_volatile:SI
27899 [(match_operand:SI 0 "register_operand" "a")
27900 (match_operand:SI 1 "register_operand" "d")
27901 (match_operand:SI 2 "register_operand" "c")] UNSPECV_PKU)]
27902 "TARGET_PKU"
27903 "wrpkru"
27904 [(set_attr "type" "other")])
27905
;; RDPID for !TARGET_64BIT: result in an SImode register.
27906 (define_insn "rdpid"
27907 [(set (match_operand:SI 0 "register_operand" "=r")
27908 (unspec_volatile:SI [(const_int 0)] UNSPECV_RDPID))]
27909 "!TARGET_64BIT && TARGET_RDPID"
27910 "rdpid\t%0"
27911 [(set_attr "type" "other")])
27912
;; RDPID for TARGET_64BIT: result in a DImode register.
27913 (define_insn "rdpid_rex64"
27914 [(set (match_operand:DI 0 "register_operand" "=r")
27915 (unspec_volatile:DI [(const_int 0)] UNSPECV_RDPID))]
27916 "TARGET_64BIT && TARGET_RDPID"
27917 "rdpid\t%0"
27918 [(set_attr "type" "other")])
27919
27920 ;; Intrinsics for > i486
27921
;; WBINVD: no operands and an empty condition, so it is always available.
27922 (define_insn "wbinvd"
27923 [(unspec_volatile [(const_int 0)] UNSPECV_WBINVD)]
27924 ""
27925 "wbinvd"
27926 [(set_attr "type" "other")])
27927
;; WBNOINVD: no operands; enabled for TARGET_WBNOINVD.
27928 (define_insn "wbnoinvd"
27929 [(unspec_volatile [(const_int 0)] UNSPECV_WBNOINVD)]
27930 "TARGET_WBNOINVD"
27931 "wbnoinvd"
27932 [(set_attr "type" "other")])
27933
27934 ;; MOVDIRI and MOVDIR64B
27935
;; MOVDIRI: stores SWI48 register operand 1 to memory operand 0 through
;; an UNSPEC_MOVDIRI (a plain unspec, not unspec_volatile).
27936 (define_insn "movdiri<mode>"
27937 [(set (match_operand:SWI48 0 "memory_operand" "=m")
27938 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
27939 UNSPEC_MOVDIRI))]
27940 "TARGET_MOVDIRI"
27941 "movdiri\t{%1, %0|%0, %1}"
27942 [(set_attr "type" "other")])
27943
;; MOVDIR64B: the destination is an XImode MEM whose address is the
;; pointer-mode register operand 0; the source is XImode memory operand 1.
27944 (define_insn "@movdir64b_<mode>"
27945 [(set (mem:XI (match_operand:P 0 "register_operand" "r"))
27946 (unspec:XI [(match_operand:XI 1 "memory_operand" "m")]
27947 UNSPEC_MOVDIR64B))]
27948 "TARGET_MOVDIR64B"
27949 "movdir64b\t{%1, %0|%0, %1}"
27950 [(set_attr "type" "other")])
27951
27952 ;; TSXLDTRK
;; The two TSXLDTRK unspecs, the attribute mapping each one to its
;; mnemonic, and one operand-less insn per unspec.  The insn name and the
;; output template are both the mnemonic; fixed length 4.
27953 (define_int_iterator TSXLDTRK [UNSPECV_XSUSLDTRK UNSPECV_XRESLDTRK])
27954 (define_int_attr tsxldtrk [(UNSPECV_XSUSLDTRK "xsusldtrk")
27955 (UNSPECV_XRESLDTRK "xresldtrk")])
27956 (define_insn "<tsxldtrk>"
27957 [(unspec_volatile [(const_int 0)] TSXLDTRK)]
27958 "TARGET_TSXLDTRK"
27959 "<tsxldtrk>"
27960 [(set_attr "type" "other")
27961 (set_attr "length" "4")])
27962
27963 ;; ENQCMD and ENQCMDS
27964
;; ENQCMD/ENQCMDS unspecs, and the mnemonic suffix for each ("" or "s").
27965 (define_int_iterator ENQCMD [UNSPECV_ENQCMD UNSPECV_ENQCMDS])
27966 (define_int_attr enqcmd_sfx [(UNSPECV_ENQCMD "") (UNSPECV_ENQCMDS "s")])
27967
;; ENQCMD/ENQCMDS: operand 0 is a pointer-mode register and operand 1 an
;; XImode memory operand.  The only result modelled is FLAGS_REG in
;; CCZmode.
27968 (define_insn "@enqcmd<enqcmd_sfx>_<mode>"
27969 [(set (reg:CCZ FLAGS_REG)
27970 (unspec_volatile:CCZ [(match_operand:P 0 "register_operand" "r")
27971 (match_operand:XI 1 "memory_operand" "m")]
27972 ENQCMD))]
27973 "TARGET_ENQCMD"
27974 "enqcmd<enqcmd_sfx>\t{%1, %0|%0, %1}"
27975 [(set_attr "type" "other")])
27976
27977 ;; UINTR
;; CLUI/STUI unspecs and the attribute mapping each to its mnemonic.
27978 (define_int_iterator UINTR [UNSPECV_CLUI UNSPECV_STUI])
27979 (define_int_attr uintr [(UNSPECV_CLUI "clui") (UNSPECV_STUI "stui")])
27980
;; clui / stui: operand-less insns generated from the UINTR iterator.
;; Enabled for TARGET_UINTR && TARGET_64BIT; fixed length 4.
27981 (define_insn "<uintr>"
27982 [(unspec_volatile [(const_int 0)] UINTR)]
27983 "TARGET_UINTR && TARGET_64BIT"
27984 "<uintr>"
27985 [(set_attr "type" "other")
27986 (set_attr "length" "4")])
27987
;; TESTUI: sets FLAGS_REG in CCCmode; no other operands.
27988 (define_insn "testui"
27989 [(set (reg:CCC FLAGS_REG)
27990 (unspec_volatile:CCC [(const_int 0)] UNSPECV_TESTUI))]
27991 "TARGET_UINTR && TARGET_64BIT"
27992 "testui"
27993 [(set_attr "type" "other")
27994 (set_attr "length" "4")])
27995
;; SENDUIPI: takes one DImode register input; no result is set.
27996 (define_insn "senduipi"
27997 [(unspec_volatile
27998 [(match_operand:DI 0 "register_operand" "r")]
27999 UNSPECV_SENDUIPI)]
28000 "TARGET_UINTR && TARGET_64BIT"
28001 "senduipi\t%0"
28002 [(set_attr "type" "other")
28003 (set_attr "length" "4")])
28004
28005 ;; WAITPKG
28006
;; UMWAIT for !TARGET_64BIT: operand 0 is an SImode register (the only
;; operand printed) and operand 1 a DImode input with constraint "A".
;; The result is FLAGS_REG in CCCmode.
28007 (define_insn "umwait"
28008 [(set (reg:CCC FLAGS_REG)
28009 (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
28010 (match_operand:DI 1 "register_operand" "A")]
28011 UNSPECV_UMWAIT))]
28012 "!TARGET_64BIT && TARGET_WAITPKG"
28013 "umwait\t%0"
28014 [(set_attr "length" "3")])
28015
;; UMWAIT for TARGET_64BIT: the DImode input of the 32-bit pattern is
;; split into two SImode operands with constraints "a" and "d".
28016 (define_insn "umwait_rex64"
28017 [(set (reg:CCC FLAGS_REG)
28018 (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
28019 (match_operand:SI 1 "register_operand" "a")
28020 (match_operand:SI 2 "register_operand" "d")]
28021 UNSPECV_UMWAIT))]
28022 "TARGET_64BIT && TARGET_WAITPKG"
28023 "umwait\t%0"
28024 [(set_attr "length" "3")])
28025
;; UMONITOR: takes one pointer-mode (P iterator) register input.  The
;; length is 3, plus 1 when Pmode differs from word_mode.
28026 (define_insn "@umonitor_<mode>"
28027 [(unspec_volatile [(match_operand:P 0 "register_operand" "r")]
28028 UNSPECV_UMONITOR)]
28029 "TARGET_WAITPKG"
28030 "umonitor\t%0"
28031 [(set (attr "length")
28032 (symbol_ref ("(Pmode != word_mode) + 3")))])
28033
;; TPAUSE for !TARGET_64BIT: operand 0 is an SImode register (the only
;; operand printed) and operand 1 a DImode input with constraint "A".
;; The result is FLAGS_REG in CCCmode.
28034 (define_insn "tpause"
28035 [(set (reg:CCC FLAGS_REG)
28036 (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
28037 (match_operand:DI 1 "register_operand" "A")]
28038 UNSPECV_TPAUSE))]
28039 "!TARGET_64BIT && TARGET_WAITPKG"
28040 "tpause\t%0"
28041 [(set_attr "length" "3")])
28042
;; TPAUSE for TARGET_64BIT: the DImode input of the 32-bit pattern is
;; split into two SImode operands with constraints "a" and "d".
28043 (define_insn "tpause_rex64"
28044 [(set (reg:CCC FLAGS_REG)
28045 (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
28046 (match_operand:SI 1 "register_operand" "a")
28047 (match_operand:SI 2 "register_operand" "d")]
28048 UNSPECV_TPAUSE))]
28049 "TARGET_64BIT && TARGET_WAITPKG"
28050 "tpause\t%0"
28051 [(set_attr "length" "3")])
28052
;; CLDEMOTE: operand 0 is an address (printed with %a0); the memory
;; attribute is "unknown".
28053 (define_insn "cldemote"
28054 [(unspec_volatile[(match_operand 0 "address_operand" "p")]
28055 UNSPECV_CLDEMOTE)]
28056 "TARGET_CLDEMOTE"
28057 "cldemote\t%a0"
28058 [(set_attr "type" "other")
28059 (set_attr "memory" "unknown")])
28060
;; Speculation barrier: emitted as "lfence", with an empty condition
;; (no target flag is checked here).  Fixed length 3.
28061 (define_insn "speculation_barrier"
28062 [(unspec_volatile [(const_int 0)] UNSPECV_SPECULATION_BARRIER)]
28063 ""
28064 "lfence"
28065 [(set_attr "type" "other")
28066 (set_attr "length" "3")])
28067
;; SERIALIZE: no operands; enabled for TARGET_SERIALIZE, fixed length 3.
28068 (define_insn "serialize"
28069 [(unspec_volatile [(const_int 0)] UNSPECV_SERIALIZE)]
28070 "TARGET_SERIALIZE"
28071 "serialize"
28072 [(set_attr "type" "other")
28073 (set_attr "length" "3")])
28074
;; Patchable area pseudo-insn.  The output is produced entirely by
;; ix86_output_patchable_area, called with INTVAL (operands[0]) and a
;; flag that is true when operand 1 is nonzero; the template itself is
;; empty.  The length attribute is taken directly from operand 0.
28075 (define_insn "patchable_area"
28076 [(unspec_volatile [(match_operand 0 "const_int_operand")
28077 (match_operand 1 "const_int_operand")]
28078 UNSPECV_PATCHABLE_AREA)]
28079 ""
28080 {
28081 ix86_output_patchable_area (INTVAL (operands[0]),
28082 INTVAL (operands[1]) != 0);
28083 return "";
28084 }
28085 [(set (attr "length") (symbol_ref "INTVAL (operands[0])"))
28086 (set_attr "length_immediate" "0")
28087 (set_attr "modrm" "0")])
28088
;; HRESET: operand 0 is an SImode register with constraint "a".  It is
;; not referenced in the output template, which prints a literal 0
;; ("$0" in the first dialect, "0" in the second).
28089 (define_insn "hreset"
28090 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")]
28091 UNSPECV_HRESET)]
28092 "TARGET_HRESET"
28093 "hreset\t{$0|0}"
28094 [(set_attr "type" "other")
28095 (set_attr "length" "4")])
28096
28097 ;; Spaceship optimization
;; Three-way floating-point comparison expander for the MODEF modes.
;; Operand 0 is the SImode result; operands 1 and 2 are the values to
;; compare.  All work is delegated to ix86_expand_fp_spaceship.  Requires
;; x87 or SSE math for the mode, plus either CMOV or usable SAHF.
28098 (define_expand "spaceship<mode>3"
28099 [(match_operand:SI 0 "register_operand")
28100 (match_operand:MODEF 1 "cmp_fp_expander_operand")
28101 (match_operand:MODEF 2 "cmp_fp_expander_operand")]
28102 "(TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
28103 && (TARGET_CMOVE || (TARGET_SAHF && TARGET_USE_SAHF))"
28104 {
28105 ix86_expand_fp_spaceship (operands[0], operands[1], operands[2]);
28106 DONE;
28107 })
28108
;; XFmode three-way comparison.  Operand 0 is the SImode result and
;; operands 1 and 2 are non-memory XFmode values.  Requires the 387
;; plus either CMOV or a usable SAHF; the expansion itself is done by
;; ix86_expand_fp_spaceship.
(define_expand "spaceshipxf3"
  [(match_operand:SI 0 "register_operand")
   (match_operand:XF 1 "nonmemory_operand")
   (match_operand:XF 2 "nonmemory_operand")]
  "TARGET_80387 && (TARGET_CMOVE || (TARGET_SAHF && TARGET_USE_SAHF))"
{
  ix86_expand_fp_spaceship (operands[0], operands[1], operands[2]);
  DONE;
})
28118
;; XFmode gets its own issignaling expander: the generic
;; expand_builtin_issignaling only detects sNaNs for XFmode, whereas
;; the i387 also treats pseudo numbers as always signaling.
;;
;; Operand 0 receives the SImode result; operand 1 is the XFmode value
;; under test.
(define_expand "issignalingxf2"
  [(match_operand:SI 0 "register_operand")
   (match_operand:XF 1 "general_operand")]
  ""
{
  /* The value is inspected piecewise through memory, so anything that
     is not already a MEM is first copied into a stack temporary.  */
  rtx src = operands[1];
  if (!MEM_P (src))
    {
      rtx slot = assign_stack_temp (XFmode, GET_MODE_SIZE (XFmode));
      emit_move_insn (slot, src);
      src = slot;
    }

  /* Three views of the same memory: the 16-bit word at offset 8 and
     the two 32-bit words at offsets 4 and 0.  */
  rtx expo = adjust_address (src, HImode, 8);
  rtx sig_hi = adjust_address (src, SImode, 4);
  rtx sig_lo = adjust_address (src, SImode, 0);

  rtx limit = GEN_INT (HOST_WIDE_INT_M1U << 30);
  rtx exp_mask = GEN_INT (0x7fff);
  rtx bit30 = GEN_INT (HOST_WIDE_INT_1U << 30);

  /* The emitted sequence computes
       ((expo & exp_mask) && (int) sig_hi >= 0)
       || ((expo & exp_mask) == exp_mask
           && ((sig_hi ^ bit30) | ((sig_lo | -sig_lo) >> 31)) > limit).
     The order of the calls below fixes the order of the emitted insns.  */

  /* (sig_lo | -sig_lo) >> 31 with a logical shift: 1 iff sig_lo != 0.  */
  rtx neg_lo = expand_unop (SImode, neg_optab, sig_lo, NULL_RTX, 0);
  rtx lo_or_neg = expand_binop (SImode, ior_optab, sig_lo, neg_lo,
                                NULL_RTX, 1, OPTAB_LIB_WIDEN);
  rtx lo_nonzero = expand_shift (RSHIFT_EXPR, SImode, lo_or_neg, 31,
                                 NULL_RTX, 1);

  /* Flip bit 30 of the high word, fold in the "low word nonzero" bit
     and compare unsigned against LIMIT.  */
  rtx hi_flipped = expand_binop (SImode, xor_optab, sig_hi, bit30,
                                 NULL_RTX, 1, OPTAB_LIB_WIDEN);
  rtx merged = expand_binop (SImode, ior_optab, hi_flipped, lo_nonzero,
                             NULL_RTX, 1, OPTAB_LIB_WIDEN);
  rtx above_limit = emit_store_flag_force (gen_reg_rtx (SImode), GTU,
                                           merged, limit, SImode, 1, 1);

  /* Exponent field: nonzero?  all ones?
     NOTE(review): the NE test passes SImode as the comparison mode
     while the EQ test on the same HImode value passes HImode --
     confirm this asymmetry is intended.  */
  rtx exp_bits = expand_binop (HImode, and_optab, expo, exp_mask,
                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
  rtx exp_nonzero = emit_store_flag_force (gen_reg_rtx (SImode), NE,
                                           exp_bits, const0_rtx,
                                           SImode, 1, 1);
  rtx exp_all_ones = emit_store_flag_force (gen_reg_rtx (SImode), EQ,
                                            exp_bits, exp_mask,
                                            HImode, 1, 1);

  /* Second disjunct: exponent all ones and the merged word above LIMIT.  */
  rtx snan_p = expand_binop (SImode, and_optab, above_limit, exp_all_ones,
                             NULL_RTX, 1, OPTAB_LIB_WIDEN);

  /* First disjunct: exponent nonzero and the high word non-negative
     when read as a signed value, i.e. its top bit is clear.  */
  rtx hi_nonneg = emit_store_flag_force (gen_reg_rtx (SImode), GE,
                                         sig_hi, const0_rtx,
                                         SImode, 0, 1);
  rtx pseudo_p = expand_binop (SImode, and_optab, exp_nonzero, hi_nonneg,
                               NULL_RTX, 1, OPTAB_LIB_WIDEN);

  rtx result = expand_binop (SImode, ior_optab, snan_p, pseudo_p,
                             NULL_RTX, 1, OPTAB_LIB_WIDEN);
  emit_move_insn (operands[0], result);
  DONE;
})
28170
;; URDMSR: sets DImode register operand 0 from a volatile unspec of
;; operand 1, which may be a register or a constant accepted by the
;; "e"/"Z" constraints.  Requires TARGET_USER_MSR in 64-bit mode.
(define_insn "urdmsr"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (unspec_volatile:DI
          [(match_operand:DI 1 "x86_64_szext_nonmemory_operand" "reZ")]
          UNSPECV_URDMSR))]
  "TARGET_USER_MSR && TARGET_64BIT"
  "urdmsr\t{%1, %0|%0, %1}"
  [(set_attr "type" "other")
   (set_attr "prefix" "vex")])
28180
;; UWRMSR: a volatile unspec with no output operand.  Operand 0 may be
;; a register or a constant accepted by the "e"/"Z" constraints;
;; operand 1 is a DImode register.  Requires TARGET_USER_MSR in 64-bit
;; mode.
(define_insn "uwrmsr"
  [(unspec_volatile
     [(match_operand:DI 0 "x86_64_szext_nonmemory_operand" "reZ")
      (match_operand:DI 1 "register_operand" "r")]
     UNSPECV_UWRMSR)]
  "TARGET_USER_MSR && TARGET_64BIT"
  "uwrmsr\t{%1, %0|%0, %1}"
  [(set_attr "type" "other")
   (set_attr "prefix" "vex")])
28190
28191 (include "mmx.md")
28192 (include "sse.md")
28193 (include "sync.md")